Compare commits

...

162 Commits
beta ... master

Author SHA1 Message Date
Milorad FIlipović
e00dce45c6
feat(core): Add telemetry for mcp skills use (no-changelog) (#31625)
Some checks are pending
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Co-authored-by: Ricardo Espinoza <ricardo@n8n.io>
2026-06-04 19:54:28 +00:00
n8n-cat-bot[bot]
f301e434ee
build: Pin node24/alpine3.22 base images to digests in Docker build (#31772)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 19:37:00 +00:00
n8n-cat-bot[bot]
942682e66a
test: Strengthen getChildNodes tests to raise mutation score (#31771)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 18:43:54 +00:00
Guillaume Jacquart
b2942b4b54
fix(editor): Reset connection state when switching credential to private (#31713)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 18:23:02 +00:00
victorbjor
88fae045ef
ci: Allow environment variable override for Docker platform choice (no-changelog) (#17907)
Co-authored-by: Declan Carroll <declan@n8n.io>
2026-06-04 17:27:40 +00:00
Riqwan Thamir
d58b7ea943
fix(core): Temporarily disable error card from iAi chat (#31740) 2026-06-04 17:08:05 +00:00
n8n-cat-bot[bot]
6cfcc4c386
test: Strengthen getParentNodes tests to raise mutation score (#31759)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 16:44:52 +00:00
n8n-cat-bot[bot]
91182ed02b
build: Add pnpm agent:setup for fresh-checkout install + build + test (#31756)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 16:44:24 +00:00
n8n-cat-bot[bot]
ff3657ded7
ci: Activate V8 E2E impact map filter in PR CI (#31757)
Some checks are pending
Build: Benchmark Image / build (push) Waiting to run
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Util: Sync API Docs / sync-public-api (push) Waiting to run
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 16:15:01 +00:00
n8n-cat-bot[bot]
61b2e7c382
test: Quarantine instance-ai workflow-setup suite on sqlite e2e (#31751)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 16:11:39 +00:00
Albert Alises
491360fbc7
fix(core): Ground AI workflow completion in saved graph (#31742) 2026-06-04 16:08:54 +00:00
n8n-cat-bot[bot]
c02665a5b0
fix(editor): Make project delete dialog backdrop cover the sidebar (#31735)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
Co-authored-by: Irénée <irenee.ajeneza@n8n.io>
2026-06-04 15:36:40 +00:00
eric-liu
add5ab29dd
feat: Add AWS Assume Role support for Bedrock nodes (#28663)
Co-authored-by: Alexander Gekov <40495748+alexander-gekov@users.noreply.github.com>
Co-authored-by: Declan Carroll <declan@n8n.io>
2026-06-04 15:08:41 +00:00
Tongbo
ee7aa0b66a
fix(Postgres Node): Spread array queryReplacement across multiple bind values (#31704)
Co-authored-by: Bao <bao0541@users.noreply.github.com>
2026-06-04 15:02:31 +00:00
n8n-cat-bot[bot]
6eb583b2bd
ci: Add source_file input to mutation-health workflow for on-demand (#31753)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 14:52:34 +00:00
Benjamin Schroth
52f0960cea
feat(editor): Evaluations canvas info card (no-changelog) (#31318) 2026-06-04 14:44:13 +00:00
Stephen Wright
ac4778bb5c
feat(NVIDIA Nemotron Chat Model Node): Restrict model selector to supported models (#31698) 2026-06-04 14:30:22 +00:00
oleg
ab741ed6db
refactor(ai-builder): Remove retired AI builder experiment flags (no-changelog) (#31744) 2026-06-04 14:26:03 +00:00
Adilson Junior
a82384f90d
feat(core): Revamp infisical implementation (#30843) 2026-06-04 15:17:55 +01:00
Guillaume Jacquart
f1d87fddb2
fix(editor): Show switch-to-static warning after connecting a private credential (#31712)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 13:56:52 +00:00
Guillaume Jacquart
f723f54879
fix(editor): Persist switching workflow credential resolver back to system resolver (#31703)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 13:56:47 +00:00
Benjamin Schroth
a7f660c8d4
feat(editor): Evaluations empty-state landing page (no-changelog) (#31319) 2026-06-04 13:56:07 +00:00
Tomi Turtiainen
bca1e08ea8
chore(core): Require SQLite migration recreate acknowledgements (#31202) 2026-06-04 13:42:27 +00:00
Jaakko Husso
2993afb31d
fix(core): Keep Instance AI builder sandboxes thread-scoped and non-ephemeral (#31745) 2026-06-04 13:36:17 +00:00
Benjamin Schroth
c74fc95a3b
feat(editor): Evaluations wizard sidepanel (no-changelog) (#31317) 2026-06-04 12:41:31 +00:00
Bernhard Wittmann
f4e998f245
fix(core): Prefer MCP registry nodes when wiring AI Agent tools (#30774)
Co-authored-by: Roman Davydchuk <roman.davydchuk@n8n.io>
2026-06-04 12:24:13 +00:00
Raúl Gómez Morales
22eb20f183
feat(core): Surface workflow execution source to the editor (no-changelog) (#31724) 2026-06-04 12:22:10 +00:00
n8n-cat-bot[bot]
bc7aeb6fe8
ci: Raise Node heap for Discovery Evals build step (#31738)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 12:18:35 +00:00
Alex Grozav
15749aa39e
refactor(editor): Resolve workflow push handlers by id and remove workflow state store and composable (no-changelog) (#31514)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 12:06:06 +00:00
Irénée
b66d33c305
feat: Gate custom telemetry attributes by license (#31723) 2026-06-04 11:54:02 +00:00
Svetoslav Dekov
f16befcb3b
feat(editor): Guard canvas node groups during graph edits (no-changelog) (#30473)
Co-authored-by: Daria <daria.staferova@n8n.io>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-04 11:32:28 +00:00
Savelii
f459d73236
feat(core): Add "Additional scopes" field to OIDC SSO setup (#31708)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-04 10:49:36 +00:00
n8n-cat-bot[bot]
203e816b82
ci: Route PR CI e2e to dedicated Currents project (#31722)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
2026-06-04 10:32:33 +00:00
Riqwan Thamir
790afbcf3d
feat(core): Add smooth stream options to agents sdk (#31715)
Co-authored-by: bjorger <50590409+bjorger@users.noreply.github.com>
2026-06-04 10:28:40 +00:00
Matsu
8eb25b78af
ci: Run stable branch removal every monday (#31725) 2026-06-04 10:14:06 +00:00
Emilia
0b2a6328cf
fix(core): Sub-workflows to respect own timeout settings and global timeout config (#31536)
Co-authored-by: Tomi Turtiainen <10324676+tomi@users.noreply.github.com>
Co-authored-by: Danny Martini <danny@n8n.io>
2026-06-04 10:07:29 +00:00
José Braulio González Valido
f25e12de87
ci(ai-builder): Add iterations + experiment-name inputs to instance-ai eval dispatch (no-changelog) (#31631)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 09:57:08 +00:00
Iván Ovejero
b858fbe91f
feat(core): Add Prometheus metrics for execution data (#31655) 2026-06-04 09:51:21 +00:00
Jaakko Husso
13a1a993f5
fix(core): Make AI assistant conversation pruning happen regularly (#31707) 2026-06-04 09:39:22 +00:00
n8n-cat-bot[bot]
780e86c838
ci: Skip grind workflow on fork PRs (#31717)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 09:34:34 +00:00
Albert Alises
6cd07e5b42
fix: Add item-flow builder guidance (no-changelog) (#31660) 2026-06-04 09:23:20 +00:00
Michael Kret
e264e061b4
fix(AWS Rekognition Node): Handle all binary data modes correctly (#31065) 2026-06-04 09:21:51 +00:00
yehorkardash
bcbcf7be69
fix: Use isolated runtimes for agent calls (no-changelog) (#31658) 2026-06-04 09:21:13 +00:00
Matsu
77a1b844b5
ci: Add a self-rolled stale branch removal script (#31694) 2026-06-04 09:20:50 +00:00
Arvin A
3fb0540dfb
test(core): Cover AI tool continue-on-error default and canvas retry state (#31493)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-04 09:01:12 +00:00
Rob Hough
f484b6331b
fix(editor): Avoid duplicate empty-state prompt copy (no-changelog) (#31182) 2026-06-04 08:53:48 +00:00
Bernhard Wittmann
11dfca24d9
feat(editor): Add shared tools-connection modal (#31381)
Co-authored-by: Elias Meire <elias@meire.dev>
2026-06-04 08:36:42 +00:00
oleg
afd7ddf372
feat(core): Use n8n default sandbox for Instance AI (no-changelog) (#31335)
Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
Co-authored-by: Claude <noreply@anthropic.com>
2026-06-04 08:31:27 +00:00
Declan Carroll
07119ce61d
refactor: Consolidate janitor + code-health onto a shared rules-engine AST substrate (#31417)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 08:27:01 +00:00
Dimitri Lavrenük
85ecd70ccc
chore: Enhance PR template with testing instructions (no-changelog) (#31696)
Some checks are pending
Build: Benchmark Image / build (push) Waiting to run
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Util: Sync API Docs / sync-public-api (push) Waiting to run
2026-06-04 08:11:11 +00:00
Arvin A
5e0e2661f5
fix(core): Polyfill DOMMatrix when parsing PDFs in Data Loader (#31669) 2026-06-04 08:06:30 +00:00
Declan Carroll
66308a6fc4
ci: Bump grind workflow runner to 4-vCPU (#31668)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 08:06:09 +00:00
Declan Carroll
61ea6881e0
ci: Run Postgres 16 DB tests on a single 8-vCPU runner (#31670)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 08:05:52 +00:00
n8n-cat-bot[bot]
53b56dbf0c
ci: Tune instance-ai discovery eval runner and triggers (#31697)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-04 08:02:39 +00:00
Thanasis G
9ac7c5b0ee
feat(core): Add telemetry for data redaction enforcement instance policy (no-changelog) (#31572)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Csaba Tuncsik <csaba.tuncsik@gmail.com>
2026-06-04 08:02:08 +00:00
oleg
69e9202822
chore(core): Update Instance AI default model (#31695) 2026-06-04 07:53:36 +00:00
n8n-cat-bot[bot]
e9ab2b4d31
ci: Make Playwright browsers path absolute in E2E workflow (#31686)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
2026-06-04 07:49:42 +00:00
RomanDavydchuk
a44197c135
fix(Google Sheets Trigger Node): Custom OAuth option not availble when there are managed credentials (#29933)
Co-authored-by: Michael Kret <88898367+michael-radency@users.noreply.github.com>
2026-06-04 07:36:55 +00:00
Riqwan Thamir
08180344ea
feat(core): Add templates to knowledge base (no-changelog) (#31642) 2026-06-04 06:41:13 +00:00
Michael Kret
ab849d3fa8
fix(Microsoft Entra Node): Refresh expired OAuth2 tokens (#30943) 2026-06-04 06:29:56 +00:00
Matsu
d7d2071bdd
test: Migrate @n8n/db from Jest to Vitest (no-changelog) (#31560)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 06:17:59 +00:00
Benjamin Schroth
ea800f715d
fix(core): Prevent evaluation executions from stalling in status new (#31619) 2026-06-04 06:13:06 +00:00
n8n-cat-bot[bot]
fe87a5da57
ci: Pin pnpm via packageManager and set PNPM_HOME (#31653)
Some checks are pending
Build: Benchmark Image / build (push) Waiting to run
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Util: Sync API Docs / sync-public-api (push) Waiting to run
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 22:39:11 +00:00
Thanasis G
58b0965f66
feat(core): Track redaction policy source on execution runtime data (#31079)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-03 20:34:02 +00:00
n8n-cat-bot[bot]
868e988c09
fix(core): Bump qs, tmp, uuid, @tootallnate/once (#31674)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 18:51:29 +00:00
bjorger
73d8bbe121
feat(core): Add inline sub-agent delegation (#31553)
Some checks are pending
Build: Benchmark Image / build (push) Waiting to run
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Util: Sync API Docs / sync-public-api (push) Waiting to run
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-03 16:00:54 +00:00
Danny Martini
a089408968
test(core): Guard $jmespath resolution under the VM expression engine (#31671)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 15:57:36 +00:00
Albert Alises
7e8c04299c
perf: Reduce Instance AI memory usage (#31656)
Co-authored-by: Jaakko Husso <jaakko@n8n.io>
2026-06-03 15:54:42 +00:00
Julian
68b205317a
fix(core): Include node and field details in WorkflowHasIssuesError message (#30944)
Co-authored-by: Danny Martini <danny@n8n.io>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 15:40:52 +00:00
Jaakko Husso
001d242af6
fix(core): Preserve filesystem binding when reading workspace files (no-changelog) (#31667)
Co-authored-by: Riqwan Thamir <rmthamir@gmail.com>
2026-06-03 15:40:41 +00:00
Hammad Khan
43d32fd28f
fix(TheHiveProject Node): Normalize analyzers when expression returns a string (#31580) 2026-06-03 15:06:28 +00:00
Eugene
9cb9a1fc46
feat(core): Manual workflow executions call the unpublished agent (no-changelog) (#31585) 2026-06-03 14:44:48 +00:00
Thomas Shellberg
a3e37fcd12
fix(core): Prevent isUniqueConstraintError false positives (#31284)
Co-authored-by: SAAKSHI GUPTA <saakshigupta2002@gmail.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-03 14:23:23 +00:00
Bernhard Wittmann
7efcc311b5
feat: Add MCP registry instance AI connections endpoints (#31618) 2026-06-03 14:21:36 +00:00
Tomi Turtiainen
6d73d8d9ca
build: Disable typecheck on build commands (no-changelog) (#31636) 2026-06-03 13:52:25 +00:00
Tomi Turtiainen
8570de09c0
chore: Add clean scripts to all buildable packages (no-changelog) (#31622) 2026-06-03 13:47:34 +00:00
Ilfat Mindubaev
21db4bcd6c
feat(core): Apply instance redaction floor to new workflows (#31532) 2026-06-03 13:46:33 +00:00
Matsu
5feafdfafd
test: Migrate @n8n/computer-use from Jest to Vitest (#31482)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 13:42:19 +00:00
Danny Martini
a3cb0c801a
docs: Clarify Linear ticket requirement in PR conventions (#31652) 2026-06-03 13:36:46 +00:00
Joco-95
ef3a5606e0
feat: Implements AI Assistant empty state workflow previews experiment (#31519) 2026-06-03 13:34:54 +00:00
Michael Kret
5504361d0f
fix(editor): Add evaluation trigger from setup (#31102)
Co-authored-by: Dawid Myslak <dawid.myslak@gmail.com>
2026-06-03 13:23:12 +00:00
Rohit-Gahlawat
330d44148f
fix(editor): Load all SSO role mapping rules instead of first page (#31523) 2026-06-03 13:11:35 +00:00
n8n-cat-bot[bot]
31c60bf959
ci: Pin behaviour of graph helpers to kill 0% mutation (#31611)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 12:56:16 +00:00
bjorger
872171cd66
fix(core): Normalize LangChain parser errors (#31499)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-03 12:51:23 +00:00
Stephen Wright
d4bb69aee2
refactor(core): Store redaction enforcement as a floor enum natively (#31629) 2026-06-03 12:29:42 +00:00
Danny Martini
6e0517e134
refactor(core): Use Time constants for time-based config defaults (#31639)
Co-authored-by: Emilia <emilia.trinca@n8n.io>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 12:23:27 +00:00
Declan Carroll
d57545d4d0
ci: V8 E2E coverage + per-spec impact map (DEVP-205) (#31441)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 12:17:27 +00:00
Matsu
bc53dc101f
ci: Run ci-master only from master branch (#31651) 2026-06-03 12:09:39 +00:00
n8n-cat-bot[bot]
ecce35049d
fix(core): Reclassify expected backend errors out of the Sentry error stream (#31423)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
Co-authored-by: Declan Carroll <declan@n8n.io>
2026-06-03 12:06:34 +00:00
n8n-cat-bot[bot]
7afc32a15d
feat(core): Attach executionId and execution deep link to Sentry events (#31645)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 12:06:08 +00:00
Michael Drury
64d4a7457b
fix(core): Agents correctly clear thinking status once they respond in Slack DMs (no-changelog) (#31591) 2026-06-03 12:05:38 +00:00
n8n-cat-bot[bot]
3c46e3155c
ci: Bump Aikido safe-chain to 1.5.7 (#31649)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 12:02:08 +00:00
Guillaume Jacquart
7e83c7b591
fix(editor): Polish private credential pills, callout, and banners (#31604)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 11:45:15 +00:00
n8n-cat-bot[bot]
8eda311630
test: Cover getChildNodes parameter defaults (#31456)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 11:31:03 +00:00
Tomi Turtiainen
163c718a3a
refactor: Refine trigger naming in core and cli (#31632) 2026-06-03 11:30:45 +00:00
Raúl Gómez Morales
e27c4feaca
feat(core): Invalidate instance-ai build-workflow cache on canvas edits (#31274) 2026-06-03 11:09:23 +00:00
Declan Carroll
7bd7b9943b
test(benchmark): Add dedicated webhook procs + sizing matrix aggregator (DEVP-200 + DEVP-185) (#31037)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-03 10:49:17 +00:00
Matsu
3e5dcce911
ci: Prevent curl in release pipeline from hanging infinitely (#31637) 2026-06-03 10:31:11 +00:00
Matsu
6c5736f243
ci: Add workflow to clean up stale branches (#31641) 2026-06-03 10:28:37 +00:00
Albert Alises
67b177b156
test(core): Refresh Instance AI workflow builder recordings (#31413) 2026-06-03 10:28:33 +00:00
Iván Ovejero
c8ef1be0d4
perf(core): Skip execution data pre-read on full overwrite (#31634) 2026-06-03 10:27:32 +00:00
Jon
1113363eff
chore: Update community PR review skill (no-changelog) (#31380) 2026-06-03 10:23:22 +00:00
Alexander Gekov
5e9a8a071f
fix(core): Show actionable message when OAuth2 token refresh fails (#30460) 2026-06-03 10:12:48 +00:00
n8n-cat-bot[bot]
cf68ef1b8b
ci: Exclude test files from mutation-health picker (#31638)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 10:06:49 +00:00
Irénée
c32a33cda2
feat(core): Track OpenTelemetry usage telemetry (#31570) 2026-06-03 10:03:44 +00:00
Dennis Kugelmann
b430039349
feat(Google Cloud Storage Node): Add service account authentication (#30928)
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: IchordeDionysos <10195482+IchordeDionysos@users.noreply.github.com>
2026-06-03 10:02:33 +00:00
Csaba Tuncsik
de95eb84ae
feat(core): Allow stricter-than-floor workflow redaction updates (#31304) 2026-06-03 09:59:41 +00:00
Emilia
364c250ceb
fix(core): Report only error class and stack for unhandled node errors (#31628)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-06-03 09:55:56 +00:00
Sandra Zollner
91166af946
feat(core): Add id-only credential resolution to package import (#31223)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-03 09:00:57 +00:00
yehorkardash
86e42203ff
feat: Allow cancelling HITL messages (no-changelog) (#31561) 2026-06-03 08:55:04 +00:00
Matsu
a3f663d1c2
test(core): Make CBC wrong-key decryption tests deterministic (#31471)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 08:47:55 +00:00
Tomi Turtiainen
d7b759dfb4
test: Fix binary data benchmarks (#31198) 2026-06-03 08:16:32 +00:00
Alexander Gekov
95e4ee7ee2
fix(Slack Node): Normalize multiOptions values when expression returns a string (#31269) 2026-06-03 08:15:04 +00:00
Riqwan Thamir
21d7daaa82
feat(core): Add KB tools to orchestrator and planner (#31608) 2026-06-03 08:14:49 +00:00
Csaba Tuncsik
0fef7d572c
refactor(core): Resolve execution-time redaction as strictest per channel (#31504) 2026-06-03 08:12:54 +00:00
Dawid Myslak
18d4fddebb
fix(LangChain Code Node): Hide node from the node catalogue (#31464)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-03 08:06:27 +00:00
Guillaume Jacquart
27aca712df
refactor(editor): Rename Dynamic credentials to Private credentials in the UI (#31555)
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 08:02:51 +00:00
Michael Kret
8376a4f2a4
fix(core): Allow exporting workflows with AI Gateway-managed credentials (#31136) 2026-06-03 07:54:36 +00:00
Rob Hough
87d1034900
refactor(editor): Add ChatInput to @n8n/design-system (#31361) 2026-06-03 07:52:08 +00:00
Ricardo Espinoza
412044731e
feat(core): Split API key scopes and let admins see and revoke any key (#31237)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-03 07:51:22 +00:00
Matsu
3ea606ac60
test: Resolve test-config module aliases dynamically instead of hardcoding versions (no-changelog) (#31621)
Some checks are pending
Build: Benchmark Image / build (push) Waiting to run
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Util: Sync API Docs / sync-public-api (push) Waiting to run
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 07:44:01 +00:00
Matsu
25766222b8
chore: Migrate instance-ai from Jest to Vitest (#31463) 2026-06-03 06:48:27 +00:00
Charlie Kolb
24f27ed559
fix(core): Improve validator error messages for name and label fields (#31391) 2026-06-03 06:41:25 +00:00
Matsu
0a3d04faa2
test: Migrate @n8n/workflow-sdk from Jest to Vitest (no-changelog) (#31546)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 06:08:31 +00:00
Michael Kret
7ab8254329
fix(core): Resolve custom node icon paths (#30946) 2026-06-03 06:00:56 +00:00
Matsu
2824370072
test: Migrate @n8n/mcp-browser test suite from Jest to Vitest (no-changelog) (#31527) 2026-06-03 05:57:20 +00:00
Michael Kret
151fd83e0a
fix(editor): Show error feedback when copying non-duplicatable triggers (#31104) 2026-06-03 05:57:00 +00:00
Michael Kret
ec44980689
fix: Format single-line PEM keys when newlines are stripped on paste (#31129)
Co-authored-by: Alexander Gekov <40495748+alexander-gekov@users.noreply.github.com>
2026-06-03 05:54:51 +00:00
Michael Kret
9963143c57
fix(HTTP Request Node): Sign Amazon Bedrock requests as 'bedrock' service (#31250) 2026-06-03 05:51:42 +00:00
Srinjoy
6bcd02a5f0
fix(core): Fix hardcoded aud claim in MCP OAuth tokens (#30558)
Some checks are pending
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Util: Sync API Docs / sync-public-api (push) Waiting to run
Co-authored-by: Ricardo Espinoza <ricardo@n8n.io>
2026-06-02 21:37:30 +00:00
n8n-cat-bot[bot]
6b76fb7ed9
ci: Handle empty ledger body in mutation-health picker (#31607)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 21:33:25 +00:00
n8n-cat-bot[bot]
098fcb39cb
test: Stabilize AI unit tests against cold heavy-load timeouts (#31592)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 21:13:49 +00:00
Dawid Myslak
e8089b4ec7
chore: Add human-like code review skill (#31533)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-02 19:22:56 +00:00
Lorent Lempereur
957fdecabc
feat(core): Monitor and recover Postgres connection pool (#31008)
Co-authored-by: Danny Martini <danny@n8n.io>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 19:20:13 +00:00
Michael Drury
ff1a7aeb19
fix(core): Fix dropdown menu safety triangle (#31583) 2026-06-02 17:04:13 +00:00
kisst
717b8602df
feat(API): Expose mfaEnabled field in Public API user endpoints (#25512)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-02 17:00:26 +00:00
Michael Drury
2a9a23f774
fix(core): External agent channels correctly utilise the user ID for episodic memory (#31584) 2026-06-02 16:56:18 +00:00
bjorger
255b7a1543
fix(core): Strip legacy unsupported config before agent JSON validation (#31577)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-02 16:41:50 +00:00
Albert Alises
332d2df44e
fix(core): Move instance AI builds to a skill (#31412) 2026-06-02 16:06:00 +00:00
bjorger
bfff25f05d
feat(core): Add sub-agent executions (#31540)
Co-authored-by: Cursor <cursoragent@cursor.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-06-02 15:37:39 +00:00
RomanDavydchuk
ee3b277ff0
feat(core): Add support for per-user connections to MCP servers from the registry in instance AI (#31325)
Some checks are pending
Build: Benchmark Image / build (push) Waiting to run
CI: Master (Build, Test, Lint) / Build for Github Cache (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (22.22.3) (push) Waiting to run
CI: Master (Build, Test, Lint) / Unit tests (24.15.0) (push) Waiting to run
CI: Master (Build, Test, Lint) / Lint (push) Waiting to run
CI: Master (Build, Test, Lint) / Performance (push) Waiting to run
CI: Master (Build, Test, Lint) / Notify Slack on failure (push) Blocked by required conditions
Util: Sync API Docs / sync-public-api (push) Waiting to run
2026-06-02 15:27:14 +00:00
Dmitrii
29b1220a90
refactor(core): Rename N8N_OTEL_TRACES_PUBLISHED_ONLY env var to N8N_OTEL_TRACES_PRODUCTION_ONLY (#31575) 2026-06-02 14:46:11 +00:00
Guillaume Jacquart
8de9958c5f
feat(core): Grant execution owners access to their own redacted data (#31139)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 14:35:51 +00:00
Declan Carroll
acc0cb321e
ci: Restore default build in grind-changed-tests workflow (#31571)
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 14:07:21 +00:00
Riqwan Thamir
8a245a5a88
feat(core): Add knowledge base to snapshot image (#31551) 2026-06-02 14:00:24 +00:00
José Braulio González Valido
2a1f3d6b2d
feat(ai-builder): Add datasets field for PR-tier suite (no-changelog) (#31429)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 13:30:31 +00:00
Iván Ovejero
f83efe5b04
fix(core): Lock execution row during data-only condition re-check (#31547) 2026-06-02 13:17:04 +00:00
José Braulio González Valido
64f25b2ad8
feat(core): Show eval-mock executions in canvas list (no-changelog) (#31244)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 13:14:51 +00:00
Benjamin Schroth
2400bfed8b
feat(core): Evaluations wizard backend (no-changelog) (#31357) 2026-06-02 13:13:21 +00:00
Alex Grozav
a724624b1a
refactor(editor): Migrate getNewWorkflowData to the workflows API (#31556)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 13:12:48 +00:00
Garrit Franke
700b1cd227
feat(Trello Node): Add OAuth1 credential support (#30369)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: Dawid Myslak <dawid.myslak@gmail.com>
2026-06-02 13:02:57 +00:00
Konstantin Tieber
e785e4e7f3
fix(core): The n8n import:workflow --activeState=fromJson cli can fail for subworkflow dependencies (#31377) 2026-06-02 12:48:50 +00:00
n8n-cat-bot[bot]
485c153ad6
fix: Declare @smithy/node-http-handler and @aws-sdk (#31562)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
2026-06-02 12:46:57 +00:00
Ricardo Espinoza
cc9fa172c8
feat(core): Replace get_suggested_nodes MCP tool with get_workflow_best_practices (#31048) 2026-06-02 12:35:19 +00:00
Matsu
dd5d539398
test: Migrate @n8n/ai-utilities from Jest to Vitest (#31490)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 12:33:58 +00:00
Jaakko Husso
a0b616073b
refactor(core): Split planner run into briefing + coordinator modules (#31458)
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 12:28:07 +00:00
Michael Drury
58a3fb2227
fix(core): Remove agents module dependence on chat-hub APIs (no-changelog) (#30769) 2026-06-02 11:57:52 +00:00
Eugene
e65b4abea1
fix(editor): Show actions on published version in agent history (no-changelog) (#31545)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 11:50:02 +00:00
bjorger
25f2d3cf32
feat(core): Add sub-agent session linkage migration (#31534)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-02 11:38:58 +00:00
Albert Alises
4e0e0ed11a
fix: Improve Instance AI workflow-builder eval reliability and node diagnostics (#31506) 2026-06-02 11:35:34 +00:00
Matsuuu
b170b07aec
Merge tag 'n8n@2.25.1' 2026-06-02 14:13:12 +03:00
n8n-cat-bot[bot]
0cb26bdea9
ci: Build tsc deps and surface vitest stderr in grind workflow (#31543)
Co-authored-by: n8n-cat-bot[bot] <n8n-cat-bot[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 10:05:57 +00:00
2021 changed files with 98135 additions and 50937 deletions

View File

@ -0,0 +1,239 @@
---
description: >-
Checks if a community pull request is ready for human review. Verifies CLA
signature, PR title format, description completeness, test coverage, and
cubic-dev-ai issues, then triages to the right Linear team or recommends a
close. Use when given a PR number or branch name to review, or when the user
says /community-pr-readiness-check, or asks to check if a PR is ready for
review.
allowed-tools: Bash(gh:*), Bash(git:*), Bash(node:*), Read, Glob, Grep, AskUserQuestion, mcp__linear-server__save_issue, mcp__linear-server__get_issue, mcp__linear-server__list_teams, mcp__linear-server__list_issue_statuses
---
# Community PR Readiness Check
Given a PR number or branch name, determine whether it is ready for human review and take the right follow-up action.
## Decision tree
1. **Bot author** (`n8n-cat-bot` / `aikido-autofix`) → cleanup-only, no review. See "Internal automation PRs" below.
2. **Auto-rejection screen matches** (typo-only / unsanctioned new node) → action path **D — close** with the matching template.
3. **All checks pass** (`readyForReview === true`) → action path **B — triage to team**.
4. **One or more checks fail** → action path **A** (if title is minor-fix only) then **C — post comment**.
## Step 1 — Resolve the PR
If given a branch name, find the PR number first:
```bash
gh pr view <branch> --repo n8n-io/n8n --json number --jq .number
```
## Step 2 — Fetch and pre-process
```bash
gh pr view <number> --repo n8n-io/n8n \
--json number,title,body,author,headRefName,headRefOid,files,isDraft,state,labels
```
### Internal automation PRs (bot authors)
If `author.login` is one of n8n's internal bots — `n8n-cat-bot` / `app/n8n-cat-bot` or `aikido-autofix` / `app/aikido-autofix` — skip the PR entirely and perform the cleanup actions below. Do **not** emit any JSON output.
1. Relabel the PR (both bots): swap `community` → `n8n team`:
```bash
gh pr edit <number> --repo n8n-io/n8n --remove-label community --add-label "n8n team"
```
2. Update the linked Linear ticket (extract `GHC-XXXX` per step 5):
- **`n8n-cat-bot`** — cancel: `mcp__linear-server__save_issue` with `state: "Canceled"`, no labels.
- **`aikido-autofix`** — route to Dev Platform: `mcp__linear-server__save_issue` with `team: "Developer Platform"`, `state: "Triage"`, no labels.
When reviewing a batch, omit the skipped PR from the output. For a single PR, emit a one-line note (e.g. `Skipped & cleaned up #30591 (n8n-cat-bot): relabeled to n8n team, cancelled GHC-8398.`).
### Collision guard
If `triage:in-progress` is already on the PR, another reviewer is mid-triage — **bail out** to avoid double-processing. Emit a one-line note (e.g. `Skipped #30205: already has triage:in-progress`) and move on to the next PR. Do not run the checks, do not modify labels, do not touch Linear.
If the user explicitly asks to re-process a PR that's stuck on `triage:in-progress` (e.g. a previous run crashed), they can clear the label manually and re-invoke.
### Otherwise — mark in-progress
Strip any existing `triage:*` state label before adding `triage:in-progress`, so the single-state invariant holds even when re-reviewing a PR that was previously sent back with `triage:needs-info` or `triage:tests-needed`:
```bash
gh pr edit <number> --repo n8n-io/n8n \
--remove-label "triage:pending" \
--remove-label "triage:needs-info" \
--remove-label "triage:tests-needed" \
--remove-label "triage:complete" \
--add-label "triage:in-progress"
```
Only one of those `triage:*` labels will actually be present; `--remove-label` errors when a label is missing, so run each removal as its own call (or batch and ignore errors) and then do the add. A PR carries exactly one `triage:<state>` label at a time; the skill replaces `triage:in-progress` with a terminal state before exit (see `reference/label-flow.md`).
### Also fetch (in parallel)
```bash
# cubic-dev-ai PR review comments (for check E)
gh api --paginate "repos/n8n-io/n8n/pulls/<number>/comments" \
--jq '.[] | select(.user.login == "cubic-dev-ai[bot]") | {body: .body, path: .path}'
# n8n-assistant issue comments (for the Linear ticket reference)
gh api --paginate "repos/n8n-io/n8n/issues/<number>/comments" \
--jq '[.[] | select(.user.login == "n8n-assistant[bot]" or .user.login == "n8n-assistant") | .body] | join("\n")'
```
## Step 2.5 — Auto-rejection screen
Per [`CONTRIBUTING.md`](../../../../../CONTRIBUTING.md), two PR patterns should be closed outright rather than reviewed:
- **Typo-only PR** — diff is entirely spelling/grammar fixes with no logic or tests.
- **New-node PR** — adds a brand-new node, unless the n8n team has explicitly agreed to take it.
If either matches, set `checks.AutoReject` and skip directly to action **D**. Full rules and how to verify each pattern: see `reference/checks.md`.
## Step 3 — Run the five checks
Run when `AutoReject` is `null`. Full rules for each in `reference/checks.md`:
- **A. CLA** — `cla-signed` label present.
- **B. Title** — matches the conventional-commit regex. Authoritative rules in [`.github/pull_request_title_conventions.md`](../../../../../.github/pull_request_title_conventions.md).
- **C. Description** — every section heading and checklist item from [`.github/pull_request_template.md`](../../../../../.github/pull_request_template.md) is present in the PR body. The template is read at check time, so changes to it propagate automatically.
- **D. Tests** — source logic changes have matching test files. Skip for `docs/ci/chore/build` PRs.
- **E. cubic-dev-ai** — no unresolved comments (resolved = "Addressed in commit" marker).
## Step 4 — Identify the responsible team
Run `node .github/scripts/owners.mjs` against the changed file list and map the winning GitHub team to a Linear team. Full mapping table, sub-agent fallback procedure, and label rules: see `reference/teams.md`.
## Step 5 — Extract the Linear ticket
n8n-assistant leaves a comment on every community PR containing `This PR has been added to our internal tracker as "GHC-XXXX"`. Search the concatenated n8n-assistant comment body for `\bGHC-\d+\b`, take the first match.
If no n8n-assistant comment exists (older PRs that predate the automation), `linearTicket` is `null`.
## Step 6 — Output JSON
```json
{
"readyForReview": <true only when every check passes (see the rule below), false otherwise>,
"messageForUser": "<Short message to the contributor listing what they need to address. 'N/A' if ready.>",
"team": "<Linear team name (from reference/teams.md), or 'Engineering' as fallback>",
"linearTicket": "<GHC-XXXX or null>",
"checks": {
"AutoReject": <"typo-only" | "new-node" | null>,
"CLA": <bool>,
"Title": <bool>,
"Description": <bool>,
"TestsNeeded": <bool>,
"TestsIncluded": <bool>,
"CubicIssues": <true if unresolved cubic issues exist, false otherwise>
}
}
```
`readyForReview` is `true` only when: `AutoReject` is `null`; `CLA`, `Title`, and `Description` are all `true`; `CubicIssues` is `false`; and either `TestsNeeded` is `false` or `TestsIncluded` is `true`. If `AutoReject` is set, `readyForReview` is always `false`.
Emit the JSON first, then take the appropriate action path below.
## Step 7 — Action paths
Use `AskUserQuestion` for each prompt. Sub-agents called for analysis only should stop after step 6 and let the caller drive step 7.
### A — Minor title fix
A title issue is **minor** if it can be repaired by a deterministic transformation:
- Leading or trailing whitespace.
- First letter of the summary in the wrong case.
- Trailing period.
- Mixed case `revert:` requiring lowercase (no change needed, just flag).
If the *only* failing check is `Title` (or `Title` + `CubicIssues`) and the issue is minor, propose the fix and ask `Apply proposed / Edit before applying / Skip`. Apply with:
```bash
gh pr edit <number> --repo n8n-io/n8n --title "<new title>"
```
Then re-evaluate `Title` (now passes) and continue to **B** or **C**. Non-minor title problems (wrong/missing type, no colon, hyphenated scope) need contributor input — skip A and go to **C**.
### B — Triage to team (`readyForReview === true`)
Ask: *"PR is ready for review. Assign Linear ticket `<linearTicket>` to team `<team>` and move to <destination state>?"* Options: `Yes, assign and triage` / `No, leave as-is`.
Destination state: `Review` for NODES, `Triage` for every other team. Label composition: see `reference/teams.md`.
On `Yes`:
```python
# 1. Linear
mcp__linear-server__save_issue(
id=linearTicket,
team=<team>,
state=<destination>,
labels=<computed labels>,
)
# 2. GitHub (only if Linear succeeded) — see reference/label-flow.md
gh pr edit <number> --repo n8n-io/n8n \
--remove-label "triage:in-progress" \
--remove-label "status:pending-assignment" \
--add-label "team:<slug>" \
--add-label "status:team-assigned" \
--add-label "triage:complete"
```
If `linearTicket` is `null` (older PRs predating n8n-assistant), ask whether to create a new Linear ticket before triaging. If the user declines, skip B and ask them how to proceed.
### C — Post contributor comment (`readyForReview === false`, no auto-reject)
Show `messageForUser` and ask `Post as-is / Edit before posting / Skip`. On post:
```bash
gh pr comment <number> --repo n8n-io/n8n --body "<final message>"
```
Then apply the right terminal triage label — exactly one, priority `triage:tests-needed` > `triage:needs-info`. See `reference/label-flow.md`. On `Skip`, leave the PR on `triage:in-progress` so the next loop picks it up.
Skip C entirely if A already handled the only failing check and the PR is now ready — run B instead.
### D — Close the PR
Used when the PR should be closed rather than reviewed. Three common triggers:
1. **Auto-rejection** (`AutoReject` set) — typo-only or unsanctioned new node.
2. **Duplicate** — another open PR addresses the same change.
3. **Out of scope / bundled** — multiple unrelated fixes that should be split, or scope n8n team has declined.
Ask `Close + comment / Edit before closing / Skip`. Templates below; pick one and adapt to the contributor and specifics.
**Typo-only:**
> Thanks for taking the time to send this in! Per our [contributing guide](../blob/master/CONTRIBUTING.md#community-pr-guidelines) we don't accept typo-only PRs — they create review overhead without changing functionality, and our spell-checker rules cover most cases automatically. Closing this for now; please feel free to open a PR that pairs a typo fix with a related logic change. 🙏
**New node:**
> Thanks for the contribution! n8n no longer accepts new nodes directly into the core monorepo unless the team has explicitly agreed to scope one in. Please publish this as a [community node](https://docs.n8n.io/integrations/creating-nodes/overview/) instead — that gives you full ownership and avoids the long review queue here. Closing this PR per our [contributing guide](../blob/master/CONTRIBUTING.md#community-pr-guidelines).
**Duplicate of another PR:**
> Thanks for the contribution! This change is already being handled in #<other-pr>, which is further along in review. Closing this in favour of that PR to keep the queue tidy — please feel free to chime in over there if there's anything missing.
**Bundled / out of scope:**
> Thanks for the contribution! Per our [contributing guide](../blob/master/CONTRIBUTING.md#community-pr-guidelines) we ask for one focused change per PR. This PR bundles <N> unrelated fixes — please reopen them as separate, focused PRs, each with the [template](../blob/master/.github/pull_request_template.md) filled in and a unit test that locks in the regression. Closing this one in the meantime. 🙏
Close action (same for every reason):
```bash
gh pr comment <number> --repo n8n-io/n8n --body "<final message>"
gh pr close <number> --repo n8n-io/n8n
gh pr edit <number> --repo n8n-io/n8n \
--remove-label "triage:in-progress" \
--remove-label "status:pending-assignment" \
--add-label "status:internal-closed" \
--add-label "triage:complete"
```
If `linearTicket` is set, also cancel it: `mcp__linear-server__save_issue(id=linearTicket, state="Canceled")`. If `gh pr close` reports the PR is already closed (contributor beat you to it), proceed with the comment, labels, and ticket cancellation anyway.
## Notes
- **Draft PRs** — report all findings but note the PR is a draft.
- **Already merged or closed** — say so and skip the checks (don't apply triage labels).
- **Re-reviewing a PR you've already commented on** — use the GitHub Timeline API to detect contributor activity since the last skill touch. See `reference/re-review.md`.
- **Label state machine** — single `triage:<state>` label at any time; transitions documented in `reference/label-flow.md`.

View File

@ -0,0 +1,140 @@
# Community PR Readiness Check — checks
The full ruleset for the auto-rejection screen and the five readiness checks. Loaded when the skill is mid-review.
## Contents
- Step 2.5 — Auto-rejection screening
- Typo-only PR
- New-node PR
- The five checks (run after auto-rejection screen clears)
- A. CLA signed
- B. PR title format
- C. PR description completeness
- D. Tests
- E. cubic-dev-ai issues
---
## Step 2.5 — Auto-rejection screening
Before running the five checks, screen for PRs that the project policy in [`CONTRIBUTING.md`](../../../../../../CONTRIBUTING.md) (section "Community PR Guidelines") says should be rejected outright. If a PR matches one of these patterns, skip the five checks and recommend a polite close instead.
### Typo-only PR
The diff consists **entirely** of:
- Spelling fixes in code comments, error messages, or user-visible description strings.
- No new tests, no logic changes, no behaviour changes, no config changes.
Per CONTRIBUTING.md: *"Typos are not sufficient justification for a PR and will be rejected."* Use `gh pr diff <number>` to verify the diff is purely text-edit. A PR that mixes a typo fix with a real logic fix is **not** typo-only — review it normally.
### New-node PR
The diff adds a new node entry: a new `*.node.ts` (and usually a sibling credentials file) under `packages/nodes-base/nodes/<NewNode>/` or `packages/@n8n/nodes-langchain/nodes/<NewNode>/`, with no corresponding node already existing in the codebase.
Per CONTRIBUTING.md: *"PRs that introduce new nodes will be auto-closed unless they are explicitly requested by the n8n team."* If the user calling the skill states the n8n team has agreed to take this node, proceed with the normal review; otherwise flag for close with a pointer to the [Community Nodes](https://docs.n8n.io/integrations/creating-nodes/overview/) flow.
### When a screen matches
Skip the five checks. Set `checks.AutoReject` in the JSON output to `"typo-only"` or `"new-node"` and proceed to action path 7D in SKILL.md (close the PR).
---
## The five checks
Run these in order when step 2.5 clears (`AutoReject` is `null`).
### A. CLA signed
Check the PR `labels` returned from `gh pr view`:
- `cla-signed` label present → ✅ signed
- `cla-signed` label missing → ❌ not signed
### B. PR title format
The authoritative title rules live in [`.github/pull_request_title_conventions.md`](../../../../../../.github/pull_request_title_conventions.md). Read that file at the start of the check — the allowed `type` list and scope rules come from there, not from this skill.
The matching regex below is a cached extraction of those rules. If the conventions file disagrees with the regex (a new type, a different scope syntax), trust the file and flag the divergence in your output.
For all types except `revert`:
```
^(feat|fix|perf|test|docs|refactor|build|ci|chore)(\([a-zA-Z0-9 ]+( Node)?\))?!?: [A-Z].+[^.]$
```
For `revert` titles, the summary is the original commit header (which starts with a lowercase type), so capitalization is not enforced:
```
^revert(\([a-zA-Z0-9 ]+( Node)?\))?!?: .+[^.]$
```
Quick recap of what the regex enforces (full detail in the conventions file):
- Type must be one of: `feat fix perf test docs refactor build ci chore revert`.
- Scope is optional, in parentheses; characters limited to `[a-zA-Z0-9 ]` — hyphens like `(nodes-base)` fail.
- Breaking changes: `!` before the colon.
- Summary: starts with capital letter (lowercase allowed for `revert:`), no trailing period.
- No Linear ticket IDs in the title (e.g. `N8N-1234`).
### C. PR description completeness
The PR template at [`.github/pull_request_template.md`](../../../../../../.github/pull_request_template.md) is the **source of truth** for what a complete PR description looks like. Read it at the start of the check — sections and checklist items shouldn't be hardcoded here.
Procedure:
1. Read the template file and extract:
- Every `## Heading` (the expected section list).
- Every `- [ ]` checkbox item under the `## Review / Merge checklist` heading.
2. For each `## Heading` from the template, check the PR body contains a heading with the same text. Treat headings case-insensitively but require the exact text — `## Related Issue` does **not** satisfy `## Related Linear tickets, Github issues, and Community forum posts`.
3. For the `## Summary` heading specifically, require non-empty content below it (the HTML comment placeholder doesn't count as content).
4. For the checklist, require every extracted checkbox item to appear in the PR body. Match by the item text (after stripping `- [ ]` / `- [x]`); the boxes can be checked or unchecked — community PRs commonly leave them unchecked, which is fine.
5. For the "Related tickets" section: if the heading is present, accept any of these as content: a URL (`http`), a GitHub closing keyword (`closes #N`, `fixes #N`, `resolves #N`), or empty (only the HTML comment). Only flag if the heading is missing entirely.
When the check fails, report **which** section or checkbox item is missing — that's what the contributor message includes.
### D. Tests
Skip this check entirely if the PR type (from the title) is `docs`, `ci`, `chore`, or `build`.
Otherwise:
1. Identify source files changed: non-test files under `packages/` from the `files` list.
2. If there are source file changes, read the diff via `gh pr diff`:
```bash
gh pr diff <number> --repo n8n-io/n8n
```
3. Use the diff to judge whether the changes introduce logic that warrants tests (new functions, bug fixes, behaviour changes, data transformations). Pure config changes, type-only changes, and trivial renames do not require tests.
4. Look for matching test files (`*.test.ts`, `*.spec.ts`, files inside `__tests__/`) among the changed files.
**Fallback only if needed**: if the diff alone is insufficient (e.g. you need to read the full surrounding function body to assess whether a refactor preserved behaviour), check out a temporary worktree:
```bash
git fetch origin pull/<number>/head:pr/<number>
git worktree add /tmp/pr-<number>-review pr/<number>
# … inspect files …
git worktree remove /tmp/pr-<number>-review --force
git branch -D pr/<number>
```
Always clean up the worktree afterwards. In sub-agent contexts, `git fetch` and `git worktree add` are often denied by the sandbox — that's fine, the `gh pr diff` path handles almost every PR on its own.
Report:
- ✅ Tests present, or change does not require tests
- ❌ Source logic changed but no test files found
### E. cubic-dev-ai issues
Fetch the PR review comments:
```bash
gh api --paginate "repos/n8n-io/n8n/pulls/<number>/comments" \
--jq '.[] | select(.user.login == "cubic-dev-ai[bot]") | {body: .body, path: .path}'
```
`cubic-dev-ai[bot]` leaves a comment for every issue it finds. An issue is **resolved** if its comment body contains an "addressed in commit" marker — typically `✅ Addressed in [<sha>]` or `Addressed in <sha>`. Resolved issues count as if they were never raised.
- No comments, every comment explicitly states no issues were found, or every issue is marked addressed → ✅
- One or more unresolved comments → ❌ report the unresolved count and priority breakdown (e.g. "3 unresolved issues: 1× P1, 1× P2, 1× P3")

View File

@ -0,0 +1,92 @@
# Community PR Readiness Check — triage label flow
A PR should carry **exactly one** `triage:<state>` label at any time. The skill drives the PR through this state machine.
## States
| State | Meaning |
|------------------------|------------------------------------------------------------------------|
| `triage:pending` | Auto-applied by n8n-assistant when the PR opens. Skill removes it. |
| `triage:in-progress` | Skill is actively reviewing. Set in step 2, replaced before exit. |
| `triage:complete` | PR has been triaged to a team (or closed). Terminal. |
| `triage:needs-info` | Comment posted; contributor needs to address something non-test. |
| `triage:tests-needed` | Comment posted; contributor needs to add tests (priority over info). |
## Transitions
### Skill entry (step 2)
Before transitioning, **check for an existing `triage:in-progress` label**. If present, another reviewer is mid-triage — bail out without touching labels, Linear, or comments. This prevents two parallel runs from double-processing the same PR.
Strip any existing `triage:*` state label before adding `triage:in-progress`, so the single-state invariant holds even when re-reviewing a PR that was previously sent back with `triage:needs-info` or `triage:tests-needed`:
```bash
gh pr edit <number> --repo n8n-io/n8n \
--remove-label "triage:pending" \
--remove-label "triage:needs-info" \
--remove-label "triage:tests-needed" \
--remove-label "triage:complete" \
--add-label "triage:in-progress"
```
Only one of those `triage:*` labels will actually be present; `--remove-label` errors when a label is missing, so run each removal as its own call (or batch and ignore errors) and then do the add.
### Skill exit — branch by outcome
#### Triaged to a team (action path 7B)
```bash
gh pr edit <number> --repo n8n-io/n8n \
--remove-label "triage:in-progress" \
--remove-label "status:pending-assignment" \
--add-label "team:<slug>" \
--add-label "status:team-assigned" \
--add-label "triage:complete"
```
The `team:<slug>` is taken from `reference/teams.md`. Apply only after the Linear `save_issue` call succeeds — if Linear fails, leave the PR on `triage:in-progress` for the next loop run. `status:pending-assignment` may not be present (e.g. older PRs that predate the convention) — if `--remove-label` errors, drop the flag and retry, or run it as a separate call.
#### Closed (action path 7D)
```bash
gh pr edit <number> --repo n8n-io/n8n \
--remove-label "triage:in-progress" \
--remove-label "status:pending-assignment" \
--add-label "status:internal-closed" \
--add-label "triage:complete"
```
Apply alongside `gh pr close <number>` and the close-comment template (see SKILL.md). If the PR is already closed (`gh pr close` reports "already closed"), still apply the labels and the Linear cancellation.
#### Comment posted, contributor must act (action path 7C)
Pick **one** terminal label using this priority:
| Condition | Label |
|------------------------------------------------------------------------|------------------------|
| `TestsNeeded === true` **and** `TestsIncluded === false` | `triage:tests-needed` |
| Any other failing check (`CLA`, `Title`, `Description`, `CubicIssues`) | `triage:needs-info` |
When both conditions hold, `triage:tests-needed` wins — it's the more specific signal and the comment already covers everything else.
```bash
gh pr edit <number> --repo n8n-io/n8n \
--remove-label "triage:in-progress" \
--add-label "triage:tests-needed" # or triage:needs-info
```
#### Skill aborted before posting
If the user picks `Skip` at the "post this comment?" prompt, leave the PR on `triage:in-progress` — don't apply a terminal label without an accompanying contributor message. The next loop run will pick it up.
## status:* labels
These are independent of the triage state and signal where the PR sits in the n8n internal flow:
| Label | Applied when |
|-----------------------------|----------------------------------------------------|
| `status:team-assigned` | PR triaged to a Linear team (path 7B). |
| `status:internal-closed` | PR closed via path 7D. |
| `status:pending-assignment` | Default before the skill runs (set elsewhere). |
The skill writes `status:team-assigned` (path 7B) or `status:internal-closed` (path 7D) and clears `status:pending-assignment` at the same time, so a triaged or closed PR drops out of any `status:pending-assignment` queue. It does not touch other `status:*` labels — reconciliation between team-assigned/internal-closed happens elsewhere if needed.

View File

@ -0,0 +1,55 @@
# Community PR Readiness Check — re-reviewing a PR you've already commented on
When looping over the GHC queue, skip PRs you reviewed before unless the contributor has actually done something. This avoids burning agent runs on PRs that are still waiting on the contributor.
## What does "the contributor did something" mean?
Any of:
- New commit pushed (`committed` event)
- Force-push / rebase (`head_ref_force_pushed`)
- Comment added (`commented`)
- Review submitted (`reviewed`)
Label changes, description edits by us, and skill-posted comments don't count.
## Use the GitHub Timeline API
The timeline endpoint returns every event on the PR with `actor.login` and a timestamp, so we can distinguish "contributor activity" from "skill activity" cleanly. This replaces an earlier heuristic that compared `committedDate` to the last skill comment — too narrow, since it missed contributor comments and force-pushes.
```bash
SKILL_USER=$(gh api user --jq .login)
gh api --paginate "repos/n8n-io/n8n/issues/<number>/timeline" \
-H "Accept: application/vnd.github+json"
```
## Procedure
1. Resolve the skill user's login (`$SKILL_USER` above). This is the GitHub handle the skill is running as.
2. Fetch the full timeline.
3. Find the **last skill-authored event** — any event where `actor.login == $SKILL_USER` (labels applied, comments posted, etc.). Take its timestamp as `lastSkillAt`. If no skill activity exists → first-time review, run the full check.
4. Scan for **contributor activity after `lastSkillAt`** — events where:
- `event` is one of `committed`, `head_ref_force_pushed`, `commented`, `reviewed`,
- AND, for `commented` / `reviewed` / `head_ref_force_pushed`, `actor.login != $SKILL_USER` (skill comments shouldn't trigger re-review of themselves),
- AND the event timestamp > `lastSkillAt`.
Timestamp field varies by event type:
| Event | Timestamp field |
|---------------------------|--------------------------|
| `committed` | `committer.date` |
| `head_ref_force_pushed` | `created_at` |
| `commented` | `created_at` |
| `reviewed` | `submitted_at` |
5. Decide:
- One or more qualifying events → run the full review (steps 2–7 in SKILL.md).
- None → skip the PR. Report "no contributor activity since previous review" and move on.
For `committed` events, there's no `actor.login` — the actor is the commit author email. Since the skill never pushes commits, treat every `committed` event as contributor activity unconditionally.
## Edge case: description-only edits
Body edits don't generate timeline events, so a contributor who only updates the PR description (e.g. fills in a missing template section) won't be picked up. That's a GitHub API limitation; the skill can't see body edits without diffing against a cached snapshot.
If the user explicitly mentions a PR they edited, just re-review it — that's faster than working around the API.

View File

@ -0,0 +1,104 @@
# Community PR Readiness Check — teams and labels
Owner resolution, the GitHub-team → Linear-team → label mapping, and the Linear ticket label rules used when triaging.
## Contents
- Identifying the owning team
- GitHub team → Linear team → GitHub label
- Linear ticket label rules
- Always-on label
- Type label (from PR title prefix)
- Team-specific extra label
- Worked examples
---
## Identifying the owning team
Use the canonical owners script at `.github/scripts/owners.mjs`. It parses `.github/OWNERS` with last-match-wins semantics and returns allocations sorted by file count with a `share` percentage. Using the script keeps this skill consistent with whatever CI uses.
1. Write the PR's changed file paths (from the `files` list) to a temp file, one per line:
```bash
printf '%s\n' <path1> <path2> ... > /tmp/pr-<number>-files.txt
```
2. Run the script:
```bash
node .github/scripts/owners.mjs /tmp/pr-<number>-files.txt
```
3. The script prints JSON of the form:
```json
{
"totalFiles": 12,
"allocations": [
{ "team": "@n8n-io/ai", "fileCount": 10, "share": 83, "files": [...] },
{ "team": "@n8n-io/catalysts", "fileCount": 2, "share": 17, "files": [...] }
]
}
```
Allocations are already sorted by `fileCount` descending — take the first entry as the winning team.
4. Clean up: `rm /tmp/pr-<number>-files.txt`.
5. Strip the `@n8n-io/` prefix from `allocations[0].team` — the GitHub team slug is `nodes`, `iam`, `ai`, etc. If `allocations` is empty (no file matched any rule, which is possible only if `.github/OWNERS` lost its catch-all), fall back to `catalysts`.
6. Map the GitHub team slug to its Linear team name and PR label using the table below. The `team` field in the JSON output is the **Linear team name**. If the resolved GitHub team slug has no entry in the table, fall back to `Engineering`.
**Sub-agent fallback**: if `node` execution is denied by the sandbox, read `.github/OWNERS` directly and apply last-match-wins by hand. All the active rules fit on one screen.
## GitHub team → Linear team → GitHub label
| GitHub team (`@n8n-io/…`) | Linear team | GitHub team label |
|---------------------------|-------------------------|--------------------|
| `catalysts` | Catalysts | `team:cats` |
| `adore` | Adore | `team:adore` |
| `ai` | AI | `team:ai` |
| `nodes` | NODES | `team:nodes` |
| `design` | Design | `team:design` |
| `iam` | Identity & Access | `team:identity` |
| `ligo` | Lifecycle & Governance | `team:lifecycle` |
| `instance-ai` | instanceAI | `team:instance-ai` |
| `frontend` | Adore | `team:adore` |
| `qa-dx` | Developer Platform | `team:qa-dx` |
| `migrations-review` | Catalysts | `team:cats` |
The **GitHub team label** column is what gets applied to the PR after a successful Linear assignment (see `reference/label-flow.md`).
## Linear ticket label rules
When calling `mcp__linear-server__save_issue` to assign a ticket to a team, pass a `labels` array composed of up to three pieces (the team-specific extra label applies only to some teams):
### Always-on label
- `GitHub` — every community PR ticket carries this. In the Linear UI it renders as `source > GitHub`; the `source` prefix is a label-group parent, not part of the API name.
### Type label (from PR title prefix)
| PR title type | Linear label |
|----------------------------------------------------------------------------------------|----------------|
| `feat` | `feature` |
| `fix` | `bug` |
| anything else (`perf`, `refactor`, `docs`, `ci`, `chore`, `build`, `test`, `revert`) | `enhancement` |
Pass the **child** label name only — Linear silently drops unknown labels, so don't include `type > ` prefixes.
### Team-specific extra label
| Destination team | Extra label |
|------------------|----------------|
| Catalysts | `Community PR` |
| NODES | `community-pr` |
| other teams | _(no extra)_ |
### Worked examples
- `feat` PR going to Catalysts: `["GitHub", "feature", "Community PR"]`
- `fix` PR going to NODES: `["GitHub", "bug", "community-pr"]`
- `perf` PR going to AI: `["GitHub", "enhancement"]`
- `chore` PR going to Lifecycle & Governance: `["GitHub", "enhancement"]`
## Destination state
| Destination team | Linear state |
|------------------|--------------|
| NODES | `Review` |
| any other team | `Triage` |
NODES has a dedicated `Review` lane; every other team handles routing inside their own triage.

View File

@ -1,150 +0,0 @@
---
description: >-
Checks if a community pull request is ready for human review. Verifies CLA
signature, PR title format, description completeness, test coverage, and
cubic-dev-ai issues. Use when given a PR number or branch name to review,
or when the user says /community-pr-review, /pr-review, or asks to check if
a PR is ready for review.
allowed-tools: Bash(gh:*), Bash(git:*), Read, Glob, Grep
---
# Community PR Review
Given a PR number or branch name, determine whether it is ready for human review.
## Steps
### 1. Resolve the PR
If given a branch name, find the PR number first:
```bash
gh pr view <branch> --repo n8n-io/n8n --json number --jq .number
```
### 2. Fetch PR data
```bash
gh pr view <number> --repo n8n-io/n8n \
--json number,title,body,author,headRefName,headRefOid,files,isDraft,state
```
Fetch in parallel:
```bash
# CLA commit status (primary signal) — statuses are newest-first; use the first returned entry
gh api --paginate "repos/n8n-io/n8n/commits/<headRefOid>/statuses" \
--jq '[.[] | select(.context == "license/cla") | {state, description}] | first'
# CLAassistant issue comment (fallback when no commit status) — use the last returned entry
gh api --paginate "repos/n8n-io/n8n/issues/<number>/comments" \
--jq '[.[] | select(.user.login == "CLAassistant") | .body] | last'
# cubic-dev-ai PR review comments (streamed so results concatenate cleanly across pages)
gh api --paginate "repos/n8n-io/n8n/pulls/<number>/comments" \
--jq '.[] | select(.user.login == "cubic-dev-ai[bot]") | {body: .body, path: .path}'
```
### 3. Run the five checks
#### A. CLA signed
Check the `license/cla` commit status first; fall back to the CLAassistant comment if no status exists.
**Commit status** (`context == "license/cla"`):
- `state: "success"` → ✅ signed
- `state: "failure"` or `state: "error"` → ❌ not signed
- `state: "pending"` → ⏳ pending
- Not present → fall back to comment
**CLAassistant issue comment** (fallback):
- Body contains `"All committers have signed the CLA."` → ✅ signed
- Body contains `"not signed"` or a link to sign → ❌ not signed
- No comment → ❌ treat as not signed
#### B. PR title format
For all types except `revert`, the title must match:
```
^(feat|fix|perf|test|docs|refactor|build|ci|chore)(\([a-zA-Z0-9 ]+( Node)?\))?!?: [A-Z].+[^.]$
```
For `revert` titles, the summary is the original commit header (which starts with a lowercase type), so capitalization is not enforced:
```
^revert(\([a-zA-Z0-9 ]+( Node)?\))?!?: .+[^.]$
```
- Type must be one of: `feat fix perf test docs refactor build ci chore revert`
- Scope is optional, in parentheses e.g. `(editor)` or `(Slack Node)`
- Breaking changes: `!` before the colon
- Summary: starts with capital letter (lowercase allowed for `revert:`), no trailing period
- No Linear ticket IDs in the title (e.g. `N8N-1234`)
#### C. PR description completeness
1. **Summary** (`## Summary`) — must have non-empty content below the heading (not just the HTML comment).
2. **Related tickets** (`## Related Linear tickets, Github issues, and Community forum posts`) — acceptable content: a URL (`http`), a GitHub closing keyword (`closes #N`, `fixes #N`, `resolves #N`, etc.), or empty. Only flag if the section heading is missing entirely.
3. **Checklist** (`## Review / Merge checklist`) — all four items must be present. Unchecked checkboxes are expected for community PRs; do **not** flag them as missing.
#### D. Tests
Skip this check if the PR type (from the title) is `docs`, `ci`, `chore`, or `build`.
Otherwise:
1. Identify source files changed: non-test files under `packages/` from the `files` list.
2. If there are source file changes, check out the PR in a temporary worktree:
```bash
git fetch origin pull/<number>/head:pr/<number>
git worktree add /tmp/pr-<number>-review pr/<number>
```
3. Read the changed source files from the worktree to understand whether the changes introduce logic that warrants tests (new functions, bug fixes, behaviour changes, data transformations). Pure config changes, type-only changes, and trivial renames do not require tests.
4. Look for matching test files (`*.test.ts`, `*.spec.ts`, files inside `__tests__/`) among the changed files.
5. **Always clean up the worktree**, even if a previous check failed:
```bash
git worktree remove /tmp/pr-<number>-review --force
git branch -D pr/<number>
```
Report:
- ✅ Tests present, or change does not require tests
- ❌ Source logic changed but no test files found
#### E. cubic-dev-ai issues
Review the PR review comments fetched in step 2. `cubic-dev-ai[bot]` leaves comments for every issue it finds.
- No comments from `cubic-dev-ai[bot]`, or every comment explicitly states no issues were found → ✅
- Any other comment → ❌ report the total count and priority breakdown (e.g. "3 issues: 1× P1, 1× P2, 1× P3")
### 4. Output
Always output valid JSON in this exact shape:
```json
{
"readyForReview": <true if all passing checks allow merge, false otherwise>,
"messageForUser": "<Human-readable summary of what needs to change, written as if posted directly to the PR contributor. 'N/A' if nothing is needed.>",
"checks": {
"CLA": <true if signed, false if not signed or pending>,
"Title": <true if title matches convention, false otherwise>,
"Description": <true if all three template sections are complete, false otherwise>,
"TestsNeeded": <true if the code changes require tests, false if not applicable>,
"TestsIncluded": <true if test files are present in the PR, false otherwise>,
"CubicIssues": <true if cubic-dev-ai raised issues, false if no issues>
}
}
```
`readyForReview` is `true` only when: `CLA`, `Title`, and `Description` are all `true`; `CubicIssues` is `false`; and either `TestsNeeded` is `false` or `TestsIncluded` is `true`.
`messageForUser` should be a short, friendly message directed at the contributor listing exactly what they need to address. If `readyForReview` is `true`, set it to `"N/A"`.
Output nothing other than the JSON block.
## Notes
- Draft PRs — report all findings but note the PR is a draft.
- If the PR is already merged or closed, say so and skip the checks.
- Always remove the worktree even if earlier checks failed.

View File

@ -76,7 +76,8 @@ Creates GitHub PRs with titles that pass n8n's `check-pr-title` CI validation.
6. **Create PR** using gh CLI. Read `.github/pull_request_template.md` as the 6. **Create PR** using gh CLI. Read `.github/pull_request_template.md` as the
body structure, then populate each section with actual content before body structure, then populate each section with actual content before
creating the PR: creating the PR:
- **Summary**: describe what the PR does and how to test it - **Summary**: describe what the PR does
- **How to test**: describe how to test the changes. Include an example workflow if appropriate
- **Related tickets**: add the Linear ticket URL (`https://linear.app/n8n/issue/[TICKET-ID]`) and any GitHub issue links - **Related tickets**: add the Linear ticket URL (`https://linear.app/n8n/issue/[TICKET-ID]`) and any GitHub issue links
- **Checklist**: keep as-is from the template - **Checklist**: keep as-is from the template
- Add a "🤖 PR Summary generated by AI" at the end of the body - Add a "🤖 PR Summary generated by AI" at the end of the body
@ -94,9 +95,12 @@ Based on `.github/pull_request_template.md`:
### Summary Section ### Summary Section
- Describe what the PR does - Describe what the PR does
- Explain how to test the changes
- Include screenshots/videos for UI changes - Include screenshots/videos for UI changes
### How to test Section
- Explain how to test the changes
- Include an example workflow if appropriate
### Related Links Section ### Related Links Section
- Link to Linear ticket: `https://linear.app/n8n/issue/[TICKET-ID]` - Link to Linear ticket: `https://linear.app/n8n/issue/[TICKET-ID]`
- Link to GitHub issues using keywords to auto-close: - Link to GitHub issues using keywords to auto-close:

View File

@ -117,6 +117,7 @@ Run through this before requesting review. Each item is a real, recurring review
- [ ] **Sparse-unique columns:** use a partial index `WHERE col IS NOT NULL`. — [Index Management](#index-management) - [ ] **Sparse-unique columns:** use a partial index `WHERE col IS NOT NULL`. — [Index Management](#index-management)
- [ ] **Composite index column order** matches your actual `WHERE` / `ORDER BY` usage. — [Index Management](#index-management) - [ ] **Composite index column order** matches your actual `WHERE` / `ORDER BY` usage. — [Index Management](#index-management)
- [ ] **Entity ↔ migration parity**: column types, `notNull`, defaults, FKs, `@Index` decorators all match. — [Schema/Entity Drift](#schemaentity-drift) - [ ] **Entity ↔ migration parity**: column types, `notNull`, defaults, FKs, `@Index` decorators all match. — [Schema/Entity Drift](#schemaentity-drift)
- [ ] **If using `addColumns`, `dropColumns`, `addNotNull`, `dropNotNull`, `addEnumCheck`, or `dropEnumCheck`:** verified whether the target table has incoming FKs. If so, either set `withFKsDisabled = true as const` (in a `sqlite/` subclass if this is a `common/` migration) or use raw `ALTER TABLE ADD COLUMN` for nullable/defaulted columns. — [SQLite table recreation risk](#sqlite-table-recreation-risk)
- [ ] **No live-app value imports** in the migration body. Inline types/utility code locally. — [Never import entities as values](#never-import-entities-as-values) - [ ] **No live-app value imports** in the migration body. Inline types/utility code locally. — [Never import entities as values](#never-import-entities-as-values)
- [ ] **`async down()` was tested locally**: `pnpm start && pnpm start -- db:revert && pnpm start` on **both** SQLite and Postgres. — [Reversibility](#reversibility) - [ ] **`async down()` was tested locally**: `pnpm start && pnpm start -- db:revert && pnpm start` on **both** SQLite and Postgres. — [Reversibility](#reversibility)
- [ ] **One logical change per migration**; split unrelated table changes into separate files. — [Don't combine independent schema changes](#dont-combine-independent-schema-changes) - [ ] **One logical change per migration**; split unrelated table changes into separate files. — [Don't combine independent schema changes](#dont-combine-independent-schema-changes)
@ -218,7 +219,7 @@ export class MigrateThing1234567890000 implements IrreversibleMigration {
const { schemaBuilder: { addColumns, column, createIndex } } = ctx; const { schemaBuilder: { addColumns, column, createIndex } } = ctx;
// One-liner DSL calls stay inline — naming them adds no information. // One-liner DSL calls stay inline — naming them adds no information.
await addColumns('my_table', [column('slug').varchar(255)]); await addColumns('my_table', [column('slug').varchar(255)], { recreatesOnSqlite: true });
// The non-trivial step gets a named method. // The non-trivial step gets a named method.
await this.backfillSlugs(ctx); await this.backfillSlugs(ctx);
@ -350,6 +351,51 @@ export class CreateMyTable1234567890000 implements ReversibleMigration {
} }
``` ```
### SQLite table recreation risk
Six DSL methods trigger **full table recreation** on SQLite — TypeORM internally creates a temp copy, drops the original, and renames:
| Method | TypeORM internal call |
|---|---|
| `addColumns()` | `queryRunner.addColumns()` |
| `dropColumns()` | `queryRunner.dropColumns()` |
| `addNotNull()` | `queryRunner.changeColumn()` |
| `dropNotNull()` | `queryRunner.changeColumn()` |
| `addEnumCheck()` | `queryRunner.changeColumn()` |
| `dropEnumCheck()` | `queryRunner.changeColumn()` |
All six require a final options parameter with `recreatesOnSqlite: true` — TypeScript rejects calls that omit it.
**The danger:** If the target table has incoming FK constraints with `CASCADE` from other tables, the `DROP TABLE` during recreation fires cascading deletes and **wipes rows from those referencing tables**.
**Decision tree:**
1. Does the target table have incoming FK constraints from other tables?
- **No** → Safe to use the DSL method directly (passing the required `{ recreatesOnSqlite: true }` acknowledgement option).
- **Yes** → Continue to step 2.
2. Is this an `addColumns` call where every new column is nullable or has a default?
- **Yes** → Use raw `ALTER TABLE ADD COLUMN` instead (avoids table recreation entirely):
```typescript
await runQuery(
`ALTER TABLE ${escape.tableName('my_table')} ADD COLUMN ${escape.columnName('col')} TEXT`,
);
```
See `1733133775640-AddMockedNodesColumnToTestDefinition.ts` for a real example.
- **No** → Continue to step 3.
3. Set `withFKsDisabled = true as const` on the migration class. For common migrations, create a SQLite subclass in `sqlite/` that extends the common migration and adds the flag:
```typescript
// sqlite/1234567890000-MyMigration.ts
import { MyMigration1234567890000 as BaseMigration } from '../common/1234567890000-MyMigration';
export class MyMigration1234567890000 extends BaseMigration {
withFKsDisabled = true as const;
}
```
**How `withFKsDisabled` works:** The migration wrapper calls `PRAGMA foreign_keys=OFF` before `up()`/`down()`, runs the migration inside a manual transaction, then re-enables foreign keys. This prevents CASCADE from firing during the internal table drop. It also sets `transaction = false` so TypeORM does not open its default transaction first: SQLite treats `PRAGMA foreign_keys` as a no-op while a transaction is open, so the pragma has to run before the manual transaction begins.
> **Note:** On Postgres, these methods use `ALTER TABLE` directly and don't recreate the table. The risk is SQLite-specific.
### Column types ### Column types
**Match column type to value semantics.** Never `varchar` as a catch-all for non-string values — storing numbers as strings loses sort order, range queries, and SUM/AVG aggregations. **Match column type to value semantics.** Never `varchar` as a catch-all for non-string values — storing numbers as strings loses sort order, range queries, and SUM/AVG aggregations.
@ -570,7 +616,11 @@ When a migration both adds a column and backfills data, structure it clearly wit
```typescript ```typescript
export class AddAndBackfillColumn1234567890000 implements IrreversibleMigration { export class AddAndBackfillColumn1234567890000 implements IrreversibleMigration {
async up(ctx: MigrationContext) { async up(ctx: MigrationContext) {
await ctx.schemaBuilder.addColumns('my_table', [ctx.schemaBuilder.column('newCol').text]); await ctx.schemaBuilder.addColumns(
'my_table',
[ctx.schemaBuilder.column('newCol').text],
{ recreatesOnSqlite: true },
);
await this.backfillNewCol(ctx); await this.backfillNewCol(ctx);
} }
@ -633,7 +683,7 @@ Some migrations override with `transaction = false as const` for big DDL on engi
- **Small differences** (a single statement, a CHECK constraint, slightly different syntax): keep one migration in `common/` and branch on `isSqlite` / `isPostgres`. - **Small differences** (a single statement, a CHECK constraint, slightly different syntax): keep one migration in `common/` and branch on `isSqlite` / `isPostgres`.
- **Large differences** (different table recreation strategies, different intermediate steps, fundamentally different SQL): write **separate files** in `postgresdb/` and `sqlite/`. A common migration full of `if (isSqlite) { ... }` blocks is harder to read and review than two focused files. - **Large differences** (different table recreation strategies, different intermediate steps, fundamentally different SQL): write **separate files** in `postgresdb/` and `sqlite/`. A common migration full of `if (isSqlite) { ... }` blocks is harder to read and review than two focused files.
If only Postgres needs the change, just put the file in `postgresdb/`; don't write a no-op SQLite migration with `if (isPostgres)`. SQLite no longer needs separate migrations for column adds (the recreate-table path was fixed) — verify before duplicating. If only Postgres needs the change, just put the file in `postgresdb/`; don't write a no-op SQLite migration with `if (isPostgres)`. For SQLite column adds, follow the [SQLite table recreation risk](#sqlite-table-recreation-risk) decision tree before deciding whether a common migration is enough or a SQLite subclass/raw `ALTER TABLE` path is needed.
### SQLite supports modern syntax ### SQLite supports modern syntax

View File

@ -0,0 +1,150 @@
---
description: Reviews a GitHub pull request like a thoughtful human reviewer and writes the feedback to a markdown file. Prioritizes bugs, behavioral regressions, security issues, and missing tests, ordered by severity. Use when given a PR URL to review, or when the user says /human-like-code-review.
allowed-tools: Bash(gh:*), Bash(git:*), Read, Glob, Grep
---
# Human-Like Code Review
Review a GitHub pull request with a code-review mindset and produce a copy/paste-friendly
markdown file of feedback. Findings are the primary focus: prioritize bugs, behavioral
regressions, security issues, and missing tests, ordered by severity. Do not make code
changes unless the user explicitly asks for them.
## Input
The user must provide a **GitHub pull request URL** (e.g. `https://github.com/n8n-io/n8n/pull/1234`).
If not provided, ask for it before proceeding.
Extract the PR number and repository from the URL and use the `gh` CLI to fetch the PR diff and metadata.
## Workflow
1. Parse the PR URL to get owner, repo, and PR number.
2. Fetch the PR diff: `gh pr diff <number> --repo <owner>/<repo>`
3. Fetch PR metadata: `gh pr view <number> --repo <owner>/<repo>`
4. Fetch existing review comments: `gh api repos/<owner>/<repo>/pulls/<number>/comments`
5. Review the diff thoroughly with a critical, code-review mindset.
6. Produce a new `.md` file named `review-<repo>-<number>.md` inside the repo's gitignored `tmp/` folder, so it is never committed (the `tmp` folder is listed in `.gitignore`). Create the folder if needed (`mkdir -p tmp`) and write to `tmp/review-<repo>-<number>.md`. Print the path to the file when done so the user can open it.
7. If a point was already raised in existing PR comments, check whether it's still valid - if resolved, confirm it's fixed; if still open, expand on it or add context instead of repeating it.
8. Before finishing, clean up any scratch files created during review. The only
file that should remain in `tmp/` from this skill run is the final
`tmp/review-<repo>-<number>.md` review file.
## Temporary file hygiene
Prefer reading `gh` output directly instead of writing extra files. If you need
scratch files for a complex review (for example, a saved diff or extracted file
contents), remove them before you finish. Do not leave `tmp/pr-*.diff`,
extracted source files, or empty temporary files behind.
## What to prioritize
Findings must be the primary focus, ordered by severity (most severe first):
1. **Bugs** - logic errors, off-by-one, null/undefined handling, incorrect conditions.
2. **Behavioral regressions** - changes that break or alter existing behavior.
3. **Security issues** - injection, auth/authorization gaps, unsafe input handling, secret exposure.
4. **Missing tests** - the actual change isn't covered, or edge cases are untested.
Style, naming, and minor nits come last, and only if they genuinely matter.
## Backward compatibility
Especially when nodes are changed, check that the change does not break backward
compatibility for existing users' workflows (renamed/removed parameters, changed
defaults, altered output shape, different behavior for the same input).
If there's a risk of broken backward compatibility, raise it in the review
comments: point out the risk and suggest node versioning (a new node version
or a versioned default) rather than changing existing behavior in place.
## Output format
The markdown file must contain:
- A header with the PR title, URL, and date of review.
- A `## Hints for a reviewer` section (see below).
- A `## General` section (see below).
- A `## Comments` section with a list of review comments in this format:
`file name + line number + comment`
Comments should be easy to copy/paste. Do not quote comments using `>` - just write them directly.
It's totally okay to have no line comments. Do not force findings or point out
minor things just to have something to say. In those cases, prefer an empty
comments list and a short positive `## General` comment.
When a comment suggests something different, be precise about it. Either propose the actual code change (a short snippet or `suggestion` block the author can apply directly) or, if a full snippet isn't practical, state the concrete direction (which function/value/approach to use) rather than a vague hint. Avoid comments like "this could be cleaner" with no actionable next step.
### Hints for a reviewer
Right after the header, include a `## Hints for a reviewer` section to orient the
human reviewer before they read the diff:
- A short reason why the PR was created (the problem it solves or the goal).
- A few basic words explaining the solution, without overcomplication.
- If it's a community PR, mention it briefly. You can usually spot this from
`authorAssociation` or a fork-prefixed branch like `random-fork-owner:fix-node-option`.
Keep it to a couple of sentences. It's about saving the reviewer time, not a
detailed write-up.
### General summary comment
Before the line-by-line comments, include a `## General` section that can be
pasted as the review summary. Make it sound human and natural - it is okay to
start with something short and friendly like "Hey, nice job on this" when the
change deserves it. Then add any top-level, PR-wide feedback that doesn't belong
on a single line - e.g. design or architecture concerns, an implicit/type-unsafe
contract between files, repeated patterns, scope, or missing test coverage of
the actual change.
- Keep it short and conversational. Don't repeat or summarize the individual line comments here.
- The exception is a big design issue with the overall solution: when the whole approach is wrong or has a structural problem, explain the overall idea here rather than scattering it across individual comments.
- If there's genuinely nothing PR-wide to raise, write a short positive review
summary and move on - do not pad it.
## Line number rules
Line numbers MUST be the actual line numbers in the file on the PR branch (the new/right side of the diff), NOT the position within the diff hunk.
To get the correct line number: look at the `@@` hunk header (e.g. `@@ -19,10 +19,9 @@`). The `+19` means the first line of the hunk is line 19 of the new file. Starting from there, add one for each following line that is a context line (` `) or an added line (`+`). Skip removed lines (`-`) - they don't exist in the new file.
Example: if a hunk says `@@ -10,5 +10,6 @@` and you want to comment on the 3rd non-removed line in that hunk, the line number is 10 + 2 = 12.
Never guess line numbers. Always compute them from the hunk headers.
## Consistency validation
Before suggesting a change to a pattern (naming, structure, style), check whether the same pattern is used elsewhere in the codebase or in similar nodes/files. If it is an established convention, do NOT flag it. Only comment if something genuinely deviates from existing patterns.
## Formatting rules
- Never use long dashes or em-dashes. Use `-` instead.
- Keep comments as short as possible. One sentence is ideal.
- For line comments, avoid filler like "Nice work!", "Looks great!", or "Good job here." - keep them actionable or questioning.
## Tone
Write review comments naturally, like a friendly human reviewer.
Feel free to use phrases like:
- How about...
- I wonder if...
- WDYT?
You can also insert an emoji from time to time 🙂
Keep comments friendly, short, and collaborative. Avoid judgmental wording like "you made a mistake" or anything overly critical.
## Important
Do not make code changes unless the user explicitly asks for them. This skill produces a review, not a patch.
The very last sentence of your reply must be a clickable Markdown link to the
review file, so the user can open it from the agent chat immediately. Use this
format: `[tmp/review-<repo>-<number>.md](tmp/review-<repo>-<number>.md)`.
Nothing should come after the link.

File diff suppressed because it is too large Load Diff

View File

@ -11,5 +11,8 @@ RUN echo node ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/node && chmod 0440 /etc
RUN mkdir /workspaces && chown node:node /workspaces RUN mkdir /workspaces && chown node:node /workspaces
RUN corepack enable RUN corepack enable
ENV PNPM_HOME=/home/node/.local/share/pnpm
ENV PATH=$PNPM_HOME/bin:$PATH
USER node USER node
RUN mkdir -p ~/.pnpm-store && pnpm config set store-dir ~/.pnpm-store --global RUN mkdir -p $PNPM_HOME ~/.pnpm-store && pnpm config set store-dir ~/.pnpm-store --global

View File

@ -9,7 +9,7 @@
"type=bind,source=${localEnv:HOME}/.n8n,target=/home/node/.n8n,consistency=cached" "type=bind,source=${localEnv:HOME}/.n8n,target=/home/node/.n8n,consistency=cached"
], ],
"forwardPorts": [8080, 5678], "forwardPorts": [8080, 5678],
"postCreateCommand": "corepack prepare --activate && pnpm install", "postCreateCommand": "corepack install && pnpm install",
"postAttachCommand": "pnpm build", "postAttachCommand": "pnpm build",
"customizations": { "customizations": {
"codespaces": { "codespaces": {

View File

@ -17,7 +17,7 @@ inputs:
build-command: build-command:
description: 'Command to execute for building the project or an optional command. Leave empty to skip build step.' description: 'Command to execute for building the project or an optional command. Leave empty to skip build step.'
required: false required: false
default: 'pnpm build' default: 'pnpm build:unchecked'
install-command: install-command:
description: 'Command to execute for installing project dependencies. Leave empty to skip install step.' description: 'Command to execute for installing project dependencies. Leave empty to skip install step.'
required: false required: false
@ -101,12 +101,12 @@ runs:
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with: with:
path: ~/.safe-chain/bin path: ~/.safe-chain/bin
key: safe-chain-1.5.3-${{ runner.os }}-${{ runner.arch }} key: safe-chain-1.5.7-${{ runner.os }}-${{ runner.arch }}
- name: Install Aikido SafeChain - name: Install Aikido SafeChain
run: | run: |
VERSION="1.5.3" VERSION="1.5.7"
EXPECTED_SHA256="0107cbbbf90159379756157e902acae512d62ffbd174307e42c5fe9f266792d3" EXPECTED_SHA256="07ab512fd8795ce41b2275be369aced4c9a93cc7bca9b397951507891a955239"
node .github/scripts/retry.mjs --attempts 3 --delay 10 -- \ node .github/scripts/retry.mjs --attempts 3 --delay 10 -- \
curl -fsSL -o install-safe-chain.sh "https://github.com/AikidoSec/safe-chain/releases/download/${VERSION}/install-safe-chain.sh" curl -fsSL -o install-safe-chain.sh "https://github.com/AikidoSec/safe-chain/releases/download/${VERSION}/install-safe-chain.sh"
echo "${EXPECTED_SHA256} install-safe-chain.sh" | sha256sum -c - echo "${EXPECTED_SHA256} install-safe-chain.sh" | sha256sum -c -

View File

@ -1,10 +1,17 @@
## Summary ## Summary
<!-- <!--
Describe what the PR does and how to test. Describe what the PR does.
Photos and videos are recommended. Photos and videos are recommended.
--> -->
## How to test
<!--
Describe all steps needed to test the changes.
Include an example workflow if the changes affect the Workflow builder, execution, or a Node and can be tested with a workflow.
-->
## Related Linear tickets, Github issues, and Community forum posts ## Related Linear tickets, Github issues, and Community forum posts
<!-- <!--

View File

@ -112,12 +112,14 @@ for (const file of files) {
} }
} }
const stderr = res.stderr ?? '';
if (!parsed) { if (!parsed) {
rows.push({ file, status: 'no-result' }); rows.push({ file, status: 'no-result', stderr });
continue; continue;
} }
rows.push({ file, status: 'ran', passed: parsed.passed, total: parsed.total }); rows.push({ file, status: 'ran', passed: parsed.passed, total: parsed.total, stderr });
} }
// --- Render markdown --- // --- Render markdown ---
@ -131,6 +133,28 @@ const renderRow = ({ file, status, passed, total }) => {
return `| \`${file}\` | ${fraction} ⚠️ flaky |`; return `| \`${file}\` | ${fraction} ⚠️ flaky |`;
}; };
const STDERR_EXCERPT_LINES = 20;
const renderDiagnostic = ({ file, stderr }) => {
const lines = stderr.split('\n');
const truncated = lines.length > STDERR_EXCERPT_LINES;
const excerpt = lines.slice(0, STDERR_EXCERPT_LINES).join('\n');
const trailer = truncated ? `\n... (${lines.length - STDERR_EXCERPT_LINES} more lines)` : '';
return [
`<details><summary><code>${file}</code> — first failure stderr</summary>`,
'',
'```',
excerpt + trailer,
'```',
'',
'</details>',
].join('\n');
};
const diagnostics = rows
.filter((r) => r.stderr && r.stderr.trim() && (r.status === 'no-result' || (r.status === 'ran' && r.passed < r.total)))
.map(renderDiagnostic);
const body = [ const body = [
'<!-- grind-results -->', '<!-- grind-results -->',
'## Grind results — pre-merge flake detection (N=' + n + ')', '## Grind results — pre-merge flake detection (N=' + n + ')',
@ -139,6 +163,7 @@ const body = [
'|---|---|', '|---|---|',
...rows.map(renderRow), ...rows.map(renderRow),
'', '',
...(diagnostics.length ? ['### First-failure diagnostics', '', ...diagnostics, ''] : []),
'_Spawn-per-iteration mode. Catches post-teardown async flakes that `vitest --repeat` misses. See [DEVP-198](https://linear.app/n8n/issue/DEVP-198) for design notes._', '_Spawn-per-iteration mode. Catches post-teardown async flakes that `vitest --repeat` misses. See [DEVP-198](https://linear.app/n8n/issue/DEVP-198) for design notes._',
'', '',
].join('\n'); ].join('\n');

View File

@ -1,7 +1,7 @@
{ {
"name": "workflow-scripts", "name": "workflow-scripts",
"scripts": { "scripts": {
"test": "node --test --experimental-test-module-mocks ./*.test.mjs ./quality/*.test.mjs ./slack/*.test.mjs ../../scripts/licenses/*.test.mjs", "test": "node --test --experimental-test-module-mocks ./*.test.mjs ./quality/*.test.mjs ./slack/*.test.mjs ./stale/*.test.mjs ../../scripts/licenses/*.test.mjs",
"generate-sbom": "FETCH_LICENSE=true cdxgen -t pnpm --no-install-deps --profile license-compliance -o ../../sbom-source.cdx.json ../../compiled/", "generate-sbom": "FETCH_LICENSE=true cdxgen -t pnpm --no-install-deps --profile license-compliance -o ../../sbom-source.cdx.json ../../compiled/",
"render-licenses-md": "node ../../scripts/licenses/render-licenses-md.mjs ../../sbom-source.cdx.json ../../packages/cli/THIRD_PARTY_LICENSES.md ../../compiled/node_modules", "render-licenses-md": "node ../../scripts/licenses/render-licenses-md.mjs ../../sbom-source.cdx.json ../../packages/cli/THIRD_PARTY_LICENSES.md ../../compiled/node_modules",
"generate-licenses": "pnpm generate-sbom && pnpm render-licenses-md" "generate-licenses": "pnpm generate-sbom && pnpm render-licenses-md"

View File

@ -0,0 +1,468 @@
#!/usr/bin/env node
/**
* Stale-branch cleanup for n8n-io/n8n.
*
* Decides, per branch, whether to KEEP or DELETE and prints the reasoning
* (age, ruleset protection, default branch). Dry-run is the DEFAULT: nothing
* is deleted unless you pass `--execute`.
*
* How protection is decided:
* We fetch the repo's rulesets, keep the ones that (a) target branches,
* (b) are actively enforced, and (c) contain a `deletion` rule, then match
* each branch ref against their ref-name include/exclude glob patterns.
* This mirrors what the GitHub UI shows as "affected branches" for a ruleset.
* GitHub's server-side ruleset enforcement remains the hard backstop: even in
* --execute mode a delete of a protected branch is rejected unless the token
* is a bypass actor.
*
* Auth: uses GH_TOKEN / GITHUB_TOKEN if set (CI), otherwise falls back to
* `gh auth token` (local). Repo: GITHUB_REPOSITORY env, else `gh repo view`.
*
* Usage:
* node .github/scripts/stale/clean-stale-branches.mjs # dry run (default)
* node .github/scripts/stale/clean-stale-branches.mjs --days=120
* node .github/scripts/stale/clean-stale-branches.mjs --execute # actually delete
*
* Requires Node 18+ (global fetch).
*/
import { execFileSync } from 'node:child_process';
import { pathToFileURL } from 'node:url';
import { parseArgs } from 'node:util';
import { minimatch } from 'minimatch';
// Base URL prefixed to every GitHub REST and GraphQL request made by this script.
const API = 'https://api.github.com';
// Milliseconds in one day; used to turn a commit timestamp into a whole-day age.
const DAY_MS = 86_400_000; // 24 hours in millis
// --- pure logic (exported for tests) ---------------------------------------
// GitHub ruleset ref globs use fnmatch semantics: `*` matches within a path
// segment, `**` matches across `/`, `?` matches a single char. minimatch (the
// same glob matcher used by our other workflow scripts) implements these
// rules — `dot` so leading-dot segments aren't skipped, `noext` to keep
// GitHub's plain fnmatch behaviour (no `+(...)` extglobs).
const GLOB_OPTIONS = { dot: true, noext: true };

/**
 * Tests a fully-qualified ref (e.g. `refs/heads/foo`) against one ruleset
 * ref-name pattern.
 *
 * @param ref Fully-qualified ref name.
 * @param pattern A glob, or one of the special tokens `~ALL` / `~DEFAULT_BRANCH`.
 * @param defaultBranch Bare name of the default branch, used to resolve `~DEFAULT_BRANCH`.
 * @returns `true` when the pattern covers the ref.
 */
export function refMatches(ref, pattern, defaultBranch) {
	switch (pattern) {
		case '~ALL':
			// Special token: covers every ref.
			return true;
		case '~DEFAULT_BRANCH':
			// Special token: covers only the default branch's ref.
			return ref === `refs/heads/${defaultBranch}`;
		default:
			return minimatch(ref, pattern, GLOB_OPTIONS);
	}
}
/**
 * Finds the first operator-supplied keep-pattern that matches a branch.
 *
 * Patterns are globs matched against the bare branch name (e.g. `release/*`,
 * `1.x`). They act as an explicit safety net independent of rulesets, so stale
 * cleanup stays safe even if a protecting ruleset is ever removed.
 *
 * @param branchName Bare branch name (no `refs/heads/` prefix).
 * @param excludePatterns Keep-patterns, checked in order.
 * @returns The first matching pattern, or `null` when none match.
 */
export function matchingExcludePattern(branchName, excludePatterns) {
	for (const candidate of excludePatterns) {
		if (minimatch(branchName, candidate, GLOB_OPTIONS)) {
			return candidate;
		}
	}
	return null;
}
/**
 * Looks up the first ruleset that protects `ref` from deletion.
 *
 * A ruleset applies when at least one of its include patterns matches the ref
 * and none of its exclude patterns do.
 *
 * @param ref Fully-qualified ref name (e.g. `refs/heads/foo`).
 * @param rulesets Objects of shape `{ name, include, exclude }`.
 * @param defaultBranch Bare default-branch name, forwarded to `refMatches`.
 * @returns The protecting ruleset's name, or `null` when no ruleset applies.
 */
export function protectingRuleset(ref, rulesets, defaultBranch) {
	const covers = (patterns) => patterns.some((glob) => refMatches(ref, glob, defaultBranch));
	// Excludes are only evaluated once an include has matched.
	const protector = rulesets.find(({ include, exclude }) => covers(include) && !covers(exclude));
	return protector ? protector.name : null;
}
/**
 * Classify branches into keep/delete buckets with a reason for each.
 * Pure: no network, no clock — `now` is injected so tests are deterministic.
 *
 * Checks run in this order and the first hit decides the outcome: protecting
 * ruleset, operator keep-pattern, default branch, open PR reference, unknown
 * commit date, younger than the threshold. Only a branch that passes every
 * check is queued for deletion.
 *
 * A missing or unparsable `committedDate` is treated as "unknown" and the
 * branch is kept: when the age cannot be computed, we never delete.
 *
 * @param {{
 * branches: Array<{ name: string, committedDate: string | null }>,
 * rulesets: Array<{ name: string, include: string[], exclude: string[] }>,
 * defaultBranch: string,
 * staleDays: number,
 * now: number,
 * openPrRefs?: Map<string, number[]>,
 * excludePatterns?: string[],
 * }} input
 * @returns {{ keep: Array<{name:string,ageDays:number|null,reason:string}>, remove: Array<{name:string,ageDays:number|null,reason:string}> }}
 * `keep` is sorted by branch name; `remove` is sorted oldest first.
 */
export function classifyBranches({
	branches,
	rulesets,
	defaultBranch,
	staleDays,
	now,
	openPrRefs = new Map(),
	excludePatterns = [],
}) {
	const keep = [];
	const remove = [];
	for (const branch of branches) {
		const ref = `refs/heads/${branch.name}`;
		// A NaN age would slip past the `ageDays < staleDays` check below
		// (every comparison with NaN is false) and end up in `remove`, so any
		// non-finite result is normalised to `null`, meaning "unknown".
		const rawAgeDays =
			branch.committedDate == null
				? Number.NaN
				: Math.floor((now - new Date(branch.committedDate).getTime()) / DAY_MS);
		const ageDays = Number.isFinite(rawAgeDays) ? rawAgeDays : null;
		const rulesetName = protectingRuleset(ref, rulesets, defaultBranch);
		if (rulesetName) {
			keep.push({ name: branch.name, ageDays, reason: `protected: ruleset "${rulesetName}" (deletion)` });
			continue;
		}
		const excludePattern = matchingExcludePattern(branch.name, excludePatterns);
		if (excludePattern) {
			keep.push({ name: branch.name, ageDays, reason: `excluded: matches keep-pattern "${excludePattern}"` });
			continue;
		}
		if (branch.name === defaultBranch) {
			keep.push({ name: branch.name, ageDays, reason: 'protected: default branch' });
			continue;
		}
		const prNumbers = openPrRefs.get(branch.name);
		if (prNumbers && prNumbers.length > 0) {
			// Copy before sorting so the caller's array is not mutated.
			const refs = prNumbers
				.slice()
				.sort((a, b) => a - b)
				.map((n) => `#${n}`)
				.join(', ');
			keep.push({ name: branch.name, ageDays, reason: `open PR ${refs} (head or base)` });
			continue;
		}
		if (ageDays === null) {
			keep.push({ name: branch.name, ageDays, reason: 'kept: unknown last-commit date' });
			continue;
		}
		if (ageDays < staleDays) {
			keep.push({ name: branch.name, ageDays, reason: `active: last commit ${ageDays}d ago (< ${staleDays}d)` });
			continue;
		}
		remove.push({
			name: branch.name,
			ageDays,
			reason: `stale: last commit ${ageDays}d ago (>= ${staleDays}d), no deletion-protection ruleset`,
		});
	}
	keep.sort((a, b) => a.name.localeCompare(b.name));
	remove.sort((a, b) => (b.ageDays ?? 0) - (a.ageDays ?? 0)); // oldest first
	return { keep, remove };
}
// --- environment / auth -----------------------------------------------------
/**
 * Resolves the GitHub API token.
 *
 * Prefers `GH_TOKEN`, then `GITHUB_TOKEN`, and finally asks the `gh` CLI
 * (`gh auth token`). Surrounding whitespace is trimmed in every case.
 *
 * @returns The token string.
 * @throws {Error} When no env token is set and the `gh` invocation fails.
 */
function resolveToken() {
	const envToken = process.env.GH_TOKEN || process.env.GITHUB_TOKEN;
	if (envToken) {
		return envToken.trim();
	}
	let cliOutput;
	try {
		cliOutput = execFileSync('gh', ['auth', 'token'], { encoding: 'utf8' });
	} catch {
		throw new Error('No token found. Set GH_TOKEN/GITHUB_TOKEN or run `gh auth login`.');
	}
	return cliOutput.trim();
}
/**
 * Resolves the target repository as `{ owner, repo }`.
 *
 * Reads the `owner/repo` slug from `GITHUB_REPOSITORY` when set; otherwise
 * asks the `gh` CLI (`gh repo view`) for the current repository's slug.
 *
 * @returns `{ owner, repo }` split from the slug on `/`.
 * @throws {Error} When the env var is unset and the `gh` lookup fails.
 */
function resolveRepo() {
	const splitSlug = (slug) => {
		const parts = slug.split('/');
		return { owner: parts[0], repo: parts[1] };
	};
	const envSlug = process.env.GITHUB_REPOSITORY;
	if (envSlug) {
		return splitSlug(envSlug);
	}
	try {
		const cliArgs = ['repo', 'view', '--json', 'nameWithOwner', '-q', '.nameWithOwner'];
		const cliSlug = execFileSync('gh', cliArgs, { encoding: 'utf8' }).trim();
		return splitSlug(cliSlug);
	} catch {
		throw new Error('Cannot resolve repo. Set GITHUB_REPOSITORY or run inside a gh-authenticated repo.');
	}
}
/**
 * Decides whether this run is a dry run. Dry run is the default.
 *
 * Precedence, highest first:
 *   1. `--dry-run` flag          -> dry run (an explicit override always wins toward safety)
 *   2. `DRY_RUN=true` / `1`      -> dry run
 *   3. `DRY_RUN=false` / `0`     -> real deletes
 *   4. `--execute` flag          -> real deletes
 *   5. otherwise                 -> dry run
 *
 * @param values Parsed CLI flags (`execute`, `dry-run`).
 * @returns `true` for a dry run, `false` to perform deletions.
 */
function resolveDryRun(values) {
	if (values['dry-run']) {
		return true;
	}
	const envSetting = process.env.DRY_RUN;
	if (envSetting === 'true' || envSetting === '1') {
		return true;
	}
	if (envSetting === 'false' || envSetting === '0') {
		return false;
	}
	// No recognised env setting: only --execute opts into real deletes.
	return !values.execute;
}
/**
 * Collects the glob patterns of branch names to always keep.
 *
 * Sources are the repeated `--exclude` flags followed by the
 * `EXCLUDE_BRANCHES` env var. Each source may itself be a comma-separated
 * list. Entries are trimmed and blank ones are dropped.
 *
 * @param values Parsed CLI flags; `values.exclude` is an optional string array.
 * @returns Flat list of non-empty patterns, in source order.
 */
function resolveExcludePatterns(values) {
	const sources = [...(values.exclude ?? []), process.env.EXCLUDE_BRANCHES ?? ''];
	const patterns = [];
	for (const source of sources) {
		for (const piece of source.split(',')) {
			const trimmed = piece.trim();
			if (trimmed) {
				patterns.push(trimmed);
			}
		}
	}
	return patterns;
}
// --- data fetching ----------------------------------------------------------
/**
 * Performs a GET against the GitHub REST API and returns the parsed JSON body.
 *
 * @param ctx Request context; `ctx.headers` is sent with the request.
 * @param path API path beginning with `/`, appended to the API base URL.
 * @returns The decoded JSON response.
 * @throws {Error} On any non-2xx response; the message carries the path,
 *   the status code and the response body.
 */
async function rest(ctx, path) {
	const response = await fetch(`${API}${path}`, { headers: ctx.headers });
	if (response.ok) {
		return response.json();
	}
	const detail = await response.text();
	throw new Error(`GET ${path} -> ${response.status} ${detail}`);
}
/**
 * Runs a GraphQL query against the GitHub API and returns its `data` payload.
 *
 * @param ctx Request context; `ctx.headers` is sent with the request.
 * @param query GraphQL query text.
 * @param variables Variables object for the query.
 * @returns The `data` member of the GraphQL response.
 * @throws {Error} On a non-2xx HTTP response (message carries the status
 *   code and body), or when the response contains a GraphQL `errors` array.
 */
async function graphql(ctx, query, variables) {
	const res = await fetch(`${API}/graphql`, {
		method: 'POST',
		headers: { ...ctx.headers, 'Content-Type': 'application/json' },
		body: JSON.stringify({ query, variables }),
	});
	// Transport-level failures (bad credentials, rate limiting, gateway errors)
	// come back without an `errors` array and possibly without a JSON body, so
	// check the status first, the same way `rest` does, instead of returning
	// `undefined` data or failing inside `res.json()`.
	if (!res.ok) {
		throw new Error(`POST /graphql -> ${res.status} ${await res.text()}`);
	}
	const json = await res.json();
	if (json.errors) {
		throw new Error(`GraphQL error: ${JSON.stringify(json.errors)}`);
	}
	return json.data;
}
/**
 * Loads the repository rulesets that protect branches from deletion.
 *
 * Lists the repo's rulesets (first page, up to 100), fetches each one's detail
 * sequentially, and keeps those that target branches, are actively enforced,
 * and contain a `deletion` rule.
 *
 * @param ctx Request context (`owner`, `repo`, `headers`).
 * @returns Array of `{ name, include, exclude }`, where include/exclude are
 *   the ruleset's ref-name patterns (empty arrays when absent).
 */
async function fetchDeletionRulesets(ctx) {
	const summaries = await rest(ctx, `/repos/${ctx.owner}/${ctx.repo}/rulesets?per_page=100`);
	const protecting = [];
	for (const { id } of summaries) {
		const ruleset = await rest(ctx, `/repos/${ctx.owner}/${ctx.repo}/rulesets/${id}`);
		const guardsBranchDeletion =
			ruleset.target === 'branch' &&
			ruleset.enforcement === 'active' &&
			(ruleset.rules ?? []).some((rule) => rule.type === 'deletion');
		if (guardsBranchDeletion) {
			const refName = ruleset.conditions?.ref_name;
			protecting.push({
				name: ruleset.name,
				include: refName?.include ?? [],
				exclude: refName?.exclude ?? [],
			});
		}
	}
	return protecting;
}
/**
 * Fetches every branch of the repository together with its tip commit date,
 * plus the name of the default branch, paging through GraphQL 100 refs at a time.
 *
 * @param ctx Request context (`owner`, `repo`, `headers`).
 * @returns `{ branches, defaultBranch }`: `branches` is an array of
 *   `{ name, committedDate }` (`committedDate` is `null` when the ref target
 *   exposes none); `defaultBranch` is `''` when the repo reports none.
 */
async function fetchBranches(ctx) {
	console.log('Fetching branches...');
	const query = `
query($owner:String!,$repo:String!,$cursor:String){
repository(owner:$owner,name:$repo){
defaultBranchRef{ name }
refs(refPrefix:"refs/heads/",first:100,after:$cursor){
pageInfo{ hasNextPage endCursor }
nodes{
name
target{ ... on Commit { committedDate } }
}
}
}
}`;
	const branches = [];
	let defaultBranch = '';
	let cursor = null;
	for (;;) {
		const { repository } = await graphql(ctx, query, { owner: ctx.owner, repo: ctx.repo, cursor });
		defaultBranch = repository.defaultBranchRef?.name ?? '';
		for (const { name, target } of repository.refs.nodes) {
			branches.push({ name, committedDate: target?.committedDate ?? null });
		}
		const { hasNextPage, endCursor } = repository.refs.pageInfo;
		// Stop on the last page, or when no usable cursor was returned.
		if (!hasNextPage || !endCursor) {
			break;
		}
		cursor = endCursor;
	}
	return { branches, defaultBranch };
}
/**
 * Builds a map of branch name -> open PR numbers that reference the branch as
 * head or base.
 *
 * A branch in this map must never be deleted: deleting a PR's head closes the
 * PR, and deleting a base orphans/closes PRs targeting it. Fork PR head refs
 * live in the fork, so head refs are only counted for same-repo PRs; base refs
 * are always in this repo.
 *
 * @param ctx Request context (`owner`, `repo`, `headers`).
 * @returns `Map<string, number[]>` with de-duplicated PR numbers per branch.
 */
async function fetchOpenPrRefs(ctx) {
	console.log('Fetching open pull requests...');
	const prsByBranch = new Map();
	const thisRepo = `${ctx.owner}/${ctx.repo}`.toLowerCase();
	const record = (branch, prNumber) => {
		if (!branch) {
			return;
		}
		const numbers = prsByBranch.get(branch) ?? [];
		if (!numbers.includes(prNumber)) {
			numbers.push(prNumber);
		}
		prsByBranch.set(branch, numbers);
	};
	for (let page = 1; ; page += 1) {
		const prs = await rest(ctx, `/repos/${ctx.owner}/${ctx.repo}/pulls?state=open&per_page=100&page=${page}`);
		for (const pr of prs) {
			const headIsSameRepo = pr.head?.repo?.full_name?.toLowerCase() === thisRepo;
			if (headIsSameRepo) {
				record(pr.head?.ref, pr.number);
			}
			record(pr.base?.ref, pr.number);
		}
		// A short page means there is nothing further to fetch.
		if (prs.length < 100) {
			return prsByBranch;
		}
	}
}
/**
 * Issues the DELETE request for a branch ref and hands back the raw response.
 * The status is not interpreted here; the caller decides what counts as success.
 *
 * @param ctx Request context (`owner`, `repo`, `headers`).
 * @param name Bare branch name; URI-encoded before being placed in the URL.
 * @returns The `fetch` Response.
 */
async function deleteBranch(ctx, name) {
	const url = `${API}/repos/${ctx.owner}/${ctx.repo}/git/refs/heads/${encodeURIComponent(name)}`;
	return fetch(url, { method: 'DELETE', headers: ctx.headers });
}
// --- reporting --------------------------------------------------------------
function printReport({ owner, repo, defaultBranch, staleDays, dryRun, rulesets, excludePatterns, openPrCount, keep, remove }) {
console.log(`Repository: ${owner}/${repo}`);
console.log(`Default branch: ${defaultBranch}`);
console.log(`Stale threshold: ${staleDays} days`);
console.log(`Mode: ${dryRun ? 'DRY RUN (no deletions)' : 'EXECUTE (will delete)'}`);
console.log(`Deletion rulesets: ${rulesets.length ? rulesets.map((r) => `"${r.name}"`).join(', ') : '(none)'}`);
console.log(
`Keep patterns: ${excludePatterns?.length ? excludePatterns.map((p) => `"${p}"`).join(', ') : '(none)'}`,
);
console.log(`Open-PR refs kept: ${openPrCount ?? 0}`);
console.log('');
console.log(`Branches to keep (${keep.length}):`);
for (const b of keep) console.log(` KEEP ${b.name}${b.reason}`);
console.log('');
console.log(`Branches to delete (${remove.length}):`);
for (const b of remove) console.log(` DELETE ${b.name}${b.reason}`);
console.log('');
}
// --- main -------------------------------------------------------------------
/**
 * CLI entry point: parses flags, gathers rulesets / branches / open PRs from
 * GitHub, classifies every branch, prints the report and — unless this is a
 * dry run — deletes the branches classified as stale.
 *
 * A delete that does not return HTTP 204 is reported as SKIPPED and counted,
 * but does not abort the run.
 *
 * @throws {Error} When the stale-days value is not a positive finite number,
 *   or when token / repository resolution or any fetch helper throws.
 */
async function main() {
	const { values } = parseArgs({
		options: {
			execute: { type: 'boolean', default: false },
			'dry-run': { type: 'boolean' },
			days: { type: 'string' },
			exclude: { type: 'string', multiple: true },
			help: { type: 'boolean', default: false },
		},
	});
	if (values.help) {
		console.log(
			'Usage: clean-stale-branches.mjs [--days=N] [--exclude=GLOB]... [--execute]\n' +
				' --days=N Days of inactivity before a branch is stale (default 100, or STALE_DAYS env)\n' +
				' --exclude=GLOB Branch-name glob to always keep; repeatable or comma-separated\n' +
				' (e.g. --exclude="release/*,1.x", or EXCLUDE_BRANCHES env)\n' +
				' --execute Actually delete stale branches (default is dry run)\n' +
				'Auth via GH_TOKEN/GITHUB_TOKEN or `gh auth token`. Repo via GITHUB_REPOSITORY or `gh repo view`.',
		);
		return;
	}
	const dryRun = resolveDryRun(values);
	const excludePatterns = resolveExcludePatterns(values);
	// The CLI flag wins over the env var; 100 days is the fallback.
	const staleDays = Number(values.days ?? process.env.STALE_DAYS ?? 100);
	if (!Number.isFinite(staleDays) || staleDays <= 0) {
		throw new Error(`Invalid --days value: ${values.days ?? process.env.STALE_DAYS}`);
	}
	const token = resolveToken();
	const { owner, repo } = resolveRepo();
	const ctx = {
		owner,
		repo,
		headers: {
			Authorization: `Bearer ${token}`,
			Accept: 'application/vnd.github+json',
			'X-GitHub-Api-Version': '2022-11-28',
			'User-Agent': 'n8n-stale-branches',
		},
	};
	const rulesets = await fetchDeletionRulesets(ctx);
	const { branches, defaultBranch } = await fetchBranches(ctx);
	const openPrRefs = await fetchOpenPrRefs(ctx);
	const { keep, remove } = classifyBranches({
		branches,
		rulesets,
		defaultBranch,
		staleDays,
		now: Date.now(),
		openPrRefs,
		excludePatterns,
	});
	printReport({
		owner,
		repo,
		defaultBranch,
		staleDays,
		dryRun,
		rulesets,
		excludePatterns,
		openPrCount: openPrRefs.size,
		keep,
		remove,
	});
	if (dryRun) {
		console.log(`Dry run complete. ${remove.length} branch(es) would be deleted. Re-run with --execute to delete.`);
		return;
	}
	let deleted = 0;
	let blocked = 0;
	for (const b of remove) {
		const res = await deleteBranch(ctx, b.name);
		if (res.status === 204) {
			deleted++;
			console.log(` deleted ${b.name}`);
		} else {
			blocked++;
			const body = await res.text();
			// Separate the branch name from the status code; printed back to back
			// they fuse into one token (e.g. `my-branch422`).
			console.log(` SKIPPED ${b.name} — ${res.status} ${body}`);
		}
	}
	console.log('');
	console.log(`Done. Deleted ${deleted}, skipped/blocked ${blocked}.`);
}
// Only run when executed directly, not when imported by tests.
// `import.meta.url` is a percent-encoded file URL, so the script path is
// converted with `pathToFileURL` rather than hand-prefixing `file://`; the
// hand-built string never matches for paths containing spaces or other
// characters that need encoding, nor for Windows paths.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
	await main();
}

View File

@ -0,0 +1,285 @@
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { refMatches, matchingExcludePattern, protectingRuleset, classifyBranches } from './clean-stale-branches.mjs';
/**
* Run these tests with:
*
* node --test ./.github/scripts/stale/clean-stale-branches.test.mjs
* */
// Milliseconds per day; used to build commit dates relative to a fixed clock.
const DAY_MS = 86_400_000;
// Mirror of the repo's real deletion-protecting rulesets.
// Each entry has the `{ name, include, exclude }` shape that `protectingRuleset`
// and `classifyBranches` consume; patterns are fully-qualified ref globs.
const RULESETS = [
{ name: 'master', include: ['refs/heads/master', '~DEFAULT_BRANCH'], exclude: [] },
{ name: '1.x Branch ruleset', include: ['refs/heads/1.x'], exclude: [] },
{ name: 'Release branch Ruleset', include: ['refs/heads/release/*'], exclude: [] },
{ name: 'Release Candidate branch Ruleset', include: ['refs/heads/release-candidate/*'], exclude: [] },
];
describe('refMatches', () => {
	// Every case resolves `~DEFAULT_BRANCH` against `master`.
	const expectResult = (ref, pattern, expected) => {
		assert.equal(refMatches(ref, pattern, 'master'), expected);
	};

	it('matches the ~ALL wildcard against any ref', () => {
		expectResult('refs/heads/anything', '~ALL', true);
	});

	it('resolves ~DEFAULT_BRANCH against the provided default branch', () => {
		expectResult('refs/heads/master', '~DEFAULT_BRANCH', true);
		expectResult('refs/heads/develop', '~DEFAULT_BRANCH', false);
	});

	it('matches a literal pattern exactly', () => {
		const literal = 'refs/heads/master';
		expectResult('refs/heads/master', literal, true);
		expectResult('refs/heads/master-2', literal, false);
		expectResult('refs/heads/feature/master', literal, false);
	});

	it('treats * as within a single path segment', () => {
		const singleSegment = 'refs/heads/release/*';
		expectResult('refs/heads/release/1.50.1', singleSegment, true);
		// * does not cross a slash
		expectResult('refs/heads/release/team/1.50.1', singleSegment, false);
		expectResult('refs/heads/release', singleSegment, false);
		expectResult('refs/heads/feature/x', singleSegment, false);
	});

	it('treats ** as crossing path segments', () => {
		const anyDepth = 'refs/heads/release/**';
		expectResult('refs/heads/release/team/1.50.1', anyDepth, true);
		expectResult('refs/heads/release/1.50.1', anyDepth, true);
	});

	it('treats metacharacters in the literal parts as literal', () => {
		expectResult('refs/heads/1.x', 'refs/heads/1.x', true);
		// the dot must be literal, not "any char"
		expectResult('refs/heads/1ax', 'refs/heads/1.x', false);
	});
});
describe('matchingExcludePattern', () => {
	// Runs a table of [branchName, patterns, expectedResult] rows.
	const expectAll = (rows) => {
		for (const [branchName, patterns, expected] of rows) {
			assert.equal(matchingExcludePattern(branchName, patterns), expected);
		}
	};

	it('matches the bare branch name against keep-patterns', () => {
		expectAll([
			['release/1.50.1', ['release/*'], 'release/*'],
			['1.x', ['1.x'], '1.x'],
		]);
	});

	it('returns the first matching pattern', () => {
		expectAll([['release/1.50.1', ['1.x', 'release/*'], 'release/*']]);
	});

	it('returns null when nothing matches or the list is empty', () => {
		expectAll([
			['some-feature', ['release/*', '1.x'], null],
			['some-feature', [], null],
		]);
	});

	it('supports ** crossing path segments', () => {
		expectAll([
			['dependabot/npm/lodash', ['dependabot/**'], 'dependabot/**'],
			['dependabot/npm/lodash', ['dependabot/*'], null],
		]);
	});
});
describe('protectingRuleset', () => {
	// Looks up the protecting ruleset for a bare branch name, with `master` as default.
	const protectorOf = (branchName, rulesets = RULESETS) =>
		protectingRuleset(`refs/heads/${branchName}`, rulesets, 'master');

	it('returns the ruleset name when an include pattern matches', () => {
		assert.equal(protectorOf('release/1.50.1'), 'Release branch Ruleset');
		assert.equal(protectorOf('1.x'), '1.x Branch ruleset');
		assert.equal(protectorOf('master'), 'master');
	});

	it('returns null when no ruleset matches', () => {
		assert.equal(protectorOf('some-old-feature'), null);
	});

	it('honors exclude patterns over include patterns', () => {
		const withExclusion = [
			{ name: 'releases', include: ['refs/heads/release/*'], exclude: ['refs/heads/release/0.*'] },
		];
		assert.equal(protectorOf('release/1.50.1', withExclusion), 'releases');
		assert.equal(protectorOf('release/0.236.1', withExclusion), null);
	});
});
describe('classifyBranches', () => {
	// Fixed clock so age math is deterministic.
	const now = new Date('2026-06-04T00:00:00Z').getTime();
	const daysAgo = (n) => new Date(now - n * DAY_MS).toISOString();
	// Branch fixture whose last commit is `days` days before the fixed clock.
	const aged = (name, days) => ({ name, committedDate: daysAgo(days) });
	// Classifies with shared defaults (no rulesets, `master`, 100-day threshold).
	// Optional inputs are passed only when a test supplies them via `overrides`.
	const classify = (branches, overrides = {}) =>
		classifyBranches({
			branches,
			rulesets: [],
			defaultBranch: 'master',
			staleDays: 100,
			now,
			...overrides,
		});

	it('keeps ruleset-protected branches regardless of age', () => {
		const { keep, remove } = classify([aged('release/1.50.1', 800)], { rulesets: RULESETS });
		assert.equal(remove.length, 0);
		assert.equal(keep.length, 1);
		assert.match(keep[0].reason, /protected: ruleset "Release branch Ruleset"/);
	});

	it('keeps the default branch even if no ruleset matched it', () => {
		const { keep, remove } = classify([aged('develop', 900)], { defaultBranch: 'develop' });
		assert.equal(remove.length, 0);
		assert.equal(keep[0].reason, 'protected: default branch');
	});

	it('keeps branches newer than the stale threshold', () => {
		const { keep, remove } = classify([aged('recent-feature', 10)], { rulesets: RULESETS });
		assert.equal(remove.length, 0);
		assert.equal(keep[0].reason, 'active: last commit 10d ago (< 100d)');
	});

	it('deletes unprotected branches older than the threshold', () => {
		const { keep, remove } = classify([aged('old-feature', 412)], { rulesets: RULESETS });
		assert.equal(keep.length, 0);
		assert.equal(remove.length, 1);
		assert.equal(remove[0].name, 'old-feature');
		assert.equal(remove[0].ageDays, 412);
		assert.match(remove[0].reason, /stale: last commit 412d ago \(>= 100d\)/);
	});

	it('keeps branches with an unknown last-commit date', () => {
		const { keep, remove } = classify([{ name: 'weird-branch', committedDate: null }], {
			rulesets: RULESETS,
		});
		assert.equal(remove.length, 0);
		assert.equal(keep[0].reason, 'kept: unknown last-commit date');
	});

	it('sorts deletions oldest-first and keeps alphabetically', () => {
		const { keep, remove } = classify(
			[aged('stale-newer', 150), aged('stale-older', 500), aged('zeta-active', 1), aged('alpha-active', 2)],
			{ rulesets: RULESETS },
		);
		assert.deepEqual(
			remove.map((b) => b.name),
			['stale-older', 'stale-newer'],
		);
		assert.deepEqual(
			keep.map((b) => b.name),
			['alpha-active', 'zeta-active'],
		);
	});

	it('treats a branch exactly at the threshold as stale', () => {
		const { keep, remove } = classify([aged('edge', 100)]);
		assert.equal(keep.length, 0);
		assert.equal(remove.length, 1);
	});

	it('keeps a stale branch that is the head or base of an open PR', () => {
		const { keep, remove } = classify([aged('old-but-has-pr', 500)], {
			openPrRefs: new Map([['old-but-has-pr', [24878]]]),
		});
		assert.equal(remove.length, 0);
		assert.equal(keep[0].reason, 'open PR #24878 (head or base)');
	});

	it('lists multiple referencing PRs sorted ascending', () => {
		const { keep } = classify([aged('shared-base', 500)], {
			openPrRefs: new Map([['shared-base', [300, 42, 100]]]),
		});
		assert.equal(keep[0].reason, 'open PR #42, #100, #300 (head or base)');
	});

	it('still deletes stale branches with no open PR reference', () => {
		const { remove } = classify([aged('no-pr', 500)], {
			openPrRefs: new Map([['some-other-branch', [1]]]),
		});
		assert.equal(remove.length, 1);
		assert.equal(remove[0].name, 'no-pr');
	});

	it('defaults openPrRefs to empty when omitted', () => {
		const { remove } = classify([aged('lonely', 500)]);
		assert.equal(remove.length, 1);
	});

	it('keeps a stale branch matching an exclude pattern even with no protecting ruleset', () => {
		const { keep, remove } = classify([aged('release/1.50.1', 800)], {
			excludePatterns: ['release/*'],
		});
		assert.equal(remove.length, 0);
		assert.equal(keep[0].reason, 'excluded: matches keep-pattern "release/*"');
	});

	it('still deletes stale branches that match no exclude pattern', () => {
		const { remove } = classify([aged('old-feature', 500)], {
			excludePatterns: ['release/*', '1.x'],
		});
		assert.equal(remove.length, 1);
		assert.equal(remove[0].name, 'old-feature');
	});

	it('defaults excludePatterns to empty when omitted', () => {
		const { remove } = classify([aged('lonely', 500)]);
		assert.equal(remove.length, 1);
	});
});

File diff suppressed because one or more lines are too long

View File

@ -12,6 +12,14 @@ on:
description: 'Sandbox provider (n8n-sandbox or daytona)' description: 'Sandbox provider (n8n-sandbox or daytona)'
required: false required: false
default: 'n8n-sandbox' default: 'n8n-sandbox'
iterations:
description: 'Iterations per test case (use 10 for a baseline)'
required: false
default: '3'
experiment-name:
description: 'LangSmith experiment name (set to instance-ai-baseline to refresh the baseline)'
required: false
default: ''
concurrency: concurrency:
group: instance-ai-evals-${{ github.ref }} group: instance-ai-evals-${{ github.ref }}
@ -25,4 +33,6 @@ jobs:
with: with:
branch: ${{ inputs.branch }} branch: ${{ inputs.branch }}
sandbox-provider: ${{ inputs.sandbox-provider }} sandbox-provider: ${{ inputs.sandbox-provider }}
iterations: ${{ inputs.iterations }}
experiment-name: ${{ inputs.experiment-name }}
secrets: inherit secrets: inherit

View File

@ -4,7 +4,6 @@ on:
push: push:
branches: branches:
- master - master
- 1.x
paths-ignore: paths-ignore:
- packages/@n8n/task-runner-python/** - packages/@n8n/task-runner-python/**
@ -21,6 +20,9 @@ jobs:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup and Build - name: Setup and Build
uses: ./.github/actions/setup-nodejs uses: ./.github/actions/setup-nodejs
with:
# The default is build without typechecking, we want both
build-command: pnpm build
unit-test: unit-test:
name: Unit tests name: Unit tests

View File

@ -129,7 +129,7 @@ jobs:
if: fromJSON(steps.ci-filter.outputs.results).ci || fromJSON(steps.ci-filter.outputs.results).e2e if: fromJSON(steps.ci-filter.outputs.results).ci || fromJSON(steps.ci-filter.outputs.results).e2e
uses: ./.github/actions/setup-nodejs uses: ./.github/actions/setup-nodejs
with: with:
build-command: ${{ fromJSON(steps.ci-filter.outputs.results).ci && 'pnpm build' || 'pnpm turbo run build --filter=@n8n/playwright-janitor' }} build-command: ${{ fromJSON(steps.ci-filter.outputs.results).ci && 'pnpm build:unchecked' || 'pnpm turbo run build --filter=@n8n/playwright-janitor' }}
- name: Run format check - name: Run format check
if: fromJSON(steps.ci-filter.outputs.results).ci if: fromJSON(steps.ci-filter.outputs.results).ci
@ -340,6 +340,7 @@ jobs:
needs.install-and-build.result == 'success' && needs.install-and-build.result == 'success' &&
needs.install-and-build.outputs.instance_ai_workflow_eval == 'true' && needs.install-and-build.outputs.instance_ai_workflow_eval == 'true' &&
github.repository == 'n8n-io/n8n' && github.repository == 'n8n-io/n8n' &&
github.event_name != 'merge_group' &&
(github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork) (github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork)
uses: ./.github/workflows/test-evals-discovery.yml uses: ./.github/workflows/test-evals-discovery.yml
with: with:

View File

@ -0,0 +1,45 @@
# Runs .github/scripts/stale/clean-stale-branches.mjs on a weekly schedule, or
# on demand with configurable dry-run, staleness threshold and keep-patterns.
name: 'Util: Clean Stale Branches'
on:
schedule:
- cron: "0 10 * * 1" # Every Monday at 10:00 UTC
workflow_dispatch:
inputs:
dry-run:
description: 'Dry run only lists branches; uncheck to actually delete.'
type: boolean
default: true
days:
description: 'Days of inactivity before a branch is considered stale.'
type: string
default: '100'
exclude:
description: 'Comma-separated branch-name globs to always keep (e.g. release/*,1.x).'
type: string
default: ''
permissions:
contents: write # delete branches; ruleset reads use the token's implicit metadata access
pull-requests: read # list open PRs so their head/base branches are never deleted
jobs:
remove-stale-branches:
name: Remove stale branches
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Node.js
uses: ./.github/actions/setup-nodejs
with:
# No project build; only the dependencies of .github/scripts are installed.
build-command: ''
install-command: pnpm install --frozen-lockfile --dir ./.github/scripts --ignore-workspace
cache-dependency-path: .github/scripts/pnpm-lock.yaml
- name: Clean stale branches
env:
GITHUB_TOKEN: ${{ github.token }}
# Inputs are passed through env vars rather than interpolated into the command line.
STALE_DAYS: ${{ inputs.days || '100' }}
EXCLUDE_BRANCHES: ${{ inputs.exclude || '' }}
# NOTE(review): on scheduled runs there are no workflow_dispatch inputs, so
# `!inputs.dry-run` presumably evaluates to true and the script is invoked
# with --execute (real deletions). Confirm this is the intended behaviour.
run: node .github/scripts/stale/clean-stale-branches.mjs --days="$STALE_DAYS" ${{ !inputs.dry-run && '--execute' || '--dry-run' }}

View File

@ -12,7 +12,14 @@ concurrency:
jobs: jobs:
grind: grind:
name: Grind changed editor-ui tests name: Grind changed editor-ui tests
runs-on: ${{ vars.RUNNER_PROVIDER == 'github' && 'ubuntu-latest' || 'blacksmith-2vcpu-ubuntu-2204' }} # Fork PRs get a read-only GITHUB_TOKEN, so posting the sticky comment
# 403s regardless of the `permissions` block below. Switching to
# `pull_request_target` would grant a write token but is unsafe here
# because grind executes the PR's changed test files. Skip forks; if we
# want fork coverage later, do it via a separate privileged
# `workflow_run` workflow that picks up an artifact from this one.
if: ${{ !github.event.pull_request.head.repo.fork }}
runs-on: ${{ vars.RUNNER_PROVIDER == 'github' && 'ubuntu-latest' || 'blacksmith-4vcpu-ubuntu-2204' }}
# Non-blocking: a failure here surfaces in the PR Checks tab as a soft # Non-blocking: a failure here surfaces in the PR Checks tab as a soft
# signal but never gates merge. Remove `continue-on-error` once we have # signal but never gates merge. Remove `continue-on-error` once we have
# confidence the grind has a low false-positive rate. # confidence the grind has a low false-positive rate.
@ -32,8 +39,6 @@ jobs:
- name: Setup Node.js - name: Setup Node.js
uses: ./.github/actions/setup-nodejs uses: ./.github/actions/setup-nodejs
with:
build-command: ''
- name: Install .github/scripts dependencies - name: Install .github/scripts dependencies
run: pnpm install --frozen-lockfile --dir ./.github/scripts --ignore-workspace run: pnpm install --frozen-lockfile --dir ./.github/scripts --ignore-workspace

View File

@ -14,6 +14,10 @@ on:
- baseline # score files with no result yet (the `new` bucket) - baseline # score files with no result yet (the `new` bucket)
- coverage # revisit the weakest scored files (`red`/`stale`, lowest first) - coverage # revisit the weakest scored files (`red`/`stale`, lowest first)
default: both default: both
source_file:
description: 'Optional: re-score this exact repo-relative file, skipping the picker (e.g. packages/workflow/src/common/get-node-by-name.ts). Used by the lane:mutation-increase close handler to refresh the ledger.'
type: string
default: ''
permissions: permissions:
contents: read contents: read
@ -36,6 +40,7 @@ jobs:
id: build id: build
env: env:
REQUESTED_MODE: ${{ github.event.inputs.mode || 'both' }} REQUESTED_MODE: ${{ github.event.inputs.mode || 'both' }}
SOURCE_FILE: ${{ github.event.inputs.source_file || '' }}
# vitest packages only. A package with its own stryker.config.mjs overrides # vitest packages only. A package with its own stryker.config.mjs overrides
# the shared default (scripts/mutation-health/stryker.default.mjs) — n8n-workflow # the shared default (scripts/mutation-health/stryker.default.mjs) — n8n-workflow
# does this to run the legacy expression engine and dodge the isolated-vm # does this to run the legacy expression engine and dodge the isolated-vm
@ -48,9 +53,17 @@ jobs:
{ name: "@n8n/crdt", dir: "packages/@n8n/crdt", slug: "crdt" }, { name: "@n8n/crdt", dir: "packages/@n8n/crdt", slug: "crdt" },
{ name: "@n8n/decorators", dir: "packages/@n8n/decorators", slug: "decorators" }, { name: "@n8n/decorators", dir: "packages/@n8n/decorators", slug: "decorators" },
]; ];
const req = process.env.REQUESTED_MODE; const sourceFile = (process.env.SOURCE_FILE || "").trim();
const modes = req === "both" ? ["baseline", "coverage"] : [req]; let include;
const include = packages.flatMap((p) => modes.map((mode) => ({ ...p, mode }))); if (sourceFile) {
const pkg = packages.find((p) => sourceFile.startsWith(p.dir + "/"));
if (!pkg) throw new Error("No mutation-tracked package owns " + sourceFile);
include = [{ ...pkg, mode: "file", source_file: sourceFile }];
} else {
const req = process.env.REQUESTED_MODE;
const modes = req === "both" ? ["baseline", "coverage"] : [req];
include = packages.flatMap((p) => modes.map((mode) => ({ ...p, mode })));
}
console.log("matrix=" + JSON.stringify({ include })); console.log("matrix=" + JSON.stringify({ include }));
' >> "$GITHUB_OUTPUT" ' >> "$GITHUB_OUTPUT"
@ -72,6 +85,7 @@ jobs:
PKG_DIR: ${{ matrix.dir }} PKG_DIR: ${{ matrix.dir }}
MODE: ${{ matrix.mode }} MODE: ${{ matrix.mode }}
REPORTS_DIR: ${{ matrix.dir }}/reports/mutation REPORTS_DIR: ${{ matrix.dir }}/reports/mutation
SOURCE_FILE: ${{ matrix.source_file || '' }}
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@ -90,6 +104,12 @@ jobs:
- name: Pick next source file - name: Pick next source file
id: pick id: pick
run: | run: |
if [ -n "$SOURCE_FILE" ]; then
echo "::notice::On-demand re-score of $SOURCE_FILE (picker skipped)."
echo "skip=false" >> "$GITHUB_OUTPUT"
echo "source-rel=${SOURCE_FILE#"$PKG_DIR"/}" >> "$GITHUB_OUTPUT"
exit 0
fi
picked_json=$(node scripts/mutation-health/pick-next.mjs \ picked_json=$(node scripts/mutation-health/pick-next.mjs \
--package-dir "$PKG_DIR" \ --package-dir "$PKG_DIR" \
--ledger-file "$REPORTS_DIR/live-ledger.json" \ --ledger-file "$REPORTS_DIR/live-ledger.json" \

View File

@ -167,7 +167,8 @@ jobs:
name: Update latest and next in the docs name: Update latest and next in the docs
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [validate-inputs, release-to-npm, release-to-docker-hub] needs: [validate-inputs, release-to-npm, release-to-docker-hub]
timeout-minutes: 2
environment: release environment: release
steps: steps:
- continue-on-error: true - continue-on-error: true
run: curl -u docsWorkflows:${{ secrets.N8N_WEBHOOK_DOCS_PASSWORD }} --request GET 'https://internal.users.n8n.cloud/webhook/update-latest-next' run: curl --connect-timeout 10 --max-time 30 -u docsWorkflows:${{ secrets.N8N_WEBHOOK_DOCS_PASSWORD }} --request GET 'https://internal.users.n8n.cloud/webhook/update-latest-next'

View File

@ -26,7 +26,7 @@ jobs:
migration-cmd: pnpm test:sqlite:migrations migration-cmd: pnpm test:sqlite:migrations
collectCoverage: 'false' collectCoverage: 'false'
- name: Postgres 16 - name: Postgres 16
runner: blacksmith-4vcpu-ubuntu-2204 runner: blacksmith-8vcpu-ubuntu-2204
test-cmd: pnpm test:postgres:integration:tc test-cmd: pnpm test:postgres:integration:tc
migration-cmd: pnpm test:postgres:migrations:tc migration-cmd: pnpm test:postgres:migrations:tc
TEST_IMAGE_POSTGRES: 'postgres:16' TEST_IMAGE_POSTGRES: 'postgres:16'
@ -50,6 +50,7 @@ jobs:
run: ${{ matrix.test-cmd }} run: ${{ matrix.test-cmd }}
- name: Run Migration Tests - name: Run Migration Tests
if: matrix.migration-cmd != ''
working-directory: packages/cli working-directory: packages/cli
run: ${{ matrix.migration-cmd }} run: ${{ matrix.migration-cmd }}

View File

@ -1,9 +1,13 @@
name: 'Test: E2E Coverage Weekly' name: 'Test: E2E Coverage Nightly'
on: on:
schedule: schedule:
- cron: '0 2 * * 1' # Every Monday at 2 AM - cron: '0 2 * * *' # Nightly at 02:00 UTC
workflow_dispatch: # Allow manual triggering workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs: jobs:
prepare-docker: prepare-docker:
@ -14,19 +18,46 @@ jobs:
runner: blacksmith-8vcpu-ubuntu-2204 runner: blacksmith-8vcpu-ubuntu-2204
secrets: inherit secrets: inherit
# Distribute the full e2e suite across shards by historical duration
# (janitor orchestrator) instead of Playwright's count-based --shard=N/total.
# Count-based sharding stacked the heavy + flaky specs onto one shard, which
# then blew past the timeout; duration-weighting keeps every shard even.
generate-matrix:
name: Generate shard matrix
runs-on: blacksmith-2vcpu-ubuntu-2204
outputs:
matrix: ${{ steps.gen.outputs.matrix }}
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup and Build janitor
uses: ./.github/actions/setup-nodejs
with:
build-command: pnpm turbo run build --filter=@n8n/playwright-janitor
- name: Generate matrix (8 shards, duration-weighted)
id: gen
run: |
MATRIX=$(node packages/testing/playwright/scripts/distribute-tests.mjs --matrix 8 --orchestrate)
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
e2e: e2e:
name: E2E (coverage) name: E2E (coverage)
needs: prepare-docker needs: [prepare-docker, generate-matrix]
uses: ./.github/workflows/test-e2e-reusable.yml uses: ./.github/workflows/test-e2e-reusable.yml
with: with:
test-mode: docker-artifact test-mode: docker-artifact
test-command: pnpm --filter=n8n-playwright test:container:coverage # Runs the coverage project + resolves this shard's V8 to lcov/per-spec
# (keeps test-e2e-reusable coverage-agnostic). See run-coverage-shard.mjs.
test-command: pnpm --filter=n8n-playwright coverage:shard
workers: '1' workers: '1'
runner: blacksmith-4vcpu-ubuntu-2204 runner: blacksmith-4vcpu-ubuntu-2204
timeout-minutes: 55 timeout-minutes: 55
pre-generated-matrix: '[{"shard":1,"images":""},{"shard":2,"images":""},{"shard":3,"images":""},{"shard":4,"images":""},{"shard":5,"images":""},{"shard":6,"images":""}]' pre-generated-matrix: ${{ needs.generate-matrix.outputs.matrix }}
artifact-prefix: coverage artifact-prefix: coverage
build-variant: coverage build-variant: coverage
currents-project-id: 'LRxcNt'
secrets: inherit secrets: inherit
aggregate: aggregate:
@ -47,18 +78,15 @@ jobs:
pattern: coverage-shard-* pattern: coverage-shard-*
path: /tmp/shards/ path: /tmp/shards/
- name: Collect coverage JSON - name: Build janitor (tested merge-coverage)
shell: bash run: pnpm turbo run build --filter=@n8n/playwright-janitor
run: |
mkdir -p packages/testing/playwright/.nyc_output/coverage
found=$(find /tmp/shards -path '*/.nyc_output/coverage/*.json' 2>/dev/null | wc -l)
echo "Found $found coverage JSON files across shards"
find /tmp/shards -path '*/.nyc_output/coverage/*.json' \
-exec cp {} packages/testing/playwright/.nyc_output/coverage/ \;
ls -la packages/testing/playwright/.nyc_output/coverage/ || true
- name: Generate Coverage Report # Report (shard lcovs → unified lcov) + impact map (per-spec lcovs →
run: pnpm --filter n8n-playwright coverage:report # spec-keyed map), both via the property-tested janitor merge-coverage.
# Logic lives in the tested aggregate-coverage.mjs, not inline bash.
- name: Aggregate coverage + build impact map
working-directory: packages/testing/playwright
run: node scripts/aggregate-coverage.mjs --shards=/tmp/shards --out=coverage
- name: Upload Coverage Report Artifact - name: Upload Coverage Report Artifact
if: always() if: always()

View File

@ -26,7 +26,10 @@ jobs:
shards: '[{"shard":1,"images":""}]' shards: '[{"shard":1,"images":""}]'
- lane: webhook - lane: webhook
grep: '@bench:webhook' grep: '@bench:webhook'
shards: '[{"shard":1,"images":""}]' # 2 shards — webhook lane grew from 6 to 9 spec files with the
# dedicated-proc topology (DEVP-200). Single shard pushed wall time
# past 40 min; two shards halve that. Bump to 3 if specs grow further.
shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]'
- lane: kafka - lane: kafka
grep: '@bench:kafka' grep: '@bench:kafka'
shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]' shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]'
@ -39,4 +42,5 @@ jobs:
runner: blacksmith-8vcpu-ubuntu-2204 runner: blacksmith-8vcpu-ubuntu-2204
timeout-minutes: 120 timeout-minutes: 120
artifact-prefix: 'benchmark-${{ matrix.lane }}' artifact-prefix: 'benchmark-${{ matrix.lane }}'
currents-project-id: 'LRxcNt'
secrets: inherit secrets: inherit

View File

@ -35,7 +35,7 @@ on:
currents-project-id: currents-project-id:
description: 'Currents project ID for reporting' description: 'Currents project ID for reporting'
required: false required: false
default: 'LRxcNt' default: 'nHHLA5'
type: string type: string
pre-generated-matrix: pre-generated-matrix:
description: 'Pre-generated shard matrix JSON. Defaults to 1 shard; multi-shard callers pass their own.' description: 'Pre-generated shard matrix JSON. Defaults to 1 shard; multi-shard callers pass their own.'
@ -61,8 +61,10 @@ on:
env: env:
NODE_OPTIONS: ${{ contains(inputs.runner, '2vcpu') && '--max-old-space-size=6144' || '' }} NODE_OPTIONS: ${{ contains(inputs.runner, '2vcpu') && '--max-old-space-size=6144' || '' }}
PLAYWRIGHT_WORKERS: ${{ inputs.workers != '' && inputs.workers || '2' }} PLAYWRIGHT_WORKERS: ${{ inputs.workers != '' && inputs.workers || '2' }}
# Browser cache location - must match install-browsers script # Browser cache location - must match install-browsers script.
PLAYWRIGHT_BROWSERS_PATH: packages/testing/playwright/.playwright-browsers # Absolute so nested pnpm invocations (e.g. coverage shard runner) don't
# re-resolve a relative path against the package's INIT_CWD and double it.
PLAYWRIGHT_BROWSERS_PATH: ${{ github.workspace }}/packages/testing/playwright/.playwright-browsers
# docker-artifact loads the image locally as n8nio/n8n:local; unused in local mode. # docker-artifact loads the image locally as n8nio/n8n:local; unused in local mode.
TEST_IMAGE_N8N: 'n8nio/n8n:local' TEST_IMAGE_N8N: 'n8nio/n8n:local'
N8N_SKIP_LICENSES: 'true' N8N_SKIP_LICENSES: 'true'
@ -148,12 +150,10 @@ jobs:
path: | path: |
packages/testing/playwright/test-results/ packages/testing/playwright/test-results/
packages/testing/playwright/playwright-report/ packages/testing/playwright/playwright-report/
packages/testing/playwright/.nyc_output/ packages/testing/playwright/coverage/lcov.info
packages/testing/playwright/coverage/by-spec/
retention-days: 1 retention-days: 1
if-no-files-found: ignore if-no-files-found: ignore
# upload-artifact@v7 defaults this to false, which silently drops
# `.nyc_output/` (dotfile) — needed for the weekly coverage workflow.
include-hidden-files: true
- name: Cancel Currents run if workflow is cancelled - name: Cancel Currents run if workflow is cancelled
if: ${{ cancelled() }} if: ${{ cancelled() }}

View File

@ -30,6 +30,7 @@ jobs:
pre-generated-matrix: '[{"shard":1},{"shard":2},{"shard":3},{"shard":4},{"shard":5},{"shard":6},{"shard":7},{"shard":8},{"shard":9},{"shard":10},{"shard":11},{"shard":12},{"shard":13},{"shard":14},{"shard":15},{"shard":16}]' pre-generated-matrix: '[{"shard":1},{"shard":2},{"shard":3},{"shard":4},{"shard":5},{"shard":6},{"shard":7},{"shard":8},{"shard":9},{"shard":10},{"shard":11},{"shard":12},{"shard":13},{"shard":14},{"shard":15},{"shard":16}]'
n8n-env: '{"N8N_EXPRESSION_ENGINE":"vm"}' n8n-env: '{"N8N_EXPRESSION_ENGINE":"vm"}'
artifact-prefix: vm-expressions artifact-prefix: vm-expressions
currents-project-id: 'LRxcNt'
secrets: inherit secrets: inherit
notify-on-failure: notify-on-failure:

View File

@ -36,8 +36,10 @@ on:
jobs: jobs:
run-discovery-evals: run-discovery-evals:
name: 'Run Discovery Evals' name: 'Run Discovery Evals'
runs-on: blacksmith-2vcpu-ubuntu-2204 runs-on: blacksmith-4vcpu-ubuntu-2204
timeout-minutes: 15 timeout-minutes: 15
env:
NODE_OPTIONS: '--max-old-space-size=7168'
permissions: permissions:
contents: read contents: read
pull-requests: write pull-requests: write
@ -51,8 +53,6 @@ jobs:
- name: Setup Environment - name: Setup Environment
uses: ./.github/actions/setup-nodejs uses: ./.github/actions/setup-nodejs
with:
build-command: 'pnpm build'
- name: Export Node Types - name: Export Node Types
run: | run: |

View File

@ -18,6 +18,16 @@ on:
required: false required: false
type: string type: string
default: 'n8n-sandbox' default: 'n8n-sandbox'
iterations:
description: 'Iterations per test case'
required: false
type: string
default: '3'
experiment-name:
description: 'LangSmith experiment name (instance-ai-baseline refreshes the baseline)'
required: false
type: string
default: ''
workflow_dispatch: workflow_dispatch:
inputs: inputs:
branch: branch:
@ -32,17 +42,23 @@ on:
description: 'Sandbox provider (n8n-sandbox or daytona)' description: 'Sandbox provider (n8n-sandbox or daytona)'
required: false required: false
default: 'n8n-sandbox' default: 'n8n-sandbox'
iterations:
description: 'Iterations per test case (use 10 for a baseline)'
required: false
default: '3'
experiment-name:
description: 'LangSmith experiment name (instance-ai-baseline refreshes the baseline)'
required: false
default: ''
jobs: jobs:
run-evals: run-evals:
name: 'Run Evals' name: 'Run Evals'
runs-on: blacksmith-4vcpu-ubuntu-2204 runs-on: blacksmith-4vcpu-ubuntu-2204
timeout-minutes: 45 timeout-minutes: 90
env: env:
# Each port hosts an independent n8n container. The eval CLI's # Each port hosts an independent n8n container. The eval CLI's
# work-stealing allocator dispatches builds across them, capped per-lane. # work-stealing allocator dispatches builds across them, capped per-lane.
# 11 lanes on 4vcpu — builds are LLM-bound so CPU headroom is sufficient;
# bump back to 8vcpu if contention shows up.
LANE_PORTS: '5678,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688' LANE_PORTS: '5678,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688'
permissions: permissions:
contents: read contents: read
@ -207,6 +223,8 @@ jobs:
LANGSMITH_REVISION_ID: ${{ github.sha }} LANGSMITH_REVISION_ID: ${{ github.sha }}
LANGSMITH_BRANCH: ${{ github.event.pull_request.head.ref || github.head_ref || github.ref_name }} LANGSMITH_BRANCH: ${{ github.event.pull_request.head.ref || github.head_ref || github.ref_name }}
FILTER: ${{ inputs.filter }} FILTER: ${{ inputs.filter }}
ITERATIONS: ${{ inputs.iterations }}
EXPERIMENT_NAME: ${{ inputs.experiment-name }}
run: | run: |
IFS=',' read -ra PORTS <<< "$LANE_PORTS" IFS=',' read -ra PORTS <<< "$LANE_PORTS"
URLS=() URLS=()
@ -214,20 +232,10 @@ jobs:
URLS+=("http://localhost:$port") URLS+=("http://localhost:$port")
done done
BASE_URLS=$(IFS=,; printf '%s' "${URLS[*]}") BASE_URLS=$(IFS=,; printf '%s' "${URLS[*]}")
if [ -n "$FILTER" ]; then ARGS=(--base-url "$BASE_URLS" --concurrency 32 --verbose --iterations "${ITERATIONS:-3}")
pnpm eval:instance-ai \ [ -n "$FILTER" ] && ARGS+=(--filter "$FILTER")
--base-url "$BASE_URLS" \ [ -n "$EXPERIMENT_NAME" ] && ARGS+=(--experiment-name "$EXPERIMENT_NAME")
--concurrency 32 \ pnpm eval:instance-ai "${ARGS[@]}"
--verbose \
--iterations 3 \
--filter "$FILTER"
else
pnpm eval:instance-ai \
--base-url "$BASE_URLS" \
--concurrency 32 \
--verbose \
--iterations 3
fi
# Captures sandbox/builder diagnostic signals that surface during the # Captures sandbox/builder diagnostic signals that surface during the
# eval (after migrations finish). Two layers of secret-leak defense: # eval (after migrations finish). Two layers of secret-leak defense:

2
.gitignore vendored
View File

@ -4,6 +4,7 @@ node_modules
tmp tmp
dist dist
coverage coverage
coverage-by-spec
npm-debug.log* npm-debug.log*
yarn.lock yarn.lock
google-generated-credentials.json google-generated-credentials.json
@ -30,6 +31,7 @@ CHANGELOG-*.md
!packages/frontend/@n8n/design-system/**/*.mdx !packages/frontend/@n8n/design-system/**/*.mdx
build-storybook.log build-storybook.log
build.log build.log
.agent-setup/
sbom-source.cdx.json sbom-source.cdx.json
*.junit.xml *.junit.xml
junit.xml junit.xml

View File

@ -29,6 +29,23 @@ See [plugin README](.claude/plugins/n8n/README.md) for structure and details.
## Essential Commands ## Essential Commands
### Fresh checkout / agent setup
For a fresh checkout (cat-bot, a new hire, any agent verifying the repo
builds), prefer `pnpm agent:setup` over running install + build + tests by
hand. It chains them in one process, caps per-process memory and turbo
concurrency so a 6GB box doesn't OOM, streams all output to
`.agent-setup/<step>.log` (gitignored), and surfaces only a one-line summary
per step plus the tail of the failing log. A machine-readable
`.agent-setup/summary.json` is always written so a backgrounded run is
readable in a single shot — no polling, no scrolling logs.
```bash
pnpm agent:setup # install → build → test (full suite)
pnpm agent:setup install # one step at a time
pnpm agent:setup --json # JSON summary on stdout (for scripts/agents)
```
### Building ### Building
Use `pnpm build` to build all packages. ALWAYS redirect the output of the Use `pnpm build` to build all packages. ALWAYS redirect the output of the
build command to a file: build command to a file:
@ -228,6 +245,7 @@ titles, test descriptions, and Linear URLs.
`.github/pull_request_template.md` and `.github/pull_request_template.md` and
`.github/pull_request_title_conventions.md`. `.github/pull_request_title_conventions.md`.
- Use `gh pr create --draft` to create draft PRs. - Use `gh pr create --draft` to create draft PRs.
- Always reference the Linear ticket in the PR description, - If there is a corresponding Linear ticket, reference it in the PR
use `https://linear.app/n8n/issue/[TICKET-ID]` description using `https://linear.app/n8n/issue/[TICKET-ID]`. Do not
create a Linear ticket on your own — ask first.
- Always link to the GitHub issue if mentioned in the Linear ticket. - Always link to the GitHub issue if mentioned in the Linear ticket.

View File

@ -1,6 +1,8 @@
ARG NODE_VERSION=24.15.0 ARG NODE_VERSION=24.15.0
FROM dhi.io/node:${NODE_VERSION}-alpine3.22-dev # Pinned to multi-arch index digest (linux/amd64 + linux/arm64) for reproducible builds.
# Bump the digest together with the tag when updating the base image.
FROM dhi.io/node:24.15.0-alpine3.22-dev@sha256:a7eead704e9bd2d7a4c1b52cf595848f180365eba7c15a185ce1c3a690c1a19d
ARG NODE_VERSION ARG NODE_VERSION

View File

@ -2,7 +2,9 @@ ARG NODE_VERSION=24.15.0
ARG N8N_VERSION=snapshot ARG N8N_VERSION=snapshot
# Builder stage exists because the runtime base image has no toolchain. # Builder stage exists because the runtime base image has no toolchain.
FROM node:${NODE_VERSION}-alpine3.22 AS builder # Pinned to multi-arch index digest (linux/amd64 + linux/arm64) for reproducible builds.
# Bump the digest together with the tag when updating the base image.
FROM node:24.15.0-alpine3.22@sha256:b689d4005875ae167178471a7a622ec2909459a3bbb32277260be1971af7a99f AS builder
COPY ./compiled /usr/local/lib/node_modules/n8n COPY ./compiled /usr/local/lib/node_modules/n8n
RUN apk add --no-cache python3 make g++ && \ RUN apk add --no-cache python3 make g++ && \
cd /usr/local/lib/node_modules/n8n && \ cd /usr/local/lib/node_modules/n8n && \

View File

@ -11,6 +11,7 @@
"prepare": "node scripts/prepare.mjs", "prepare": "node scripts/prepare.mjs",
"preinstall": "node scripts/block-npm-install.js", "preinstall": "node scripts/block-npm-install.js",
"build": "turbo run build", "build": "turbo run build",
"build:unchecked": "turbo run build:unchecked",
"build:n8n": "node scripts/build-n8n.mjs", "build:n8n": "node scripts/build-n8n.mjs",
"build:deploy": "node scripts/build-n8n.mjs", "build:deploy": "node scripts/build-n8n.mjs",
"build:docker": "node scripts/build-n8n.mjs && node scripts/dockerize-n8n.mjs", "build:docker": "node scripts/build-n8n.mjs && node scripts/dockerize-n8n.mjs",
@ -30,6 +31,7 @@
"reset": "node scripts/ensure-zx.mjs && zx scripts/reset.mjs", "reset": "node scripts/ensure-zx.mjs && zx scripts/reset.mjs",
"format": "turbo run format && node scripts/format.mjs", "format": "turbo run format && node scripts/format.mjs",
"grind": "node scripts/grind.mjs", "grind": "node scripts/grind.mjs",
"agent:setup": "node scripts/agent-setup.mjs",
"format:check": "turbo run format:check", "format:check": "turbo run format:check",
"lint": "turbo run lint", "lint": "turbo run lint",
"lint:styles": "turbo run lint:styles", "lint:styles": "turbo run lint:styles",
@ -126,7 +128,7 @@
"date-fns-tz": "2.0.0", "date-fns-tz": "2.0.0",
"form-data": "4.0.4", "form-data": "4.0.4",
"pdf-parse": "catalog:", "pdf-parse": "catalog:",
"tmp": "0.2.4", "tmp": "0.2.6",
"nodemailer": "7.0.11", "nodemailer": "7.0.11",
"validator": "13.15.26", "validator": "13.15.26",
"zod": "3.25.67", "zod": "3.25.67",
@ -136,13 +138,13 @@
"glob@7": "7.2.3", "glob@7": "7.2.3",
"jws@3": "3.2.2", "jws@3": "3.2.2",
"jws@4": "4.0.1", "jws@4": "4.0.1",
"qs@6": "6.14.2", "qs@6": "6.15.2",
"@smithy/config-resolver": ">=4.4.0", "@smithy/config-resolver": ">=4.4.0",
"@rudderstack/rudder-sdk-node@<=3.0.0": "3.0.0", "@rudderstack/rudder-sdk-node@<=3.0.0": "3.0.0",
"diff": "8.0.3", "diff": "8.0.3",
"undici@5": "^6.24.0", "undici@5": "catalog:undici-v6",
"undici@6": "^6.24.0", "undici@6": "catalog:undici-v6",
"undici@7": "^7.24.0", "undici@7": "catalog:undici-v7",
"tar": "^7.5.11", "tar": "^7.5.11",
"ajv@6": "6.14.0", "ajv@6": "6.14.0",
"ajv@7": "8.18.0", "ajv@7": "8.18.0",
@ -177,6 +179,7 @@
"protobufjs": "7.5.8", "protobufjs": "7.5.8",
"ip-address@10": "10.1.1", "ip-address@10": "10.1.1",
"brace-expansion@5": "5.0.6", "brace-expansion@5": "5.0.6",
"@tootallnate/once@2": "2.0.1",
"@opentelemetry/exporter-prometheus@<=0.217.0": "0.217.0", "@opentelemetry/exporter-prometheus@<=0.217.0": "0.217.0",
"@opentelemetry/sdk-node@<=0.217.0": "0.217.0", "@opentelemetry/sdk-node@<=0.217.0": "0.217.0",
"langsmith": "0.6.0" "langsmith": "0.6.0"

View File

@ -9,9 +9,9 @@ Conventions for the `@n8n/agents` package.
- **Builder pattern with lazy build** — all public primitives use a fluent - **Builder pattern with lazy build** — all public primitives use a fluent
builder API. **User code never calls `.build()`**. Builders are passed builder API. **User code never calls `.build()`**. Builders are passed
directly to the consuming method (e.g. `agent.tool(myTool)`) which calls directly to the consuming method (e.g. `agent.tool(myTool)`) which calls
`.build()` internally. Agent and Network have `run()`/`stream()` directly `.build()` internally. Agent has `generate()`/`stream()` directly on the
on the class, which lazy-build via `ensureBuilt()` on first call. `build()` class, which lazy-build via `ensureBuilt()` on first call. `build()` is
is `protected` on Agent and Network to keep it out of the public API. `protected` on Agent to keep it out of the public API.
- **Zod for schemas** — all input/output schemas use Zod. - **Zod for schemas** — all input/output schemas use Zod.
## Package Structure ## Package Structure
@ -34,7 +34,6 @@ src/
mcp-client.ts # MCP client integration mcp-client.ts # MCP client integration
memory.ts # Memory builder memory.ts # Memory builder
message.ts # LLM/DB message helpers message.ts # LLM/DB message helpers
network.ts # Network builder
provider-tools.ts # Provider-defined tool factories provider-tools.ts # Provider-defined tool factories
telemetry.ts # Telemetry builder (OTel, redaction) telemetry.ts # Telemetry builder (OTel, redaction)
tool.ts # Tool builder tool.ts # Tool builder
@ -112,13 +111,12 @@ class EngineAgent extends Agent {
## Testing ## Testing
- Unit tests live in `src/__tests__/`, integration tests in `src/__tests__/integration/` - Unit tests live in `src/__tests__/`, integration tests in `src/__tests__/integration/`
- Unit tests use Jest (`pnpm test`) - Unit tests use Vitest (`pnpm test`)
- Integration tests use Vitest (`pnpm test:integration`) with real LLM calls - Integration tests use Vitest (`pnpm test:integration`) with real LLM calls
- A `.env` file at the package root is loaded automatically by the vitest config. - A `.env` file at the package root is loaded automatically by the vitest config.
Always assume it exists when running integration tests. Never commit it. Always assume it exists when running integration tests. Never commit it.
- Required keys: - Required keys:
- `ANTHROPIC_API_KEY` — all integration tests - `ANTHROPIC_API_KEY` — all integration tests
- `OPENAI_API_KEY` — semantic recall tests (embeddings)
- Tests skip automatically when the required API key is not set - Tests skip automatically when the required API key is not set
- Run from the package directory: `cd packages/@n8n/agents && pnpm test` - Run from the package directory: `cd packages/@n8n/agents && pnpm test`
@ -134,7 +132,7 @@ class EngineAgent extends Agent {
cd packages/@n8n/agents cd packages/@n8n/agents
pnpm build # rimraf dist && tsc -p tsconfig.build.json → dist/ pnpm build # rimraf dist && tsc -p tsconfig.build.json → dist/
pnpm typecheck # tsc --noEmit pnpm typecheck # tsc --noEmit
pnpm test # jest (unit) pnpm test # vitest (unit)
``` ```
## PR naming convention ## PR naming convention

View File

@ -12,7 +12,7 @@ final response.
for a single agent turn. It uses the Vercel AI SDK directly (`generateText` / for a single agent turn. It uses the Vercel AI SDK directly (`generateText` /
`streamText`) and is responsible for: `streamText`) and is responsible for:
- Building the LLM message context (memory history, semantic recall, working - Building the LLM message context (memory history, working
memory in the system prompt, user input) memory in the system prompt, user input)
- Stripping orphaned tool-call/tool-result pairs before LLM calls - Stripping orphaned tool-call/tool-result pairs before LLM calls
(`stripOrphanedToolMessages`) (`stripOrphanedToolMessages`)
@ -23,8 +23,7 @@ for a single agent turn. It uses the Vercel AI SDK directly (`generateText` /
in parallel) in parallel)
- Suspending and resuming runs for Human-in-the-Loop (HITL) **and** for tools - Suspending and resuming runs for Human-in-the-Loop (HITL) **and** for tools
that return a branded suspend result (`suspendSchema` / `resumeSchema`) that return a branded suspend result (`suspendSchema` / `resumeSchema`)
- Persisting new messages to a memory store at the end of each completed turn, - Persisting new messages to a memory store at the end of each completed turn
optionally saving **embeddings** for semantic recall
- Extracting and persisting **working memory** from assistant output when - Extracting and persisting **working memory** from assistant output when
configured configured
- Optional **structured output** (`Output.object` + Zod), **thinking** / - Optional **structured output** (`Output.object` + Zod), **thinking** /
@ -65,7 +64,6 @@ graph TD
| `on(event, handler)` | Register a lifecycle event handler | | `on(event, handler)` | Register a lifecycle event handler |
| `abort()` | Cancel the currently running agent | | `abort()` | Cancel the currently running agent |
| `getState()` | Return the latest `SerializableAgentState` snapshot | | `getState()` | Return the latest `SerializableAgentState` snapshot |
| `asTool(description)` | Wrap the agent as a `BuiltTool` for multi-agent composition |
`ExecutionOptions` includes `abortSignal?: AbortSignal`, forwarded into `ExecutionOptions` includes `abortSignal?: AbortSignal`, forwarded into
`AgentEventBus.resetAbort()` so callers can cancel via an external signal as `AgentEventBus.resetAbort()` so callers can cancel via an external signal as
@ -73,6 +71,40 @@ well as `agent.abort()`.
--- ---
## Inline Sub-Agent Delegation
`createDelegateSubAgentTool()` can be registered directly on an `Agent` without
a host `runSubAgent` callback. In that mode, `Agent.build()` completes the tool
with the SDK's inline child runner after the parent model and effective tool
surface have been resolved.
```typescript
const agent = new Agent('parent')
.model('anthropic/claude-sonnet-4-5')
.instructions('...')
.tool(searchTool)
.tool(createDelegateSubAgentTool());
```
The model selects the default inline path by passing `subAgentId: "inline"`.
When a host supplies a `runSubAgent` callback, `Agent.build()` routes every
delegation (including `"inline"`) through that callback and passes
`helpers.runInlineSubAgent` so the host can reuse the SDK inline runner. Without a
host callback, `"inline"` is handled by the SDK inline runner directly. Both paths
return the same `DelegateSubAgentToolOutput` shape and emit the same sub-agent
lifecycle events.
Inline children:
- reuse the parent model config for this first implementation
- start from the parent agent's effective local/deferred tool list
- always drop SDK-blocked tools such as `delegate_subagent`, `write_todos`, and memory recall
- may drop additional host-blocked local/deferred tool names configured on the delegate tool
- inherit parent provider tools after the same blocklist filtering
- run in a fresh context using the shared delegated-task prompt
---
## Event system ## Event system
### AgentEventBus ### AgentEventBus
@ -187,18 +219,6 @@ interface SerializableAgentState {
`suspendPayload`, `resumeSchema`) from calls not yet executed (`suspended: `suspendPayload`, `resumeSchema`) from calls not yet executed (`suspended:
false`) when a batch stops at the first suspension. false`) when a batch stops at the first suspension.
---
## asTool()
`agent.asTool(description)` wraps the agent as a `BuiltTool`. The handler calls
`agent.generate(input, { telemetry: ctx.parentTelemetry })`, collects assistant
text, and returns `{ result: string }`. When the sub-run produces usage,
results are wrapped so the parent runtime can merge **`SubAgentUsage`** and
**`totalCost`** into the parent `GenerateResult` / stream `finish` chunk.
---
## Message types ## Message types
| Type | Definition | Purpose | | Type | Definition | Purpose |
@ -364,8 +384,7 @@ implement TTL or eviction as needed.
## Memory persistence ## Memory persistence
At end of turn, `saveToMemory()` uses `list.turnDelta()` and At end of turn, `saveToMemory()` uses `list.turnDelta()` and
`saveMessagesToThread`. If **semantic recall** is configured with an embedder `saveMessagesToThread`.
and `memory.saveEmbeddings`, new messages are embedded and stored.
**Working memory:** when configured, the runtime injects an `update_working_memory` **Working memory:** when configured, the runtime injects an `update_working_memory`
tool into the agent's tool set. The current state is included in the system prompt tool into the agent's tool set. The current state is included in the system prompt
@ -395,7 +414,7 @@ readable side immediately; the loop writes chunks in the background.
| `tool-call-delta` | Streaming tool name / arguments | | `tool-call-delta` | Streaming tool name / arguments |
| `message` | Full assistant or tool message | | `message` | Full assistant or tool message |
| `tool-call-suspended` | Suspension: `runId`, `toolCallId`, tool metadata, optional `resumeSchema`, `suspendPayload` | | `tool-call-suspended` | Suspension: `runId`, `toolCallId`, tool metadata, optional `resumeSchema`, `suspendPayload` |
| `finish` | `finishReason`, `usage` (with optional **cost**), `model`, optional **`structuredOutput`**, **`subAgentUsage`**, **`totalCost`** | | `finish` | `finishReason`, `usage` (with optional **cost**), `model`, optional **`structuredOutput`** |
| `error` | Failure or abort | | `error` | Failure or abort |
--- ---
@ -412,7 +431,7 @@ src/
memory-store.ts — saveMessagesToThread helper memory-store.ts — saveMessagesToThread helper
messages.ts — AI SDK message conversion messages.ts — AI SDK message conversion
model-factory.ts — createModel / createEmbeddingModel model-factory.ts — createModel / createEmbeddingModel
tool-adapter.ts — buildToolMap, executeTool, toAiSdkTools, suspend / agent-result guards tool-adapter.ts — buildToolMap, executeTool, toAiSdkTools, suspend guards
stream.ts — convertChunk, toTokenUsage stream.ts — convertChunk, toTokenUsage
runtime-helpers.ts — normalizeInput, usage merge, stream error helpers, … runtime-helpers.ts — normalizeInput, usage merge, stream error helpers, …
working-memory.ts — instruction text, update_working_memory tool builder working-memory.ts — instruction text, update_working_memory tool builder

View File

@ -44,4 +44,11 @@ export default defineConfig(
'n8n-local-rules/no-uncaught-json-parse': 'off', 'n8n-local-rules/no-uncaught-json-parse': 'off',
}, },
}, },
{
files: ['**/*.test.ts'],
rules: {
'@typescript-eslint/no-unsafe-assignment': 'warn',
'@typescript-eslint/no-unsafe-member-access': 'warn',
},
},
); );

View File

@ -3,14 +3,14 @@
* *
* This example demonstrates the complete builder-pattern API for creating * This example demonstrates the complete builder-pattern API for creating
* and running AI agents. It shows: tools, agents, memory, guardrails, * and running AI agents. It shows: tools, agents, memory, guardrails,
* scorers, multi-agent patterns (agent-as-tool), and tool interrupts. * scorers, and tool interrupts.
* *
* To run with real LLM calls, set ANTHROPIC_API_KEY. * To run with real LLM calls, set ANTHROPIC_API_KEY.
* Without keys, the runtime will throw on actual LLM calls. * Without keys, the runtime will throw on actual LLM calls.
*/ */
import { z } from 'zod'; import { z } from 'zod';
import { Agent, Guardrail, Memory, Tool } from '../src'; import { Agent, Guardrail, Memory, Tool, createDelegateSubAgentTool } from '../src';
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Tools // Tools
@ -64,10 +64,7 @@ const writeFileTool = new Tool('write-file')
// Memory // Memory
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
const memory = new Memory().semanticRecall({ const memory = new Memory();
topK: 4,
messageRange: { before: 1, after: 1 },
});
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Agents // Agents
@ -79,6 +76,10 @@ const researcher = new Agent('researcher')
'You are a research assistant. Search for information and return structured findings.', 'You are a research assistant. Search for information and return structured findings.',
) )
.tool(searchTool) .tool(searchTool)
// No runSubAgent callback: the SDK creates an inline child that reuses this
// agent's model and filtered tools whenever the model calls delegate_subagent
// with subAgentId: "inline".
.tool(createDelegateSubAgentTool({ policy: { maxChildren: 2 } }))
.memory(memory) .memory(memory)
.inputGuardrail( .inputGuardrail(
new Guardrail('injection-detector').type('prompt-injection').strategy('block').threshold(0.8), new Guardrail('injection-detector').type('prompt-injection').strategy('block').threshold(0.8),
@ -90,18 +91,6 @@ const writer = new Agent('writer')
.tool(writeFileTool) .tool(writeFileTool)
.checkpoint('memory'); .checkpoint('memory');
// ---------------------------------------------------------------------------
// Multi-Agent: Agent as Tool
// ---------------------------------------------------------------------------
const orchestrator = new Agent('orchestrator')
.model('anthropic/claude-sonnet-4')
.instructions(
'You coordinate research and writing. Delegate research to the researcher and writing to the writer.',
)
.tool(researcher.asTool('Delegate research tasks to the research specialist'))
.tool(writer.asTool('Delegate writing tasks to the content writer'));
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Execution // Execution
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -132,13 +121,11 @@ async function main() {
console.log(' (Set ANTHROPIC_API_KEY to run with real LLM calls)'); console.log(' (Set ANTHROPIC_API_KEY to run with real LLM calls)');
} }
// --- 2. Orchestrator (agent-as-tool pattern) --- // --- 2. Tool interrupt ---
console.log('\n2. Orchestrator (agent-as-tool pattern):'); console.log('\n2. Tool interrupt:');
try { try {
const orchResult = await orchestrator.generate( const writerResult = await writer.generate('Write a short summary to /tmp/rag-summary.txt');
'Research RAG architectures and write a summary', const text = writerResult.messages
);
const text = orchResult.messages
.flatMap((m) => ('content' in m ? m.content : [])) .flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text') .filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : '')) .map((c) => ('text' in c ? c.text : ''))

View File

@ -38,6 +38,7 @@
"dev": "pnpm watch", "dev": "pnpm watch",
"typecheck": "tsc --noEmit", "typecheck": "tsc --noEmit",
"build": "rimraf dist && tsc -p tsconfig.build.json", "build": "rimraf dist && tsc -p tsconfig.build.json",
"build:unchecked": "rimraf dist && tsc -p tsconfig.build.json --noCheck",
"format": "biome format --write src examples", "format": "biome format --write src examples",
"format:check": "biome ci src examples", "format:check": "biome ci src examples",
"lint": "eslint . --quiet", "lint": "eslint . --quiet",
@ -59,6 +60,7 @@
"@ai-sdk/groq": "catalog:", "@ai-sdk/groq": "catalog:",
"@ai-sdk/mistral": "catalog:", "@ai-sdk/mistral": "catalog:",
"@ai-sdk/openai": "^3.0.41", "@ai-sdk/openai": "^3.0.41",
"@ai-sdk/openai-compatible": "catalog:",
"@ai-sdk/provider-utils": "^4.0.21", "@ai-sdk/provider-utils": "^4.0.21",
"@ai-sdk/xai": "^3.0.67", "@ai-sdk/xai": "^3.0.67",
"@modelcontextprotocol/sdk": "catalog:", "@modelcontextprotocol/sdk": "catalog:",

View File

@ -8,6 +8,7 @@ import {
createAgentWithConcurrentMixedTools, createAgentWithConcurrentMixedTools,
collectTextDeltas, collectTextDeltas,
} from './helpers'; } from './helpers';
import { createCancellation } from '../../index';
import type { StreamChunk } from '../../index'; import type { StreamChunk } from '../../index';
const describe = describeIf('anthropic'); const describe = describeIf('anthropic');
@ -99,6 +100,51 @@ describe('concurrent tool execution integration', () => {
expect(remainingIds).not.toContain(firstToolCallId); expect(remainingIds).not.toContain(firstToolCallId);
}); });
// Scenario: the prompt asks for two parallel delete_file calls, and the test
// expects at least two of them to come back suspended. Cancelling only the
// first pending call must settle the whole batch: the run finishes with
// 'stop', nothing is left pending, and a skipped sibling shows up in the
// message list.
it('cancels one of multiple suspended delete_file tool calls and resolves the batch', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const first = await agent.generate(
'Delete these two files: /tmp/cancel-a.txt and /tmp/cancel-b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
);
expect(first.finishReason).toBe('tool-calls');
expect(first.pendingSuspend).toBeDefined();
expect(first.pendingSuspend!.length).toBeGreaterThanOrEqual(2);
// Resume only the first suspended call, passing a cancellation as the resume payload.
const { runId, toolCallId } = first.pendingSuspend![0];
const resumed = await agent.resume(
'generate',
createCancellation('Cancel the delete operation. Do not delete any of the files.'),
{ runId, toolCallId },
);
expect(resumed.finishReason).toBe('stop');
expect(resumed.pendingSuspend).toBeUndefined();
// The cancelled call is reported as a delete_file tool call whose output embeds the user's text.
expect(resumed.toolCalls).toEqual(
expect.arrayContaining([
expect.objectContaining({
tool: 'delete_file',
output:
'[Tool call cancelled. User said: "Cancel the delete operation. Do not delete any of the files."]',
canceled: true,
}),
]),
);
// A call that was not explicitly cancelled is recorded in the message list as skipped.
expect(resumed.getState().messageList.messages).toEqual(
expect.arrayContaining([
expect.objectContaining({
content: expect.arrayContaining([
expect.objectContaining({
type: 'tool-call',
output: '[Skipped: a sibling tool call was cancelled]',
canceled: true,
}),
]),
}),
]),
);
});
it('resumes all suspended tools one by one until the LLM loop continues (stream)', async () => { it('resumes all suspended tools one by one until the LLM loop continues (stream)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic'); const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');

View File

@ -0,0 +1,91 @@
import { expect, it } from 'vitest';
import { describeIf } from './helpers';
import {
Agent,
createDelegateSubAgentTool,
filterLlmMessages,
type AgentMessage,
} from '../../index';
const describe = describeIf('anthropic');
const SENTINEL = 'SUBAGENT_OK_731';
// End-to-end check that a parent agent can invoke the delegate_subagent tool
// and build its final answer from the child's result. The suite is registered
// through the `describe` produced by describeIf('anthropic').
describe('delegate_subagent integration', () => {
it('lets a real parent agent call delegate_subagent and use its result', async () => {
// No runner callback is passed; the instructions below tell the model to use subAgentId "inline".
const delegateTool = createDelegateSubAgentTool({ policy: { maxChildren: 1 } });
const parent = new Agent('sub-agent-parent-integration')
.model('anthropic/claude-sonnet-4-5')
.instructions(
[
'You are a parent test agent.',
'This is a delegation wiring test: you must call delegate_subagent exactly once before answering.',
'Treat the child task as a bounded independent workstream that only the child should complete.',
'Set subAgentId to "inline" in that tool call.',
'The child result will contain a sentinel token.',
'After the tool returns, answer with exactly: PARENT_SAW_ followed by the child answer, with no extra text.',
].join(' '),
)
.tool(delegateTool);
try {
const result = await parent.generate(
`Complete this two-part verification task. Delegate the token-production workstream to a child agent, and make the delegated goal instruct the child to answer with exactly this token and nothing else: ${SENTINEL}. Then synthesize only from the child result.`,
);
// The parent must have called the delegate tool at least once...
expect(result.toolCalls?.map((toolCall) => toolCall.tool) ?? []).toContain(
'delegate_subagent',
);
// ...and its final text must echo the sentinel with the PARENT_SAW_ prefix.
expect(lastText(result.messages)).toContain(`PARENT_SAW_${SENTINEL}`);
const delegateToolCall = result.toolCalls?.find(
(toolCall) => toolCall.tool === 'delegate_subagent',
);
const delegateOutput = delegateToolCall?.output;
// Narrow the tool output before reading its fields; an unexpected shape fails the test explicitly.
if (!isDelegateOutput(delegateOutput)) {
throw new Error('delegate_subagent did not return the expected output shape');
}
expect(delegateOutput.status).toBe('completed');
expect(delegateOutput.runId).toBeDefined();
expect(delegateOutput.answer).toContain(SENTINEL);
expect(delegateOutput.usage?.totalTokens).toBeGreaterThan(0);
// The child's task path is expected to be a single segment directly under /root.
expect(delegateOutput.taskPath).toMatch(/^\/root\/[a-z0-9_]+$/);
} finally {
// Close the parent agent whether or not the assertions passed.
await parent.close();
}
}, 60_000);
});
/**
 * Returns the text of the most recent LLM message that has a text part.
 *
 * Messages are scanned from newest to oldest; within a message the first
 * `text` content part wins. Returns an empty string when no LLM message
 * carries any text.
 */
function lastText(messages: AgentMessage[]): string {
	const newestFirst = [...filterLlmMessages(messages)].reverse();
	for (const candidate of newestFirst) {
		if (!candidate) continue;
		for (const part of candidate.content) {
			if (part.type === 'text') return part.text;
		}
	}
	return '';
}
/**
 * Runtime type guard for the structured output of a `delegate_subagent` tool call.
 *
 * Validates the value of every field the predicate promises, not just that the
 * key exists, so narrowing never lets a malformed payload through
 * (e.g. `usage: null` or a non-string `answer`).
 *
 * @param value - Arbitrary tool output to inspect.
 * @returns `true` only when `value` is a non-null object with a known `status`,
 *   string `taskPath` / `runId` / `answer`, and a `usage` object holding a
 *   numeric `totalTokens`.
 */
function isDelegateOutput(value: unknown): value is {
	status: 'completed' | 'failed' | 'suspended';
	taskPath: string;
	runId: string;
	answer: string;
	usage: { totalTokens: number };
} {
	if (typeof value !== 'object' || value === null) return false;

	// Safe after the object check above; fields are still `unknown` until verified.
	const { status, taskPath, runId, answer, usage } = value as Record<string, unknown>;

	const hasKnownStatus = status === 'completed' || status === 'failed' || status === 'suspended';
	const hasStringFields =
		typeof taskPath === 'string' && typeof runId === 'string' && typeof answer === 'string';
	const hasUsage =
		typeof usage === 'object' &&
		usage !== null &&
		typeof (usage as Record<string, unknown>).totalTokens === 'number';

	return hasKnownStatus && hasStringFields && hasUsage;
}

View File

@ -175,105 +175,43 @@ describe('event system — stream', () => {
}); });
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// getState() // Result getState()
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
describe('getState()', () => { describe('result getState()', () => {
it('returns idle before first run', () => {
const agent = createSimpleAgent();
const state = agent.getState();
expect(state.status).toBe('idle');
expect(state.messageList.messages).toHaveLength(0);
});
it('returns success after a successful generate()', async () => { it('returns success after a successful generate()', async () => {
const agent = createSimpleAgent(); const agent = createSimpleAgent();
await agent.generate('Say hello'); const result = await agent.generate('Say hello');
const state = agent.getState(); const state = result.getState();
expect(state.status).toBe('success'); expect(state.status).toBe('success');
}); });
it('returns success after a completed stream()', async () => { it('returns success after a completed stream()', async () => {
const agent = createSimpleAgent(); const agent = createSimpleAgent();
const { stream } = await agent.stream('Say hello'); const result = await agent.stream('Say hello');
const { stream } = result;
await collectStreamChunks(stream); await collectStreamChunks(stream);
const state = agent.getState(); const state = result.getState();
expect(state.status).toBe('success'); expect(state.status).toBe('success');
}); });
it('state is running during the generate loop (observed via event)', async () => { it('stream result state is running before the stream is drained', async () => {
const agent = createSimpleAgent(); const agent = createSimpleAgent();
let stateWhileRunning: string | undefined; const result = await agent.stream('Say hello');
agent.on(AgentEvent.TurnStart, () => { expect(result.getState().status).toBe('running');
stateWhileRunning = agent.getState().status;
});
await agent.generate('Say hello'); await collectStreamChunks(result.stream);
expect(result.getState().status).toBe('success');
expect(stateWhileRunning).toBe('running');
}); });
it('reflects resourceId and threadId from RunOptions', async () => { it('reflects resourceId and threadId from RunOptions', async () => {
const agent = createSimpleAgent(); const agent = createSimpleAgent();
await agent.generate('Say hello', { const result = await agent.generate('Say hello', {
persistence: { resourceId: 'user-123', threadId: 'thread-abc' }, persistence: { resourceId: 'user-123', threadId: 'thread-abc' },
}); });
const state = agent.getState(); const state = result.getState();
expect(state.persistence?.resourceId).toBe('user-123'); expect(state.persistence?.resourceId).toBe('user-123');
expect(state.persistence?.threadId).toBe('thread-abc'); expect(state.persistence?.threadId).toBe('thread-abc');
}); });
}); });
// ---------------------------------------------------------------------------
// asTool()
// ---------------------------------------------------------------------------
describe('asTool()', () => {
it('wraps the agent as a BuiltTool with the correct name and description', () => {
const agent = createSimpleAgent();
const tool = agent.asTool('A helpful assistant tool');
expect(tool.name).toBe('events-test-agent');
expect(tool.description).toBe('A helpful assistant tool');
expect(tool.inputSchema).toBeDefined();
expect(typeof tool.handler).toBe('function');
});
it('asTool handler calls the agent and returns text result', async () => {
const agent = createSimpleAgent();
const tool = agent.asTool('A helpful assistant tool');
const result = await tool.handler!({ input: 'Say "pong"' }, {});
expect(result).toHaveProperty('result');
expect(typeof (result as { result: string }).result).toBe('string');
expect((result as { result: string }).result.length).toBeGreaterThan(0);
});
it('coordinator agent can use sub-agent via asTool', async () => {
const specialist = new Agent('specialist')
.model(getModel('anthropic'))
.instructions('You are a specialist. When asked, reply with exactly "SPECIALIST_RESPONSE".');
const coordinator = new Agent('coordinator')
.model(getModel('anthropic'))
.instructions(
'You coordinate tasks. Use the specialist tool to answer questions. Relay the exact response.',
)
.tool(specialist.asTool('A specialist agent'));
const result = await coordinator.generate(
'Ask the specialist for their response and tell me what they said.',
);
const text = result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
expect(text.length).toBeGreaterThan(0);
});
});

View File

@ -1,50 +0,0 @@
import { expect, it, afterEach, describe as _describe } from 'vitest';
import { Agent, Memory } from '../../../index';
import { findLastTextContent, getModel, createInMemoryAgentMemory } from '../helpers';
// Only run when both API keys are present
const describe =
process.env.ANTHROPIC_API_KEY && process.env.OPENAI_API_KEY ? _describe : _describe.skip;
const cleanups: Array<() => void> = [];
afterEach(() => {
cleanups.forEach((fn) => fn());
cleanups.length = 0;
});
describe('semantic recall', () => {
it('recalls relevant info from earlier in the thread via semantic search', async () => {
const { memory, cleanup } = createInMemoryAgentMemory();
cleanups.push(cleanup);
const mem = new Memory()
.storage(memory)
.semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
const agent = new Agent('semantic-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Answer from your context.')
.memory(mem);
const threadId = `semantic-${Date.now()}`;
const resourceId = 'test-user';
const options = { persistence: { threadId, resourceId } };
// Turn 1: unique fact recalled later via semantic search
await agent.generate(
'The annual rainfall in Timbuktu is approximately 200mm. Just acknowledge.',
options,
);
// Filler turns between the fact and the later question
await agent.generate('What is 2 + 2?', options);
await agent.generate('Tell me a one-word synonym for happy.', options);
await agent.generate('What color is the sky?', options);
// Ask about the fact from turn 1 — should be recalled via semantic search
const result = await agent.generate('What is the annual rainfall in Timbuktu?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('200');
});
});

View File

@ -183,7 +183,7 @@ describe('external abort signal', () => {
}); });
expect(result.finishReason).toBe('error'); expect(result.finishReason).toBe('error');
expect(agent.getState().status).toBe('cancelled'); expect(result.getState().status).toBe('cancelled');
}); });
it('cancels a stream() call via external AbortSignal', async () => { it('cancels a stream() call via external AbortSignal', async () => {

View File

@ -1,88 +0,0 @@
import { expect, it } from 'vitest';
import {
chunksOfType,
collectStreamChunks,
collectTextDeltas,
describeIf,
getModel,
} from './helpers';
import { Agent } from '../../index';
const describe = describeIf('anthropic');
describe('sub-agent (asTool) integration', () => {
it('orchestrator calls a sub-agent as a tool and gets its response', async () => {
const mathAgent = new Agent('math-specialist')
.model(getModel('anthropic'))
.instructions(
'You are a math specialist. When given a math problem, compute the answer and reply with just the number. No explanation.',
);
const orchestrator = new Agent('orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are a coordinator. When asked a math question, delegate to the math_specialist tool. ' +
'Pass the question as the prompt. Then relay the answer back.',
)
.tool(mathAgent.asTool('A math specialist that can solve math problems'));
const { stream: fullStream } = await orchestrator.stream('What is 15 * 4?');
const chunks = await collectStreamChunks(fullStream);
const text = collectTextDeltas(chunks);
const toolResults = chunksOfType(chunks, 'tool-result');
// The orchestrator should have called the sub-agent tool
expect(toolResults.length).toBeGreaterThan(0);
const mathCall = toolResults.find((tc) => tc.toolName === 'math-specialist');
expect(mathCall).toBeDefined();
// The output should contain the sub-agent's response
expect(mathCall!.output).toBeDefined();
// The final text should reference 60
expect(text).toBeTruthy();
expect(text).toContain('60');
});
it('handles a chain of two sub-agents', async () => {
const translatorAgent = new Agent('translator')
.model(getModel('anthropic'))
.instructions(
'You are a translator. Translate the given text to French. Reply with only the French translation.',
);
const uppercaseAgent = new Agent('uppercaser')
.model(getModel('anthropic'))
.instructions(
'You convert text to uppercase. Reply with the input text in all uppercase letters. Nothing else.',
);
const orchestrator = new Agent('chain-orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are a coordinator with two tools. ' +
'When asked to translate and uppercase text: ' +
'1. First use the translator tool to translate to French. ' +
'2. Then use the uppercaser tool to convert the French text to uppercase. ' +
'Return the final uppercase French text.',
)
.tool(translatorAgent.asTool('Translates text to French'))
.tool(uppercaseAgent.asTool('Converts text to uppercase'));
const { stream: fullStream } = await orchestrator.stream(
'Translate "hello" to French and then make it uppercase.',
);
const chunks = await collectStreamChunks(fullStream);
const toolResults = chunksOfType(chunks, 'tool-result');
// Should have called both tools
expect(toolResults.length).toBeGreaterThanOrEqual(2);
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
// The result should contain BONJOUR (or SALUT) — uppercase French for hello
expect(text).toMatch(/BONJOUR/i);
});
});

View File

@ -8,6 +8,7 @@ import {
createAgentWithMixedTools, createAgentWithMixedTools,
createAgentWithParallelInterruptibleCalls, createAgentWithParallelInterruptibleCalls,
} from './helpers'; } from './helpers';
import { createCancellation } from '../../index';
import type { StreamChunk } from '../../index'; import type { StreamChunk } from '../../index';
const describe = describeIf('anthropic'); const describe = describeIf('anthropic');
@ -88,6 +89,34 @@ describe('tool interrupt integration', () => {
expect(resumedTypes).toContain('text-delta'); expect(resumedTypes).toContain('text-delta');
}); });
// Scenario: a single delete_file call suspends; resuming it with a cancellation
// must finish the run with 'stop' and report the call as cancelled, with the
// user's text embedded in the tool output.
it('cancels a suspended delete_file tool call and continues', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const first = await agent.generate('Delete the file /tmp/cancel-me.txt');
expect(first.finishReason).toBe('tool-calls');
expect(first.pendingSuspend).toHaveLength(1);
// Resume the one pending call, passing a cancellation as the resume payload.
const { runId, toolCallId } = first.pendingSuspend![0];
const resumed = await agent.resume(
'generate',
createCancellation('Do not delete the file. Tell me the deletion was cancelled.'),
{ runId, toolCallId },
);
expect(resumed.finishReason).toBe('stop');
expect(resumed.pendingSuspend).toBeUndefined();
expect(resumed.toolCalls).toEqual(
expect.arrayContaining([
expect.objectContaining({
tool: 'delete_file',
output:
'[Tool call cancelled. User said: "Do not delete the file. Tell me the deletion was cancelled."]',
canceled: true,
}),
]),
);
});
it('resumes each pending tool call one by one when multiple tool calls are suspended', async () => { it('resumes each pending tool call one by one when multiple tool calls are suspended', async () => {
const agent = createAgentWithParallelInterruptibleCalls('anthropic'); const agent = createAgentWithParallelInterruptibleCalls('anthropic');

View File

@ -74,81 +74,6 @@ describeAnthropic('usage and cost (Anthropic)', () => {
expect(finish.usage!.cost).toBeDefined(); expect(finish.usage!.cost).toBeDefined();
expect(finish.usage!.cost).toBeGreaterThan(0); expect(finish.usage!.cost).toBeGreaterThan(0);
}); });
it('aggregates sub-agent usage when using asTool()', async () => {
const subAgent = new Agent('translator')
.model(getModel('anthropic'))
.instructions('Translate the input to French. Reply with only the translation.');
const parentAgent = new Agent('orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are an orchestrator. When asked to translate, use the translator tool. Be concise.',
)
.tool(subAgent.asTool('Translate text to French'));
const result = await parentAgent.generate('Translate "hello world" to French');
// Parent should have its own usage
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.cost).toBeGreaterThan(0);
expect(result.model).toBe(getModel('anthropic'));
// Sub-agent usage should be captured
expect(result.subAgentUsage).toBeDefined();
expect(result.subAgentUsage!.length).toBeGreaterThan(0);
const translatorUsage = result.subAgentUsage!.find((s) => s.agent === 'translator');
expect(translatorUsage).toBeDefined();
expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
expect(translatorUsage!.usage.cost).toBeGreaterThan(0);
// Total cost should be parent + sub-agent
expect(result.totalCost).toBeDefined();
expect(result.totalCost!).toBeGreaterThan(result.usage!.cost!);
expect(result.totalCost!).toBeCloseTo(result.usage!.cost! + translatorUsage!.usage.cost!, 6);
});
it('aggregates sub-agent usage via stream()', async () => {
const subAgent = new Agent('stream-translator')
.model(getModel('anthropic'))
.instructions('Translate the input to French. Reply with only the translation.');
const parentAgent = new Agent('stream-orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are an orchestrator. When asked to translate, use the stream-translator tool. Be concise.',
)
.tool(subAgent.asTool('Translate text to French'));
const { stream: fullStream } = await parentAgent.stream('Translate "goodbye" to French');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[finishChunks.length - 1] as StreamChunk & { type: 'finish' };
// Should have usage with cost
expect(finish.usage).toBeDefined();
expect(finish.usage!.cost).toBeGreaterThan(0);
// Should include model
expect(finish.model).toBe(getModel('anthropic'));
// Should include sub-agent usage
expect(finish.subAgentUsage).toBeDefined();
expect(finish.subAgentUsage!.length).toBeGreaterThan(0);
const translatorUsage = finish.subAgentUsage!.find((s) => s.agent === 'stream-translator');
expect(translatorUsage).toBeDefined();
expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
expect(translatorUsage!.usage.cost).toBeGreaterThan(0);
// Total cost should include parent + sub-agent
expect(finish.totalCost).toBeDefined();
expect(finish.totalCost!).toBeGreaterThan(finish.usage!.cost!);
});
}); });
const describeOpenAI = describeIf('openai'); const describeOpenAI = describeIf('openai');

View File

@ -19,7 +19,6 @@ export type {
InterruptibleToolContext, InterruptibleToolContext,
CheckpointStore, CheckpointStore,
StreamChunk, StreamChunk,
SubAgentUsage,
Provider, Provider,
ThinkingConfig, ThinkingConfig,
ThinkingConfigFor, ThinkingConfigFor,
@ -63,12 +62,12 @@ export type {
NewEpisodicMemoryEntrySource, NewEpisodicMemoryEntrySource,
NewEpisodicMemoryEntrySourceForEntry, NewEpisodicMemoryEntrySourceForEntry,
RetrievedEpisodicMemoryEntry, RetrievedEpisodicMemoryEntry,
SemanticRecallConfig,
ResumeOptions, ResumeOptions,
McpServerConfig, McpServerConfig,
McpVerifyResult, McpVerifyResult,
ModelConfig, ModelConfig,
ExecutionOptions, ExecutionOptions,
SmoothStreamOptions,
AgentExecutionCounter, AgentExecutionCounter,
PersistedExecutionOptions, PersistedExecutionOptions,
BuiltTelemetry, BuiltTelemetry,
@ -99,6 +98,8 @@ export {
OBSERVATION_LOG_STATUSES, OBSERVATION_LOG_STATUSES,
} from './types'; } from './types';
export { createCancellation, isCancellation, CANCELLATION_TYPE } from './sdk/cancellation';
export type { Cancellation } from './sdk/cancellation';
export { Tool, wrapToolForApproval } from './sdk/tool'; export { Tool, wrapToolForApproval } from './sdk/tool';
export { Memory } from './sdk/memory'; export { Memory } from './sdk/memory';
export { Guardrail } from './sdk/guardrail'; export { Guardrail } from './sdk/guardrail';
@ -161,7 +162,6 @@ export type {
CredentialListItem, CredentialListItem,
} from './types'; } from './types';
export { McpClient } from './sdk/mcp-client'; export { McpClient } from './sdk/mcp-client';
export { Network } from './sdk/network';
export { providerTools } from './sdk/provider-tools'; export { providerTools } from './sdk/provider-tools';
export { verify } from './sdk/verify'; export { verify } from './sdk/verify';
export type { VerifyResult } from './sdk/verify'; export type { VerifyResult } from './sdk/verify';
@ -198,6 +198,35 @@ export { BaseMemory } from './storage/base-memory';
export type { ToolDescriptor } from './types/sdk/tool-descriptor'; export type { ToolDescriptor } from './types/sdk/tool-descriptor';
export { createModel } from './runtime/model-factory'; export { createModel } from './runtime/model-factory';
export {
ROOT_SUB_AGENT_TASK_PATH,
assertSubAgentPolicyAllowsChildCount,
assertSubAgentTaskPath,
createChildSubAgentTaskPath,
isSubAgentTaskPath,
sanitizeSubAgentTaskName,
} from './runtime/sub-agent-task-path';
export type { SubAgentTaskPath, SubAgentTaskPathPolicy } from './runtime/sub-agent-task-path';
export {
DELEGATE_SUB_AGENT_TOOL_NAME,
DELEGATED_CHILD_SUSPEND_UNSUPPORTED_MESSAGE,
INLINE_SUB_AGENT_ID,
createDelegateSubAgentTool,
failedDelegatedChildSuspendOutput,
generateResultToDelegateSubAgentOutput,
getInlineDelegateSubAgentToolOptions,
renderDelegateSubAgentPrompt,
} from './runtime/delegate-sub-agent-tool';
export type {
CreateDelegateSubAgentToolOptions,
DelegateSubAgentInput,
DelegateSubAgentPolicy,
DelegateSubAgentRequest,
DelegateSubAgentRunner,
DelegateSubAgentRunnerHelpers,
DelegateSubAgentToolOutput,
} from './runtime/delegate-sub-agent-tool';
export { WRITE_TODOS_TOOL_NAME, createWriteTodosTool } from './runtime/write-todos-tool';
export { createEmbeddingModel } from './runtime/model-factory'; export { createEmbeddingModel } from './runtime/model-factory';
export { generateTitleFromMessage } from './runtime/title-generation'; export { generateTitleFromMessage } from './runtime/title-generation';
export { export {

View File

@ -2,14 +2,14 @@ import * as aiModule from 'ai';
import type { Mock, MockedFunction } from 'vitest'; import type { Mock, MockedFunction } from 'vitest';
import { z } from 'zod'; import { z } from 'zod';
import { createCancellation } from '../../sdk/cancellation';
import { isLlmMessage } from '../../sdk/message'; import { isLlmMessage } from '../../sdk/message';
import { Tool, Tool as ToolBuilder } from '../../sdk/tool'; import { Tool, Tool as ToolBuilder } from '../../sdk/tool';
import { AgentEvent } from '../../types/runtime/event'; import { AgentEvent } from '../../types/runtime/event';
import type { AgentEventData } from '../../types/runtime/event'; import type { AgentEventData } from '../../types/runtime/event';
import type { StreamChunk } from '../../types/sdk/agent'; import type { StreamChunk } from '../../types/sdk/agent';
import type { BuiltMemory } from '../../types/sdk/memory';
import type { ContentToolCall, Message } from '../../types/sdk/message'; import type { ContentToolCall, Message } from '../../types/sdk/message';
import type { BuiltTool, InterruptibleToolContext } from '../../types/sdk/tool'; import type { BuiltTool, InterruptibleToolContext, ToolContext } from '../../types/sdk/tool';
import type { BuiltTelemetry } from '../../types/telemetry'; import type { BuiltTelemetry } from '../../types/telemetry';
import { AgentRuntime } from '../agent-runtime'; import { AgentRuntime } from '../agent-runtime';
import { AgentEventBus } from '../event-bus'; import { AgentEventBus } from '../event-bus';
@ -144,6 +144,60 @@ function makeStreamSuccess(text = 'Hello') {
}; };
} }
/**
 * Builds a mocked `streamText` result in which the model uses a
 * provider-executed tool (for example native web search).
 *
 * The stream carries, in order: the `tool-call` part, its terminal part
 * (`tool-error` when `opts.error` is provided, otherwise `tool-result`), and a
 * single `text-delta`. Both tool parts are flagged `providerExecuted`. The
 * result then finishes with `stop`, since the provider runs the tool
 * server-side within the step.
 *
 * `opts.text` defaults to `'done'` and is used for both the text delta and the
 * assistant message in `response`.
 */
function makeStreamWithProviderTool(opts: {
	toolCallId: string;
	toolName: string;
	input: unknown;
	output?: unknown;
	error?: unknown;
	text?: string;
}) {
	const { toolCallId, toolName, input } = opts;
	const finalText = opts.text ?? 'done';

	// Fields shared by the call part and whichever terminal part is emitted.
	const callFields = { toolCallId, toolName, input };

	const failed = opts.error !== undefined;
	const terminalPart = failed
		? { type: 'tool-error', ...callFields, error: opts.error, providerExecuted: true }
		: { type: 'tool-result', ...callFields, output: opts.output, providerExecuted: true };

	return {
		fullStream: makeChunkStream([
			{ type: 'tool-call', ...callFields, providerExecuted: true },
			terminalPart,
			{ type: 'text-delta', textDelta: finalText },
		]),
		finishReason: Promise.resolve('stop'),
		usage: Promise.resolve({ inputTokens: 10, outputTokens: 5, totalTokens: 15 }),
		response: Promise.resolve({
			messages: [{ role: 'assistant', content: [{ type: 'text', text: finalText }] }],
		}),
		toolCalls: Promise.resolve([]),
	};
}
/** Build a default runtime wired to the shared eventBus for inspection. */ /** Build a default runtime wired to the shared eventBus for inspection. */
function createRuntime(eventBus?: AgentEventBus) { function createRuntime(eventBus?: AgentEventBus) {
const bus = eventBus ?? new AgentEventBus(); const bus = eventBus ?? new AgentEventBus();
@ -1124,6 +1178,140 @@ describe('AgentRuntime — concurrent tool execution', () => {
expect(third.finishReason).toBe('stop'); expect(third.finishReason).toBe('stop');
}); });
// Scenario: a suspended tool is resumed with a cancellation. The handler must
// not run a second time, the tool call is reported as cancelled, and the
// cancellation text reaches the next model call as a user message.
it('cancels a suspended tool before resume validation and adds the user message', async () => {
// Suspends on the first invocation; would return an approval only if resumed with data.
const handler = vi.fn(async (_input, ctx: InterruptibleToolContext) => {
if (ctx.resumeData) return { approved: true };
return await ctx.suspend({ reason: 'needs approval' });
});
const suspendTool = makeSuspendingTool('suspend_tool', handler);
const receivedMessages: unknown[] = [];
const { runtime } = createRuntimeWithTools([suspendTool], Infinity);
// First model turn: request a single suspend_tool call.
generateText.mockResolvedValueOnce(
makeGenerateWithToolCalls([
{ toolCallId: 'tc-1', toolName: 'suspend_tool', args: { value: 'a' } },
]),
);
const first = await runtime.generate('run tools');
const { runId, toolCallId } = first.pendingSuspend![0];
// Second model turn: capture the messages sent to the model after the cancellation.
generateText.mockImplementationOnce(async ({ messages }: { messages: unknown[] }) => {
receivedMessages.push(...messages);
return await Promise.resolve(makeGenerateSuccess('Cancelled'));
});
const result = await runtime.resume('generate', createCancellation('Do not run this tool'), {
runId,
toolCallId,
});
expect(result.finishReason).toBe('stop');
// Exactly one call: the initial suspending invocation; the cancel did not re-enter the handler.
expect(handler).toHaveBeenCalledTimes(1);
expect(result.toolCalls).toEqual(
expect.arrayContaining([
expect.objectContaining({
tool: 'suspend_tool',
output: '[Tool call cancelled. User said: "Do not run this tool"]',
canceled: true,
}),
]),
);
// The cancellation text must be forwarded to the model as a user text message.
expect(receivedMessages).toEqual(
expect.arrayContaining([
expect.objectContaining({
role: 'user',
content: expect.arrayContaining([
expect.objectContaining({ type: 'text', text: 'Do not run this tool' }),
]),
}),
]),
);
});
it('streams cancellation as a normal tool result on resume', async () => {
	// Suspends first; would approve if it were ever resumed with data.
	const toolHandler = vi.fn(async (_input, ctx: InterruptibleToolContext) =>
		ctx.resumeData ? { approved: true } : await ctx.suspend({ reason: 'needs approval' }),
	);
	const { runtime } = createRuntimeWithTools(
		[makeSuspendingTool('suspend_tool', toolHandler)],
		Infinity,
	);

	generateText.mockResolvedValueOnce(
		makeGenerateWithToolCalls([
			{ toolCallId: 'tc-1', toolName: 'suspend_tool', args: { value: 'a' } },
		]),
	);

	const suspended = await runtime.generate('run tools');
	const { runId, toolCallId } = suspended.pendingSuspend![0];

	streamText.mockReturnValueOnce(makeStreamSuccess('Cancelled'));

	const cancellation = createCancellation('Stop this action');
	const resumed = await runtime.resume('stream', cancellation, { runId, toolCallId });
	const streamed = await collectChunks(resumed.stream as ReadableStream<unknown>);

	// The handler ran once (to suspend) and was not invoked again by the cancellation.
	expect(toolHandler).toHaveBeenCalledTimes(1);
	expect(streamed).toEqual(
		expect.arrayContaining([
			expect.objectContaining({
				type: 'tool-result',
				toolCallId,
				toolName: 'suspend_tool',
				output: '[Tool call cancelled. User said: "Stop this action"]',
				canceled: true,
			}),
		]),
	);
});
it('streams skipped sibling tool results when cancelling one of multiple suspensions', async () => {
	const toolHandler = vi.fn(async (_input, ctx: InterruptibleToolContext) =>
		ctx.resumeData ? { approved: true } : await ctx.suspend({ reason: 'needs approval' }),
	);
	const { runtime } = createRuntimeWithTools(
		[makeSuspendingTool('suspend_tool', toolHandler)],
		Infinity,
	);

	// Two calls to the same tool in one turn, so both end up suspended.
	generateText.mockResolvedValueOnce(
		makeGenerateWithToolCalls([
			{ toolCallId: 'tc-1', toolName: 'suspend_tool', args: { value: 'a' } },
			{ toolCallId: 'tc-2', toolName: 'suspend_tool', args: { value: 'b' } },
		]),
	);

	const suspended = await runtime.generate('run tools');
	const { runId } = suspended.pendingSuspend![0];

	streamText.mockReturnValueOnce(makeStreamSuccess('Cancelled'));

	// Cancel only the first call; the second is expected to be reported as skipped.
	const resumed = await runtime.resume('stream', createCancellation('Stop this action'), {
		runId,
		toolCallId: 'tc-1',
	});
	const streamed = await collectChunks(resumed.stream as ReadableStream<unknown>);

	// One invocation per original tool call, none added by the cancellation.
	expect(toolHandler).toHaveBeenCalledTimes(2);
	expect(streamed).toEqual(
		expect.arrayContaining([
			expect.objectContaining({
				type: 'tool-result',
				toolCallId: 'tc-1',
				toolName: 'suspend_tool',
				output: '[Tool call cancelled. User said: "Stop this action"]',
				canceled: true,
			}),
			expect.objectContaining({
				type: 'tool-result',
				toolCallId: 'tc-2',
				toolName: 'suspend_tool',
				output: '[Skipped: a sibling tool call was cancelled]',
				canceled: true,
			}),
		]),
	);
});
it('bounded concurrency (2) batches respects the limit', async () => { it('bounded concurrency (2) batches respects the limit', async () => {
const batchSizes: number[] = []; const batchSizes: number[] = [];
let activeConcurrency = 0; let activeConcurrency = 0;
@ -1435,6 +1623,80 @@ describe('AgentRuntime — concurrent tool execution', () => {
expect(finishChunks.length).toBe(1); expect(finishChunks.length).toBe(1);
expect((finishChunks[0] as StreamChunk & { type: 'finish' }).finishReason).toBe('tool-calls'); expect((finishChunks[0] as StreamChunk & { type: 'finish' }).finishReason).toBe('tool-calls');
}); });
it('bridges subagent lifecycle events from tool context into stream chunks', async () => {
	// A tool that does no real delegation: it only emits a started/completed
	// event pair through the tool context and then returns.
	const delegatingTool: BuiltTool = {
		name: 'delegate_subagent',
		description: 'Delegate work',
		inputSchema: z.object({ value: z.string().optional() }),
		handler: async (_input, ctx) => {
			const context = ctx as ToolContext;
			// Parent identifiers are attached only when the context provides them.
			const sharedFields = {
				taskName: 'Research API',
				taskPath: '/root/research_api',
				...(context.runId !== undefined ? { parentRunId: context.runId } : {}),
				...(context.toolCallId !== undefined ? { parentToolCallId: context.toolCallId } : {}),
			};

			context.emitEvent?.({
				type: AgentEvent.SubAgentStarted,
				...sharedFields,
				startedAt: 100,
			});
			context.emitEvent?.({
				type: AgentEvent.SubAgentCompleted,
				...sharedFields,
				status: 'completed',
				startedAt: 100,
				finishedAt: 200,
				durationMs: 100,
				runId: 'child-run-1',
				finishReason: 'stop',
			});

			return await Promise.resolve({ ok: true });
		},
	};
	const { runtime } = createRuntimeWithTools([delegatingTool], 1);

	// First model turn requests the tool call.
	streamText.mockReturnValueOnce({
		fullStream: makeChunkStream([{ type: 'text-delta', textDelta: 'thinking...' }]),
		finishReason: Promise.resolve('tool-calls'),
		usage: Promise.resolve({ inputTokens: 10, outputTokens: 5, totalTokens: 15 }),
		response: Promise.resolve({
			messages: [
				{
					role: 'assistant',
					content: [
						{
							type: 'tool-call',
							toolCallId: 'tc-1',
							toolName: 'delegate_subagent',
							args: { value: 'a' },
						},
					],
				},
			],
		}),
		toolCalls: Promise.resolve([
			{ toolCallId: 'tc-1', toolName: 'delegate_subagent', input: { value: 'a' } },
		]),
	});
	// Second model turn finishes the run.
	streamText.mockReturnValueOnce(makeStreamSuccess('done'));

	const streamResult = await runtime.stream('run tools');
	const streamed = await collectChunks(streamResult.stream);

	expect(streamed).toEqual(
		expect.arrayContaining([
			expect.objectContaining({ type: 'subagent-started', taskPath: '/root/research_api' }),
			expect.objectContaining({
				type: 'subagent-completed',
				status: 'completed',
				runId: 'child-run-1',
			}),
		]),
	);
});
}); });
// Structured output — generate() // Structured output — generate()
@ -1518,6 +1780,80 @@ describe('AgentRuntime.generate() — structured output', () => {
}); });
}); });
// ---------------------------------------------------------------------------
// Provider-executed tool timing — stream()
// ---------------------------------------------------------------------------
describe('AgentRuntime.stream() — provider-executed tool timing', () => {
beforeEach(() => {
vi.clearAllMocks();
});
it('emits tool-execution-start/end for a provider-executed tool result', async () => {
	const providerStream = makeStreamWithProviderTool({
		toolCallId: 'tc-ws',
		toolName: 'web_search',
		input: { query: 'n8n' },
		output: [{ url: 'https://n8n.io' }],
	});
	streamText.mockReturnValue(providerStream);

	const { runtime } = createRuntime();
	const streamResult = await runtime.stream('search please');
	const streamed = await collectChunks(streamResult.stream);

	// Pick out the timing chunks that belong to this tool call.
	const startChunk = streamed.find(
		(chunk): chunk is Extract<StreamChunk, { type: 'tool-execution-start' }> =>
			chunk.type === 'tool-execution-start' && chunk.toolCallId === 'tc-ws',
	);
	const endChunk = streamed.find(
		(chunk): chunk is Extract<StreamChunk, { type: 'tool-execution-end' }> =>
			chunk.type === 'tool-execution-end' && chunk.toolCallId === 'tc-ws',
	);

	expect(startChunk).toBeDefined();
	expect(startChunk?.toolName).toBe('web_search');
	expect(typeof startChunk?.startTime).toBe('number');

	expect(endChunk).toBeDefined();
	expect(endChunk?.isError).toBe(false);
	expect(typeof endChunk?.endTime).toBe('number');
});
it('emits tool-execution-end with isError on a provider-executed tool error', async () => {
	const failingStream = makeStreamWithProviderTool({
		toolCallId: 'tc-ws-err',
		toolName: 'web_search',
		input: { query: 'n8n' },
		error: new Error('search failed'),
	});
	streamText.mockReturnValue(failingStream);

	const { runtime } = createRuntime();
	const streamResult = await runtime.stream('search please');
	const streamed = await collectChunks(streamResult.stream);

	// The timing chunk must flag the failure.
	const endChunk = streamed.find(
		(chunk): chunk is Extract<StreamChunk, { type: 'tool-execution-end' }> =>
			chunk.type === 'tool-execution-end' && chunk.toolCallId === 'tc-ws-err',
	);
	expect(endChunk).toBeDefined();
	expect(endChunk?.isError).toBe(true);
	expect(typeof endChunk?.endTime).toBe('number');

	// The tool-result chunk carries the error itself as its output.
	const resultChunk = streamed.find(
		(chunk): chunk is Extract<StreamChunk, { type: 'tool-result' }> =>
			chunk.type === 'tool-result' && chunk.toolCallId === 'tc-ws-err',
	);
	expect(resultChunk).toBeDefined();
	expect(resultChunk?.isError).toBe(true);
	expect(resultChunk?.output).toEqual(new Error('search failed'));
});
});
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Structured output — stream() // Structured output — stream()
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -2532,8 +2868,7 @@ describe('AgentRuntime — observation log jobs', () => {
it('schedules observation after a persisted stream turn', async () => { it('schedules observation after a persisted stream turn', async () => {
streamText.mockReturnValue(makeStreamSuccess('Remembered response')); streamText.mockReturnValue(makeStreamSuccess('Remembered response'));
const memory = new InMemoryMemory() as InMemoryMemory & const memory = new InMemoryMemory();
Required<Pick<BuiltMemory, 'saveEmbeddings' | 'queryEmbeddings'>>;
await memory.saveThread({ id: 'thread-1', resourceId: 'resource-1' }); await memory.saveThread({ id: 'thread-1', resourceId: 'resource-1' });
const runtime = new AgentRuntime({ const runtime = new AgentRuntime({
@ -2573,8 +2908,7 @@ describe('AgentRuntime — observation log jobs', () => {
it('schedules observation after a persisted generate turn', async () => { it('schedules observation after a persisted generate turn', async () => {
generateText.mockResolvedValue(makeGenerateSuccess('Remembered response')); generateText.mockResolvedValue(makeGenerateSuccess('Remembered response'));
const memory = new InMemoryMemory() as InMemoryMemory & const memory = new InMemoryMemory();
Required<Pick<BuiltMemory, 'saveEmbeddings' | 'queryEmbeddings'>>;
await memory.saveThread({ id: 'thread-1', resourceId: 'resource-1' }); await memory.saveThread({ id: 'thread-1', resourceId: 'resource-1' });
const runtime = new AgentRuntime({ const runtime = new AgentRuntime({
@ -2612,8 +2946,7 @@ describe('AgentRuntime — observation log jobs', () => {
generateText.mockResolvedValue(makeGenerateSuccess('Remembered response')); generateText.mockResolvedValue(makeGenerateSuccess('Remembered response'));
embed.mockResolvedValue({ embedding: [1, 0], usage: { tokens: 1 } }); embed.mockResolvedValue({ embedding: [1, 0], usage: { tokens: 1 } });
embedMany.mockResolvedValue({ embeddings: [[1, 0]], usage: { tokens: 1 } }); embedMany.mockResolvedValue({ embeddings: [[1, 0]], usage: { tokens: 1 } });
const memory = new InMemoryMemory() as InMemoryMemory & const memory = new InMemoryMemory();
Required<Pick<BuiltMemory, 'saveEmbeddings' | 'queryEmbeddings'>>;
const fakeEmbedder = { specificationVersion: 'v2' } as never; const fakeEmbedder = { specificationVersion: 'v2' } as never;
const observationLockSpy = vi.spyOn(memory, 'acquireObservationLogTaskLock'); const observationLockSpy = vi.spyOn(memory, 'acquireObservationLogTaskLock');
const episodicLockSpy = vi.spyOn(memory.episodic.taskLock!, 'acquire'); const episodicLockSpy = vi.spyOn(memory.episodic.taskLock!, 'acquire');
@ -2779,55 +3112,6 @@ describe('AgentRuntime — observation log jobs', () => {
expect(embed).not.toHaveBeenCalled(); expect(embed).not.toHaveBeenCalled();
}); });
it('counts semantic recall query and saved message embedding tokens', async () => {
	generateText.mockResolvedValue(makeGenerateSuccess('Remembered response'));
	// Distinct token counts so the two embedding calls can be told apart below.
	embed.mockResolvedValue({ embedding: [1, 0], usage: { tokens: 5 } });
	embedMany.mockResolvedValue({
		embeddings: [
			[1, 0],
			[0, 1],
		],
		usage: { tokens: 13 },
	});

	const tokenCounter = makeExecutionCounter();
	const scope = { threadId: 'thread-1', resourceId: 'resource-1' };
	const memory = new InMemoryMemory() as InMemoryMemory &
		Required<Pick<BuiltMemory, 'saveEmbeddings' | 'queryEmbeddings'>>;

	await memory.saveThread({ id: 'thread-1', resourceId: 'resource-1' });
	await memory.saveMessages({
		threadId: 'thread-1',
		resourceId: 'resource-1',
		messages: [
			{
				id: 'old-1',
				createdAt: new Date('2026-05-12T10:00:00.000Z'),
				role: 'user',
				content: [{ type: 'text', text: 'Earlier Postgres decision.' }],
			},
		],
	});
	// Stub the embedding store: always recall the seeded message, ignore writes.
	memory.queryEmbeddings = async () => await Promise.resolve([{ id: 'old-1', score: 1 }]);
	memory.saveEmbeddings = async () => await Promise.resolve();

	const runtime = new AgentRuntime({
		name: 'semantic-agent',
		model: 'openai/gpt-4o-mini',
		instructions: 'You are a test assistant.',
		memory,
		semanticRecall: {
			embedder: 'openai/text-embedding-3-small',
			topK: 1,
		},
	});

	await runtime.generate('What did we decide?', {
		persistence: scope,
		executionCounter: tokenCounter,
	});

	expect(tokenCounter.incrementTokenCount).toHaveBeenCalledWith(5);
	expect(tokenCounter.incrementTokenCount).toHaveBeenCalledWith(13);
});
it('counts recall_memory query embedding tokens', async () => { it('counts recall_memory query embedding tokens', async () => {
generateText generateText
.mockResolvedValueOnce( .mockResolvedValueOnce(
@ -3351,6 +3635,66 @@ describe('AgentRuntime — telemetry propagation', () => {
expect(expTelemetry.tracer).toBe(baseTelemetry.tracer); expect(expTelemetry.tracer).toBe(baseTelemetry.tracer);
}); });
it('enables smoothStream by default on streamText', async () => {
	streamText.mockReturnValue(makeStreamSuccess());
	const smoothStreamSpy = vi.spyOn(aiModule, 'smoothStream');

	try {
		const runtime = new AgentRuntime({
			name: 'smooth-stream-default-test',
			model: 'openai/gpt-4o-mini',
			instructions: 'test',
			eventBus: new AgentEventBus(),
		});

		// No smoothStream option is passed, so the runtime default applies.
		const { stream } = await runtime.stream('hello');
		await collectChunks(stream);

		const callArgs = streamText.mock.calls[0][0] as Record<string, unknown>;
		expect(callArgs.experimental_transform).toEqual(expect.any(Function));
		expect(smoothStreamSpy).toHaveBeenCalledWith({});
	} finally {
		// Restore even when an assertion above throws, so the spy cannot leak
		// into later tests.
		smoothStreamSpy.mockRestore();
	}
});
it('omits smoothStream when explicitly disabled', async () => {
	streamText.mockReturnValue(makeStreamSuccess());

	const agent = new AgentRuntime({
		name: 'smooth-stream-disabled-test',
		model: 'openai/gpt-4o-mini',
		instructions: 'test',
		eventBus: new AgentEventBus(),
	});

	const streamResult = await agent.stream('hello', { smoothStream: false });
	await collectChunks(streamResult.stream);

	// With smoothing turned off, no transform is handed to streamText.
	const streamTextArgs = streamText.mock.calls[0][0] as Record<string, unknown>;
	expect(streamTextArgs.experimental_transform).toBeUndefined();
});
it('forwards non-default smoothStream options to the AI SDK', async () => {
	streamText.mockReturnValue(makeStreamSuccess());
	const smoothStreamSpy = vi.spyOn(aiModule, 'smoothStream');

	try {
		const runtime = new AgentRuntime({
			name: 'smooth-stream-options-test',
			model: 'openai/gpt-4o-mini',
			instructions: 'test',
			eventBus: new AgentEventBus(),
		});

		const smoothStreamOptions = { delayInMs: 25, chunking: 'line' as const };
		const { stream } = await runtime.stream('hello', { smoothStream: smoothStreamOptions });
		await collectChunks(stream);

		// The caller's options must reach smoothStream as given.
		expect(smoothStreamSpy).toHaveBeenCalledWith(smoothStreamOptions);
	} finally {
		// Restore even when an assertion above throws, so the spy cannot leak
		// into later tests.
		smoothStreamSpy.mockRestore();
	}
});
it('inherits telemetry from ExecutionOptions when no own telemetry is set', async () => { it('inherits telemetry from ExecutionOptions when no own telemetry is set', async () => {
generateText.mockResolvedValue(makeGenerateSuccess()); generateText.mockResolvedValue(makeGenerateSuccess());
@ -3539,3 +3883,229 @@ describe('AgentRuntime — telemetry propagation', () => {
expect(callArgs.experimental_telemetry).toBeUndefined(); expect(callArgs.experimental_telemetry).toBeUndefined();
}); });
}); });
// ---------------------------------------------------------------------------
// Cancellation (Feature 1: cancel suspended tool via user message)
// ---------------------------------------------------------------------------
describe('AgentRuntime.resume() with createCancellation() — auto-bypass', () => {
beforeEach(() => {
vi.clearAllMocks();
});
/** A tool that suspends on first call and returns on resume. */
function makeSuspendToolForCancel(): BuiltTool {
	// With resume data the handler echoes the answer; without it, it suspends
	// and asks for input.
	const handler = async (_input: unknown, ctx: unknown) => {
		const { suspend, resumeData } = ctx as InterruptibleToolContext;
		if (resumeData) {
			return { result: (resumeData as { answer: string }).answer };
		}
		return await suspend({ prompt: 'What should I do?' });
	};

	return {
		name: 'interactive_tool',
		description: 'A tool that suspends',
		inputSchema: z.object({ prompt: z.string() }),
		suspendSchema: z.object({ prompt: z.string() }),
		resumeSchema: z.object({ answer: z.string() }),
		handler,
	};
}
it('auto-bypass: does NOT call the tool handler on cancellation', async () => {
	// Spy-wrapped handler: echoes the answer when resumed, suspends otherwise.
	const handlerSpy = vi.fn().mockImplementation(async (_input: unknown, ctx: unknown) => {
		const { suspend, resumeData } = ctx as InterruptibleToolContext;
		if (resumeData) {
			return { result: (resumeData as { answer: string }).answer };
		}
		return await suspend({ prompt: 'What should I do?' });
	});
	const interactiveTool: BuiltTool = {
		name: 'interactive_tool',
		description: 'A tool that suspends',
		inputSchema: z.object({ prompt: z.string() }),
		suspendSchema: z.object({ prompt: z.string() }),
		resumeSchema: z.object({ answer: z.string() }),
		handler: handlerSpy,
	};
	const { runtime } = createRuntimeWithTools([interactiveTool], 1);

	// Turn 1 requests the tool; turn 2 is the model's reply after the cancellation.
	generateText.mockResolvedValueOnce(
		makeGenerateWithToolCalls([
			{ toolCallId: 'tc-1', toolName: 'interactive_tool', args: { prompt: 'continue?' } },
		]),
	);
	generateText.mockResolvedValueOnce(makeGenerateSuccess('Done after cancel'));

	const suspended = await runtime.generate('start', {});
	const { runId, toolCallId } = suspended.pendingSuspend![0];

	// Drop the suspending invocation so only resume-time calls are counted.
	handlerSpy.mockClear();

	const cancellation = createCancellation('do something else instead');
	const resumed = await runtime.resume('generate', cancellation, { runId, toolCallId });

	// The resume must not have invoked the handler.
	expect(handlerSpy).not.toHaveBeenCalled();
	// Generation carried on after the cancellation.
	expect(resumed.finishReason).toBe('stop');
});
it('auto-bypass: injects the steering message and the LLM sees it', async () => {
	const { runtime } = createRuntimeWithTools([makeSuspendToolForCancel()], 1);

	generateText.mockResolvedValueOnce(
		makeGenerateWithToolCalls([
			{ toolCallId: 'tc-1', toolName: 'interactive_tool', args: { prompt: 'continue?' } },
		]),
	);
	generateText.mockResolvedValueOnce(makeGenerateSuccess('Understood, doing something else'));

	const suspended = await runtime.generate('start');
	const { runId, toolCallId } = suspended.pendingSuspend![0];

	await runtime.resume('generate', createCancellation('pivot to plan B'), { runId, toolCallId });

	// Inspect the messages handed to the second generateText call: one user
	// message must carry the steering text.
	const { messages: resumeMessages } = generateText.mock.calls[1][0] as {
		messages: Array<{ role: string; content: unknown }>;
	};
	const steeringMessage = resumeMessages.find(
		(message) =>
			message.role === 'user' && JSON.stringify(message.content).includes('pivot to plan B'),
	);
	expect(steeringMessage).toBeDefined();
});
it('stream: emits a cancellation tool result then continues', async () => {
	const { runtime } = createRuntimeWithTools([makeSuspendToolForCancel()], 1);

	// First streamed turn: the model asks for the interactive tool.
	streamText.mockReturnValueOnce({
		fullStream: makeChunkStream([{ type: 'text-delta', textDelta: 'thinking...' }]),
		finishReason: Promise.resolve('tool-calls'),
		usage: Promise.resolve({ inputTokens: 10, outputTokens: 5, totalTokens: 15 }),
		response: Promise.resolve({
			messages: [
				{
					role: 'assistant',
					content: [
						{
							type: 'tool-call',
							toolCallId: 'tc-1',
							toolName: 'interactive_tool',
							args: { prompt: 'continue?' },
						},
					],
				},
			],
		}),
		toolCalls: Promise.resolve([
			{ toolCallId: 'tc-1', toolName: 'interactive_tool', input: { prompt: 'continue?' } },
		]),
	});
	// Second streamed turn: the model's reply after the cancellation.
	streamText.mockReturnValueOnce(makeStreamSuccess('Redirected'));

	const initial = await runtime.stream('start');
	const initialChunks = await collectChunks(initial.stream);

	const suspension = initialChunks.find((chunk) => chunk.type === 'tool-call-suspended');
	expect(suspension).toBeDefined();
	const { runId, toolCallId } = suspension as Extract<
		StreamChunk,
		{ type: 'tool-call-suspended' }
	>;

	const resumed = await runtime.resume('stream', createCancellation('go another direction'), {
		runId,
		toolCallId,
	});
	const resumedChunks = await collectChunks(resumed.stream);

	const cancelledResult = resumedChunks.find(
		(chunk) => chunk.type === 'tool-result' && chunk.toolCallId === 'tc-1',
	);
	expect(cancelledResult).toEqual(
		expect.objectContaining({
			type: 'tool-result',
			toolCallId: 'tc-1',
			toolName: 'interactive_tool',
			output: '[Tool call cancelled. User said: "go another direction"]',
			canceled: true,
		}),
	);

	// Text must still be streamed after the cancellation result.
	const textDeltaCount = resumedChunks.filter((chunk) => chunk.type === 'text-delta').length;
	expect(textDeltaCount).toBeGreaterThan(0);
});
it('rejects with an error if the checkpoint is not found', async () => {
	const { runtime } = createRuntimeWithTools([makeSuspendToolForCancel()], 1);

	// Nothing was ever suspended, so this run id has no checkpoint to resume.
	const resumeAttempt = runtime.resume('generate', createCancellation('no checkpoint'), {
		runId: 'nonexistent',
		toolCallId: 'tc-1',
	});

	await expect(resumeAttempt).rejects.toThrow('No suspended run found for runId: nonexistent');
});
});
describe('AgentRuntime.resume() with createCancellation() — manual handling (handleCancellation)', () => {
beforeEach(() => {
vi.clearAllMocks();
});
it('calls the tool handler with ctx.cancellation set', async () => {
	const handlerSpy = vi.fn().mockImplementation(async (_input: unknown, ctx: unknown) => {
		const { suspend, resumeData, cancellation } = ctx as InterruptibleToolContext;
		// Manual cleanup path: hand a note back for the LLM.
		if (cancellation) return `Cancelled: ${cancellation.message}`;
		// Normal resume path.
		if (resumeData) return 'done';
		// First invocation: ask for confirmation.
		return await suspend({ prompt: 'Confirm?' });
	});
	const manualCancelTool: BuiltTool = {
		name: 'manual_cancel_tool',
		description: 'A tool with manual cancellation',
		inputSchema: z.object({ value: z.string() }),
		suspendSchema: z.object({ prompt: z.string() }),
		resumeSchema: z.object({ confirmed: z.boolean() }),
		handleCancellation: true,
		handler: handlerSpy,
	};
	const { runtime } = createRuntimeWithTools([manualCancelTool], 1);

	generateText.mockResolvedValueOnce(
		makeGenerateWithToolCalls([
			{ toolCallId: 'tc-1', toolName: 'manual_cancel_tool', args: { value: 'x' } },
		]),
	);
	generateText.mockResolvedValueOnce(makeGenerateSuccess('Done after manual cancel'));

	const suspended = await runtime.generate('test');
	const { runId, toolCallId } = suspended.pendingSuspend![0];

	// Forget the suspending invocation so only the resume-time call is inspected.
	handlerSpy.mockClear();

	await runtime.resume('generate', createCancellation('user said stop'), { runId, toolCallId });

	// With handleCancellation enabled the handler is invoked for the cancellation.
	expect(handlerSpy).toHaveBeenCalledTimes(1);
	const resumeContext = handlerSpy.mock.calls[0][1] as InterruptibleToolContext;
	expect(resumeContext.cancellation).toEqual({ message: 'user said stop' });
	expect(resumeContext.resumeData).toBeUndefined();
});
});

View File

@ -0,0 +1,432 @@
import { vi } from 'vitest';
import { AgentEvent, type AgentEventData } from '../../types/runtime/event';
import type { GenerateResult } from '../../types/sdk/agent';
import {
DELEGATE_SUB_AGENT_TOOL_NAME,
INLINE_SUB_AGENT_ID,
createDelegateSubAgentTool,
generateResultToDelegateSubAgentOutput,
renderDelegateSubAgentPrompt,
type DelegateSubAgentRunner,
} from '../delegate-sub-agent-tool';
// Shared delegate_subagent input fixture passed to `tool.handler` throughout this file.
// Several expectations below depend on its values: `taskName` 'Research API'
// pairs with the asserted task path '/root/research_api_0'.
const input = {
subAgentId: INLINE_SUB_AGENT_ID,
taskName: 'Research API',
goal: 'Find the API behavior.',
context: 'Focus on auth endpoints.',
expectedOutput: 'A short summary.',
};
describe('createDelegateSubAgentTool', () => {
it('creates the delegate_subagent tool', () => {
	// The runner is never invoked here; only the tool's static metadata is inspected.
	const delegateTool = createDelegateSubAgentTool({
		runSubAgent: async () =>
			await Promise.resolve({
				status: 'completed',
				taskPath: '/root/research_api',
				runId: 'child-run-1',
				answer: 'done',
				getState: () => {
					throw new Error('not implemented');
				},
			}),
	});
	const { name, description, inputSchema, outputSchema } = delegateTool;

	expect(name).toBe(DELEGATE_SUB_AGENT_TOOL_NAME);
	expect(description).toContain('focused child agent');
	expect(description).toContain('independent workstreams');
	expect(inputSchema).toBeDefined();
	expect(outputSchema).toBeDefined();
});
it('can be created without a host runner for SDK inline execution', async () => {
	const delegateTool = createDelegateSubAgentTool();

	// With neither a runSubAgent callback nor a host runner, the handler
	// resolves with a failed output rather than rejecting.
	const outcome = delegateTool.handler?.(input, { runId: 'parent-run-1' });

	await expect(outcome).resolves.toMatchObject({
		status: 'failed',
		answer: '',
		error:
			'delegate_subagent was registered without a runSubAgent callback, and no host runner was provided. Register it on an Agent (for inline delegation) or pass runSubAgent.',
	});
});
it('passes model input and parent runtime context to the runner callback', async () => {
	const runnerMock = vi.fn<DelegateSubAgentRunner>().mockResolvedValue({
		status: 'completed',
		taskPath: '/root/research_api',
		runId: 'child-run-1',
		answer: 'done',
	});
	const delegateTool = createDelegateSubAgentTool({
		policy: { maxChildren: 2 },
		runSubAgent: runnerMock,
	});

	await delegateTool.handler?.(input, {
		runId: 'parent-run-1',
		toolCallId: 'tool-call-1',
	});

	// First argument: the model input enriched with the derived task path,
	// parent identifiers, child index and policy.
	const expectedRequest = {
		...input,
		taskPath: '/root/research_api_0',
		parentRunId: 'parent-run-1',
		parentToolCallId: 'tool-call-1',
		childCount: 0,
		policy: { maxChildren: 2 },
	};
	expect(runnerMock).toHaveBeenCalledWith(
		expectedRequest,
		expect.objectContaining({
			runInlineSubAgent: expect.any(Function),
		}),
	);
});
it('passes runInlineSubAgent helpers to the host runner callback', async () => {
	// The assertion on the helpers lives inside the runner itself.
	const runnerMock = vi.fn<DelegateSubAgentRunner>(async (_request, helpers) => {
		expect(helpers.runInlineSubAgent).toEqual(expect.any(Function));
		await Promise.resolve();
		return {
			status: 'completed',
			taskPath: '/root/research_api_0',
			answer: 'routed',
		};
	});
	const delegateTool = createDelegateSubAgentTool({ runSubAgent: runnerMock });

	await delegateTool.handler?.(input, { runId: 'parent-run-1' });

	expect(runnerMock).toHaveBeenCalledOnce();
});
it('forwards the parent persistence thread id and resource id', async () => {
	const runnerMock = vi.fn<DelegateSubAgentRunner>().mockResolvedValue({
		status: 'completed',
		taskPath: '/root/research_api',
		answer: 'done',
	});
	const delegateTool = createDelegateSubAgentTool({ runSubAgent: runnerMock });

	await delegateTool.handler?.(input, {
		runId: 'parent-run-1',
		persistence: { threadId: 'parent-thread-1', resourceId: 'resource-1' },
	});

	// The persistence scope shows up on the request under parent-prefixed names.
	expect(runnerMock).toHaveBeenCalledWith(
		expect.objectContaining({
			parentThreadId: 'parent-thread-1',
			parentResourceId: 'resource-1',
		}),
		expect.objectContaining({
			runInlineSubAgent: expect.any(Function),
		}),
	);
});
it('omits parent persistence fields when the parent run has no persistence scope', async () => {
	const runnerMock = vi.fn<DelegateSubAgentRunner>().mockResolvedValue({
		status: 'completed',
		taskPath: '/root/research_api',
		answer: 'done',
	});
	const delegateTool = createDelegateSubAgentTool({ runSubAgent: runnerMock });

	await delegateTool.handler?.(input, { runId: 'parent-run-1' });

	// The optional parent fields must be absent from the request, not merely undefined.
	expect(runnerMock.mock.calls[0]?.[0]).not.toHaveProperty('parentThreadId');
	expect(runnerMock.mock.calls[0]?.[0]).not.toHaveProperty('parentResourceId');
	expect(runnerMock.mock.calls[0]?.[0]).not.toHaveProperty('parentAbortSignal');
});
it('forwards the parent run abort signal to the runner callback', async () => {
	const runnerMock = vi.fn<DelegateSubAgentRunner>().mockResolvedValue({
		status: 'completed',
		taskPath: '/root/research_api',
		answer: 'done',
	});
	const delegateTool = createDelegateSubAgentTool({ runSubAgent: runnerMock });
	const { signal } = new AbortController();

	await delegateTool.handler?.(input, { runId: 'parent-run-1', abortSignal: signal });

	// The same signal instance must reach the runner as parentAbortSignal.
	expect(runnerMock).toHaveBeenCalledWith(
		expect.objectContaining({ parentAbortSignal: signal }),
		expect.objectContaining({
			runInlineSubAgent: expect.any(Function),
		}),
	);
});
it('emits lifecycle events around runner callback execution', async () => {
	const emitted: AgentEventData[] = [];
	const delegateTool = createDelegateSubAgentTool({
		runSubAgent: async () =>
			await Promise.resolve({
				status: 'completed',
				taskPath: '/root/research_api',
				runId: 'child-run-1',
				threadId: 'child-thread-1',
				answer: 'done',
				usage: {
					promptTokens: 3,
					completionTokens: 2,
					totalTokens: 5,
				},
				finishReason: 'stop',
			}),
	});

	await delegateTool.handler?.(input, {
		runId: 'parent-run-1',
		toolCallId: 'tool-call-1',
		emitEvent: (event) => emitted.push(event),
	});

	// Exactly one started event followed by one completed event.
	const emittedTypes = emitted.map(({ type }) => type);
	expect(emittedTypes).toEqual([AgentEvent.SubAgentStarted, AgentEvent.SubAgentCompleted]);

	const [startedEvent, completedEvent] = emitted;
	expect(startedEvent).toMatchObject({
		taskName: 'Research API',
		taskPath: '/root/research_api_0',
		parentRunId: 'parent-run-1',
		parentToolCallId: 'tool-call-1',
	});
	// The completed event reflects what the runner returned.
	expect(completedEvent).toMatchObject({
		status: 'completed',
		runId: 'child-run-1',
		threadId: 'child-thread-1',
		usage: { totalTokens: 5 },
		finishReason: 'stop',
	});
});
it('tracks child count per parent run id', async () => {
	const runnerMock = vi.fn<DelegateSubAgentRunner>().mockResolvedValue({
		status: 'completed',
		taskPath: '/root/research_api',
		runId: 'child-run-1',
		answer: 'done',
	});
	const delegateTool = createDelegateSubAgentTool({
		policy: { maxChildren: 1 },
		runSubAgent: runnerMock,
	});
	const firstParent = { runId: 'parent-run-1' };
	const secondParent = { runId: 'parent-run-2' };

	// The first child under parent-run-1 fits within the limit.
	await expect(delegateTool.handler?.(input, firstParent)).resolves.toMatchObject({
		status: 'completed',
	});
	// A second child under the same parent exceeds maxChildren.
	await expect(delegateTool.handler?.(input, firstParent)).resolves.toMatchObject({
		status: 'failed',
		error: 'Sub-agent child count 2 exceeds maxChildren 1',
	});
	// A different parent run is counted separately.
	await expect(delegateTool.handler?.(input, secondParent)).resolves.toMatchObject({
		status: 'completed',
	});

	// The rejected delegation never reached the runner.
	expect(runnerMock).toHaveBeenCalledTimes(2);
});
it('returns a failed output when the runner callback throws', async () => {
	const emitted: AgentEventData[] = [];
	const delegateTool = createDelegateSubAgentTool({
		runSubAgent: async () => await Promise.reject(new Error('Runner failed')),
	});

	const outcome = delegateTool.handler?.(input, {
		runId: 'parent-run-1',
		emitEvent: (event) => emitted.push(event),
	});

	// The rejection is converted into a failed tool output instead of propagating.
	await expect(outcome).resolves.toMatchObject({
		status: 'failed',
		taskPath: '/root/research_api_0',
		answer: '',
		error: 'Runner failed',
	});

	// The final emitted event reports the failure as well.
	const lastEvent = emitted[emitted.length - 1];
	expect(lastEvent).toMatchObject({
		type: AgentEvent.SubAgentCompleted,
		status: 'failed',
		error: 'Runner failed',
	});
});
it('returns a failed output for invalid task names', async () => {
	const runnerMock = vi.fn();
	const delegateTool = createDelegateSubAgentTool({ runSubAgent: runnerMock });
	// A task name with no alphanumeric characters.
	const invalidInput = { ...input, taskName: '!!!' };

	const outcome = delegateTool.handler?.(invalidInput, { runId: 'parent-run-1' });

	await expect(outcome).resolves.toMatchObject({
		status: 'failed',
		answer: '',
		error: 'Sub-agent task name must contain at least one alphanumeric character',
	});
	// The runner is never reached for an invalid name.
	expect(runnerMock).not.toHaveBeenCalled();
});
});
describe('renderDelegateSubAgentPrompt', () => {
it('includes the goal and omits unset sections', () => {
	const rendered = renderDelegateSubAgentPrompt({ goal: 'Find it.' });

	expect(rendered).toContain('YOUR TASK:\nFind it.');
	// Optional sections are left out when their inputs are not provided.
	for (const absentHeading of ['CONTEXT:', 'EXPECTED OUTPUT:']) {
		expect(rendered).not.toContain(absentHeading);
	}
});
it('includes context and expected output when provided', () => {
	const rendered = renderDelegateSubAgentPrompt({
		goal: 'Find it.',
		context: 'auth endpoints',
		expectedOutput: 'a summary',
	});

	// Each provided field appears under its own heading.
	const expectedSections = [
		'YOUR TASK:\nFind it.',
		'CONTEXT:\nauth endpoints',
		'EXPECTED OUTPUT:\na summary',
	];
	for (const section of expectedSections) {
		expect(rendered).toContain(section);
	}
});
it('uses generic summary guidance for delegated work', () => {
	const rendered = renderDelegateSubAgentPrompt({ goal: 'Find it.' });

	// Fixed guidance lines that every delegated prompt is expected to carry.
	const guidanceLines = [
		'- What you did',
		'- What you found or accomplished',
		'- Important outputs, decisions, or evidence',
		'- Any issues, assumptions, or limitations',
		'If the information above is insufficient, do your best with explicitly stated assumptions and note what was missing, rather than stopping to ask.',
		'Be thorough but concise -- your response is returned to the parent agent as a summary.',
	];
	for (const line of guidanceLines) {
		expect(rendered).toContain(line);
	}
});
});
describe('generateResultToDelegateSubAgentOutput', () => {
it('maps a successful GenerateResult to the tool output', () => {
	const childResult: GenerateResult = {
		runId: 'child-run-1',
		messages: [
			{
				role: 'assistant',
				type: 'llm',
				content: [
					{ type: 'text', text: 'preamble' },
					{ type: 'text', text: 'answer' },
				],
			},
		],
		finishReason: 'stop',
		usage: { promptTokens: 3, completionTokens: 2, totalTokens: 5 },
		getState: () => ({
			status: 'success',
			messageList: { messages: [], historyIds: [], inputIds: [], responseIds: [] },
			pendingToolCalls: {},
		}),
	};

	const output = generateResultToDelegateSubAgentOutput(
		'/root/research_api_0',
		childResult,
		'child-thread-1',
	);

	// The two text parts are joined with a newline to form the answer.
	expect(output).toEqual({
		status: 'completed',
		taskPath: '/root/research_api_0',
		runId: 'child-run-1',
		threadId: 'child-thread-1',
		answer: 'preamble\nanswer',
		usage: { promptTokens: 3, completionTokens: 2, totalTokens: 5 },
		finishReason: 'stop',
	});
});
it('marks an errored result as failed', () => {
	const childResult: GenerateResult = {
		runId: 'child-run-2',
		messages: [],
		finishReason: 'error',
		error: new Error('boom'),
		getState: () => ({
			status: 'failed',
			messageList: { messages: [], historyIds: [], inputIds: [], responseIds: [] },
			pendingToolCalls: {},
		}),
	};

	const output = generateResultToDelegateSubAgentOutput('/root/x_0', childResult);

	// The error's message is surfaced and the answer is left empty.
	expect(output).toMatchObject({
		status: 'failed',
		answer: '',
		error: 'boom',
	});
});
it('returns a failed delegate output for delegated child suspension stopgap', async () => {
const { failedDelegatedChildSuspendOutput } = await import('../delegate-sub-agent-tool');
expect(failedDelegatedChildSuspendOutput('/root/x_0')).toEqual({
status: 'failed',
taskPath: '/root/x_0',
answer: '',
error: 'agents.chat.delegate.childSuspendUnsupported',
});
});
it('maps a suspended child result to suspended with pendingSuspend metadata', () => {
const result: GenerateResult = {
runId: 'child-run-3',
messages: [
{
role: 'assistant',
type: 'llm',
content: [{ type: 'text', text: 'awaiting approval' }],
},
],
finishReason: 'tool-calls',
pendingSuspend: [
{
runId: 'child-run-3',
toolCallId: 'tool-call-1',
toolName: 'delete_file',
input: { path: '/tmp/foo.txt' },
suspendPayload: { message: 'Delete file?' },
},
],
getState: () => {
throw new Error('getState is not implemented');
},
};
expect(generateResultToDelegateSubAgentOutput('/root/x_0', result)).toEqual({
status: 'suspended',
taskPath: '/root/x_0',
runId: 'child-run-3',
answer: 'awaiting approval',
finishReason: 'tool-calls',
pendingSuspend: result.pendingSuspend,
});
});
it('prefers failed over suspended when the child result also has pendingSuspend', () => {
const result: GenerateResult = {
runId: 'child-run-4',
messages: [],
finishReason: 'error',
error: new Error('child failed'),
pendingSuspend: [
{
runId: 'child-run-4',
toolCallId: 'tool-call-1',
toolName: 'delete_file',
input: {},
suspendPayload: {},
},
],
getState: () => {
throw new Error('getState is not implemented');
},
};
expect(generateResultToDelegateSubAgentOutput('/root/x_0', result)).toMatchObject({
status: 'failed',
error: 'child failed',
});
expect(
generateResultToDelegateSubAgentOutput('/root/x_0', result).pendingSuspend,
).toBeUndefined();
});
});

View File

@ -142,6 +142,18 @@ vi.mock('@openrouter/ai-sdk-provider', () => ({
}), }),
})); }));
// Stub for the OpenAI-compatible provider factory: the "model" it produces is a
// plain object echoing the provider options, so tests can assert on what was passed.
vi.mock('@ai-sdk/openai-compatible', () => {
	const createOpenAICompatible = (opts: ProviderOpts & { name: string }) => {
		const buildModel = (model: string) => {
			return {
				provider: opts.name,
				modelId: model,
				apiKey: opts.apiKey,
				baseURL: opts.baseURL,
				headers: opts.headers,
				fetch: opts.fetch,
				specificationVersion: 'v3',
			};
		};
		return buildModel;
	};

	return { createOpenAICompatible };
});
vi.mock('@ai-sdk/amazon-bedrock', () => ({ vi.mock('@ai-sdk/amazon-bedrock', () => ({
createAmazonBedrock: createAmazonBedrock:
(opts?: { (opts?: {
@ -288,6 +300,18 @@ describe('createModel', () => {
expect(model.modelId).toBe('openai/gpt-4o'); expect(model.modelId).toBe('openai/gpt-4o');
expect(model.apiKey).toBe('or-test'); expect(model.apiKey).toBe('or-test');
}); });
// Verifies createModel for an id with the `nvidia/` prefix: the first path segment
// is expected to become the provider, and the remainder — which itself contains a
// slash — is expected to be kept intact as the model id.
it('should create model for nvidia', () => {
const model = createModel({
id: 'nvidia/nvidia/llama-3.3-nemotron-super-49b-v1',
apiKey: 'nv-test',
baseURL: 'https://integrate.api.nvidia.com/v1',
}) as unknown as Record<string, unknown>;
// Cast to a plain record so the fields of the returned model can be inspected directly.
expect(model.provider).toBe('nvidia');
expect(model.modelId).toBe('nvidia/llama-3.3-nemotron-super-49b-v1');
// Credentials and endpoint must be forwarded unchanged.
expect(model.apiKey).toBe('nv-test');
expect(model.baseURL).toBe('https://integrate.api.nvidia.com/v1');
});
}); });
describe('azure-openai', () => { describe('azure-openai', () => {

View File

@ -3,6 +3,7 @@ import type { TextStreamPart, ToolSet } from 'ai';
import { convertChunk } from '../stream'; import { convertChunk } from '../stream';
type ToolCallChunk = Extract<TextStreamPart<ToolSet>, { type: 'tool-call' }>; type ToolCallChunk = Extract<TextStreamPart<ToolSet>, { type: 'tool-call' }>;
type ToolErrorChunk = Extract<TextStreamPart<ToolSet>, { type: 'tool-error' }>;
type ToolResultChunk = Extract<TextStreamPart<ToolSet>, { type: 'tool-result' }>; type ToolResultChunk = Extract<TextStreamPart<ToolSet>, { type: 'tool-result' }>;
describe('convertChunk — tool-call invalid/error handling', () => { describe('convertChunk — tool-call invalid/error handling', () => {
@ -138,3 +139,25 @@ describe('convertChunk — tool-result output passthrough', () => {
}); });
}); });
}); });
describe('convertChunk — tool-error handling', () => {
	it('maps provider-executed tool-error to tool-result with isError', () => {
		const toolCallId = 'tc-err';
		const toolName = 'web_search';
		const failure = new Error('search failed');

		// Cast through unknown, as in the original test.
		const errorChunk = {
			type: 'tool-error',
			toolCallId,
			toolName,
			input: { query: 'n8n' },
			error: failure,
			providerExecuted: true,
		} as unknown as ToolErrorChunk;

		const converted = convertChunk(errorChunk);

		// The error object itself is expected as the output, flagged with isError.
		expect(converted).toEqual({
			type: 'tool-result',
			toolCallId: 'tc-err',
			toolName: 'web_search',
			output: failure,
			isError: true,
		});
	});
});

View File

@ -0,0 +1,64 @@
import {
ROOT_SUB_AGENT_TASK_PATH,
assertSubAgentPolicyAllowsChildCount,
assertSubAgentTaskPath,
createChildSubAgentTaskPath,
isSubAgentTaskPath,
sanitizeSubAgentTaskName,
} from '../sub-agent-task-path';
describe('sub-agent task paths', () => {
	it('sanitizes display task names into path segments', () => {
		const sanitizedResearch = sanitizeSubAgentTaskName('Research API');
		const sanitizedCheck = sanitizeSubAgentTaskName('Check tests!!!');
		const sanitizedMessy = sanitizeSubAgentTaskName('__Already---Messy__');

		expect(sanitizedResearch).toBe('research_api');
		expect(sanitizedCheck).toBe('check_tests');
		expect(sanitizedMessy).toBe('already_messy');
	});

	it('rejects task names without alphanumeric content', () => {
		const sanitizeBlank = () => sanitizeSubAgentTaskName(' ');
		const sanitizePunctuation = () => sanitizeSubAgentTaskName('!!!');

		expect(sanitizeBlank).toThrow('task name');
		expect(sanitizePunctuation).toThrow('task name');
	});

	it('recognizes root and first-level child task paths', () => {
		const rootIsValid = isSubAgentTaskPath(ROOT_SUB_AGENT_TASK_PATH);
		const plainChildIsValid = isSubAgentTaskPath('/root/research');
		const indexedChildIsValid = isSubAgentTaskPath('/root/research_api_0');

		expect(rootIsValid).toBe(true);
		expect(plainChildIsValid).toBe(true);
		expect(indexedChildIsValid).toBe(true);
	});

	it('rejects malformed task paths', () => {
		// Each entry must be rejected by both the predicate and the asserting variant.
		const malformedPaths = [
			'',
			'root',
			'/',
			'/root/',
			'/root//child',
			'/root/../child',
			'/Root/child',
			'/root/child with spaces',
			'/root/research/check_tests',
		];

		malformedPaths.forEach((candidate) => {
			expect(isSubAgentTaskPath(candidate)).toBe(false);
			expect(() => assertSubAgentTaskPath(candidate)).toThrow('Invalid sub-agent task path');
		});
	});

	it('creates child paths with the parent child index appended', () => {
		const researchPath = createChildSubAgentTaskPath('Research API', 0);
		const checkPath = createChildSubAgentTaskPath('Check tests', 1);

		expect(researchPath).toBe('/root/research_api_0');
		expect(checkPath).toBe('/root/check_tests_1');
	});

	it('disambiguates same-named siblings by child index', () => {
		const siblingPaths = [0, 1].map((childIndex) =>
			createChildSubAgentTaskPath('research', childIndex),
		);
		const [first, second] = siblingPaths;

		expect(first).toBe('/root/research_0');
		expect(second).toBe('/root/research_1');
		expect(first).not.toBe(second);
	});

	it('enforces max child count policy', () => {
		const policy = { maxChildren: 2 };
		const checkBelowLimit = () => assertSubAgentPolicyAllowsChildCount(1, { maxChildren: 2 });
		const checkAtLimit = () => assertSubAgentPolicyAllowsChildCount(2, policy);

		// A count equal to maxChildren is rejected; one below it is allowed.
		expect(checkBelowLimit).not.toThrow();
		expect(checkAtLimit).toThrow('exceeds maxChildren');
	});
});

View File

@ -2,7 +2,7 @@ import type { JSONSchema7 } from 'json-schema';
import { z } from 'zod'; import { z } from 'zod';
import type { BuiltTool } from '../../types'; import type { BuiltTool } from '../../types';
import { toAiSdkTools } from '../tool-adapter'; import { executeTool, toAiSdkTools } from '../tool-adapter';
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Module mocks // Module mocks
@ -191,3 +191,34 @@ describe('toAiSdkTools — description forwarding', () => {
expect((result['myTool'] as { description: string }).description).toBe('Does something useful'); expect((result['myTool'] as { description: string }).description).toBe('Does something useful');
}); });
}); });
// ---------------------------------------------------------------------------
// executeTool — context propagation
// ---------------------------------------------------------------------------
describe('executeTool — context propagation', () => {
	it('passes the run abort signal to the tool handler', async () => {
		const controller = new AbortController();
		const abortSignal = controller.signal;
		const handler = vi.fn().mockResolvedValue('ok');
		const tool: BuiltTool = { name: 'cancellable', description: 'd', handler };

		await executeTool({}, tool, undefined, undefined, 'call-1', { abortSignal });

		// The handler context must carry the exact signal instance that was supplied.
		const expectedContext = expect.objectContaining({ abortSignal });
		expect(handler).toHaveBeenCalledWith({}, expectedContext);
	});

	it('passes the run abort signal to interruptible tool handlers', async () => {
		const controller = new AbortController();
		const abortSignal = controller.signal;
		const handler = vi.fn().mockResolvedValue('ok');
		// Same scenario as above, but for a tool that declares a suspendSchema.
		const tool: BuiltTool = {
			name: 'interruptible',
			description: 'd',
			handler,
			suspendSchema: z.object({}),
		};

		await executeTool({}, tool, undefined, undefined, 'call-1', { abortSignal });

		const expectedContext = expect.objectContaining({ abortSignal });
		expect(handler).toHaveBeenCalledWith({}, expectedContext);
	});
});

View File

@ -0,0 +1,70 @@
import { describe, expect, it } from 'vitest';
import { isZodSchema } from '../../utils/zod';
import { WRITE_TODOS_TOOL_NAME, createWriteTodosTool } from '../write-todos-tool';
// Fixture shared by the handler test below: two todos, the first carrying the
// optional `delegateHint` object and the second omitting it.
const sampleTodos = [
{
id: 'research',
content: 'Research API authentication options',
// `as const` keeps the status as a string literal type instead of widening to `string`.
status: 'in_progress' as const,
delegateHint: {
subAgentId: 'inline',
expectedOutput: 'Short comparison of auth methods',
},
},
{
id: 'synthesize',
content: 'Synthesize findings into a recommendation',
status: 'pending' as const,
},
];
describe('createWriteTodosTool', () => {
	it('creates the write_todos tool with planner guidance', () => {
		const { name, description, inputSchema, outputSchema } = createWriteTodosTool();

		expect(name).toBe(WRITE_TODOS_TOOL_NAME);
		expect(description).toContain('structured task list');
		expect(description).toContain('delegate_subagent');
		expect(inputSchema).toBeDefined();
		expect(outputSchema).toBeDefined();
	});

	it('returns the provided todo list with a count', async () => {
		const tool = createWriteTodosTool();

		const handlerResult = tool.handler?.(
			{ todos: sampleTodos },
			{ runId: 'parent-run-1', persistence: { threadId: 'thread-1', resourceId: 'res-1' } },
		);

		// The handler is expected to echo the todos back along with their count.
		await expect(handlerResult).resolves.toEqual({
			status: 'ok',
			todoCount: 2,
			todos: sampleTodos,
		});
	});

	it('rejects duplicate todo ids in a single update', () => {
		const tool = createWriteTodosTool();
		expect(tool.inputSchema).toBeDefined();
		expect(isZodSchema(tool.inputSchema)).toBe(true);
		// Narrow the schema type for the compiler; the expectation above already failed otherwise.
		if (!isZodSchema(tool.inputSchema)) {
			throw new Error('Expected Zod input schema');
		}

		const duplicatedTodos = [
			{ id: 'dup', content: 'First', status: 'pending' },
			{ id: 'dup', content: 'Second', status: 'pending' },
		];
		const result = tool.inputSchema.safeParse({ todos: duplicatedTodos });

		expect(result.success).toBe(false);
		// Early exit only serves type narrowing so `result.error` is accessible below.
		if (result.success) return;

		const issueMessages = result.error.issues.map((issue) => issue.message);
		const mentionsDuplicate = issueMessages.some((message) =>
			message.includes('Duplicate todo id "dup"'),
		);
		expect(mentionsDuplicate).toBe(true);
	});
});

View File

@ -1,5 +1,5 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils'; import type { ProviderOptions } from '@ai-sdk/provider-utils';
import type { TelemetrySettings, ToolCallRepairFunction, ToolSet } from 'ai'; import type { StreamTextTransform, TelemetrySettings, ToolCallRepairFunction, ToolSet } from 'ai';
import type { z } from 'zod'; import type { z } from 'zod';
import { zodToJsonSchema, type JsonSchema7Type } from 'zod-to-json-schema'; import { zodToJsonSchema, type JsonSchema7Type } from 'zod-to-json-schema';
@ -26,11 +26,9 @@ import type {
OpenAIThinkingConfig, OpenAIThinkingConfig,
PendingToolCall, PendingToolCall,
RunOptions, RunOptions,
SemanticRecallConfig,
SerializableAgentState, SerializableAgentState,
StreamChunk, StreamChunk,
StreamResult, StreamResult,
SubAgentUsage,
ThinkingConfig, ThinkingConfig,
TitleGenerationConfig, TitleGenerationConfig,
TokenUsage, TokenUsage,
@ -55,7 +53,7 @@ import { createFilteredLogger } from './logger';
import { saveMessagesToThread } from './memory-store'; import { saveMessagesToThread } from './memory-store';
import { AgentMessageList, type SerializedMessageList } from './message-list'; import { AgentMessageList, type SerializedMessageList } from './message-list';
import { fromAiFinishReason, fromAiMessages } from './messages'; import { fromAiFinishReason, fromAiMessages } from './messages';
import { createEmbeddingModel, createModel } from './model-factory'; import { createModel } from './model-factory';
import { import {
runObservationLogObserver, runObservationLogObserver,
type ObservationLogObserverMemory, type ObservationLogObserverMemory,
@ -66,7 +64,6 @@ import { hasObservationLogStore, hasObservationLogTaskLockStore } from './observ
import { generateRunId, RunStateManager } from './run-state'; import { generateRunId, RunStateManager } from './run-state';
import { import {
accumulateUsage, accumulateUsage,
applySubAgentUsage,
extractSettledToolCalls, extractSettledToolCalls,
makeErrorStream, makeErrorStream,
normalizeInput, normalizeInput,
@ -78,11 +75,11 @@ import { generateThreadTitle } from './title-generation';
import { import {
buildToolMap, buildToolMap,
executeTool, executeTool,
isAgentToolResult,
isSuspendedToolResult, isSuspendedToolResult,
toAiSdkProviderTools, toAiSdkProviderTools,
toAiSdkTools, toAiSdkTools,
} from './tool-adapter'; } from './tool-adapter';
import { isCancellation } from '../sdk/cancellation';
import { Telemetry } from '../sdk/telemetry'; import { Telemetry } from '../sdk/telemetry';
import { AgentEvent } from '../types/runtime/event'; import { AgentEvent } from '../types/runtime/event';
import type { AgentEventData } from '../types/runtime/event'; import type { AgentEventData } from '../types/runtime/event';
@ -197,7 +194,6 @@ export interface AgentRuntimeConfig {
observationLog?: ObservationLogMemoryConfig; observationLog?: ObservationLogMemoryConfig;
observationalMemory?: ObservationalMemoryConfig; observationalMemory?: ObservationalMemoryConfig;
episodicMemory?: EpisodicMemoryConfig; episodicMemory?: EpisodicMemoryConfig;
semanticRecall?: SemanticRecallConfig;
structuredOutput?: z.ZodType; structuredOutput?: z.ZodType;
checkpointStorage?: 'memory' | CheckpointStore; checkpointStorage?: 'memory' | CheckpointStore;
thinking?: ThinkingConfig; thinking?: ThinkingConfig;
@ -206,6 +202,18 @@ export interface AgentRuntimeConfig {
toolCallConcurrency?: number; toolCallConcurrency?: number;
titleGeneration?: TitleGenerationConfig; titleGeneration?: TitleGenerationConfig;
telemetry?: BuiltTelemetry; telemetry?: BuiltTelemetry;
/** Existing run id to continue, used when resuming a suspended run. */
runId?: string;
/**
* Pre-fetched model cost from the catalog. When provided, skips the per-run
* catalog fetch. Set once during Agent.build() and shared across per-run runtimes.
*/
modelCost?: ModelCost;
/**
* Shared RunStateManager for suspend/resume. When provided, per-run runtimes
* use the same store so resume() can find state from a prior run.
*/
runState?: RunStateManager;
} }
const MAX_LOOP_ITERATIONS = 30; const MAX_LOOP_ITERATIONS = 30;
@ -262,7 +270,6 @@ type ToolCallOutcome =
* LLM saw (rather than the larger raw output). * LLM saw (rather than the larger raw output).
*/ */
modelOutput: unknown; modelOutput: unknown;
subAgentUsage?: SubAgentUsage[];
customMessage?: AgentMessage; customMessage?: AgentMessage;
} }
| { | {
@ -270,6 +277,13 @@ type ToolCallOutcome =
payload: unknown; payload: unknown;
resumeSchema: JsonSchema7Type; resumeSchema: JsonSchema7Type;
} }
| {
outcome: 'cancelled';
toolEntry: ToolResultEntry;
modelOutput: string;
userMessage: string;
canceled: true;
}
| { outcome: 'error'; error: unknown } | { outcome: 'error'; error: unknown }
| { outcome: 'noop' }; // tool call shouldn't be saved or logged anywhere, usually means that it was executed by AI SDK | { outcome: 'noop' }; // tool call shouldn't be saved or logged anywhere, usually means that it was executed by AI SDK
@ -280,7 +294,6 @@ interface ToolCallSuccess {
input: JSONValue; input: JSONValue;
toolEntry: ToolResultEntry; toolEntry: ToolResultEntry;
modelOutput: unknown; modelOutput: unknown;
subAgentUsage?: SubAgentUsage[];
customMessage?: AgentMessage; customMessage?: AgentMessage;
} }
@ -317,7 +330,6 @@ type RuntimeExecutionOptions = RunOptions & ExecutionOptions & { iterationCount?
interface LoopContext { interface LoopContext {
list: AgentMessageList; list: AgentMessageList;
options?: RuntimeExecutionOptions; options?: RuntimeExecutionOptions;
runId: string;
pendingResume?: PendingResume; pendingResume?: PendingResume;
} }
@ -369,15 +381,19 @@ export class AgentRuntime {
private deferredToolManager: DeferredToolManager | undefined; private deferredToolManager: DeferredToolManager | undefined;
private runId: string;
/** Resolved telemetry for the current run (own config or inherited from parent). */ /** Resolved telemetry for the current run (own config or inherited from parent). */
constructor(config: AgentRuntimeConfig) { constructor(config: AgentRuntimeConfig) {
this.config = config; this.config = config;
this.runId = config.runId ?? generateRunId();
if (config.deferredTools && config.deferredTools.length > 0) { if (config.deferredTools && config.deferredTools.length > 0) {
this.deferredToolManager = new DeferredToolManager(config.deferredTools, config.toolSearch); this.deferredToolManager = new DeferredToolManager(config.deferredTools, config.toolSearch);
} }
this.runState = new RunStateManager(config.checkpointStorage); this.runState = config.runState ?? new RunStateManager(config.checkpointStorage);
this.eventBus = config.eventBus ?? new AgentEventBus(); this.eventBus = config.eventBus ?? new AgentEventBus();
this.modelCost = config.modelCost;
this.currentState = { this.currentState = {
persistence: undefined, persistence: undefined,
status: 'idle', status: 'idle',
@ -395,6 +411,7 @@ export class AgentRuntime {
* observer cycles) to settle. Safe to call multiple times. * observer cycles) to settle. Safe to call multiple times.
*/ */
async dispose(): Promise<void> { async dispose(): Promise<void> {
this.eventBus.dispose();
await this.backgroundTasks.flush(); await this.backgroundTasks.flush();
} }
@ -413,7 +430,6 @@ export class AgentRuntime {
input: AgentMessage[] | string, input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions, options?: RunOptions & ExecutionOptions,
): Promise<GenerateResult> { ): Promise<GenerateResult> {
const runId = generateRunId();
let list: AgentMessageList | undefined = undefined; let list: AgentMessageList | undefined = undefined;
try { try {
const initializedList = await this.initRun(input, options); const initializedList = await this.initRun(input, options);
@ -421,10 +437,10 @@ export class AgentRuntime {
const rawResult = await this.withTelemetryRootSpan( const rawResult = await this.withTelemetryRootSpan(
'generate', 'generate',
options, options,
runId, this.runId,
async () => await this.runGenerateLoop({ list: initializedList, options, runId }), async () => await this.runGenerateLoop({ list: initializedList, options }),
); );
return this.finalizeGenerate(rawResult, list, runId); return this.finalizeGenerate(rawResult, list);
} catch (error) { } catch (error) {
await this.flushTelemetry(options); await this.flushTelemetry(options);
const isAbort = this.eventBus.isAborted; const isAbort = this.eventBus.isAborted;
@ -432,7 +448,13 @@ export class AgentRuntime {
if (!isAbort) { if (!isAbort) {
this.eventBus.emit({ type: AgentEvent.Error, message: String(error), error }); this.eventBus.emit({ type: AgentEvent.Error, message: String(error), error });
} }
return { runId, messages: list?.responseDelta() ?? [], finishReason: 'error', error }; return {
runId: this.runId,
messages: list?.responseDelta() ?? [],
finishReason: 'error',
error,
getState: () => this.getState(),
};
} }
} }
@ -441,7 +463,6 @@ export class AgentRuntime {
input: AgentMessage[] | string, input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions, options?: RunOptions & ExecutionOptions,
): Promise<StreamResult> { ): Promise<StreamResult> {
const runId = generateRunId();
let list: AgentMessageList; let list: AgentMessageList;
try { try {
list = await this.initRun(input, options); list = await this.initRun(input, options);
@ -451,10 +472,14 @@ export class AgentRuntime {
if (!isAbort) { if (!isAbort) {
this.eventBus.emit({ type: AgentEvent.Error, message: String(error), error }); this.eventBus.emit({ type: AgentEvent.Error, message: String(error), error });
} }
return { runId, stream: makeErrorStream(error) }; return { runId: this.runId, stream: makeErrorStream(error), getState: () => this.getState() };
} }
return { runId, stream: this.startStreamLoop({ list, options, runId }) }; return {
runId: this.runId,
stream: this.startStreamLoop({ list, options }),
getState: () => this.getState(),
};
} }
/** /**
@ -480,8 +505,9 @@ export class AgentRuntime {
data: unknown, data: unknown,
options: { runId: string; toolCallId: string } & ExecutionOptions, options: { runId: string; toolCallId: string } & ExecutionOptions,
): Promise<GenerateResult | StreamResult> { ): Promise<GenerateResult | StreamResult> {
const state = await this.runState.resume(options.runId); this.runId = options.runId;
if (!state) throw new Error(`No suspended run found for runId: ${options.runId}`); const state = await this.runState.resume(this.runId);
if (!state) throw new Error(`No suspended run found for runId: ${this.runId}`);
const toolCall = state.pendingToolCalls[options.toolCallId]; const toolCall = state.pendingToolCalls[options.toolCallId];
if (!toolCall) throw new Error(`No tool call found for toolCallId: ${options.toolCallId}`); if (!toolCall) throw new Error(`No tool call found for toolCallId: ${options.toolCallId}`);
@ -495,7 +521,8 @@ export class AgentRuntime {
if (!toolForValidation) throw new Error(`Tool ${toolCall.toolName} not found`); if (!toolForValidation) throw new Error(`Tool ${toolCall.toolName} not found`);
let resumeData: unknown = data; let resumeData: unknown = data;
if (toolForValidation.resumeSchema) {
if (!isCancellation(resumeData) && toolForValidation.resumeSchema) {
const parseResult = await parseWithSchema(toolForValidation.resumeSchema, data); const parseResult = await parseWithSchema(toolForValidation.resumeSchema, data);
if (!parseResult.success) { if (!parseResult.success) {
throw new Error(`Invalid resume payload: ${parseResult.error}`); throw new Error(`Invalid resume payload: ${parseResult.error}`);
@ -554,29 +581,28 @@ export class AgentRuntime {
const rawResult = await this.withTelemetryRootSpan( const rawResult = await this.withTelemetryRootSpan(
'generate', 'generate',
resumeOptions, resumeOptions,
options.runId, this.runId,
async () => async () =>
await this.runGenerateLoop({ await this.runGenerateLoop({
list, list,
options: resumeOptions, options: resumeOptions,
runId: options.runId,
pendingResume, pendingResume,
}), }),
); );
if (!rawResult.pendingSuspend) { if (!rawResult.pendingSuspend) {
await this.cleanupRun(options.runId); await this.cleanupRun();
} }
return this.finalizeGenerate(rawResult, list, options.runId); return this.finalizeGenerate(rawResult, list);
} }
return { return {
runId: options.runId, runId: this.runId,
stream: this.startStreamLoop({ stream: this.startStreamLoop({
list, list,
options: resumeOptions, options: resumeOptions,
runId: options.runId,
pendingResume, pendingResume,
}), }),
getState: () => this.getState(),
}; };
} catch (error) { } catch (error) {
const isAbort = this.eventBus.isAborted; const isAbort = this.eventBus.isAborted;
@ -586,13 +612,14 @@ export class AgentRuntime {
} }
if (method === 'generate') { if (method === 'generate') {
return { return {
runId: options.runId, runId: this.runId,
messages: [], messages: [],
finishReason: 'error' as const, finishReason: 'error' as const,
error, error,
getState: () => this.getState(),
}; };
} }
return { runId: options.runId, stream: makeErrorStream(error) }; return { runId: this.runId, stream: makeErrorStream(error), getState: () => this.getState() };
} }
} }
@ -620,11 +647,6 @@ export class AgentRuntime {
} }
} }
// Semantic recall — retrieve relevant past messages beyond the history window
if (this.config.semanticRecall && options?.persistence?.threadId) {
await this.performSemanticRecall(list, input, options.persistence, options.executionCounter);
}
await this.setListObservationLogMemory(list, options?.persistence); await this.setListObservationLogMemory(list, options?.persistence);
list.addInput(input); list.addInput(input);
@ -659,117 +681,6 @@ export class AgentRuntime {
}); });
} }
/**
* Perform semantic recall: embed the user's query, search for relevant past messages,
* expand by messageRange, deduplicate against history, and inject into the list.
*/
private async performSemanticRecall(
list: AgentMessageList,
input: AgentMessage[],
persistence: AgentPersistenceOptions,
executionCounter?: AgentExecutionCounter,
): Promise<void> {
if (!this.config.semanticRecall || !this.config.memory) return;
const userText = input
.filter((m) => isLlmMessage(m) && m.role === 'user')
.flatMap((m) => (isLlmMessage(m) ? m.content : []))
.filter((c): c is { type: 'text'; text: string } => c.type === 'text')
.map((c) => c.text)
.join(' ');
if (!userText) return;
let recalled: AgentDbMessage[] = [];
if (this.config.memory.queryEmbeddings && this.config.semanticRecall.embedder) {
// Tier 3: runtime embeds the query, backend does vector search
const { embed } = getAiSdk();
const embeddingModel = createEmbeddingModel(
this.config.semanticRecall.embedder,
this.config.semanticRecall.apiKey,
);
const { embedding, usage } = await embed({ model: embeddingModel, value: userText });
incrementTokenCountFromUsage(executionCounter, usage);
const hits = await this.config.memory.queryEmbeddings({
scope: this.config.semanticRecall.scope ?? 'resource',
threadId: persistence.threadId,
resourceId: persistence.resourceId,
vector: embedding,
topK: this.config.semanticRecall.topK,
});
if (hits.length > 0) {
const hitIds = new Set(hits.map((h) => h.id));
// TODO: add getMessagesByIds() to BuiltMemory to avoid loading all messages.
const allMsgs = await this.config.memory.getMessages(persistence.threadId);
if (this.config.semanticRecall.messageRange) {
recalled = this.expandMessageRange(
allMsgs,
hitIds,
this.config.semanticRecall.messageRange,
);
} else {
recalled = allMsgs.filter((m) => {
const id = m.id;
return id !== undefined && hitIds.has(id);
});
}
}
} else if (this.config.memory.search) {
// Fallback: high-level search (backend handles everything)
recalled = await this.config.memory.search(userText, {
threadId: persistence.threadId,
resourceId: persistence.resourceId,
topK: this.config.semanticRecall.topK,
messageRange: this.config.semanticRecall.messageRange,
});
}
if (recalled.length === 0) return;
// Deduplicate against already-loaded history by message ID
const { historyIds } = list.serialize();
const historyIdSet = new Set(historyIds);
const newRecalled = recalled.filter((m) => {
const id = m.id;
return !id || !historyIdSet.has(id);
});
if (newRecalled.length > 0) {
list.addHistory(newRecalled);
}
}
/** Expand hit IDs by messageRange (before/after) within the ordered message list. */
private expandMessageRange(
allMsgs: AgentDbMessage[],
hitIds: Set<string>,
range: { before: number; after: number },
): AgentDbMessage[] {
const expandedIds = new Set<string>();
for (const msg of allMsgs) {
const id = 'id' in msg && typeof msg.id === 'string' ? msg.id : undefined;
if (!id || !hitIds.has(id)) continue;
const idx = allMsgs.indexOf(msg);
const start = Math.max(0, idx - (range.before ?? 0));
const end = Math.min(allMsgs.length - 1, idx + (range.after ?? 0));
for (let i = start; i <= end; i++) {
const el = allMsgs[i];
const mid = 'id' in el && typeof el.id === 'string' ? el.id : undefined;
if (mid) expandedIds.add(mid);
}
}
return allMsgs.filter((m) => {
const mid = 'id' in m && typeof m.id === 'string' ? m.id : undefined;
return mid && expandedIds.has(mid);
});
}
/** /**
* Common setup for generate() and stream(): reset abort state, transition to running, * Common setup for generate() and stream(): reset abort state, transition to running,
* emit AgentStart, fetch model cost, normalize input, and build the message list. * emit AgentStart, fetch model cost, normalize input, and build the message list.
@ -795,18 +706,13 @@ export class AgentRuntime {
* Post-loop finalization for generate: apply cost, set model id, roll up sub-agent usage, * Post-loop finalization for generate: apply cost, set model id, roll up sub-agent usage,
* transition to success, and emit AgentEnd. Returns the finalized result. * transition to success, and emit AgentEnd. Returns the finalized result.
*/ */
private finalizeGenerate( private finalizeGenerate(result: GenerateResult, list: AgentMessageList): GenerateResult {
result: GenerateResult, result.runId = this.runId;
list: AgentMessageList,
runId: string,
): GenerateResult {
result.runId = runId;
result.usage = this.applyCost(result.usage); result.usage = this.applyCost(result.usage);
result.model = this.modelIdString; result.model = this.modelIdString;
const finalized = applySubAgentUsage(result);
this.updateState({ status: 'success', messageList: list.serialize() }); this.updateState({ status: 'success', messageList: list.serialize() });
this.eventBus.emit({ type: AgentEvent.AgentEnd, messages: finalized.messages }); this.eventBus.emit({ type: AgentEvent.AgentEnd, messages: result.messages });
return finalized; return { ...result, getState: () => this.getState() };
} }
/** Resolve telemetry: own config wins, then inherited from options, then nothing. */ /** Resolve telemetry: own config wins, then inherited from options, then nothing. */
@ -842,6 +748,17 @@ export class AgentRuntime {
}, },
}; };
} }
private buildSmoothStreamTransformOptions(options?: ExecutionOptions): {
experimental_transform?: StreamTextTransform<ToolSet>;
} {
if (options?.smoothStream === false) return {};
const { smoothStream } = loadAi();
return { experimental_transform: smoothStream(options?.smoothStream ?? {}) };
}
/** Map resolved telemetry to AI SDK's experimental_telemetry shape. */ /** Map resolved telemetry to AI SDK's experimental_telemetry shape. */
private buildTelemetryOptions(options?: ExecutionOptions): { private buildTelemetryOptions(options?: ExecutionOptions): {
experimental_telemetry?: TelemetrySettings; experimental_telemetry?: TelemetrySettings;
@ -1004,14 +921,13 @@ export class AgentRuntime {
/** Core generate loop using generateText (non-streaming). */ /** Core generate loop using generateText (non-streaming). */
private async runGenerateLoop(ctx: LoopContext): Promise<GenerateResult> { private async runGenerateLoop(ctx: LoopContext): Promise<GenerateResult> {
const { list, options, runId, pendingResume } = ctx; const { list, options, pendingResume } = ctx;
this.hydrateDeferredToolsFromList(list); this.hydrateDeferredToolsFromList(list);
let totalUsage: TokenUsage | undefined; let totalUsage: TokenUsage | undefined;
let lastFinishReason: FinishReason = 'stop'; let lastFinishReason: FinishReason = 'stop';
let structuredOutput: unknown; let structuredOutput: unknown;
const toolCallSummary: ToolResultEntry[] = []; const toolCallSummary: ToolResultEntry[] = [];
const collectedSubAgentUsage: SubAgentUsage[] = [];
// Resolve pending tool calls from a resumed run before the first LLM call. // Resolve pending tool calls from a resumed run before the first LLM call.
const runTelemetry = this.resolveTelemetry(options); const runTelemetry = this.resolveTelemetry(options);
@ -1027,7 +943,7 @@ export class AgentRuntime {
const pendingToolCtx: ToolBatchContext = { const pendingToolCtx: ToolBatchContext = {
toolMap: pendingLoopContext.toolMap, toolMap: pendingLoopContext.toolMap,
list, list,
runId, runId: this.runId,
persistence: options?.persistence, persistence: options?.persistence,
telemetry: runTelemetry, telemetry: runTelemetry,
executionCounter: options?.executionCounter, executionCounter: options?.executionCounter,
@ -1044,7 +960,6 @@ export class AgentRuntime {
for (const r of batch.results) { for (const r of batch.results) {
toolCallSummary.push(r.toolEntry); toolCallSummary.push(r.toolEntry);
if (r.subAgentUsage) collectedSubAgentUsage.push(...r.subAgentUsage);
} }
if (Object.keys(batch.pending).length > 0) { if (Object.keys(batch.pending).length > 0) {
@ -1053,7 +968,6 @@ export class AgentRuntime {
options, options,
list, list,
totalUsage, totalUsage,
runId,
maxIterations, maxIterations,
iterationCount, iterationCount,
); );
@ -1070,6 +984,7 @@ export class AgentRuntime {
suspendPayload: s.payload, suspendPayload: s.payload,
resumeSchema: s.resumeSchema, resumeSchema: s.resumeSchema,
})), })),
getState: () => this.getState(),
}; };
} }
} }
@ -1123,7 +1038,7 @@ export class AgentRuntime {
const batch = await this.iterateToolCallsConcurrent({ const batch = await this.iterateToolCallsConcurrent({
toolMap, toolMap,
list, list,
runId, runId: this.runId,
persistence: options?.persistence, persistence: options?.persistence,
telemetry: runTelemetry, telemetry: runTelemetry,
executionCounter: options?.executionCounter, executionCounter: options?.executionCounter,
@ -1132,7 +1047,6 @@ export class AgentRuntime {
for (const r of batch.results) { for (const r of batch.results) {
toolCallSummary.push(r.toolEntry); toolCallSummary.push(r.toolEntry);
if (r.subAgentUsage) collectedSubAgentUsage.push(...r.subAgentUsage);
} }
if (Object.keys(batch.pending).length > 0) { if (Object.keys(batch.pending).length > 0) {
@ -1141,7 +1055,6 @@ export class AgentRuntime {
options, options,
list, list,
totalUsage, totalUsage,
runId,
maxIterations, maxIterations,
iterationCount + 1, iterationCount + 1,
); );
@ -1158,6 +1071,7 @@ export class AgentRuntime {
suspendPayload: s.payload, suspendPayload: s.payload,
resumeSchema: s.resumeSchema, resumeSchema: s.resumeSchema,
})), })),
getState: () => this.getState(),
}; };
} }
@ -1189,13 +1103,13 @@ export class AgentRuntime {
} }
return { return {
runId: runId ?? '', runId: this.runId,
messages: list.responseDelta(), messages: list.responseDelta(),
finishReason: lastFinishReason, finishReason: lastFinishReason,
usage: totalUsage, usage: totalUsage,
...(structuredOutput !== undefined && { structuredOutput }), ...(structuredOutput !== undefined && { structuredOutput }),
...(toolCallSummary.length > 0 && { toolCalls: toolCallSummary }), ...(toolCallSummary.length > 0 && { toolCalls: toolCallSummary }),
...(collectedSubAgentUsage.length > 0 && { subAgentUsage: collectedSubAgentUsage }), getState: () => this.getState(),
}; };
} }
@ -1204,7 +1118,7 @@ export class AgentRuntime {
* Returns the readable side immediately; the loop runs in the background. * Returns the readable side immediately; the loop runs in the background.
*/ */
private startStreamLoop(ctx: LoopContext): ReadableStream<StreamChunk> { private startStreamLoop(ctx: LoopContext): ReadableStream<StreamChunk> {
const { options, runId } = ctx; const { options } = ctx;
const { readable, writable } = new TransformStream<StreamChunk, StreamChunk>(); const { readable, writable } = new TransformStream<StreamChunk, StreamChunk>();
const writer = writable.getWriter(); const writer = writable.getWriter();
@ -1212,30 +1126,55 @@ export class AgentRuntime {
// can show a mid-flight indicator between the LLM's tool-call message // can show a mid-flight indicator between the LLM's tool-call message
// and the eventual tool-result message. Writer queues writes in order // and the eventual tool-result message. Writer queues writes in order
// so the fire-and-forget is safe. // so the fire-and-forget is safe.
const onToolExecutionStart = (data: AgentEventData): void => { const writeEventChunk = (chunk: StreamChunk): void => {
if (data.type !== AgentEvent.ToolExecutionStart) return;
// Swallow rejections: if the writer is already closed/errored (e.g. // Swallow rejections: if the writer is already closed/errored (e.g.
// an abort raced ahead of the subscription cleanup) there is nothing // an abort raced ahead of the subscription cleanup) there is nothing
// useful to do with the chunk. // useful to do with the chunk.
writer writer.write(chunk).catch(() => {});
.write({ };
type: 'tool-execution-start', const onToolExecutionStart = (data: AgentEventData): void => {
toolCallId: data.toolCallId, if (data.type !== AgentEvent.ToolExecutionStart) return;
toolName: data.toolName, writeEventChunk({
}) type: 'tool-execution-start',
.catch(() => {}); toolCallId: data.toolCallId,
toolName: data.toolName,
startTime: Date.now(),
});
};
const onToolExecutionEnd = (data: AgentEventData): void => {
if (data.type !== AgentEvent.ToolExecutionEnd) return;
writeEventChunk({
type: 'tool-execution-end',
toolCallId: data.toolCallId,
toolName: data.toolName,
isError: data.isError,
endTime: Date.now(),
});
};
const onSubAgentStarted = (data: AgentEventData): void => {
if (data.type !== AgentEvent.SubAgentStarted) return;
const { type: _type, ...payload } = data;
writeEventChunk({ type: 'subagent-started', ...payload });
};
const onSubAgentCompleted = (data: AgentEventData): void => {
if (data.type !== AgentEvent.SubAgentCompleted) return;
const { type: _type, ...payload } = data;
writeEventChunk({ type: 'subagent-completed', ...payload });
}; };
this.eventBus.on(AgentEvent.ToolExecutionStart, onToolExecutionStart); this.eventBus.on(AgentEvent.ToolExecutionStart, onToolExecutionStart);
this.eventBus.on(AgentEvent.ToolExecutionEnd, onToolExecutionEnd);
this.eventBus.on(AgentEvent.SubAgentStarted, onSubAgentStarted);
this.eventBus.on(AgentEvent.SubAgentCompleted, onSubAgentCompleted);
this.withTelemetryRootSpan( this.withTelemetryRootSpan(
'stream', 'stream',
options, options,
runId, this.runId,
async () => await this.runStreamLoop({ ...ctx, writer }), async () => await this.runStreamLoop({ ...ctx, writer }),
) )
.catch(async (error: unknown) => { .catch(async (error: unknown) => {
await this.flushTelemetry(options); await this.flushTelemetry(options);
await this.cleanupRun(runId); await this.cleanupRun();
try { try {
await writer.write({ type: 'error', error }); await writer.write({ type: 'error', error });
await writer.write({ type: 'finish', finishReason: 'error' }); await writer.write({ type: 'finish', finishReason: 'error' });
@ -1246,6 +1185,9 @@ export class AgentRuntime {
}) })
.finally(() => { .finally(() => {
this.eventBus.off(AgentEvent.ToolExecutionStart, onToolExecutionStart); this.eventBus.off(AgentEvent.ToolExecutionStart, onToolExecutionStart);
this.eventBus.off(AgentEvent.ToolExecutionEnd, onToolExecutionEnd);
this.eventBus.off(AgentEvent.SubAgentStarted, onSubAgentStarted);
this.eventBus.off(AgentEvent.SubAgentCompleted, onSubAgentCompleted);
}); });
return readable; return readable;
@ -1255,7 +1197,7 @@ export class AgentRuntime {
private async runStreamLoop( private async runStreamLoop(
ctx: LoopContext & { writer: WritableStreamDefaultWriter<StreamChunk> }, ctx: LoopContext & { writer: WritableStreamDefaultWriter<StreamChunk> },
): Promise<void> { ): Promise<void> {
const { list, options, runId, pendingResume, writer } = ctx; const { list, options, pendingResume, writer } = ctx;
this.hydrateDeferredToolsFromList(list); this.hydrateDeferredToolsFromList(list);
const writeChunk = async (chunk: StreamChunk): Promise<void> => { const writeChunk = async (chunk: StreamChunk): Promise<void> => {
@ -1265,14 +1207,13 @@ export class AgentRuntime {
let totalUsage: TokenUsage | undefined; let totalUsage: TokenUsage | undefined;
let lastFinishReason: FinishReason = 'stop'; let lastFinishReason: FinishReason = 'stop';
let structuredOutput: unknown; let structuredOutput: unknown;
const collectedSubAgentUsage: SubAgentUsage[] = [];
const maxIterations = options?.maxIterations ?? MAX_LOOP_ITERATIONS; const maxIterations = options?.maxIterations ?? MAX_LOOP_ITERATIONS;
let iterationCount = options?.iterationCount ?? 0; let iterationCount = options?.iterationCount ?? 0;
let reachedStopCondition = false; let reachedStopCondition = false;
const { streamText } = loadAi(); const { streamText } = loadAi();
const closeStreamWithError = async (error: unknown, status: AgentRunState): Promise<void> => { const closeStreamWithError = async (error: unknown, status: AgentRunState): Promise<void> => {
await this.cleanupRun(runId); await this.cleanupRun();
this.updateState({ status }); this.updateState({ status });
await writer.write({ type: 'error', error }); await writer.write({ type: 'error', error });
await writer.write({ type: 'finish', finishReason: 'error' }); await writer.write({ type: 'finish', finishReason: 'error' });
@ -1299,7 +1240,7 @@ export class AgentRuntime {
const pendingToolCtx: ToolBatchContext = { const pendingToolCtx: ToolBatchContext = {
toolMap: pendingLoopContext.toolMap, toolMap: pendingLoopContext.toolMap,
list, list,
runId, runId: this.runId,
persistence: options?.persistence, persistence: options?.persistence,
telemetry: runTelemetry, telemetry: runTelemetry,
executionCounter: options?.executionCounter, executionCounter: options?.executionCounter,
@ -1312,12 +1253,12 @@ export class AgentRuntime {
}); });
for (const r of batch.results) { for (const r of batch.results) {
if (r.subAgentUsage) collectedSubAgentUsage.push(...r.subAgentUsage);
await writer.write({ await writer.write({
type: 'tool-result', type: 'tool-result',
toolCallId: r.toolCallId, toolCallId: r.toolCallId,
toolName: r.toolName, toolName: r.toolName,
output: r.modelOutput, output: r.modelOutput,
...(r.toolEntry.canceled ? { canceled: true } : {}),
}); });
if (r.customMessage) { if (r.customMessage) {
await writer.write({ type: 'message', message: r.customMessage }); await writer.write({ type: 'message', message: r.customMessage });
@ -1340,7 +1281,6 @@ export class AgentRuntime {
options, options,
list, list,
totalUsage, totalUsage,
runId,
maxIterations, maxIterations,
iterationCount, iterationCount,
); );
@ -1386,6 +1326,7 @@ export class AgentRuntime {
: {}), : {}),
...(staticLoopContext.outputSpec ? { output: staticLoopContext.outputSpec } : {}), ...(staticLoopContext.outputSpec ? { output: staticLoopContext.outputSpec } : {}),
...this.buildAiSdkOptions(toolMap, options), ...this.buildAiSdkOptions(toolMap, options),
...this.buildSmoothStreamTransformOptions(options),
}); });
// Consume the stream. When the AbortSignal fires mid-stream the // Consume the stream. When the AbortSignal fires mid-stream the
@ -1398,8 +1339,37 @@ export class AgentRuntime {
// `start-step` / `finish-step` are passed through so consumers // `start-step` / `finish-step` are passed through so consumers
// can use them as LLM-iteration boundaries. // can use them as LLM-iteration boundaries.
if (chunk.type === 'finish') continue; if (chunk.type === 'finish') continue;
// Provider-executed tools (e.g. native web search) skip the
// local execution loop that emits tool-execution lifecycle
// events via the event bus. Stamp them here at chunk-arrival
// time so live chat and the persisted timeline both show a
// duration. A failed call arrives as a `tool-error` part
// (never a `tool-result`), so close its timing there too.
if (
(chunk.type === 'tool-result' || chunk.type === 'tool-error') &&
chunk.providerExecuted
) {
await writeChunk({
type: 'tool-execution-end',
toolCallId: chunk.toolCallId,
toolName: chunk.toolName ?? '',
isError: chunk.type === 'tool-error',
endTime: Date.now(),
});
}
const converted = convertChunk(chunk); const converted = convertChunk(chunk);
if (converted) await writeChunk(converted); if (converted) await writeChunk(converted);
if (chunk.type === 'tool-call' && chunk.providerExecuted) {
await writeChunk({
type: 'tool-execution-start',
toolCallId: chunk.toolCallId,
toolName: chunk.toolName ?? '',
startTime: Date.now(),
});
}
} }
} catch (streamError) { } catch (streamError) {
if (await handleAbort()) return; if (await handleAbort()) return;
@ -1442,7 +1412,7 @@ export class AgentRuntime {
const batch = await this.iterateToolCallsConcurrent({ const batch = await this.iterateToolCallsConcurrent({
toolMap, toolMap,
list, list,
runId, runId: this.runId,
persistence: options?.persistence, persistence: options?.persistence,
telemetry: runTelemetry, telemetry: runTelemetry,
executionCounter: options?.executionCounter, executionCounter: options?.executionCounter,
@ -1452,12 +1422,12 @@ export class AgentRuntime {
if (await handleAbort()) return; if (await handleAbort()) return;
for (const r of batch.results) { for (const r of batch.results) {
if (r.subAgentUsage) collectedSubAgentUsage.push(...r.subAgentUsage);
await writer.write({ await writer.write({
type: 'tool-result', type: 'tool-result',
toolCallId: r.toolCallId, toolCallId: r.toolCallId,
toolName: r.toolName, toolName: r.toolName,
output: r.modelOutput, output: r.modelOutput,
...(r.toolEntry.canceled ? { canceled: true } : {}),
}); });
if (r.customMessage) { if (r.customMessage) {
await writer.write({ type: 'message', message: r.customMessage }); await writer.write({ type: 'message', message: r.customMessage });
@ -1480,7 +1450,6 @@ export class AgentRuntime {
options, options,
list, list,
totalUsage, totalUsage,
runId,
maxIterations, maxIterations,
iterationCount + 1, iterationCount + 1,
); );
@ -1513,18 +1482,12 @@ export class AgentRuntime {
} }
const costUsage = this.applyCost(totalUsage); const costUsage = this.applyCost(totalUsage);
const parentCost = costUsage?.cost ?? 0;
const subCost = collectedSubAgentUsage.reduce((sum, s) => sum + (s.usage.cost ?? 0), 0);
await writer.write({ await writer.write({
type: 'finish', type: 'finish',
finishReason: lastFinishReason, finishReason: lastFinishReason,
...(costUsage && { usage: costUsage }), ...(costUsage && { usage: costUsage }),
model: this.modelIdString, model: this.modelIdString,
...(structuredOutput !== undefined && { structuredOutput }), ...(structuredOutput !== undefined && { structuredOutput }),
...(collectedSubAgentUsage.length > 0 && {
subAgentUsage: collectedSubAgentUsage,
totalCost: parentCost + subCost,
}),
}); });
try { try {
@ -1546,7 +1509,7 @@ export class AgentRuntime {
} }
} }
await this.cleanupRun(runId); await this.cleanupRun();
await this.flushTelemetry(options); await this.flushTelemetry(options);
this.updateState({ status: 'success', messageList: list.serialize() }); this.updateState({ status: 'success', messageList: list.serialize() });
@ -1574,16 +1537,6 @@ export class AgentRuntime {
// Memory jobs receive the execution counter so their LLM and embedding // Memory jobs receive the execution counter so their LLM and embedding
// usage contributes to token_count. // usage contributes to token_count.
// Generate and save embeddings if semantic recall is configured
if (this.config.semanticRecall?.embedder && this.config.memory.saveEmbeddings) {
await this.saveEmbeddingsForMessages(
options.persistence.threadId,
options.persistence.resourceId,
delta,
options.executionCounter,
);
}
const observationTasks = this.scheduleObservationLogJobs( const observationTasks = this.scheduleObservationLogJobs(
options.persistence, options.persistence,
options.executionCounter, options.executionCounter,
@ -1776,51 +1729,6 @@ export class AgentRuntime {
}; };
} }
private async saveEmbeddingsForMessages(
threadId: string,
resourceId: string | undefined,
messages: AgentDbMessage[],
executionCounter?: AgentExecutionCounter,
): Promise<void> {
// Extract text from user and assistant messages
const embeddable: Array<{ id: string; text: string }> = [];
for (const msg of messages) {
if (!isLlmMessage(msg) || (msg.role !== 'user' && msg.role !== 'assistant')) continue;
const text = msg.content
.filter((c): c is { type: 'text'; text: string } => c.type === 'text')
.map((c) => c.text)
.join('\n');
if (!text) continue;
embeddable.push({ id: msg.id, text });
}
if (embeddable.length === 0) return;
const embedder = this.config.semanticRecall?.embedder;
if (!embedder) return;
const { embedMany } = getAiSdk();
const embeddingModel = createEmbeddingModel(embedder, this.config.semanticRecall?.apiKey);
const { embeddings, usage } = await embedMany({
model: embeddingModel,
values: embeddable.map((e) => e.text),
});
incrementTokenCountFromUsage(executionCounter, usage);
await this.config.memory!.saveEmbeddings!({
scope: this.config.semanticRecall?.scope ?? 'resource',
threadId,
resourceId,
entries: embeddable.map((e, i) => ({
id: e.id,
vector: embeddings[i],
text: e.text,
model: embedder,
})),
});
}
/** Build the providerOptions object for thinking/reasoning config. */ /** Build the providerOptions object for thinking/reasoning config. */
private buildThinkingProviderOptions(): Record<string, Record<string, unknown>> | undefined { private buildThinkingProviderOptions(): Record<string, Record<string, unknown>> | undefined {
if (!this.config.thinking) return undefined; if (!this.config.thinking) return undefined;
@ -1996,7 +1904,6 @@ export class AgentRuntime {
input: toolInput, input: toolInput,
toolEntry: result.value.toolEntry, toolEntry: result.value.toolEntry,
modelOutput: result.value.modelOutput, modelOutput: result.value.modelOutput,
subAgentUsage: result.value.subAgentUsage,
customMessage: result.value.customMessage, customMessage: result.value.customMessage,
}); });
} else if (result.value.outcome === 'error') { } else if (result.value.outcome === 'error') {
@ -2099,9 +2006,44 @@ export class AgentRuntime {
input: resumedEntry.input, input: resumedEntry.input,
toolEntry: processResult.toolEntry, toolEntry: processResult.toolEntry,
modelOutput: processResult.modelOutput, modelOutput: processResult.modelOutput,
subAgentUsage: processResult.subAgentUsage,
customMessage: processResult.customMessage, customMessage: processResult.customMessage,
}); });
} else if (processResult.outcome === 'cancelled') {
results.push({
toolCallId: resumedEntry.toolCallId,
toolName: resumedToolName,
input: resumedEntry.input,
toolEntry: processResult.toolEntry,
modelOutput: processResult.modelOutput,
});
list.addInput([
{ role: 'user', content: [{ type: 'text', text: processResult.userMessage }] },
]);
for (const id of Object.keys(pendingResume.pendingToolCalls)) {
if (id !== resumedId) {
const siblingEntry = pendingResume.pendingToolCalls[id];
const modelOutput = '[Skipped: a sibling tool call was cancelled]';
list.setToolCallResult(id, modelOutput, {
canceled: true,
});
results.push({
toolCallId: siblingEntry.toolCallId,
toolName: siblingEntry.toolName,
input: siblingEntry.input,
toolEntry: {
tool: siblingEntry.toolName,
input: siblingEntry.input,
output: modelOutput,
transformed: false,
canceled: true,
},
modelOutput,
});
}
}
return { results, suspensions, errors, pending };
} else if (processResult.outcome === 'error') { } else if (processResult.outcome === 'error') {
errors.push({ errors.push({
toolCallId: resumedEntry.toolCallId, toolCallId: resumedEntry.toolCallId,
@ -2238,6 +2180,31 @@ export class AgentRuntime {
return { outcome: 'noop' }; return { outcome: 'noop' };
} }
if (isCancellation(resumeData) && !builtTool.handleCancellation) {
const modelOutput = `[Tool call cancelled. User said: "${resumeData.message}"]`;
this.eventBus.emit({
type: AgentEvent.ToolExecutionEnd,
toolCallId,
toolName,
result: modelOutput,
isError: false,
});
list.setToolCallResult(toolCallId, modelOutput, { canceled: true });
return {
outcome: 'cancelled',
toolEntry: {
tool: toolName,
input: toolInput,
output: modelOutput,
transformed: false,
canceled: true,
},
modelOutput,
userMessage: resumeData.message,
canceled: true,
};
}
if (countToolCall) { if (countToolCall) {
this.incrementToolCallCount(executionCounter); this.incrementToolCallCount(executionCounter);
} }
@ -2261,6 +2228,8 @@ export class AgentRuntime {
await executeTool(toolInput, builtTool, resumeData, resolvedTelemetry, toolCallId, { await executeTool(toolInput, builtTool, resumeData, resolvedTelemetry, toolCallId, {
runId, runId,
persistence, persistence,
emitEvent: (event) => this.eventBus.emit(event),
abortSignal: this.eventBus.signal,
}), }),
); );
} catch (error) { } catch (error) {
@ -2292,30 +2261,21 @@ export class AgentRuntime {
}; };
} }
let actualResult = toolResult;
let extractedSubAgentUsage: SubAgentUsage[] | undefined;
if (isAgentToolResult(toolResult)) {
actualResult = toolResult.output;
extractedSubAgentUsage = toolResult.subAgentUsage;
}
this.eventBus.emit({ this.eventBus.emit({
type: AgentEvent.ToolExecutionEnd, type: AgentEvent.ToolExecutionEnd,
toolCallId, toolCallId,
toolName, toolName,
result: actualResult, result: toolResult,
isError: false, isError: false,
}); });
// Apply toModelOutput transform: the raw result goes to history/events, // Apply toModelOutput transform: the raw result goes to history/events,
// but the transformed version is what the LLM sees as the tool result. // but the transformed version is what the LLM sees as the tool result.
const modelResult = builtTool.toModelOutput const modelResult = builtTool.toModelOutput ? builtTool.toModelOutput(toolResult) : toolResult;
? builtTool.toModelOutput(actualResult)
: actualResult;
list.setToolCallResult(toolCallId, toJsonValue(modelResult)); list.setToolCallResult(toolCallId, toJsonValue(modelResult));
const customMessage = builtTool?.toMessage?.(actualResult); const customMessage = builtTool?.toMessage?.(toolResult);
if (customMessage) { if (customMessage) {
list.addResponse([customMessage]); list.addResponse([customMessage]);
} }
@ -2325,11 +2285,10 @@ export class AgentRuntime {
toolEntry: { toolEntry: {
tool: toolName, tool: toolName,
input: toolInput, input: toolInput,
output: actualResult, output: toolResult,
transformed: !!builtTool.toModelOutput, transformed: !!builtTool.toModelOutput,
}, },
modelOutput: modelResult, modelOutput: modelResult,
subAgentUsage: extractedSubAgentUsage,
customMessage, customMessage,
}; };
} }
@ -2441,19 +2400,16 @@ export class AgentRuntime {
/** /**
* Persist a suspended run state and update the current state snapshot. * Persist a suspended run state and update the current state snapshot.
* Returns the runId (reuses existingRunId when resuming to prevent dangling runs). * Returns the runtime's runId.
*/ */
private async persistSuspension( private async persistSuspension(
pendingToolCalls: Record<string, PendingToolCall>, pendingToolCalls: Record<string, PendingToolCall>,
options: RuntimeExecutionOptions | undefined, options: RuntimeExecutionOptions | undefined,
list: AgentMessageList, list: AgentMessageList,
totalUsage: TokenUsage | undefined, totalUsage: TokenUsage | undefined,
existingRunId?: string,
maxIterations?: number, maxIterations?: number,
iterationCount?: number, iterationCount?: number,
): Promise<string> { ): Promise<string> {
const runId = existingRunId ?? generateRunId();
// Persist loop controls only. providerOptions are intentionally excluded // Persist loop controls only. providerOptions are intentionally excluded
// because they may contain sensitive data (API keys, auth headers). // because they may contain sensitive data (API keys, auth headers).
const resolvedMaxIterations = maxIterations ?? options?.maxIterations; const resolvedMaxIterations = maxIterations ?? options?.maxIterations;
@ -2470,16 +2426,14 @@ export class AgentRuntime {
executionOptions, executionOptions,
...(resolvedIterationCount !== undefined ? { iterationCount: resolvedIterationCount } : {}), ...(resolvedIterationCount !== undefined ? { iterationCount: resolvedIterationCount } : {}),
}; };
await this.runState.suspend(runId, state); await this.runState.suspend(this.runId, state);
this.updateState({ status: 'suspended', pendingToolCalls, messageList: list.serialize() }); this.updateState({ status: 'suspended', pendingToolCalls, messageList: list.serialize() });
return runId; return this.runId;
} }
/** Clean up stored state for a run when it finishes without re-suspending. */ /** Clean up stored state for a run when it finishes without re-suspending. */
private async cleanupRun(runId: string | undefined): Promise<void> { private async cleanupRun(): Promise<void> {
if (runId) { await this.runState.complete(this.runId);
await this.runState.complete(runId);
}
} }
/** Emit a TurnEnd event when an assistant message is present in `newMessages`. */ /** Emit a TurnEnd event when an assistant message is present in `newMessages`. */
@ -2557,7 +2511,7 @@ export class AgentRuntime {
/** /**
* Configured telemetry handle (build-time). Run-time inheritance via * Configured telemetry handle (build-time). Run-time inheritance via
* `ExecutionOptions.parentTelemetry` only applies inside an active * `ExecutionOptions.telemetry` only applies inside an active
* agentic loop; out-of-band callers like `agent.reflect()` see the * agentic loop; out-of-band callers like `agent.reflect()` see the
* builder-time value. * builder-time value.
*/ */

View File

@ -0,0 +1,532 @@
import { z } from 'zod';
import { withSdkOwnedBuiltInMetadata } from './sdk-owned-tool';
import {
assertSubAgentPolicyAllowsChildCount,
createChildSubAgentTaskPath,
type SubAgentTaskPath,
type SubAgentTaskPathPolicy,
} from './sub-agent-task-path';
import { filterLlmMessages } from '../sdk/message';
import { Tool } from '../sdk/tool';
import { AgentEvent } from '../types/runtime/event';
import type { FinishReason, GenerateResult, TokenUsage } from '../types/sdk/agent';
import type { AgentMessage } from '../types/sdk/message';
import type { BuiltTool, ToolContext } from '../types/sdk/tool';
/** Name the delegate tool is constructed with (`new Tool(DELEGATE_SUB_AGENT_TOOL_NAME)`). */
export const DELEGATE_SUB_AGENT_TOOL_NAME = 'delegate_subagent';
/**
* Sentinel sub-agent id. Its value matches the `"inline"` `subAgentId` that the
* input schema tells the model to use for a one-off inline sub-agent.
*/
export const INLINE_SUB_AGENT_ID = 'inline';
/** i18n key — localized in the agent chat UI; see `agents.chat.delegate.childSuspendUnsupported`. */
export const DELEGATED_CHILD_SUSPEND_UNSUPPORTED_MESSAGE =
'agents.chat.delegate.childSuspendUnsupported';
/**
* Key used for the inline delegate tool's metadata.
* NOTE(review): presumably the key under which `DelegateSubAgentToolMetadata`
* is stored on the built tool — confirm against the code that reads/writes it.
*/
export const INLINE_DELEGATE_SUB_AGENT_TOOL_METADATA_KEY = 'inlineDelegateSubAgent';
// Model-facing input: the arguments the LLM fills in when it calls the tool.
// The `.describe(...)` text is what the model reads, so keep it task-oriented.
// `subAgentId`, `taskName` and `goal` are required non-empty strings (`min(1)`);
// `context` and `expectedOutput` are optional free-form strings.
const delegateSubAgentInputSchema = z.object({
subAgentId: z
.string()
.min(1)
.describe(
'Required. Use "inline" for a one-off inline sub-agent. Use an exact configured sub-agent ID only when one is listed and fits the task.',
),
taskName: z
.string()
.min(1)
.describe('Short human-readable name for this delegated task, e.g. "research_api".'),
goal: z.string().min(1).describe('The concrete goal the sub-agent should accomplish.'),
// Optional, but the description stresses that the child sees nothing else,
// so this is the only channel for passing background to it.
context: z
.string()
.optional()
.describe(
'All details the child needs, since it sees nothing else: constraints, paths, data, prior decisions, acceptance criteria, and what you have already tried or ruled out.',
),
expectedOutput: z.string().optional().describe('The expected shape or contents of the answer.'),
});
// Documents the tool result shape for typing/introspection. Note: the handler's
// returned object (not this schema) is what is actually sent back to the model,
// so this is kept in sync with DelegateSubAgentToolOutput by hand.
// Only `status` and `answer` are required; every other field is optional.
const delegateSubAgentOutputSchema = z.object({
status: z.enum(['completed', 'failed', 'suspended']),
// Plain string here; the DelegateSubAgentToolOutput interface narrows it to SubAgentTaskPath.
taskPath: z.string().optional(),
runId: z.string().optional(),
threadId: z.string().optional(),
answer: z.string(),
structuredOutput: z.unknown().optional(),
// Same four fields the interface picks from TokenUsage, each optional.
usage: z
.object({
promptTokens: z.number().optional(),
completionTokens: z.number().optional(),
totalTokens: z.number().optional(),
cost: z.number().optional(),
})
.optional(),
// Plain string here; the interface types this as FinishReason.
finishReason: z.string().optional(),
error: z.string().optional(),
// One entry per suspended tool call; the interface types this field as
// GenerateResult['pendingSuspend'].
pendingSuspend: z
.array(
z.object({
runId: z.string(),
toolCallId: z.string(),
toolName: z.string(),
input: z.unknown(),
suspendPayload: z.unknown(),
resumeSchema: z.unknown().optional(),
}),
)
.optional(),
});
/**
* The arguments the LLM provides when calling delegate_subagent.
* Inferred from `delegateSubAgentInputSchema`, so the type and the runtime
* validation cannot drift apart.
*/
export type DelegateSubAgentInput = z.infer<typeof delegateSubAgentInputSchema>;
/**
* Limits the delegate tool enforces structurally for a delegation: fan-out
* and the on/off switch (see {@link SubAgentTaskPathPolicy}).
*
* Per-run runtime constraints (e.g. a wall-clock timeout) are intentionally not
* here — they're a host concern, enforced inside the `runSubAgent` callback (as
* the n8n CLI runner does).
*/
export type DelegateSubAgentPolicy = SubAgentTaskPathPolicy;
/**
* What a host's `runSubAgent` callback receives: the model's
* {@link DelegateSubAgentInput} plus runtime-derived context the host needs to
* run and link the child. All `parent*` fields come from the parent's tool
* execution context and are used for tracing/linkage, not required to run.
*
* Because this extends {@link DelegateSubAgentInput}, the model-supplied
* `subAgentId`, `taskName`, `goal`, `context` and `expectedOutput` are
* available on the request as well.
*/
export interface DelegateSubAgentRequest extends DelegateSubAgentInput {
/** Direct child path assigned to this delegation (e.g. `/root/research_api_0`). */
taskPath: SubAgentTaskPath;
/** Parent run id (`ctx.runId`), e.g. for memory scoping / correlation. */
parentRunId?: string;
/** Parent's persisted memory thread id (`ctx.persistence.threadId`). */
parentThreadId?: string;
/** Parent's episodic-memory resource id (`ctx.persistence.resourceId`). */
parentResourceId?: string;
/** Parent's tool-call id that triggered this delegation. */
parentToolCallId?: string;
/**
* Parent run's abort signal (`ctx.abortSignal`). Forward it to the child so
* cancelling the parent run also cancels the delegated work.
*/
parentAbortSignal?: AbortSignal;
/** How many siblings the parent already spawned before this one (0-based). */
childCount: number;
/**
* Effective policy for this delegation (fan-out limits and the spawn
* on/off switch; see {@link DelegateSubAgentPolicy}).
*/
policy?: DelegateSubAgentPolicy;
}
/**
* The result a delegation returns to the parent model and to lifecycle events.
* Only `status` and `answer` are always present; the remaining fields depend on
* the outcome and on what the executor reports.
*/
export interface DelegateSubAgentToolOutput {
/** Outcome of the delegation; `error` / `pendingSuspend` accompany 'failed' / 'suspended'. */
status: 'completed' | 'failed' | 'suspended';
/** Echoed back so consumers can correlate the result with the delegation. */
taskPath?: SubAgentTaskPath;
/** The child run's id, when the executor produced one. */
runId?: string;
/**
* The child run's memory thread id (`persistence.threadId`), when the
* executor used one. Surfaced so a consumer can correlate the child run or
* re-supply it to continue the same thread on a later delegation.
*/
threadId?: string;
/** The child's answer — the main payload the parent acts on. */
answer: string;
/** NOTE(review): presumably the child's structured output when it produced one — confirm against the runner. */
structuredOutput?: unknown;
/** Child token usage + cost, surfaced so the parent can account for it. */
usage?: Pick<TokenUsage, 'promptTokens' | 'completionTokens' | 'totalTokens' | 'cost'>;
/** NOTE(review): presumably the finish reason reported by the child run — confirm against the runner. */
finishReason?: FinishReason;
/** Present when status is 'failed'. */
error?: string;
/** Present when status is 'suspended' — child run paused awaiting tool resume. */
pendingSuspend?: GenerateResult['pendingSuspend'];
}
/**
* Options for the `delegate_subagent` tool.
*
* You supply `runSubAgent` the host callback that actually runs the child for
* a delegation and returns its result. Everything else (input/output schema,
* system prompt, task-path bookkeeping, policy enforcement, and the
* `subagent-started` / `-completed` lifecycle events) is owned by
* the tool.
*/
/**
* Helpers passed to a host `runSubAgent` callback so the host can route
* `subAgentId: "inline"` while reusing the SDK inline child runner implementation.
*/
export interface DelegateSubAgentRunnerHelpers {
/** Run a one-off inline child using the parent agent's inherited tool set. */
runInlineSubAgent: (request: DelegateSubAgentRequest) => Promise<DelegateSubAgentToolOutput>;
}
export type DelegateSubAgentRunner = (
request: DelegateSubAgentRequest,
helpers: DelegateSubAgentRunnerHelpers,
) => Promise<DelegateSubAgentToolOutput>;
export interface CreateDelegateSubAgentToolOptions {
/**
* Sub-agents the model may choose between. Listed in the system prompt; the
* model selects one by passing its id as `subAgentId`.
*/
availableSubAgents?: Array<{ id: string; name: string; description?: string }>;
/** Fan-out limits and spawn switch enforced before each delegation. */
policy?: DelegateSubAgentPolicy;
/** Additional local/deferred tool names the host removes from inline children. */
inlineSubAgentBlockedTools?: string[];
/**
* Run the child for this delegation and return its result. When provided, the
* host receives every `subAgentId` (including `"inline"`) and may call
* `helpers.runInlineSubAgent` for inline work.
*/
runSubAgent?: DelegateSubAgentRunner;
}
/**
* Shape stored on the built tool's metadata by `createDelegateSubAgentTool`
* and read back by `getInlineDelegateSubAgentToolOptions`. Currently identical
* to the creation options.
*/
export type DelegateSubAgentToolMetadata = CreateDelegateSubAgentToolOptions;
/**
* Build the generic `delegate_subagent` tool, which lets a parent agent hand a
* bounded subtask to a child agent and get back a concise result.
*
* The tool owns the cross-cutting concerns: the model-facing input/output
* schema, the description + system instruction that teach the LLM when/how to
* delegate, task-path bookkeeping, fan-out policy enforcement, and the
* `subagent-started` / `-completed`
* lifecycle events. You only supply HOW to run the child, via `runSubAgent`.
*
* The returned tool is tagged through `withSdkOwnedBuiltInMetadata`, and every
* option that is not `undefined` is copied under
* `INLINE_DELEGATE_SUB_AGENT_TOOL_METADATA_KEY` so it can be read back with
* `getInlineDelegateSubAgentToolOptions`.
*
* @param options Optional configuration; defaults to an empty object.
* @returns The built tool with SDK-owned and delegate metadata attached.
*
* @example Host-controlled execution (what the n8n CLI does):
* agent.tool(createDelegateSubAgentTool({
* runSubAgent: (request) => runner.run(request),
* availableSubAgents,
* policy: { maxChildren: 5 },
* }));
*/
export function createDelegateSubAgentTool(options: CreateDelegateSubAgentToolOptions = {}) {
// Fan-out counter that drives maxChildren. Keys come from getChildCountKey:
// the run id, else the thread id, else the resource id, else 'adhoc'.
// The map lives as long as this tool instance; nothing here removes entries.
const childCounts = new Map<string, number>();
const tool = new Tool(DELEGATE_SUB_AGENT_TOOL_NAME)
.description(
'Delegate a bounded, self-contained subtask to a focused child agent that runs in an isolated context and returns only a concise final result. ' +
'Use it for reasoning-heavy subtasks, context-flooding investigations, or independent workstreams inside a larger deliverable. ' +
'Do not use it for trivial work, single tool calls, mechanical steps, tasks that need hidden conversation context, or pass-through delegation of the entire user request.',
)
.systemInstruction(
[
'delegate_subagent runs a focused child agent in a fresh, isolated context and returns only its final answer. Always set subAgentId. Use subAgentId: "inline" to run a one-off inline child that inherits your available tools after safety filtering. The child cannot see this conversation or your memory, so everything it needs must be in the call.',
'Use a configured subagent ID only when one is listed and its name/description fits the subtask better than a generic inline child.',
// Expands to zero lines when no sub-agents are configured.
...formatAvailableSubAgents(options.availableSubAgents),
'WHEN TO USE delegate_subagent:\n- The request decomposes into 2+ independent workstreams that can be handled separately.\n- A workstream needs substantial research, review, comparison, or analysis.\n- Doing the work inline would flood your context with intermediate findings.\n- A fresh isolated perspective would materially improve a bounded subtask.',
'WHEN NOT TO USE delegate_subagent:\n- Single-step mechanical work: do it directly.\n- Trivial tasks or one/two tool calls: do them yourself.\n- Tasks that need user interaction or hidden conversation context.\n- Your core synthesis, final judgment, or recommendation.\n- The entire user request as one delegated task; that is pass-through with no value added.',
'HOW TO DELEGATE:\n- Delegate bounded workstreams, not the final answer.\n- Pass all required context, constraints, language/tone, and expected output.\n- If multiple independent workstreams exist, delegate them separately.\n- Inline children inherit your available tools after safety filtering; you cannot change their tool set per delegation.\n- Inspect results and synthesize the final response yourself.\n- Verify side-effect claims before presenting them as done.',
].join('\n'),
)
.input(delegateSubAgentInputSchema)
.output(delegateSubAgentOutputSchema)
.handler(async (input, ctx) => await handleDelegateSubAgent(input, ctx, options, childCounts))
.build();
return withSdkOwnedBuiltInMetadata({
...tool,
metadata: {
...tool.metadata,
// Conditional spreads keep absent options absent instead of storing
// explicit `undefined` values on the metadata object.
[INLINE_DELEGATE_SUB_AGENT_TOOL_METADATA_KEY]: {
...(options.availableSubAgents !== undefined
? { availableSubAgents: options.availableSubAgents }
: {}),
...(options.policy !== undefined ? { policy: options.policy } : {}),
...(options.inlineSubAgentBlockedTools !== undefined
? { inlineSubAgentBlockedTools: options.inlineSubAgentBlockedTools }
: {}),
...(options.runSubAgent !== undefined ? { runSubAgent: options.runSubAgent } : {}),
} satisfies DelegateSubAgentToolMetadata,
},
});
}
/**
 * Read back the delegate options stored on a built tool's metadata.
 *
 * @param tool Any built tool.
 * @returns The stored metadata object, or `undefined` when the tool carries no
 * object under the delegate metadata key. The value's shape is not validated.
 */
export function getInlineDelegateSubAgentToolOptions(
	tool: BuiltTool,
): DelegateSubAgentToolMetadata | undefined {
	const stored = tool.metadata?.[INLINE_DELEGATE_SUB_AGENT_TOOL_METADATA_KEY];
	const isObjectValue = stored !== null && typeof stored === 'object';
	return isObjectValue ? (stored as DelegateSubAgentToolMetadata) : undefined;
}
/**
 * Render the system-prompt lines that list the configured sub-agents.
 *
 * @returns An empty array when nothing is configured; otherwise a heading line
 * followed by one `- id: name[ - description]` bullet per sub-agent.
 */
function formatAvailableSubAgents(
	availableSubAgents: CreateDelegateSubAgentToolOptions['availableSubAgents'],
): string[] {
	if (!availableSubAgents || availableSubAgents.length === 0) {
		return [];
	}
	const lines: string[] = [
		'Configured subagents are available as specialist options. Use subAgentId: "inline" for the default inline child; pass one of these exact IDs only when that specialist is a better fit:',
	];
	for (const { id, name, description } of availableSubAgents) {
		// An empty description is treated the same as a missing one.
		const suffix = description ? ` - ${description}` : '';
		lines.push(`- ${id}: ${name}${suffix}`);
	}
	return lines;
}
/**
 * Tool handler for `delegate_subagent`.
 *
 * Steps, in order:
 * 1. Enforce the fan-out policy against the number of children already
 *    spawned under this parent key.
 * 2. Derive the child's task path and reserve its slot in `childCounts`.
 * 3. Assemble the {@link DelegateSubAgentRequest} from the model input plus
 *    the parent tool context (only defined context fields are copied).
 * 4. Emit the started event, run the child through the host `runSubAgent`
 *    callback, and emit the completed event.
 *
 * Any error is converted into a `status: 'failed'` output (never thrown) so one
 * failed delegation can't abort the parent's run. A completed event is emitted
 * for the failure only when the started event had already been emitted.
 *
 * @param input Validated model input for the delegation.
 * @param ctx Tool context of the parent call.
 * @param options Options the tool was created with.
 * @param childCounts Per-parent fan-out counter; mutated in place.
 * @returns The child's output, or a `failed` output describing the error.
 */
async function handleDelegateSubAgent(
	input: DelegateSubAgentInput,
	ctx: ToolContext,
	options: CreateDelegateSubAgentToolOptions,
	childCounts: Map<string, number>,
): Promise<DelegateSubAgentToolOutput> {
	let taskPath: SubAgentTaskPath | undefined;
	let request: DelegateSubAgentRequest | undefined;
	let startedAt: number | undefined;
	try {
		const childCountKey = getChildCountKey(ctx);
		const childCount = childCounts.get(childCountKey) ?? 0;
		assertSubAgentPolicyAllowsChildCount(childCount, options.policy);
		taskPath = createChildSubAgentTaskPath(input.taskName, childCount);
		// The slot is reserved only after policy and path validation succeeded, so a
		// rejected delegation does not consume fan-out budget.
		childCounts.set(childCountKey, childCount + 1);
		request = {
			...input,
			taskPath,
			childCount,
			...(ctx.runId !== undefined ? { parentRunId: ctx.runId } : {}),
			...(ctx.persistence?.threadId !== undefined
				? { parentThreadId: ctx.persistence.threadId }
				: {}),
			...(ctx.persistence?.resourceId !== undefined
				? { parentResourceId: ctx.persistence.resourceId }
				: {}),
			...(ctx.abortSignal !== undefined ? { parentAbortSignal: ctx.abortSignal } : {}),
			...(ctx.toolCallId !== undefined ? { parentToolCallId: ctx.toolCallId } : {}),
			...(options.policy !== undefined ? { policy: options.policy } : {}),
		};
		startedAt = Date.now();
		emitSubAgentStarted(ctx, request, startedAt);
		if (!options.runSubAgent) {
			throw new Error(
				'delegate_subagent was registered without a runSubAgent callback, and no host runner was provided. Register it on an Agent (for inline delegation) or pass runSubAgent.',
			);
		}
		const output = await options.runSubAgent(request, {
			// Typed as returning a Promise, so report the failure as a rejection rather
			// than a synchronous throw; `.catch()`-style hosts then observe it too.
			runInlineSubAgent: async () =>
				await Promise.reject(
					new Error(
						'delegate_subagent host runner does not support inline delegation without helpers.runInlineSubAgent from an Agent build.',
					),
				),
		});
		emitSubAgentCompleted(ctx, request, output, startedAt);
		return output;
	} catch (error) {
		// Build the failure once so the lifecycle event and the tool output always
		// carry the same message, including for non-Error throwables.
		const failure: DelegateSubAgentToolOutput = {
			status: 'failed',
			...(taskPath !== undefined ? { taskPath } : {}),
			answer: '',
			error: stringifyUnknown(error),
		};
		if (request !== undefined && startedAt !== undefined) {
			emitSubAgentCompleted(ctx, request, failure, startedAt);
		}
		return failure;
	}
}
/**
 * Emit the sub-agent started lifecycle event, if the context can emit events.
 *
 * @param ctx Parent tool context; a missing `emitEvent` makes this a no-op.
 * @param request Delegation request supplying the shared identity fields.
 * @param startedAt Epoch milliseconds at which the delegation started.
 */
function emitSubAgentStarted(
	ctx: ToolContext,
	request: DelegateSubAgentRequest,
	startedAt: number,
): void {
	if (!ctx.emitEvent) return;
	const identity = subAgentLifecycleBase(request);
	ctx.emitEvent({
		type: AgentEvent.SubAgentStarted,
		...identity,
		startedAt,
	});
}
/**
 * Emit the sub-agent completed lifecycle event, if the context can emit events.
 *
 * The finish time is taken when this function is called; `durationMs` is the
 * difference to `startedAt`. Optional output fields are included only when
 * they are defined.
 *
 * @param ctx Parent tool context; a missing `emitEvent` makes this a no-op.
 * @param request Delegation request supplying the shared identity fields.
 * @param output Result of the delegation (successful or failed).
 * @param startedAt Epoch milliseconds at which the delegation started.
 */
function emitSubAgentCompleted(
	ctx: ToolContext,
	request: DelegateSubAgentRequest,
	output: DelegateSubAgentToolOutput,
	startedAt: number,
): void {
	const finishedAt = Date.now();
	if (!ctx.emitEvent) return;
	const { status, runId, threadId, usage, finishReason, error } = output;
	ctx.emitEvent({
		type: AgentEvent.SubAgentCompleted,
		...subAgentLifecycleBase(request),
		status,
		startedAt,
		finishedAt,
		durationMs: finishedAt - startedAt,
		...(runId === undefined ? {} : { runId }),
		...(threadId === undefined ? {} : { threadId }),
		...(usage === undefined ? {} : { usage }),
		...(finishReason === undefined ? {} : { finishReason }),
		...(error === undefined ? {} : { error }),
	});
}
/**
 * Identity fields shared by the started and completed lifecycle events:
 * the task name and path, plus the parent/sub-agent ids that are defined.
 */
function subAgentLifecycleBase(request: DelegateSubAgentRequest) {
	const { taskName, taskPath, parentRunId, parentToolCallId, subAgentId } = request;
	return {
		taskName,
		taskPath,
		...(parentRunId === undefined ? {} : { parentRunId }),
		...(parentToolCallId === undefined ? {} : { parentToolCallId }),
		...(subAgentId === undefined ? {} : { subAgentId }),
	};
}
/**
 * Key under which a parent's spawned-children count is tracked: the first
 * non-nullish value among run id, thread id and resource id, or `'adhoc'`
 * when the context carries none of them.
 */
function getChildCountKey(ctx: ToolContext): string {
	const candidates = [ctx.runId, ctx.persistence?.threadId, ctx.persistence?.resourceId];
	for (const candidate of candidates) {
		// Only nullish values fall through; an empty string is a valid key.
		if (candidate !== undefined && candidate !== null) return candidate;
	}
	return 'adhoc';
}
/**
 * Best-effort conversion of an arbitrary thrown or returned value to a message.
 *
 * - `Error` instances yield their `message`.
 * - Strings are returned as-is.
 * - Numbers, bigints, booleans and `null` go through `String()`.
 * - Everything else is JSON-serialized.
 *
 * Always returns a string: values `JSON.stringify` cannot represent
 * (`undefined`, functions, symbols, cyclic structures) yield `'Unknown error'`.
 */
function stringifyUnknown(value: unknown): string {
	if (value instanceof Error) return value.message;
	if (typeof value === 'string') return value;
	if (
		typeof value === 'number' ||
		typeof value === 'bigint' ||
		typeof value === 'boolean' ||
		value === null
	) {
		return String(value);
	}
	try {
		// JSON.stringify returns undefined (not a string) for undefined, functions
		// and symbols, so the declared `string` result cannot be trusted here.
		const serialized: string | undefined = JSON.stringify(value);
		return serialized ?? 'Unknown error';
	} catch {
		return 'Unknown error';
	}
}
/**
* Optional helpers for a `runSubAgent` implementation.
*
* A host that runs the child by calling `agent.generate(...)`/`stream(...)` can
* reuse these instead of hand-rolling the delegation prompt and the result
* mapping. They are NOT wired into the tool — call them from your `runSubAgent`
* (the n8n CLI runner does).
*/
/**
 * Render the default delegation prompt for a child agent.
 *
 * Sections are separated by a blank line and appear in this order: a fixed
 * intro, the task (`goal`), the optional `context`, the optional
 * `expectedOutput`, and fixed closing instructions. Empty or missing optional
 * fields are left out entirely.
 *
 * @param request Goal plus optional context and expected output.
 * @returns The assembled prompt text.
 */
export function renderDelegateSubAgentPrompt(request: {
	goal: string;
	context?: string;
	expectedOutput?: string;
}): string {
	const { goal, context, expectedOutput } = request;

	const closingInstructions = [
		'Complete this task using the tools available to you. When finished, provide a clear, concise summary of:',
		'- What you did',
		'- What you found or accomplished',
		'- Important outputs, decisions, or evidence',
		'- Any issues, assumptions, or limitations',
		'',
		'If the information above is insufficient, do your best with explicitly stated assumptions and note what was missing, rather than stopping to ask.',
		'',
		'Be thorough but concise -- your response is returned to the parent agent as a summary.',
	].join('\n');

	// A rendered optional section is never empty, so '' safely marks "omitted".
	const optionalSections = [
		context ? `CONTEXT:\n${context}` : '',
		expectedOutput ? `EXPECTED OUTPUT:\n${expectedOutput}` : '',
	].filter((section) => section !== '');

	return [
		'You are a focused subagent working on a specific delegated task.',
		`YOUR TASK:\n${goal}`,
		...optionalSections,
		closingInstructions,
	].join('\n\n');
}
/**
 * Classify a child run's result for the delegate tool output.
 *
 * Failure wins over suspension: a result with an error (or an `'error'`
 * finish reason) is `'failed'` even if it also lists pending suspends. A
 * non-empty `pendingSuspend` list is `'suspended'`; anything else is
 * `'completed'`.
 */
function resolveDelegateSubAgentStatus(
	result: GenerateResult,
): DelegateSubAgentToolOutput['status'] {
	const hasFailed = result.finishReason === 'error' || result.error !== undefined;
	if (hasFailed) return 'failed';

	const pending = result.pendingSuspend;
	if (pending === undefined) return 'completed';
	return pending.length > 0 ? 'suspended' : 'completed';
}
/**
 * Build the `failed` delegate output used when a child run suspends for user
 * input, which is not yet resumable.
 *
 * @param taskPath Task path of the child that suspended.
 * @returns A failed output with an empty answer and the fixed
 * suspend-unsupported error message.
 */
export function failedDelegatedChildSuspendOutput(
	taskPath: SubAgentTaskPath,
): DelegateSubAgentToolOutput {
	const output: DelegateSubAgentToolOutput = {
		status: 'failed',
		taskPath,
		answer: '',
		error: DELEGATED_CHILD_SUSPEND_UNSUPPORTED_MESSAGE,
	};
	return output;
}
/**
 * Map an agent {@link GenerateResult} into the delegate tool's output shape.
 *
 * The answer is the last non-empty assistant text of the run. Optional fields
 * (thread id, structured output, usage, finish reason, error) are copied only
 * when defined; `pendingSuspend` is copied only for a `'suspended'` result.
 *
 * @param taskPath Task path of the child that produced the result.
 * @param result Result returned by the child agent run.
 * @param threadId Optional thread id of the child run.
 */
export function generateResultToDelegateSubAgentOutput(
	taskPath: SubAgentTaskPath,
	result: GenerateResult,
	threadId?: string,
): DelegateSubAgentToolOutput {
	const status = resolveDelegateSubAgentStatus(result);
	const { usage, structuredOutput, finishReason, error, pendingSuspend } = result;
	const includeSuspend = status === 'suspended' && pendingSuspend !== undefined;
	return {
		status,
		taskPath,
		runId: result.runId,
		...(threadId === undefined ? {} : { threadId }),
		answer: lastText(result.messages),
		...(structuredOutput === undefined ? {} : { structuredOutput }),
		...(usage === undefined
			? {}
			: {
					// Copy only the accounting fields; cost stays absent when unknown.
					usage: {
						promptTokens: usage.promptTokens,
						completionTokens: usage.completionTokens,
						totalTokens: usage.totalTokens,
						...(usage.cost === undefined ? {} : { cost: usage.cost }),
					},
				}),
		...(finishReason === undefined ? {} : { finishReason }),
		...(error === undefined ? {} : { error: stringifyUnknown(error) }),
		...(includeSuspend ? { pendingSuspend } : {}),
	};
}
/**
 * Text of the most recent LLM message that has any.
 *
 * Walks the filtered messages from newest to oldest, joins each message's text
 * parts with newlines and trims the result; the first non-empty text wins.
 * Returns `''` when no message contains text.
 */
function lastText(messages: AgentMessage[]): string {
	const llmMessages = filterLlmMessages(messages);
	let index = llmMessages.length;
	while (index-- > 0) {
		const message = llmMessages[index];
		if (!message) continue;
		const textParts: string[] = [];
		for (const part of message.content) {
			if (part.type === 'text') textParts.push(part.text);
		}
		const joined = textParts.join('\n').trim();
		if (joined) return joined;
	}
	return '';
}

View File

@ -146,7 +146,11 @@ export class AgentMessageList {
* Returns the mutated host message, or `undefined` if the toolCallId is * Returns the mutated host message, or `undefined` if the toolCallId is
* not found (internal invariant violation caller should log/throw). * not found (internal invariant violation caller should log/throw).
*/ */
setToolCallResult(toolCallId: string, output: JSONValue): AgentDbMessage | undefined { setToolCallResult(
toolCallId: string,
output: JSONValue,
options?: { canceled?: boolean },
): AgentDbMessage | undefined {
const host = this.findToolCallHost(toolCallId); const host = this.findToolCallHost(toolCallId);
if (!host) return undefined; if (!host) return undefined;
@ -156,6 +160,11 @@ export class AgentMessageList {
const mutableBlock = block; const mutableBlock = block;
mutableBlock.state = 'resolved'; mutableBlock.state = 'resolved';
(mutableBlock as Extract<ContentToolCall, { state: 'resolved' }>).output = output; (mutableBlock as Extract<ContentToolCall, { state: 'resolved' }>).output = output;
if (options?.canceled) {
(mutableBlock as Extract<ContentToolCall, { state: 'resolved' }>).canceled = true;
} else if ('canceled' in mutableBlock) {
delete (mutableBlock as { canceled?: boolean }).canceled;
}
if ('error' in mutableBlock) { if ('error' in mutableBlock) {
delete (mutableBlock as { error: unknown }).error; delete (mutableBlock as { error: unknown }).error;
} }

View File

@ -126,6 +126,19 @@ const LANGUAGE_PROVIDERS: ProviderRegistry = {
return createOpenRouter({ apiKey: creds.apiKey, baseURL: creds.baseURL, fetch })(model); return createOpenRouter({ apiKey: creds.apiKey, baseURL: creds.baseURL, fetch })(model);
}, },
}, },
nvidia: {
build: (creds, model, fetch) => {
const { createOpenAICompatible } =
require('@ai-sdk/openai-compatible') as typeof import('@ai-sdk/openai-compatible');
return createOpenAICompatible({
name: 'nvidia',
baseURL: creds.baseURL ?? 'https://integrate.api.nvidia.com/v1',
apiKey: creds.apiKey,
headers: creds.headers,
fetch,
})(model);
},
},
'azure-openai': { 'azure-openai': {
build: (creds, model, fetch) => { build: (creds, model, fetch) => {
const { createAzure } = require('@ai-sdk/azure') as typeof import('@ai-sdk/azure'); const { createAzure } = require('@ai-sdk/azure') as typeof import('@ai-sdk/azure');

View File

@ -21,6 +21,7 @@ export const PROVIDER_CREDENTIAL_SCHEMAS = {
mistral: apiKeyCreds, mistral: apiKeyCreds,
vercel: apiKeyCreds, vercel: apiKeyCreds,
openrouter: apiKeyCreds, openrouter: apiKeyCreds,
nvidia: apiKeyCreds,
'azure-openai': z.object({ 'azure-openai': z.object({
apiKey: z.string().optional(), apiKey: z.string().optional(),

View File

@ -2,7 +2,7 @@
* Pure utility functions used by AgentRuntime that require no class context. * Pure utility functions used by AgentRuntime that require no class context.
* These are extracted here to keep agent-runtime.ts focused on orchestration logic. * These are extracted here to keep agent-runtime.ts focused on orchestration logic.
*/ */
import type { GenerateResult, StreamChunk, TokenUsage } from '../types'; import type { StreamChunk, TokenUsage } from '../types';
import { toTokenUsage } from './stream'; import { toTokenUsage } from './stream';
import type { AgentMessage, ContentToolCall } from '../types/sdk/message'; import type { AgentMessage, ContentToolCall } from '../types/sdk/message';
@ -95,13 +95,3 @@ export function accumulateUsage(
if (!raw) return current; if (!raw) return current;
return mergeUsage(current, toTokenUsage(raw)); return mergeUsage(current, toTokenUsage(raw));
} }
/** Compute totalCost from sub-agent usage already present on the result. */
export function applySubAgentUsage(result: GenerateResult): GenerateResult {
if (!result.subAgentUsage || result.subAgentUsage.length === 0) return result;
const parentCost = result.usage?.cost ?? 0;
const subCost = result.subAgentUsage.reduce((sum, s) => sum + (s.usage.cost ?? 0), 0);
return { ...result, totalCost: parentCost + subCost };
}

View File

@ -0,0 +1,17 @@
import type { BuiltTool } from '../types/sdk/tool';
/** Metadata key whose value `true` marks a tool as an SDK-owned built-in. */
export const SDK_OWNED_BUILTIN_TOOL_METADATA_KEY = 'sdkOwnedBuiltinTool';

/** True only when the tool's metadata carries the SDK-owned flag set to exactly `true`. */
export function isSdkOwnedBuiltInTool(tool: BuiltTool): boolean {
	const flag = tool.metadata?.[SDK_OWNED_BUILTIN_TOOL_METADATA_KEY];
	return flag === true;
}

/**
 * Return a shallow copy of `tool` whose metadata additionally carries the
 * SDK-owned flag. The input tool and its metadata object are not mutated.
 */
export function withSdkOwnedBuiltInMetadata(tool: BuiltTool): BuiltTool {
	const metadata = {
		...tool.metadata,
		[SDK_OWNED_BUILTIN_TOOL_METADATA_KEY]: true,
	};
	return { ...tool, metadata };
}

View File

@ -108,6 +108,18 @@ export function convertChunk(c: TextStreamPart<ToolSet>): StreamChunk | undefine
output: c.output, output: c.output,
}; };
case 'tool-error':
// Provider-executed tools (e.g. native web search) surface failures
// as `tool-error` rather than `tool-result`. Map to our tool-result
// shape so stream consumers receive the error payload.
return {
type: 'tool-result',
toolCallId: c.toolCallId ?? '',
toolName: c.toolName ?? '',
output: c.error,
isError: true,
};
case 'error': case 'error':
return { type: 'error', error: c.error }; return { type: 'error', error: c.error };

View File

@ -0,0 +1,132 @@
/**
* Task paths for sub-agent delegation.
*
* A "task path" is a filesystem-like address that gives every agent run a
* stable, human-readable position in the delegation flow, e.g.:
*
* /root the top-level (orchestrating) agent
* /root/research_api_0 a direct child delegation from the orchestrator
*
* Each child segment carries the parent's 0-based child index (`_0`, `_1`, …) so
* that delegations with the same task name stay distinct.
*
* Why this concept exists:
* - Identity: each delegated unit of work gets a unique, traceable name we can
* log and surface in the timeline without the parent having to invent ids.
* (Memory/session ids are independent — a run gets its own thread id.)
* - Policy enforcement: together with {@link SubAgentTaskPathPolicy}, the path
* lets us cap per-parent fan-out so a misbehaving agent can't spawn hundreds
* of parallel children, which would blow up cost, latency, and resources.
*
* Everything in this file is pure (no I/O, no n8n-specific concepts), which is
* why it lives in the runtime SDK: it is shared verbatim by both the generic
* `delegate_subagent` tool and the n8n CLI runner.
*/
/**
* A delegation task path: `/root` or a single direct-child segment under `/root`.
* Modeled as a template-literal type so a plain string can be narrowed to a
* validated path via {@link assertSubAgentTaskPath}.
*
* The static type is looser than the runtime rule: `/root/${string}` also
* admits nested or mixed-case strings. The single lowercase
* alphanumeric/underscore segment shape is enforced only by the runtime
* pattern check.
*/
export type SubAgentTaskPath = '/root' | `/root/${string}`;
/**
* Guardrails applied when a parent tries to spawn a child sub-agent. Every limit
* is optional; an undefined field means "no limit for that dimension".
*/
export interface SubAgentTaskPathPolicy {
/**
* Maximum number of children a single parent may spawn. Bounds fan-out width.
* A new child is rejected once the number already spawned has reached this value.
*/
maxChildren?: number;
}
/**
* Path of the initiating (orchestrating) agent. `satisfies` checks the literal
* against {@link SubAgentTaskPath} while keeping its narrow `'/root'` type.
*/
export const ROOT_SUB_AGENT_TASK_PATH = '/root' satisfies SubAgentTaskPath;
/** Upper bound on a single path segment, so paths stay bounded and readable. */
const MAX_TASK_NAME_LENGTH = 64;

/** A valid path is `/root` or `/root` plus one lowercase alphanumeric/underscore segment. */
const SUB_AGENT_TASK_PATH_PATTERN = /^\/root(?:\/[a-z0-9_]+)?$/;

/**
 * Turn a free-text, model-supplied task name (e.g. "Research API pricing!")
 * into a safe, deterministic path segment (e.g. "research_api_pricing").
 *
 * The task name comes from the LLM, so it can contain anything. It is trimmed
 * and lowercased, each run of characters outside `a-z0-9` becomes a single
 * underscore, leading/trailing underscores are stripped, and the result is
 * capped at {@link MAX_TASK_NAME_LENGTH} characters. The output only contains
 * characters accepted by {@link SUB_AGENT_TASK_PATH_PATTERN}.
 *
 * @param taskName Raw task name.
 * @returns The sanitized, non-empty segment.
 * @throws if nothing alphanumeric survives (we refuse to build a nameless path).
 */
export function sanitizeSubAgentTaskName(taskName: string): string {
	const lowered = taskName.trim().toLowerCase();
	const underscored = lowered.replace(/[^a-z0-9]+/g, '_').replace(/_+/g, '_');
	const withoutEdges = underscored.replace(/^_+|_+$/g, '');
	// Truncation can cut right after a separator, so strip trailing underscores again.
	const segment = withoutEdges.slice(0, MAX_TASK_NAME_LENGTH).replace(/_+$/g, '');
	if (segment.length === 0) {
		throw new Error('Sub-agent task name must contain at least one alphanumeric character');
	}
	return segment;
}
/**
 * Type guard for task paths.
 *
 * @returns `true` when `value` is exactly `/root` or `/root/<segment>` with a
 * lowercase alphanumeric/underscore segment.
 */
export function isSubAgentTaskPath(value: string): value is SubAgentTaskPath {
	const match = SUB_AGENT_TASK_PATH_PATTERN.exec(value);
	return match !== null;
}
/**
 * Assert (and type-narrow) that a string is a valid task path. Used to validate
 * paths that were constructed here or received from elsewhere before relying
 * on their shape.
 *
 * @throws Error naming the offending value when it is not a valid task path.
 */
export function assertSubAgentTaskPath(value: string): asserts value is SubAgentTaskPath {
	if (isSubAgentTaskPath(value)) return;
	throw new Error(`Invalid sub-agent task path: ${value}`);
}
/**
 * Build a first-level child path: `/root/<sanitized task name>_<childCount>`.
 *
 * `childCount` is the parent's 0-based index for this child (the number of
 * children it had already spawned). Appending it disambiguates same-named
 * siblings within a single parent run.
 *
 * @param taskName Raw task name; sanitized before use.
 * @param childCount 0-based index of this child under its parent.
 * @returns The validated child path.
 * @throws if the task name has no alphanumeric characters, or if the assembled
 * path fails validation.
 */
export function createChildSubAgentTaskPath(
	taskName: string,
	childCount: number,
): SubAgentTaskPath {
	const segment = sanitizeSubAgentTaskName(taskName);
	const candidate: string = [ROOT_SUB_AGENT_TASK_PATH, `${segment}_${childCount}`].join('/');
	// Validates the final string too, so an unusual index cannot yield a malformed path.
	assertSubAgentTaskPath(candidate);
	return candidate;
}
/**
 * Fan-out gate, checked BEFORE a child is spawned.
 *
 * `childCount` is how many children the parent has ALREADY spawned. Once that
 * has reached `maxChildren`, one more would exceed the limit, so the call is
 * rejected; the error message reports the would-be new total
 * (`childCount + 1`). With no policy or no `maxChildren`, fan-out is unbounded.
 *
 * @param childCount Number of children already spawned by the parent.
 * @param policy Policy to enforce, if any.
 * @throws Error when spawning another child would exceed `maxChildren`.
 */
export function assertSubAgentPolicyAllowsChildCount(
	childCount: number,
	policy: SubAgentTaskPathPolicy | undefined,
): void {
	const limit = policy?.maxChildren;
	if (limit === undefined) return;

	const limitReached = childCount >= limit;
	if (limitReached) {
		const wouldBeTotal = childCount + 1;
		throw new Error(`Sub-agent child count ${wouldBeTotal} exceeds maxChildren ${limit}`);
	}
}

View File

@ -3,6 +3,7 @@ import type { JSONSchema7 } from 'json-schema';
import { z } from 'zod'; import { z } from 'zod';
import { loadAi } from './lazy-ai'; import { loadAi } from './lazy-ai';
import { isCancellation } from '../sdk/cancellation';
import { import {
type BuiltProviderTool, type BuiltProviderTool,
type BuiltTool, type BuiltTool,
@ -11,7 +12,6 @@ import {
type ToolExecutionContext, type ToolExecutionContext,
type ToolContext, type ToolContext,
} from '../types'; } from '../types';
import type { SubAgentUsage } from '../types/sdk/agent';
import { isZodSchema } from '../utils/zod'; import { isZodSchema } from '../utils/zod';
type AiSdkProviderTool = AiSdkTool & { type AiSdkProviderTool = AiSdkTool & {
@ -24,13 +24,6 @@ type AiSdkProviderTool = AiSdkTool & {
*/ */
const SUSPEND_BRAND = Symbol('SuspendBrand'); const SUSPEND_BRAND = Symbol('SuspendBrand');
/**
* Branded symbol used to tag tool results from agent-as-tool calls.
* Carries sub-agent usage so the parent runtime can aggregate costs
* without any external state (WeakMap, mutable tool fields, etc.).
*/
const AGENT_TOOL_BRAND = Symbol('AgentToolBrand');
export interface SuspendedToolResult { export interface SuspendedToolResult {
readonly [SUSPEND_BRAND]: true; readonly [SUSPEND_BRAND]: true;
payload: unknown; payload: unknown;
@ -41,32 +34,6 @@ export function isSuspendedToolResult(value: unknown): value is SuspendedToolRes
return typeof value === 'object' && value !== null && SUSPEND_BRAND in value; return typeof value === 'object' && value !== null && SUSPEND_BRAND in value;
} }
export interface AgentToolResult {
readonly [AGENT_TOOL_BRAND]: true;
/** The actual tool output (passed back to the LLM). */
readonly output: unknown;
/** Sub-agent usage entries to aggregate into the parent's result. */
readonly subAgentUsage: SubAgentUsage[];
}
/** Type guard: returns true when a tool result carries sub-agent usage. */
export function isAgentToolResult(value: unknown): value is AgentToolResult {
return typeof value === 'object' && value !== null && AGENT_TOOL_BRAND in value;
}
/**
* Create a branded agent-tool result that carries sub-agent usage alongside the output.
* The output properties are spread onto the object so it remains a valid tool output
* even when accessed directly (e.g. in tests). The runtime detects the brand via
* isAgentToolResult() and extracts the sub-agent usage.
* Typed as `never` so `return createAgentToolResult(...)` satisfies any handler return type
* (same pattern as ctx.suspend).
*/
export function createAgentToolResult(output: unknown, subAgentUsage: SubAgentUsage[]): never {
const base = typeof output === 'object' && output !== null ? output : {};
return { ...base, [AGENT_TOOL_BRAND]: true, output, subAgentUsage } as never;
}
/** /**
* Convert an array of BuiltProviderTools into a Record of AI SDK provider-defined tool objects. * Convert an array of BuiltProviderTools into a Record of AI SDK provider-defined tool objects.
* Provider tools are executed on the provider's infrastructure (e.g. Anthropic web search, * Provider tools are executed on the provider's infrastructure (e.g. Anthropic web search,
@ -153,15 +120,19 @@ export async function executeTool(
} }
if (builtTool.suspendSchema) { if (builtTool.suspendSchema) {
const isCancelled = isCancellation(resumeData);
const ctx: InterruptibleToolContext = { const ctx: InterruptibleToolContext = {
suspend: async (payload: unknown): Promise<never> => { suspend: async (payload: unknown): Promise<never> => {
return await Promise.resolve({ [SUSPEND_BRAND]: true, payload } as never); return await Promise.resolve({ [SUSPEND_BRAND]: true, payload } as never);
}, },
resumeData, resumeData: isCancelled ? undefined : resumeData,
cancellation: isCancelled ? { message: resumeData.message } : undefined,
parentTelemetry, parentTelemetry,
toolCallId, toolCallId,
runId: executionContext.runId, runId: executionContext.runId,
persistence: executionContext.persistence, persistence: executionContext.persistence,
emitEvent: executionContext.emitEvent,
abortSignal: executionContext.abortSignal,
}; };
return await builtTool.handler(args, ctx); return await builtTool.handler(args, ctx);
} }
@ -171,6 +142,8 @@ export async function executeTool(
toolCallId, toolCallId,
runId: executionContext.runId, runId: executionContext.runId,
persistence: executionContext.persistence, persistence: executionContext.persistence,
emitEvent: executionContext.emitEvent,
abortSignal: executionContext.abortSignal,
}; };
return await builtTool.handler(args, ctx); return await builtTool.handler(args, ctx);
} }

View File

@ -0,0 +1,106 @@
import { z } from 'zod';
import { withSdkOwnedBuiltInMetadata } from './sdk-owned-tool';
import { Tool } from '../sdk/tool';
import type { BuiltTool } from '../types/sdk/tool';
/** Name under which the planner tool is registered. */
export const WRITE_TODOS_TOOL_NAME = 'write_todos';
/** Lifecycle states a todo item can be in. */
const todoStatusSchema = z.enum(['pending', 'in_progress', 'completed', 'blocked', 'cancelled']);
/**
* Optional hint attached to a todo that is a candidate for delegation. Both
* fields are optional, and the hint object itself may be omitted.
*/
const todoDelegateHintSchema = z
.object({
subAgentId: z
.string()
.optional()
.describe(
'Optional sub-agent id when this task is a delegate_subagent candidate. Use "inline" for one-off inline sub-agents.',
),
expectedOutput: z
.string()
.optional()
.describe('Optional expected output shape when delegating this task.'),
})
.optional();
/** One entry of the task list; `id` and `content` must be non-empty strings. */
const todoItemSchema = z.object({
id: z.string().min(1).describe('Stable identifier for this task within the current plan.'),
content: z.string().min(1).describe('Concrete, self-contained task description.'),
status: todoStatusSchema,
delegateHint: todoDelegateHintSchema,
});
/**
 * Input of `write_todos`: the full task list. On top of the per-item schema,
 * ids must be unique; every repeated id produces a custom issue located at
 * that item's `id` path.
 */
const writeTodosInputSchema = z
	.object({
		todos: z
			.array(todoItemSchema)
			.describe('Full task list for the current run. Replaces any previous list.'),
	})
	.superRefine(({ todos }, ctx) => {
		const knownIds = new Set<string>();
		todos.forEach((todo, index) => {
			if (knownIds.has(todo.id)) {
				ctx.addIssue({
					code: 'custom',
					message: `Duplicate todo id "${todo.id}". Each task must have a unique id.`,
					path: ['todos', index, 'id'],
				});
			}
			knownIds.add(todo.id);
		});
	});
/** Output of `write_todos`: a fixed `ok` status, the item count, and the list itself. */
const writeTodosOutputSchema = z.object({
status: z.literal('ok'),
todoCount: z.number(),
todos: z.array(todoItemSchema),
});
/** Model-facing tool description passed to the tool builder. */
const WRITE_TODOS_DESCRIPTION =
'Create or update a structured task list for complex agent work. Use it to decompose a larger request into concrete workstreams, track progress, and identify which tasks should be handled separately with delegate_subagent. Do not use it for trivial work, single-step tasks, or purely conversational answers. This tool only updates the task list; it does not run sub-agents or answer the user.';
/**
* System-prompt guidance for the tool: when to use it, when not to, and how.
* Stored as one string with a line per rule.
*/
const WRITE_TODOS_SYSTEM_INSTRUCTION = [
'write_todos helps you plan and track complex objectives before and during execution. It updates the current task list only; it does not complete tasks, run sub-agents, or answer the user.',
'WHEN TO USE write_todos:',
'- The user request has 3+ meaningful steps or multiple deliverables.',
'- The request decomposes into 2+ independent workstreams.',
'- Some workstreams are good candidates for delegate_subagent.',
'- You need to track progress, revise the plan, or avoid losing context.',
'WHEN NOT TO USE write_todos:',
'- The request is trivial, conversational, or informational.',
'- The task can be completed directly in one or two simple steps.',
"- You would only create a todo list to restate the user's request.",
'HOW TO USE write_todos:',
'- Write concrete, self-contained tasks, not vague phases.',
'- Mark the first active task, or independent active tasks, as in_progress immediately.',
'- For sub-agent-worthy work, create one todo per bounded workstream, then call delegate_subagent separately for that task.',
'- Do not delegate the entire user request as one task.',
'- Update task status as soon as work completes; do not batch completions at the end.',
'- Revise the list when new information changes the plan.',
'- Do not call write_todos multiple times in parallel; send one full list update at a time.',
'- After all work is done, send the final answer as normal assistant text after the last write_todos call.',
].join('\n');
/**
 * Build the planner-only `write_todos` tool, which lets a parent agent maintain
 * a structured task list for complex work without auto-dispatching sub-agents.
 *
 * The handler only echoes the submitted list back together with its length;
 * it does not run anything. The built tool is tagged as SDK-owned.
 */
export function createWriteTodosTool(): BuiltTool {
	const builder = new Tool(WRITE_TODOS_TOOL_NAME)
		.description(WRITE_TODOS_DESCRIPTION)
		.systemInstruction(WRITE_TODOS_SYSTEM_INSTRUCTION)
		.input(writeTodosInputSchema)
		.output(writeTodosOutputSchema)
		.handler(async ({ todos: submitted }) => {
			// Shallow copy so the echoed array is not the input array instance.
			const todos = submitted.slice();
			return await Promise.resolve({
				status: 'ok' as const,
				todoCount: todos.length,
				todos,
			});
		});
	return withSdkOwnedBuiltInMetadata(builder.build());
}

View File

@ -0,0 +1,129 @@
import * as aiModule from 'ai';
import type { Mock } from 'vitest';
import type { AgentRuntimeConfig } from '../../runtime/agent-runtime';
import type { AgentEventBus } from '../../runtime/event-bus';
import { AgentEvent } from '../../runtime/event-bus';
import type { StreamChunk } from '../../types';
import { Agent } from '../agent';
// Replace the OpenAI provider: `createOpenAI()` returns a model factory that
// yields a plain stub object instead of a real provider model.
vi.mock('@ai-sdk/openai', () => ({
createOpenAI: () => () => ({ provider: 'openai', modelId: 'mock', specificationVersion: 'v3' }),
}));
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
type AiImport = typeof import('ai');
// Keep every real export of `ai`, but swap `generateText` for a mock function
// so each test decides what the model call resolves to.
vi.mock('ai', async () => {
const actual = await vi.importActual<AiImport>('ai');
return {
...actual,
generateText: vi.fn(),
};
});
// Typed handle on the mocked `generateText`, used to queue resolved values.
const { generateText } = aiModule as unknown as {
generateText: Mock;
};
/** Minimal view of an active runtime entry; these tests only touch its event bus. */
type ActiveRuntime = {
bus: AgentEventBus;
};
/**
 * Agent members that the tests reach by casting the agent with
 * `as unknown as AgentInternals`.
 */
type AgentInternals = {
ensureBuilt(): Promise<AgentRuntimeConfig>;
createRuntime(config: AgentRuntimeConfig, runId?: string): ActiveRuntime;
trackStreamRuntime(
stream: ReadableStream<StreamChunk>,
active: ActiveRuntime,
): ReadableStream<StreamChunk>;
cleanupRuntime(active: ActiveRuntime): Promise<void>;
activeRuntimes: Set<ActiveRuntime>;
};
/**
 * Builds a canned successful `generateText` result: one assistant text
 * message, a 'stop' finish reason, fixed token usage, and no tool calls.
 *
 * @param text Text placed in the assistant message's only content part.
 * @returns A fresh result object on every call.
 */
function makeGenerateSuccess(text: string) {
	const assistantMessage = {
		role: 'assistant',
		content: [{ type: 'text', text }],
	};
	const usage = { inputTokens: 10, outputTokens: 5, totalTokens: 15 };

	return {
		finishReason: 'stop',
		usage,
		response: { messages: [assistantMessage] },
		toolCalls: [],
	};
}
// Covers per-run runtime behaviour on Agent: result state, event handler
// propagation to active runtimes, and cleanup when a tracked stream is cancelled.
describe('Agent isolated runtimes', () => {
beforeEach(() => {
vi.clearAllMocks();
});
it('keeps result state bound to the runtime that produced it', async () => {
// Queue one canned model response per generate() call, in call order.
generateText
.mockResolvedValueOnce(makeGenerateSuccess('first response'))
.mockResolvedValueOnce(makeGenerateSuccess('second response'));
const agent = new Agent('agent').model('openai/gpt-4o-mini').instructions('test');
const first = await agent.generate('first');
const second = await agent.generate('second');
// `first` is inspected only after the second run has finished, so its state
// must still contain the first response.
expect(first.getState().messageList.messages).toEqual(
expect.arrayContaining([
expect.objectContaining({
content: expect.arrayContaining([expect.objectContaining({ text: 'first response' })]),
}),
]),
);
expect(second.getState().messageList.messages).toEqual(
expect.arrayContaining([
expect.objectContaining({
content: expect.arrayContaining([expect.objectContaining({ text: 'second response' })]),
}),
]),
);
});
it('applies event handler changes to active runtimes', async () => {
const agent = new Agent('agent').model('openai/gpt-4o-mini').instructions('test');
const internals = agent as unknown as AgentInternals;
// Create the runtime first so the handler is registered while it is already active.
const active = internals.createRuntime(await internals.ensureBuilt());
const handler = vi.fn();
agent.on(AgentEvent.AgentEnd, handler);
active.bus.emit({ type: AgentEvent.AgentEnd, messages: [] });
agent.off(AgentEvent.AgentEnd, handler);
// This second emit happens after off(), so the handler must not fire again.
active.bus.emit({ type: AgentEvent.AgentEnd, messages: [] });
expect(handler).toHaveBeenCalledTimes(1);
await internals.cleanupRuntime(active);
});
it('cleans up the active runtime when a wrapped stream is cancelled', async () => {
const agent = new Agent('agent').model('openai/gpt-4o-mini').instructions('test');
const internals = agent as unknown as AgentInternals;
const active = internals.createRuntime(await internals.ensureBuilt());
const sourceCancel = vi.fn();
// Source stream that emits a single chunk and records any cancel reason it receives.
const stream = internals.trackStreamRuntime(
new ReadableStream<StreamChunk>({
start(controller) {
controller.enqueue({ type: 'start-step' });
},
cancel: sourceCancel,
}),
active,
);
const reader = stream.getReader();
expect(internals.activeRuntimes.has(active)).toBe(true);
await reader.read();
await reader.cancel('client disconnected');
reader.releaseLock();
// The cancel reason must reach the source stream, and the runtime must leave the active set.
expect(sourceCancel).toHaveBeenCalledWith('client disconnected');
expect(internals.activeRuntimes.has(active)).toBe(false);
});
});

View File

@ -0,0 +1,55 @@
import { createCancellation, isCancellation, CANCELLATION_TYPE } from '../cancellation';
// Factory behaviour: the created object carries the cancellation tag and the given message.
describe('createCancellation', () => {
	it('creates an object with the correct _type and message', () => {
		const cancellation = createCancellation('do something else');

		expect(cancellation._type).toBe(CANCELLATION_TYPE);
		expect(cancellation.message).toBe('do something else');
	});

	it('is detected by isCancellation', () => {
		const detected = isCancellation(createCancellation('steer me'));

		expect(detected).toBe(true);
	});
});
// Type-guard behaviour: accepted and rejected payload shapes, plus a JSON round-trip.
describe('isCancellation', () => {
	it('returns true for a valid cancellation object', () => {
		const accepted = isCancellation({ _type: 'agent.cancellation', message: 'hello' });
		expect(accepted).toBe(true);
	});

	it('returns false for null', () => {
		const accepted = isCancellation(null);
		expect(accepted).toBe(false);
	});

	it('returns false for undefined', () => {
		const accepted = isCancellation(undefined);
		expect(accepted).toBe(false);
	});

	it('returns false for a plain resume payload', () => {
		const accepted = isCancellation({ approved: true });
		expect(accepted).toBe(false);
	});

	it('returns false when _type is wrong', () => {
		const accepted = isCancellation({ _type: 'something.else', message: 'hi' });
		expect(accepted).toBe(false);
	});

	it('returns false when message is missing', () => {
		const accepted = isCancellation({ _type: 'agent.cancellation' });
		expect(accepted).toBe(false);
	});

	it('returns false when message is not a string', () => {
		const accepted = isCancellation({ _type: 'agent.cancellation', message: 42 });
		expect(accepted).toBe(false);
	});

	it('survives a JSON round-trip (simulating HTTP wire format)', () => {
		const wire = JSON.stringify(createCancellation('change direction'));
		// eslint-disable-next-line n8n-local-rules/no-uncaught-json-parse
		const revived = JSON.parse(wire) as unknown;

		expect(isCancellation(revived)).toBe(true);
		const { message } = revived as ReturnType<typeof createCancellation>;
		expect(message).toBe('change direction');
	});
});

View File

@ -0,0 +1,73 @@
import { fetchProviderCatalog } from '../catalog';
// Verifies that fetchProviderCatalog exposes catalog entries under the provider
// ids asserted below rather than under the ids present in the fetched payload.
describe('fetchProviderCatalog', () => {
// Saved so the stubbed global fetch can be put back after each test.
const originalFetch = global.fetch;
afterEach(() => {
global.fetch = originalFetch;
vi.restoreAllMocks();
});
it('returns provider ids that match the agents runtime', async () => {
// Fake fetch response whose JSON body is a catalog keyed by the payload's provider ids.
const fetchMock = vi.fn().mockResolvedValue({
ok: true,
json: async () =>
await Promise.resolve({
openai: {
id: 'openai',
name: 'OpenAI',
models: {
'gpt-5': {
id: 'gpt-5',
name: 'GPT-5',
tool_call: true,
},
},
},
'amazon-bedrock': {
id: 'amazon-bedrock',
name: 'Amazon Bedrock',
models: {
'anthropic.claude-sonnet-4-5-v1:0': {
id: 'anthropic.claude-sonnet-4-5-v1:0',
name: 'Claude Sonnet 4.5',
},
},
},
azure: {
id: 'azure',
name: 'Azure',
models: {
'gpt-4o': {
id: 'gpt-4o',
name: 'GPT-4o',
},
},
},
'azure-cognitive-services': {
id: 'azure-cognitive-services',
name: 'Azure Cognitive Services',
models: {
'cohere-command-a': {
id: 'cohere-command-a',
name: 'Command A',
},
},
},
}),
});
global.fetch = fetchMock as typeof fetch;
const catalog = await fetchProviderCatalog();
// The payload's snake_case `tool_call` flag is read back as `toolCall`.
expect(catalog.openai.models['gpt-5'].toolCall).toBe(true);
// 'amazon-bedrock' models are expected under 'aws-bedrock'.
expect(catalog['aws-bedrock'].models['anthropic.claude-sonnet-4-5-v1:0'].name).toBe(
'Claude Sonnet 4.5',
);
// Models from both 'azure' and 'azure-cognitive-services' are expected under 'azure-openai'.
expect(catalog['azure-openai'].models['gpt-4o'].name).toBe('GPT-4o');
expect(catalog['azure-openai'].models['cohere-command-a'].name).toBe('Command A');
// The original payload ids must not appear as keys in the result.
expect(catalog['amazon-bedrock']).toBeUndefined();
expect(catalog.azure).toBeUndefined();
expect(catalog['azure-cognitive-services']).toBeUndefined();
});
});

View File

@ -0,0 +1,232 @@
import { z } from 'zod';
import type * as AgentRuntimeModule from '../../runtime/agent-runtime';
import {
DELEGATED_CHILD_SUSPEND_UNSUPPORTED_MESSAGE,
DELEGATE_SUB_AGENT_TOOL_NAME,
INLINE_SUB_AGENT_ID,
createDelegateSubAgentTool,
getInlineDelegateSubAgentToolOptions,
type DelegateSubAgentRunner,
type DelegateSubAgentRunnerHelpers,
} from '../../runtime/delegate-sub-agent-tool';
import type { BuiltTool, GenerateResult, SerializableAgentState } from '../../types';
import { Agent } from '../agent';
// Every config object passed to the mocked AgentRuntime constructor, in construction order.
const runtimeConfigs: Array<Record<string, unknown>> = [];
// When set, the mocked AgentRuntime.generate() resolves to this value instead of its default result.
let inlineChildGenerateResult: GenerateResult | undefined;
// Returns a fixed 'success' state with an empty message list and no pending tool calls.
const mockState = (): SerializableAgentState => ({
status: 'success',
messageList: { messages: [], historyIds: [], inputIds: [], responseIds: [] },
pendingToolCalls: {},
});
// Replace only the AgentRuntime export of the runtime module; the other exports stay real.
// The mock records constructor configs and returns canned generate() results.
vi.mock('../../runtime/agent-runtime', async (importOriginal) => {
const actual = await importOriginal<typeof AgentRuntimeModule>();
return {
...actual,
AgentRuntime: class MockAgentRuntime {
constructor(config: Record<string, unknown>) {
// Record the config so tests can count and inspect constructed runtimes.
runtimeConfigs.push(config);
}
async generate() {
// A per-test override takes precedence over the default canned answer.
if (inlineChildGenerateResult !== undefined) {
return await Promise.resolve(inlineChildGenerateResult);
}
return await Promise.resolve({
runId: 'child-run',
finishReason: 'stop',
messages: [
{
role: 'assistant',
type: 'llm',
content: [{ type: 'text', text: 'inline answer' }],
},
],
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
getState: mockState,
});
}
async dispose() {
return await Promise.resolve();
}
},
};
});
/**
 * Creates a minimal built tool for tests.
 *
 * @param name Tool name; also used to derive the description (`"<name> tool"`).
 * @returns A tool with an empty-object input schema whose handler resolves to `{ ok: true }`.
 */
function makeTool(name: string): BuiltTool {
	const tool: BuiltTool = {
		name,
		description: name + ' tool',
		inputSchema: z.object({}),
		handler: async () => await Promise.resolve({ ok: true }),
	};
	return tool;
}
// Shared input passed to the delegate tool handler; it targets the inline sub-agent id.
const delegateInput = {
subAgentId: INLINE_SUB_AGENT_ID,
taskName: 'Research API',
goal: 'Find the API behavior.',
};
/**
 * Calls the agent's `build()` method through a cast and returns the runtime
 * config it resolves to.
 *
 * @param agent Agent whose `build()` should be invoked.
 */
async function buildAgentConfig(agent: Agent): Promise<AgentRuntimeModule.AgentRuntimeConfig> {
	type Buildable = { build(): Promise<AgentRuntimeModule.AgentRuntimeConfig> };
	const buildable = agent as unknown as Buildable;
	return await buildable.build();
}
// Exercises how the delegate sub-agent tool routes inline delegations: through a
// host-supplied runner, without one, via helpers from tool metadata, and when
// the inline child run reports a pending suspension.
describe('delegate sub-agent routing', () => {
beforeEach(() => {
// Reset the state shared with the mocked AgentRuntime.
runtimeConfigs.length = 0;
inlineChildGenerateResult = undefined;
});
it('routes inline delegations through a host runner with runInlineSubAgent helpers', async () => {
// Host runner that hands inline requests straight back to the provided helper.
const hostRunSubAgent = vi.fn<DelegateSubAgentRunner>(async (request, helpers) => {
expect(request.subAgentId).toBe(INLINE_SUB_AGENT_ID);
return await helpers.runInlineSubAgent(request);
});
const agent = new Agent('parent')
.model('openai', 'gpt-4o-mini')
.instructions('Delegate when needed.')
.tool(
createDelegateSubAgentTool({
runSubAgent: hostRunSubAgent,
}),
)
.tool(makeTool('lookup'));
const runtimeConfig = await buildAgentConfig(agent);
// Building the config alone must not construct any AgentRuntime.
expect(runtimeConfigs).toHaveLength(0);
const builtTools = runtimeConfig.tools;
const delegateTool = builtTools?.find((tool) => tool.name === DELEGATE_SUB_AGENT_TOOL_NAME);
expect(delegateTool).toBeDefined();
await expect(
delegateTool?.handler?.(delegateInput, { runId: 'parent-run-1' }),
).resolves.toMatchObject({
status: 'completed',
answer: 'inline answer',
});
expect(hostRunSubAgent).toHaveBeenCalledOnce();
const helpers = hostRunSubAgent.mock.calls[0]?.[1];
expect(helpers).toBeDefined();
expect(typeof helpers?.runInlineSubAgent).toBe('function');
// Handling the delegation constructed exactly one (mocked) AgentRuntime.
expect(runtimeConfigs).toHaveLength(1);
});
it('runs inline delegations without a host runner when the tool is built on an Agent', async () => {
const agent = new Agent('parent')
.model('openai', 'gpt-4o-mini')
.instructions('Delegate when needed.')
.tool(createDelegateSubAgentTool())
.tool(makeTool('lookup'));
const runtimeConfig = await buildAgentConfig(agent);
expect(runtimeConfigs).toHaveLength(0);
const builtTools = runtimeConfig.tools;
const delegateTool = builtTools?.find((tool) => tool.name === DELEGATE_SUB_AGENT_TOOL_NAME);
expect(delegateTool).toBeDefined();
await expect(
delegateTool?.handler?.(delegateInput, { runId: 'parent-run-1' }),
).resolves.toMatchObject({
status: 'completed',
answer: 'inline answer',
});
expect(runtimeConfigs).toHaveLength(1);
});
it('lets a host-style runner delegate inline through helpers from tool metadata', async () => {
// Stand-in inline helper with a fixed completed result.
const runInlineSubAgent = vi
.fn<DelegateSubAgentRunnerHelpers['runInlineSubAgent']>()
.mockResolvedValue({
status: 'completed',
taskPath: '/root/research_api_0',
answer: 'inline via helper',
});
const hostRunSubAgent = vi.fn<DelegateSubAgentRunner>(async (request, helpers) => {
if (request.subAgentId === INLINE_SUB_AGENT_ID) {
return await helpers.runInlineSubAgent(request);
}
return {
status: 'failed',
taskPath: request.taskPath,
answer: '',
error: 'unexpected',
};
});
const tool = createDelegateSubAgentTool({ runSubAgent: hostRunSubAgent });
// The options passed at creation are read back from the built tool.
const options = getInlineDelegateSubAgentToolOptions(tool);
expect(options?.runSubAgent).toBe(hostRunSubAgent);
await expect(
options?.runSubAgent?.(
{
...delegateInput,
taskPath: '/root/research_api_0',
childCount: 0,
},
{ runInlineSubAgent },
),
).resolves.toMatchObject({
status: 'completed',
answer: 'inline via helper',
});
expect(runInlineSubAgent).toHaveBeenCalledOnce();
});
it('returns a failed delegate output when an inline child run suspends', async () => {
// Override the mocked child result with one that carries a pending suspension.
inlineChildGenerateResult = {
runId: 'child-run-suspended',
finishReason: 'tool-calls',
messages: [
{
role: 'assistant',
type: 'llm',
content: [{ type: 'text', text: 'awaiting approval' }],
},
],
pendingSuspend: [
{
runId: 'child-run-suspended',
toolCallId: 'tool-call-1',
toolName: 'delete_file',
input: { path: '/tmp/foo.txt' },
suspendPayload: { message: 'Delete file?' },
},
],
getState: mockState,
};
const agent = new Agent('parent')
.model('openai', 'gpt-4o-mini')
.instructions('Delegate when needed.')
.tool(createDelegateSubAgentTool())
.tool(makeTool('lookup'));
const runtimeConfig = await buildAgentConfig(agent);
const builtTools = runtimeConfig.tools;
const delegateTool = builtTools?.find((tool) => tool.name === DELEGATE_SUB_AGENT_TOOL_NAME);
expect(delegateTool).toBeDefined();
// The handler is expected to resolve (not reject) with a failed output and the shared error message.
await expect(
delegateTool?.handler?.(delegateInput, { runId: 'parent-run-1' }),
).resolves.toMatchObject({
status: 'failed',
answer: '',
error: DELEGATED_CHILD_SUSPEND_UNSUPPORTED_MESSAGE,
});
});
});

View File

@ -0,0 +1,152 @@
import { z } from 'zod';
import type * as AgentRuntimeModule from '../../runtime/agent-runtime';
import type { DelegateSubAgentRequest } from '../../runtime/delegate-sub-agent-tool';
import {
DELEGATE_SUB_AGENT_TOOL_NAME,
INLINE_SUB_AGENT_ID,
} from '../../runtime/delegate-sub-agent-tool';
import { RECALL_MEMORY_TOOL_NAME } from '../../runtime/episodic-memory';
import { WRITE_TODOS_TOOL_NAME } from '../../runtime/write-todos-tool';
import type { BuiltProviderTool, BuiltTool } from '../../types';
import { Agent, filterInlineSubAgentTools } from '../agent';
// Every config object passed to the mocked AgentRuntime constructor, in construction order.
const runtimeConfigs: Array<Record<string, unknown>> = [];
// Replace only the AgentRuntime export; the other exports of the module stay real.
vi.mock('../../runtime/agent-runtime', async (importOriginal) => {
const actual = await importOriginal<typeof AgentRuntimeModule>();
return {
...actual,
AgentRuntime: class MockAgentRuntime {
constructor(config: Record<string, unknown>) {
// Record the config so tests can inspect what a child runtime was given.
runtimeConfigs.push(config);
}
async generate() {
// Fixed canned result: a single assistant text message.
return await Promise.resolve({
runId: 'child-run',
finishReason: 'stop',
messages: [
{
role: 'assistant',
type: 'llm',
content: [{ type: 'text', text: 'done' }],
},
],
usage: {},
});
}
async dispose() {
return await Promise.resolve();
}
},
};
});
/**
 * Creates a minimal built tool for tests.
 *
 * @param name Tool name; also used to derive the description (`"<name> tool"`).
 * @returns A tool with an empty-object input schema whose handler resolves to `{ ok: true }`.
 */
function makeTool(name: string): BuiltTool {
	const tool: BuiltTool = {
		name,
		description: name + ' tool',
		inputSchema: z.object({}),
		handler: async () => await Promise.resolve({ ok: true }),
	};
	return tool;
}
// Provider tool fixture named after OpenAI's web search preview tool, with empty args.
const openaiWebSearchProviderTool: BuiltProviderTool = {
name: 'openai.web_search_preview',
args: {},
};
// Provider tool fixture named after Anthropic's web search tool, with empty args.
const anthropicWebSearchProviderTool: BuiltProviderTool = {
name: 'anthropic.web_search_20250305',
args: {},
};
/**
 * Shape used to reach `createInlineSubAgentRunner` on an Agent through an
 * `as unknown as` cast. It takes the tool sets and model config and returns a
 * function that runs a delegate request.
 */
type AgentWithInlineRunner = {
createInlineSubAgentRunner: (options: {
deferredTools: BuiltTool[];
modelConfig: string;
providerTools: BuiltProviderTool[];
tools: BuiltTool[];
inlineSubAgentBlockedTools?: string[];
}) => (request: DelegateSubAgentRequest) => Promise<unknown>;
};
/**
 * Creates an inline sub-agent runner from a fresh 'parent' Agent.
 *
 * @param options.providerTools Provider tools forwarded to the runner factory.
 * @param options.tools Regular tools; falls back to a single 'lookup' tool when omitted.
 * @param options.inlineSubAgentBlockedTools Optional blocked tool names, forwarded unchanged.
 * @returns The function returned by the agent's `createInlineSubAgentRunner`.
 */
function createInlineRunner(options: {
	providerTools: BuiltProviderTool[];
	tools?: BuiltTool[];
	inlineSubAgentBlockedTools?: string[];
}) {
	const parent = new Agent('parent') as unknown as AgentWithInlineRunner;
	const tools = options.tools ?? [makeTool('lookup')];

	return parent.createInlineSubAgentRunner({
		deferredTools: [],
		modelConfig: 'openai/gpt-4o-mini',
		tools,
		providerTools: options.providerTools,
		inlineSubAgentBlockedTools: options.inlineSubAgentBlockedTools,
	});
}
// Checks which tools an inline sub-agent receives: filterInlineSubAgentTools on its
// own, and the tool lists handed to the (mocked) child runtime.
describe('inline sub-agent tool filtering', () => {
beforeEach(() => {
// Forget configs recorded by earlier tests.
runtimeConfigs.length = 0;
});
it.each([
{
name: 'blocks SDK-owned tools by default but not other tool names',
tools: [
makeTool(DELEGATE_SUB_AGENT_TOOL_NAME),
makeTool(RECALL_MEMORY_TOOL_NAME),
makeTool(WRITE_TODOS_TOOL_NAME),
makeTool('host_tool'),
makeTool('lookup'),
],
blockedTools: undefined,
expected: ['host_tool', 'lookup'],
},
{
name: 'blocks host-supplied tool names when configured',
tools: [makeTool('host_tool'), makeTool('lookup')],
blockedTools: ['host_tool'],
expected: ['lookup'],
},
])('$name', ({ tools, blockedTools, expected }) => {
// Compare by name only; the surviving tools must keep their input order.
expect(filterInlineSubAgentTools(tools, blockedTools).map((tool) => tool.name)).toEqual(
expected,
);
});
it('inherits all provider tools when not blocked', () => {
expect(
filterInlineSubAgentTools([openaiWebSearchProviderTool, anthropicWebSearchProviderTool]).map(
(tool) => tool.name,
),
).toEqual(['openai.web_search_preview', 'anthropic.web_search_20250305']);
});
it('passes all provider tools to inline child runtimes by default', async () => {
const runner = createInlineRunner({
providerTools: [openaiWebSearchProviderTool, anthropicWebSearchProviderTool],
});
await runner({
subAgentId: INLINE_SUB_AGENT_ID,
taskName: 'research',
goal: 'Find the answer',
taskPath: '/root/research',
childCount: 0,
});
// One runtime was constructed for the run; inspect the config it received.
expect(runtimeConfigs).toHaveLength(1);
expect(
(runtimeConfigs[0]?.providerTools as BuiltProviderTool[] | undefined)?.map(
(tool) => tool.name,
),
).toEqual(['openai.web_search_preview', 'anthropic.web_search_20250305']);
expect((runtimeConfigs[0]?.tools as BuiltTool[] | undefined)?.map((tool) => tool.name)).toEqual(
['lookup'],
);
});
});

View File

@ -1,4 +1,4 @@
import type { AgentRuntime } from '../../runtime/agent-runtime'; import type { AgentRuntimeConfig } from '../../runtime/agent-runtime';
import { import {
DEFAULT_EPISODIC_MEMORY_EMBEDDING_MODEL, DEFAULT_EPISODIC_MEMORY_EMBEDDING_MODEL,
DEFAULT_EPISODIC_MEMORY_MAX_ENTRIES_PER_RUN, DEFAULT_EPISODIC_MEMORY_MAX_ENTRIES_PER_RUN,
@ -68,14 +68,9 @@ describe('Memory builder — episodic memory', () => {
.instructions('You are a test assistant.') .instructions('You are a test assistant.')
.memory(memory); .memory(memory);
const runtime = await (agent as unknown as { build(): Promise<AgentRuntime> }).build(); const runtimeConfig = await (
const runtimeConfig = ( agent as unknown as { build(): Promise<AgentRuntimeConfig> }
runtime as unknown as { ).build();
config: {
episodicMemory?: EpisodicMemoryConfig;
};
}
).config;
const embedder = runtimeConfig.episodicMemory?.embedder as unknown as Record<string, unknown>; const embedder = runtimeConfig.episodicMemory?.embedder as unknown as Record<string, unknown>;
expect(runtimeConfig.episodicMemory).toMatchObject({ expect(runtimeConfig.episodicMemory).toMatchObject({

View File

@ -1,6 +1,6 @@
import type { AgentRuntime } from '../../runtime/agent-runtime'; import type { AgentRuntimeConfig } from '../../runtime/agent-runtime';
import { InMemoryMemory } from '../../runtime/memory-store'; import { InMemoryMemory } from '../../runtime/memory-store';
import type { BuiltMemory, MemoryConfig, ObservationalMemoryConfig } from '../../types'; import type { BuiltMemory, MemoryConfig } from '../../types';
import { Agent } from '../agent'; import { Agent } from '../agent';
import { import {
DEFAULT_OBSERVATION_LOG_LOCK_TTL_MS, DEFAULT_OBSERVATION_LOG_LOCK_TTL_MS,
@ -137,15 +137,9 @@ describe('Memory builder — observation log memory', () => {
.instructions('You are a test assistant.') .instructions('You are a test assistant.')
.memory(memory); .memory(memory);
const runtime = await (agent as unknown as { build(): Promise<AgentRuntime> }).build(); const runtimeConfig = await (
const runtimeConfig = ( agent as unknown as { build(): Promise<AgentRuntimeConfig> }
runtime as unknown as { ).build();
config: {
observationLog?: { renderTokenBudget?: number };
observationalMemory?: ObservationalMemoryConfig;
};
}
).config;
expect(runtimeConfig.observationLog).toEqual({ expect(runtimeConfig.observationLog).toEqual({
renderTokenBudget: DEFAULT_OBSERVATION_LOG_RENDER_TOKEN_BUDGET, renderTokenBudget: DEFAULT_OBSERVATION_LOG_RENDER_TOKEN_BUDGET,

View File

@ -0,0 +1,52 @@
import { z } from 'zod';
import {
createDelegateSubAgentTool,
DELEGATE_SUB_AGENT_TOOL_NAME,
} from '../../runtime/delegate-sub-agent-tool';
import { isSdkOwnedBuiltInTool } from '../../runtime/sdk-owned-tool';
import { createWriteTodosTool, WRITE_TODOS_TOOL_NAME } from '../../runtime/write-todos-tool';
import { Agent } from '../agent';
import { Tool } from '../tool';
/**
 * Builds a custom tool with the given name through the `Tool` builder.
 *
 * @param name Name given to the tool.
 * @returns The built tool: description 'Custom tool', empty-object input
 * schema, and a handler that resolves to `{ ok: true }`.
 */
function makeCustomTool(name: string) {
	const builder = new Tool(name)
		.description('Custom tool')
		.input(z.object({}))
		.handler(async () => await Promise.resolve({ ok: true }));

	return builder.build();
}
/** Creates a fresh 'parent' agent with a model and instructions set and no tools registered. */
function makeAgent() {
	const agent = new Agent('parent');
	return agent.model('openai', 'gpt-4o-mini').instructions('Test agent.');
}
// Tool names reserved for SDK built-ins: custom tools using them are rejected on
// both registration paths, while the SDK's own factory-built tools are accepted.
describe('SDK reserved built-in tool names', () => {
it.each([DELEGATE_SUB_AGENT_TOOL_NAME, WRITE_TODOS_TOOL_NAME])(
'rejects a custom static tool named %s',
(toolName) => {
// Registration itself must throw; no build or run is needed.
expect(() => makeAgent().tool(makeCustomTool(toolName))).toThrow(
`Tool name "${toolName}" is reserved for SDK built-in tools`,
);
},
);
it.each([DELEGATE_SUB_AGENT_TOOL_NAME, WRITE_TODOS_TOOL_NAME])(
'rejects a deferred tool named %s',
(toolName) => {
expect(() => makeAgent().deferredTool(makeCustomTool(toolName))).toThrow(
`Tool name "${toolName}" is reserved for SDK built-in tools`,
);
},
);
it('allows official SDK built-in tools to be registered', () => {
const agent = makeAgent().tool(createDelegateSubAgentTool()).tool(createWriteTodosTool());
// Both tools are declared in registration order and are recognised as SDK-owned.
expect(agent.declaredTools.map((tool) => tool.name)).toEqual([
DELEGATE_SUB_AGENT_TOOL_NAME,
WRITE_TODOS_TOOL_NAME,
]);
expect(agent.declaredTools.every((tool) => isSdkOwnedBuiltInTool(tool))).toBe(true);
});
});

View File

@ -318,4 +318,56 @@ describe('wrapToolForApproval — telemetry propagation', () => {
expect(capturedCtx).toBeDefined(); expect(capturedCtx).toBeDefined();
expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry); expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry);
}); });
it('forwards the full ToolContext to the original handler after approval', async () => {
let capturedCtx: ToolContext | undefined;
const baseTool = makeBuiltTool({
handler: async (_input, ctx) => {
capturedCtx = ctx as ToolContext;
return await Promise.resolve({ result: 'ok' });
},
});
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx } = makeCtx({ approved: true });
const abortController = new AbortController();
const emitEvent = vi.fn();
ctx.parentTelemetry = fakeTelemetry;
ctx.runId = 'parent-run-1';
ctx.toolCallId = 'tool-call-1';
ctx.persistence = { resourceId: 'resource-1', threadId: 'thread-1' };
ctx.emitEvent = emitEvent;
ctx.abortSignal = abortController.signal;
await wrapped.handler!({ id: 'test' }, ctx);
expect(capturedCtx).toEqual({
runId: 'parent-run-1',
toolCallId: 'tool-call-1',
persistence: { resourceId: 'resource-1', threadId: 'thread-1' },
parentTelemetry: fakeTelemetry,
emitEvent,
abortSignal: abortController.signal,
suspend: ctx.suspend,
resumeData: { approved: true },
});
});
it('forwards the full ToolContext when approval is not needed', async () => {
let capturedCtx: ToolContext | undefined;
const baseTool = makeBuiltTool({
handler: async (_input, ctx) => {
capturedCtx = ctx as ToolContext;
return await Promise.resolve({ result: 'ok' });
},
});
const wrapped = wrapToolForApproval(baseTool, { requireApproval: false });
const { ctx } = makeCtx();
ctx.runId = 'parent-run-2';
ctx.toolCallId = 'tool-call-2';
await wrapped.handler!({ id: 'test' }, ctx);
expect(capturedCtx?.runId).toBe('parent-run-2');
expect(capturedCtx?.toolCallId).toBe('tool-call-2');
});
}); });

View File

@ -1,15 +1,30 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils'; import type { ProviderOptions } from '@ai-sdk/provider-utils';
import { z } from 'zod'; import type { z } from 'zod';
import { getModelCost } from './catalog';
import type { Eval } from './eval'; import type { Eval } from './eval';
import type { McpClient } from './mcp-client'; import type { McpClient } from './mcp-client';
import { Memory, normalizeMemoryConfig, resolveMemoryConfigDefaults } from './memory'; import { Memory, normalizeMemoryConfig, resolveMemoryConfigDefaults } from './memory';
import { Telemetry } from './telemetry'; import { Telemetry } from './telemetry';
import { Tool, wrapToolForApproval } from './tool'; import { wrapToolForApproval } from './tool';
import { AgentRuntime } from '../runtime/agent-runtime'; import { AgentRuntime, type AgentRuntimeConfig } from '../runtime/agent-runtime';
import { LOAD_TOOL_TOOL_NAME, SEARCH_TOOLS_TOOL_NAME } from '../runtime/deferred-tool-manager'; import { LOAD_TOOL_TOOL_NAME, SEARCH_TOOLS_TOOL_NAME } from '../runtime/deferred-tool-manager';
import {
DELEGATE_SUB_AGENT_TOOL_NAME,
INLINE_SUB_AGENT_ID,
createDelegateSubAgentTool,
failedDelegatedChildSuspendOutput,
generateResultToDelegateSubAgentOutput,
getInlineDelegateSubAgentToolOptions,
renderDelegateSubAgentPrompt,
type DelegateSubAgentRequest,
type DelegateSubAgentToolOutput,
} from '../runtime/delegate-sub-agent-tool';
import { RECALL_MEMORY_TOOL_NAME } from '../runtime/episodic-memory';
import { AgentEventBus } from '../runtime/event-bus'; import { AgentEventBus } from '../runtime/event-bus';
import { createAgentToolResult } from '../runtime/tool-adapter'; import { RunStateManager } from '../runtime/run-state';
import { isSdkOwnedBuiltInTool } from '../runtime/sdk-owned-tool';
import { WRITE_TODOS_TOOL_NAME } from '../runtime/write-todos-tool';
import { import {
appendSkillCatalogToInstructions, appendSkillCatalogToInstructions,
createRuntimeSkillSource, createRuntimeSkillSource,
@ -34,26 +49,41 @@ import type {
ModelConfig, ModelConfig,
Provider, Provider,
RunOptions, RunOptions,
SerializableAgentState,
StreamResult, StreamResult,
SubAgentUsage,
ThinkingConfig, ThinkingConfig,
ThinkingConfigFor, ThinkingConfigFor,
ResumeOptions, ResumeOptions,
} from '../types'; } from '../types';
import type { AgentEvent } from '../types/runtime/event'; import type { AgentEvent } from '../types/runtime/event';
import type { StreamChunk } from '../types/sdk/agent';
import type { AgentBuilder } from '../types/sdk/agent-builder'; import type { AgentBuilder } from '../types/sdk/agent-builder';
import type { AgentMessage } from '../types/sdk/message'; import type { AgentMessage } from '../types/sdk/message';
import type { Workspace } from '../workspace/workspace'; import type { Workspace } from '../workspace/workspace';
type ToolParameter = BuiltTool | { build(): BuiltTool }; type ToolParameter = BuiltTool | { build(): BuiltTool };
const SDK_INLINE_SUB_AGENT_BLOCKED_TOOL_NAMES = new Set([
DELEGATE_SUB_AGENT_TOOL_NAME,
RECALL_MEMORY_TOOL_NAME,
WRITE_TODOS_TOOL_NAME,
]);
const SDK_RESERVED_BUILTIN_TOOL_NAMES = new Set([
DELEGATE_SUB_AGENT_TOOL_NAME,
WRITE_TODOS_TOOL_NAME,
]);
interface DeferredToolOptions { interface DeferredToolOptions {
search?: { search?: {
topK?: number; topK?: number;
}; };
} }
type ActiveRuntime = {
runtime: AgentRuntime;
bus: AgentEventBus;
};
/** /**
* Lightweight read-only view of an agent's configured state. * Lightweight read-only view of an agent's configured state.
* Returned by `Agent.snapshot` for testing and debugging purposes. * Returned by `Agent.snapshot` for testing and debugging purposes.
@ -132,8 +162,6 @@ export class Agent implements BuiltAgent, AgentBuilder {
private thinkingConfig?: ThinkingConfig; private thinkingConfig?: ThinkingConfig;
private runtime?: AgentRuntime;
private concurrencyValue?: number; private concurrencyValue?: number;
private telemetryBuilder?: Telemetry; private telemetryBuilder?: Telemetry;
@ -148,9 +176,11 @@ export class Agent implements BuiltAgent, AgentBuilder {
private defaultExecutionOptions?: ExecutionOptions; private defaultExecutionOptions?: ExecutionOptions;
private buildPromise: Promise<AgentRuntime> | undefined; private buildPromise: Promise<AgentRuntimeConfig> | undefined;
private eventBus = new AgentEventBus(); private agentHandlers = new Map<AgentEvent, Set<AgentEventHandler>>();
private activeRuntimes = new Set<ActiveRuntime>();
private workspaceInstance?: Workspace; private workspaceInstance?: Workspace;
@ -199,7 +229,7 @@ export class Agent implements BuiltAgent, AgentBuilder {
const tools = Array.isArray(t) ? t : [t]; const tools = Array.isArray(t) ? t : [t];
const builtTools = tools.map((tool) => ('build' in tool ? tool.build() : tool)); const builtTools = tools.map((tool) => ('build' in tool ? tool.build() : tool));
for (const built of builtTools) { for (const built of builtTools) {
this.assertToolNameAvailable(built.name); this.assertToolRegistrationAllowed(built);
} }
this.tools.push(...builtTools); this.tools.push(...builtTools);
return this; return this;
@ -210,6 +240,7 @@ export class Agent implements BuiltAgent, AgentBuilder {
const tools = Array.isArray(t) ? t : [t]; const tools = Array.isArray(t) ? t : [t];
for (const tool of tools) { for (const tool of tools) {
const built = 'build' in tool ? tool.build() : tool; const built = 'build' in tool ? tool.build() : tool;
this.assertReservedSdkBuiltInToolName(built);
this.deferredTools.push(built); this.deferredTools.push(built);
} }
if (options?.search?.topK !== undefined) { if (options?.search?.topK !== undefined) {
@ -379,7 +410,7 @@ export class Agent implements BuiltAgent, AgentBuilder {
} else { } else {
this.telemetryBuilder = undefined; this.telemetryBuilder = undefined;
this.telemetryConfig = t; this.telemetryConfig = t;
this.runtime?.setTelemetry(t); this.buildPromise = undefined;
} }
return this; return this;
} }
@ -479,7 +510,15 @@ export class Agent implements BuiltAgent, AgentBuilder {
* Handlers are called synchronously during the agentic loop. * Handlers are called synchronously during the agentic loop.
*/ */
on(event: AgentEvent, handler: AgentEventHandler): void { on(event: AgentEvent, handler: AgentEventHandler): void {
this.eventBus.on(event, handler); let handlers = this.agentHandlers.get(event);
if (!handlers) {
handlers = new Set();
this.agentHandlers.set(event, handlers);
}
handlers.add(handler);
for (const { bus } of this.activeRuntimes) {
bus.on(event, handler);
}
} }
/** /**
@ -488,68 +527,15 @@ export class Agent implements BuiltAgent, AgentBuilder {
* cleanly between turns instead of accumulating on a long-lived agent. * cleanly between turns instead of accumulating on a long-lived agent.
*/ */
off(event: AgentEvent, handler: AgentEventHandler): void { off(event: AgentEvent, handler: AgentEventHandler): void {
this.eventBus.off(event, handler); const handlers = this.agentHandlers.get(event);
} if (!handlers) return;
handlers.delete(handler);
/** if (handlers.size === 0) {
* Wrap this agent as a tool for use in multi-agent composition. this.agentHandlers.delete(event);
* The tool sends a text prompt to this agent and returns the text of the response. }
* for (const { bus } of this.activeRuntimes) {
* @example bus.off(event, handler);
* ```typescript }
* const coordinatorAgent = new Agent('coordinator')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('Route tasks to specialist agents.')
* .tool(writerAgent.asTool('Write content given a topic'));
* ```
*/
asTool(description: string): BuiltTool {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const agent = this;
const tool = new Tool(this.name)
.description(description)
.input(
z.object({
input: z.string().describe('The input to send to the agent'),
}),
)
.output(
z.object({
result: z.string().describe('The result of the agent'),
}),
)
.handler(async (rawInput, ctx) => {
const { input } = rawInput as { input: string };
const result = await agent.generate(input, {
telemetry: ctx.parentTelemetry,
} as RunOptions & ExecutionOptions);
const text = result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
// Collect sub-agent usage: this agent's own + any nested sub-agents
const subAgentUsage: SubAgentUsage[] = [];
if (result.usage) {
subAgentUsage.push({ agent: agent.name, model: result.model, usage: result.usage });
}
if (result.subAgentUsage) {
subAgentUsage.push(...result.subAgentUsage);
}
// Return branded result — the runtime unwraps it to extract sub-agent usage.
// createAgentToolResult returns `never`, same pattern as ctx.suspend().
if (subAgentUsage.length > 0) {
return createAgentToolResult({ result: text }, subAgentUsage);
}
return { result: text };
});
return tool.build();
} }
/** /**
@ -593,25 +579,14 @@ export class Agent implements BuiltAgent, AgentBuilder {
}; };
} }
/**
 * Return the latest state snapshot of the agent.
 *
 * When a runtime exists its own snapshot is returned; before that an idle
 * placeholder with an empty message list and no pending tool calls is produced.
 */
getState(): SerializableAgentState {
	if (this.runtime) {
		return this.runtime.getState();
	}
	const idleState: SerializableAgentState = {
		persistence: undefined,
		status: 'idle',
		messageList: { messages: [], historyIds: [], inputIds: [], responseIds: [] },
		pendingToolCalls: {},
	};
	return idleState;
}
/** /**
* Cancel the currently running agent. * Cancel the currently running agent.
* Synchronous sets an abort flag; the agentic loop checks it asynchronously. * Synchronous sets an abort flag; the agentic loop checks it asynchronously.
*/ */
abort(): void { abort(): void {
this.eventBus.abort(); for (const { bus } of this.activeRuntimes) {
bus.abort();
}
} }
/** /**
@ -627,7 +602,10 @@ export class Agent implements BuiltAgent, AgentBuilder {
*/ */
async close(): Promise<void> { async close(): Promise<void> {
const tasks: Array<Promise<unknown>> = []; const tasks: Array<Promise<unknown>> = [];
if (this.runtime) tasks.push(this.runtime.dispose()); for (const active of this.activeRuntimes) {
active.bus.abort();
tasks.push(this.cleanupRuntime(active));
}
tasks.push(...this.mcpClients.map(async (c) => await c.close())); tasks.push(...this.mcpClients.map(async (c) => await c.close()));
await Promise.allSettled(tasks); await Promise.allSettled(tasks);
} }
@ -637,9 +615,14 @@ export class Agent implements BuiltAgent, AgentBuilder {
input: AgentMessage[] | string, input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions, options?: RunOptions & ExecutionOptions,
): Promise<GenerateResult> { ): Promise<GenerateResult> {
const runtime = await this.ensureBuilt(); const config = await this.ensureBuilt();
const active = this.createRuntime(config);
const mergedOptions = this.mergeWithDefaults(options); const mergedOptions = this.mergeWithDefaults(options);
return await runtime.generate(this.toMessages(input), mergedOptions); try {
return await active.runtime.generate(this.toMessages(input), mergedOptions);
} finally {
await this.cleanupRuntime(active);
}
} }
/** Stream a response. Lazy-builds on first call. */ /** Stream a response. Lazy-builds on first call. */
@ -647,9 +630,16 @@ export class Agent implements BuiltAgent, AgentBuilder {
input: AgentMessage[] | string, input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions, options?: RunOptions & ExecutionOptions,
): Promise<StreamResult> { ): Promise<StreamResult> {
const runtime = await this.ensureBuilt(); const config = await this.ensureBuilt();
const active = this.createRuntime(config);
const mergedOptions = this.mergeWithDefaults(options); const mergedOptions = this.mergeWithDefaults(options);
return await runtime.stream(this.toMessages(input), mergedOptions); try {
const result = await active.runtime.stream(this.toMessages(input), mergedOptions);
return { ...result, stream: this.trackStreamRuntime(result.stream, active) };
} catch (error) {
await this.cleanupRuntime(active);
throw error;
}
} }
/** Resume a suspended tool call with data. Lazy-builds on first call. */ /** Resume a suspended tool call with data. Lazy-builds on first call. */
@ -668,11 +658,23 @@ export class Agent implements BuiltAgent, AgentBuilder {
data: unknown, data: unknown,
options: ResumeOptions & ExecutionOptions, options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult | StreamResult> { ): Promise<GenerateResult | StreamResult> {
const runtime = await this.ensureBuilt(); const config = await this.ensureBuilt();
if (method === 'generate') { if (method === 'generate') {
return await runtime.resume('generate', data, options); const active = this.createRuntime(config, options.runId);
try {
return await active.runtime.resume('generate', data, options);
} finally {
await this.cleanupRuntime(active);
}
}
const active = this.createRuntime(config, options.runId);
try {
const result = await active.runtime.resume('stream', data, options);
return { ...result, stream: this.trackStreamRuntime(result.stream, active) };
} catch (error) {
await this.cleanupRuntime(active);
throw error;
} }
return await runtime.resume('stream', data, options);
} }
approve(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>; approve(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>;
@ -711,7 +713,7 @@ export class Agent implements BuiltAgent, AgentBuilder {
* concurrent callers share one build operation. On error the promise is * concurrent callers share one build operation. On error the promise is
* cleared so the caller can retry. * cleared so the caller can retry.
*/ */
private async ensureBuilt(): Promise<AgentRuntime> { private async ensureBuilt(): Promise<AgentRuntimeConfig> {
if (!this.buildPromise) { if (!this.buildPromise) {
const p = this.build(); const p = this.build();
this.buildPromise = p; this.buildPromise = p;
@ -722,13 +724,76 @@ export class Agent implements BuiltAgent, AgentBuilder {
return await this.buildPromise; return await this.buildPromise;
} }
/**
 * Create a runtime with its own event bus and register it as active.
 *
 * Every handler currently stored in `agentHandlers` is subscribed on the new
 * bus before the runtime is constructed.
 *
 * @param config Built runtime configuration; `eventBus` and `runId` are overridden here.
 * @param runId Optional run id forwarded to the runtime.
 * @returns The `{ runtime, bus }` pair that was added to `activeRuntimes`.
 */
private createRuntime(config: AgentRuntimeConfig, runId?: string): ActiveRuntime {
	const bus = new AgentEventBus();
	this.agentHandlers.forEach((handlers, event) => {
		handlers.forEach((handler) => {
			bus.on(event, handler);
		});
	});
	const active = {
		runtime: new AgentRuntime({ ...config, eventBus: bus, runId }),
		bus,
	};
	this.activeRuntimes.add(active);
	return active;
}
/**
 * Wrap a runtime's output stream so the runtime is cleaned up when the stream ends.
 *
 * The returned stream forwards chunks read from `stream` and runs cleanup once
 * the source is done, a read fails, or the consumer cancels.
 *
 * @param stream Source stream produced by the runtime.
 * @param active Runtime/bus pair to pass to `cleanupRuntime` when the stream finishes.
 * @returns A new stream that relays the source's chunks.
 */
private trackStreamRuntime(
stream: ReadableStream<StreamChunk>,
active: ActiveRuntime,
): ReadableStream<StreamChunk> {
// Taking the reader locks the source stream to this wrapper.
const reader = stream.getReader();
// Memoizes the cleanup so every caller awaits the same single run.
let cleanupPromise: Promise<void> | undefined;
const cleanup = async () => {
const doCleanup = async () => {
try {
reader.releaseLock();
} catch {
// The lock may already be released after cancellation/error cleanup.
}
await this.cleanupRuntime(active);
};
// Only the first call starts doCleanup(); later calls reuse its promise.
cleanupPromise ??= doCleanup();
return await cleanupPromise;
};
return new ReadableStream<StreamChunk>({
// Each pull relays one chunk from the source reader.
async pull(controller) {
try {
const { done, value } = await reader.read();
if (done) {
// Source exhausted: close the wrapper, then clean up the runtime.
controller.close();
await cleanup();
return;
}
controller.enqueue(value);
} catch (error) {
// Surface the failure to the consumer, then clean up the runtime.
controller.error(error);
await cleanup();
}
},
// Consumer cancellation is propagated to the source; cleanup runs even if that fails.
async cancel(reason) {
try {
await reader.cancel(reason);
} finally {
await cleanup();
}
},
});
}
/**
 * Dispose an active runtime and its event bus.
 *
 * Does nothing when `active` is no longer in `activeRuntimes`, so repeated
 * calls for the same runtime dispose it only once.
 */
private async cleanupRuntime(active: ActiveRuntime): Promise<void> {
	const wasTracked = this.activeRuntimes.delete(active);
	if (wasTracked) {
		active.bus.dispose();
		await active.runtime.dispose();
	}
}
private toMessages(input: string | AgentMessage[]): AgentMessage[] { private toMessages(input: string | AgentMessage[]): AgentMessage[] {
if (Array.isArray(input)) return input; if (Array.isArray(input)) return input;
return [{ role: 'user', content: [{ type: 'text', text: input }] }]; return [{ role: 'user', content: [{ type: 'text', text: input }] }];
} }
/** @internal Validate configuration and produce an AgentRuntime. Overridden by the execution engine. */ /** @internal Validate configuration and produce an AgentRuntime. Overridden by the execution engine. */
protected async build(): Promise<AgentRuntime> { protected async build(): Promise<AgentRuntimeConfig> {
if (!this.modelConfig) { if (!this.modelConfig) {
throw new Error(`Agent "${this.name}" requires a model`); throw new Error(`Agent "${this.name}" requires a model`);
} }
@ -826,7 +891,7 @@ export class Agent implements BuiltAgent, AgentBuilder {
); );
} }
const allTools = [...finalStaticTools, ...mcpTools]; let allTools = [...finalStaticTools, ...mcpTools];
// Validate checkpoint again after discovering actual MCP tools // Validate checkpoint again after discovering actual MCP tools
// (catches the case where MCP tools have suspendSchema after listing). // (catches the case where MCP tools have suspendSchema after listing).
@ -856,34 +921,167 @@ export class Agent implements BuiltAgent, AgentBuilder {
instructions = `${instructions}\n\n${wsInstructions}`; instructions = `${instructions}\n\n${wsInstructions}`;
} }
} }
const telemetry = this.telemetryConfig ?? (await this.telemetryBuilder?.build());
const toolSearch =
finalDeferredTools.length > 0 && this.deferredToolSearchTopK !== undefined
? { topK: this.deferredToolSearchTopK }
: undefined;
this.runtime = new AgentRuntime({ allTools = this.completeInlineDelegateTools(allTools, {
deferredTools: finalDeferredTools,
modelConfig,
providerTools: this.providerTools,
...(telemetry !== undefined ? { telemetry } : {}),
...(this.concurrencyValue !== undefined
? { toolCallConcurrency: this.concurrencyValue }
: {}),
...(toolSearch !== undefined ? { toolSearch } : {}),
});
let modelCost: Awaited<ReturnType<typeof getModelCost>> | undefined;
try {
const modelId =
typeof modelConfig === 'string'
? modelConfig
: 'id' in modelConfig && typeof modelConfig.id === 'string'
? modelConfig.id
: undefined;
modelCost = modelId ? await getModelCost(modelId) : undefined;
} catch {
modelCost = undefined;
}
const runState = new RunStateManager(this.checkpointStore);
return {
name: this.name, name: this.name,
model: modelConfig, model: modelConfig,
instructions, instructions,
tools: allTools.length > 0 ? allTools : undefined, tools: allTools.length > 0 ? allTools : undefined,
deferredTools: finalDeferredTools.length > 0 ? finalDeferredTools : undefined, deferredTools: finalDeferredTools.length > 0 ? finalDeferredTools : undefined,
toolSearch: toolSearch,
finalDeferredTools.length > 0 && this.deferredToolSearchTopK !== undefined
? { topK: this.deferredToolSearchTopK }
: undefined,
instructionProviderOptions: this.instructionProviderOpts, instructionProviderOptions: this.instructionProviderOpts,
providerTools: this.providerTools.length > 0 ? this.providerTools : undefined, providerTools: this.providerTools.length > 0 ? this.providerTools : undefined,
memory: memoryConfig?.memory, memory: memoryConfig?.memory,
observationLog: memoryConfig?.observationLog, observationLog: memoryConfig?.observationLog,
observationalMemory: memoryConfig?.observationalMemory, observationalMemory: memoryConfig?.observationalMemory,
episodicMemory: memoryConfig?.episodicMemory, episodicMemory: memoryConfig?.episodicMemory,
semanticRecall: memoryConfig?.semanticRecall,
structuredOutput: this.outputSchema, structuredOutput: this.outputSchema,
checkpointStorage: this.checkpointStore, checkpointStorage: this.checkpointStore,
thinking: this.thinkingConfig, thinking: this.thinkingConfig,
eventBus: this.eventBus,
toolCallConcurrency: this.concurrencyValue, toolCallConcurrency: this.concurrencyValue,
titleGeneration: memoryConfig?.titleGeneration, titleGeneration: memoryConfig?.titleGeneration,
telemetry: this.telemetryConfig ?? (await this.telemetryBuilder?.build()), telemetry: this.telemetryConfig ?? (await this.telemetryBuilder?.build()),
}); modelCost,
runState,
};
}
return this.runtime; private completeInlineDelegateTools(
tools: BuiltTool[],
options: {
deferredTools: BuiltTool[];
modelConfig: ModelConfig;
providerTools: BuiltProviderTool[];
telemetry?: BuiltTelemetry;
toolCallConcurrency?: number;
toolSearch?: { topK?: number };
},
): BuiltTool[] {
return tools.map((tool) => {
const delegateOptions = getInlineDelegateSubAgentToolOptions(tool);
if (!delegateOptions) return tool;
const runInlineSubAgent = this.createInlineSubAgentRunner({
...options,
tools,
inlineSubAgentBlockedTools: delegateOptions.inlineSubAgentBlockedTools,
});
const hostRunner = delegateOptions.runSubAgent;
const completedTool = createDelegateSubAgentTool({
...delegateOptions,
runSubAgent: async (request, _helpersFromHandler) => {
const helpers = { runInlineSubAgent };
if (hostRunner) {
return await hostRunner(request, helpers);
}
if (request.subAgentId === INLINE_SUB_AGENT_ID) {
return await runInlineSubAgent(request);
}
return {
status: 'failed',
taskPath: request.taskPath,
answer: '',
error: `No configured subagent matched "${request.subAgentId}". Use "inline" for an inline sub-agent, or pass one of the configured subagent IDs.`,
};
},
});
if (tool.withDefaultApproval) {
return wrapToolForApproval(completedTool, { requireApproval: true });
}
return completedTool;
});
}
/**
 * Build the function that runs a delegated task on a throwaway child runtime.
 *
 * @param options Parent model, tools, provider tools, deferred tools and optional
 *   telemetry/concurrency/tool-search settings to reuse for the child, plus the
 *   host-supplied list of tool names the child must not receive.
 * @returns An async runner that executes one delegate request and reports its outcome.
 */
private createInlineSubAgentRunner(options: {
deferredTools: BuiltTool[];
modelConfig: ModelConfig;
providerTools: BuiltProviderTool[];
telemetry?: BuiltTelemetry;
toolCallConcurrency?: number;
toolSearch?: { topK?: number };
tools: BuiltTool[];
inlineSubAgentBlockedTools?: string[];
}): (request: DelegateSubAgentRequest) => Promise<DelegateSubAgentToolOutput> {
return async (request) => {
// Remove blocked tool names from every tool category before handing them to the child.
const tools = filterInlineSubAgentTools(options.tools, options.inlineSubAgentBlockedTools);
const deferredTools = filterInlineSubAgentTools(
options.deferredTools,
options.inlineSubAgentBlockedTools,
);
const providerTools = filterInlineSubAgentTools(
options.providerTools,
options.inlineSubAgentBlockedTools,
);
// A new runtime is created per request and named after the parent agent and the task.
const childRuntime = new AgentRuntime({
name: `${this.name}:${request.taskName}`,
model: options.modelConfig,
instructions:
'You are a focused subagent working on a specific delegated task. Complete the delegated task independently and return a concise, self-contained summary to your parent agent.',
// Empty lists are passed as undefined rather than as empty arrays.
tools: tools.length > 0 ? tools : undefined,
deferredTools: deferredTools.length > 0 ? deferredTools : undefined,
// Tool search is only configured when deferred tools remain after filtering.
toolSearch: deferredTools.length > 0 ? options.toolSearch : undefined,
providerTools: providerTools.length > 0 ? providerTools : undefined,
instructionProviderOptions: this.instructionProviderOpts,
checkpointStorage: this.checkpointStore,
thinking: this.thinkingConfig,
// Optional settings are spread in only when defined, so the key is omitted otherwise.
...(options.telemetry !== undefined ? { telemetry: options.telemetry } : {}),
...(options.toolCallConcurrency !== undefined
? { toolCallConcurrency: options.toolCallConcurrency }
: {}),
});
try {
// The parent's abort signal, when present, is forwarded to the child run.
const result = await childRuntime.generate(renderDelegateSubAgentPrompt(request), {
...(request.parentAbortSignal !== undefined
? { abortSignal: request.parentAbortSignal }
: {}),
...(options.telemetry !== undefined ? { telemetry: options.telemetry } : {}),
});
// A child run that ends with pending suspensions is reported as a failure.
if (result.pendingSuspend !== undefined && result.pendingSuspend.length > 0) {
return failedDelegatedChildSuspendOutput(request.taskPath);
}
return generateResultToDelegateSubAgentOutput(request.taskPath, result);
} finally {
// The child runtime is disposed whether the run succeeded or threw.
await childRuntime.dispose();
}
};
}
private assertToolRegistrationAllowed(tool: BuiltTool): void {
this.assertToolNameAvailable(tool.name);
this.assertReservedSdkBuiltInToolName(tool);
} }
private assertToolNameAvailable(toolName: string): void { private assertToolNameAvailable(toolName: string): void {
@ -892,6 +1090,13 @@ export class Agent implements BuiltAgent, AgentBuilder {
throw new Error(`Tool name "${toolName}" is reserved for runtime skills`); throw new Error(`Tool name "${toolName}" is reserved for runtime skills`);
} }
/**
 * Reject tools that use a name reserved for SDK built-in tools.
 *
 * @throws Error when the name is in `SDK_RESERVED_BUILTIN_TOOL_NAMES` and the
 *   tool does not pass `isSdkOwnedBuiltInTool`.
 */
private assertReservedSdkBuiltInToolName(tool: BuiltTool): void {
	const usesReservedName = SDK_RESERVED_BUILTIN_TOOL_NAMES.has(tool.name);
	if (usesReservedName && !isSdkOwnedBuiltInTool(tool)) {
		throw new Error(`Tool name "${tool.name}" is reserved for SDK built-in tools`);
	}
}
private removeRuntimeSkillTools(): void { private removeRuntimeSkillTools(): void {
if (!this.hasRuntimeSkillTool) return; if (!this.hasRuntimeSkillTool) return;
@ -900,6 +1105,18 @@ export class Agent implements BuiltAgent, AgentBuilder {
} }
} }
/**
 * Combine the SDK's blocked tool names with any names blocked by the host.
 *
 * @param hostBlockedTools Extra tool names to block; omitted means none.
 * @returns A set containing the SDK names followed by the host names.
 */
export function buildInlineSubAgentBlockedToolNames(hostBlockedTools?: string[]): Set<string> {
	const blocked = new Set<string>(SDK_INLINE_SUB_AGENT_BLOCKED_TOOL_NAMES);
	for (const toolName of hostBlockedTools ?? []) {
		blocked.add(toolName);
	}
	return blocked;
}
/**
 * Drop every tool whose name is blocked for inline sub-agents.
 *
 * @param tools Candidate tools; the input array is not modified.
 * @param hostBlockedTools Extra tool names to block on top of the SDK's own list.
 * @returns A new array with the remaining tools in their original order.
 */
export function filterInlineSubAgentTools<T extends { readonly name: string }>(
	tools: T[],
	hostBlockedTools?: string[],
): T[] {
	const blockedNames = buildInlineSubAgentBlockedToolNames(hostBlockedTools);
	const isAllowed = (candidate: T): boolean => !blockedNames.has(candidate.name);
	return tools.filter(isAllowed);
}
function findDuplicateToolNames(tools: BuiltTool[]): string[] { function findDuplicateToolNames(tools: BuiltTool[]): string[] {
const seen = new Set<string>(); const seen = new Set<string>();
const duplicates = new Set<string>(); const duplicates = new Set<string>();

View File

@ -0,0 +1,29 @@
/**
* Pass as `resumeData` to `agent.resume()` to cancel a suspended tool call
* and steer the agent with a new message instead of answering the tool.
*
* Uses a JSON-serializable `_type` string so it survives HTTP round-trips:
* frontend code can construct `{ _type: 'agent.cancellation', message }`
* without importing this package.
*/
/** Discriminator value stored in the `_type` field of every cancellation object. */
export const CANCELLATION_TYPE = 'agent.cancellation' as const;

/** Plain-object marker asking to cancel a suspended tool call and steer with a message. */
export interface Cancellation {
	readonly _type: typeof CANCELLATION_TYPE;
	/** The user's steering message provided when cancelling. */
	readonly message: string;
}

/** Build a cancellation object carrying the given steering message. */
export function createCancellation(message: string): Cancellation {
	const cancellation: Cancellation = { _type: CANCELLATION_TYPE, message };
	return cancellation;
}

/**
 * Type guard: true when `value` is a non-null object whose `_type` equals
 * `CANCELLATION_TYPE` and whose `message` is a string.
 */
export function isCancellation(value: unknown): value is Cancellation {
	if (typeof value !== 'object' || value === null) {
		return false;
	}
	const candidate = value as Record<string, unknown>;
	return candidate._type === CANCELLATION_TYPE && typeof candidate.message === 'string';
}

View File

@ -1,5 +1,16 @@
const MODELS_DEV_URL = 'https://models.dev/api.json'; const MODELS_DEV_URL = 'https://models.dev/api.json';
/**
 * Maps provider ids as they appear in the models.dev catalog to the provider
 * ids used by this package. Ids without an entry are kept unchanged
 * (see `toAgentProviderId`).
 */
const MODELS_DEV_PROVIDER_ALIASES: Record<string, string> = {
'amazon-bedrock': 'aws-bedrock',
azure: 'azure-openai',
// Two models.dev ids map to the same target id.
'azure-cognitive-services': 'azure-openai',
};
/**
 * Display names keyed by agent provider id. `fetchProviderCatalog` uses these
 * in preference to the name reported by models.dev for the same provider.
 */
const AGENT_PROVIDER_NAMES: Record<string, string> = {
'aws-bedrock': 'AWS Bedrock',
'azure-openai': 'Azure OpenAI',
};
/** Cost per million tokens. */ /** Cost per million tokens. */
export interface ModelCost { export interface ModelCost {
/** Cost per million input tokens (USD). */ /** Cost per million input tokens (USD). */
@ -67,6 +78,10 @@ interface ModelsDevProvider {
models?: Record<string, ModelsDevModel>; models?: Record<string, ModelsDevModel>;
} }
/**
 * Translate a models.dev provider id into the provider id used by this package.
 *
 * Only own entries of `MODELS_DEV_PROVIDER_ALIASES` count as aliases. A bare
 * index lookup would also resolve inherited `Object.prototype` members, so an
 * id such as `constructor` or `__proto__` from the external catalog would
 * yield a non-string value instead of falling through.
 *
 * @param modelsDevProviderId Provider key from the models.dev catalog.
 * @returns The aliased id when one is configured, otherwise the input unchanged.
 */
function toAgentProviderId(modelsDevProviderId: string): string {
	const alias = Object.prototype.hasOwnProperty.call(
		MODELS_DEV_PROVIDER_ALIASES,
		modelsDevProviderId,
	)
		? MODELS_DEV_PROVIDER_ALIASES[modelsDevProviderId]
		: undefined;
	return alias ?? modelsDevProviderId;
}
/** /**
* Fetch the provider/model catalog from models.dev. * Fetch the provider/model catalog from models.dev.
* *
@ -120,10 +135,14 @@ export async function fetchProviderCatalog(): Promise<ProviderCatalog> {
models[modelId] = info; models[modelId] = info;
} }
catalog[key] = { const providerId = toAgentProviderId(key);
id: provider.id, catalog[providerId] = {
name: provider.name, id: providerId,
models, name: catalog[providerId]?.name ?? AGENT_PROVIDER_NAMES[providerId] ?? provider.name,
models: {
...(catalog[providerId]?.models ?? {}),
...models,
},
}; };
} }

View File

@ -23,7 +23,6 @@ import type {
EpisodicMemoryConfig, EpisodicMemoryConfig,
MemoryConfig, MemoryConfig,
ObservationalMemoryConfig, ObservationalMemoryConfig,
SemanticRecallConfig,
TitleGenerationConfig, TitleGenerationConfig,
} from '../types'; } from '../types';
import type { ModelConfig } from '../types/sdk/agent'; import type { ModelConfig } from '../types/sdk/agent';
@ -165,8 +164,6 @@ export function normalizeMemoryConfig(config: MemoryConfig): MemoryConfig {
* ``` * ```
*/ */
export class Memory { export class Memory {
private semanticRecallConfig?: SemanticRecallConfig;
private episodicMemoryConfig?: EpisodicMemoryConfig; private episodicMemoryConfig?: EpisodicMemoryConfig;
private memoryBackend?: BuiltMemory; private memoryBackend?: BuiltMemory;
@ -190,12 +187,6 @@ export class Memory {
return this; return this;
} }
/**
 * Enable semantic recall (RAG-based retrieval of relevant past messages).
 *
 * This only stores the config on the builder; the backend and embedder
 * requirements are checked later in `build()`.
 *
 * @param config Semantic recall settings to store.
 * @returns This builder, for chaining.
 */
semanticRecall(config: SemanticRecallConfig): this {
this.semanticRecallConfig = config;
return this;
}
/** Enable source-backed cross-session episodic memory. */ /** Enable source-backed cross-session episodic memory. */
episodicMemory(config: EpisodicMemoryConfig = {}): this { episodicMemory(config: EpisodicMemoryConfig = {}): this {
if (config.enabled === false) { if (config.enabled === false) {
@ -233,26 +224,10 @@ export class Memory {
/** /**
* Validate configuration and produce a `MemoryConfig`. * Validate configuration and produce a `MemoryConfig`.
*
* @throws if `.semanticRecall()` is used with a backend that doesn't support search()
*/ */
build(): MemoryConfig { build(): MemoryConfig {
const memory: BuiltMemory = this.memoryBackend ?? new InMemoryMemory(); const memory: BuiltMemory = this.memoryBackend ?? new InMemoryMemory();
if (this.semanticRecallConfig) {
if (!memory.queryEmbeddings && !memory.search) {
throw new Error(
'Semantic recall requires a storage backend with queryEmbeddings() or search() support.',
);
}
if (!memory.search && !this.semanticRecallConfig.embedder) {
throw new Error(
'Semantic recall requires an embedder when using queryEmbeddings(). Add embedder to your semanticRecall config: ' +
".semanticRecall({ topK: 5, embedder: 'openai/text-embedding-3-small' })",
);
}
}
if (isEpisodicMemoryEnabled(this.episodicMemoryConfig)) { if (isEpisodicMemoryEnabled(this.episodicMemoryConfig)) {
if (!hasEpisodicMemoryStore(memory)) { if (!hasEpisodicMemoryStore(memory)) {
throw new Error( throw new Error(
@ -263,7 +238,6 @@ export class Memory {
const baseConfig = { const baseConfig = {
memory, memory,
semanticRecall: this.semanticRecallConfig,
episodicMemory: this.episodicMemoryConfig, episodicMemory: this.episodicMemoryConfig,
titleGeneration: this.titleGenerationConfig, titleGeneration: this.titleGenerationConfig,
}; };

View File

@ -1,90 +0,0 @@
import type { Agent } from './agent';
import type { GenerateResult, RunOptions } from '../types';
import type { Message } from '../types/sdk/message';
/** Runnable form of a network, as produced by `Network.build()`. */
interface BuiltNetwork {
/** The network name. */
readonly name: string;
/** Run the network with a text prompt and optional run options. */
run(prompt: string, options?: RunOptions): Promise<GenerateResult>;
}
/**
 * Fluent builder for a multi-agent network led by a coordinator agent.
 *
 * Usage:
 * ```typescript
 * const network = new Network('content-team')
 *   .coordinator(coordinatorAgent)
 *   .agent(researcher)
 *   .agent(writer);
 *
 * const result = await network.run('Research and write about RAG');
 * ```
 */
export class Network {
	private networkName: string;

	private coordinatorAgent?: Agent;

	private agents: Agent[] = [];

	private built?: BuiltNetwork;

	constructor(name: string) {
		this.networkName = name;
	}

	/** The network name. */
	get name(): string {
		return this.networkName;
	}

	/** Set the coordinator agent that routes tasks to specialists. */
	coordinator(a: Agent): this {
		this.coordinatorAgent = a;
		return this;
	}

	/** Add a specialist agent to the network. */
	agent(a: Agent): this {
		this.agents.push(a);
		return this;
	}

	/** Run the network with a prompt. Lazy-builds on first call. */
	async run(prompt: string, options?: RunOptions): Promise<GenerateResult> {
		const network = this.ensureBuilt();
		return await network.run(prompt, options);
	}

	/** @internal Build once and reuse the result; a failed build is not cached. */
	private ensureBuilt(): BuiltNetwork {
		if (!this.built) {
			this.built = this.build();
		}
		return this.built;
	}

	/**
	 * @internal Validate the configuration and produce the runnable network.
	 *
	 * @throws Error when no coordinator is set or no specialist agent was added.
	 */
	protected build(): BuiltNetwork {
		const { coordinatorAgent, networkName, agents } = this;
		if (!coordinatorAgent) {
			throw new Error(`Network "${networkName}" requires a coordinator`);
		}
		if (agents.length === 0) {
			throw new Error(`Network "${networkName}" requires at least one agent`);
		}

		// TODO: Specialists are only validated here; they are not yet wired to the
		// coordinator automatically. Until multi-agent routing exists, add them
		// to the coordinator manually as tools (via agent.asTool()).
		return {
			name: networkName,
			async run(prompt: string, options?: RunOptions): Promise<GenerateResult> {
				const userMessage: Message = { role: 'user', content: [{ type: 'text', text: prompt }] };
				return await coordinatorAgent.generate([userMessage], options);
			},
		};
	}
}

View File

@ -32,6 +32,7 @@ export const providerCapabilities: Record<
cohere: {}, cohere: {},
vercel: {}, vercel: {},
openrouter: {}, openrouter: {},
nvidia: {},
'azure-openai': {}, 'azure-openai': {},
'aws-bedrock': {}, 'aws-bedrock': {},
}; };

View File

@ -56,18 +56,14 @@ export function wrapToolForApproval(tool: BuiltTool, config: ApprovalConfig): Bu
if (needs) { if (needs) {
return await interruptCtx.suspend({ type: 'approval', toolName: tool.name, args: input }); return await interruptCtx.suspend({ type: 'approval', toolName: tool.name, args: input });
} }
return await originalHandler(input, { return await originalHandler(input, interruptCtx as ToolContext);
parentTelemetry: interruptCtx.parentTelemetry,
} as ToolContext);
} }
const { approved } = interruptCtx.resumeData as z.infer<typeof APPROVAL_RESUME_SCHEMA>; const { approved } = interruptCtx.resumeData as z.infer<typeof APPROVAL_RESUME_SCHEMA>;
if (!approved) { if (!approved) {
return { declined: true, message: `Tool "${tool.name}" was not approved` }; return { declined: true, message: `Tool "${tool.name}" was not approved` };
} }
return await originalHandler(input, { return await originalHandler(input, interruptCtx as ToolContext);
parentTelemetry: interruptCtx.parentTelemetry,
} as ToolContext);
}, },
}; };
} }
@ -125,6 +121,8 @@ export class Tool<
private providerOptionsValue?: Record<string, JSONObject>; private providerOptionsValue?: Record<string, JSONObject>;
private handleCancellationValue?: boolean;
private requireApprovalValue?: boolean; private requireApprovalValue?: boolean;
private needsApprovalFnValue?: (args: unknown) => Promise<boolean> | boolean; private needsApprovalFnValue?: (args: unknown) => Promise<boolean> | boolean;
@ -214,6 +212,15 @@ export class Tool<
return this; return this;
} }
/**
 * Opt in to handle cancellations in the tool handler (`ctx.cancellation`).
 * By default, the runtime bypasses the handler and injects the steering message directly.
 *
 * Sets the flag that `build()` emits as `handleCancellation` on the built tool.
 *
 * @returns This builder, for chaining.
 */
handleCancellation(): this {
this.handleCancellationValue = true;
return this;
}
/** Require human approval before this tool executes. Mutually exclusive with .suspend()/.resume(). */ /** Require human approval before this tool executes. Mutually exclusive with .suspend()/.resume(). */
requireApproval(): this { requireApproval(): this {
this.requireApprovalValue = true; this.requireApprovalValue = true;
@ -281,6 +288,7 @@ export class Tool<
systemInstruction: this.systemInstructionText, systemInstruction: this.systemInstructionText,
suspendSchema: this.suspendSchemaValue, suspendSchema: this.suspendSchemaValue,
resumeSchema: this.resumeSchemaValue, resumeSchema: this.resumeSchemaValue,
handleCancellation: this.handleCancellationValue,
toMessage: this.toMessageFn as (output: unknown) => AgentMessage | undefined, toMessage: this.toMessageFn as (output: unknown) => AgentMessage | undefined,
toModelOutput: this.toModelOutputFn as ((output: unknown) => unknown) | undefined, toModelOutput: this.toModelOutputFn as ((output: unknown) => unknown) | undefined,
handler: this.handlerFn as ( handler: this.handlerFn as (

View File

@ -13,6 +13,7 @@ import {
parseRuntimeSkillMarkdown, parseRuntimeSkillMarkdown,
renderSkillCatalogPrompt, renderSkillCatalogPrompt,
} from '..'; } from '..';
import type { AgentRuntimeConfig } from '../../runtime/agent-runtime';
import { Agent } from '../../sdk/agent'; import { Agent } from '../../sdk/agent';
import { isZodSchema } from '../../utils/zod'; import { isZodSchema } from '../../utils/zod';
@ -474,8 +475,10 @@ Use the workflow SDK.`,
.model('anthropic/claude-sonnet-4-5') .model('anthropic/claude-sonnet-4-5')
.instructions('Base instructions.') .instructions('Base instructions.')
.skills(source); .skills(source);
const runtime = await (agent as unknown as { build(): Promise<unknown> }).build(); const runtimeConfig = await (
const instructions = (runtime as { config: { instructions: string } }).config.instructions; agent as unknown as { build(): Promise<AgentRuntimeConfig> }
).build();
const { instructions } = runtimeConfig;
expect(prepare).toHaveBeenCalledTimes(1); expect(prepare).toHaveBeenCalledTimes(1);
expect(instructions).toContain('name: "Summarize notes"'); expect(instructions).toContain('name: "Summarize notes"');

View File

@ -36,35 +36,6 @@ export abstract class BaseMemory<TConstructorOptions extends JSONObject = JSONOb
deleteMessages(_messageIds: string[]): Promise<void> { deleteMessages(_messageIds: string[]): Promise<void> {
throw new Error('Method not implemented.'); throw new Error('Method not implemented.');
} }
/**
 * Optional search hook. This base implementation ignores its arguments and
 * always throws `Method not implemented.`.
 */
search?(
_query: string,
_opts?: {
scope?: 'thread' | 'resource';
threadId?: string;
resourceId?: string;
topK?: number;
messageRange?: { before: number; after: number };
},
): Promise<AgentDbMessage[]> {
throw new Error('Method not implemented.');
}
/**
 * Optional hook for storing embedding entries. This base implementation
 * ignores its arguments and always throws `Method not implemented.`.
 */
saveEmbeddings?(_opts: {
scope?: 'thread' | 'resource';
threadId?: string;
resourceId?: string;
entries: Array<{ id: string; vector: number[]; text: string; model: string }>;
}): Promise<void> {
throw new Error('Method not implemented.');
}
/**
 * Optional hook for querying stored embeddings by vector. This base
 * implementation ignores its arguments and always throws `Method not implemented.`.
 */
queryEmbeddings?(_opts: {
scope?: 'thread' | 'resource';
threadId?: string;
resourceId?: string;
vector: number[];
topK: number;
}): Promise<Array<{ id: string; score: number }>> {
throw new Error('Method not implemented.');
}
close?(): Promise<void> { close?(): Promise<void> {
throw new Error('Method not implemented.'); throw new Error('Method not implemented.');

View File

@ -36,12 +36,12 @@ export type {
ModelConfig, ModelConfig,
RunOptions, RunOptions,
ExecutionOptions, ExecutionOptions,
SmoothStreamOptions,
AgentExecutionCounter, AgentExecutionCounter,
PersistedExecutionOptions, PersistedExecutionOptions,
ResumeOptions, ResumeOptions,
GenerateResult, GenerateResult,
StreamResult, StreamResult,
SubAgentUsage,
BuiltAgent, BuiltAgent,
AgentRunState, AgentRunState,
AgentResumeData, AgentResumeData,
@ -93,7 +93,6 @@ export type {
RetrievedEpisodicMemoryEntry, RetrievedEpisodicMemoryEntry,
ObservationCapableMemory, ObservationCapableMemory,
MemoryDescriptor, MemoryDescriptor,
SemanticRecallConfig,
MemoryConfig, MemoryConfig,
ObservationLogMemoryConfig, ObservationLogMemoryConfig,
ObservationalMemoryConfig, ObservationalMemoryConfig,

View File

@ -1,5 +1,36 @@
import type { FinishReason, TokenUsage } from '../sdk/agent';
import type { AgentMessage, ContentToolCall } from '../sdk/message'; import type { AgentMessage, ContentToolCall } from '../sdk/message';
/** The token-count and cost fields of `TokenUsage` reported in sub-agent lifecycle payloads. */
export type SubAgentLifecycleUsage = Pick<
TokenUsage,
'promptTokens' | 'completionTokens' | 'totalTokens' | 'cost'
>;
/** Identification fields shared by the sub-agent started and completed payloads. */
export interface SubAgentLifecycleBase {
taskName: string;
taskPath: string;
// The remaining ids are optional.
parentRunId?: string;
parentToolCallId?: string;
subAgentId?: string;
}
/** Payload carried by the `AgentEvent.SubAgentStarted` event. */
export interface SubAgentStartedPayload extends SubAgentLifecycleBase {
// NOTE(review): presumably an epoch-ms timestamp — confirm against the emitter.
startedAt: number;
}
/** Payload carried by the `AgentEvent.SubAgentCompleted` event. */
export interface SubAgentCompletedPayload extends SubAgentLifecycleBase {
/** Outcome of the child run. */
status: 'completed' | 'failed' | 'suspended';
// NOTE(review): timestamps are presumably epoch ms and durationMs their difference — confirm against the emitter.
startedAt: number;
finishedAt: number;
durationMs: number;
runId?: string;
/** The child run's memory thread id (`persistence.threadId`), so consumers can correlate or continue it. */
threadId?: string;
usage?: SubAgentLifecycleUsage;
finishReason?: FinishReason;
error?: string;
}
export const enum AgentEvent { export const enum AgentEvent {
AgentStart = 'agent_start', AgentStart = 'agent_start',
AgentEnd = 'agent_end', AgentEnd = 'agent_end',
@ -7,6 +38,8 @@ export const enum AgentEvent {
TurnEnd = 'turn_end', TurnEnd = 'turn_end',
ToolExecutionStart = 'tool_execution_start', ToolExecutionStart = 'tool_execution_start',
ToolExecutionEnd = 'tool_execution_end', ToolExecutionEnd = 'tool_execution_end',
SubAgentStarted = 'subagent_started',
SubAgentCompleted = 'subagent_completed',
Error = 'error', Error = 'error',
} }
@ -23,6 +56,8 @@ export type AgentEventData =
result: unknown; result: unknown;
isError: boolean; isError: boolean;
} }
| ({ type: AgentEvent.SubAgentStarted } & SubAgentStartedPayload)
| ({ type: AgentEvent.SubAgentCompleted } & SubAgentCompletedPayload)
| { | {
type: AgentEvent.Error; type: AgentEvent.Error;
message: string; message: string;

View File

@ -1,15 +1,21 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils'; import type { ProviderOptions } from '@ai-sdk/provider-utils';
import type { LanguageModel } from 'ai'; import type { LanguageModel, smoothStream } from 'ai';
import type { JsonSchema7Type } from 'zod-to-json-schema'; import type { JsonSchema7Type } from 'zod-to-json-schema';
import type { AgentMessage, ContentMetadata } from './message'; import type { AgentMessage, ContentMetadata } from './message';
import type { BuiltTool } from './tool';
import type { ProviderId, ProviderCredentials } from '../../runtime/provider-credentials'; import type { ProviderId, ProviderCredentials } from '../../runtime/provider-credentials';
import type { AgentEvent, AgentEventHandler } from '../runtime/event'; import type {
AgentEvent,
AgentEventHandler,
SubAgentCompletedPayload,
SubAgentStartedPayload,
} from '../runtime/event';
import type { SerializedMessageList } from '../runtime/message-list'; import type { SerializedMessageList } from '../runtime/message-list';
import type { BuiltTelemetry } from '../telemetry'; import type { BuiltTelemetry } from '../telemetry';
import type { JSONValue } from '../utils/json'; import type { JSONValue } from '../utils/json';
export type SmoothStreamOptions = NonNullable<Parameters<typeof smoothStream>[0]>;
export type FinishReason = export type FinishReason =
| 'stop' | 'stop'
| 'max-iterations' | 'max-iterations'
@ -90,6 +96,22 @@ export type StreamChunk = ContentMetadata &
type: 'tool-execution-start'; type: 'tool-execution-start';
toolCallId: string; toolCallId: string;
toolName: string; toolName: string;
/** Epoch ms when the handler started, measured on the runtime. */
startTime: number;
}
| {
/**
* Emitted as soon as an individual tool handler settles, bridged from
* the runtime event bus. Lets consumers flip a concurrent tool call to
* its terminal state immediately, instead of waiting for the batched
* `tool-result` chunks emitted only after the whole batch settles.
*/
type: 'tool-execution-end';
toolCallId: string;
toolName: string;
isError: boolean;
/** Epoch ms when the handler settled, measured on the runtime. */
endTime: number;
} }
| { | {
type: 'tool-result'; type: 'tool-result';
@ -97,6 +119,7 @@ export type StreamChunk = ContentMetadata &
toolName: string; toolName: string;
output: unknown; output: unknown;
isError?: boolean; isError?: boolean;
canceled?: boolean;
} }
| { | {
type: 'tool-call-suspended'; type: 'tool-call-suspended';
@ -110,14 +133,14 @@ export type StreamChunk = ContentMetadata &
} }
// `message` is reserved for sub-agent / app-defined `CustomAgentMessage` // `message` is reserved for sub-agent / app-defined `CustomAgentMessage`
| { type: 'message'; message: AgentMessage } | { type: 'message'; message: AgentMessage }
| ({ type: 'subagent-started' } & SubAgentStartedPayload)
| ({ type: 'subagent-completed' } & SubAgentCompletedPayload)
| { | {
type: 'finish'; type: 'finish';
finishReason: FinishReason; finishReason: FinishReason;
usage?: TokenUsage; usage?: TokenUsage;
model?: string; model?: string;
structuredOutput?: unknown; structuredOutput?: unknown;
subAgentUsage?: SubAgentUsage[];
totalCost?: number;
} }
| { type: 'error'; error: unknown } | { type: 'error'; error: unknown }
); );
@ -136,7 +159,9 @@ export interface ExecutionOptions {
maxIterations?: number; maxIterations?: number;
abortSignal?: AbortSignal; abortSignal?: AbortSignal;
providerOptions?: ProviderOptions; providerOptions?: ProviderOptions;
/** Inherited telemetry from a parent agent. Used internally by asTool(). */ /** AI SDK `smoothStream` transform. Enabled by default; pass `false` to disable. */
smoothStream?: SmoothStreamOptions | false;
/** Inherited telemetry from a host runtime. */
telemetry?: BuiltTelemetry; telemetry?: BuiltTelemetry;
/** Inherited execution counter from the host runtime. Used for aggregate heartbeat telemetry. */ /** Inherited execution counter from the host runtime. Used for aggregate heartbeat telemetry. */
executionCounter?: AgentExecutionCounter; executionCounter?: AgentExecutionCounter;
@ -151,16 +176,7 @@ export interface ToolResultEntry {
input: unknown; input: unknown;
output: unknown; output: unknown;
transformed?: boolean; transformed?: boolean;
} canceled?: boolean;
/** Token usage from a sub-agent called via .asTool(). */
export interface SubAgentUsage {
/** Name of the sub-agent. */
agent: string;
/** Model used by the sub-agent. */
model?: string;
/** Token usage for the sub-agent call. */
usage: TokenUsage;
} }
export interface GenerateResult { export interface GenerateResult {
@ -175,10 +191,6 @@ export interface GenerateResult {
providerMetadata?: Record<string, unknown>; providerMetadata?: Record<string, unknown>;
/** Tool calls made during the run (with merged results when available). */ /** Tool calls made during the run (with merged results when available). */
toolCalls?: ToolResultEntry[]; toolCalls?: ToolResultEntry[];
/** Token usage from sub-agents called via .asTool(). */
subAgentUsage?: SubAgentUsage[];
/** Total cost (USD) including this agent + all sub-agents. */
totalCost?: number;
/** /**
* Present when the run suspended awaiting tool resume (HITL). * Present when the run suspended awaiting tool resume (HITL).
* Call `agent.resume('generate', data, { runId, toolCallId })` to resume. * Call `agent.resume('generate', data, { runId, toolCallId })` to resume.
@ -198,6 +210,8 @@ export interface GenerateResult {
* callers can handle them without try/catch. * callers can handle them without try/catch.
*/ */
error?: unknown; error?: unknown;
/** Return a snapshot of the agent state for this run. */
getState(): SerializableAgentState;
} }
export interface StreamResult { export interface StreamResult {
@ -205,6 +219,11 @@ export interface StreamResult {
runId: string; runId: string;
/** The readable stream of chunks. */ /** The readable stream of chunks. */
stream: ReadableStream<StreamChunk>; stream: ReadableStream<StreamChunk>;
/**
* Return the current agent state for this run.
* May be called while streaming or after the stream closes.
*/
getState(): SerializableAgentState;
} }
export interface ResumeOptions { export interface ResumeOptions {
@ -226,10 +245,6 @@ export interface BuiltAgent {
on(event: AgentEvent, handler: AgentEventHandler): void; on(event: AgentEvent, handler: AgentEventHandler): void;
asTool(description: string): BuiltTool;
getState(): SerializableAgentState;
/** Cancel the currently running agent. Synchronous — sets an abort flag that the agentic loop checks asynchronously. */ /** Cancel the currently running agent. Synchronous — sets an abort flag that the agentic loop checks asynchronously. */
abort(): void; abort(): void;

View File

@ -61,38 +61,6 @@ export interface BuiltMemory {
messages: AgentDbMessage[]; messages: AgentDbMessage[];
}): Promise<void>; }): Promise<void>;
deleteMessages(messageIds: string[]): Promise<void>; deleteMessages(messageIds: string[]): Promise<void>;
// --- Semantic recall (optional) ---
search?(
query: string,
opts?: {
/** @default 'resource' */
scope?: 'thread' | 'resource';
threadId?: string;
resourceId?: string;
topK?: number;
messageRange?: { before: number; after: number };
},
): Promise<AgentDbMessage[]>;
// --- Tier 3: Vector operations (optional — runtime handles embeddings) ---
saveEmbeddings?(opts: {
scope?: 'thread' | 'resource';
threadId?: string;
resourceId?: string;
entries: Array<{
id: string;
vector: number[];
text: string;
model: string;
}>;
}): Promise<void>;
queryEmbeddings?(opts: {
/** @default 'resource' */
scope?: 'thread' | 'resource';
threadId?: string;
resourceId?: string;
vector: number[];
topK: number;
}): Promise<Array<{ id: string; score: number }>>;
// --- Episodic memory (optional — runtime handles extraction and embeddings) --- // --- Episodic memory (optional — runtime handles extraction and embeddings) ---
episodic?: EpisodicMemoryMethods; episodic?: EpisodicMemoryMethods;
// --- Lifecycle (optional) --- // --- Lifecycle (optional) ---
@ -102,18 +70,6 @@ export interface BuiltMemory {
describe(): MemoryDescriptor; describe(): MemoryDescriptor;
} }
// --- Semantic Recall Config ---
export interface SemanticRecallConfig {
/** @default 'resource' */
scope?: 'thread' | 'resource';
topK: number;
messageRange?: { before: number; after: number };
embedder?: string; // e.g. 'openai/text-embedding-3-small' — required for queryEmbeddings(), optional for search()-based backends
/** API key for the embedder provider. Falls back to environment variables if not set. */
apiKey?: string;
}
export type EpisodicMemoryStatus = 'active' | 'superseded' | 'dropped'; export type EpisodicMemoryStatus = 'active' | 'superseded' | 'dropped';
export interface EpisodicMemoryScope { export interface EpisodicMemoryScope {
@ -346,7 +302,6 @@ export interface ObservationalMemoryConfig {
interface MemoryConfigBase { interface MemoryConfigBase {
observationLog?: ObservationLogMemoryConfig; observationLog?: ObservationLogMemoryConfig;
semanticRecall?: SemanticRecallConfig;
episodicMemory?: EpisodicMemoryConfig; episodicMemory?: EpisodicMemoryConfig;
titleGeneration?: TitleGenerationConfig; titleGeneration?: TitleGenerationConfig;
} }

View File

@ -105,7 +105,7 @@ export type ContentToolCall = ContentMetadata & {
providerExecuted?: boolean; providerExecuted?: boolean;
} & ( } & (
| { state: 'pending' } | { state: 'pending' }
| { state: 'resolved'; output: JSONValue } | { state: 'resolved'; output: JSONValue; canceled?: boolean }
| { state: 'rejected'; error: string } | { state: 'rejected'; error: string }
); );

View File

@ -2,6 +2,7 @@ import type { JSONSchema7 } from 'json-schema';
import type { ZodType } from 'zod'; import type { ZodType } from 'zod';
import type { AgentMessage } from './message'; import type { AgentMessage } from './message';
import type { AgentEventData } from '../runtime/event';
import type { BuiltTelemetry } from '../telemetry'; import type { BuiltTelemetry } from '../telemetry';
import type { JSONObject } from '../utils/json'; import type { JSONObject } from '../utils/json';
@ -18,6 +19,14 @@ export interface ToolExecutionContext {
threadId: string; threadId: string;
resourceId: string; resourceId: string;
}; };
/** Internal runtime event bridge for platform-managed tools. */
emitEvent?: (event: AgentEventData) => void;
/**
* The current run's abort signal. Long-running tools (e.g. ones that spawn a
* child agent) should forward it so cancelling the parent run also cancels
* the work they started.
*/
abortSignal?: AbortSignal;
} }
export interface ToolContext { export interface ToolContext {
@ -27,8 +36,12 @@ export interface ToolContext {
runId?: string; runId?: string;
/** Current persisted thread scope when the run is backed by memory. */ /** Current persisted thread scope when the run is backed by memory. */
persistence?: ToolExecutionContext['persistence']; persistence?: ToolExecutionContext['persistence'];
/** Telemetry config from the parent agent, for sub-agent propagation. */ /** Telemetry config from the parent agent. */
parentTelemetry?: BuiltTelemetry; parentTelemetry?: BuiltTelemetry;
/** Internal runtime event bridge for platform-managed tools. */
emitEvent?: ToolExecutionContext['emitEvent'];
/** The current run's abort signal, for tools that start cancellable work. */
abortSignal?: ToolExecutionContext['abortSignal'];
} }
export interface InterruptibleToolContext<S = unknown, R = unknown> { export interface InterruptibleToolContext<S = unknown, R = unknown> {
@ -38,16 +51,22 @@ export interface InterruptibleToolContext<S = unknown, R = unknown> {
* the execution engine to halt. Code after `return await ctx.suspend()` is unreachable. * the execution engine to halt. Code after `return await ctx.suspend()` is unreachable.
*/ */
suspend: (payload: S) => Promise<never>; suspend: (payload: S) => Promise<never>;
/** Data from the consumer after resume. Undefined on first invocation. */ /** Data from the consumer after resume. Undefined on first invocation or when cancelled. */
resumeData: R | undefined; resumeData: R | undefined;
/** Set when the resume was a cancellation and the tool opted in via `.handleCancellation()`. */
cancellation?: { message: string };
/** AI SDK tool call ID for the current local tool execution. */ /** AI SDK tool call ID for the current local tool execution. */
toolCallId?: string; toolCallId?: string;
/** Agent run ID for the current execution. */ /** Agent run ID for the current execution. */
runId?: string; runId?: string;
/** Current persisted thread scope when the run is backed by memory. */ /** Current persisted thread scope when the run is backed by memory. */
persistence?: ToolExecutionContext['persistence']; persistence?: ToolExecutionContext['persistence'];
/** Telemetry config from the parent agent, for sub-agent propagation. */ /** Telemetry config from the parent agent. */
parentTelemetry?: BuiltTelemetry; parentTelemetry?: BuiltTelemetry;
/** Internal runtime event bridge for platform-managed tools. */
emitEvent?: ToolExecutionContext['emitEvent'];
/** The current run's abort signal, for tools that start cancellable work. */
abortSignal?: ToolExecutionContext['abortSignal'];
} }
export interface BuiltTool { export interface BuiltTool {
@ -63,6 +82,8 @@ export interface BuiltTool {
readonly systemInstruction?: string; readonly systemInstruction?: string;
readonly suspendSchema?: ZodType | JSONSchema7; readonly suspendSchema?: ZodType | JSONSchema7;
readonly resumeSchema?: ZodType | JSONSchema7; readonly resumeSchema?: ZodType | JSONSchema7;
/** When `true`, the handler is called on cancellation with `ctx.cancellation` set instead of being bypassed. */
readonly handleCancellation?: boolean;
readonly withDefaultApproval?: boolean; readonly withDefaultApproval?: boolean;
readonly toMessage?: (output: unknown) => AgentMessage | undefined; readonly toMessage?: (output: unknown) => AgentMessage | undefined;
/** /**

View File

@ -18,6 +18,7 @@
"dev": "pnpm watch", "dev": "pnpm watch",
"typecheck": "tsc --noEmit", "typecheck": "tsc --noEmit",
"build": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json", "build": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json",
"build:unchecked": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json --noCheck",
"format": "biome format --write .", "format": "biome format --write .",
"format:check": "biome ci .", "format:check": "biome ci .",
"lint": "eslint . --quiet", "lint": "eslint . --quiet",

View File

@ -1,6 +0,0 @@
/**
 * Package-level Jest configuration.
 *
 * Starts from the shared config loaded from `../../../jest.config` and sets
 * the coverage globs and post-environment setup files for this package.
 */
/** @type {import('jest').Config} */
module.exports = {
// Spread the shared config first so the keys below take precedence.
...require('../../../jest.config'),
// Collect coverage from both the source tree and the integration tests.
collectCoverageFrom: ['src/**/*.ts', 'integration-tests/**/*.ts'],
// Load the `jest-expect-message` module after the test environment is set up.
setupFilesAfterEnv: ['jest-expect-message'],
};

View File

@ -63,21 +63,25 @@
"typecheck": "tsc --noEmit", "typecheck": "tsc --noEmit",
"copy-tokenizer-json": "node scripts/copy-tokenizer-json.js .", "copy-tokenizer-json": "node scripts/copy-tokenizer-json.js .",
"build": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json && tsc-alias -p tsconfig.build.esm.json && tsc-alias -p tsconfig.build.cjs.json && pnpm copy-tokenizer-json dist/cjs && pnpm copy-tokenizer-json dist/esm", "build": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json && tsc-alias -p tsconfig.build.esm.json && tsc-alias -p tsconfig.build.cjs.json && pnpm copy-tokenizer-json dist/cjs && pnpm copy-tokenizer-json dist/esm",
"build:unchecked": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json --noCheck && tsc-alias -p tsconfig.build.esm.json && tsc-alias -p tsconfig.build.cjs.json && pnpm copy-tokenizer-json dist/cjs && pnpm copy-tokenizer-json dist/esm",
"format": "biome format --write .", "format": "biome format --write .",
"format:check": "biome ci .", "format:check": "biome ci .",
"lint": "eslint . --quiet", "lint": "eslint . --quiet",
"lint:fix": "eslint . --fix", "lint:fix": "eslint . --fix",
"watch": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json --watch", "watch": "tsc --build tsconfig.build.esm.json tsconfig.build.cjs.json --watch",
"test": "jest", "test": "vitest run",
"test:unit": "jest", "test:unit": "vitest run",
"test:dev": "jest --watch" "test:dev": "vitest --silent=false"
}, },
"files": [ "files": [
"dist" "dist"
], ],
"devDependencies": { "devDependencies": {
"@types/json-schema": "^7.0.15", "@types/json-schema": "^7.0.15",
"jest-mock-extended": "^3.0.4", "@n8n/vitest-config": "workspace:*",
"@vitest/coverage-v8": "catalog:",
"vitest": "catalog:",
"vitest-mock-extended": "catalog:",
"@types/mime-types": "catalog:", "@types/mime-types": "catalog:",
"tsx": "catalog:", "tsx": "catalog:",
"axios": "catalog:", "axios": "catalog:",
@ -98,9 +102,10 @@
"tmp-promise": "3.0.3", "tmp-promise": "3.0.3",
"js-tiktoken": "catalog:", "js-tiktoken": "catalog:",
"https-proxy-agent": "catalog:", "https-proxy-agent": "catalog:",
"@thednp/dommatrix": "^2.0.12",
"pdf-parse": "catalog:", "pdf-parse": "catalog:",
"proxy-from-env": "^1.1.0", "proxy-from-env": "^1.1.0",
"undici": "^6.21.0" "undici": "catalog:undici-v6"
}, },
"peerDependencies": { "peerDependencies": {
"n8n-workflow": "*" "n8n-workflow": "*"

View File

@ -1,13 +1,17 @@
import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager'; import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
import { HumanMessage } from '@langchain/core/messages'; import { HumanMessage } from '@langchain/core/messages';
import type { ISupplyDataFunctions } from 'n8n-workflow'; import type { ISupplyDataFunctions } from 'n8n-workflow';
import type { Mock } from 'vitest';
import * as toolConverter from 'src/converters/tool';
import type { GenerateResult, StreamChunk } from 'src/types/output'; import type { GenerateResult, StreamChunk } from 'src/types/output';
import * as failedAttemptHandler from 'src/utils/failed-attempt-handler/n8nLlmFailedAttemptHandler';
import * as n8nLlmTracing from 'src/utils/n8n-llm-tracing';
import { LangchainChatModelAdapter } from '../../adapters/langchain-chat-model'; import { LangchainChatModelAdapter } from '../../adapters/langchain-chat-model';
jest.mock('src/converters/tool', () => ({ vi.mock('src/converters/tool', () => ({
fromLcTool: jest.fn().mockImplementation((t: { name?: string }) => ({ fromLcTool: vi.fn().mockImplementation((t: { name?: string }) => ({
type: 'function' as const, type: 'function' as const,
name: t?.name ?? 'tool', name: t?.name ?? 'tool',
description: '', description: '',
@ -15,32 +19,32 @@ jest.mock('src/converters/tool', () => ({
})), })),
})); }));
jest.mock('src/utils/n8n-llm-tracing', () => ({ vi.mock('src/utils/n8n-llm-tracing', () => ({
N8nLlmTracing: jest.fn().mockImplementation(function (this: unknown) { N8nLlmTracing: vi.fn().mockImplementation(function (this: unknown) {
return this; return this;
}), }),
})); }));
jest.mock('src/utils/failed-attempt-handler/n8nLlmFailedAttemptHandler', () => ({ vi.mock('src/utils/failed-attempt-handler/n8nLlmFailedAttemptHandler', () => ({
makeN8nLlmFailedAttemptHandler: jest.fn().mockReturnValue(jest.fn()), makeN8nLlmFailedAttemptHandler: vi.fn().mockReturnValue(vi.fn()),
})); }));
const { fromLcTool } = jest.requireMock('src/converters/tool'); const fromLcTool = vi.mocked(toolConverter.fromLcTool);
const { N8nLlmTracing } = jest.requireMock('src/utils/n8n-llm-tracing'); const N8nLlmTracing = vi.mocked(n8nLlmTracing.N8nLlmTracing);
const { makeN8nLlmFailedAttemptHandler } = jest.requireMock( const makeN8nLlmFailedAttemptHandler = vi.mocked(
'src/utils/failed-attempt-handler/n8nLlmFailedAttemptHandler', failedAttemptHandler.makeN8nLlmFailedAttemptHandler,
); );
function createMockChatModel( function createMockChatModel(
overrides: { overrides: {
generate?: jest.Mock; generate?: Mock;
stream?: jest.Mock; stream?: Mock;
withTools?: jest.Mock; withTools?: Mock;
} = {}, } = {},
) { ) {
const generate = jest.fn(); const generate = vi.fn();
const stream = jest.fn(); const stream = vi.fn();
const withTools = jest.fn().mockImplementation(function ( const withTools = vi.fn().mockImplementation(function (
this: ReturnType<typeof createMockChatModel>, this: ReturnType<typeof createMockChatModel>,
) { ) {
return this; return this;
@ -56,14 +60,14 @@ function createMockChatModel(
describe('LangchainAdapter', () => { describe('LangchainAdapter', () => {
beforeEach(() => { beforeEach(() => {
jest.clearAllMocks(); vi.clearAllMocks();
}); });
describe('constructor', () => { describe('constructor', () => {
it('passes callbacks and onFailedAttempt when ctx is provided', () => { it('passes callbacks and onFailedAttempt when ctx is provided', () => {
const ctx = { const ctx = {
getNode: jest.fn(), getNode: vi.fn(),
addOutputData: jest.fn(), addOutputData: vi.fn(),
} as unknown as ISupplyDataFunctions; } as unknown as ISupplyDataFunctions;
const chatModel = createMockChatModel(); const chatModel = createMockChatModel();
@ -200,10 +204,10 @@ describe('LangchainAdapter', () => {
yield response2; yield response2;
} }
const chatModel = createMockChatModel({ const chatModel = createMockChatModel({
stream: jest.fn().mockImplementation(() => stream()), stream: vi.fn().mockImplementation(() => stream()),
}); });
const adapter = new LangchainChatModelAdapter(chatModel); const adapter = new LangchainChatModelAdapter(chatModel);
const handleLLMNewToken = jest.fn(); const handleLLMNewToken = vi.fn();
const chunks: any[] = []; const chunks: any[] = [];
for await (const chunk of adapter._streamResponseChunks([new HumanMessage('hi')], {}, { for await (const chunk of adapter._streamResponseChunks([new HumanMessage('hi')], {}, {
@ -244,7 +248,7 @@ describe('LangchainAdapter', () => {
yield response; yield response;
} }
const chatModel = createMockChatModel({ const chatModel = createMockChatModel({
stream: jest.fn().mockImplementation(() => stream()), stream: vi.fn().mockImplementation(() => stream()),
}); });
const adapter = new LangchainChatModelAdapter(chatModel); const adapter = new LangchainChatModelAdapter(chatModel);
@ -272,7 +276,7 @@ describe('LangchainAdapter', () => {
}; };
} }
const chatModel = createMockChatModel({ const chatModel = createMockChatModel({
stream: jest.fn().mockImplementation(() => stream()), stream: vi.fn().mockImplementation(() => stream()),
}); });
const adapter = new LangchainChatModelAdapter(chatModel); const adapter = new LangchainChatModelAdapter(chatModel);
@ -295,7 +299,7 @@ describe('LangchainAdapter', () => {
it('converts tools via fromLcTool, calls chatModel.withTools, and returns new LangchainAdapter', () => { it('converts tools via fromLcTool, calls chatModel.withTools, and returns new LangchainAdapter', () => {
const chatModel = createMockChatModel(); const chatModel = createMockChatModel();
const adapter = new LangchainChatModelAdapter(chatModel, undefined); const adapter = new LangchainChatModelAdapter(chatModel, undefined);
const lcTools = [{ name: 'my_tool', schema: {}, invoke: jest.fn() }]; const lcTools = [{ name: 'my_tool', schema: {}, invoke: vi.fn() }];
const bound = adapter.bindTools(lcTools); const bound = adapter.bindTools(lcTools);

View File

@ -11,10 +11,10 @@ describe('LangchainHistoryAdapter', () => {
}); });
const createMockHistory = (messages: Message[] = []): ChatHistory => ({ const createMockHistory = (messages: Message[] = []): ChatHistory => ({
getMessages: jest.fn().mockResolvedValue([...messages]), getMessages: vi.fn().mockResolvedValue([...messages]),
addMessage: jest.fn().mockResolvedValue(undefined), addMessage: vi.fn().mockResolvedValue(undefined),
addMessages: jest.fn().mockResolvedValue(undefined), addMessages: vi.fn().mockResolvedValue(undefined),
clear: jest.fn().mockResolvedValue(undefined), clear: vi.fn().mockResolvedValue(undefined),
}); });
describe('getMessages', () => { describe('getMessages', () => {

View File

@ -12,16 +12,16 @@ describe('LangchainMemoryAdapter', () => {
const createMockMemory = (messages: Message[] = []): ChatMemory => { const createMockMemory = (messages: Message[] = []): ChatMemory => {
const mockHistory: ChatHistory = { const mockHistory: ChatHistory = {
getMessages: jest.fn().mockResolvedValue([...messages]), getMessages: vi.fn().mockResolvedValue([...messages]),
addMessage: jest.fn().mockResolvedValue(undefined), addMessage: vi.fn().mockResolvedValue(undefined),
addMessages: jest.fn().mockResolvedValue(undefined), addMessages: vi.fn().mockResolvedValue(undefined),
clear: jest.fn().mockResolvedValue(undefined), clear: vi.fn().mockResolvedValue(undefined),
}; };
return { return {
loadMessages: jest.fn().mockResolvedValue([...messages]), loadMessages: vi.fn().mockResolvedValue([...messages]),
saveTurn: jest.fn().mockResolvedValue(undefined), saveTurn: vi.fn().mockResolvedValue(undefined),
clear: jest.fn().mockResolvedValue(undefined), clear: vi.fn().mockResolvedValue(undefined),
chatHistory: mockHistory, chatHistory: mockHistory,
}; };
}; };

Some files were not shown because too many files have changed in this diff Show More