fix(core): Abort orchestrator run after repeated plan-guard rejections (no-changelog) (#30274)

This commit is contained in:
Jaakko Husso 2026-05-12 15:11:17 +03:00 committed by GitHub
parent f0649e0a3d
commit 0ce820de73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 170 additions and 37 deletions

View File

@ -821,25 +821,23 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
}
});
it('rejects direct calls outside a replan/checkpoint follow-up', async () => {
it('rejects direct calls outside a replan/checkpoint follow-up with an imperative STOP message', async () => {
const context = createMockContext();
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
const out = await tool.execute({ task: 'Build a Slack notifier' });
expect(out.taskId).toBe('');
expect(out.result).toContain('bypassPlan');
expect(out.result).toMatch(
/For new workflows, multi-workflow builds, or data-table schema changes/,
);
expect(out.result).toMatch(/^STOP\./);
expect(out.result).toContain('`plan`');
expect(out.result).toContain('bypassPlan');
expect(context.logger.warn).toHaveBeenCalledWith(
'build-workflow-with-agent called outside plan/replan context — rejecting',
expect.objectContaining({ threadId: 'test-thread' }),
);
});
it('rejects bypassPlan=true without a workflowId (new builds must go through plan)', async () => {
it('rejects bypassPlan=true without a workflowId with an imperative STOP message', async () => {
const context = createMockContext();
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
@ -850,10 +848,13 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
});
expect(out.taskId).toBe('');
expect(out.result).toMatch(/edits to an EXISTING workflow and requires a `workflowId`/);
expect(out.result).toMatch(/^STOP\./);
expect(out.result).toMatch(/EXISTING workflow/);
expect(out.result).toContain('`workflowId`');
expect(out.result).toContain('`plan`');
});
it('rejects bypassPlan=true without a reason', async () => {
it('rejects bypassPlan=true without a reason with an imperative STOP message', async () => {
const context = createMockContext();
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
@ -864,7 +865,8 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
});
expect(out.taskId).toBe('');
expect(out.result).toContain('requires a one-sentence `reason`');
expect(out.result).toMatch(/^STOP\./);
expect(out.result).toContain('`reason`');
});
it('allows the call when bypassPlan=true with a reason is provided', async () => {
@ -883,7 +885,7 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
// Guard passes → reaches startBuildWorkflowAgentTask, which short-circuits on
// missing spawnBackgroundTask. The point is we got past the guard, not what
// the downstream does.
expect(out.result).not.toMatch(/`bypassPlan: true` is for edits/);
expect(out.result).not.toMatch(/^STOP\./);
const warnMock = context.logger.warn as jest.Mock<void, [string, Record<string, unknown>?]>;
expect(warnMock.mock.calls.some((c) => c[0].includes('bypassing plan'))).toBe(true);
});
@ -894,7 +896,7 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
const out = await tool.execute({ task: 'retry after failure' });
expect(out.result).not.toContain('direct builder calls require');
expect(out.result).not.toMatch(/^STOP\./);
expect(context.logger.warn).not.toHaveBeenCalledWith(
'build-workflow-with-agent called outside plan/replan context — rejecting',
expect.anything(),
@ -907,7 +909,7 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
const out = await tool.execute({ task: 'checkpoint branch' });
expect(out.result).not.toContain('direct builder calls require');
expect(out.result).not.toMatch(/^STOP\./);
});
it('skips the guard when the env flag is disabled', async () => {
@ -917,7 +919,124 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
const out = await tool.execute({ task: 'build directly' });
expect(out.result).not.toContain('direct builder calls require');
expect(out.result).not.toContain('STOP');
});
// If the LLM keeps retrying the same rejected call, the tool throws after
// PLAN_GUARD_REJECTION_LIMIT (3) consecutive rejections so the loop surfaces
// as a clean failure instead of a stall.
it('throws after three consecutive plan-guard rejections in the same tool instance', async () => {
const context = createMockContext();
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
const first = await tool.execute({ task: 'one' });
expect(first.result).toMatch(/^STOP\./);
const second = await tool.execute({ task: 'two' });
expect(second.result).toMatch(/^STOP\./);
await expect(tool.execute({ task: 'three' })).rejects.toThrow(/looped on .* rejections/);
expect(context.logger.warn).toHaveBeenCalledWith(
'build-workflow-with-agent plan-guard rejection limit reached — aborting run',
expect.objectContaining({
threadId: 'test-thread',
rejectionCount: 3,
}),
);
});
it('counts mixed-cause rejections (missing bypassPlan / workflowId / reason) toward the same limit', async () => {
const context = createMockContext();
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
// 1: missing bypassPlan
await tool.execute({ task: 'one' });
// 2: bypassPlan=true but missing workflowId
await tool.execute({
task: 'two',
bypassPlan: true,
reason: 'patch a thing',
});
// 3: bypassPlan=true + workflowId but missing reason — throws
await expect(
tool.execute({
task: 'three',
bypassPlan: true,
workflowId: 'WF_EXISTING',
}),
).rejects.toThrow(/looped on .* rejections/);
});
it('resets the rejection counter when a call gets past the guard', async () => {
const context = createMockContext({
domainContext: createMockDomainContext({ updateWorkflow: 'always_allow' }),
});
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
// Two rejections — counter at 2.
await tool.execute({ task: 'one' });
await tool.execute({ task: 'two' });
// A valid bypassPlan call gets past the guard (startBuildWorkflowAgentTask
// short-circuits on missing spawnBackgroundTask, but the counter resets
// BEFORE that call, so the counter is back to 0).
const past = await tool.execute({
task: 'edit an existing workflow',
workflowId: 'WF_EXISTING',
bypassPlan: true,
reason: 'Swap Slack channel on this notifier.',
});
expect(past.result).not.toMatch(/^STOP\./);
// Two more rejections — the counter restarts from 0, so this must NOT throw.
const fourth = await tool.execute({ task: 'three' });
expect(fourth.result).toMatch(/^STOP\./);
const fifth = await tool.execute({ task: 'four' });
expect(fifth.result).toMatch(/^STOP\./);
});
it('keeps the rejection counter per tool instance (one orchestrator run = one counter)', async () => {
const contextA = createMockContext();
const toolA = createBuildWorkflowAgentTool(contextA) as unknown as BuildExecutable;
await toolA.execute({ task: 'a-one' });
await toolA.execute({ task: 'a-two' });
// A different tool instance must start with a fresh counter.
const contextB = createMockContext();
const toolB = createBuildWorkflowAgentTool(contextB) as unknown as BuildExecutable;
const out = await toolB.execute({ task: 'b-one' });
expect(out.result).toMatch(/^STOP\./);
// And toolA, which is at 2, throws on its next call as expected.
await expect(toolA.execute({ task: 'a-three' })).rejects.toThrow(/looped on .* rejections/);
});
it('does not increment the counter or throw when the guard is disabled by env flag', async () => {
process.env.N8N_INSTANCE_AI_ENFORCE_BUILD_VIA_PLAN = 'false';
const context = createMockContext();
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
// Many calls with input shapes that would otherwise trip the guard — none throw.
for (let i = 0; i < 5; i++) {
const out = await tool.execute({ task: `call-${i}` });
expect(out.result).not.toMatch(/^STOP\./);
}
});
it('does not increment the counter when the run is a replan follow-up', async () => {
// Counter only advances when the guard branch actually rejects. A replan
// follow-up skips the guard, so the counter stays at zero and a later
// rejection-prone call sees a fresh counter.
const context = createMockContext({ isReplanFollowUp: true });
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
// Many follow-up bypasses — none should throw.
for (let i = 0; i < 5; i++) {
const out = await tool.execute({ task: `replan-${i}` });
expect(out.result).not.toMatch(/^STOP\./);
}
});
});

View File

@ -11,6 +11,7 @@ import { Agent } from '@mastra/core/agent';
import type { ToolsInput } from '@mastra/core/agent';
import { createTool } from '@mastra/core/tools';
import { generateWorkflowCode } from '@n8n/workflow-sdk';
import { UserError } from 'n8n-workflow';
import { nanoid } from 'nanoid';
import { createHash, randomUUID } from 'node:crypto';
import { z } from 'zod';
@ -1621,6 +1622,14 @@ function isBuildViaPlanGuardEnabled(): boolean {
return raw.toLowerCase() !== 'false' && raw !== '0';
}
/**
* If the LLM ignores plan-guard rejections and retries the same direct call, the
* orchestrator can burn its entire step budget (and the eval CLI's per-build timeout)
* looping. After this many consecutive rejections in a single tool instance we abort
* the run with a UserError so the loop surfaces as a clean failure instead of a stall. See INS-242.
*/
const PLAN_GUARD_REJECTION_LIMIT = 3;
async function resolveWorkflowNameForEditConfirmation(
context: OrchestrationContext,
workflowId: string,
@ -1635,6 +1644,25 @@ async function resolveWorkflowNameForEditConfirmation(
}
export function createBuildWorkflowAgentTool(context: OrchestrationContext) {
let consecutivePlanGuardRejections = 0;
const rejectWithGuard = (message: string): { result: string; taskId: string } => {
consecutivePlanGuardRejections += 1;
if (consecutivePlanGuardRejections >= PLAN_GUARD_REJECTION_LIMIT) {
context.logger.warn(
'build-workflow-with-agent plan-guard rejection limit reached — aborting run',
{
threadId: context.threadId,
rejectionCount: consecutivePlanGuardRejections,
},
);
throw new UserError(
'Stopped: the agent looped on `build-workflow-with-agent` rejections without correcting them. Try again or rephrase the request.',
);
}
return { result: message, taskId: '' };
};
return createTool({
id: 'build-workflow-with-agent',
description:
@ -1664,34 +1692,19 @@ export function createBuildWorkflowAgentTool(context: OrchestrationContext) {
hasWorkflowId: Boolean(input.workflowId),
},
);
return {
result:
'Error: direct builder calls require `bypassPlan: true` + an existing ' +
'`workflowId` + a one-sentence `reason`. Use that combination for any edit to ' +
'an existing workflow. For new workflows, multi-workflow builds, or data-table ' +
'schema changes, call `plan` with a `build-workflow` task instead — the planner ' +
'discovers credentials, data tables, and best practices, and schedules an ' +
'orchestrator-run verification checkpoint.',
taskId: '',
};
return rejectWithGuard(
'STOP. Call `plan` with a `build-workflow` task — do NOT retry this tool. Direct builder calls outside a plan are only allowed for edits to an existing workflow (`bypassPlan: true` + `workflowId` + one-sentence `reason`).',
);
}
if (!input.workflowId) {
return {
result:
'Error: `bypassPlan: true` is for edits to an EXISTING workflow and requires a ' +
'`workflowId`. New workflow builds must go through `plan` so an orchestrator-run ' +
'verification checkpoint is scheduled. Call `plan` with a `build-workflow` task ' +
'instead.',
taskId: '',
};
return rejectWithGuard(
'STOP. `bypassPlan: true` is for edits to an EXISTING workflow only. For new workflows call `plan` with a `build-workflow` task — do NOT retry this tool without a `workflowId`.',
);
}
if (!input.reason || input.reason.trim().length === 0) {
return {
result:
'Error: `bypassPlan: true` requires a one-sentence `reason` describing the edit ' +
'(e.g. "swap Slack channel", "fix Code node shape issue").',
taskId: '',
};
return rejectWithGuard(
'STOP. `bypassPlan: true` requires a one-sentence `reason` (e.g. "swap Slack channel"). Retry once with `reason` set; do not retry without it.',
);
}
context.logger.warn('build-workflow-with-agent bypassing plan with bypassPlan=true', {
threadId: context.threadId,
@ -1739,6 +1752,7 @@ export function createBuildWorkflowAgentTool(context: OrchestrationContext) {
}
}
consecutivePlanGuardRejections = 0;
const result = await startBuildWorkflowAgentTask(context, input);
return { result: result.result, taskId: result.taskId };
},