mirror of
https://github.com/n8n-io/n8n.git
synced 2026-05-12 16:10:30 +02:00
fix(core): Abort orchestrator run after repeated plan-guard rejections (no-changelog) (#30274)
This commit is contained in:
parent
f0649e0a3d
commit
0ce820de73
|
|
@ -821,25 +821,23 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
|
|||
}
|
||||
});
|
||||
|
||||
it('rejects direct calls outside a replan/checkpoint follow-up', async () => {
|
||||
it('rejects direct calls outside a replan/checkpoint follow-up with an imperative STOP message', async () => {
|
||||
const context = createMockContext();
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
const out = await tool.execute({ task: 'Build a Slack notifier' });
|
||||
|
||||
expect(out.taskId).toBe('');
|
||||
expect(out.result).toContain('bypassPlan');
|
||||
expect(out.result).toMatch(
|
||||
/For new workflows, multi-workflow builds, or data-table schema changes/,
|
||||
);
|
||||
expect(out.result).toMatch(/^STOP\./);
|
||||
expect(out.result).toContain('`plan`');
|
||||
expect(out.result).toContain('bypassPlan');
|
||||
expect(context.logger.warn).toHaveBeenCalledWith(
|
||||
'build-workflow-with-agent called outside plan/replan context — rejecting',
|
||||
expect.objectContaining({ threadId: 'test-thread' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects bypassPlan=true without a workflowId (new builds must go through plan)', async () => {
|
||||
it('rejects bypassPlan=true without a workflowId with an imperative STOP message', async () => {
|
||||
const context = createMockContext();
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
|
|
@ -850,10 +848,13 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
|
|||
});
|
||||
|
||||
expect(out.taskId).toBe('');
|
||||
expect(out.result).toMatch(/edits to an EXISTING workflow and requires a `workflowId`/);
|
||||
expect(out.result).toMatch(/^STOP\./);
|
||||
expect(out.result).toMatch(/EXISTING workflow/);
|
||||
expect(out.result).toContain('`workflowId`');
|
||||
expect(out.result).toContain('`plan`');
|
||||
});
|
||||
|
||||
it('rejects bypassPlan=true without a reason', async () => {
|
||||
it('rejects bypassPlan=true without a reason with an imperative STOP message', async () => {
|
||||
const context = createMockContext();
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
|
|
@ -864,7 +865,8 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
|
|||
});
|
||||
|
||||
expect(out.taskId).toBe('');
|
||||
expect(out.result).toContain('requires a one-sentence `reason`');
|
||||
expect(out.result).toMatch(/^STOP\./);
|
||||
expect(out.result).toContain('`reason`');
|
||||
});
|
||||
|
||||
it('allows the call when bypassPlan=true with a reason is provided', async () => {
|
||||
|
|
@ -883,7 +885,7 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
|
|||
// Guard passes → reaches startBuildWorkflowAgentTask, which short-circuits on
|
||||
// missing spawnBackgroundTask. The point is we got past the guard, not what
|
||||
// the downstream does.
|
||||
expect(out.result).not.toMatch(/`bypassPlan: true` is for edits/);
|
||||
expect(out.result).not.toMatch(/^STOP\./);
|
||||
const warnMock = context.logger.warn as jest.Mock<void, [string, Record<string, unknown>?]>;
|
||||
expect(warnMock.mock.calls.some((c) => c[0].includes('bypassing plan'))).toBe(true);
|
||||
});
|
||||
|
|
@ -894,7 +896,7 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
|
|||
|
||||
const out = await tool.execute({ task: 'retry after failure' });
|
||||
|
||||
expect(out.result).not.toContain('direct builder calls require');
|
||||
expect(out.result).not.toMatch(/^STOP\./);
|
||||
expect(context.logger.warn).not.toHaveBeenCalledWith(
|
||||
'build-workflow-with-agent called outside plan/replan context — rejecting',
|
||||
expect.anything(),
|
||||
|
|
@ -907,7 +909,7 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
|
|||
|
||||
const out = await tool.execute({ task: 'checkpoint branch' });
|
||||
|
||||
expect(out.result).not.toContain('direct builder calls require');
|
||||
expect(out.result).not.toMatch(/^STOP\./);
|
||||
});
|
||||
|
||||
it('skips the guard when the env flag is disabled', async () => {
|
||||
|
|
@ -917,7 +919,124 @@ describe('createBuildWorkflowAgentTool — plan-enforcement guard', () => {
|
|||
|
||||
const out = await tool.execute({ task: 'build directly' });
|
||||
|
||||
expect(out.result).not.toContain('direct builder calls require');
|
||||
expect(out.result).not.toContain('STOP');
|
||||
});
|
||||
|
||||
// If the LLM keeps retrying the same rejected call, the tool throws after
|
||||
// PLAN_GUARD_REJECTION_LIMIT (3) consecutive rejections so the loop surfaces
|
||||
// as a clean failure instead of a stall.
|
||||
it('throws after three consecutive plan-guard rejections in the same tool instance', async () => {
|
||||
const context = createMockContext();
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
const first = await tool.execute({ task: 'one' });
|
||||
expect(first.result).toMatch(/^STOP\./);
|
||||
|
||||
const second = await tool.execute({ task: 'two' });
|
||||
expect(second.result).toMatch(/^STOP\./);
|
||||
|
||||
await expect(tool.execute({ task: 'three' })).rejects.toThrow(/looped on .* rejections/);
|
||||
expect(context.logger.warn).toHaveBeenCalledWith(
|
||||
'build-workflow-with-agent plan-guard rejection limit reached — aborting run',
|
||||
expect.objectContaining({
|
||||
threadId: 'test-thread',
|
||||
rejectionCount: 3,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('counts mixed-cause rejections (missing bypassPlan / workflowId / reason) toward the same limit', async () => {
|
||||
const context = createMockContext();
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
// 1: missing bypassPlan
|
||||
await tool.execute({ task: 'one' });
|
||||
// 2: bypassPlan=true but missing workflowId
|
||||
await tool.execute({
|
||||
task: 'two',
|
||||
bypassPlan: true,
|
||||
reason: 'patch a thing',
|
||||
});
|
||||
// 3: bypassPlan=true + workflowId but missing reason — throws
|
||||
await expect(
|
||||
tool.execute({
|
||||
task: 'three',
|
||||
bypassPlan: true,
|
||||
workflowId: 'WF_EXISTING',
|
||||
}),
|
||||
).rejects.toThrow(/looped on .* rejections/);
|
||||
});
|
||||
|
||||
it('resets the rejection counter when a call gets past the guard', async () => {
|
||||
const context = createMockContext({
|
||||
domainContext: createMockDomainContext({ updateWorkflow: 'always_allow' }),
|
||||
});
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
// Two rejections — counter at 2.
|
||||
await tool.execute({ task: 'one' });
|
||||
await tool.execute({ task: 'two' });
|
||||
|
||||
// A valid bypassPlan call gets past the guard (startBuildWorkflowAgentTask
|
||||
// short-circuits on missing spawnBackgroundTask, but the counter resets
|
||||
// BEFORE that call, so the counter is back to 0).
|
||||
const past = await tool.execute({
|
||||
task: 'edit an existing workflow',
|
||||
workflowId: 'WF_EXISTING',
|
||||
bypassPlan: true,
|
||||
reason: 'Swap Slack channel on this notifier.',
|
||||
});
|
||||
expect(past.result).not.toMatch(/^STOP\./);
|
||||
|
||||
// Two more rejections — the counter restarts from 0, so this must NOT throw.
|
||||
const fourth = await tool.execute({ task: 'three' });
|
||||
expect(fourth.result).toMatch(/^STOP\./);
|
||||
const fifth = await tool.execute({ task: 'four' });
|
||||
expect(fifth.result).toMatch(/^STOP\./);
|
||||
});
|
||||
|
||||
it('keeps the rejection counter per tool instance (one orchestrator run = one counter)', async () => {
|
||||
const contextA = createMockContext();
|
||||
const toolA = createBuildWorkflowAgentTool(contextA) as unknown as BuildExecutable;
|
||||
|
||||
await toolA.execute({ task: 'a-one' });
|
||||
await toolA.execute({ task: 'a-two' });
|
||||
|
||||
// A different tool instance must start with a fresh counter.
|
||||
const contextB = createMockContext();
|
||||
const toolB = createBuildWorkflowAgentTool(contextB) as unknown as BuildExecutable;
|
||||
|
||||
const out = await toolB.execute({ task: 'b-one' });
|
||||
expect(out.result).toMatch(/^STOP\./);
|
||||
|
||||
// And toolA, which is at 2, throws on its next call as expected.
|
||||
await expect(toolA.execute({ task: 'a-three' })).rejects.toThrow(/looped on .* rejections/);
|
||||
});
|
||||
|
||||
it('does not increment the counter or throw when the guard is disabled by env flag', async () => {
|
||||
process.env.N8N_INSTANCE_AI_ENFORCE_BUILD_VIA_PLAN = 'false';
|
||||
const context = createMockContext();
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
// Many calls with input shapes that would otherwise trip the guard — none throw.
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const out = await tool.execute({ task: `call-${i}` });
|
||||
expect(out.result).not.toMatch(/^STOP\./);
|
||||
}
|
||||
});
|
||||
|
||||
it('does not increment the counter when the run is a replan follow-up', async () => {
|
||||
// Counter only advances when the guard branch actually rejects. A replan
|
||||
// follow-up skips the guard, so the counter stays at zero and a later
|
||||
// rejection-prone call sees a fresh counter.
|
||||
const context = createMockContext({ isReplanFollowUp: true });
|
||||
const tool = createBuildWorkflowAgentTool(context) as unknown as BuildExecutable;
|
||||
|
||||
// Many follow-up bypasses — none should throw.
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const out = await tool.execute({ task: `replan-${i}` });
|
||||
expect(out.result).not.toMatch(/^STOP\./);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { Agent } from '@mastra/core/agent';
|
|||
import type { ToolsInput } from '@mastra/core/agent';
|
||||
import { createTool } from '@mastra/core/tools';
|
||||
import { generateWorkflowCode } from '@n8n/workflow-sdk';
|
||||
import { UserError } from 'n8n-workflow';
|
||||
import { nanoid } from 'nanoid';
|
||||
import { createHash, randomUUID } from 'node:crypto';
|
||||
import { z } from 'zod';
|
||||
|
|
@ -1621,6 +1622,14 @@ function isBuildViaPlanGuardEnabled(): boolean {
|
|||
return raw.toLowerCase() !== 'false' && raw !== '0';
|
||||
}
|
||||
|
||||
/**
|
||||
* If the LLM ignores plan-guard rejections and retries the same direct call, the
|
||||
* orchestrator can burn its entire step budget (and the eval CLI's per-build timeout)
|
||||
* looping. After this many consecutive rejections in a single tool instance we abort
|
||||
* the run with a UserError so the loop surfaces as a clean failure instead of a stall. See INS-242.
|
||||
*/
|
||||
const PLAN_GUARD_REJECTION_LIMIT = 3;
|
||||
|
||||
async function resolveWorkflowNameForEditConfirmation(
|
||||
context: OrchestrationContext,
|
||||
workflowId: string,
|
||||
|
|
@ -1635,6 +1644,25 @@ async function resolveWorkflowNameForEditConfirmation(
|
|||
}
|
||||
|
||||
export function createBuildWorkflowAgentTool(context: OrchestrationContext) {
|
||||
let consecutivePlanGuardRejections = 0;
|
||||
|
||||
const rejectWithGuard = (message: string): { result: string; taskId: string } => {
|
||||
consecutivePlanGuardRejections += 1;
|
||||
if (consecutivePlanGuardRejections >= PLAN_GUARD_REJECTION_LIMIT) {
|
||||
context.logger.warn(
|
||||
'build-workflow-with-agent plan-guard rejection limit reached — aborting run',
|
||||
{
|
||||
threadId: context.threadId,
|
||||
rejectionCount: consecutivePlanGuardRejections,
|
||||
},
|
||||
);
|
||||
throw new UserError(
|
||||
'Stopped: the agent looped on `build-workflow-with-agent` rejections without correcting them. Try again or rephrase the request.',
|
||||
);
|
||||
}
|
||||
return { result: message, taskId: '' };
|
||||
};
|
||||
|
||||
return createTool({
|
||||
id: 'build-workflow-with-agent',
|
||||
description:
|
||||
|
|
@ -1664,34 +1692,19 @@ export function createBuildWorkflowAgentTool(context: OrchestrationContext) {
|
|||
hasWorkflowId: Boolean(input.workflowId),
|
||||
},
|
||||
);
|
||||
return {
|
||||
result:
|
||||
'Error: direct builder calls require `bypassPlan: true` + an existing ' +
|
||||
'`workflowId` + a one-sentence `reason`. Use that combination for any edit to ' +
|
||||
'an existing workflow. For new workflows, multi-workflow builds, or data-table ' +
|
||||
'schema changes, call `plan` with a `build-workflow` task instead — the planner ' +
|
||||
'discovers credentials, data tables, and best practices, and schedules an ' +
|
||||
'orchestrator-run verification checkpoint.',
|
||||
taskId: '',
|
||||
};
|
||||
return rejectWithGuard(
|
||||
'STOP. Call `plan` with a `build-workflow` task — do NOT retry this tool. Direct builder calls outside a plan are only allowed for edits to an existing workflow (`bypassPlan: true` + `workflowId` + one-sentence `reason`).',
|
||||
);
|
||||
}
|
||||
if (!input.workflowId) {
|
||||
return {
|
||||
result:
|
||||
'Error: `bypassPlan: true` is for edits to an EXISTING workflow and requires a ' +
|
||||
'`workflowId`. New workflow builds must go through `plan` so an orchestrator-run ' +
|
||||
'verification checkpoint is scheduled. Call `plan` with a `build-workflow` task ' +
|
||||
'instead.',
|
||||
taskId: '',
|
||||
};
|
||||
return rejectWithGuard(
|
||||
'STOP. `bypassPlan: true` is for edits to an EXISTING workflow only. For new workflows call `plan` with a `build-workflow` task — do NOT retry this tool without a `workflowId`.',
|
||||
);
|
||||
}
|
||||
if (!input.reason || input.reason.trim().length === 0) {
|
||||
return {
|
||||
result:
|
||||
'Error: `bypassPlan: true` requires a one-sentence `reason` describing the edit ' +
|
||||
'(e.g. "swap Slack channel", "fix Code node shape issue").',
|
||||
taskId: '',
|
||||
};
|
||||
return rejectWithGuard(
|
||||
'STOP. `bypassPlan: true` requires a one-sentence `reason` (e.g. "swap Slack channel"). Retry once with `reason` set; do not retry without it.',
|
||||
);
|
||||
}
|
||||
context.logger.warn('build-workflow-with-agent bypassing plan with bypassPlan=true', {
|
||||
threadId: context.threadId,
|
||||
|
|
@ -1739,6 +1752,7 @@ export function createBuildWorkflowAgentTool(context: OrchestrationContext) {
|
|||
}
|
||||
}
|
||||
|
||||
consecutivePlanGuardRejections = 0;
|
||||
const result = await startBuildWorkflowAgentTask(context, input);
|
||||
return { result: result.result, taskId: result.taskId };
|
||||
},
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user