fix(core): Avoid unnecessary planner credential prompts (#30451)

2026-05-27 23:07:12 +02:00 · 2026-05-14 17:36:11 +02:00 · 2026-05-14 17:36:11 +02:00 · f1fd79f830
commit f1fd79f830
parent 94113f44a5
7 changed files with 169 additions and 10 deletions
--- a/packages/@n8n/instance-ai/evaluations/tests/data-discovery.test.ts
+++ b/packages/@n8n/instance-ai/evaluations/tests/data-discovery.test.ts
@ -23,6 +23,7 @@ describe('loadDiscoveryTestCasesWithFiles', () => {
 				'screenshot-dashboard',
 				'http-node-config-no-browser',
 				'oauth-with-computer-use-disabled',
+				'planner-no-credential-ask',
 			]),
 		);
 	});
@ -33,6 +34,7 @@ describe('loadDiscoveryTestCasesWithFiles', () => {
 		'screenshot-dashboard',
 		'http-node-config-no-browser',
 		'oauth-with-computer-use-disabled',
+		'planner-no-credential-ask',
 	])('%s parses with a valid expectedToolInvocations rule', (slug) => {
 		const entry = cases.find((c) => c.fileSlug === slug);
 		expect(entry).toBeDefined();
--- a/packages/@n8n/instance-ai/evaluations/data/discovery/planner-no-credential-ask.json
+++ b/packages/@n8n/instance-ai/evaluations/data/discovery/planner-no-credential-ask.json
@ -0,0 +1,18 @@
+{
+	"id": "planner-no-credential-ask",
+	"userMessage": "Build a workflow that lets customers request a meeting time in natural language, parses the preferred time with OpenAI, creates a Google Calendar event, and sends a Gmail confirmation email.",
+	"expectedToolInvocations": {
+		"anyOf": ["plan", "spawn_sub_agent:planner"],
+		"noneOfToolCalls": [
+			{
+				"toolName": "ask-user",
+				"argsContainAny": ["credential", "credentials", "account", "auth", "api key", "token"]
+			},
+			{
+				"toolName": "ask-user",
+				"argsContainAny": ["timezone", "time zone"]
+			}
+		]
+	},
+	"rationale": "Regression coverage for INS-204. A fresh workflow plan may use credentials(action=\"list\") for discovery, but planning must not ask the user which credential/account to use when the builder can auto-select or mock unresolved credentials. The planner should also use contextual timezone data rather than asking for it."
+}
--- a/packages/@n8n/instance-ai/evaluations/discovery/tests/expected-tools-invoked.test.ts
+++ b/packages/@n8n/instance-ai/evaluations/discovery/tests/expected-tools-invoked.test.ts
@ -3,7 +3,7 @@ import { runExpectedToolsInvokedCheck } from '../expected-tools-invoked';
 import type { DiscoveryTestCase } from '../types';

 function makeOutcome(opts: {
-	toolCalls?: Array<Pick<CapturedToolCall, 'toolName'>>;
+	toolCalls?: Array<Pick<CapturedToolCall, 'toolName'> & Partial<Pick<CapturedToolCall, 'args'>>>;
 	agents?: Array<Pick<AgentActivity, 'role' | 'tools'>>;
 }): EventOutcome {
 	return {
@ -14,7 +14,7 @@ function makeOutcome(opts: {
 		toolCalls: (opts.toolCalls ?? []).map((tc, i) => ({
 			toolCallId: `call-${i}`,
 			toolName: tc.toolName,
-			args: {},
+			args: tc.args ?? {},
 			durationMs: 0,
 		})),
 		agentActivities: (opts.agents ?? []).map((a, i) => ({
@ -175,6 +175,65 @@ describe('runExpectedToolsInvokedCheck', () => {
 		});
 	});

+	describe('noneOfToolCalls — actual tool-call guard', () => {
+		const plannerScenario: DiscoveryTestCase = {
+			id: 'test',
+			userMessage: 'Build a Gmail and Calendar workflow',
+			expectedToolInvocations: {
+				anyOf: ['plan', 'spawn_sub_agent:planner'],
+				noneOfToolCalls: [{ toolName: 'ask-user', argsContainAny: ['credential'] }],
+			},
+		};
+
+		it('passes when a spawned planner has ask-user available but does not call it', () => {
+			const result = runExpectedToolsInvokedCheck(
+				plannerScenario,
+				makeOutcome({
+					toolCalls: [{ toolName: 'plan' }],
+					agents: [{ role: 'planner', tools: ['credentials', 'ask-user'] }],
+				}),
+			);
+
+			expect(result.pass).toBe(true);
+			expect(result.invokedTools).toContain('ask-user');
+		});
+
+		it('fails when the forbidden tool call happens with matching args', () => {
+			const result = runExpectedToolsInvokedCheck(
+				plannerScenario,
+				makeOutcome({
+					toolCalls: [
+						{ toolName: 'plan' },
+						{
+							toolName: 'ask-user',
+							args: { question: 'Which Google Calendar credential should I use?' },
+						},
+					],
+					agents: [{ role: 'planner', tools: ['credentials', 'ask-user'] }],
+				}),
+			);
+
+			expect(result.pass).toBe(false);
+			expect(result.comment).toContain('Expected no actual tool call matching');
+			expect(result.comment).toContain('credential');
+		});
+
+		it('passes when the same tool is called for unrelated args', () => {
+			const result = runExpectedToolsInvokedCheck(
+				plannerScenario,
+				makeOutcome({
+					toolCalls: [
+						{ toolName: 'plan' },
+						{ toolName: 'ask-user', args: { question: 'Which failure branch should run?' } },
+					],
+					agents: [{ role: 'planner', tools: ['credentials', 'ask-user'] }],
+				}),
+			);
+
+			expect(result.pass).toBe(true);
+		});
+	});
+
 	describe('rule validation', () => {
 		it('throws when neither anyOf nor noneOf is provided', () => {
 			expect(() =>
--- a/packages/@n8n/instance-ai/evaluations/discovery/expected-tools-invoked.ts
+++ b/packages/@n8n/instance-ai/evaluations/discovery/expected-tools-invoked.ts
@ -18,7 +18,12 @@
 // ---------------------------------------------------------------------------

 import type { EventOutcome } from '../types';
-import type { DiscoveryCheckResult, DiscoveryTestCase, ExpectedToolInvocations } from './types';
+import type {
+	DiscoveryCheckResult,
+	DiscoveryTestCase,
+	ExpectedToolInvocations,
+	ForbiddenToolCall,
+} from './types';

 const SPAWN_PREFIX = 'spawn_sub_agent:';

@ -52,11 +57,35 @@ function matches(name: string, invokedTools: string[], spawnedAgents: string[]):
 function validateRule(rule: ExpectedToolInvocations): void {
 	const hasAnyOf = Array.isArray(rule.anyOf) && rule.anyOf.length > 0;
 	const hasNoneOf = Array.isArray(rule.noneOf) && rule.noneOf.length > 0;
-	if (!hasAnyOf && !hasNoneOf) {
-		throw new Error('expectedToolInvocations must specify a non-empty `anyOf` or `noneOf` list');
+	const hasNoneOfToolCalls = Array.isArray(rule.noneOfToolCalls) && rule.noneOfToolCalls.length > 0;
+	if (!hasAnyOf && !hasNoneOf && !hasNoneOfToolCalls) {
+		throw new Error(
+			'expectedToolInvocations must specify a non-empty `anyOf`, `noneOf`, or `noneOfToolCalls` list',
+		);
 	}
 }

+function toolCallMatchesExpectation(
+	toolCall: EventOutcome['toolCalls'][number],
+	expectation: ForbiddenToolCall,
+): boolean {
+	if (toolCall.toolName !== expectation.toolName) return false;
+
+	const argsContainAny = expectation.argsContainAny ?? [];
+	if (argsContainAny.length === 0) return true;
+
+	const argsText = JSON.stringify(toolCall.args).toLowerCase();
+	return argsContainAny.some((term) => argsText.includes(term.toLowerCase()));
+}
+
+function formatForbiddenToolCall(expectation: ForbiddenToolCall): string {
+	const args =
+		expectation.argsContainAny && expectation.argsContainAny.length > 0
+			? ` with args containing one of [${expectation.argsContainAny.join(', ')}]`
+			: '';
+	return `${expectation.toolName}${args}`;
+}
+
 export function runExpectedToolsInvokedCheck(
 	scenario: DiscoveryTestCase,
 	outcome: EventOutcome,
@ -66,7 +95,7 @@ export function runExpectedToolsInvokedCheck(
 	const invokedTools = collectInvokedTools(outcome);
 	const spawnedAgents = collectSpawnedAgents(outcome);

-	const { anyOf, noneOf } = scenario.expectedToolInvocations;
+	const { anyOf, noneOf, noneOfToolCalls } = scenario.expectedToolInvocations;

 	if (anyOf && anyOf.length > 0) {
 		const matched = anyOf.find((name) => matches(name, invokedTools, spawnedAgents));
@ -92,6 +121,22 @@ export function runExpectedToolsInvokedCheck(
 		}
 	}

+	if (noneOfToolCalls && noneOfToolCalls.length > 0) {
+		for (const expectation of noneOfToolCalls) {
+			const violated = outcome.toolCalls.find((toolCall) =>
+				toolCallMatchesExpectation(toolCall, expectation),
+			);
+			if (violated) {
+				return {
+					pass: false,
+					comment: `Expected no actual tool call matching [${formatForbiddenToolCall(expectation)}], but saw ${violated.toolName} with args ${JSON.stringify(violated.args)}.`,
+					invokedTools,
+					spawnedAgents,
+				};
+			}
+		}
+	}
+
 	return {
 		pass: true,
 		comment: 'Discovery expectation satisfied.',
--- a/packages/@n8n/instance-ai/evaluations/discovery/types.ts
+++ b/packages/@n8n/instance-ai/evaluations/discovery/types.ts
@ -17,13 +17,23 @@ import type { LocalGatewayStatus } from '../../src/types';
 *   (top-level orchestrator call, or via a spawned sub-agent's tool list).
 * - `noneOf` — pass only if NONE of the listed tool names was invoked.
 *   Used for negative scenarios that guard against over-eager invocation.
+ * - `noneOfToolCalls` — pass only if NONE of the listed tool calls happened.
+ *   Unlike `noneOf`, this checks actual tool calls only; a spawned sub-agent
+ *   having the tool available does not count as a violation.
 *
 * Both forms accept a sub-agent role prefix `spawn_sub_agent:<role>` to match
 * an `agent-spawned` event whose role equals `<role>`.
 */
+export interface ForbiddenToolCall {
+	toolName: string;
+	/** Optional case-insensitive substrings to match against the serialized tool args. */
+	argsContainAny?: string[];
+}
+
 export interface ExpectedToolInvocations {
 	anyOf?: string[];
 	noneOf?: string[];
+	noneOfToolCalls?: ForbiddenToolCall[];
 }

 /**
@ -43,7 +53,7 @@ export interface DiscoveryTestCase {
 	userMessage: string;
 	/** Optional instance state overrides applied when constructing the agent. */
 	instanceState?: DiscoveryInstanceState;
-	/** Pass condition. Exactly one of the form keys (`anyOf` / `noneOf`) is required. */
+	/** Pass condition. At least one expectation key is required. */
 	expectedToolInvocations: ExpectedToolInvocations;
 	/** Free-form note explaining what regression this scenario protects against. */
 	rationale?: string;
--- a/packages/@n8n/instance-ai/src/tools/orchestration/tests/credential-guardrails.prompt.test.ts
+++ b/packages/@n8n/instance-ai/src/tools/orchestration/tests/credential-guardrails.prompt.test.ts
@ -33,14 +33,33 @@ describe('credential guardrail prompts', () => {
 		);
 	});

-	it('tells the planner to ask when a required service has more than one credential of the same type', () => {
+	it('tells the planner not to block planning on credential selection', () => {
+		expect(PLANNER_AGENT_PROMPT).toContain('Handle credentials without blocking planning');
+		expect(PLANNER_AGENT_PROMPT).toContain('If the user already named a credential');
+		expect(PLANNER_AGENT_PROMPT).toContain('If there is exactly one matching credential');
+		expect(PLANNER_AGENT_PROMPT).toContain('auto-select it, do not ask');
+		expect(PLANNER_AGENT_PROMPT).toContain('If there are no matching credentials, do not ask');
+		expect(PLANNER_AGENT_PROMPT).toContain('builder will use a mocked or unresolved credential');
 		expect(PLANNER_AGENT_PROMPT).toContain(
-			'Do ask when a required service has more than one credential of the same type',
+			'If there is more than one credential of the same required type',
 		);
+		expect(PLANNER_AGENT_PROMPT).toContain('ask once with a single-select');
 		expect(PLANNER_AGENT_PROMPT).toContain('cannot be discovered, only chosen');
+		expect(PLANNER_AGENT_PROMPT).toContain('credential-backed resource investigation');
+		expect(PLANNER_AGENT_PROMPT).toContain('Do not turn that into a credential-choice question');
 		expect(PLANNER_AGENT_PROMPT).toContain('Record the chosen credential name in `assumptions`');
 	});

+	it('tells the planner to use the contextual timezone before asking', () => {
+		expect(PLANNER_AGENT_PROMPT).toContain(
+			"Never ask for the user's timezone when `<user-timezone>` is present",
+		);
+		expect(PLANNER_AGENT_PROMPT).toContain('use `<current-datetime>` / `<user-timezone>`');
+		expect(PLANNER_AGENT_PROMPT).toContain(
+			'Only ask if timezone is missing and a date or schedule cannot be interpreted safely',
+		);
+	});
+
 	it('tells the builder to wrap ambiguous resource matches with placeholder()', () => {
 		// Both prompts inline PLACEHOLDERS_RULE, which now covers the multi-match case.
 		const sharedRule = '**Resource IDs with more than one candidate**';
--- a/packages/@n8n/instance-ai/src/tools/orchestration/plan-agent-prompt.ts
+++ b/packages/@n8n/instance-ai/src/tools/orchestration/plan-agent-prompt.ts
@ -19,10 +19,16 @@ ${SUBAGENT_OUTPUT_CONTRACT}
 1. **Prefer assumptions over questions.** The user is waiting for a plan, and they can reject it if your assumptions are wrong — so default to making reasonable choices rather than asking.
   - **Never ask about things you can discover** — call \`credentials(action="list")\`, \`data-tables(action="list")\`, \`templates(action="best-practices")\` instead.
   - **Never ask about implementation details** — trigger types, node choices, schedule times, column names. Pick sensible defaults.
+   - **Never ask for the user's timezone when \`<user-timezone>\` is present** — use \`<current-datetime>\` / \`<user-timezone>\`. Only ask if timezone is missing and a date or schedule cannot be interpreted safely.
   - **Never default resource identifiers** the user didn't mention (Slack channels, calendars, spreadsheets, folders, etc.) — leave them for the builder to resolve at build time.
   - **Trust already-collected briefing context** — if the briefing includes an Already-collected answers or Already-discovered resources section, treat those entries as authoritative. Do not ask again for purpose, trigger, integrations, schedule, model, resource, or credential choices already listed there.
   - **Do ask when the answer would significantly change the plan** — e.g. the user's goal is ambiguous ("build me a CRM" — for sales? support? recruiting?), or a business rule must come from the user ("what should happen when payment fails?").
-   - **Do ask when a required service has more than one credential of the same type** (e.g. two \`openAiApi\` accounts, three Google Calendar accounts) — which one to use cannot be discovered, only chosen. Record the chosen credential name in \`assumptions\`.
+   - **Handle credentials without blocking planning.** Call \`credentials(action="list")\` for external services, then apply these cases:
+     - If the user already named a credential in their request, use it directly and record the credential name in \`assumptions\`.
+     - If there is exactly one matching credential for a required type, auto-select it, do not ask, and record the credential name in \`assumptions\`.
+     - If there are no matching credentials, do not ask; plan normally and note that the builder will use a mocked or unresolved credential and route setup after verification.
+     - If there is more than one credential of the same required type and the user did not name one, ask once with a single-select because the choice cannot be discovered, only chosen. Record the chosen credential name in \`assumptions\`.
+   - **Use credential-backed resource investigation only when it changes the plan.** You may call \`credentials(action="list")\` so a later resource lookup can validate a resource that affects the architecture (for example checking whether a named Slack channel exists). Do not turn that into a credential-choice question unless the multiple-credentials rule above applies.
   - **List your assumptions** on your first \`add-plan-item\` call. The user reviews the plan before execution and can reject/correct.

 2. **Discover** — check what exists and learn best practices. Expect 3–6 tool calls for a typical request: