mirror of
https://github.com/n8n-io/n8n.git
synced 2026-05-27 23:07:12 +02:00
fix(core): Avoid unnecessary planner credential prompts (#30451)
This commit is contained in:
parent
94113f44a5
commit
f1fd79f830
|
|
@ -23,6 +23,7 @@ describe('loadDiscoveryTestCasesWithFiles', () => {
|
|||
'screenshot-dashboard',
|
||||
'http-node-config-no-browser',
|
||||
'oauth-with-computer-use-disabled',
|
||||
'planner-no-credential-ask',
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
|
@ -33,6 +34,7 @@ describe('loadDiscoveryTestCasesWithFiles', () => {
|
|||
'screenshot-dashboard',
|
||||
'http-node-config-no-browser',
|
||||
'oauth-with-computer-use-disabled',
|
||||
'planner-no-credential-ask',
|
||||
])('%s parses with a valid expectedToolInvocations rule', (slug) => {
|
||||
const entry = cases.find((c) => c.fileSlug === slug);
|
||||
expect(entry).toBeDefined();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"id": "planner-no-credential-ask",
|
||||
"userMessage": "Build a workflow that lets customers request a meeting time in natural language, parses the preferred time with OpenAI, creates a Google Calendar event, and sends a Gmail confirmation email.",
|
||||
"expectedToolInvocations": {
|
||||
"anyOf": ["plan", "spawn_sub_agent:planner"],
|
||||
"noneOfToolCalls": [
|
||||
{
|
||||
"toolName": "ask-user",
|
||||
"argsContainAny": ["credential", "credentials", "account", "auth", "api key", "token"]
|
||||
},
|
||||
{
|
||||
"toolName": "ask-user",
|
||||
"argsContainAny": ["timezone", "time zone"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"rationale": "Regression coverage for INS-204. A fresh workflow plan may use credentials(action=\"list\") for discovery, but planning must not ask the user which credential/account to use when the builder can auto-select or mock unresolved credentials. The planner should also use contextual timezone data rather than asking for it."
|
||||
}
|
||||
|
|
@ -3,7 +3,7 @@ import { runExpectedToolsInvokedCheck } from '../expected-tools-invoked';
|
|||
import type { DiscoveryTestCase } from '../types';
|
||||
|
||||
function makeOutcome(opts: {
|
||||
toolCalls?: Array<Pick<CapturedToolCall, 'toolName'>>;
|
||||
toolCalls?: Array<Pick<CapturedToolCall, 'toolName'> & Partial<Pick<CapturedToolCall, 'args'>>>;
|
||||
agents?: Array<Pick<AgentActivity, 'role' | 'tools'>>;
|
||||
}): EventOutcome {
|
||||
return {
|
||||
|
|
@ -14,7 +14,7 @@ function makeOutcome(opts: {
|
|||
toolCalls: (opts.toolCalls ?? []).map((tc, i) => ({
|
||||
toolCallId: `call-${i}`,
|
||||
toolName: tc.toolName,
|
||||
args: {},
|
||||
args: tc.args ?? {},
|
||||
durationMs: 0,
|
||||
})),
|
||||
agentActivities: (opts.agents ?? []).map((a, i) => ({
|
||||
|
|
@ -175,6 +175,65 @@ describe('runExpectedToolsInvokedCheck', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('noneOfToolCalls — actual tool-call guard', () => {
|
||||
const plannerScenario: DiscoveryTestCase = {
|
||||
id: 'test',
|
||||
userMessage: 'Build a Gmail and Calendar workflow',
|
||||
expectedToolInvocations: {
|
||||
anyOf: ['plan', 'spawn_sub_agent:planner'],
|
||||
noneOfToolCalls: [{ toolName: 'ask-user', argsContainAny: ['credential'] }],
|
||||
},
|
||||
};
|
||||
|
||||
it('passes when a spawned planner has ask-user available but does not call it', () => {
|
||||
const result = runExpectedToolsInvokedCheck(
|
||||
plannerScenario,
|
||||
makeOutcome({
|
||||
toolCalls: [{ toolName: 'plan' }],
|
||||
agents: [{ role: 'planner', tools: ['credentials', 'ask-user'] }],
|
||||
}),
|
||||
);
|
||||
|
||||
expect(result.pass).toBe(true);
|
||||
expect(result.invokedTools).toContain('ask-user');
|
||||
});
|
||||
|
||||
it('fails when the forbidden tool call happens with matching args', () => {
|
||||
const result = runExpectedToolsInvokedCheck(
|
||||
plannerScenario,
|
||||
makeOutcome({
|
||||
toolCalls: [
|
||||
{ toolName: 'plan' },
|
||||
{
|
||||
toolName: 'ask-user',
|
||||
args: { question: 'Which Google Calendar credential should I use?' },
|
||||
},
|
||||
],
|
||||
agents: [{ role: 'planner', tools: ['credentials', 'ask-user'] }],
|
||||
}),
|
||||
);
|
||||
|
||||
expect(result.pass).toBe(false);
|
||||
expect(result.comment).toContain('Expected no actual tool call matching');
|
||||
expect(result.comment).toContain('credential');
|
||||
});
|
||||
|
||||
it('passes when the same tool is called for unrelated args', () => {
|
||||
const result = runExpectedToolsInvokedCheck(
|
||||
plannerScenario,
|
||||
makeOutcome({
|
||||
toolCalls: [
|
||||
{ toolName: 'plan' },
|
||||
{ toolName: 'ask-user', args: { question: 'Which failure branch should run?' } },
|
||||
],
|
||||
agents: [{ role: 'planner', tools: ['credentials', 'ask-user'] }],
|
||||
}),
|
||||
);
|
||||
|
||||
expect(result.pass).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('rule validation', () => {
|
||||
it('throws when neither anyOf nor noneOf is provided', () => {
|
||||
expect(() =>
|
||||
|
|
|
|||
|
|
@ -18,7 +18,12 @@
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
import type { EventOutcome } from '../types';
|
||||
import type { DiscoveryCheckResult, DiscoveryTestCase, ExpectedToolInvocations } from './types';
|
||||
import type {
|
||||
DiscoveryCheckResult,
|
||||
DiscoveryTestCase,
|
||||
ExpectedToolInvocations,
|
||||
ForbiddenToolCall,
|
||||
} from './types';
|
||||
|
||||
const SPAWN_PREFIX = 'spawn_sub_agent:';
|
||||
|
||||
|
|
@ -52,11 +57,35 @@ function matches(name: string, invokedTools: string[], spawnedAgents: string[]):
|
|||
function validateRule(rule: ExpectedToolInvocations): void {
|
||||
const hasAnyOf = Array.isArray(rule.anyOf) && rule.anyOf.length > 0;
|
||||
const hasNoneOf = Array.isArray(rule.noneOf) && rule.noneOf.length > 0;
|
||||
if (!hasAnyOf && !hasNoneOf) {
|
||||
throw new Error('expectedToolInvocations must specify a non-empty `anyOf` or `noneOf` list');
|
||||
const hasNoneOfToolCalls = Array.isArray(rule.noneOfToolCalls) && rule.noneOfToolCalls.length > 0;
|
||||
if (!hasAnyOf && !hasNoneOf && !hasNoneOfToolCalls) {
|
||||
throw new Error(
|
||||
'expectedToolInvocations must specify a non-empty `anyOf`, `noneOf`, or `noneOfToolCalls` list',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function toolCallMatchesExpectation(
|
||||
toolCall: EventOutcome['toolCalls'][number],
|
||||
expectation: ForbiddenToolCall,
|
||||
): boolean {
|
||||
if (toolCall.toolName !== expectation.toolName) return false;
|
||||
|
||||
const argsContainAny = expectation.argsContainAny ?? [];
|
||||
if (argsContainAny.length === 0) return true;
|
||||
|
||||
const argsText = JSON.stringify(toolCall.args).toLowerCase();
|
||||
return argsContainAny.some((term) => argsText.includes(term.toLowerCase()));
|
||||
}
|
||||
|
||||
function formatForbiddenToolCall(expectation: ForbiddenToolCall): string {
|
||||
const args =
|
||||
expectation.argsContainAny && expectation.argsContainAny.length > 0
|
||||
? ` with args containing one of [${expectation.argsContainAny.join(', ')}]`
|
||||
: '';
|
||||
return `${expectation.toolName}${args}`;
|
||||
}
|
||||
|
||||
export function runExpectedToolsInvokedCheck(
|
||||
scenario: DiscoveryTestCase,
|
||||
outcome: EventOutcome,
|
||||
|
|
@ -66,7 +95,7 @@ export function runExpectedToolsInvokedCheck(
|
|||
const invokedTools = collectInvokedTools(outcome);
|
||||
const spawnedAgents = collectSpawnedAgents(outcome);
|
||||
|
||||
const { anyOf, noneOf } = scenario.expectedToolInvocations;
|
||||
const { anyOf, noneOf, noneOfToolCalls } = scenario.expectedToolInvocations;
|
||||
|
||||
if (anyOf && anyOf.length > 0) {
|
||||
const matched = anyOf.find((name) => matches(name, invokedTools, spawnedAgents));
|
||||
|
|
@ -92,6 +121,22 @@ export function runExpectedToolsInvokedCheck(
|
|||
}
|
||||
}
|
||||
|
||||
if (noneOfToolCalls && noneOfToolCalls.length > 0) {
|
||||
for (const expectation of noneOfToolCalls) {
|
||||
const violated = outcome.toolCalls.find((toolCall) =>
|
||||
toolCallMatchesExpectation(toolCall, expectation),
|
||||
);
|
||||
if (violated) {
|
||||
return {
|
||||
pass: false,
|
||||
comment: `Expected no actual tool call matching [${formatForbiddenToolCall(expectation)}], but saw ${violated.toolName} with args ${JSON.stringify(violated.args)}.`,
|
||||
invokedTools,
|
||||
spawnedAgents,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
pass: true,
|
||||
comment: 'Discovery expectation satisfied.',
|
||||
|
|
|
|||
|
|
@ -17,13 +17,23 @@ import type { LocalGatewayStatus } from '../../src/types';
|
|||
* (top-level orchestrator call, or via a spawned sub-agent's tool list).
|
||||
* - `noneOf` — pass only if NONE of the listed tool names was invoked.
|
||||
* Used for negative scenarios that guard against over-eager invocation.
|
||||
* - `noneOfToolCalls` — pass only if NONE of the listed tool calls happened.
|
||||
* Unlike `noneOf`, this checks actual tool calls only; a spawned sub-agent
|
||||
* having the tool available does not count as a violation.
|
||||
*
|
||||
* Both forms accept a sub-agent role prefix `spawn_sub_agent:<role>` to match
|
||||
* an `agent-spawned` event whose role equals `<role>`.
|
||||
*/
|
||||
export interface ForbiddenToolCall {
|
||||
toolName: string;
|
||||
/** Optional case-insensitive substrings to match against the serialized tool args. */
|
||||
argsContainAny?: string[];
|
||||
}
|
||||
|
||||
export interface ExpectedToolInvocations {
|
||||
anyOf?: string[];
|
||||
noneOf?: string[];
|
||||
noneOfToolCalls?: ForbiddenToolCall[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -43,7 +53,7 @@ export interface DiscoveryTestCase {
|
|||
userMessage: string;
|
||||
/** Optional instance state overrides applied when constructing the agent. */
|
||||
instanceState?: DiscoveryInstanceState;
|
||||
/** Pass condition. Exactly one of the form keys (`anyOf` / `noneOf`) is required. */
|
||||
/** Pass condition. At least one expectation key is required. */
|
||||
expectedToolInvocations: ExpectedToolInvocations;
|
||||
/** Free-form note explaining what regression this scenario protects against. */
|
||||
rationale?: string;
|
||||
|
|
|
|||
|
|
@ -33,14 +33,33 @@ describe('credential guardrail prompts', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('tells the planner to ask when a required service has more than one credential of the same type', () => {
|
||||
it('tells the planner not to block planning on credential selection', () => {
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('Handle credentials without blocking planning');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('If the user already named a credential');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('If there is exactly one matching credential');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('auto-select it, do not ask');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('If there are no matching credentials, do not ask');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('builder will use a mocked or unresolved credential');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain(
|
||||
'Do ask when a required service has more than one credential of the same type',
|
||||
'If there is more than one credential of the same required type',
|
||||
);
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('ask once with a single-select');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('cannot be discovered, only chosen');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('credential-backed resource investigation');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('Do not turn that into a credential-choice question');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('Record the chosen credential name in `assumptions`');
|
||||
});
|
||||
|
||||
it('tells the planner to use the contextual timezone before asking', () => {
|
||||
expect(PLANNER_AGENT_PROMPT).toContain(
|
||||
"Never ask for the user's timezone when `<user-timezone>` is present",
|
||||
);
|
||||
expect(PLANNER_AGENT_PROMPT).toContain('use `<current-datetime>` / `<user-timezone>`');
|
||||
expect(PLANNER_AGENT_PROMPT).toContain(
|
||||
'Only ask if timezone is missing and a date or schedule cannot be interpreted safely',
|
||||
);
|
||||
});
|
||||
|
||||
it('tells the builder to wrap ambiguous resource matches with placeholder()', () => {
|
||||
// Both prompts inline PLACEHOLDERS_RULE, which now covers the multi-match case.
|
||||
const sharedRule = '**Resource IDs with more than one candidate**';
|
||||
|
|
|
|||
|
|
@ -19,10 +19,16 @@ ${SUBAGENT_OUTPUT_CONTRACT}
|
|||
1. **Prefer assumptions over questions.** The user is waiting for a plan, and they can reject it if your assumptions are wrong — so default to making reasonable choices rather than asking.
|
||||
- **Never ask about things you can discover** — call \`credentials(action="list")\`, \`data-tables(action="list")\`, \`templates(action="best-practices")\` instead.
|
||||
- **Never ask about implementation details** — trigger types, node choices, schedule times, column names. Pick sensible defaults.
|
||||
- **Never ask for the user's timezone when \`<user-timezone>\` is present** — use \`<current-datetime>\` / \`<user-timezone>\`. Only ask if timezone is missing and a date or schedule cannot be interpreted safely.
|
||||
- **Never default resource identifiers** the user didn't mention (Slack channels, calendars, spreadsheets, folders, etc.) — leave them for the builder to resolve at build time.
|
||||
- **Trust already-collected briefing context** — if the briefing includes an Already-collected answers or Already-discovered resources section, treat those entries as authoritative. Do not ask again for purpose, trigger, integrations, schedule, model, resource, or credential choices already listed there.
|
||||
- **Do ask when the answer would significantly change the plan** — e.g. the user's goal is ambiguous ("build me a CRM" — for sales? support? recruiting?), or a business rule must come from the user ("what should happen when payment fails?").
|
||||
- **Do ask when a required service has more than one credential of the same type** (e.g. two \`openAiApi\` accounts, three Google Calendar accounts) — which one to use cannot be discovered, only chosen. Record the chosen credential name in \`assumptions\`.
|
||||
- **Handle credentials without blocking planning.** Call \`credentials(action="list")\` for external services, then apply these cases:
|
||||
- If the user already named a credential in their request, use it directly and record the credential name in \`assumptions\`.
|
||||
- If there is exactly one matching credential for a required type, auto-select it, do not ask, and record the credential name in \`assumptions\`.
|
||||
- If there are no matching credentials, do not ask; plan normally and note that the builder will use a mocked or unresolved credential and route setup after verification.
|
||||
- If there is more than one credential of the same required type and the user did not name one, ask once with a single-select because the choice cannot be discovered, only chosen. Record the chosen credential name in \`assumptions\`.
|
||||
- **Use credential-backed resource investigation only when it changes the plan.** You may call \`credentials(action="list")\` so a later resource lookup can validate a resource that affects the architecture (for example checking whether a named Slack channel exists). Do not turn that into a credential-choice question unless the multiple-credentials rule above applies.
|
||||
- **List your assumptions** on your first \`add-plan-item\` call. The user reviews the plan before execution and can reject/correct.
|
||||
|
||||
2. **Discover** — check what exists and learn best practices. Expect 3–6 tool calls for a typical request:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user