refactor(instance-ai): detach browser credential setup

Run browser credential setup as an accepted background_subagent task instead of an inline orchestrator sub-agent, publish UI spawn events only after accepted task creation, and close detached actor spans before their root trace.
This commit is contained in:
Oleg Ivaniv 2026-05-06 14:34:31 +02:00
parent 4de67e70bf
commit 6ab822879e
No known key found for this signature in database
3 changed files with 371 additions and 223 deletions

View File

@ -0,0 +1,141 @@
import { executeTool } from '../../../__tests__/tool-test-utils';
import type { InstanceAiEventBus } from '../../../event-bus/event-bus.interface';
import type { InstanceAiToolRegistry, OrchestrationContext, TaskStorage } from '../../../types';
const { createBrowserCredentialSetupTool } =
// eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/consistent-type-imports
require('../browser-credential-setup.tool') as typeof import('../browser-credential-setup.tool');
/**
 * Builds an event bus test double in which every method is a Jest mock.
 *
 * `subscribe` hands back a no-op unsubscribe callback and the two
 * run-scoped getters return empty arrays; the remaining methods are bare
 * mocks with no configured return value.
 */
function createMockEventBus(): InstanceAiEventBus {
	const noopUnsubscribe = () => {};

	const eventBus: InstanceAiEventBus = {
		publish: jest.fn(),
		subscribe: jest.fn().mockReturnValue(noopUnsubscribe),
		getEventsAfter: jest.fn(),
		getNextEventId: jest.fn(),
		getEventsForRun: jest.fn().mockReturnValue([]),
		getEventsForRuns: jest.fn().mockReturnValue([]),
	};

	return eventBus;
}
/**
 * Creates an orchestration context suitable for exercising the
 * browser-credential-setup tool in isolation.
 *
 * The defaults expose a single `browser_click` MCP tool, a chrome-devtools
 * browser MCP config, and a `spawnBackgroundTask` mock that reports the task
 * as started.
 *
 * @param overrides - Properties that replace the defaults; applied last, so
 *   an explicit `undefined` removes a default (e.g. `spawnBackgroundTask`).
 * @returns A fresh context with its own mock event bus and abort signal.
 */
function createMockContext(overrides?: Partial<OrchestrationContext>): OrchestrationContext {
	const defaults: OrchestrationContext = {
		threadId: 'thread-123',
		runId: 'run-123',
		userId: 'test-user',
		orchestratorAgentId: 'agent-001',
		modelId: 'anthropic/claude-sonnet-4-5',
		subAgentMaxSteps: 10,
		eventBus: createMockEventBus(),
		logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn(), debug: jest.fn() },
		domainTools: {},
		abortSignal: new AbortController().signal,
		taskStorage: {} as TaskStorage,
		browserMcpConfig: { name: 'chrome-devtools', command: 'npx', args: [] },
		mcpTools: {
			browser_click: {
				name: 'browser_click',
				description: 'Click in the browser',
				handler: jest.fn(),
			},
		} as InstanceAiToolRegistry,
		spawnBackgroundTask: jest.fn(() => ({
			status: 'started' as const,
			taskId: 'browser-task-id',
			agentId: 'browser-agent-id',
		})),
		cancelBackgroundTask: jest.fn(),
	};

	// Caller-supplied values win over every default.
	return { ...defaults, ...overrides };
}
/**
 * Behavioural tests for the detached browser-credential-setup tool.
 *
 * The tool must hand the browser agent to `spawnBackgroundTask`, return
 * immediately, and publish the `agent-spawned` UI event only when the
 * background task was actually accepted.
 */
describe('browser-credential-setup tool', () => {
	it('spawns a detached browser task and returns immediately', async () => {
		const context = createMockContext();
		const tool = createBrowserCredentialSetupTool(context);

		const result = await executeTool<{ result: string }>(
			tool,
			{ credentialType: 'googleOAuth2Api', docsUrl: 'https://docs.example/credential' },
			{} as never,
		);

		expect(result.result).toContain('Browser credential setup started');

		type SpawnCall = {
			threadId: string;
			agentId: string;
			role: string;
			taskId: string;
			createTraceContext: unknown;
			run: unknown;
		};
		const spawnMock = context.spawnBackgroundTask as jest.Mock<unknown, [SpawnCall]>;
		expect(spawnMock).toHaveBeenCalledTimes(1);
		const spawnCall = spawnMock.mock.calls[0]?.[0];
		expect(spawnCall).toBeDefined();
		if (!spawnCall) throw new Error('Expected spawnBackgroundTask to be called');

		expect(spawnCall.threadId).toBe('thread-123');
		expect(spawnCall.agentId).toMatch(/^agent-browser-/);
		expect(spawnCall.role).toBe('credential-setup-browser-agent');
		expect(spawnCall.taskId).toMatch(/^browser-credential-/);
		expect(typeof spawnCall.createTraceContext).toBe('function');
		expect(typeof spawnCall.run).toBe('function');

		type PublishCall = [
			string,
			{
				type: string;
				runId: string;
				agentId: string;
				payload: {
					role: string;
					kind: string;
					taskId: string;
					tools: string[];
				};
			},
		];
		const publishMock = context.eventBus.publish as jest.Mock;
		// Fail with a readable assertion instead of a TypeError on `calls[0]`
		// if the spawn event is ever dropped.
		expect(publishMock).toHaveBeenCalled();
		const publishCall = publishMock.mock.calls[0] as PublishCall;

		expect(publishCall[0]).toBe('thread-123');
		expect(publishCall[1].type).toBe('agent-spawned');
		expect(publishCall[1].runId).toBe('run-123');
		expect(publishCall[1].payload.role).toBe('credential-setup-browser-agent');
		expect(publishCall[1].payload.kind).toBe('browser-setup');
		expect(publishCall[1].payload.tools).toEqual(
			expect.arrayContaining(['browser_click', 'pause-for-user', 'ask-user']),
		);
		// The UI event must point at the very task/agent that was spawned.
		expect(publishCall[1].agentId).toBe(spawnCall.agentId);
		expect(publishCall[1].payload.taskId).toBe(spawnCall.taskId);
	});

	it('does not publish a browser agent when spawn is rejected as duplicate', async () => {
		const context = createMockContext({
			spawnBackgroundTask: jest.fn(() => ({
				status: 'duplicate' as const,
				existing: {
					taskId: 'task-existing',
					agentId: 'agent-existing',
					role: 'credential-setup-browser-agent',
				},
			})),
		});
		const tool = createBrowserCredentialSetupTool(context);

		const result = await executeTool<{ result: string }>(
			tool,
			{ credentialType: 'googleOAuth2Api' },
			{} as never,
		);

		expect(result.result).toContain('already running');
		expect(context.eventBus.publish).not.toHaveBeenCalled();
	});

	it('does not publish a browser agent when the background-task limit is reached', async () => {
		const context = createMockContext({
			// The tool only inspects `status` for this outcome.
			spawnBackgroundTask: jest.fn().mockReturnValue({ status: 'limit-reached' }),
		});
		const tool = createBrowserCredentialSetupTool(context);

		const result = await executeTool<{ result: string }>(
			tool,
			{ credentialType: 'googleOAuth2Api' },
			{} as never,
		);

		expect(result.result).toContain('concurrent background-task limit reached');
		expect(context.eventBus.publish).not.toHaveBeenCalled();
	});

	it('returns an error when background task support is missing', async () => {
		const context = createMockContext({ spawnBackgroundTask: undefined });
		const tool = createBrowserCredentialSetupTool(context);

		const result = await executeTool<{ result: string }>(
			tool,
			{ credentialType: 'googleOAuth2Api' },
			{} as never,
		);

		expect(result.result).toBe('Browser credential setup requires background task support.');
		// No task was created, so the UI must not be told an agent spawned.
		expect(context.eventBus.publish).not.toHaveBeenCalled();
	});
});

View File

@ -5,11 +5,9 @@ import { z } from 'zod';
import { buildBrowserAgentPrompt, type BrowserToolSource } from './browser-credential-setup.prompt';
import {
failTraceRun,
finishTraceRun,
startSubAgentTrace,
createDetachedSubAgentTraceFactory,
traceSubAgentTools,
withTraceRun,
withTraceContextActor,
} from './tracing-utils';
import { MAX_STEPS } from '../../constants/max-steps';
import {
@ -24,6 +22,8 @@ import { createAskUserTool } from '../shared/ask-user.tool';
export { buildBrowserAgentPrompt, type BrowserToolSource } from './browser-credential-setup.prompt';
const BROWSER_CREDENTIAL_AGENT_ROLE = 'credential-setup-browser-agent';
function createPauseForUserTool() {
return new Tool('pause-for-user')
.description(
@ -85,6 +85,47 @@ const browserCredentialSetupToolInputSchema = z.object({
.describe('Credential fields the user needs to obtain from the service'),
});
/** Handler input of the browser credential setup tool, inferred from its zod input schema. */
type BrowserCredentialSetupToolInput = z.infer<typeof browserCredentialSetupToolInputSchema>;
/**
 * Renders the markdown briefing handed to the browser agent as its first
 * message.
 *
 * The briefing always contains a title, a documentation line and the
 * completion criteria. A "Required Fields" list is added when the input
 * names at least one field, and an "OAuth Redirect URL" section is added
 * when the credential type contains "oauth" (case-insensitive) and the
 * context provides a callback URL.
 *
 * @param input - Tool input describing the credential to set up.
 * @param context - Orchestration context; only `oauth2CallbackUrl` is read.
 * @returns The briefing as a single newline-joined string.
 */
function buildCredentialSetupBriefing(
	input: BrowserCredentialSetupToolInput,
	context: OrchestrationContext,
): string {
	const { credentialType, docsUrl, requiredFields } = input;

	// Only non-empty parts are collected, so no separate filtering pass is
	// needed before joining. Optional sections carry their own leading
	// newline, which is what produces the blank line above their heading.
	const parts: string[] = [`## Credential Setup: ${credentialType}`];

	if (docsUrl) {
		parts.push(`**Documentation:** ${docsUrl}`);
	} else {
		parts.push(
			'**Documentation:** No URL available — use `research` (action: web-search) to find setup instructions.',
		);
	}

	const fieldBullets: string[] = [];
	for (const field of requiredFields ?? []) {
		let bullet = `- ${field.displayName} (${field.name})`;
		if (field.required) bullet += ' [REQUIRED]';
		if (field.description) bullet += ': ' + field.description;
		fieldBullets.push(bullet);
	}
	if (fieldBullets.length > 0) {
		parts.push(`\n### Required Fields\n${fieldBullets.join('\n')}`);
	}

	const mentionsOAuth = credentialType.toLowerCase().includes('oauth');
	if (mentionsOAuth && context.oauth2CallbackUrl) {
		parts.push(
			`\n### OAuth Redirect URL\n${context.oauth2CallbackUrl}\n` +
				'Paste this into the "Authorized redirect URIs" field. ' +
				'Do NOT navigate to the n8n instance to find it — use this URL directly.',
		);
	}

	parts.push(
		'### Completion Criteria',
		'Done ONLY when all required values are visible on screen or downloaded, and you have called `pause-for-user` telling the user where to find them and to enter them privately in n8n.',
	);

	return parts.join('\n');
}
export function createBrowserCredentialSetupTool(context: OrchestrationContext) {
return new Tool('browser-credential-setup')
.description(
@ -99,6 +140,7 @@ export function createBrowserCredentialSetupTool(context: OrchestrationContext)
}),
)
.handler(async (input: z.infer<typeof browserCredentialSetupToolInputSchema>) => {
await Promise.resolve();
// Determine tool source: prefer local gateway browser tools over chrome-devtools-mcp
const browserTools: InstanceAiToolRegistry = {};
let toolSource: BrowserToolSource;
@ -147,236 +189,185 @@ export function createBrowserCredentialSetupTool(context: OrchestrationContext)
browserTools.research = createResearchTool(context.domainContext);
}
const subAgentId = `agent-browser-${nanoid(6)}`;
if (!context.spawnBackgroundTask) {
return { result: 'Browser credential setup requires background task support.' };
}
const subAgentId = `agent-browser-${nanoid(6)}`;
const taskId = `browser-credential-${nanoid(8)}`;
const browserPrompt = buildBrowserAgentPrompt(toolSource);
const tracedBrowserTools = traceSubAgentTools(
context,
browserTools,
BROWSER_CREDENTIAL_AGENT_ROLE,
);
const createTraceContext = createDetachedSubAgentTraceFactory(context, {
agentId: subAgentId,
role: BROWSER_CREDENTIAL_AGENT_ROLE,
kind: 'browser-credential-setup',
taskId,
inputs: {
credentialType: input.credentialType,
docsUrl: input.docsUrl,
requiredFields: input.requiredFields?.map((field) => ({
name: field.name,
type: field.type,
required: field.required,
})),
},
});
const spawnOutcome = context.spawnBackgroundTask({
taskId,
threadId: context.threadId,
agentId: subAgentId,
role: BROWSER_CREDENTIAL_AGENT_ROLE,
createTraceContext,
dedupeKey: { role: BROWSER_CREDENTIAL_AGENT_ROLE },
parentCheckpointId:
context.isCheckpointFollowUp === true ? context.checkpointTaskId : undefined,
run: async (signal, _drainCorrections, _waitForCorrection, { traceContext }) =>
await withTraceContextActor(traceContext, async () => {
const subAgent = new Agent('Browser Credential Setup Agent')
.model(context.modelId)
.instructions(browserPrompt, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
})
.tool(Object.values(tracedBrowserTools))
.checkpoint(context.checkpointStore ?? 'memory');
const telemetry = traceContext?.getTelemetry?.({
agentRole: BROWSER_CREDENTIAL_AGENT_ROLE,
functionId: `instance-ai.subagent.${BROWSER_CREDENTIAL_AGENT_ROLE}`,
executionMode: 'background_subagent',
metadata: { agent_id: subAgentId, task_id: taskId },
});
if (telemetry) {
subAgent.telemetry(telemetry);
}
mergeTraceRunInputs(
traceContext?.actorRun,
buildAgentTraceInputs({
systemPrompt: browserPrompt,
tools: tracedBrowserTools,
modelId: context.modelId,
}),
);
const briefing = buildCredentialSetupBriefing(input, context);
const stream = await subAgent.stream(briefing, {
maxIterations: MAX_STEPS.BROWSER,
abortSignal: signal,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
let activeStream = normalizeStreamSource(stream);
let activeAgentRunId = typeof activeStream.runId === 'string' ? activeStream.runId : '';
let lastSuspendedToolName = '';
const MAX_NUDGES = 3;
let nudgeCount = 0;
while (true) {
const result = await executeResumableStream({
agent: subAgent,
stream: activeStream,
initialAgentRunId: activeAgentRunId,
context: {
threadId: context.threadId,
runId: context.runId,
agentId: subAgentId,
eventBus: context.eventBus,
signal,
logger: context.logger,
},
control: {
mode: 'auto',
buildResumeOptions: ({ agentRunId, suspension }) => ({
runId: agentRunId,
toolCallId: suspension.toolCallId,
maxIterations: MAX_STEPS.BROWSER,
}),
waitForConfirmation: async (requestId) => {
if (!context.waitForConfirmation) {
throw new Error(
'Browser agent requires user interaction but no HITL handler is available',
);
}
return await context.waitForConfirmation(requestId);
},
onSuspension: (suspension) => {
lastSuspendedToolName = suspension.toolName ?? '';
},
},
});
if (result.status === 'cancelled') {
throw new Error('Run cancelled while waiting for confirmation');
}
if (lastSuspendedToolName !== 'pause-for-user' && nudgeCount < MAX_NUDGES) {
// Agent ended without a final pause-for-user confirmation.
// Re-invoke with a nudge to call pause-for-user.
nudgeCount++;
const nudge = await subAgent.stream(
'You stopped without confirming with the user. Call pause-for-user NOW to tell the user where the credential values live and to enter them privately in the n8n credential form.',
{
maxIterations: MAX_STEPS.BROWSER,
abortSignal: signal,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
);
activeStream = normalizeStreamSource(nudge);
activeAgentRunId =
(typeof activeStream.runId === 'string' && activeStream.runId) ||
result.agentRunId ||
activeAgentRunId;
continue;
}
return await (result.text ?? activeStream.text ?? Promise.resolve(''));
}
}),
});
if (spawnOutcome.status === 'duplicate') {
return {
result: `Browser credential setup is already running (task: ${spawnOutcome.existing.taskId}). Wait for the background-task follow-up before dispatching another one.`,
};
}
if (spawnOutcome.status === 'limit-reached') {
return {
result:
'Could not start browser credential setup: concurrent background-task limit reached. Wait for an existing task to finish and try again.',
};
}
// Publish agent-spawned so the UI shows the browser agent
context.eventBus.publish(context.threadId, {
type: 'agent-spawned',
runId: context.runId,
agentId: subAgentId,
payload: {
parentId: context.orchestratorAgentId,
role: 'credential-setup-browser-agent',
role: BROWSER_CREDENTIAL_AGENT_ROLE,
tools: Object.keys(browserTools),
taskId,
kind: 'browser-setup',
title: 'Setting up credential',
subtitle: input.credentialType,
goal: `Set up ${input.credentialType}`,
targetResource: { type: 'credential' as const },
},
});
let traceRun: Awaited<ReturnType<typeof startSubAgentTrace>>;
try {
traceRun = await startSubAgentTrace(context, {
agentId: subAgentId,
role: 'credential-setup-browser-agent',
kind: 'browser-credential-setup',
inputs: {
credentialType: input.credentialType,
docsUrl: input.docsUrl,
requiredFields: input.requiredFields?.map(
(field: {
name: string;
displayName: string;
type: string;
required: boolean;
description?: string;
}) => ({
name: field.name,
type: field.type,
required: field.required,
}),
),
},
});
const tracedBrowserTools = traceSubAgentTools(
context,
browserTools,
'credential-setup-browser-agent',
);
const browserPrompt = buildBrowserAgentPrompt(toolSource);
const resultText = await withTraceRun(context, traceRun, async () => {
const subAgent = new Agent('Browser Credential Setup Agent')
.model(context.modelId)
.instructions(browserPrompt, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
})
.tool(Object.values(tracedBrowserTools))
.checkpoint(context.checkpointStore ?? 'memory');
const telemetry = context.tracing?.getTelemetry?.({
agentRole: 'credential-setup-browser-agent',
functionId: 'instance-ai.subagent.credential-setup-browser-agent',
executionMode: 'background',
metadata: { agent_id: subAgentId },
});
if (telemetry) {
subAgent.telemetry(telemetry);
}
mergeTraceRunInputs(
traceRun,
buildAgentTraceInputs({
systemPrompt: browserPrompt,
tools: tracedBrowserTools,
modelId: context.modelId,
}),
);
// Build the briefing
const docsLine = input.docsUrl
? `**Documentation:** ${input.docsUrl}`
: '**Documentation:** No URL available — use `research` (action: web-search) to find setup instructions.';
let fieldsSection = '';
if (input.requiredFields && input.requiredFields.length > 0) {
const fieldLines = input.requiredFields.map(
(f: {
name: string;
displayName: string;
type: string;
required: boolean;
description?: string;
}) =>
`- ${f.displayName} (${f.name})${f.required ? ' [REQUIRED]' : ''}${f.description ? ': ' + f.description : ''}`,
);
fieldsSection = `\n### Required Fields\n${fieldLines.join('\n')}`;
}
// For OAuth2 credentials, include the redirect URL so the agent can
// paste it directly into the "Authorized redirect URIs" field
const isOAuth = input.credentialType.toLowerCase().includes('oauth');
const oauthSection =
isOAuth && context.oauth2CallbackUrl
? `\n### OAuth Redirect URL\n${context.oauth2CallbackUrl}\n` +
'Paste this into the "Authorized redirect URIs" field. ' +
'Do NOT navigate to the n8n instance to find it — use this URL directly.'
: '';
const briefing = [
`## Credential Setup: ${input.credentialType}`,
'',
docsLine,
fieldsSection,
oauthSection,
'',
'### Completion Criteria',
'Done ONLY when all required values are visible on screen or downloaded, and you have called `pause-for-user` telling the user where to find them and to enter them privately in n8n.',
]
.filter(Boolean)
.join('\n');
const stream = await subAgent.stream(briefing, {
maxIterations: MAX_STEPS.BROWSER,
abortSignal: context.abortSignal,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
let activeStream = normalizeStreamSource(stream);
let activeAgentRunId = typeof activeStream.runId === 'string' ? activeStream.runId : '';
let lastSuspendedToolName = '';
const MAX_NUDGES = 3;
let nudgeCount = 0;
while (true) {
const result = await executeResumableStream({
agent: subAgent,
stream: activeStream,
initialAgentRunId: activeAgentRunId,
context: {
threadId: context.threadId,
runId: context.runId,
agentId: subAgentId,
eventBus: context.eventBus,
signal: context.abortSignal,
logger: context.logger,
},
control: {
mode: 'auto',
buildResumeOptions: ({ agentRunId, suspension }) => ({
runId: agentRunId,
toolCallId: suspension.toolCallId,
maxIterations: MAX_STEPS.BROWSER,
}),
waitForConfirmation: async (requestId) => {
if (!context.waitForConfirmation) {
throw new Error(
'Browser agent requires user interaction but no HITL handler is available',
);
}
return await context.waitForConfirmation(requestId);
},
onSuspension: (suspension) => {
lastSuspendedToolName = suspension.toolName ?? '';
},
},
});
if (result.status === 'cancelled') {
throw new Error('Run cancelled while waiting for confirmation');
}
if (lastSuspendedToolName !== 'pause-for-user' && nudgeCount < MAX_NUDGES) {
// Agent ended without a final pause-for-user confirmation.
// Re-invoke with a nudge to call pause-for-user.
nudgeCount++;
const nudge = await subAgent.stream(
'You stopped without confirming with the user. Call pause-for-user NOW to tell the user where the credential values live and to enter them privately in the n8n credential form.',
{
maxIterations: MAX_STEPS.BROWSER,
abortSignal: context.abortSignal,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
);
activeStream = normalizeStreamSource(nudge);
activeAgentRunId =
(typeof activeStream.runId === 'string' && activeStream.runId) ||
result.agentRunId ||
activeAgentRunId;
continue;
}
return await (result.text ?? activeStream.text ?? Promise.resolve(''));
}
});
await finishTraceRun(context, traceRun, {
outputs: {
result: resultText,
agentId: subAgentId,
role: 'credential-setup-browser-agent',
},
});
context.eventBus.publish(context.threadId, {
type: 'agent-completed',
runId: context.runId,
agentId: subAgentId,
payload: {
role: 'credential-setup-browser-agent',
result: resultText,
},
});
return { result: resultText };
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
await failTraceRun(context, traceRun, error, {
agent_id: subAgentId,
agent_role: 'credential-setup-browser-agent',
});
context.eventBus.publish(context.threadId, {
type: 'agent-completed',
runId: context.runId,
agentId: subAgentId,
payload: {
role: 'credential-setup-browser-agent',
result: '',
error: errorMessage,
},
});
return { result: `Browser agent error: ${errorMessage}` };
}
return {
result: `Browser credential setup started (task: ${taskId}). Wait for the background-task follow-up before summarizing the result.`,
};
})
.build();
}

View File

@ -901,6 +901,22 @@ export class InstanceAiService {
if (!traceContext) return;
try {
if (
traceContext.actorRun.id !== traceContext.rootRun.id &&
traceContext.actorRun.endTime === undefined
) {
await traceContext.finishRun(traceContext.actorRun, {
outputs: {
status: options.status,
...options.outputs,
},
metadata: {
final_status: options.status,
...options.metadata,
},
...(options.error ? { error: options.error } : {}),
});
}
await traceContext.finishRun(traceContext.rootRun, {
outputs: {
status: options.status,