From c8ac2fb1f225f335756de000e314e973fce34000 Mon Sep 17 00:00:00 2001 From: oleg Date: Wed, 20 May 2026 12:06:11 +0200 Subject: [PATCH] feat(core): Rework LangSmith tracing (no-changelog) (#30017) --- .../src/__tests__/langsmith-telemetry.test.ts | 108 +- .../@n8n/agents/src/integrations/langsmith.ts | 82 +- .../runtime/__tests__/agent-runtime.test.ts | 26 + .../@n8n/agents/src/runtime/agent-runtime.ts | 14 +- .../src/sdk/__tests__/agent-telemetry.test.ts | 80 + .../src/sdk/__tests__/telemetry.test.ts | 34 + packages/@n8n/agents/src/sdk/agent.ts | 1 + packages/@n8n/agents/src/sdk/telemetry.ts | 9 + .../@n8n/instance-ai/docs/configuration.md | 9 + .../instance-ai/src/agent/instance-agent.ts | 22 +- .../instance-ai/src/tools/credentials.tool.ts | 3 +- .../instance-ai/src/tools/data-tables.tool.ts | 3 +- packages/@n8n/instance-ai/src/tools/index.ts | 81 +- .../orchestration/plan-with-agent.tool.ts | 2 +- .../src/tools/shared/ask-user.tool.ts | 4 +- .../@n8n/instance-ai/src/tools/tool-ids.ts | 62 + .../__tests__/langsmith-tracing.test.ts | 1915 ++++++++++++---- .../src/tracing/langsmith-tracing.ts | 1985 ++++++++++------- .../instance-ai/src/tracing/trace-labels.ts | 34 + .../instance-ai/src/tracing/trace-payloads.ts | 1227 ++++++++++ .../instance-ai/src/utils/scrub-secrets.ts | 2 +- .../__tests__/instance-ai.service.test.ts | 45 + .../instance-ai/instance-ai.service.ts | 29 + 23 files changed, 4426 insertions(+), 1351 deletions(-) create mode 100644 packages/@n8n/agents/src/sdk/__tests__/agent-telemetry.test.ts create mode 100644 packages/@n8n/instance-ai/src/tools/tool-ids.ts create mode 100644 packages/@n8n/instance-ai/src/tracing/trace-labels.ts create mode 100644 packages/@n8n/instance-ai/src/tracing/trace-payloads.ts diff --git a/packages/@n8n/agents/src/__tests__/langsmith-telemetry.test.ts b/packages/@n8n/agents/src/__tests__/langsmith-telemetry.test.ts index c30c25ade6c..ba98c91a59c 100644 --- a/packages/@n8n/agents/src/__tests__/langsmith-telemetry.test.ts 
+++ b/packages/@n8n/agents/src/__tests__/langsmith-telemetry.test.ts @@ -1,4 +1,12 @@ const mockExporterConfigs: unknown[] = []; +type MockExportResult = { code: number; error?: Error }; +type MockExportCallback = (result: MockExportResult) => void; +type MockExporter = { + type: 'exporter'; + export: jest.Mock; + shutdown: jest.Mock, []>; +}; +const mockExporterInstances: MockExporter[] = []; const mockBatchProcessorInputs: unknown[] = []; const mockBatchProcessorInstances: Array<{ forceFlush: jest.Mock, []>; @@ -19,7 +27,15 @@ const mockProvider = { jest.mock('langsmith/experimental/otel/exporter', () => ({ LangSmithOTLPTraceExporter: jest.fn((config: unknown) => { mockExporterConfigs.push(config); - return { type: 'exporter' }; + const exporter: MockExporter = { + type: 'exporter', + export: jest.fn((_: unknown[], resultHandler: MockExportCallback) => { + resultHandler({ code: 0 }); + }), + shutdown: jest.fn(async () => await Promise.resolve()), + }; + mockExporterInstances.push(exporter); + return exporter; }), })); @@ -59,6 +75,7 @@ describe('LangSmithTelemetry', () => { beforeEach(() => { mockExporterConfigs.length = 0; + mockExporterInstances.length = 0; mockBatchProcessorInputs.length = 0; mockBatchProcessorInstances.length = 0; mockProviderConfigs.length = 0; @@ -78,21 +95,16 @@ describe('LangSmithTelemetry', () => { } }); - it('passes proxy headers and derived OTLP URL to the LangSmith exporter', async () => { + it('passes static proxy headers and derived OTLP URL to the LangSmith exporter', async () => { const transformExportedSpan = (span: unknown) => span; - const getHeaders = jest.fn(async () => { - await Promise.resolve(); - return { Authorization: 'Bearer proxy-token' } satisfies Record; - }); const built = await new LangSmithTelemetry({ apiKey: '-', project: 'instance-ai', endpoint: 'https://ai-proxy.test/langsmith', - headers: getHeaders, + headers: { Authorization: 'Bearer proxy-token' }, transformExportedSpan, }).build(); - 
expect(getHeaders).toHaveBeenCalledTimes(1); expect(mockExporterConfigs).toEqual([ { apiKey: '-', @@ -102,7 +114,7 @@ describe('LangSmithTelemetry', () => { url: 'https://ai-proxy.test/langsmith/otel/v1/traces', }, ]); - expect(mockBatchProcessorInputs).toEqual([{ type: 'exporter' }]); + expect(mockBatchProcessorInputs).toEqual([mockExporterInstances[0]]); expect(mockProviderConfigs).toHaveLength(1); const providerConfig = mockProviderConfigs[0] as { spanProcessors: unknown[] }; expect(providerConfig.spanProcessors).toHaveLength(1); @@ -118,6 +130,84 @@ describe('LangSmithTelemetry', () => { expect(process.env.LANGCHAIN_TRACING_V2).toBe('true'); }); + it('resolves function headers for every export request', async () => { + const getHeaders = jest + .fn>, []>() + .mockResolvedValueOnce({ Authorization: 'Bearer proxy-token-1' }) + .mockResolvedValueOnce({ Authorization: 'Bearer proxy-token-2' }); + + await new LangSmithTelemetry({ + apiKey: '-', + project: 'instance-ai', + endpoint: 'https://ai-proxy.test/langsmith', + headers: getHeaders, + }).build(); + + expect(getHeaders).not.toHaveBeenCalled(); + expect(mockExporterConfigs).toEqual([]); + expect(mockBatchProcessorInputs).toHaveLength(1); + + const exporter = mockBatchProcessorInputs[0] as { + export(spans: unknown[], resultCallback: MockExportCallback): void; + }; + const firstSpan = { name: 'first' }; + const secondSpan = { name: 'second' }; + + const firstResult = await new Promise((resolve) => { + exporter.export([firstSpan], resolve); + }); + const secondResult = await new Promise((resolve) => { + exporter.export([secondSpan], resolve); + }); + + expect(firstResult).toEqual({ code: 0 }); + expect(secondResult).toEqual({ code: 0 }); + expect(getHeaders).toHaveBeenCalledTimes(2); + expect(mockExporterConfigs).toEqual([ + { + apiKey: '-', + projectName: 'instance-ai', + headers: { Authorization: 'Bearer proxy-token-1' }, + url: 'https://ai-proxy.test/langsmith/otel/v1/traces', + }, + { + apiKey: '-', + 
projectName: 'instance-ai', + headers: { Authorization: 'Bearer proxy-token-2' }, + url: 'https://ai-proxy.test/langsmith/otel/v1/traces', + }, + ]); + expect(mockExporterInstances[0].export).toHaveBeenCalledWith([firstSpan], expect.any(Function)); + expect(mockExporterInstances[1].export).toHaveBeenCalledWith( + [secondSpan], + expect.any(Function), + ); + }); + + it('reports export failure when function headers reject', async () => { + const refreshError = new Error('could not refresh headers'); + const getHeaders = jest + .fn>, []>() + .mockRejectedValueOnce(refreshError); + + await new LangSmithTelemetry({ + apiKey: '-', + project: 'instance-ai', + headers: getHeaders, + }).build(); + + const exporter = mockBatchProcessorInputs[0] as { + export(spans: unknown[], resultCallback: MockExportCallback): void; + }; + const result = await new Promise((resolve) => { + exporter.export([{ name: 'span' }], resolve); + }); + + expect(result).toEqual({ code: 1, error: refreshError }); + expect(getHeaders).toHaveBeenCalledTimes(1); + expect(mockExporterConfigs).toEqual([]); + }); + it('does not allow endpoint overrides when using an engine-resolved key', async () => { const telemetry = new LangSmithTelemetry({ project: 'instance-ai', diff --git a/packages/@n8n/agents/src/integrations/langsmith.ts b/packages/@n8n/agents/src/integrations/langsmith.ts index 50b876c9fc9..fe77cfc19a7 100644 --- a/packages/@n8n/agents/src/integrations/langsmith.ts +++ b/packages/@n8n/agents/src/integrations/langsmith.ts @@ -39,12 +39,66 @@ interface BatchSpanProcessorConstructor { new (exporter: unknown): SpanProcessorLike; } +interface ExportResultLike { + code: number; + error?: Error; +} + +type ExportResultCallback = (result: ExportResultLike) => void; + +interface SpanExporterLike { + export(spans: unknown[], resultCallback: ExportResultCallback): void; + shutdown(): Promise; +} + +interface LangSmithOTLPTraceExporterConfig { + apiKey?: string; + projectName?: string; + url?: string; + 
headers?: Record; + transformExportedSpan?: (span: unknown) => unknown; +} + +interface LangSmithOTLPTraceExporterConstructor { + new (cfg?: LangSmithOTLPTraceExporterConfig): SpanExporterLike; +} + interface LangSmithRunTree { getSharedClient(): { awaitPendingTraceBatches(): Promise; }; } +const OTEL_EXPORT_RESULT_FAILED = 1; + +function toExportError(error: unknown): Error { + return error instanceof Error ? error : new Error(String(error)); +} + +function createHeaderRefreshingLangSmithExporter( + LangSmithOTLPTraceExporter: LangSmithOTLPTraceExporterConstructor, + getExporterConfig: (headers: Record) => LangSmithOTLPTraceExporterConfig, + getHeaders: () => Promise>, +): SpanExporterLike { + return { + export(spans, resultCallback) { + void (async () => { + try { + const headers = await getHeaders(); + const exporter = new LangSmithOTLPTraceExporter(getExporterConfig(headers)); + exporter.export(spans, resultCallback); + } catch (error) { + resultCallback({ code: OTEL_EXPORT_RESULT_FAILED, error: toExportError(error) }); + } + })(); + }, + + async shutdown() { + await Promise.resolve(); + }, + }; +} + function isOtelSpanLike(value: unknown): value is OtelSpanLike { return ( value !== null && @@ -174,7 +228,10 @@ export interface LangSmithTelemetryConfig { * as `${endpoint}/otel/v1/traces`. Use this for custom collectors or testing. */ url?: string; - /** Default headers to send with LangSmith OTLP export requests. */ + /** + * Default headers to send with LangSmith OTLP export requests. + * Callback headers are resolved per export request. + */ headers?: Record | (() => Promise>); /** Optional hook for redacting or annotating spans before LangSmith export. 
*/ transformExportedSpan?: (span: unknown) => unknown; @@ -199,13 +256,7 @@ async function createLangSmithTracer( }; const { LangSmithOTLPTraceExporter } = (await import('langsmith/experimental/otel/exporter')) as { - LangSmithOTLPTraceExporter: new (cfg?: { - apiKey?: string; - projectName?: string; - url?: string; - headers?: Record; - transformExportedSpan?: (span: unknown) => unknown; - }) => unknown; + LangSmithOTLPTraceExporter: LangSmithOTLPTraceExporterConstructor; }; const { BatchSpanProcessor } = (await import('@opentelemetry/sdk-trace-base')) as { BatchSpanProcessor: BatchSpanProcessorConstructor; @@ -223,9 +274,10 @@ async function createLangSmithTracer( ? undefined : (config?.url ?? (config?.endpoint ? `${config.endpoint.replace(/\/$/, '')}/otel/v1/traces` : undefined)); - const headers = typeof config?.headers === 'function' ? await config.headers() : config?.headers; - const exporter = new LangSmithOTLPTraceExporter({ + const buildExporterConfig = ( + headers?: Record, + ): LangSmithOTLPTraceExporterConfig => ({ apiKey, projectName: config?.project, ...(headers ? { headers } : {}), @@ -235,6 +287,16 @@ async function createLangSmithTracer( ...(url ? { url } : {}), }); + const headers = config?.headers; + const exporter = + typeof headers === 'function' + ? 
createHeaderRefreshingLangSmithExporter( + LangSmithOTLPTraceExporter, + buildExporterConfig, + headers, + ) + : new LangSmithOTLPTraceExporter(buildExporterConfig(headers)); + const processor = createLangSmithSpanProcessor({ exporter, BatchSpanProcessor, diff --git a/packages/@n8n/agents/src/runtime/__tests__/agent-runtime.test.ts b/packages/@n8n/agents/src/runtime/__tests__/agent-runtime.test.ts index 142d1fbf47e..76927971705 100644 --- a/packages/@n8n/agents/src/runtime/__tests__/agent-runtime.test.ts +++ b/packages/@n8n/agents/src/runtime/__tests__/agent-runtime.test.ts @@ -2807,6 +2807,32 @@ describe('AgentRuntime — telemetry propagation', () => { expect(expTelemetry.recordOutputs).toBe(false); }); + it('uses updated telemetry config for later runs', async () => { + generateText.mockResolvedValue(makeGenerateSuccess()); + const updatedTelemetry: BuiltTelemetry = { + ...baseTelemetry, + functionId: 'updated-agent', + metadata: { env: 'updated' }, + }; + + const runtime = new AgentRuntime({ + name: 'telemetry-test', + model: 'openai/gpt-4o-mini', + instructions: 'test', + eventBus: new AgentEventBus(), + telemetry: baseTelemetry, + }); + + runtime.setTelemetry(updatedTelemetry); + await runtime.generate('hello'); + + // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access + const callArgs = generateText.mock.calls[0][0] as Record; + const expTelemetry = callArgs.experimental_telemetry as Record; + expect(expTelemetry.functionId).toBe('updated-agent'); + expect(expTelemetry.metadata).toEqual({ env: 'updated' }); + }); + it('wraps generate calls in a telemetry root span when the tracer supports active spans', async () => { generateText.mockResolvedValue(makeGenerateSuccess()); const span = { diff --git a/packages/@n8n/agents/src/runtime/agent-runtime.ts b/packages/@n8n/agents/src/runtime/agent-runtime.ts index b6ef3baddc6..562ccc1dc00 100644 --- a/packages/@n8n/agents/src/runtime/agent-runtime.ts +++ 
b/packages/@n8n/agents/src/runtime/agent-runtime.ts @@ -69,6 +69,7 @@ import { toAiSdkProviderTools, toAiSdkTools, } from './tool-adapter'; +import { Telemetry } from '../sdk/telemetry'; import { AgentEvent } from '../types/runtime/event'; import type { AgentEventData } from '../types/runtime/event'; import type { @@ -362,6 +363,10 @@ export class AgentRuntime { }; } + setTelemetry(telemetry: BuiltTelemetry | undefined): void { + this.config.telemetry = telemetry; + } + /** * Wait for in-flight background tasks (title generation, future * observer cycles) to settle. Safe to call multiple times. @@ -748,14 +753,7 @@ export class AgentRuntime { /** Best-effort flush of telemetry provider. Never throws. */ private async flushTelemetry(options?: ExecutionOptions): Promise { - try { - const resolved = this.resolveTelemetry(options); - if (resolved?.provider) { - await resolved.provider.forceFlush(); - } - } catch { - // Telemetry flush is best-effort — never block the response or mask the real error. - } + await Telemetry.forceFlush(this.resolveTelemetry(options)); } /** Map resolved telemetry to AI SDK's experimental_telemetry shape. */ diff --git a/packages/@n8n/agents/src/sdk/__tests__/agent-telemetry.test.ts b/packages/@n8n/agents/src/sdk/__tests__/agent-telemetry.test.ts new file mode 100644 index 00000000000..414e1488432 --- /dev/null +++ b/packages/@n8n/agents/src/sdk/__tests__/agent-telemetry.test.ts @@ -0,0 +1,80 @@ +import type { BuiltTelemetry } from '../../types'; +import { Agent } from '../agent'; + +// Mock provider packages so createModel() doesn't fail when no API key is set. 
+jest.mock('@ai-sdk/openai', () => ({ + createOpenAI: () => () => ({ provider: 'openai', modelId: 'mock', specificationVersion: 'v3' }), +})); + +// eslint-disable-next-line @typescript-eslint/consistent-type-imports +type AiImport = typeof import('ai'); + +jest.mock('ai', () => { + const actual = jest.requireActual('ai'); + return { + ...actual, + generateText: jest.fn(), + }; +}); + +// eslint-disable-next-line @typescript-eslint/no-require-imports +const { generateText } = require('ai') as { + generateText: jest.Mock; +}; + +function makeGenerateSuccess(text = 'OK') { + return { + finishReason: 'stop', + usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, + response: { + messages: [ + { + role: 'assistant', + content: [{ type: 'text', text }], + }, + ], + }, + toolCalls: [], + }; +} + +function makeTelemetry(functionId: string): BuiltTelemetry { + return { + enabled: true, + functionId, + metadata: { functionId }, + recordInputs: true, + recordOutputs: true, + integrations: [], + tracer: { startSpan: jest.fn() }, + }; +} + +describe('Agent telemetry', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('updates telemetry on an already-built runtime', async () => { + generateText.mockResolvedValue(makeGenerateSuccess()); + const agent = new Agent('agent') + .model('openai/gpt-4o-mini') + .instructions('test') + .telemetry(makeTelemetry('initial-agent')); + + await agent.generate('first'); + agent.telemetry(makeTelemetry('updated-agent')); + await agent.generate('second'); + + // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access + const firstCall = generateText.mock.calls[0][0] as Record; + // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access + const secondCall = generateText.mock.calls[1][0] as Record; + const firstTelemetry = firstCall.experimental_telemetry as Record; + const secondTelemetry = secondCall.experimental_telemetry as Record; + + expect(firstTelemetry.functionId).toBe('initial-agent'); + 
expect(secondTelemetry.functionId).toBe('updated-agent'); + expect(secondTelemetry.metadata).toEqual({ functionId: 'updated-agent' }); + }); +}); diff --git a/packages/@n8n/agents/src/sdk/__tests__/telemetry.test.ts b/packages/@n8n/agents/src/sdk/__tests__/telemetry.test.ts index e48f2a3bfdd..01f177dac61 100644 --- a/packages/@n8n/agents/src/sdk/__tests__/telemetry.test.ts +++ b/packages/@n8n/agents/src/sdk/__tests__/telemetry.test.ts @@ -171,3 +171,37 @@ describe('Telemetry.shutdown()', () => { await Telemetry.shutdown(built); }); }); + +describe('Telemetry.forceFlush()', () => { + it('calls provider.forceFlush() when provider exists', async () => { + const forceFlushMock = jest.fn().mockResolvedValue(undefined); + const built = await new Telemetry().build(); + const withProvider = { + ...built, + provider: { forceFlush: forceFlushMock, shutdown: jest.fn() }, + }; + + await Telemetry.forceFlush(withProvider); + + expect(forceFlushMock).toHaveBeenCalled(); + }); + + it('swallows provider.forceFlush() errors', async () => { + const built = await new Telemetry().build(); + const withProvider = { + ...built, + provider: { + forceFlush: jest.fn().mockRejectedValue(new Error('flush failed')), + shutdown: jest.fn(), + }, + }; + + await expect(Telemetry.forceFlush(withProvider)).resolves.toBeUndefined(); + }); + + it('does nothing when no provider exists', async () => { + const built = await new Telemetry().build(); + + await expect(Telemetry.forceFlush(built)).resolves.toBeUndefined(); + }); +}); diff --git a/packages/@n8n/agents/src/sdk/agent.ts b/packages/@n8n/agents/src/sdk/agent.ts index 697021cf9f3..200eaf99e53 100644 --- a/packages/@n8n/agents/src/sdk/agent.ts +++ b/packages/@n8n/agents/src/sdk/agent.ts @@ -344,6 +344,7 @@ export class Agent implements BuiltAgent, AgentBuilder { } else { this.telemetryBuilder = undefined; this.telemetryConfig = t; + this.runtime?.setTelemetry(t); } return this; } diff --git a/packages/@n8n/agents/src/sdk/telemetry.ts 
b/packages/@n8n/agents/src/sdk/telemetry.ts index ea475121a1a..1435e42f1cb 100644 --- a/packages/@n8n/agents/src/sdk/telemetry.ts +++ b/packages/@n8n/agents/src/sdk/telemetry.ts @@ -309,4 +309,13 @@ export class Telemetry { await telemetry.provider.shutdown(); } } + + /** Best-effort provider flush. Telemetry export must not affect agent execution. */ + static async forceFlush(telemetry: BuiltTelemetry | undefined): Promise { + try { + await telemetry?.provider?.forceFlush(); + } catch { + // Telemetry flush is best-effort — never block the response or mask the real error. + } + } } diff --git a/packages/@n8n/instance-ai/docs/configuration.md b/packages/@n8n/instance-ai/docs/configuration.md index a6f76c8486e..e0de4aa0132 100644 --- a/packages/@n8n/instance-ai/docs/configuration.md +++ b/packages/@n8n/instance-ai/docs/configuration.md @@ -17,6 +17,15 @@ All Instance AI configuration is done via environment variables. | `N8N_INSTANCE_AI_BROWSER_MCP` | boolean | `false` | Enable Chrome DevTools MCP for browser-assisted credential setup | | `N8N_INSTANCE_AI_LOCAL_GATEWAY_DISABLED` | boolean | `false` | Disable the local gateway (filesystem, shell, browser) for all users | +### Tracing + +| Variable | Type | Default | Description | +|----------|------|---------|-------------| +| `N8N_DIAGNOSTICS_ENABLED` | boolean | `true` | When set to `false`, Instance AI tracing is disabled. | +| `LANGSMITH_API_KEY` / `LANGCHAIN_API_KEY` | string | unset | Enables direct LangSmith export for local and self-hosted setups. | +| `LANGSMITH_ENDPOINT` / `LANGCHAIN_ENDPOINT` | string | unset | Optional direct LangSmith endpoint override. | +| `LANGSMITH_TRACING` / `LANGCHAIN_TRACING_V2` | boolean | unset | LangSmith SDK tracing flags. `false` disables tracing; `true` enables direct tracing when direct LangSmith credentials or endpoints are configured. 
| + ### Memory | Variable | Type | Default | Description | diff --git a/packages/@n8n/instance-ai/src/agent/instance-agent.ts b/packages/@n8n/instance-ai/src/agent/instance-agent.ts index 0cfec4ee004..523f3917728 100644 --- a/packages/@n8n/instance-ai/src/agent/instance-agent.ts +++ b/packages/@n8n/instance-ai/src/agent/instance-agent.ts @@ -14,28 +14,12 @@ import { } from '../tool-registry'; import { createAllTools, createOrchestratorDomainTools, createOrchestrationTools } from '../tools'; import { createToolsFromLocalMcpServer } from '../tools/filesystem/create-tools-from-mcp-server'; +import { ALWAYS_LOADED_TOOL_NAMES, CHECKPOINT_FOLLOW_UP_TOOL_NAMES } from '../tools/tool-ids'; import { buildAgentTraceInputs, mergeTraceRunInputs } from '../tracing/langsmith-tracing'; import type { CreateInstanceAgentOptions, InstanceAiToolRegistry } from '../types'; // ── Agent factory ─────────────────────────────────────────────────────────── -const ALWAYS_LOADED_TOOLS = new Set([ - 'plan', - 'create-tasks', - 'delegate', - 'ask-user', - 'credentials', - 'workflows', - 'build-workflow-with-agent', - 'verify-built-workflow', - 'research', - 'evals', - 'web-search', - 'fetch-url', -]); - -const CHECKPOINT_FOLLOW_UP_TOOLS = new Set(['complete-checkpoint', 'executions']); - function splitDeferredTools( tools: InstanceAiToolRegistry, options: { isCheckpointFollowUp?: boolean } = {}, @@ -45,8 +29,8 @@ function splitDeferredTools( for (const [name, tool] of tools) { if ( - ALWAYS_LOADED_TOOLS.has(name) || - (options.isCheckpointFollowUp && CHECKPOINT_FOLLOW_UP_TOOLS.has(name)) + ALWAYS_LOADED_TOOL_NAMES.has(name) || + (options.isCheckpointFollowUp && CHECKPOINT_FOLLOW_UP_TOOL_NAMES.has(name)) ) { coreTools.set(name, tool); } else { diff --git a/packages/@n8n/instance-ai/src/tools/credentials.tool.ts b/packages/@n8n/instance-ai/src/tools/credentials.tool.ts index fe08e8ba522..97b2c8a3e08 100644 --- a/packages/@n8n/instance-ai/src/tools/credentials.tool.ts +++ 
b/packages/@n8n/instance-ai/src/tools/credentials.tool.ts @@ -8,10 +8,11 @@ import { z } from 'zod'; import { sanitizeInputSchema } from '../agent/sanitize-mcp-schemas'; import type { InstanceAiContext } from '../types'; +import { CREDENTIALS_TOOL_ID } from './tool-ids'; // ── Constants ────────────────────────────────────────────────────────────── -export const CREDENTIALS_TOOL_ID = 'credentials'; +export { CREDENTIALS_TOOL_ID }; const DEFAULT_LIMIT = 50; diff --git a/packages/@n8n/instance-ai/src/tools/data-tables.tool.ts b/packages/@n8n/instance-ai/src/tools/data-tables.tool.ts index 10cfd725866..959d4ca0362 100644 --- a/packages/@n8n/instance-ai/src/tools/data-tables.tool.ts +++ b/packages/@n8n/instance-ai/src/tools/data-tables.tool.ts @@ -9,10 +9,11 @@ import { z } from 'zod'; import { sanitizeInputSchema } from '../agent/sanitize-mcp-schemas'; import type { InstanceAiContext } from '../types'; +import { DATA_TABLES_TOOL_ID } from './tool-ids'; // ── Shared schemas ───────────────────────────────────────────────────────── -export const DATA_TABLES_TOOL_ID = 'data-tables'; +export { DATA_TABLES_TOOL_ID }; const columnTypeSchema = z.enum(['string', 'number', 'boolean', 'date']); diff --git a/packages/@n8n/instance-ai/src/tools/index.ts b/packages/@n8n/instance-ai/src/tools/index.ts index d6445f8d12c..92b84426d70 100644 --- a/packages/@n8n/instance-ai/src/tools/index.ts +++ b/packages/@n8n/instance-ai/src/tools/index.ts @@ -4,8 +4,8 @@ import { isParseableAttachment } from '../parsers/structured-file-parser'; import { createToolRegistry } from '../tool-registry'; import type { InstanceAiContext, InstanceAiToolRegistry, OrchestrationContext } from '../types'; import { createParseFileTool } from './attachments/parse-file.tool'; -import { createCredentialsTool, CREDENTIALS_TOOL_ID } from './credentials.tool'; -import { createDataTablesTool, DATA_TABLES_TOOL_ID } from './data-tables.tool'; +import { createCredentialsTool } from './credentials.tool'; +import { 
createDataTablesTool } from './data-tables.tool'; import { createEvalsTool } from './evals/evals.tool'; import { createExecutionsTool } from './executions.tool'; import { createNodesTool } from './nodes.tool'; @@ -18,8 +18,9 @@ import { createPlanTool } from './orchestration/plan.tool'; import { createReportVerificationVerdictTool } from './orchestration/report-verification-verdict.tool'; import { createVerifyBuiltWorkflowTool } from './orchestration/verify-built-workflow.tool'; import { createResearchTool } from './research.tool'; -import { ASK_USER_TOOL_ID, createAskUserTool } from './shared/ask-user.tool'; +import { createAskUserTool } from './shared/ask-user.tool'; import { createTaskControlTool } from './task-control.tool'; +import { DOMAIN_TOOL_IDS, ORCHESTRATION_TOOL_IDS } from './tool-ids'; import { createApplyWorkflowCredentialsTool } from './workflows/apply-workflow-credentials.tool'; import { createBuildWorkflowTool } from './workflows/build-workflow.tool'; import { createWorkflowsTool } from './workflows.tool'; @@ -31,20 +32,20 @@ import { createWorkspaceTool } from './workspace.tool'; */ export function createAllTools(context: InstanceAiContext): InstanceAiToolRegistry { const tools: Array<[string, BuiltTool]> = [ - ['workflows', createWorkflowsTool(context)], - ['evals', createEvalsTool(context)], - ['executions', createExecutionsTool(context)], - [CREDENTIALS_TOOL_ID, createCredentialsTool(context)], - [DATA_TABLES_TOOL_ID, createDataTablesTool(context)], - ['workspace', createWorkspaceTool(context)], - ['research', createResearchTool(context)], - ['nodes', createNodesTool(context)], - [ASK_USER_TOOL_ID, createAskUserTool()], - ['build-workflow', createBuildWorkflowTool(context)], + [DOMAIN_TOOL_IDS.WORKFLOWS, createWorkflowsTool(context)], + [DOMAIN_TOOL_IDS.EVALS, createEvalsTool(context)], + [DOMAIN_TOOL_IDS.EXECUTIONS, createExecutionsTool(context)], + [DOMAIN_TOOL_IDS.CREDENTIALS, createCredentialsTool(context)], + [DOMAIN_TOOL_IDS.DATA_TABLES, 
createDataTablesTool(context)], + [DOMAIN_TOOL_IDS.WORKSPACE, createWorkspaceTool(context)], + [DOMAIN_TOOL_IDS.RESEARCH, createResearchTool(context)], + [DOMAIN_TOOL_IDS.NODES, createNodesTool(context)], + [DOMAIN_TOOL_IDS.ASK_USER, createAskUserTool()], + [DOMAIN_TOOL_IDS.BUILD_WORKFLOW, createBuildWorkflowTool(context)], ]; if (context.currentUserAttachments?.some(isParseableAttachment)) { - tools.push(['parse-file', createParseFileTool(context)]); + tools.push([DOMAIN_TOOL_IDS.PARSE_FILE, createParseFileTool(context)]); } return createToolRegistry(tools); @@ -56,19 +57,19 @@ export function createAllTools(context: InstanceAiContext): InstanceAiToolRegist */ export function createOrchestratorDomainTools(context: InstanceAiContext): InstanceAiToolRegistry { const tools: Array<[string, BuiltTool]> = [ - ['workflows', createWorkflowsTool(context, 'orchestrator')], - ['evals', createEvalsTool(context)], - ['executions', createExecutionsTool(context)], - [CREDENTIALS_TOOL_ID, createCredentialsTool(context)], - [DATA_TABLES_TOOL_ID, createDataTablesTool(context, 'orchestrator')], - ['workspace', createWorkspaceTool(context)], - ['research', createResearchTool(context)], - ['nodes', createNodesTool(context, 'orchestrator')], - [ASK_USER_TOOL_ID, createAskUserTool()], + [DOMAIN_TOOL_IDS.WORKFLOWS, createWorkflowsTool(context, 'orchestrator')], + [DOMAIN_TOOL_IDS.EVALS, createEvalsTool(context)], + [DOMAIN_TOOL_IDS.EXECUTIONS, createExecutionsTool(context)], + [DOMAIN_TOOL_IDS.CREDENTIALS, createCredentialsTool(context)], + [DOMAIN_TOOL_IDS.DATA_TABLES, createDataTablesTool(context, 'orchestrator')], + [DOMAIN_TOOL_IDS.WORKSPACE, createWorkspaceTool(context)], + [DOMAIN_TOOL_IDS.RESEARCH, createResearchTool(context)], + [DOMAIN_TOOL_IDS.NODES, createNodesTool(context, 'orchestrator')], + [DOMAIN_TOOL_IDS.ASK_USER, createAskUserTool()], ]; if (context.currentUserAttachments?.some(isParseableAttachment)) { - tools.push(['parse-file', createParseFileTool(context)]); + 
tools.push([DOMAIN_TOOL_IDS.PARSE_FILE, createParseFileTool(context)]); } return createToolRegistry(tools); @@ -80,25 +81,37 @@ export function createOrchestratorDomainTools(context: InstanceAiContext): Insta */ export function createOrchestrationTools(context: OrchestrationContext): InstanceAiToolRegistry { const tools: Array<[string, BuiltTool]> = [ - ['plan', createPlanWithAgentTool(context)], - ['create-tasks', createPlanTool(context)], - ['task-control', createTaskControlTool(context)], - ['delegate', createDelegateTool(context)], - ['build-workflow-with-agent', createBuildWorkflowAgentTool(context)], - ['complete-checkpoint', createCompleteCheckpointTool(context)], + [ORCHESTRATION_TOOL_IDS.PLAN, createPlanWithAgentTool(context)], + [ORCHESTRATION_TOOL_IDS.CREATE_TASKS, createPlanTool(context)], + [ORCHESTRATION_TOOL_IDS.TASK_CONTROL, createTaskControlTool(context)], + [ORCHESTRATION_TOOL_IDS.DELEGATE, createDelegateTool(context)], + [ORCHESTRATION_TOOL_IDS.BUILD_WORKFLOW_WITH_AGENT, createBuildWorkflowAgentTool(context)], + [ORCHESTRATION_TOOL_IDS.COMPLETE_CHECKPOINT, createCompleteCheckpointTool(context)], ]; if (context.browserMcpConfig || hasGatewayBrowserTools(context)) { - tools.push(['browser-credential-setup', createBrowserCredentialSetupTool(context)]); + tools.push([ + ORCHESTRATION_TOOL_IDS.BROWSER_CREDENTIAL_SETUP, + createBrowserCredentialSetupTool(context), + ]); } if (context.workflowTaskService) { - tools.push(['report-verification-verdict', createReportVerificationVerdictTool(context)]); + tools.push([ + ORCHESTRATION_TOOL_IDS.REPORT_VERIFICATION_VERDICT, + createReportVerificationVerdictTool(context), + ]); } if (context.workflowTaskService && context.domainContext) { - tools.push(['verify-built-workflow', createVerifyBuiltWorkflowTool(context)]); - tools.push(['apply-workflow-credentials', createApplyWorkflowCredentialsTool(context)]); + tools.push([ + ORCHESTRATION_TOOL_IDS.VERIFY_BUILT_WORKFLOW, + createVerifyBuiltWorkflowTool(context), + 
]); + tools.push([ + ORCHESTRATION_TOOL_IDS.APPLY_WORKFLOW_CREDENTIALS, + createApplyWorkflowCredentialsTool(context), + ]); } return createToolRegistry(tools); diff --git a/packages/@n8n/instance-ai/src/tools/orchestration/plan-with-agent.tool.ts b/packages/@n8n/instance-ai/src/tools/orchestration/plan-with-agent.tool.ts index e9325f34158..9086c3a7add 100644 --- a/packages/@n8n/instance-ai/src/tools/orchestration/plan-with-agent.tool.ts +++ b/packages/@n8n/instance-ai/src/tools/orchestration/plan-with-agent.tool.ts @@ -56,7 +56,7 @@ const PLANNER_DOMAIN_TOOL_NAMES = [ /** Research tools added when available. */ const PLANNER_RESEARCH_TOOL_NAMES = ['research']; -const RELEVANT_PRIOR_TOOL_NAMES = new Set([ +const RELEVANT_PRIOR_TOOL_NAMES = new Set([ ASK_USER_TOOL_ID, CREDENTIALS_TOOL_ID, DATA_TABLES_TOOL_ID, diff --git a/packages/@n8n/instance-ai/src/tools/shared/ask-user.tool.ts b/packages/@n8n/instance-ai/src/tools/shared/ask-user.tool.ts index 9c3e570a277..93a27be850c 100644 --- a/packages/@n8n/instance-ai/src/tools/shared/ask-user.tool.ts +++ b/packages/@n8n/instance-ai/src/tools/shared/ask-user.tool.ts @@ -2,7 +2,9 @@ import { Tool } from '@n8n/agents'; import { nanoid } from 'nanoid'; import { z } from 'zod'; -export const ASK_USER_TOOL_ID = 'ask-user'; +import { ASK_USER_TOOL_ID } from '../tool-ids'; + +export { ASK_USER_TOOL_ID }; const questionSchema = z.object({ id: z.string().describe('Unique question identifier'), diff --git a/packages/@n8n/instance-ai/src/tools/tool-ids.ts b/packages/@n8n/instance-ai/src/tools/tool-ids.ts new file mode 100644 index 00000000000..32de50744fe --- /dev/null +++ b/packages/@n8n/instance-ai/src/tools/tool-ids.ts @@ -0,0 +1,62 @@ +export const DOMAIN_TOOL_IDS = { + WORKFLOWS: 'workflows', + EVALS: 'evals', + EXECUTIONS: 'executions', + CREDENTIALS: 'credentials', + DATA_TABLES: 'data-tables', + WORKSPACE: 'workspace', + RESEARCH: 'research', + NODES: 'nodes', + ASK_USER: 'ask-user', + BUILD_WORKFLOW: 'build-workflow', + 
PARSE_FILE: 'parse-file', +} as const; + +export const ORCHESTRATION_TOOL_IDS = { + PLAN: 'plan', + SUBMIT_PLAN: 'submit-plan', + ADD_PLAN_ITEM: 'add-plan-item', + REMOVE_PLAN_ITEM: 'remove-plan-item', + CREATE_TASKS: 'create-tasks', + TASK_CONTROL: 'task-control', + DELEGATE: 'delegate', + BUILD_WORKFLOW_WITH_AGENT: 'build-workflow-with-agent', + MANAGE_DATA_TABLES_WITH_AGENT: 'manage-data-tables-with-agent', + RESEARCH_WITH_AGENT: 'research-with-agent', + BROWSER_CREDENTIAL_SETUP: 'browser-credential-setup', + COMPLETE_CHECKPOINT: 'complete-checkpoint', + VERIFY_BUILT_WORKFLOW: 'verify-built-workflow', + REPORT_VERIFICATION_VERDICT: 'report-verification-verdict', + APPLY_WORKFLOW_CREDENTIALS: 'apply-workflow-credentials', +} as const; + +export const WORKSPACE_TOOL_IDS = { + WRITE_FILE: 'write-file', + SUBMIT_WORKFLOW: 'submit-workflow', +} as const; + +export const CREDENTIALS_TOOL_ID = DOMAIN_TOOL_IDS.CREDENTIALS; +export const DATA_TABLES_TOOL_ID = DOMAIN_TOOL_IDS.DATA_TABLES; +export const ASK_USER_TOOL_ID = DOMAIN_TOOL_IDS.ASK_USER; + +export const ORCHESTRATION_TOOL_NAMES = new Set(Object.values(ORCHESTRATION_TOOL_IDS)); + +export const ALWAYS_LOADED_TOOL_NAMES = new Set([ + ORCHESTRATION_TOOL_IDS.PLAN, + ORCHESTRATION_TOOL_IDS.CREATE_TASKS, + ORCHESTRATION_TOOL_IDS.DELEGATE, + DOMAIN_TOOL_IDS.ASK_USER, + DOMAIN_TOOL_IDS.CREDENTIALS, + DOMAIN_TOOL_IDS.WORKFLOWS, + ORCHESTRATION_TOOL_IDS.BUILD_WORKFLOW_WITH_AGENT, + ORCHESTRATION_TOOL_IDS.VERIFY_BUILT_WORKFLOW, + DOMAIN_TOOL_IDS.RESEARCH, + DOMAIN_TOOL_IDS.EVALS, + 'web-search', + 'fetch-url', +]); + +export const CHECKPOINT_FOLLOW_UP_TOOL_NAMES = new Set([ + ORCHESTRATION_TOOL_IDS.COMPLETE_CHECKPOINT, + DOMAIN_TOOL_IDS.EXECUTIONS, +]); diff --git a/packages/@n8n/instance-ai/src/tracing/__tests__/langsmith-tracing.test.ts b/packages/@n8n/instance-ai/src/tracing/__tests__/langsmith-tracing.test.ts index ec4b29ed3f1..21a991e61cf 100644 --- 
a/packages/@n8n/instance-ai/src/tracing/__tests__/langsmith-tracing.test.ts +++ b/packages/@n8n/instance-ai/src/tracing/__tests__/langsmith-tracing.test.ts @@ -1,171 +1,204 @@ -import { createToolRegistry } from '../../tool-registry'; -import type { InstanceAiTraceRun } from '../../types'; +import type { BuiltTool } from '@n8n/agents'; +import type { Context, ContextManager } from '@opentelemetry/api'; +import { jsonParse } from 'n8n-workflow'; +import type * as AsyncHooks from 'node:async_hooks'; -jest.mock('langsmith', () => { - let runCounter = 0; - const createdRunTrees: Array<{ +import { executeTool } from '../../__tests__/tool-test-utils'; +import { createToolRegistry } from '../../tool-registry'; +import { TraceWriter, type TraceToolCall, type TraceToolSuspend } from '../trace-replay'; + +jest.mock('@n8n/agents', () => { + const actual = jest.requireActual>('@n8n/agents'); + const { AsyncLocalStorage } = jest.requireActual('node:async_hooks'); + const { ROOT_CONTEXT, context, trace } = jest.requireActual<{ + ROOT_CONTEXT: Context; + context: { + active(): Context; + with( + ctx: Context, + fn: (...args: unknown[]) => T, + thisArg?: unknown, + ...args: unknown[] + ): T; + setGlobalContextManager(contextManager: ContextManager): boolean; + }; + trace: { + getSpan(ctx: Context): unknown; + setSpan(ctx: Context, span: unknown): Context; + }; + }>('@opentelemetry/api'); + + let spanCounter = 0; + let buildError: Error | undefined; + let startSpanError: Error | undefined; + const contextStorage = new AsyncLocalStorage(); + const contextManager: ContextManager = { + active: () => contextStorage.getStore() ?? 
ROOT_CONTEXT, + with: (ctx, fn, thisArg, ...args) => contextStorage.run(ctx, () => fn.call(thisArg, ...args)), + bind: (_ctx, target) => target, + enable: () => contextManager, + disable: () => contextManager, + }; + context.setGlobalContextManager(contextManager); + + const spans: Array<{ id: string; - dotted_order: string; + traceId: string; + parentSpanId?: string; name: string; - run_type: string; - parent_run_id?: string; - client?: unknown; + attributes: Record; + status?: { code: number; message?: string }; + ended: boolean; }> = []; - class MockRunTree { - id: string; - name: string; - run_type: string; - project_name: string; - parent_run?: MockRunTree; - parent_run_id?: string; - child_runs: MockRunTree[]; - start_time: number; - end_time?: number; - extra: { metadata?: Record }; - tags?: string[]; - error?: string; - serialized: Record; - inputs: Record; - outputs?: Record; - events?: Array>; - trace_id: string; - dotted_order: string; - execution_order: number; - child_execution_order: number; - client?: unknown; + function nextHex(length: number): string { + spanCounter += 1; + return spanCounter.toString(16).padStart(length, '0').slice(-length); + } - constructor(config: { - id?: string; - name: string; - run_type?: string; - project_name?: string; - parent_run?: MockRunTree; - parent_run_id?: string; - start_time?: number; - end_time?: number; - metadata?: Record; - tags?: string[]; - error?: string; - inputs?: Record; - outputs?: Record; - execution_order?: number; - child_execution_order?: number; - trace_id?: string; - dotted_order?: string; - serialized?: Record; - client?: unknown; - }) { - runCounter += 1; - this.id = config.id ?? `run-${runCounter}`; - this.name = config.name; - this.run_type = config.run_type ?? 'chain'; - this.project_name = config.project_name ?? 'instance-ai'; - this.parent_run = config.parent_run; - this.parent_run_id = config.parent_run_id; - this.child_runs = []; - this.start_time = config.start_time ?? 
runCounter; - this.end_time = config.end_time; - this.extra = config.metadata ? { metadata: { ...config.metadata } } : {}; - this.tags = config.tags; - this.error = config.error; - this.serialized = config.serialized ?? {}; - this.inputs = config.inputs ?? {}; - this.outputs = config.outputs; - this.events = []; - this.execution_order = config.execution_order ?? 1; - this.child_execution_order = config.child_execution_order ?? this.execution_order; - this.trace_id = config.trace_id ?? this.parent_run?.trace_id ?? this.id; - this.dotted_order = - config.dotted_order ?? - (this.parent_run ? `${this.parent_run.dotted_order}.${this.id}` : this.id); - this.client = config.client; + class MockSpan { + private readonly traceId: string; + private readonly spanId: string; + private readonly record: (typeof spans)[number]; - createdRunTrees.push({ - id: this.id, - dotted_order: this.dotted_order, - name: this.name, - run_type: this.run_type, - ...(this.parent_run_id ? { parent_run_id: this.parent_run_id } : {}), - ...(this.client ? { client: this.client } : {}), - }); - } - - get metadata(): Record | undefined { - return this.extra.metadata; - } - - set metadata(metadata: Record | undefined) { - this.extra = metadata ? 
{ ...this.extra, metadata: { ...metadata } } : this.extra; - } - - createChild(config: { - name: string; - run_type?: string; - tags?: string[]; - metadata?: Record; - inputs?: Record; - }): MockRunTree { - const childExecutionOrder = this.child_execution_order + 1; - const child = new MockRunTree({ - ...config, - parent_run: this, - parent_run_id: this.id, - project_name: this.project_name, - execution_order: childExecutionOrder, - child_execution_order: childExecutionOrder, - }); - - this.child_execution_order = Math.max(this.child_execution_order, childExecutionOrder); - this.child_runs.push(child); - - return child; - } - - async postRun(): Promise { - await Promise.resolve(); - } - - async end( - outputs?: Record, - error?: string, - endTime = Date.now(), - metadata?: Record, - ): Promise { - this.outputs = outputs ?? this.outputs; - this.error = error ?? this.error; - this.end_time = endTime; - if (metadata) { - this.metadata = { - ...(this.metadata ?? {}), - ...metadata, - }; - } - await Promise.resolve(); - } - - async patchRun(): Promise { - if (this.parent_run_id === undefined && this.dotted_order.includes('.')) { - await Promise.resolve(); - throw new Error( - 'invalid dotted_order: dotted_order must contain a single part for root runs', - ); - } - await Promise.resolve(); - } - - addEvent(event: Record | string): void { - this.events?.push(typeof event === 'string' ? { message: event } : event); - } - - toHeaders(): { 'langsmith-trace': string; baggage: string } { - return { - 'langsmith-trace': this.dotted_order, - baggage: '', + constructor(name: string, attributes: Record, parentSpan?: MockSpan) { + this.traceId = parentSpan?.spanContext().traceId ?? nextHex(32); + this.spanId = nextHex(16); + this.record = { + id: this.spanId, + traceId: this.traceId, + ...(parentSpan ? 
{ parentSpanId: parentSpan.spanContext().spanId } : {}), + name, + attributes: { ...attributes }, + ended: false, }; + spans.push(this.record); + } + + spanContext(): { traceId: string; spanId: string } { + return { traceId: this.traceId, spanId: this.spanId }; + } + + setAttributes(attributes: Record): void { + Object.assign(this.record.attributes, attributes); + } + + recordException(): void {} + + setStatus(status: { code: number; message?: string }): void { + this.record.status = status; + } + + end(): void { + this.record.ended = true; } } + const tracer = { + startSpan: ( + name: string, + options?: { attributes?: Record }, + parentContext?: Context, + ) => { + if (startSpanError) { + throw startSpanError; + } + const parentSpan = trace.getSpan(parentContext ?? context.active()) as MockSpan | undefined; + return new MockSpan(name, options?.attributes ?? {}, parentSpan); + }, + startActiveSpan: async ( + name: string, + options: { attributes?: Record }, + fn: (span: MockSpan) => Promise, + ): Promise => { + const span = tracer.startSpan(name, options); + const spanContext = trace.setSpan(context.active(), span as never); + return await context.with(spanContext, async () => await fn(span)); + }, + }; + + const provider = { + forceFlush: jest.fn(async () => await Promise.resolve()), + shutdown: jest.fn(async () => await Promise.resolve()), + }; + + class MockLangSmithTelemetry { + private functionIdValue?: string; + private metadataValue?: Record; + private recordInputsValue = true; + private recordOutputsValue = true; + private runtimeRootSpanEnabledValue = true; + + functionId(value: string): this { + this.functionIdValue = value; + return this; + } + + metadata(value: Record): this { + this.metadataValue = value; + return this; + } + + recordInputs(value: boolean): this { + this.recordInputsValue = value; + return this; + } + + recordOutputs(value: boolean): this { + this.recordOutputsValue = value; + return this; + } + + runtimeRootSpan(value: boolean): this 
{ + this.runtimeRootSpanEnabledValue = value; + return this; + } + + async build(): Promise> { + if (buildError) { + throw buildError; + } + return await Promise.resolve({ + enabled: true, + functionId: this.functionIdValue, + metadata: this.metadataValue, + recordInputs: this.recordInputsValue, + recordOutputs: this.recordOutputsValue, + runtimeRootSpanEnabled: this.runtimeRootSpanEnabledValue, + integrations: [], + tracer, + provider, + }); + } + } + + return { + ...actual, + LangSmithTelemetry: MockLangSmithTelemetry, + __mock: { + reset: () => { + spanCounter = 0; + buildError = undefined; + startSpanError = undefined; + spans.length = 0; + provider.forceFlush.mockReset(); + provider.forceFlush.mockImplementation(async () => await Promise.resolve()); + provider.shutdown.mockReset(); + provider.shutdown.mockImplementation(async () => await Promise.resolve()); + }, + setBuildError: (error: Error | undefined) => { + buildError = error; + }, + setStartSpanError: (error: Error | undefined) => { + startSpanError = error; + }, + getSpans: () => spans, + getProvider: () => provider, + }, + }; +}); + +jest.mock('langsmith', () => { const createFeedbackCalls: Array<{ runId: string; key: string; @@ -173,6 +206,12 @@ jest.mock('langsmith', () => { clientApiUrl: string; }> = []; + class ForbiddenRunTree { + constructor() { + throw new Error('Instance AI tracing must use OTel spans, not LangSmith RunTree'); + } + } + class MockClient { apiUrl: string; apiKey: string; @@ -194,33 +233,26 @@ jest.mock('langsmith', () => { return { Client: MockClient, - RunTree: MockRunTree, + RunTree: ForbiddenRunTree, __mock: { reset: () => { - runCounter = 0; - createdRunTrees.length = 0; createFeedbackCalls.length = 0; }, - getCreatedRunTrees: () => createdRunTrees, getCreateFeedbackCalls: () => createFeedbackCalls, }, }; }); jest.mock('langsmith/traceable', () => { - let currentRunTree: unknown; - return { - traceable: unknown>(fn: T) => fn, - getCurrentRunTree: () => currentRunTree, - 
withRunTree: async (runTree: unknown, fn: () => Promise): Promise => { - const previous = currentRunTree; - currentRunTree = runTree; - try { - return await fn(); - } finally { - currentRunTree = previous; - } + traceable: () => { + throw new Error('Instance AI tracing must use OTel spans, not langsmith/traceable'); + }, + getCurrentRunTree: () => { + throw new Error('Instance AI tracing must use OTel spans, not langsmith/traceable'); + }, + withRunTree: () => { + throw new Error('Instance AI tracing must use OTel spans, not langsmith/traceable'); }, }; }); @@ -228,14 +260,6 @@ jest.mock('langsmith/traceable', () => { type LangSmithMockModule = { __mock: { reset: () => void; - getCreatedRunTrees: () => Array<{ - id: string; - dotted_order: string; - name: string; - run_type: string; - parent_run_id?: string; - client?: unknown; - }>; getCreateFeedbackCalls: () => Array<{ runId: string; key: string; @@ -245,11 +269,30 @@ type LangSmithMockModule = { }; }; -interface ExecutableTool { - handler: (input: unknown, context: unknown) => Promise; -} +type AgentsMockModule = { + __mock: { + reset: () => void; + setBuildError: (error: Error | undefined) => void; + setStartSpanError: (error: Error | undefined) => void; + getSpans: () => Array<{ + id: string; + traceId: string; + parentSpanId?: string; + name: string; + attributes: Record; + status?: { code: number; message?: string }; + ended: boolean; + }>; + getProvider: () => { + forceFlush: jest.Mock, []>; + shutdown: jest.Mock, []>; + }; + }; +}; -function isExecutableTool(value: unknown): value is ExecutableTool { +function isExecutableTool( + value: unknown, +): value is { handler: (input: unknown, context: unknown) => Promise } { return ( typeof value === 'object' && value !== null && @@ -259,36 +302,64 @@ function isExecutableTool(value: unknown): value is ExecutableTool { } const { - appendRootRunMetadata, - appendGeneratedWorkflowIdToRootMetadata, buildAgentTraceInputs, createDetachedSubAgentTraceContext, 
createInstanceAiTraceContext, + createInternalOperationTraceContext, + createTraceReplayOnlyContext, continueInstanceAiTraceContext, - mergeCurrentTraceMetadata, mergeTraceRunInputs, + redactLangSmithTelemetrySpan, + releaseTraceClient, submitLangsmithUserFeedback, withCurrentTraceSpan, } = // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/consistent-type-imports require('../langsmith-tracing') as typeof import('../langsmith-tracing'); + +async function startForegroundActor( + tracing: NonNullable>>, +) { + const actorRun = await tracing.startChildRun(tracing.rootRun, { + name: 'agent: orchestrator', + canonicalName: 'instance-ai.agent.orchestrator', + tags: ['orchestrator'], + metadata: { + agent_role: 'orchestrator', + execution_mode: 'foreground', + trace_kind: tracing.traceKind, + }, + inputs: { message: 'test' }, + }); + tracing.actorRun = actorRun; + tracing.orchestratorRun = actorRun; + return actorRun; +} const { createAskUserTool } = // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/consistent-type-imports require('../../tools/shared/ask-user.tool') as typeof import('../../tools/shared/ask-user.tool'); const { __mock: langsmithMock } = // eslint-disable-next-line @typescript-eslint/no-require-imports require('langsmith') as LangSmithMockModule; +const { __mock: agentsMock } = + // eslint-disable-next-line @typescript-eslint/no-require-imports + require('@n8n/agents') as AgentsMockModule; describe('createInstanceAiTraceContext', () => { const originalLangSmithApiKey = process.env.LANGSMITH_API_KEY; const originalLangSmithTracing = process.env.LANGSMITH_TRACING; const originalLangChainTracingV2 = process.env.LANGCHAIN_TRACING_V2; + const originalTraceInternal = process.env.N8N_INSTANCE_AI_TRACE_INTERNAL; + const originalDiagnosticsEnabled = process.env.N8N_DIAGNOSTICS_ENABLED; beforeEach(() => { langsmithMock.reset(); + agentsMock.reset(); process.env.LANGSMITH_API_KEY = 'test-key'; 
delete process.env.LANGSMITH_TRACING; delete process.env.LANGCHAIN_TRACING_V2; + delete process.env.N8N_INSTANCE_AI_TRACE_INTERNAL; + delete process.env.N8N_DIAGNOSTICS_ENABLED; }); afterAll(() => { @@ -303,11 +374,22 @@ describe('createInstanceAiTraceContext', () => { } else { process.env.LANGCHAIN_TRACING_V2 = originalLangChainTracingV2; } + if (originalTraceInternal === undefined) { + delete process.env.N8N_INSTANCE_AI_TRACE_INTERNAL; + } else { + process.env.N8N_INSTANCE_AI_TRACE_INTERNAL = originalTraceInternal; + } + if (originalDiagnosticsEnabled === undefined) { + delete process.env.N8N_DIAGNOSTICS_ENABLED; + } else { + process.env.N8N_DIAGNOSTICS_ENABLED = originalDiagnosticsEnabled; + } }); - it('persists the parent run id for child runs created from a parent run tree', async () => { + it('starts foreground agent spans under the message root when activated', async () => { const tracing = await createInstanceAiTraceContext({ threadId: 'thread-1', + messageGroupId: 'group-1', messageId: 'message-1', runId: 'run-1', userId: 'user-1', @@ -315,10 +397,541 @@ describe('createInstanceAiTraceContext', () => { }); expect(tracing).toBeDefined(); + expect(tracing?.rootRun.name).toBe('turn'); + expect(tracing?.rootRun.metadata).toEqual( + expect.objectContaining({ + display_name: 'turn', + display_kind: 'turn', + display_group: 'message-turn', + 'instance_ai.canonical_name': 'instance-ai.message_turn', + }), + ); + expect(typeof tracing?.rootRun.metadata?.agents_version).toBe('string'); + expect(typeof tracing?.rootRun.metadata?.workflow_sdk_version).toBe('string'); + await startForegroundActor(tracing!); expect(tracing?.orchestratorRun.parentRunId).toBe(tracing?.messageRun.id); }); - it('rehydrates child runs with their parent linkage before patching', async () => { + it('creates native telemetry with thread and run metadata', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-1', + conversationId: 'conversation-1', + messageId: 
'message-1', + messageGroupId: 'group-1', + runId: 'run-1', + userId: 'user-1', + input: { message: 'What workflows do I have?' }, + metadata: { n8n_version: '2.19.0' }, + }); + + expect(tracing?.getTelemetry).toBeDefined(); + + const telemetryOrBuilder = tracing!.getTelemetry!({ + agentRole: 'orchestrator', + functionId: 'instance-ai.orchestrator', + executionMode: 'foreground', + metadata: { custom_flag: true }, + }); + const telemetry = + 'build' in telemetryOrBuilder ? await telemetryOrBuilder.build() : telemetryOrBuilder; + + expect(telemetry.functionId).toBe('instance-ai.orchestrator'); + expect(telemetry.recordInputs).toBe(true); + expect(telemetry.recordOutputs).toBe(true); + expect(telemetry.runtimeRootSpanEnabled).toBe(false); + expect(telemetry.metadata).toEqual( + expect.objectContaining({ + thread_id: 'thread-1', + conversation_id: 'conversation-1', + message_group_id: 'group-1', + message_id: 'message-1', + run_id: 'run-1', + user_id: 'user-1', + agent_role: 'orchestrator', + n8n_version: '2.19.0', + execution_mode: 'foreground', + trace_kind: 'message_turn', + langsmith_trace_id: tracing?.rootRun.traceId, + langsmith_root_run_id: tracing?.rootRun.id, + langsmith_actor_run_id: tracing?.actorRun.id, + custom_flag: true, + }), + ); + expect(typeof telemetry.metadata?.agents_version).toBe('string'); + expect(typeof telemetry.metadata?.workflow_sdk_version).toBe('string'); + + await telemetry.provider?.shutdown(); + }); + + it('does not mutate LangSmith tracing environment flags while creating a context', async () => { + delete process.env.LANGSMITH_TRACING; + delete process.env.LANGCHAIN_TRACING_V2; + + await createInstanceAiTraceContext({ + threadId: 'thread-env', + messageId: 'message-env', + runId: 'run-env', + userId: 'user-env', + input: { message: 'hello' }, + }); + + expect(process.env.LANGSMITH_TRACING).toBeUndefined(); + expect(process.env.LANGCHAIN_TRACING_V2).toBeUndefined(); + }); + + it('uses the current foreground actor run in native 
telemetry metadata', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-1', + conversationId: 'conversation-1', + messageId: 'message-1', + messageGroupId: 'group-1', + runId: 'run-1', + userId: 'user-1', + input: { message: 'What workflows do I have?' }, + }); + const actorRun = await startForegroundActor(tracing!); + + const telemetryOrBuilder = tracing!.getTelemetry!({ + agentRole: 'orchestrator', + functionId: 'instance-ai.orchestrator', + }); + const telemetry = + 'build' in telemetryOrBuilder ? await telemetryOrBuilder.build() : telemetryOrBuilder; + + expect(telemetry.metadata).toEqual( + expect.objectContaining({ + langsmith_root_run_id: tracing?.rootRun.id, + langsmith_actor_run_id: actorRun.id, + }), + ); + + await telemetry.provider?.shutdown(); + }); + + it('redacts secret-bearing native telemetry span attributes', () => { + const span = { + attributes: { + 'ai.operationId': 'ai.streamText.doStream', + 'ai.prompt.messages': JSON.stringify([ + { + role: 'user', + content: [{ type: 'text', text: 'use access_token=secret-token' }], + credentials: { apiKey: 'secret-key', id: 'cred-1' }, + }, + ]), + 'ai.response.text': 'Authorization: Bearer abcdefghijklmnopqrstuvwxyz', + 'ai.telemetry.metadata.thread_id': 'thread-1', + 'ai.usage.inputTokens': 123, + 'ai.usage.outputTokens': 45, + 'ai.usage.cachedInputTokens': 67, + 'ai.usage.inputTokenDetails.cacheReadTokens': 67, + 'gen_ai.usage.input_tokens': 123, + 'gen_ai.usage.input_token_details': JSON.stringify({ cache_read: 67 }), + 'headers.authorization': 'Bearer secret', + 'metadata.access_token': 'secret-token', + 'langsmith.span.parent_id': 'parent-run-1', + 'langsmith.is_root': true, + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + attributes: Record; + }; + + expect(redacted.attributes).toEqual( + expect.objectContaining({ + 'ai.prompt.messages': JSON.stringify([ + { + role: 'user', + content: [{ type: 'text', text: 'use [REDACTED]' }], + 
credentials: '[redacted]', + }, + ]), + 'ai.response.text': '[REDACTED]', + 'ai.telemetry.metadata.thread_id': 'thread-1', + 'ai.usage.inputTokens': 123, + 'ai.usage.outputTokens': 45, + 'ai.usage.cachedInputTokens': 67, + 'ai.usage.inputTokenDetails.cacheReadTokens': 67, + 'gen_ai.usage.input_tokens': 123, + 'gen_ai.usage.output_tokens': 45, + 'gen_ai.usage.total_tokens': 168, + 'gen_ai.usage.input_token_details': JSON.stringify({ + cache_read: 67, + cache_creation: 0, + regular: 56, + original_input_tokens: 123, + }), + 'langsmith.usage_metadata': JSON.stringify({ + input_tokens: 123, + output_tokens: 45, + total_tokens: 168, + input_token_details: { cache_read: 67 }, + }), + 'headers.authorization': '[redacted]', + 'metadata.access_token': '[redacted]', + 'langsmith.span.parent_id': 'parent-run-1', + 'langsmith.span.kind': 'llm', + 'langsmith.is_root': true, + 'langsmith.metadata.original_input_tokens': 123, + 'langsmith.metadata.total_input_tokens': 123, + 'langsmith.metadata.regular_input_tokens': 56, + 'langsmith.metadata.cache_read_input_tokens': 67, + 'langsmith.metadata.cache_creation_input_tokens': 0, + 'langsmith.metadata.anthropic_original_input_tokens': 123, + 'langsmith.metadata.anthropic_total_input_tokens': 123, + 'langsmith.metadata.anthropic_regular_input_tokens': 56, + 'langsmith.metadata.anthropic_cache_read_input_tokens': 67, + 'langsmith.metadata.anthropic_cache_creation_input_tokens': 0, + 'ai_sdk.operation': 'ai.streamText.doStream', + }), + ); + expect(redacted.attributes['ai.operationId']).toBeUndefined(); + expect(redacted.attributes['instance_ai.usage.ai.usage.inputTokens']).toBeUndefined(); + }); + + it('redacts common token formats inside telemetry strings', () => { + const span = { + attributes: { + 'ai.operationId': 'ai.streamText.doStream', + 'ai.response.text': + 'openai=sk-proj-1234567890abcdefghijklmnopqrst slack=xoxb-1234567890-abcdef github=ghp_1234567890abcdefghijklmnop', + 'ai.prompt.messages': JSON.stringify([ + { + role: 
'user', + content: 'use api_key=secret123 and Authorization: Bearer abcdefghijklmnopqrstuvwxyz', + }, + ]), + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + attributes: Record; + }; + const serialized = JSON.stringify(redacted.attributes); + + expect(serialized).not.toContain('sk-proj-1234567890abcdefghijklmnopqrst'); + expect(serialized).not.toContain('xoxb-1234567890-abcdef'); + expect(serialized).not.toContain('ghp_1234567890abcdefghijklmnop'); + expect(serialized).not.toContain('secret123'); + expect(serialized).not.toContain('abcdefghijklmnopqrstuvwxyz'); + expect(serialized).toContain('[REDACTED]'); + }); + + it('uses cache-only Anthropic input tokens for LangSmith prompt totals', () => { + const span = { + attributes: { + 'ai.operationId': 'ai.streamText.doStream', + 'ai.usage.inputTokens': 0, + 'ai.usage.outputTokens': 12, + 'ai.usage.cachedInputTokens': 100, + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + attributes: Record; + }; + + expect(redacted.attributes).toEqual( + expect.objectContaining({ + 'ai.usage.inputTokens': 100, + 'gen_ai.usage.input_tokens': 100, + 'gen_ai.usage.output_tokens': 12, + 'gen_ai.usage.total_tokens': 112, + 'langsmith.usage_metadata': JSON.stringify({ + input_tokens: 100, + output_tokens: 12, + total_tokens: 112, + input_token_details: { cache_read: 100 }, + }), + 'langsmith.metadata.anthropic_total_input_tokens': 100, + 'langsmith.metadata.anthropic_regular_input_tokens': 0, + 'langsmith.metadata.anthropic_cache_read_input_tokens': 100, + 'langsmith.metadata.original_input_tokens': 0, + 'langsmith.metadata.total_input_tokens': 100, + 'langsmith.metadata.regular_input_tokens': 0, + 'langsmith.metadata.cache_read_input_tokens': 100, + 'langsmith.metadata.cache_creation_input_tokens': 0, + }), + ); + }); + + it('keeps useful nested tool output fields while redacting secrets', () => { + const span = { + attributes: { + 'langsmith.span.kind': 'tool', + 'ai.toolCall.result': 
JSON.stringify({ + results: [ + { + suggestedNodes: [ + { + name: 'n8n-nodes-base.slack', + displayName: 'Slack', + credentials: { apiKey: 'secret-key' }, + defaults: { + options: { + nested: { + value: 'kept', + }, + }, + }, + }, + ], + }, + ], + }), + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + attributes: Record; + }; + const result = jsonParse>( + redacted.attributes['ai.toolCall.result'] as string, + ); + const serializedResult = JSON.stringify(result); + + expect(serializedResult).toContain('n8n-nodes-base.slack'); + expect(serializedResult).toContain('kept'); + expect(serializedResult).not.toContain('secret-key'); + expect(serializedResult).not.toContain('[redacted-depth-limit]'); + }); + + it('adds action to native tool span display names', () => { + const span = { + name: 'ai.toolCall', + attributes: { + 'ai.operationId': 'ai.toolCall', + 'langsmith.span.kind': 'tool', + 'ai.toolCall.name': 'nodes', + 'ai.toolCall.id': 'toolu-nodes', + 'ai.toolCall.args': JSON.stringify({ action: 'search', query: 'Slack' }), + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + name: string; + attributes: Record; + }; + + expect(redacted.name).toBe('nodes[search]'); + expect(redacted.attributes['langsmith.trace.name']).toBe('nodes[search]'); + expect(redacted.attributes['langsmith.span.kind']).toBe('tool'); + expect(redacted.attributes['ai.toolCall.name']).toBe('nodes'); + expect(redacted.attributes['ai.toolCall.action']).toBe('search'); + expect(redacted.attributes['ai.toolCall.display_name']).toBe('nodes[search]'); + expect(redacted.attributes['langsmith.metadata.display_action']).toBe('search'); + expect(redacted.attributes['langsmith.metadata.ls_run_name']).toBe('nodes[search]'); + expect(redacted.attributes['instance_ai.canonical_name']).toBe('nodes'); + }); + + it('moves counted usage attributes off non-LLM spans', () => { + const span = { + attributes: { + 'langsmith.span.kind': 'chain', + 'gen_ai.usage.input_tokens': 
100, + 'ai.usage.outputTokens': 5, + 'langsmith.usage_metadata': JSON.stringify({ + input_tokens: 100, + output_tokens: 5, + total_tokens: 105, + }), + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + attributes: Record; + }; + + expect(redacted.attributes['gen_ai.usage.input_tokens']).toBeUndefined(); + expect(redacted.attributes['ai.usage.outputTokens']).toBeUndefined(); + expect(redacted.attributes['langsmith.usage_metadata']).toBeUndefined(); + expect(redacted.attributes['instance_ai.usage.gen_ai.usage.input_tokens']).toBe(100); + expect(redacted.attributes['instance_ai.usage.ai.usage.outputTokens']).toBe(5); + expect(redacted.attributes['instance_ai.usage.langsmith.usage_metadata']).toBe( + JSON.stringify({ input_tokens: 100, output_tokens: 5, total_tokens: 105 }), + ); + }); + + it('adds LangSmith prompt input with tool specs for native AI SDK spans', () => { + const span = { + attributes: { + 'ai.prompt.messages': JSON.stringify([{ role: 'user', content: 'hello' }]), + 'ai.prompt.tools': [ + JSON.stringify({ + type: 'function', + name: 'lookup', + description: 'Lookup records', + input_schema: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + }, + }), + ], + 'ai.prompt.toolChoice': JSON.stringify({ type: 'auto' }), + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + attributes: Record; + }; + const prompt = jsonParse>( + redacted.attributes['gen_ai.prompt'] as string, + ); + + expect(prompt).toEqual({ + input: [{ role: 'user', content: 'hello' }], + tools: [ + { + type: 'function', + name: 'lookup', + description: 'Lookup records', + input_schema: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + }, + }, + ], + tool_choice: { type: 'auto' }, + }); + expect(redacted.attributes['llm.available_tool_names']).toEqual(['lookup']); + expect(redacted.attributes['llm.available_tool_count']).toBe(1); + expect(redacted.attributes['llm.tool_schema_hash']).toEqual(expect.any(String)); 
+ expect(redacted.attributes['llm.tool_manifest_ref']).toBe( + redacted.attributes['llm.tool_schema_hash'], + ); + expect(redacted.attributes['llm.available_tools']).toBeUndefined(); + expect(jsonParse(redacted.attributes['tools'] as string)).toEqual(prompt.tools); + expect(jsonParse(redacted.attributes['invocation_params.tools'] as string)).toEqual( + prompt.tools, + ); + expect(jsonParse(redacted.attributes['invocation_params.tool_choice'] as string)).toEqual({ + type: 'auto', + }); + }); + + it('renames native LLM spans for LangSmith display while keeping SDK operation metadata', () => { + const span = { + name: 'ai.streamText.doStream', + attributes: { + 'ai.operationId': 'ai.streamText.doStream', + 'ai.telemetry.metadata.agent_role': 'workflow-builder', + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + name: string; + attributes: Record; + }; + + expect(redacted.name).toBe('llm: workflow-builder'); + expect(redacted.attributes['langsmith.trace.name']).toBe('llm: workflow-builder'); + expect(redacted.attributes['langsmith.span.kind']).toBe('llm'); + expect(redacted.attributes['gen_ai.operation.name']).toBe('chat'); + expect(redacted.attributes['ai_sdk.operation']).toBe('ai.streamText.doStream'); + expect(redacted.attributes['ai.operationId']).toBeUndefined(); + expect(redacted.attributes['instance_ai.canonical_name']).toBe('ai.streamText.doStream'); + expect(redacted.attributes.display_kind).toBe('llm'); + expect(redacted.attributes.display_group).toBe('workflow-builder'); + }); + + it('normalizes AI SDK tool messages for LangSmith chat rendering', () => { + const span = { + attributes: { + 'ai.prompt.messages': JSON.stringify([ + { + role: 'user', + content: [{ type: 'text', text: 'find my Slack credential' }], + }, + { + role: 'assistant', + content: [ + { type: 'text', text: 'Checking credentials.' 
}, + { + type: 'tool-call', + toolCallId: 'toolu-1', + toolName: 'credentials', + input: { action: 'list', name: 'Slack account' }, + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }, + }, + ], + }, + { + role: 'tool', + content: [ + { + type: 'tool-result', + toolCallId: 'toolu-1', + toolName: 'credentials', + output: { + ok: true, + items: [{ name: 'Slack account', apiKey: 'sk-secret' }], + }, + }, + ], + }, + ]), + 'ai.prompt.tools': [ + JSON.stringify({ + type: 'function', + name: 'credentials', + description: 'List credentials', + input_schema: { + type: 'object', + properties: { + action: { type: 'string' }, + name: { type: 'string' }, + }, + }, + }), + ], + }, + }; + + const redacted = redactLangSmithTelemetrySpan(span) as { + attributes: Record; + }; + const prompt = jsonParse<{ input: Array> }>( + redacted.attributes['gen_ai.prompt'] as string, + ); + + expect(prompt.input[1]).toEqual({ + role: 'assistant', + content: 'Checking credentials.', + tool_calls: [ + { + id: 'toolu-1', + type: 'function', + function: { + name: 'credentials', + arguments: JSON.stringify({ action: 'list', name: 'Slack account' }), + }, + }, + ], + }); + expect(prompt.input[2]).toEqual({ + role: 'tool', + tool_call_id: 'toolu-1', + name: 'credentials', + content: JSON.stringify({ + ok: true, + items: [{ name: 'Slack account', apiKey: '[redacted]' }], + }), + }); + + const originalMessages = jsonParse>>( + redacted.attributes['ai.prompt.messages'] as string, + ); + expect(JSON.stringify(originalMessages)).toContain('Slack account'); + expect(JSON.stringify(originalMessages)).not.toContain('[redacted-depth-limit]'); + expect(JSON.stringify(prompt)).not.toContain('sk-secret'); + }); + + it('finishes OTel child spans with their parent linkage', async () => { const tracing = await createInstanceAiTraceContext({ threadId: 'thread-1', messageId: 'message-1', @@ -328,18 +941,62 @@ describe('createInstanceAiTraceContext', () => { }); expect(tracing).toBeDefined(); + 
await startForegroundActor(tracing!); await expect( tracing?.finishRun(tracing.orchestratorRun, { outputs: { result: 'done' }, }), ).resolves.toBeUndefined(); - const patchTarget = langsmithMock.getCreatedRunTrees().at(-1); - expect(patchTarget?.id).toBe(tracing?.orchestratorRun.id); - expect(patchTarget?.parent_run_id).toBe(tracing?.messageRun.id); + const spans = agentsMock.getSpans(); + const orchestratorSpan = spans.find((span) => span.id === tracing?.orchestratorRun.otelSpanId); + expect(orchestratorSpan?.parentSpanId).toBe(tracing?.messageRun.otelSpanId); + expect(orchestratorSpan?.ended).toBe(true); + expect(orchestratorSpan?.attributes.gen_ai_completion).toBeUndefined(); + expect(orchestratorSpan?.attributes['gen_ai.completion']).toBe( + JSON.stringify({ result: 'done' }), + ); + expect(agentsMock.getProvider().forceFlush).not.toHaveBeenCalled(); }); - it('reuses the same message root when continuing a trace for the same message group', async () => { + it('shuts down product telemetry once when the root run finishes', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-shutdown', + messageId: 'message-shutdown', + runId: 'run-shutdown', + userId: 'user-shutdown', + input: { message: 'hello' }, + }); + + expect(tracing).toBeDefined(); + const provider = agentsMock.getProvider(); + + await tracing!.finishRun(tracing!.rootRun, { outputs: { status: 'done' } }); + await tracing!.finishRun(tracing!.rootRun, { outputs: { status: 'done again' } }); + + expect(provider.forceFlush).toHaveBeenCalledTimes(1); + expect(provider.shutdown).toHaveBeenCalledTimes(1); + }); + + it('shuts down product telemetry when releasing a trace client', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-release', + messageId: 'message-release', + runId: 'run-release', + userId: 'user-release', + input: { message: 'hello' }, + }); + + expect(tracing).toBeDefined(); + const provider = agentsMock.getProvider(); + + 
releaseTraceClient(tracing!.rootRun.traceId); + await Promise.resolve(); + + expect(provider.shutdown).toHaveBeenCalledTimes(1); + }); + + it('creates a new orchestrator resume root when continuing a trace', async () => { const tracing = await createInstanceAiTraceContext({ threadId: 'thread-1', messageId: 'message-1', @@ -358,12 +1015,122 @@ describe('createInstanceAiTraceContext', () => { runId: 'run-2', userId: 'user-1', input: { message: 'follow-up turn' }, + metadata: { resume_reason: 'background_task_completed' }, }); - expect(continuedTracing.messageRun).toBe(tracing?.messageRun); - expect(continuedTracing.messageRun.id).toBe(tracing?.messageRun.id); + expect(continuedTracing).toBeDefined(); + if (!continuedTracing) { + throw new Error('Expected continued tracing context'); + } + expect(continuedTracing.traceKind).toBe('orchestrator_resume'); + expect(continuedTracing.rootRun.id).not.toBe(tracing?.rootRun.id); + expect(continuedTracing.rootRun.parentRunId).toBeUndefined(); + expect(continuedTracing.rootRun.name).toBe('resume: background task completed'); + expect(continuedTracing.rootRun.metadata).toEqual( + expect.objectContaining({ + 'instance_ai.canonical_name': 'instance-ai.orchestrator_resume', + }), + ); + expect(continuedTracing.rootRun.metadata).toEqual( + expect.objectContaining({ + trace_kind: 'orchestrator_resume', + execution_mode: 'resume', + resume_reason: 'background_task_completed', + continued_from_run_id: tracing?.rootRun.id, + continued_from_trace_id: tracing?.rootRun.otelTraceId, + resumed_from_activation_id: tracing?.actorRun.id, + resumed_from_trace_id: tracing?.rootRun.otelTraceId, + }), + ); expect(continuedTracing.orchestratorRun.id).not.toBe(tracing?.orchestratorRun.id); - expect(continuedTracing.orchestratorRun.parentRunId).toBe(tracing?.messageRun.id); + expect(continuedTracing.orchestratorRun.parentRunId).toBe(continuedTracing.rootRun.id); + expect(continuedTracing.orchestratorRun.name).toBe('agent: orchestrator'); + 
expect(continuedTracing.orchestratorRun.metadata).toEqual( + expect.objectContaining({ + 'instance_ai.canonical_name': 'instance-ai.agent.orchestrator', + }), + ); + }); + + it('creates an orchestrator resume root without a previous trace when tracing is enabled', async () => { + const tracing = await continueInstanceAiTraceContext(undefined, { + threadId: 'thread-1', + messageId: 'message-2', + messageGroupId: 'group-1', + runId: 'run-2', + userId: 'user-1', + input: { message: 'orphaned resume' }, + metadata: { resume_reason: 'planned_checkpoint', checkpoint_task_id: 'cp-1' }, + }); + + expect(tracing).toBeDefined(); + expect(tracing?.traceKind).toBe('orchestrator_resume'); + expect(tracing?.rootRun.name).toBe('resume: planned checkpoint'); + expect(tracing?.rootRun.parentRunId).toBeUndefined(); + expect(tracing?.rootRun.metadata).toEqual( + expect.objectContaining({ + trace_kind: 'orchestrator_resume', + resume_reason: 'planned_checkpoint', + checkpoint_task_id: 'cp-1', + }), + ); + expect(tracing?.orchestratorRun.parentRunId).toBe(tracing?.rootRun.id); + }); + + it('gates internal operation roots unless internal tracing is enabled', async () => { + await expect( + createInternalOperationTraceContext({ + threadId: 'thread-1', + messageId: 'message-1', + runId: 'title-1', + userId: 'user-1', + modelId: 'anthropic/claude-sonnet-4-6', + operationName: 'thread_title', + input: { source: 'title' }, + }), + ).resolves.toBeUndefined(); + + process.env.N8N_INSTANCE_AI_TRACE_INTERNAL = 'true'; + const tracing = await createInternalOperationTraceContext({ + threadId: 'thread-1', + messageId: 'message-1', + runId: 'title-1', + userId: 'user-1', + modelId: 'anthropic/claude-sonnet-4-6', + operationName: 'thread_title', + input: { source: 'title' }, + }); + + expect(tracing).toBeDefined(); + expect(tracing?.traceKind).toBe('internal_operation'); + expect(tracing?.rootRun.name).toBe('internal: thread-title'); + expect(tracing?.rootRun.parentRunId).toBeUndefined(); + 
expect(tracing?.rootRun.metadata).toEqual( + expect.objectContaining({ + trace_kind: 'internal_operation', + execution_mode: 'internal', + operation_name: 'thread_title', + agent_role: 'thread_title', + thread_id: 'thread-1', + 'instance_ai.canonical_name': 'instance-ai.internal.thread_title', + }), + ); + + const telemetryOrBuilder = tracing!.getTelemetry!({ + agentRole: 'thread_title', + functionId: 'instance-ai.thread_title', + executionMode: 'internal', + }); + const telemetry = + 'build' in telemetryOrBuilder ? await telemetryOrBuilder.build() : telemetryOrBuilder; + expect(telemetry.functionId).toBe('instance-ai.thread_title'); + expect(telemetry.metadata).toEqual( + expect.objectContaining({ + trace_kind: 'internal_operation', + execution_mode: 'internal', + operation_name: 'thread_title', + }), + ); }); it('creates detached sub-agent traces as separate root traces', async () => { @@ -379,16 +1146,22 @@ describe('createInstanceAiTraceContext', () => { kind: 'builder', taskId: 'build-1', spawnedByTraceId: 'trace-parent-1', + spawnedBySpanId: 'span-parent-1', spawnedByRunId: 'run-parent-1', spawnedByAgentId: 'agent-001', + spawnedByAgentRole: 'orchestrator', + spawnedByToolCallId: 'toolu-1', input: { task: 'Build a workflow' }, + metadata: { n8n_version: '2.19.0' }, }); expect(tracing).toBeDefined(); - expect(tracing?.traceKind).toBe('detached_subagent'); - expect(tracing?.rootRun.id).toBe(tracing?.actorRun.id); + expect(tracing?.traceKind).toBe('background_subagent'); + expect(tracing?.rootRun.id).not.toBe(tracing?.actorRun.id); expect(tracing?.rootRun.parentRunId).toBeUndefined(); - expect(tracing?.rootRun.name).toBe('subagent:workflow-builder'); + expect(tracing?.rootRun.name).toBe('background task: workflow-builder'); + expect(tracing?.actorRun.name).toBe('agent: workflow-builder'); + expect(tracing?.actorRun.parentRunId).toBe(tracing?.rootRun.id); expect(tracing?.rootRun.metadata).toEqual( expect.objectContaining({ thread_id: 'thread-1', @@ -396,14 
+1169,38 @@ describe('createInstanceAiTraceContext', () => { task_id: 'build-1', task_kind: 'builder', agent_id: 'agent-builder-1', + n8n_version: '2.19.0', + trace_kind: 'background_subagent', + execution_mode: 'background_subagent', spawned_by_trace_id: 'trace-parent-1', + spawned_by_span_id: 'span-parent-1', spawned_by_run_id: 'run-parent-1', spawned_by_agent_id: 'agent-001', + spawned_by_agent_role: 'orchestrator', + spawned_by_tool_call_id: 'toolu-1', + 'instance_ai.canonical_name': 'instance-ai.background_subagent', }), ); + expect(typeof tracing?.rootRun.metadata?.agents_version).toBe('string'); + expect(typeof tracing?.rootRun.metadata?.workflow_sdk_version).toBe('string'); + expect(tracing?.actorRun.metadata).toEqual( + expect.objectContaining({ + 'instance_ai.canonical_name': 'instance-ai.agent.workflow-builder', + }), + ); + + const telemetryOrBuilder = tracing!.getTelemetry!({ + agentRole: 'workflow-builder', + functionId: 'instance-ai.subagent.workflow-builder', + executionMode: 'background_subagent', + }); + const telemetry = + 'build' in telemetryOrBuilder ? await telemetryOrBuilder.build() : telemetryOrBuilder; + + expect(telemetry.runtimeRootSpanEnabled).toBe(false); }); - it('attaches root agent config without duplicating it into llm steps', async () => { + it('attaches compact root agent config without duplicating tool schemas into agent spans', async () => { const tracing = await createDetachedSubAgentTraceContext({ threadId: 'thread-1', conversationId: 'thread-1', @@ -425,26 +1222,66 @@ describe('createInstanceAiTraceContext', () => { buildAgentTraceInputs({ systemPrompt: ['line 1', 'line 2', 'line 3', 'line 4'].join('\n').repeat(700), tools: createToolRegistry([ - ['build-workflow', { description: 'Build or patch a workflow from SDK code.' } as never], - ['submit-workflow', { description: 'Submit a workflow to n8n.' 
} as never], + [ + 'build-workflow', + { + description: 'Build or patch a workflow from SDK code.', + inputSchema: { + type: 'object', + properties: { + task: { type: 'string' }, + }, + required: ['task'], + }, + } as never, + ], + [ + 'submit-workflow', + { + description: 'Submit a workflow to n8n.', + } as never, + ], + ]), + runtimeTools: createToolRegistry([ + [ + 'workspace_read_file', + { + name: 'workspace_read_file', + description: 'Read a file from the workspace.', + } as never, + ], ]), modelId: 'anthropic/claude-sonnet-4-6', }), ); const actorInputs = tracing?.actorRun.inputs as Record; - const loadedTools = actorInputs.loaded_tools as Array>; const systemPrompt = actorInputs.system_prompt as Record; expect(actorInputs.task).toBe('Build a workflow'); expect(actorInputs.model).toBe('anthropic/claude-sonnet-4-6'); - expect(actorInputs.loaded_tool_count).toBe(2); - expect(loadedTools).toEqual( - expect.arrayContaining([ - expect.objectContaining({ name: 'build-workflow' }), - expect.objectContaining({ name: 'submit-workflow' }), - ]), + expect(actorInputs.assigned_tool_count).toBe(2); + expect(actorInputs.assigned_tool_names).toEqual(['build-workflow', 'submit-workflow']); + expect(actorInputs.assigned_tool_schema_hash).toEqual(expect.any(String)); + expect(actorInputs.runtime_tool_count).toBe(1); + expect(actorInputs.runtime_tool_names).toEqual(['workspace_read_file']); + expect(actorInputs.runtime_tool_schema_hash).toEqual(expect.any(String)); + expect(actorInputs.loaded_tool_count).toBeUndefined(); + expect(actorInputs.loaded_tool_names).toBeUndefined(); + expect(actorInputs.loaded_tool_schema_hash).toBeUndefined(); + expect(actorInputs.loaded_tool_manifest).toBeUndefined(); + expect(actorInputs.loaded_tools).toBeUndefined(); + expect(actorInputs.loaded_tool_catalog).toBeUndefined(); + const actorSpan = agentsMock + .getSpans() + .find((span) => span.id === tracing?.actorRun.otelSpanId); + const spanInputs = jsonParse>( + 
actorSpan?.attributes['gen_ai.prompt'] as string, ); + expect(spanInputs.assigned_tool_names).toEqual(['build-workflow', 'submit-workflow']); + expect(spanInputs.runtime_tool_names).toEqual(['workspace_read_file']); + expect(spanInputs.loaded_tool_manifest).toBeUndefined(); + expect(spanInputs.loaded_tools).toBeUndefined(); expect(systemPrompt.part_01).toEqual(expect.any(String)); expect(systemPrompt.part_02).toEqual(expect.any(String)); }); @@ -466,7 +1303,7 @@ describe('createInstanceAiTraceContext', () => { expect(tracing).toBeDefined(); - await tracing?.withRunTree(tracing.actorRun, async () => { + await tracing?.withActiveSpan(tracing.actorRun, async () => { mergeTraceRunInputs( tracing?.actorRun, buildAgentTraceInputs({ @@ -474,7 +1311,9 @@ describe('createInstanceAiTraceContext', () => { tools: createToolRegistry([ [ 'build-workflow', - { description: 'Build or patch a workflow from SDK code.' } as never, + { + description: 'Build or patch a workflow from SDK code.', + } as never, ], ]), modelId: 'anthropic/claude-sonnet-4-6', @@ -486,7 +1325,8 @@ describe('createInstanceAiTraceContext', () => { const actorInputs = tracing?.actorRun.inputs as Record; expect(actorInputs.model).toBe('anthropic/claude-sonnet-4-6'); expect(actorInputs.system_prompt).toBe('system prompt'); - expect(actorInputs.loaded_tool_count).toBe(1); + expect(actorInputs.assigned_tool_count).toBe(1); + expect(actorInputs.assigned_tool_names).toEqual(['build-workflow']); }); it('redacts model secrets from agent trace inputs', () => { @@ -505,44 +1345,6 @@ describe('createInstanceAiTraceContext', () => { expect(JSON.stringify(inputs)).not.toContain('custom.endpoint'); }); - it('includes n8n and workflow SDK versions in trace metadata when provided', async () => { - const tracing = await createInstanceAiTraceContext({ - threadId: 'thread-1', - messageId: 'message-1', - runId: 'run-1', - userId: 'user-1', - n8nVersion: '1.123.4', - workflowSdkVersion: '0.13.0', - input: { message: 'What workflows 
do I have?' }, - }); - - expect(tracing?.messageRun.metadata).toEqual( - expect.objectContaining({ - n8n_version: '1.123.4', - workflow_sdk_version: '0.13.0', - }), - ); - expect(tracing?.orchestratorRun.metadata).toEqual( - expect.objectContaining({ - n8n_version: '1.123.4', - workflow_sdk_version: '0.13.0', - }), - ); - }); - - it('omits version metadata when not provided', async () => { - const tracing = await createInstanceAiTraceContext({ - threadId: 'thread-1', - messageId: 'message-1', - runId: 'run-1', - userId: 'user-1', - input: { message: 'What workflows do I have?' }, - }); - - expect(tracing?.messageRun.metadata).not.toHaveProperty('n8n_version'); - expect(tracing?.messageRun.metadata).not.toHaveProperty('workflow_sdk_version'); - }); - it('redacts model secrets from trace metadata', async () => { const tracing = await createInstanceAiTraceContext({ threadId: 'thread-1', @@ -587,6 +1389,7 @@ describe('createInstanceAiTraceContext', () => { it('traces suspendable tools and HITL suspension spans', async () => { const tracing = await createInstanceAiTraceContext({ threadId: 'thread-1', + messageGroupId: 'group-1', messageId: 'message-1', runId: 'run-1', userId: 'user-1', @@ -594,6 +1397,7 @@ describe('createInstanceAiTraceContext', () => { }); expect(tracing).toBeDefined(); + await startForegroundActor(tracing!); const wrappedTools = tracing!.wrapTools( createToolRegistry([['ask-user', createAskUserTool()]]), @@ -605,23 +1409,166 @@ describe('createInstanceAiTraceContext', () => { throw new Error('Wrapped ask-user tool is not executable'); } - await tracing!.withRunTree(tracing!.orchestratorRun, async () => { + await tracing!.withActiveSpan(tracing!.orchestratorRun, async () => { await wrappedAskUser.handler( { questions: [{ id: 'q1', question: 'What do you want?', type: 'text' }], }, { + toolCallId: 'toolu-ask', + resumeData: undefined, suspend: async () => { await Promise.resolve(); - return undefined; + return undefined as never; }, }, ); }); - const 
createdRunNames = langsmithMock.getCreatedRunTrees().map((run) => run.name); - expect(createdRunNames).toContain('tool:ask-user'); - expect(createdRunNames).toContain('hitl:suspend'); + const spans = agentsMock.getSpans(); + const spanNames = spans.map((span) => span.name); + expect(spanNames).toContain('hitl: suspend'); + expect(spans.find((span) => span.name === 'hitl: suspend')?.attributes).toEqual( + expect.objectContaining({ + agent_role: 'orchestrator', + thread_id: 'thread-1', + message_group_id: 'group-1', + tool_call_id: 'toolu-ask', + }), + ); + expect(agentsMock.getProvider().forceFlush).toHaveBeenCalledTimes(1); + expect(spanNames.some((name) => name.startsWith('instance-ai.tool.'))).toBe(false); + }); + + it('records actual suspend calls with the suspend payload', async () => { + const writer = new TraceWriter('record-suspend'); + const tracing = createTraceReplayOnlyContext(); + tracing.replayMode = 'record'; + tracing.traceWriter = writer; + + const suspendPayload = { + requestId: 'request-1', + inputType: 'approval', + message: 'Confirm workspace change', + }; + const interruptibleTool: BuiltTool = { + name: 'approval-tool', + description: 'Requests approval.', + suspendSchema: {}, + handler: async (_input, context) => { + if (!('suspend' in context) || typeof context.suspend !== 'function') { + throw new Error('Expected interruptible tool context'); + } + return await context.suspend(suspendPayload); + }, + }; + + const wrappedTools = tracing.wrapTools( + createToolRegistry([['approval-tool', interruptibleTool]]), + { agentRole: 'workflow-builder' }, + ); + const wrappedTool = wrappedTools.get('approval-tool'); + if (!isExecutableTool(wrappedTool)) { + throw new Error('Wrapped approval-tool is not executable'); + } + + const result = await executeTool( + wrappedTool, + { operation: 'write-file' }, + { + resumeData: undefined, + suspend: async (payload: unknown): Promise => + await Promise.resolve({ denied: true, payload } as never), + }, + ); + + 
expect(result).toEqual({ denied: true, payload: suspendPayload }); + const suspend = writer.getEvents()[1] as TraceToolSuspend; + expect(suspend).toEqual({ + kind: 'tool-suspend', + stepId: 1, + agentRole: 'workflow-builder', + toolName: 'approval-tool', + input: { operation: 'write-file' }, + output: {}, + suspendPayload, + }); + }); + + it('records denied first-call outputs as normal tool calls when suspend is not called', async () => { + const writer = new TraceWriter('record-denied-output'); + const tracing = createTraceReplayOnlyContext(); + tracing.replayMode = 'record'; + tracing.traceWriter = writer; + + const deniedTool: BuiltTool = { + name: 'admin-only-tool', + description: 'Returns a denied output.', + suspendSchema: {}, + handler: async () => await Promise.resolve({ denied: true }), + }; + + const wrappedTools = tracing.wrapTools(createToolRegistry([['admin-only-tool', deniedTool]]), { + agentRole: 'orchestrator', + }); + const wrappedTool = wrappedTools.get('admin-only-tool'); + if (!isExecutableTool(wrappedTool)) { + throw new Error('Wrapped admin-only-tool is not executable'); + } + + const result = await executeTool( + wrappedTool, + { action: 'read-secret' }, + { + resumeData: undefined, + suspend: async (payload: unknown): Promise => + await Promise.resolve({ denied: true, payload } as never), + }, + ); + + expect(result).toEqual({ denied: true }); + const call = writer.getEvents()[1] as TraceToolCall; + expect(call).toEqual({ + kind: 'tool-call', + stepId: 1, + agentRole: 'orchestrator', + toolName: 'admin-only-tool', + input: { action: 'read-secret' }, + output: { denied: true }, + }); + }); + + it('does not wrap local tools for duplicate product-level LangSmith spans', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-1', + messageId: 'message-1', + runId: 'run-1', + userId: 'user-1', + input: { message: 'Find a template' }, + }); + + expect(tracing).toBeDefined(); + + const regularTool = { + name: 
'templates', + description: 'Search templates', + handler: jest.fn(), + }; + const workspaceTool = { + name: 'workspace_execute_command', + description: 'Run a workspace command', + handler: jest.fn(), + }; + + const wrappedTools = tracing!.wrapTools( + createToolRegistry([ + ['templates', regularTool as never], + ['workspace_execute_command', workspaceTool as never], + ]), + ); + + expect(wrappedTools.get('templates')).toBe(regularTool); + expect(wrappedTools.get('workspace_execute_command')).toBe(workspaceTool); }); it('keeps ad-hoc child spans rooted under the active sub-agent run', async () => { @@ -636,13 +1583,14 @@ describe('createInstanceAiTraceContext', () => { expect(tracing).toBeDefined(); const subAgentRun = await tracing!.startChildRun(tracing!.orchestratorRun, { - name: 'subagent:workflow-builder', + name: 'agent: workflow-builder', + canonicalName: 'instance-ai.subagent.workflow-builder.stream', tags: ['sub-agent'], metadata: { agent_role: 'workflow-builder' }, inputs: { task: 'Build a workflow' }, }); - await tracing!.withRunTree(subAgentRun, async () => { + await tracing!.withActiveSpan(subAgentRun, async () => { await withCurrentTraceSpan( { name: 'llm:anthropic/claude-sonnet-4-6', @@ -655,10 +1603,10 @@ describe('createInstanceAiTraceContext', () => { ); }); - const llmRun = langsmithMock - .getCreatedRunTrees() - .find((run) => run.name === 'llm:anthropic/claude-sonnet-4-6'); - expect(llmRun?.parent_run_id).toBe(subAgentRun.id); + const llmSpan = agentsMock + .getSpans() + .find((span) => span.name === 'llm:anthropic/claude-sonnet-4-6'); + expect(llmSpan?.parentSpanId).toBe(subAgentRun.otelSpanId); }); it('traces resumed suspendable tools without extra HITL child span spam', async () => { @@ -671,6 +1619,7 @@ describe('createInstanceAiTraceContext', () => { }); expect(tracing).toBeDefined(); + await startForegroundActor(tracing!); const wrappedTools = tracing!.wrapTools( createToolRegistry([['ask-user', createAskUserTool()]]), @@ -682,8 +1631,9 
@@ describe('createInstanceAiTraceContext', () => { throw new Error('Wrapped ask-user tool is not executable'); } - const result = await tracing!.withRunTree(tracing!.orchestratorRun, async () => { - return await wrappedAskUser.handler( + const result = await tracing!.withActiveSpan(tracing!.orchestratorRun, async () => { + return await executeTool( + wrappedAskUser, { questions: [{ id: 'q1', question: 'What do you want?', type: 'text' }], }, @@ -718,10 +1668,11 @@ describe('createInstanceAiTraceContext', () => { ], }); - const createdRunNames = langsmithMock.getCreatedRunTrees().map((run) => run.name); - expect(createdRunNames).toContain('tool:ask-user:resume'); - expect(createdRunNames).not.toContain('hitl:resume'); - expect(createdRunNames).not.toContain('hitl:approval'); + const spanNames = agentsMock.getSpans().map((span) => span.name); + expect(spanNames).toContain('hitl: resume'); + expect(spanNames).not.toContain('hitl: suspend'); + expect(agentsMock.getProvider().forceFlush).toHaveBeenCalledTimes(1); + expect(spanNames.some((name) => name.startsWith('instance-ai.tool.'))).toBe(false); }); it('creates ad-hoc child spans under the current run tree', async () => { @@ -733,7 +1684,7 @@ describe('createInstanceAiTraceContext', () => { input: { message: 'hello' }, }); - await tracing!.withRunTree(tracing!.orchestratorRun, async () => { + await tracing!.withActiveSpan(tracing!.orchestratorRun, async () => { const result = await withCurrentTraceSpan( { name: 'prepare_context', @@ -747,8 +1698,127 @@ describe('createInstanceAiTraceContext', () => { expect(result).toBe(42); }); - const createdRunNames = langsmithMock.getCreatedRunTrees().map((run) => run.name); - expect(createdRunNames).toContain('prepare_context'); + const spanNames = agentsMock.getSpans().map((span) => span.name); + expect(spanNames).toContain('prepare_context'); + }); + + it('keeps explicit child runs under their requested parent when another trace is active', async () => { + const activeTracing 
= await createInstanceAiTraceContext({ + threadId: 'thread-active', + messageId: 'message-active', + runId: 'run-active', + userId: 'user-active', + input: { message: 'active trace' }, + }); + const resumedTracing = await createInstanceAiTraceContext({ + threadId: 'thread-resumed', + messageId: 'message-resumed', + runId: 'run-resumed', + userId: 'user-resumed', + input: { message: 'resumed trace' }, + }); + + expect(activeTracing).toBeDefined(); + expect(resumedTracing).toBeDefined(); + + let childRun: + | NonNullable>>['rootRun'] + | undefined; + await activeTracing!.withActiveSpan(activeTracing!.rootRun, async () => { + childRun = await resumedTracing!.startChildRun(resumedTracing!.rootRun, { + name: 'prepare: context', + canonicalName: 'instance-ai.context_compaction', + metadata: { agent_role: 'context_compaction' }, + }); + }); + + expect(childRun).toBeDefined(); + expect(childRun!.traceId).toBe(resumedTracing!.rootRun.traceId); + expect(childRun!.otelTraceId).toBe(resumedTracing!.rootRun.otelTraceId); + + const childSpan = agentsMock.getSpans().find((span) => span.id === childRun!.otelSpanId); + expect(childSpan?.parentSpanId).toBe(resumedTracing!.rootRun.otelSpanId); + }); + + it('treats product tracing setup failures as disabled tracing', async () => { + agentsMock.setBuildError(new Error('telemetry setup failed')); + + await expect( + createInstanceAiTraceContext({ + threadId: 'thread-setup-failure', + messageId: 'message-setup-failure', + runId: 'run-setup-failure', + userId: 'user-setup-failure', + input: { message: 'hello' }, + }), + ).resolves.toBeUndefined(); + }); + + it('runs ad-hoc span callbacks when product span creation fails', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-span-start-failure', + messageId: 'message-span-start-failure', + runId: 'run-span-start-failure', + userId: 'user-span-start-failure', + input: { message: 'hello' }, + }); + + expect(tracing).toBeDefined(); + await 
startForegroundActor(tracing!); + agentsMock.setStartSpanError(new Error('span start failed')); + + await expect( + tracing!.withActiveSpan( + tracing!.orchestratorRun, + async () => + await withCurrentTraceSpan( + { name: 'prepare_context' }, + async () => await Promise.resolve(42), + ), + ), + ).resolves.toBe(42); + }); + + it('does not fail finishRun when telemetry flush fails', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-flush-failure', + messageId: 'message-flush-failure', + runId: 'run-flush-failure', + userId: 'user-flush-failure', + input: { message: 'hello' }, + }); + + expect(tracing).toBeDefined(); + agentsMock.getProvider().forceFlush.mockRejectedValueOnce(new Error('flush failed')); + + await expect( + tracing!.finishRun(tracing!.rootRun, { outputs: { status: 'done' } }), + ).resolves.toBeUndefined(); + }); + + it('does not mask callback errors from ad-hoc spans', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-finalize-failure', + messageId: 'message-finalize-failure', + runId: 'run-finalize-failure', + userId: 'user-finalize-failure', + input: { message: 'hello' }, + }); + const callbackError = new Error('agent callback failed'); + + expect(tracing).toBeDefined(); + await startForegroundActor(tracing!); + + await expect( + tracing!.withActiveSpan( + tracing!.orchestratorRun, + async () => + await withCurrentTraceSpan({ name: 'prepare_context' }, async () => { + await Promise.resolve(); + throw callbackError; + }), + ), + ).rejects.toBe(callbackError); }); it('creates trace context when proxyConfig is provided even without env vars', async () => { @@ -777,7 +1847,26 @@ describe('createInstanceAiTraceContext', () => { expect(tracing?.orchestratorRun).toBeDefined(); }); - it('passes client to RunTree when proxyConfig is provided', async () => { + it('respects diagnostics opt-out before proxy auto-enable', async () => { + process.env.N8N_DIAGNOSTICS_ENABLED = 'false'; + 
+ const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-diagnostics-disabled', + messageId: 'message-diagnostics-disabled', + runId: 'run-diagnostics-disabled', + userId: 'user-diagnostics-disabled', + input: { message: 'proxy test' }, + proxyConfig: { + apiUrl: 'https://proxy.example.com/langsmith', + // eslint-disable-next-line @typescript-eslint/require-await + getAuthHeaders: async () => ({ Authorization: 'Bearer proxy-token' }), + }, + }); + + expect(tracing).toBeUndefined(); + }); + + it('creates OTel product spans when proxyConfig is provided', async () => { const tracing = await createInstanceAiTraceContext({ threadId: 'thread-client', messageId: 'message-client', @@ -793,28 +1882,115 @@ describe('createInstanceAiTraceContext', () => { expect(tracing).toBeDefined(); - const rootRunTree = langsmithMock - .getCreatedRunTrees() - .find((run) => run.name === 'message_turn' && run.client); - expect(rootRunTree).toBeDefined(); - expect(rootRunTree?.client).toBeDefined(); + const rootSpan = agentsMock.getSpans().find((span) => span.name === 'turn'); + expect(rootSpan).toBeDefined(); }); - it('does not pass client to RunTree without proxyConfig', async () => { - await createInstanceAiTraceContext({ - threadId: 'thread-no-proxy', - messageId: 'message-no-proxy', - runId: 'run-no-proxy', - userId: 'user-no-proxy', - input: { message: 'no proxy test' }, + it('keeps product, native provider, and local tool spans in one foreground OTel trace', async () => { + const tracing = await createInstanceAiTraceContext({ + threadId: 'thread-local-exporter', + messageId: 'message-local-exporter', + runId: 'run-local-exporter', + userId: 'user-local-exporter', + input: { message: 'Build a workflow' }, }); - const rootRunTree = langsmithMock - .getCreatedRunTrees() - .find((run) => run.name === 'message_turn'); - expect(rootRunTree).toBeDefined(); - // Without proxyConfig, the direct client is used (never undefined) - expect(rootRunTree?.client).toBeDefined(); + 
expect(tracing).toBeDefined(); + await startForegroundActor(tracing!); + + const wrappedTools = tracing!.wrapTools( + createToolRegistry([ + [ + 'workspace_write_file', + { + name: 'workspace_write_file', + description: 'Write a file in the workspace.', + handler: jest.fn(async () => await Promise.resolve({ written: true })), + } as never, + ], + ]), + { agentRole: 'workflow-builder' }, + ); + const workspaceWriteFile = wrappedTools.get('workspace_write_file'); + if (!isExecutableTool(workspaceWriteFile)) { + throw new Error('Wrapped workspace_write_file tool is not executable'); + } + + await tracing!.withActiveSpan(tracing!.orchestratorRun, async () => { + const telemetryOrBuilder = tracing!.getTelemetry!({ + agentRole: 'orchestrator', + functionId: 'instance-ai.orchestrator', + }); + if ('build' in telemetryOrBuilder) { + throw new Error('Expected foreground tracing to reuse built OTel telemetry'); + } + + type NativeSpan = { + end(): void; + spanContext(): { traceId: string; spanId: string }; + }; + type NativeTracer = { + startSpan(name: string, options?: { attributes?: Record }): NativeSpan; + startActiveSpan( + name: string, + options: { attributes?: Record }, + fn: (span: NativeSpan) => Promise, + ): Promise; + }; + const tracer = telemetryOrBuilder.tracer as NativeTracer; + + const providerSpan = tracer.startSpan('ai.streamText.doStream', { + attributes: { + 'ai.operationId': 'ai.streamText.doStream', + 'langsmith.span.kind': 'llm', + }, + }); + providerSpan.end(); + + await tracer.startActiveSpan( + 'ai.toolCall', + { + attributes: { + 'ai.operationId': 'ai.toolCall', + 'langsmith.span.kind': 'tool', + 'ai.toolCall.name': 'workspace_write_file', + 'ai.toolCall.id': 'toolu-write-file', + }, + }, + async (span) => { + await workspaceWriteFile.handler( + { path: 'workflow.json', content: '{}' }, + { toolCallId: 'toolu-write-file' }, + ); + span.end(); + }, + ); + }); + + await tracing!.finishRun(tracing!.orchestratorRun, { outputs: { status: 'done' } }); + 
await tracing!.finishRun(tracing!.rootRun, { outputs: { status: 'done' } }); + + const spans = agentsMock.getSpans(); + const rootSpan = spans.find((span) => span.name === 'turn'); + const orchestratorSpan = spans.find((span) => span.name === 'agent: orchestrator'); + const providerSpan = spans.find((span) => span.name === 'ai.streamText.doStream'); + const localToolSpan = spans.find((span) => span.name === 'ai.toolCall'); + + expect(rootSpan).toBeDefined(); + expect(orchestratorSpan).toBeDefined(); + expect(providerSpan).toBeDefined(); + expect(localToolSpan).toBeDefined(); + expect( + new Set( + [rootSpan, orchestratorSpan, providerSpan, localToolSpan].map((span) => span?.traceId), + ), + ).toEqual(new Set([rootSpan?.traceId])); + expect(orchestratorSpan?.parentSpanId).toBe(rootSpan?.id); + expect(providerSpan?.parentSpanId).toBe(orchestratorSpan?.id); + expect(localToolSpan?.parentSpanId).toBe(orchestratorSpan?.id); + expect(localToolSpan?.attributes['ai.toolCall.id']).toBe('toolu-write-file'); + expect(localToolSpan?.attributes['ai.toolCall.name']).toBe('workspace_write_file'); + expect(spans.some((span) => span.name.startsWith('instance-ai.tool.'))).toBe(false); }); it('returns undefined when tracing is explicitly disabled even with proxy', async () => { @@ -841,12 +2017,15 @@ describe('submitLangsmithUserFeedback', () => { const originalLangSmithApiKey = process.env.LANGSMITH_API_KEY; const originalLangSmithTracing = process.env.LANGSMITH_TRACING; const originalLangChainTracingV2 = process.env.LANGCHAIN_TRACING_V2; + const originalDiagnosticsEnabled = process.env.N8N_DIAGNOSTICS_ENABLED; beforeEach(() => { langsmithMock.reset(); + agentsMock.reset(); process.env.LANGSMITH_API_KEY = 'test-key'; delete process.env.LANGSMITH_TRACING; delete process.env.LANGCHAIN_TRACING_V2; + delete process.env.N8N_DIAGNOSTICS_ENABLED; }); afterAll(() => { @@ -861,6 +2040,11 @@ describe('submitLangsmithUserFeedback', () => { } else { process.env.LANGCHAIN_TRACING_V2 = 
originalLangChainTracingV2; } + if (originalDiagnosticsEnabled === undefined) { + delete process.env.N8N_DIAGNOSTICS_ENABLED; + } else { + process.env.N8N_DIAGNOSTICS_ENABLED = originalDiagnosticsEnabled; + } }); it('calls Client.createFeedback with the full payload', async () => { @@ -926,154 +2110,3 @@ describe('submitLangsmithUserFeedback', () => { expect(getAuthHeaders).toHaveBeenCalled(); }); }); - -describe('appendGeneratedWorkflowIdToRootMetadata', () => { - function makeRoot(metadata?: Record): InstanceAiTraceRun { - return { - id: 'root-1', - name: 'message_turn', - runType: 'chain', - projectName: 'instance-ai', - startTime: 0, - traceId: 'trace-1', - dottedOrder: '', - executionOrder: 0, - childExecutionOrder: 0, - ...(metadata ? { metadata: { ...metadata } } : {}), - }; - } - - it('initialises generated_workflow_ids array on first append', () => { - const root = makeRoot(); - appendGeneratedWorkflowIdToRootMetadata(root, 'wf-1'); - expect(root.metadata?.generated_workflow_ids).toEqual(['wf-1']); - }); - - it('appends additional ids without losing existing entries', () => { - const root = makeRoot({ generated_workflow_ids: ['wf-1'] }); - appendGeneratedWorkflowIdToRootMetadata(root, 'wf-2'); - expect(root.metadata?.generated_workflow_ids).toEqual(['wf-1', 'wf-2']); - }); - - it('dedupes repeated ids', () => { - const root = makeRoot({ generated_workflow_ids: ['wf-1'] }); - appendGeneratedWorkflowIdToRootMetadata(root, 'wf-1'); - expect(root.metadata?.generated_workflow_ids).toEqual(['wf-1']); - }); - - it('ignores non-string entries when reading existing metadata', () => { - const root = makeRoot({ generated_workflow_ids: [42, null, 'wf-1'] as unknown[] }); - appendGeneratedWorkflowIdToRootMetadata(root, 'wf-2'); - expect(root.metadata?.generated_workflow_ids).toEqual(['wf-1', 'wf-2']); - }); - - it('preserves unrelated metadata', () => { - const root = makeRoot({ user_id: 'u-1', thread_id: 't-1' }); - appendGeneratedWorkflowIdToRootMetadata(root, 
'wf-1'); - expect(root.metadata).toMatchObject({ - user_id: 'u-1', - thread_id: 't-1', - generated_workflow_ids: ['wf-1'], - }); - }); - - it('preserves live RunTree metadata mutations when appending root metadata', async () => { - const originalLangSmithApiKey = process.env.LANGSMITH_API_KEY; - const originalLangSmithTracing = process.env.LANGSMITH_TRACING; - const originalLangChainTracingV2 = process.env.LANGCHAIN_TRACING_V2; - - langsmithMock.reset(); - process.env.LANGSMITH_API_KEY = 'test-key'; - delete process.env.LANGSMITH_TRACING; - delete process.env.LANGCHAIN_TRACING_V2; - - try { - const tracing = await createDetachedSubAgentTraceContext({ - threadId: 'thread-1', - conversationId: 'thread-1', - messageGroupId: 'group-1', - messageId: 'message-1', - runId: 'run-1', - userId: 'user-1', - agentId: 'agent-builder-1', - role: 'workflow-builder', - kind: 'builder', - taskId: 'build-1', - input: { task: 'Build a workflow' }, - }); - - if (!tracing) { - throw new Error('Expected tracing context'); - } - - expect(tracing.rootRun.metadata?.agent_role).toBe('workflow-builder'); - - await tracing.withRunTree(tracing.actorRun, async () => { - await Promise.resolve(); - // Overwrite an existing root metadata key on the live RunTree so the - // two diverge on the same key with different values. The subsequent - // append must preserve the live value instead of rolling it back to - // the stale root state. 
- mergeCurrentTraceMetadata({ agent_role: 'planner' }); - appendGeneratedWorkflowIdToRootMetadata(tracing.rootRun, 'wf-1'); - expect(tracing.rootRun.metadata?.generated_workflow_ids).toEqual(['wf-1']); - expect(tracing.rootRun.metadata?.agent_role).toBe('planner'); - }); - - expect(tracing.rootRun.metadata?.generated_workflow_ids).toEqual(['wf-1']); - expect(tracing.rootRun.metadata?.agent_role).toBe('planner'); - } finally { - if (originalLangSmithApiKey === undefined) { - delete process.env.LANGSMITH_API_KEY; - } else { - process.env.LANGSMITH_API_KEY = originalLangSmithApiKey; - } - if (originalLangSmithTracing === undefined) { - delete process.env.LANGSMITH_TRACING; - } else { - process.env.LANGSMITH_TRACING = originalLangSmithTracing; - } - if (originalLangChainTracingV2 === undefined) { - delete process.env.LANGCHAIN_TRACING_V2; - } else { - process.env.LANGCHAIN_TRACING_V2 = originalLangChainTracingV2; - } - } - }); -}); - -describe('appendRootRunMetadata', () => { - it('merges new fields into root metadata', () => { - const root: InstanceAiTraceRun = { - id: 'root-1', - name: 'message_turn', - runType: 'chain', - projectName: 'instance-ai', - startTime: 0, - traceId: 'trace-1', - dottedOrder: '', - executionOrder: 0, - childExecutionOrder: 0, - metadata: { user_id: 'u-1' }, - }; - appendRootRunMetadata(root, { primary_workflow_id: 'wf-1' }); - expect(root.metadata).toEqual({ user_id: 'u-1', primary_workflow_id: 'wf-1' }); - }); - - it('overwrites existing values for the same key', () => { - const root: InstanceAiTraceRun = { - id: 'root-1', - name: 'message_turn', - runType: 'chain', - projectName: 'instance-ai', - startTime: 0, - traceId: 'trace-1', - dottedOrder: '', - executionOrder: 0, - childExecutionOrder: 0, - metadata: { final_status: 'pending' }, - }; - appendRootRunMetadata(root, { final_status: 'completed' }); - expect(root.metadata?.final_status).toBe('completed'); - }); -}); diff --git 
a/packages/@n8n/instance-ai/src/tracing/langsmith-tracing.ts b/packages/@n8n/instance-ai/src/tracing/langsmith-tracing.ts index 9c547de397c..faee005119e 100644 --- a/packages/@n8n/instance-ai/src/tracing/langsmith-tracing.ts +++ b/packages/@n8n/instance-ai/src/tracing/langsmith-tracing.ts @@ -1,11 +1,27 @@ -import type { BuiltTool, InterruptibleToolContext, ToolContext } from '@n8n/agents'; -import { Client, RunTree } from 'langsmith'; -import { getCurrentRunTree, withRunTree as withLangSmithRunTree } from 'langsmith/traceable'; +import { + LangSmithTelemetry, + Telemetry, + type AttributeValue, + type BuiltTelemetry, + type BuiltTool, + type InterruptibleToolContext, + type ToolContext, +} from '@n8n/agents'; +import { + ROOT_CONTEXT, + SpanStatusCode, + context as otelContext, + trace as otelTrace, +} from '@opentelemetry/api'; +import type { Context as OtelContext, Span as OtelApiSpan } from '@opentelemetry/api'; +import { Client } from 'langsmith'; import { AsyncLocalStorage } from 'node:async_hooks'; +import { createRequire } from 'node:module'; import { createToolRegistry } from '../tool-registry'; import type { InstanceAiToolTraceOptions, + InstanceAiTelemetryOptions, InstanceAiTraceContext, InstanceAiTraceRun, InstanceAiTraceRunFinishOptions, @@ -13,17 +29,40 @@ import type { InstanceAiToolRegistry, ServiceProxyConfig, } from '../types'; +import { + formatAgentRoleLabel, + formatInternalOperationLabel, + formatResumeReasonLabel, + formatTelemetryFunctionId, + formatTraceLabel, +} from './trace-labels'; +import { + GEN_AI_COMPLETION, + GEN_AI_PROMPT, + mergeTraceInputs, + redactLangSmithTelemetrySpan, + sanitizeTracePayload, + sanitizeTraceValue, + serializeModelIdForTrace, + toTelemetryAttributeValue, + toTelemetryMetadata, +} from './trace-payloads'; import type { IdRemapper, TraceIndex, TraceWriter } from './trace-replay'; import { PURE_REPLAY_TOOLS } from './trace-replay'; import { isRecord } from '../utils/stream-helpers'; +export { + 
buildAgentTraceInputs, + redactLangSmithTelemetrySpan, + serializeModelIdForTrace, +} from './trace-payloads'; + const DEFAULT_PROJECT_NAME = 'instance-ai'; const DEFAULT_TAGS = ['instance-ai']; -const MAX_TRACE_DEPTH = 4; -const MAX_TRACE_STRING_LENGTH = 2_000; -const MAX_TRACE_ARRAY_ITEMS = 20; -const MAX_TRACE_OBJECT_KEYS = 30; -const traceParentOverrideStorage = new AsyncLocalStorage<{ current: RunTree | null }>(); +const productTraceStorage = new AsyncLocalStorage<{ + runtime: ProductOtelTraceRuntime; + currentRun: InstanceAiTraceRun; +}>(); // Per-request proxy auth headers, isolated via AsyncLocalStorage. // The proxy Client is cached per deployment URL; each concurrent request @@ -32,10 +71,8 @@ const traceParentOverrideStorage = new AsyncLocalStorage<{ current: RunTree | nu // Authorization header without any shared mutable state. const proxyHeaderStore = new AsyncLocalStorage>(); -// Module-level map associating traceIds with proxy clients so that -// hydrateRunTree() (which reconstructs RunTree from serialized state) -// can use the correct proxy client for its HTTP calls. 
-const traceClients = new Map(); +const otelTraceRuntimes = new Map(); +const hostRequire = createRequire(__filename); /** * Fetch wrapper for LangSmith clients: @@ -59,6 +96,467 @@ const gzipFetch: typeof globalThis.fetch = async (input, init) => { let cachedProxyClient: { client: Client; apiUrl: string } | null = null; let cachedDirectClient: Client | null = null; +const OTEL_TRACE_VERSION = 'otel-v2'; +const LANGSMITH_TRACEABLE = 'langsmith.traceable'; +const LANGSMITH_TRACE_NAME = 'langsmith.trace.name'; +const LANGSMITH_SPAN_KIND = 'langsmith.span.kind'; +const LANGSMITH_SPAN_TAGS = 'langsmith.span.tags'; + +interface ProductOtelTraceRuntime { + telemetry: BuiltTelemetry; + spans: Map; + contexts: Map; + shutdown: boolean; +} + +interface OTelTracer { + startSpan( + name: string, + options?: { attributes?: Record }, + context?: OtelContext, + ): OtelApiSpan; +} + +function isOtelTracer(value: unknown): value is OTelTracer { + return ( + value !== null && + typeof value === 'object' && + typeof Reflect.get(value, 'startSpan') === 'function' + ); +} + +function langsmithTraceIdFromOtelTraceId(traceId: string): string { + return `${traceId.substring(0, 8)}-${traceId.substring(8, 12)}-${traceId.substring( + 12, + 16, + )}-${traceId.substring(16, 20)}-${traceId.substring(20, 32)}`; +} + +function langsmithRunIdFromOtelSpanId(spanId: string): string { + const paddedHex = spanId.padStart(16, '0'); + return `00000000-0000-0000-${paddedHex.substring(0, 4)}-${paddedHex.substring(4, 16)}`; +} + +function stableDottedOrder(parentRun: InstanceAiTraceRun | undefined, runId: string): string { + return parentRun?.dottedOrder ? 
`${parentRun.dottedOrder}.${runId}` : runId; +} + +function inferDisplayKind(name: string): string { + if (name === 'turn') return 'turn'; + if (name.startsWith('agent:')) return 'agent'; + if (name.startsWith('llm:')) return 'llm'; + if (name.startsWith('tool:')) return 'tool'; + if (name.startsWith('prepare:')) return 'prepare'; + if (name.startsWith('resume:')) return 'resume'; + if (name.startsWith('background task:')) return 'background_task'; + if (name.startsWith('hitl:')) return 'hitl'; + if (name.startsWith('internal:')) return 'internal'; + return 'operation'; +} + +function inferDisplayGroup( + metadata: Record | undefined, + name: string, +): string | undefined { + const role = + typeof metadata?.agent_role === 'string' + ? metadata.agent_role + : typeof metadata?.subagent_role === 'string' + ? metadata.subagent_role + : undefined; + if (role) { + return formatAgentRoleLabel(role); + } + + if (name.startsWith('prepare:')) return 'preparation'; + if (name.startsWith('hitl:')) return 'human-in-the-loop'; + if (name === 'turn') return 'conversation'; + return undefined; +} + +function inferDisplayPhase(metadata: Record | undefined): string | undefined { + return typeof metadata?.execution_mode === 'string' + ? formatTraceLabel(metadata.execution_mode) + : undefined; +} + +function buildProductSpanMetadata(options: { + name: string; + canonicalName?: string; + metadata?: Record; +}): Record { + const canonicalName = options.canonicalName ?? options.name; + const displayGroup = inferDisplayGroup(options.metadata, options.name); + const displayPhase = inferDisplayPhase(options.metadata); + const displayDefaults = { + trace_version: OTEL_TRACE_VERSION, + 'instance_ai.trace_version': OTEL_TRACE_VERSION, + display_kind: inferDisplayKind(options.name), + ...(displayGroup ? { display_group: displayGroup } : {}), + ...(displayPhase ? 
{ display_phase: displayPhase } : {}), + }; + + return ( + mergeMetadata(displayDefaults, options.metadata, { + display_name: options.name, + 'instance_ai.display_name': options.name, + 'instance_ai.canonical_name': canonicalName, + 'instance_ai.run_name': canonicalName, + }) ?? {} + ); +} + +function buildProductSpanAttributes(options: { + name: string; + canonicalName?: string; + runType?: string; + tags?: string[]; + metadata?: Record; + inputs?: unknown; +}): Record { + const attributes: Record = { + [LANGSMITH_TRACEABLE]: 'true', + [LANGSMITH_TRACE_NAME]: options.name, + [LANGSMITH_SPAN_KIND]: options.runType ?? 'chain', + 'instance_ai.trace_version': OTEL_TRACE_VERSION, + }; + + const tags = normalizeTags(DEFAULT_TAGS, options.tags); + if (tags?.length) { + attributes[LANGSMITH_SPAN_TAGS] = tags; + } + + const metadata = buildProductSpanMetadata(options); + for (const [key, value] of Object.entries(metadata ?? {})) { + const attributeValue = toTelemetryAttributeValue(value); + if (attributeValue === undefined) continue; + attributes[key] = attributeValue; + if (!key.startsWith('langsmith.metadata.')) { + attributes[`langsmith.metadata.${key}`] = attributeValue; + } + } + + const inputs = options.inputs === undefined ? 
undefined : stringifyTracePayload(options.inputs); + if (inputs !== undefined) { + attributes[GEN_AI_PROMPT] = inputs; + } + + return attributes; +} + +function stringifyTracePayload(value: unknown): string | undefined { + try { + return JSON.stringify(sanitizeTracePayload(value)); + } catch { + return undefined; + } +} + +function startProductSpan( + runtime: ProductOtelTraceRuntime, + options: { + projectName: string; + name: string; + canonicalName?: string; + runType?: string; + tags?: string[]; + metadata?: Record; + inputs?: unknown; + parentRun?: InstanceAiTraceRun; + parentContext?: OtelContext; + root?: boolean; + }, +): InstanceAiTraceRun { + if (!isOtelTracer(runtime.telemetry.tracer)) { + throw new Error('Instance AI tracing requires an OpenTelemetry tracer'); + } + + const spanMetadata = buildProductSpanMetadata(options); + const parentContext = options.root + ? ROOT_CONTEXT + : (options.parentContext ?? + (options.parentRun ? runtime.contexts.get(options.parentRun.id) : undefined) ?? + otelContext.active()); + const span = runtime.telemetry.tracer.startSpan( + options.name, + { + attributes: buildProductSpanAttributes(options), + }, + parentContext, + ); + const spanContext = span.spanContext(); + const traceId = langsmithTraceIdFromOtelTraceId(spanContext.traceId); + const runId = langsmithRunIdFromOtelSpanId(spanContext.spanId); + const spanContextWithSpan = otelTrace.setSpan(parentContext ?? otelContext.active(), span); + const parentRun = options.parentRun; + const runMetadata = mergeMetadata(parentRun?.metadata, spanMetadata); + const run: InstanceAiTraceRun = { + id: runId, + name: options.name, + runType: options.runType ?? 'chain', + projectName: options.projectName, + startTime: Date.now(), + traceId, + otelTraceId: spanContext.traceId, + otelSpanId: spanContext.spanId, + dottedOrder: stableDottedOrder(parentRun, runId), + executionOrder: parentRun ? parentRun.childExecutionOrder + 1 : 0, + childExecutionOrder: 0, + ...(parentRun ? 
{ parentRunId: parentRun.id } : {}), + ...(options.tags ? { tags: normalizeTags(DEFAULT_TAGS, parentRun?.tags, options.tags) } : {}), + ...(runMetadata ? { metadata: runMetadata } : {}), + ...(options.inputs !== undefined ? { inputs: sanitizeTracePayload(options.inputs) } : {}), + }; + + if (parentRun) { + parentRun.childExecutionOrder += 1; + } + + runtime.spans.set(run.id, span); + runtime.contexts.set(run.id, spanContextWithSpan); + return run; +} + +async function finishProductSpan( + runtime: ProductOtelTraceRuntime, + run: InstanceAiTraceRun, + options?: ProductSpanFinishOptions, +): Promise { + const span = runtime.spans.get(run.id); + if (!span) return; + + const metadata = mergeMetadata(options?.metadata); + const attributes: Record = {}; + for (const [key, value] of Object.entries(metadata ?? {})) { + const attributeValue = toTelemetryAttributeValue(value); + if (attributeValue === undefined) continue; + attributes[key] = attributeValue; + attributes[`langsmith.metadata.${key}`] = attributeValue; + } + + if (options?.outputs !== undefined) { + const completion = stringifyTracePayload(options.outputs); + if (completion !== undefined) { + attributes[GEN_AI_COMPLETION] = completion; + } + run.outputs = sanitizeTracePayload(options.outputs); + } + + run.endTime = Date.now(); + run.metadata = mergeMetadata(run.metadata, metadata); + + try { + if (Object.keys(attributes).length > 0) { + span.setAttributes(attributes); + } + + if (options?.error) { + span.recordException(new Error(options.error)); + span.setStatus({ code: SpanStatusCode.ERROR, message: options.error }); + run.error = options.error; + } else { + span.setStatus({ code: SpanStatusCode.OK }); + } + + span.end(); + } finally { + runtime.spans.delete(run.id); + runtime.contexts.delete(run.id); + } + + if (options?.forceFlush === true) { + await Telemetry.forceFlush(runtime.telemetry); + } +} + +async function finishProductSpanBestEffort( + runtime: ProductOtelTraceRuntime, + run: InstanceAiTraceRun, + 
options?: ProductSpanFinishOptions, +): Promise { + try { + await finishProductSpan(runtime, run, options); + } catch { + // Product tracing is best-effort and must not fail or mask agent execution. + } +} + +async function shutdownProductOtelRuntime( + runtime: ProductOtelTraceRuntime, + traceId: string, +): Promise { + if (runtime.shutdown) return; + + runtime.shutdown = true; + runtime.spans.clear(); + runtime.contexts.clear(); + otelTraceRuntimes.delete(traceId); + + try { + await Telemetry.shutdown(runtime.telemetry); + } catch { + // Product tracing is best-effort and must not fail or mask agent execution. + } +} + +async function withProxyHeaders( + proxyConfig: ServiceProxyConfig | undefined, + fn: () => T | Promise, +): Promise { + if (!proxyConfig) return await fn(); + + const headers = await proxyConfig.getAuthHeaders(); + return await proxyHeaderStore.run(headers, fn); +} + +async function withProxyHeadersBestEffort( + proxyConfig: ServiceProxyConfig | undefined, + fn: () => T | Promise, +): Promise { + if (!proxyConfig) return await fn(); + + let headers: Record; + try { + headers = await proxyConfig.getAuthHeaders(); + } catch { + return await fn(); + } + + return await proxyHeaderStore.run(headers, fn); +} + +async function withProductSpanContext( + runtime: ProductOtelTraceRuntime, + run: InstanceAiTraceRun, + fn: () => Promise, +): Promise { + const spanContext = runtime.contexts.get(run.id); + if (!spanContext) { + return await fn(); + } + + return await productTraceStorage.run( + { runtime, currentRun: run }, + async () => await otelContext.with(spanContext, fn), + ); +} + +async function withProductSpanContextBestEffort( + runtime: ProductOtelTraceRuntime, + run: InstanceAiTraceRun, + fn: () => Promise, +): Promise { + let callbackStarted = false; + try { + return await withProductSpanContext(runtime, run, async () => { + callbackStarted = true; + return await fn(); + }); + } catch (error) { + if (!callbackStarted) { + return await fn(); + } + 
throw error; + } +} + +function createFallbackChildRun( + projectName: string, + parentRun: InstanceAiTraceRun, + init: InstanceAiTraceRunInit, +): InstanceAiTraceRun { + const executionOrder = parentRun.childExecutionOrder + 1; + const id = `${parentRun.id}:child:${executionOrder}`; + const metadata = mergeMetadata(parentRun.metadata, init.metadata); + parentRun.childExecutionOrder = executionOrder; + + return { + id, + name: init.name, + runType: init.runType ?? 'chain', + projectName, + startTime: Date.now(), + traceId: parentRun.traceId, + dottedOrder: stableDottedOrder(parentRun, id), + executionOrder, + childExecutionOrder: 0, + parentRunId: parentRun.id, + ...(init.tags ? { tags: normalizeTags(DEFAULT_TAGS, parentRun.tags, init.tags) } : {}), + ...(metadata ? { metadata } : {}), + ...(init.inputs !== undefined ? { inputs: sanitizeTracePayload(init.inputs) } : {}), + }; +} + +function getCurrentProductTrace(): + | { runtime: ProductOtelTraceRuntime; currentRun: InstanceAiTraceRun } + | undefined { + return productTraceStorage.getStore(); +} + +function getActiveOtelContextWithSpan(expectedTraceId?: string): OtelContext | undefined { + const activeContext = otelContext.active(); + const activeSpanContext = otelTrace.getSpan(activeContext)?.spanContext(); + if (!activeSpanContext) { + return undefined; + } + if (expectedTraceId && activeSpanContext.traceId !== expectedTraceId) { + return undefined; + } + return activeContext; +} + +function spanMetadataAttributes( + metadata: Record | undefined, +): Record { + const attributes: Record = {}; + for (const [key, value] of Object.entries(metadata ?? 
{})) { + const attributeValue = toTelemetryAttributeValue(value); + if (attributeValue === undefined) continue; + attributes[key] = attributeValue; + if (!key.startsWith('langsmith.metadata.')) { + attributes[`langsmith.metadata.${key}`] = attributeValue; + } + } + return attributes; +} + +function updateProductRunMetadata( + runtime: ProductOtelTraceRuntime, + run: InstanceAiTraceRun, + metadata: Record, +): void { + const mergedMetadata = mergeMetadata(run.metadata, metadata); + if (!mergedMetadata) return; + + run.metadata = mergedMetadata; + const attributes = spanMetadataAttributes(metadata); + if (Object.keys(attributes).length > 0) { + try { + runtime.spans.get(run.id)?.setAttributes(attributes); + } catch { + // Product tracing is best-effort and must not fail agent execution. + } + } +} + +function updateProductRunInputs( + runtime: ProductOtelTraceRuntime, + run: InstanceAiTraceRun, + inputs: Record, +): void { + const mergedInputs = sanitizeTracePayload(mergeTraceInputs(run.inputs, inputs)); + run.inputs = mergedInputs; + + const prompt = stringifyTracePayload(mergedInputs); + if (prompt !== undefined) { + try { + runtime.spans.get(run.id)?.setAttributes({ [GEN_AI_PROMPT]: prompt }); + } catch { + // Product tracing is best-effort and must not fail agent execution. + } + } +} + /** Get a LangSmith Client that uses gzip encoding (no brotli). 
*/ function getOrCreateDirectClient(): Client { if (cachedDirectClient) return cachedDirectClient; @@ -125,12 +623,15 @@ interface CreateDetachedSubAgentTraceContextOptions extends CreateInstanceAiTrac spawnedByToolCallId?: string; } -interface CreateInternalOperationTraceContextOptions extends CreateInstanceAiTraceContextOptions { +interface CreateInternalOperationTraceContextOptions + extends Omit { operationName: string; + messageId?: string; } interface CurrentTraceSpanOptions { name: string; + canonicalName?: string; runType?: string; tags?: string[]; metadata?: Record; @@ -138,32 +639,28 @@ interface CurrentTraceSpanOptions { processOutputs?: (result: T) => unknown; } -interface AgentTraceInputOptions { - systemPrompt?: string; - tools?: InstanceAiToolRegistry; - runtimeTools?: InstanceAiToolRegistry; - deferredTools?: InstanceAiToolRegistry; - modelId?: unknown; - memory?: unknown; - toolSearchEnabled?: boolean; - inputProcessors?: string[]; +type NativeToolContext = ToolContext | InterruptibleToolContext; +type TraceableNativeTool = BuiltTool & { handler: NonNullable }; +type ProductSpanFinishOptions = InstanceAiTraceRunFinishOptions & { forceFlush?: boolean }; + +function readBooleanEnvFlag(value: string | undefined): boolean | undefined { + const normalized = value?.toLowerCase(); + if (normalized === 'true') return true; + if (normalized === 'false') return false; + return undefined; } -type TraceableNativeTool = BuiltTool; -type NativeToolExecutionContext = ToolContext | InterruptibleToolContext; - -interface NormalizedModelMetadata { - provider?: string; - modelName?: string; -} - -function isLangSmithTracingEnabled(proxyAvailable?: boolean): boolean { - const tracingFlag = - process.env.LANGCHAIN_TRACING_V2 ?? process.env.LANGSMITH_TRACING ?? 
undefined; - if (tracingFlag?.toLowerCase() === 'false') { +function isLangSmithTracingEnabled(proxyAvailable = false): boolean { + if (readBooleanEnvFlag(process.env.N8N_DIAGNOSTICS_ENABLED) === false) { return false; } + const tracingFlag = readBooleanEnvFlag( + process.env.LANGCHAIN_TRACING_V2 ?? process.env.LANGSMITH_TRACING, + ); + if (tracingFlag === false) { + return false; + } if (proxyAvailable) { return true; } @@ -173,13 +670,15 @@ function isLangSmithTracingEnabled(proxyAvailable?: boolean): boolean { process.env.LANGCHAIN_API_KEY ?? process.env.LANGSMITH_ENDPOINT ?? process.env.LANGCHAIN_ENDPOINT ?? - tracingFlag?.toLowerCase() === 'true', + tracingFlag === true, ); } -function ensureLangSmithTracingEnv(): void { - process.env.LANGCHAIN_TRACING_V2 ??= 'true'; - process.env.LANGSMITH_TRACING ??= 'true'; +function isInternalOperationTracingEnabled(): boolean { + return ( + process.env.N8N_INSTANCE_AI_TRACE_INTERNAL === 'true' || + process.env.N8N_INSTANCE_AI_TRACE_INCLUDE_INTERNAL === 'true' + ); } function normalizeErrorMessage(error: unknown): string { @@ -208,233 +707,18 @@ function mergeMetadata( return Object.keys(merged).length > 0 ? merged : undefined; } -function truncateString(value: string): string { - if (value.length <= MAX_TRACE_STRING_LENGTH) { - return value; - } - - return `${value.slice(0, MAX_TRACE_STRING_LENGTH)}…`; -} - -function splitTraceText(value: string): string[] { - if (value.length <= MAX_TRACE_STRING_LENGTH) { - return [value]; - } - - const chunks: string[] = []; - let remaining = value; - - while (remaining.length > MAX_TRACE_STRING_LENGTH) { - const candidate = remaining.slice(0, MAX_TRACE_STRING_LENGTH); - const splitIndex = candidate.lastIndexOf('\n'); - const chunkEnd = - splitIndex >= MAX_TRACE_STRING_LENGTH / 2 ? 
splitIndex + 1 : MAX_TRACE_STRING_LENGTH; - chunks.push(remaining.slice(0, chunkEnd)); - remaining = remaining.slice(chunkEnd); - } - - if (remaining.length > 0) { - chunks.push(remaining); - } - - return chunks; -} - -function serializeTraceText(value: string): string | Record { - const chunks = splitTraceText(value); - if (chunks.length === 1) { - return chunks[0]; - } - - return Object.fromEntries( - chunks.map((chunk, index) => [`part_${String(index + 1).padStart(2, '0')}`, chunk]), - ); -} - -function summarizeToolDescription(tool: unknown): string | undefined { - if (!isRecord(tool)) { - return undefined; - } - - return typeof tool.description === 'string' ? tool.description : undefined; -} - -function summarizeToolSet( - fieldPrefix: 'loaded' | 'deferred' | 'runtime', - tools: InstanceAiToolRegistry | undefined, -): Record { - if (!tools || tools.size === 0) { - return {}; - } - - const summaries = Array.from(tools, ([name, tool]) => ({ - name, - ...(summarizeToolDescription(tool) ? { description: summarizeToolDescription(tool) } : {}), - })); - const catalogText = summaries - .map((tool) => - typeof tool.description === 'string' ? `${tool.name}: ${tool.description}` : tool.name, - ) - .join('\n'); - - return { - [`${fieldPrefix}_tool_count`]: summaries.length, - [`${fieldPrefix}_tools`]: summaries, - [`${fieldPrefix}_tool_catalog`]: serializeTraceText(catalogText), - }; -} - -function summarizeMemoryBinding(memory: unknown): Record { - if (!isRecord(memory)) { - return {}; - } - - return { - memory_enabled: true, - ...(typeof memory.resource === 'string' ? { memory_resource_id: memory.resource } : {}), - ...(typeof memory.thread === 'string' ? 
{ memory_thread_id: memory.thread } : {}), - }; -} - -function sanitizeTraceValue(value: unknown, depth = 0): unknown { - if (value === null || value === undefined) { - return value; - } - - if (typeof value === 'string') { - return truncateString(value); - } - - if (typeof value === 'number' || typeof value === 'boolean') { - return value; - } - - if (typeof value === 'bigint') { - return value.toString(); - } - - if (typeof value === 'function') { - return `[function ${value.name || 'anonymous'}]`; - } - - if (value instanceof Date) { - return value.toISOString(); - } - - if (value instanceof Error) { - return { - name: value.name, - message: truncateString(value.message), - }; - } - - if (value instanceof Uint8Array) { - return `[binary ${value.byteLength} bytes]`; - } - - if (Array.isArray(value)) { - if (depth >= MAX_TRACE_DEPTH) { - return `[array(${value.length})]`; - } - - return value - .slice(0, MAX_TRACE_ARRAY_ITEMS) - .map((entry) => sanitizeTraceValue(entry, depth + 1)); - } - - if (isRecord(value)) { - if (depth >= MAX_TRACE_DEPTH) { - return `[object ${Object.keys(value).length} keys]`; - } - - const entries = Object.entries(value).slice(0, MAX_TRACE_OBJECT_KEYS); - const sanitized: Record = {}; - for (const [key, entryValue] of entries) { - sanitized[key] = sanitizeTraceValue(entryValue, depth + 1); - } - if (Object.keys(value).length > entries.length) { - sanitized.__truncatedKeys = Object.keys(value).length - entries.length; - } - return sanitized; - } - - if (typeof value === 'symbol') { - return value.toString(); - } - - return truncateString(Object.prototype.toString.call(value)); -} - -function sanitizeTracePayload(value: unknown): Record { - if (isRecord(value)) { - const sanitized: Record = {}; - for (const [key, entryValue] of Object.entries(value)) { - sanitized[key] = sanitizeTraceValue(entryValue); - } - return sanitized; - } - - if (value === undefined) { - return {}; - } - - return { value: sanitizeTraceValue(value) }; -} - -function 
normalizeModelMetadata(modelId: unknown): NormalizedModelMetadata { - if (typeof modelId === 'string' && modelId.length > 0) { - const [provider, ...modelParts] = modelId.split('/'); - return modelParts.length > 0 - ? { provider, modelName: modelParts.join('/') } - : { modelName: modelId }; - } - - if (isRecord(modelId) && typeof modelId.id === 'string') { - return normalizeModelMetadata(modelId.id); - } - - return {}; -} - -export function serializeModelIdForTrace(modelId: unknown): unknown { - if (typeof modelId === 'string' && modelId.length > 0) { - return truncateString(modelId); - } - - if (isRecord(modelId) && typeof modelId.id === 'string') { - return truncateString(modelId.id); - } - - return sanitizeTraceValue(modelId); -} - -function mergeRunTreeMetadata( - baseMetadata: Record | undefined, - metadata: Record | undefined, -): Record | undefined { - return mergeMetadata(baseMetadata, metadata); -} - -function mergeRunTreeInputs( - baseInputs: unknown, - inputs: Record | undefined, -): Record { - const existingInputs = - isRecord(baseInputs) && !Array.isArray(baseInputs) ? { ...baseInputs } : {}; - - return { - ...existingInputs, - ...(inputs ?? {}), - }; -} - /** - * Unconditionally remove the cached LangSmith Client for a trace. - * Call after run finalization (success or failure) so the Client and - * its RunTree hierarchy can be garbage-collected. + * Unconditionally remove the cached OTel runtime for a trace. + * Call after run finalization (success or failure) so its OTel runtime can be + * garbage-collected. 
*/ export function releaseTraceClient(traceId: string): void { - traceClients.delete(traceId); + const runtime = otelTraceRuntimes.get(traceId); + if (!runtime) { + return; + } + + void shutdownProductOtelRuntime(runtime, traceId); } export interface SubmitLangsmithUserFeedbackOptions { @@ -477,49 +761,39 @@ export async function submitLangsmithUserFeedback( }); }; - if (options.proxyConfig) { - const headers = await options.proxyConfig.getAuthHeaders(); - await proxyHeaderStore.run(headers, call); - } else { - await call(); - } + await withProxyHeaders(options.proxyConfig, call); return true; } -export function getTraceParentRun(): RunTree | undefined { - const overrideRun = traceParentOverrideStorage.getStore()?.current; - if (overrideRun) { - return overrideRun; +export function getCurrentOtelSpanContext(): { traceId: string; spanId: string } | undefined { + const activeSpanContext = otelTrace.getSpan(otelContext.active())?.spanContext(); + if (activeSpanContext) { + return { + traceId: activeSpanContext.traceId, + spanId: activeSpanContext.spanId, + }; } - try { - return getCurrentRunTree() ?? undefined; - } catch { - return undefined; + const currentRun = getCurrentProductTrace()?.currentRun; + if (currentRun?.otelTraceId && currentRun.otelSpanId) { + return { + traceId: currentRun.otelTraceId, + spanId: currentRun.otelSpanId, + }; } + + return undefined; } -export function setTraceParentOverride(parentRun: RunTree | null | undefined): void { - const store = traceParentOverrideStorage.getStore(); - if (store) { - store.current = parentRun ?? null; - } else if (parentRun) { - // No ALS context yet — bootstrap one for the current async chain. - // Safe: each withTraceParentContext call creates its own nested context, - // so this only affects code that skips our context setup (e.g. tests). 
- traceParentOverrideStorage.enterWith({ current: parentRun }); - } +export function getCurrentTraceToolCallId(): string | undefined { + const metadata = getCurrentProductTrace()?.currentRun.metadata; + return typeof metadata?.tool_call_id === 'string' ? metadata.tool_call_id : undefined; } export function mergeCurrentTraceMetadata(metadata: Record): void { - const currentRun = getTraceParentRun(); - if (!currentRun) { - return; - } - - const mergedMetadata = mergeRunTreeMetadata(currentRun.metadata, metadata); - if (mergedMetadata) { - currentRun.metadata = mergedMetadata; + const currentProductTrace = getCurrentProductTrace(); + if (currentProductTrace) { + updateProductRunMetadata(currentProductTrace.runtime, currentProductTrace.currentRun, metadata); } } @@ -527,17 +801,15 @@ export function appendRootRunMetadata( root: InstanceAiTraceRun, patch: Record, ): void { - const currentRun = getTraceParentRun(); - const baseMetadata = - currentRun?.id === root.id - ? mergeRunTreeMetadata(root.metadata, currentRun.metadata) - : root.metadata; - const merged = mergeRunTreeMetadata(baseMetadata, patch); - if (merged) { - root.metadata = merged; - if (currentRun?.id === root.id) { - currentRun.metadata = merged; - } + const runtime = getCurrentProductTrace()?.runtime ?? otelTraceRuntimes.get(root.traceId); + if (runtime) { + updateProductRunMetadata(runtime, root, patch); + return; + } + + const mergedMetadata = mergeMetadata(root.metadata, patch); + if (mergedMetadata) { + root.metadata = mergedMetadata; } } @@ -545,12 +817,7 @@ export function appendGeneratedWorkflowIdToRootMetadata( root: InstanceAiTraceRun, workflowId: string, ): void { - const currentRun = getTraceParentRun(); - const metadata = - currentRun?.id === root.id - ? 
mergeRunTreeMetadata(root.metadata, currentRun.metadata) - : root.metadata; - const generatedWorkflowIds = metadata?.generated_workflow_ids; + const generatedWorkflowIds = root.metadata?.generated_workflow_ids; const existing = Array.isArray(generatedWorkflowIds) ? generatedWorkflowIds.filter((value): value is string => typeof value === 'string') : []; @@ -568,108 +835,53 @@ export function mergeTraceRunInputs( return; } - const mergedInputs = sanitizeTracePayload(mergeRunTreeInputs(run.inputs, inputs)); - run.inputs = mergedInputs; - - const currentRun = getTraceParentRun(); - if (currentRun?.id === run.id) { - currentRun.inputs = mergedInputs; + const runtime = getCurrentProductTrace()?.runtime ?? otelTraceRuntimes.get(run.traceId); + if (runtime) { + updateProductRunInputs(runtime, run, inputs); + return; } -} -export function buildAgentTraceInputs(options: AgentTraceInputOptions): Record { - return sanitizeTracePayload({ - ...(options.systemPrompt ? { system_prompt: serializeTraceText(options.systemPrompt) } : {}), - ...(options.modelId !== undefined ? { model: serializeModelIdForTrace(options.modelId) } : {}), - ...(options.toolSearchEnabled !== undefined - ? { tool_search_enabled: options.toolSearchEnabled } - : {}), - ...(options.inputProcessors?.length ? { input_processors: options.inputProcessors } : {}), - ...summarizeMemoryBinding(options.memory), - ...summarizeToolSet('loaded', options.tools), - ...summarizeToolSet('runtime', options.runtimeTools), - ...summarizeToolSet('deferred', options.deferredTools), - }); -} - -export async function withTraceParentContext( - parentRun: RunTree | undefined, - fn: () => Promise, -): Promise { - // Always create a new nested ALS context. Mutating an existing store.current - // is not safe when concurrent background tasks inherit the same parent context. - return await traceParentOverrideStorage.run({ current: parentRun ?? 
null }, fn); -} - -export function getCurrentOtelSpanContext(): { traceId?: string; spanId?: string } | undefined { - const currentRun = getTraceParentRun(); - if (!currentRun) return undefined; - return { - traceId: currentRun.trace_id, - spanId: currentRun.id, - }; -} - -export function getCurrentTraceToolCallId(): string | undefined { - const metadata = getTraceParentRun()?.metadata; - return typeof metadata?.tool_call_id === 'string' ? metadata.tool_call_id : undefined; -} - -async function postChildRun( - parentRun: RunTree, - options: InstanceAiTraceRunInit & { tags?: string[] }, -): Promise { - const childRun = parentRun.createChild({ - name: options.name, - run_type: options.runType ?? 'chain', - tags: normalizeTags(DEFAULT_TAGS, parentRun.tags, options.tags), - metadata: mergeRunTreeMetadata(parentRun.metadata, options.metadata), - inputs: sanitizeTracePayload(options.inputs), - }); - childRun.parent_run_id ??= parentRun.id; - await childRun.postRun(); - return childRun; -} - -async function finishRunTree( - runTree: RunTree, - options?: InstanceAiTraceRunFinishOptions, -): Promise { - await runTree.end( - options?.outputs !== undefined ? sanitizeTracePayload(options.outputs) : undefined, - options?.error, - Date.now(), - mergeMetadata(options?.metadata), - ); - await runTree.patchRun(); + run.inputs = sanitizeTracePayload(mergeTraceInputs(run.inputs, inputs)); } export async function withCurrentTraceSpan( options: CurrentTraceSpanOptions, fn: () => Promise, ): Promise { - const parentRun = getTraceParentRun(); - if (!parentRun) { + const currentProductTrace = getCurrentProductTrace(); + if (!currentProductTrace) { return await fn(); } - const spanRun = await postChildRun(parentRun, { - name: options.name, - runType: options.runType ?? 
'chain', - tags: options.tags, - metadata: options.metadata, - inputs: options.inputs, - }); + const activeParentContext = getActiveOtelContextWithSpan( + currentProductTrace.currentRun.otelTraceId, + ); + let spanRun: InstanceAiTraceRun; + try { + spanRun = startProductSpan(currentProductTrace.runtime, { + projectName: currentProductTrace.currentRun.projectName, + name: options.name, + canonicalName: options.canonicalName, + runType: options.runType ?? 'chain', + tags: options.tags, + metadata: options.metadata, + inputs: options.inputs, + parentRun: currentProductTrace.currentRun, + ...(activeParentContext ? { parentContext: activeParentContext } : {}), + }); + } catch { + return await fn(); + } try { - const result = await withLangSmithRunTree(spanRun, fn); - await finishRunTree(spanRun, { + const result = await withProductSpanContextBestEffort(currentProductTrace.runtime, spanRun, fn); + await finishProductSpanBestEffort(currentProductTrace.runtime, spanRun, { ...(options.processOutputs ? { outputs: options.processOutputs(result) } : {}), metadata: { final_status: 'completed' }, }); return result; } catch (error) { - await finishRunTree(spanRun, { + await finishProductSpanBestEffort(currentProductTrace.runtime, spanRun, { error: normalizeErrorMessage(error), metadata: { final_status: 'error' }, }); @@ -677,25 +889,6 @@ export async function withCurrentTraceSpan( } } -async function startHitlChildRun( - parentRun: RunTree, - name: string, - inputs: unknown, - metadata?: Record, -): Promise { - const hitlRun = await postChildRun(parentRun, { - name, - runType: 'chain', - tags: ['hitl'], - metadata, - inputs, - }); - await finishRunTree(hitlRun, { - outputs: inputs, - metadata: { final_status: 'completed' }, - }); -} - function buildSuspendMetadata( toolName: string, suspendPayload: unknown, @@ -716,140 +909,124 @@ function buildSuspendMetadata( }; } -function getResumeData(context: NativeToolExecutionContext): unknown { - return 'resumeData' in context ? 
context.resumeData : undefined; +function isInterruptibleToolContext( + context: NativeToolContext, +): context is InterruptibleToolContext { + return isRecord(context) && typeof context.suspend === 'function'; } -function getSuspend( - context: NativeToolExecutionContext, -): InterruptibleToolContext['suspend'] | undefined { - return 'suspend' in context && typeof context.suspend === 'function' - ? context.suspend +function getToolCallId(context: NativeToolContext): string | undefined { + return isRecord(context) && typeof context.toolCallId === 'string' + ? context.toolCallId : undefined; } -async function traceSuspendableToolHandler( - tool: TraceableNativeTool, - options: InstanceAiToolTraceOptions | undefined, - input: unknown, - context: NativeToolExecutionContext, -): Promise { - const parentRun = getTraceParentRun(); - if (!parentRun || typeof tool.handler !== 'function') { - return await tool.handler?.(input, context); - } - - const resumeData = getResumeData(context); - const toolRun = await postChildRun(parentRun, { - name: - resumeData !== undefined && resumeData !== null - ? `tool:${tool.name}:resume` - : `tool:${tool.name}`, - runType: 'tool', - tags: normalizeTags(['tool'], options?.tags), - metadata: mergeMetadata(options?.metadata, { - tool_name: tool.name, - ...(options?.agentRole ? { agent_role: options.agentRole } : {}), - phase: resumeData !== undefined && resumeData !== null ? 'resume' : 'initial', - ...(resumeData !== undefined && resumeData !== null - ? mergeMetadata(buildSuspendMetadata(tool.name, resumeData), { - approved: isRecord(resumeData) ? resumeData.approved : undefined, - }) - : {}), - }), - inputs: { input }, - }); - - let toolRunFinished = false; - const finishToolRun = async (finishOptions?: InstanceAiTraceRunFinishOptions) => { - if (toolRunFinished) return; - toolRunFinished = true; - await finishRunTree(toolRun, finishOptions); - }; - - const originalSuspend = getSuspend(context); - const wrappedContext = originalSuspend - ? 
{ - ...context, - suspend: async (suspendPayload: unknown) => { - await startHitlChildRun( - toolRun, - 'hitl:suspend', - suspendPayload, - buildSuspendMetadata(tool.name, suspendPayload), - ); - await finishToolRun({ - outputs: { - status: 'suspended', - suspendPayload, - }, - metadata: mergeMetadata(buildSuspendMetadata(tool.name, suspendPayload), { - final_status: 'suspended', - }), - }); - return await originalSuspend(suspendPayload); - }, - } - : context; - +async function startAndFinishProductChildSpan( + currentTrace: { runtime: ProductOtelTraceRuntime; currentRun: InstanceAiTraceRun }, + options: { + name: string; + canonicalName?: string; + runType?: string; + tags?: string[]; + metadata?: Record; + inputs?: unknown; + outputs?: unknown; + error?: string; + forceFlush?: boolean; + }, +): Promise { + const activeParentContext = getActiveOtelContextWithSpan(currentTrace.currentRun.otelTraceId); + let childRun: InstanceAiTraceRun; try { - const result = await withLangSmithRunTree(toolRun, async () => { - return await tool.handler!(input, wrappedContext); + childRun = startProductSpan(currentTrace.runtime, { + projectName: currentTrace.currentRun.projectName, + name: options.name, + canonicalName: options.canonicalName, + runType: options.runType ?? 'chain', + tags: options.tags, + metadata: mergeMetadata(currentTrace.currentRun.metadata, options.metadata), + inputs: options.inputs, + parentRun: currentTrace.currentRun, + ...(activeParentContext ? { parentContext: activeParentContext } : {}), }); - await finishToolRun({ - outputs: result, - metadata: { final_status: 'completed' }, - }); - return result; - } catch (error) { - await finishToolRun({ - error: normalizeErrorMessage(error), - metadata: { final_status: 'error' }, - }); - throw error; + } catch { + return; } + await finishProductSpanBestEffort(currentTrace.runtime, childRun, { + ...(options.outputs !== undefined ? { outputs: options.outputs } : {}), + ...(options.error ? 
{ error: options.error } : {}), + metadata: { + final_status: options.error ? 'error' : 'completed', + }, + forceFlush: options.forceFlush, + }); } -async function traceToolHandler( +async function traceProductSuspendableToolExecute( tool: TraceableNativeTool, - options: InstanceAiToolTraceOptions | undefined, input: unknown, - context: NativeToolExecutionContext, + context: NativeToolContext, + currentTrace: { runtime: ProductOtelTraceRuntime; currentRun: InstanceAiTraceRun }, ): Promise { - const parentRun = getTraceParentRun(); - if (!parentRun || typeof tool.handler !== 'function') { - return await tool.handler?.(input, context); + const resumeData = isInterruptibleToolContext(context) ? context.resumeData : undefined; + const isResume = resumeData !== undefined && resumeData !== null; + const toolCallId = getToolCallId(context); + + const originalSuspend = isInterruptibleToolContext(context) ? context.suspend : undefined; + const wrappedContext: NativeToolContext = + typeof originalSuspend === 'function' + ? { + ...context, + suspend: async (suspendPayload: unknown) => { + await startAndFinishProductChildSpan(currentTrace, { + name: 'hitl: suspend', + canonicalName: 'instance-ai.hitl.suspend', + runType: 'chain', + tags: ['hitl'], + metadata: mergeMetadata(buildSuspendMetadata(tool.name, suspendPayload), { + ...(toolCallId ? { tool_call_id: toolCallId } : {}), + }), + inputs: suspendPayload, + outputs: suspendPayload, + forceFlush: true, + }); + return await originalSuspend(suspendPayload); + }, + } + : context; + + if (isResume) { + await startAndFinishProductChildSpan(currentTrace, { + name: 'hitl: resume', + canonicalName: 'instance-ai.hitl.resume', + runType: 'chain', + tags: ['hitl', 'resume'], + metadata: mergeMetadata(buildSuspendMetadata(tool.name, resumeData), { + approved: isRecord(resumeData) ? resumeData.approved : undefined, + ...(toolCallId ? 
{ tool_call_id: toolCallId } : {}), + }), + inputs: resumeData, + outputs: { + status: 'resumed', + }, + forceFlush: true, + }); } - const toolRun = await postChildRun(parentRun, { - name: `tool:${tool.name}`, - runType: 'tool', - tags: normalizeTags(['tool'], options?.tags), - metadata: mergeMetadata(options?.metadata, { - tool_name: tool.name, - ...(options?.agentRole ? { agent_role: options.agentRole } : {}), - ...normalizeModelMetadata(options?.metadata?.model_id), - }), - inputs: { input }, - }); + return await tool.handler(input, wrappedContext); +} - try { - const result = await withLangSmithRunTree( - toolRun, - async () => await tool.handler!(input, context), - ); - await finishRunTree(toolRun, { - outputs: result, - metadata: { final_status: 'completed' }, - }); - return result; - } catch (error) { - await finishRunTree(toolRun, { - error: normalizeErrorMessage(error), - metadata: { final_status: 'error' }, - }); - throw error; +async function traceSuspendableToolExecute( + tool: TraceableNativeTool, + input: unknown, + context: NativeToolContext, +): Promise { + const currentProductTrace = getCurrentProductTrace(); + if (!currentProductTrace) { + return await tool.handler(input, context); } + + return await traceProductSuspendableToolExecute(tool, input, context, currentProductTrace); } function createTraceContext( @@ -857,31 +1034,61 @@ function createTraceContext( traceKind: InstanceAiTraceContext['traceKind'], rootRun: InstanceAiTraceRun, actorRun: InstanceAiTraceRun, - getProxyHeaders?: () => Promise>, + otelRuntime: ProductOtelTraceRuntime, + proxyConfig?: ServiceProxyConfig, + telemetryFactory?: (options: InstanceAiTelemetryOptions) => Telemetry | BuiltTelemetry, ): InstanceAiTraceContext { - const withProxy = async (fn: () => Promise): Promise => { - if (!getProxyHeaders) return await fn(); - const headers = await getProxyHeaders(); - return await proxyHeaderStore.run(headers, fn); - }; + otelTraceRuntimes.set(rootRun.traceId, otelRuntime); const 
startChildRun = async ( parentRun: InstanceAiTraceRun, init: InstanceAiTraceRunInit, ): Promise => - await withProxy(async () => await createChildRun(parentRun, init)); + await withProxyHeadersBestEffort(proxyConfig, () => { + const activeParentContext = getActiveOtelContextWithSpan(parentRun.otelTraceId); + try { + return startProductSpan(otelRuntime, { + projectName, + name: init.name, + canonicalName: init.canonicalName, + runType: init.runType, + tags: init.tags, + metadata: mergeMetadata(parentRun.metadata, init.metadata), + inputs: init.inputs, + parentRun, + ...(activeParentContext ? { parentContext: activeParentContext } : {}), + }); + } catch { + return createFallbackChildRun(projectName, parentRun, init); + } + }); - const withRunTree = async (run: InstanceAiTraceRun, fn: () => Promise): Promise => - await withProxy(async () => await withSerializedRunTree(run, fn)); + const withActiveSpan = async (run: InstanceAiTraceRun, fn: () => Promise): Promise => + await withProxyHeadersBestEffort( + proxyConfig, + async () => await withProductSpanContextBestEffort(otelRuntime, run, fn), + ); + const withRunTree = withActiveSpan; const finishRun = async ( run: InstanceAiTraceRun, finishOptions?: InstanceAiTraceRunFinishOptions, ): Promise => { - await withProxy(async () => await finishTraceRun(run, finishOptions)); - // Clean up traceClients when root run finishes - if (!run.parentRunId) { - traceClients.delete(run.traceId); + const isRootRun = !run.parentRunId; + await withProxyHeadersBestEffort( + proxyConfig, + async () => + await finishProductSpanBestEffort( + otelRuntime, + run, + isRootRun ? 
{ ...finishOptions, forceFlush: true } : finishOptions, + ), + ); + if (isRootRun) { + await withProxyHeadersBestEffort( + proxyConfig, + async () => await shutdownProductOtelRuntime(otelRuntime, run.traceId), + ); } }; @@ -890,31 +1097,39 @@ function createTraceContext( error: unknown, metadata?: Record, ): Promise => { - await withProxy( + const isRootRun = !run.parentRunId; + await withProxyHeadersBestEffort( + proxyConfig, async () => - await finishTraceRun(run, { + await finishProductSpanBestEffort(otelRuntime, run, { error: normalizeErrorMessage(error), metadata, + forceFlush: isRootRun, }), ); - if (!run.parentRunId) { - traceClients.delete(run.traceId); + if (isRootRun) { + await withProxyHeadersBestEffort( + proxyConfig, + async () => await shutdownProductOtelRuntime(otelRuntime, run.traceId), + ); } }; const ctx: InstanceAiTraceContext = { projectName, traceKind, + ...(proxyConfig ? { proxyConfig } : {}), rootRun, actorRun, messageRun: rootRun, orchestratorRun: actorRun, startChildRun, withRunTree, - withActiveSpan: withRunTree, + withActiveSpan, + toHeaders: () => ({}), finishRun, failRun, - toHeaders: (run) => hydrateRunTree(run).toHeaders(), + ...(telemetryFactory ? { getTelemetry: telemetryFactory } : {}), wrapTools: (tools, traceOptions) => { if (ctx.replayMode === 'replay' && ctx.traceIndex && ctx.idRemapper) { return replayWrapTools(tools, ctx.traceIndex, ctx.idRemapper, traceOptions); @@ -930,96 +1145,39 @@ function createTraceContext( return ctx; } -function createRunStateFromTree(tree: RunTree): InstanceAiTraceRun { - const parentRunId = tree.parent_run?.id ?? tree.parent_run_id; - - return { - id: tree.id, - name: tree.name, - runType: tree.run_type, - projectName: tree.project_name, - startTime: tree.start_time, - ...(tree.end_time ? { endTime: tree.end_time } : {}), - traceId: tree.trace_id, - dottedOrder: tree.dotted_order, - executionOrder: tree.execution_order, - childExecutionOrder: tree.child_execution_order, - ...(parentRunId ? 
{ parentRunId } : {}), - ...(tree.tags ? { tags: [...tree.tags] } : {}), - ...(tree.metadata ? { metadata: { ...tree.metadata } } : {}), - ...(tree.inputs ? { inputs: sanitizeTracePayload(tree.inputs) } : {}), - ...(tree.outputs ? { outputs: sanitizeTracePayload(tree.outputs) } : {}), - ...(tree.error ? { error: tree.error } : {}), - }; -} - -function syncRunState(state: InstanceAiTraceRun, tree: RunTree): void { - Object.assign(state, createRunStateFromTree(tree)); -} - -function hydrateRunTree(state: InstanceAiTraceRun): RunTree { - const client = traceClients.get(state.traceId); - return new RunTree({ - id: state.id, - name: state.name, - run_type: state.runType, - project_name: state.projectName, - start_time: state.startTime, - end_time: state.endTime, - parent_run_id: state.parentRunId, - execution_order: state.executionOrder, - child_execution_order: state.childExecutionOrder, - trace_id: state.traceId, - dotted_order: state.dottedOrder, - tags: state.tags, - metadata: state.metadata, - inputs: state.inputs, - outputs: state.outputs, - error: state.error, - serialized: {}, - client: client ?? 
getOrCreateDirectClient(), - }); -} - function isTraceableNativeTool(value: unknown): value is TraceableNativeTool { return ( isRecord(value) && typeof value.name === 'string' && typeof value.description === 'string' && - (!('handler' in value) || typeof value.handler === 'function') + typeof value.handler === 'function' ); } -function wrapToolHandler( - tool: TraceableNativeTool, - options: InstanceAiToolTraceOptions | undefined, -): TraceableNativeTool { - if (typeof tool.handler !== 'function') { - return tool; - } - - if (tool.suspendSchema !== undefined || tool.resumeSchema !== undefined) { - return { - ...tool, - handler: async (input, context) => - await traceSuspendableToolHandler(tool, options, input, context), - }; - } - +function wrapToolHandler(tool: TraceableNativeTool): TraceableNativeTool { return { ...tool, - handler: async (input, context) => await traceToolHandler(tool, options, input, context), + handler: async (input, context) => await traceSuspendableToolExecute(tool, input, context), }; } +function shouldTraceLocalToolExecution(tool: TraceableNativeTool): boolean { + return tool.suspendSchema !== undefined || tool.resumeSchema !== undefined; +} + function wrapTools( tools: InstanceAiToolRegistry, - options?: InstanceAiToolTraceOptions, + _options?: InstanceAiToolTraceOptions, ): InstanceAiToolRegistry { const wrapped = createToolRegistry(); for (const [name, tool] of tools) { - wrapped.set(name, isTraceableNativeTool(tool) ? wrapToolHandler(tool, options) : tool); + wrapped.set( + name, + isTraceableNativeTool(tool) && shouldTraceLocalToolExecution(tool) + ? 
wrapToolHandler(tool) + : tool, + ); } return wrapped; @@ -1043,7 +1201,7 @@ function replayWrapTool( handler: async (input, context) => { const event = traceIndex.nextMatching(agentRole, tool.name); const remappedInput: unknown = idRemapper.remapInput(input); - const realOutput = await tool.handler!(remappedInput, context); + const realOutput = await tool.handler(remappedInput, context); if (event) { idRemapper.learn(event.output, realOutput as Record); } @@ -1104,36 +1262,31 @@ function replayWrapTools( // ── Recording wrappers ────────────────────────────────────────────────────── /** - * Wraps a tool to record its I/O to a TraceWriter while also applying - * the normal LangSmith tracing wrapper. + * Wraps a tool to record its I/O to a TraceWriter. Replay records stable + * Instance AI events and intentionally does not depend on LangSmith run shape. */ function recordWrapTool( tool: TraceableNativeTool, traceWriter: TraceWriter, agentRole: string, - traceOptions: InstanceAiToolTraceOptions | undefined, ): TraceableNativeTool { - // First apply LangSmith tracing (preserves existing tracing behavior) - const traced = wrapToolHandler(tool, traceOptions); - return { - ...traced, + ...tool, handler: async (input, context) => { - const resumeData = getResumeData(context); + const resumeData = isInterruptibleToolContext(context) ? context.resumeData : undefined; const inputRecord = (input ?? {}) as Record; - let suspendPayload: unknown; - const originalSuspend = getSuspend(context); - const wrappedContext = originalSuspend + let capturedSuspendPayload: Record | undefined; + const wrappedContext: NativeToolContext = isInterruptibleToolContext(context) ? { ...context, - suspend: async (payload: unknown) => { - suspendPayload = payload; - return await originalSuspend(payload); + suspend: async (suspendPayload: unknown) => { + capturedSuspendPayload = isRecord(suspendPayload) ? 
suspendPayload : {}; + return await context.suspend(suspendPayload); }, } : context; - const result = await traced.handler!(input, wrappedContext); + const result = await tool.handler(input, wrappedContext); const outputRecord = (result ?? {}) as Record; if (resumeData !== undefined && resumeData !== null) { @@ -1144,13 +1297,13 @@ function recordWrapTool( outputRecord, resumeData as Record, ); - } else if (suspendPayload !== undefined) { + } else if (capturedSuspendPayload) { traceWriter.recordToolSuspend( agentRole, tool.name, inputRecord, - outputRecord, - isRecord(suspendPayload) ? suspendPayload : { payload: suspendPayload }, + {}, + capturedSuspendPayload, ); } else { traceWriter.recordToolCall(agentRole, tool.name, inputRecord, outputRecord); @@ -1174,7 +1327,7 @@ function recordWrapTools( wrapped.set(name, tool); continue; } - wrapped.set(name, recordWrapTool(tool, traceWriter, agentRole, options)); + wrapped.set(name, recordWrapTool(tool, traceWriter, agentRole)); } return wrapped; @@ -1208,9 +1361,9 @@ export function createTraceReplayOnlyContext(): InstanceAiTraceContext { startChildRun: async () => await Promise.resolve(stubRun), withRunTree: async (_run, fn) => await fn(), withActiveSpan: async (_run, fn) => await fn(), + toHeaders: () => ({}), finishRun: async () => {}, failRun: async () => {}, - toHeaders: () => ({}), wrapTools: (tools, traceOptions) => { if (ctx.replayMode === 'replay' && ctx.traceIndex && ctx.idRemapper) { return replayWrapTools(tools, ctx.traceIndex, ctx.idRemapper, traceOptions); @@ -1226,99 +1379,214 @@ export function createTraceReplayOnlyContext(): InstanceAiTraceContext { return ctx; } -async function createRun(options: { - projectName: string; - name: string; - runType?: string; - tags?: string[]; - metadata?: Record; - inputs?: unknown; - client?: Client; -}): Promise { - const runTree = new RunTree({ - name: options.name, - run_type: options.runType ?? 
'chain', - project_name: options.projectName, - tags: normalizeTags(DEFAULT_TAGS, options.tags), - metadata: mergeMetadata(options.metadata), - inputs: sanitizeTracePayload(options.inputs), - client: options.client ?? getOrCreateDirectClient(), - }); - await runTree.postRun(); - - if (options.client) { - traceClients.set(runTree.trace_id, options.client); - } - - return createRunStateFromTree(runTree); +interface TraceRuntimeVersions { + agents_version?: string; + workflow_sdk_version?: string; } -async function createChildRun( - parentState: InstanceAiTraceRun, - options: InstanceAiTraceRunInit, -): Promise { - const parentRun = hydrateRunTree(parentState); - const childRun = parentRun.createChild({ - name: options.name, - run_type: options.runType ?? 'chain', - tags: normalizeTags(DEFAULT_TAGS, parentState.tags, options.tags), - metadata: mergeMetadata(parentRun.metadata, options.metadata), - inputs: sanitizeTracePayload(options.inputs), - }); - syncRunState(parentState, parentRun); - await childRun.postRun(); - return createRunStateFromTree(childRun); -} +let traceRuntimeVersions: TraceRuntimeVersions | undefined; -async function finishTraceRun( - runState: InstanceAiTraceRun, - options?: InstanceAiTraceRunFinishOptions, -): Promise { - const runTree = hydrateRunTree(runState); - await runTree.end( - options?.outputs !== undefined ? 
sanitizeTracePayload(options.outputs) : undefined, - options?.error, - Date.now(), - mergeMetadata(options?.metadata), - ); - await runTree.patchRun(); - syncRunState(runState, runTree); -} - -async function withSerializedRunTree( - runState: InstanceAiTraceRun, - fn: () => Promise, -): Promise { - const runTree = hydrateRunTree(runState); +function readPackageVersion(packageName: string): string | undefined { try { - return await withTraceParentContext( - runTree, - async () => await withLangSmithRunTree(runTree, fn), - ); - } finally { - syncRunState(runState, runTree); + const packageJson = hostRequire(`${packageName}/package.json`) as { version?: unknown }; + return typeof packageJson.version === 'string' ? packageJson.version : undefined; + } catch { + return undefined; } } +function getTraceRuntimeVersions(): TraceRuntimeVersions { + if (!traceRuntimeVersions) { + const agentsVersion = readPackageVersion('@n8n/agents'); + const workflowSdkVersion = readPackageVersion('@n8n/workflow-sdk'); + traceRuntimeVersions = { + ...(agentsVersion ? { agents_version: agentsVersion } : {}), + ...(workflowSdkVersion ? { workflow_sdk_version: workflowSdkVersion } : {}), + }; + } + + return traceRuntimeVersions; +} + function buildBaseMetadata(options: CreateInstanceAiTraceContextOptions): Record { return { thread_id: options.threadId, + 'langsmith.metadata.thread_id': options.threadId, conversation_id: options.conversationId ?? options.threadId, message_group_id: options.messageGroupId, message_id: options.messageId, run_id: options.runId, + activation_id: options.runId, user_id: options.userId, - ...(options.modelId !== undefined - ? { model_id: serializeModelIdForTrace(options.modelId) } - : {}), + 'instance_ai.trace_version': OTEL_TRACE_VERSION, + ...getTraceRuntimeVersions(), ...(options.n8nVersion !== undefined ? { n8n_version: options.n8nVersion } : {}), ...(options.workflowSdkVersion !== undefined ? 
{ workflow_sdk_version: options.workflowSdkVersion } : {}), + ...(options.modelId !== undefined + ? { model_id: serializeModelIdForTrace(options.modelId) } + : {}), ...options.metadata, }; } +function buildDetachedSubAgentMetadata( + options: CreateDetachedSubAgentTraceContextOptions, + includeSpawnMetadata: boolean, +): Record { + return { + agent_role: options.role, + agent_id: options.agentId, + execution_mode: 'background_subagent', + trace_kind: 'background_subagent', + task_kind: options.kind, + ...(options.taskId ? { task_id: options.taskId } : {}), + ...(options.plannedTaskId ? { planned_task_id: options.plannedTaskId } : {}), + ...(options.workItemId ? { work_item_id: options.workItemId } : {}), + ...(includeSpawnMetadata && options.spawnedByTraceId + ? { spawned_by_trace_id: options.spawnedByTraceId } + : {}), + ...(includeSpawnMetadata && options.spawnedBySpanId + ? { spawned_by_span_id: options.spawnedBySpanId } + : {}), + ...(includeSpawnMetadata && options.spawnedByRunId + ? { spawned_by_run_id: options.spawnedByRunId } + : {}), + ...(includeSpawnMetadata && options.spawnedByAgentId + ? { spawned_by_agent_id: options.spawnedByAgentId } + : {}), + ...(includeSpawnMetadata && options.spawnedByAgentRole + ? { spawned_by_agent_role: options.spawnedByAgentRole } + : {}), + ...(includeSpawnMetadata && options.spawnedByToolCallId + ? 
{ spawned_by_tool_call_id: options.spawnedByToolCallId } + : {}), + subagent_role: options.role, + }; +} + +function buildInternalOperationMetadata(operationName: string): Record { + return { + agent_role: operationName, + execution_mode: 'internal', + trace_kind: 'internal_operation', + operation_name: operationName, + }; +} + +function createTelemetryFactory(options: { + projectName: string; + traceKind: InstanceAiTraceContext['traceKind']; + rootRun: InstanceAiTraceRun; + actorRun: InstanceAiTraceRun; + getActorRun?: () => InstanceAiTraceRun; + baseMetadata: Record; + proxyConfig?: ServiceProxyConfig; + baseTelemetry?: BuiltTelemetry; +}): (telemetryOptions: InstanceAiTelemetryOptions) => BuiltTelemetry | Telemetry { + return (telemetryOptions) => { + const actorRun = options.getActorRun?.() ?? options.actorRun; + const agentRole = telemetryOptions.agentRole; + const executionMode = + telemetryOptions.executionMode ?? + (options.traceKind === 'background_subagent' ? 'background_subagent' : 'foreground'); + const metadata = toTelemetryMetadata(options.baseMetadata, telemetryOptions.metadata, { + agent_role: agentRole, + execution_mode: executionMode, + trace_kind: options.traceKind, + langsmith_trace_id: options.rootRun.traceId, + langsmith_root_run_id: options.rootRun.id, + langsmith_actor_run_id: actorRun.id, + }); + const functionId = telemetryOptions.functionId ?? 
formatTelemetryFunctionId(agentRole); + + if (options.baseTelemetry) { + return { + ...options.baseTelemetry, + functionId, + metadata, + recordInputs: true, + recordOutputs: true, + runtimeRootSpanEnabled: false, + }; + } + + return createLangSmithTelemetryBuilder(options.projectName, options.proxyConfig) + .functionId(functionId) + .metadata(metadata) + .recordInputs(true) + .recordOutputs(true) + .runtimeRootSpan(false); + }; +} + +function createLangSmithTelemetryBuilder( + projectName: string, + proxyConfig?: ServiceProxyConfig, +): LangSmithTelemetry { + return new LangSmithTelemetry({ + project: projectName, + transformExportedSpan: redactLangSmithTelemetrySpan, + ...(proxyConfig + ? { + apiKey: '-', + endpoint: proxyConfig.apiUrl, + headers: proxyConfig.getAuthHeaders, + } + : {}), + }); +} + +async function createProductOtelRuntime( + projectName: string, + proxyConfig?: ServiceProxyConfig, +): Promise { + const telemetry = await createLangSmithTelemetryBuilder(projectName, proxyConfig) + .functionId('instance-ai.product') + .metadata({}) + .recordInputs(true) + .recordOutputs(true) + .build(); + + return { + telemetry, + spans: new Map(), + contexts: new Map(), + shutdown: false, + }; +} + +function createProductTraceContext(options: { + projectName: string; + traceKind: InstanceAiTraceContext['traceKind']; + rootRun: InstanceAiTraceRun; + actorRun: InstanceAiTraceRun; + otelRuntime: ProductOtelTraceRuntime; + baseMetadata: Record; + proxyConfig?: ServiceProxyConfig; + getActorRun?: () => InstanceAiTraceRun; +}): InstanceAiTraceContext { + return createTraceContext( + options.projectName, + options.traceKind, + options.rootRun, + options.actorRun, + options.otelRuntime, + options.proxyConfig, + createTelemetryFactory({ + projectName: options.projectName, + traceKind: options.traceKind, + rootRun: options.rootRun, + actorRun: options.actorRun, + ...(options.getActorRun ? 
{ getActorRun: options.getActorRun } : {}), + baseMetadata: options.baseMetadata, + baseTelemetry: options.otelRuntime.telemetry, + ...(options.proxyConfig ? { proxyConfig: options.proxyConfig } : {}), + }), + ); +} + export async function createInstanceAiTraceContext( options: CreateInstanceAiTraceContextOptions, ): Promise { @@ -1326,114 +1594,133 @@ export async function createInstanceAiTraceContext( return undefined; } - ensureLangSmithTracingEnv(); - - const client = options.proxyConfig ? getOrCreateProxyClient(options.proxyConfig) : undefined; const projectName = options.projectName ?? DEFAULT_PROJECT_NAME; const baseMetadata = buildBaseMetadata(options); const createTraceRuns = async () => { - const messageRun = await createRun({ + const otelRuntime = await createProductOtelRuntime(projectName, options.proxyConfig); + const traceContextRef: { current?: InstanceAiTraceContext } = {}; + const messageRun = startProductSpan(otelRuntime, { projectName, - name: 'message_turn', + name: 'turn', + canonicalName: 'instance-ai.message_turn', + runType: 'chain', tags: ['message-turn'], - metadata: mergeMetadata(baseMetadata, { agent_role: 'message_turn' }), - inputs: options.input, - client, - }); - const orchestratorRun = await createChildRun(messageRun, { - name: 'orchestrator', - tags: ['orchestrator'], - metadata: mergeMetadata(baseMetadata, { agent_role: 'orchestrator' }), - inputs: options.input, - }); - - return createTraceContext( - projectName, - 'message_turn', - messageRun, - orchestratorRun, - options.proxyConfig?.getAuthHeaders, - ); - }; - - if (options.proxyConfig) { - const headers = await options.proxyConfig.getAuthHeaders(); - return await proxyHeaderStore.run(headers, createTraceRuns); - } - return await createTraceRuns(); -} - -export async function createInternalOperationTraceContext( - options: CreateInternalOperationTraceContextOptions, -): Promise { - if (!isLangSmithTracingEnabled(!!options.proxyConfig)) { - return undefined; - } - - 
ensureLangSmithTracingEnv(); - - const client = options.proxyConfig ? getOrCreateProxyClient(options.proxyConfig) : undefined; - const projectName = options.projectName ?? DEFAULT_PROJECT_NAME; - const baseMetadata = buildBaseMetadata(options); - - const createInternalRun = async () => { - const operationRun = await createRun({ - projectName, - name: `internal:${options.operationName}`, - tags: ['internal'], metadata: mergeMetadata(baseMetadata, { - agent_role: options.operationName, - operation_name: options.operationName, + agent_role: 'message_turn', + execution_mode: 'foreground', + trace_kind: 'message_turn', }), inputs: options.input, - client, + root: true, }); - - return createTraceContext( + const tracing = createProductTraceContext({ projectName, - 'internal_operation', - operationRun, - operationRun, - options.proxyConfig?.getAuthHeaders, - ); + traceKind: 'message_turn', + rootRun: messageRun, + actorRun: messageRun, + otelRuntime, + baseMetadata, + getActorRun: () => traceContextRef.current?.actorRun ?? messageRun, + ...(options.proxyConfig ? 
{ proxyConfig: options.proxyConfig } : {}), + }); + traceContextRef.current = tracing; + return tracing; }; - if (options.proxyConfig) { - const headers = await options.proxyConfig.getAuthHeaders(); - return await proxyHeaderStore.run(headers, createInternalRun); + try { + return await withProxyHeaders(options.proxyConfig, createTraceRuns); + } catch { + return undefined; } - return await createInternalRun(); } export async function continueInstanceAiTraceContext( existingContext: InstanceAiTraceContext, options: CreateInstanceAiTraceContextOptions, -): Promise { +): Promise; +export async function continueInstanceAiTraceContext( + existingContext: InstanceAiTraceContext | undefined, + options: CreateInstanceAiTraceContextOptions, +): Promise; +export async function continueInstanceAiTraceContext( + existingContext: InstanceAiTraceContext | undefined, + options: CreateInstanceAiTraceContextOptions, +): Promise { + const proxyConfig = options.proxyConfig ?? existingContext?.proxyConfig; + if (!existingContext && !isLangSmithTracingEnabled(!!proxyConfig)) { + return undefined; + } + if (existingContext?.rootRun.traceId === 'stub' && !isLangSmithTracingEnabled(!!proxyConfig)) { + return existingContext; + } + const baseMetadata = buildBaseMetadata(options); + const projectName = existingContext?.projectName ?? options.projectName ?? DEFAULT_PROJECT_NAME; + const continuedMetadata = + existingContext && existingContext.rootRun.traceId !== 'stub' + ? { + continued_from_trace_id: + existingContext.rootRun.otelTraceId ?? existingContext.rootRun.traceId, + continued_from_run_id: existingContext.rootRun.id, + resumed_from_trace_id: + existingContext.rootRun.otelTraceId ?? existingContext.rootRun.traceId, + ...(existingContext.actorRun.otelSpanId + ? 
{ resumed_from_span_id: existingContext.actorRun.otelSpanId } + : {}), + resumed_from_activation_id: existingContext.actorRun.id, + } + : {}; const createContinuation = async () => { - const orchestratorRun = await createChildRun(existingContext.messageRun, { - name: 'orchestrator', - tags: ['orchestrator'], - metadata: mergeMetadata(baseMetadata, { agent_role: 'orchestrator' }), + const otelRuntime = await createProductOtelRuntime(projectName, proxyConfig); + const rootRun = startProductSpan(otelRuntime, { + projectName, + name: `resume: ${formatResumeReasonLabel(options.metadata?.resume_reason)}`, + canonicalName: 'instance-ai.orchestrator_resume', + runType: 'chain', + tags: ['orchestrator-resume'], + metadata: mergeMetadata(baseMetadata, { + agent_role: 'orchestrator_resume', + execution_mode: 'resume', + trace_kind: 'orchestrator_resume', + ...continuedMetadata, + }), inputs: options.input, + root: true, + }); + const orchestratorRun = startProductSpan(otelRuntime, { + projectName, + name: 'agent: orchestrator', + canonicalName: 'instance-ai.agent.orchestrator', + runType: 'chain', + tags: ['orchestrator', 'resume'], + metadata: mergeMetadata(baseMetadata, { + agent_role: 'orchestrator', + execution_mode: 'resume', + trace_kind: 'orchestrator_resume', + ...continuedMetadata, + }), + inputs: options.input, + parentRun: rootRun, }); - return createTraceContext( - existingContext.projectName, - 'message_turn', - existingContext.rootRun, - orchestratorRun, - options.proxyConfig?.getAuthHeaders, - ); + return createProductTraceContext({ + projectName, + traceKind: 'orchestrator_resume', + rootRun, + actorRun: orchestratorRun, + otelRuntime, + baseMetadata, + ...(proxyConfig ? 
{ proxyConfig } : {}), + }); }; - if (options.proxyConfig) { - const headers = await options.proxyConfig.getAuthHeaders(); - return await proxyHeaderStore.run(headers, createContinuation); + try { + return await withProxyHeaders(proxyConfig, createContinuation); + } catch { + return undefined; } - return await createContinuation(); } export async function createDetachedSubAgentTraceContext( @@ -1443,54 +1730,102 @@ export async function createDetachedSubAgentTraceContext( return undefined; } - ensureLangSmithTracingEnv(); - - const client = options.proxyConfig ? getOrCreateProxyClient(options.proxyConfig) : undefined; const projectName = options.projectName ?? DEFAULT_PROJECT_NAME; const baseMetadata = buildBaseMetadata(options); const createDetachedRuns = async () => { - const rootRun = await createRun({ + const otelRuntime = await createProductOtelRuntime(projectName, options.proxyConfig); + const rootMetadata = buildDetachedSubAgentMetadata(options, true); + const actorMetadata = buildDetachedSubAgentMetadata(options, false); + const rootRun = startProductSpan(otelRuntime, { projectName, - name: `subagent:${options.role}`, + name: `background task: ${formatAgentRoleLabel(options.role)}`, + canonicalName: 'instance-ai.background_subagent', + runType: 'chain', tags: normalizeTags( ['sub-agent', 'background'], options.plannedTaskId ? ['planned'] : undefined, ), - metadata: mergeMetadata(baseMetadata, { - agent_role: options.role, - agent_id: options.agentId, - task_kind: options.kind, - ...(options.taskId ? { task_id: options.taskId } : {}), - ...(options.plannedTaskId ? { planned_task_id: options.plannedTaskId } : {}), - ...(options.workItemId ? { work_item_id: options.workItemId } : {}), - ...(options.spawnedByTraceId ? { spawned_by_trace_id: options.spawnedByTraceId } : {}), - ...(options.spawnedBySpanId ? { spawned_by_span_id: options.spawnedBySpanId } : {}), - ...(options.spawnedByRunId ? 
{ spawned_by_run_id: options.spawnedByRunId } : {}), - ...(options.spawnedByAgentId ? { spawned_by_agent_id: options.spawnedByAgentId } : {}), - ...(options.spawnedByAgentRole - ? { spawned_by_agent_role: options.spawnedByAgentRole } - : {}), - ...(options.spawnedByToolCallId - ? { spawned_by_tool_call_id: options.spawnedByToolCallId } - : {}), - }), + metadata: mergeMetadata(baseMetadata, rootMetadata), inputs: options.input, - client, + root: true, + }); + const actorRun = startProductSpan(otelRuntime, { + projectName, + name: `agent: ${formatAgentRoleLabel(options.role)}`, + canonicalName: `instance-ai.agent.${options.role}`, + runType: 'chain', + tags: normalizeTags( + ['sub-agent', 'background'], + options.plannedTaskId ? ['planned'] : undefined, + ), + metadata: mergeMetadata(baseMetadata, actorMetadata), + inputs: options.input, + parentRun: rootRun, }); - return createTraceContext( + return createProductTraceContext({ projectName, - 'detached_subagent', + traceKind: 'background_subagent', rootRun, - rootRun, - options.proxyConfig?.getAuthHeaders, - ); + actorRun, + otelRuntime, + baseMetadata: mergeMetadata(baseMetadata, rootMetadata) ?? baseMetadata, + ...(options.proxyConfig ? { proxyConfig: options.proxyConfig } : {}), + }); }; - if (options.proxyConfig) { - const headers = await options.proxyConfig.getAuthHeaders(); - return await proxyHeaderStore.run(headers, createDetachedRuns); + try { + return await withProxyHeaders(options.proxyConfig, createDetachedRuns); + } catch { + return undefined; + } +} + +export async function createInternalOperationTraceContext( + options: CreateInternalOperationTraceContextOptions, +): Promise { + if (!isInternalOperationTracingEnabled() || !isLangSmithTracingEnabled(!!options.proxyConfig)) { + return undefined; + } + + const projectName = options.projectName ?? DEFAULT_PROJECT_NAME; + const baseMetadata = buildBaseMetadata({ + ...options, + messageId: options.messageId ?? 
`internal:${options.operationName}:${options.runId}`, + metadata: mergeMetadata(options.metadata, { + operation_name: options.operationName, + }), + }); + + const createInternalRuns = async () => { + const otelRuntime = await createProductOtelRuntime(projectName, options.proxyConfig); + const internalMetadata = buildInternalOperationMetadata(options.operationName); + const rootRun = startProductSpan(otelRuntime, { + projectName, + name: `internal: ${formatInternalOperationLabel(options.operationName)}`, + canonicalName: `instance-ai.internal.${options.operationName}`, + runType: 'chain', + tags: ['internal-operation'], + metadata: mergeMetadata(baseMetadata, internalMetadata), + inputs: options.input, + root: true, + }); + + return createProductTraceContext({ + projectName, + traceKind: 'internal_operation', + rootRun, + actorRun: rootRun, + otelRuntime, + baseMetadata: mergeMetadata(baseMetadata, internalMetadata) ?? baseMetadata, + ...(options.proxyConfig ? { proxyConfig: options.proxyConfig } : {}), + }); + }; + + try { + return await withProxyHeaders(options.proxyConfig, createInternalRuns); + } catch { + return undefined; } - return await createDetachedRuns(); } diff --git a/packages/@n8n/instance-ai/src/tracing/trace-labels.ts b/packages/@n8n/instance-ai/src/tracing/trace-labels.ts new file mode 100644 index 00000000000..fa4b7076f12 --- /dev/null +++ b/packages/@n8n/instance-ai/src/tracing/trace-labels.ts @@ -0,0 +1,34 @@ +export function formatTraceLabel(value: string): string { + return value + .trim() + .replace(/[._\s]+/g, '-') + .replace(/-+/g, '-') + .replace(/^-+|-+$/g, ''); +} + +export function formatAgentRoleLabel(role: string): string { + return formatTraceLabel(role.replace(/^instance-ai[._-]?/, '')); +} + +export function formatResumeReasonLabel(reason: unknown): string { + if (typeof reason !== 'string' || reason.trim().length === 0) { + return 'checkpoint'; + } + + return reason + .trim() + .replace(/[._-]+/g, ' ') + .replace(/\s+/g, ' '); +} + 
+export function formatInternalOperationLabel(operationName: string): string { + return formatAgentRoleLabel(operationName); +} + +export function formatTelemetryFunctionId(agentRole: string): string { + if (agentRole.startsWith('instance-ai.')) { + return agentRole; + } + + return `instance-ai.${agentRole.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-+|-+$/g, '')}`; +} diff --git a/packages/@n8n/instance-ai/src/tracing/trace-payloads.ts b/packages/@n8n/instance-ai/src/tracing/trace-payloads.ts new file mode 100644 index 00000000000..82e63abb07a --- /dev/null +++ b/packages/@n8n/instance-ai/src/tracing/trace-payloads.ts @@ -0,0 +1,1227 @@ +import type { AttributeValue } from '@n8n/agents'; +import { createHash } from 'node:crypto'; + +import { + DOMAIN_TOOL_IDS, + ORCHESTRATION_TOOL_IDS, + ORCHESTRATION_TOOL_NAMES, + WORKSPACE_TOOL_IDS, +} from '../tools/tool-ids'; +import type { InstanceAiToolRegistry } from '../types'; +import { formatAgentRoleLabel, formatTraceLabel } from './trace-labels'; +import { scrubSecretsInText } from '../utils/scrub-secrets'; +import { isRecord } from '../utils/stream-helpers'; + +const MAX_TRACE_DEPTH = 4; +const MAX_PROMPT_SCHEMA_TRACE_DEPTH = 12; +const MAX_TOOL_IO_TRACE_DEPTH = 8; +const MAX_TRACE_STRING_LENGTH = 2_000; +const MAX_TOOL_ACTION_DISPLAY_LENGTH = 64; +const MAX_TRACE_ARRAY_ITEMS = 20; +const MAX_TRACE_OBJECT_KEYS = 30; +const SENSITIVE_TELEMETRY_KEY_PATTERN = + /(api[_-]?key|authorization|bearer|cookie|credentials?|password|secret|access[_-]?token|refresh[_-]?token|id[_-]?token|session[_-]?token|auth[_-]?token|(?:^|[._-])token$)/i; + +const LANGSMITH_TRACE_NAME = 'langsmith.trace.name'; +const LANGSMITH_SPAN_KIND = 'langsmith.span.kind'; +const LANGSMITH_USAGE_METADATA = 'langsmith.usage_metadata'; +export const GEN_AI_PROMPT = 'gen_ai.prompt'; +export const GEN_AI_COMPLETION = 'gen_ai.completion'; +const GEN_AI_OPERATION_NAME = 'gen_ai.operation.name'; +const GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens'; 
+const GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens'; +const GEN_AI_USAGE_TOTAL_TOKENS = 'gen_ai.usage.total_tokens'; +const GEN_AI_USAGE_INPUT_TOKEN_DETAILS = 'gen_ai.usage.input_token_details'; +const AI_OPERATION_ID = 'ai.operationId'; +const LLM_AI_SDK_OPERATION_IDS = new Set([ + 'ai.generateText.doGenerate', + 'ai.streamText.doStream', + 'ai.generateObject.doGenerate', + 'ai.streamObject.doStream', +]); + +export interface AgentTraceInputOptions { + systemPrompt?: string; + tools?: InstanceAiToolRegistry; + deferredTools?: InstanceAiToolRegistry; + runtimeTools?: InstanceAiToolRegistry; + modelId?: unknown; + memory?: unknown; + toolSearchEnabled?: boolean; + inputProcessors?: string[]; +} + +function truncateString(value: string): string { + if (value.length <= MAX_TRACE_STRING_LENGTH) { + return value; + } + + return `${value.slice(0, MAX_TRACE_STRING_LENGTH)}...`; +} + +export function toTelemetryAttributeValue(value: unknown): AttributeValue | undefined { + if (value === undefined || value === null) { + return undefined; + } + + if (typeof value === 'string') { + return truncateString(value); + } + + if (typeof value === 'number' || typeof value === 'boolean') { + return value; + } + + if (Array.isArray(value)) { + if (value.every((entry): entry is string => typeof entry === 'string')) { + return value.map((entry) => truncateString(entry)); + } + if (value.every((entry): entry is number => typeof entry === 'number')) { + return value; + } + if (value.every((entry): entry is boolean => typeof entry === 'boolean')) { + return value; + } + return value.map((entry) => truncateString(String(sanitizeTraceValue(entry)))); + } + + const sanitized = sanitizeTraceValue(value); + if (typeof sanitized === 'string') { + return sanitized; + } + if (typeof sanitized === 'number' || typeof sanitized === 'boolean') { + return sanitized; + } + return truncateString(JSON.stringify(sanitized)); +} + +export function toTelemetryMetadata( + ...records: Array | 
undefined> +): Record { + const metadata: Record = {}; + + for (const record of records) { + if (!record) continue; + + for (const [key, value] of Object.entries(record)) { + if (key.startsWith('langsmith.metadata.')) { + continue; + } + const attributeValue = toTelemetryAttributeValue(value); + if (attributeValue !== undefined) { + metadata[key] = attributeValue; + } + } + } + + return metadata; +} + +function redactTelemetryJsonValue( + value: unknown, + keyHint?: string, + depth = 0, + maxDepth = MAX_TRACE_DEPTH, +): unknown { + if (depth > maxDepth) { + return '[redacted-depth-limit]'; + } + + if (keyHint && SENSITIVE_TELEMETRY_KEY_PATTERN.test(keyHint)) { + return '[redacted]'; + } + + if (typeof value === 'string') { + return scrubSecretsInText(value); + } + + if (typeof value === 'number' || typeof value === 'boolean' || value === null) { + return value; + } + + if (Array.isArray(value)) { + return value.map((entry) => redactTelemetryJsonValue(entry, keyHint, depth + 1, maxDepth)); + } + + if (isRecord(value)) { + const redacted: Record = {}; + for (const [key, entryValue] of Object.entries(value)) { + redacted[key] = redactTelemetryJsonValue(entryValue, key, depth + 1, maxDepth); + } + return redacted; + } + + return sanitizeTraceValue(value); +} + +function maxRedactionDepthForAttribute(key: string): number { + if (key === 'ai.prompt.messages' || key === GEN_AI_PROMPT) { + return MAX_PROMPT_SCHEMA_TRACE_DEPTH; + } + + if (key === 'ai.toolCall.args' || key === 'ai.toolCall.result' || key === GEN_AI_COMPLETION) { + return MAX_TOOL_IO_TRACE_DEPTH; + } + + return MAX_TRACE_DEPTH; +} + +function redactTelemetryAttribute(key: string, value: unknown): unknown { + if (SENSITIVE_TELEMETRY_KEY_PATTERN.test(key)) { + return '[redacted]'; + } + + const maxDepth = maxRedactionDepthForAttribute(key); + + if (typeof value !== 'string') { + return redactTelemetryJsonValue(value, key, 0, maxDepth); + } + + const trimmed = value.trim(); + if ( + (trimmed.startsWith('{') && 
trimmed.endsWith('}')) || + (trimmed.startsWith('[') && trimmed.endsWith(']')) + ) { + try { + const parsed: unknown = JSON.parse(trimmed); + return JSON.stringify(redactTelemetryJsonValue(parsed, key, 0, maxDepth)); + } catch { + return scrubSecretsInText(value); + } + } + + return scrubSecretsInText(value); +} + +function parseTelemetryJson(value: unknown): unknown { + if (typeof value !== 'string') { + return undefined; + } + + try { + const parsed: unknown = JSON.parse(value); + return parsed; + } catch { + return undefined; + } +} + +function parseTelemetryTools(value: unknown): unknown[] | undefined { + if (!Array.isArray(value)) { + const parsed = parseTelemetryJson(value); + return Array.isArray(parsed) ? parseTelemetryTools(parsed) : undefined; + } + + const entries: readonly unknown[] = value; + const tools: unknown[] = []; + for (const entry of entries) { + tools.push(typeof entry === 'string' ? (parseTelemetryJson(entry) ?? entry) : entry); + } + + return tools.length > 0 ? tools : undefined; +} + +function readToolCallPayload(part: Record): unknown { + if ('input' in part) return part.input; + if ('args' in part) return part.args; + if ('arguments' in part) return part.arguments; + return {}; +} + +function readToolResultPayload(part: Record): unknown { + if ('output' in part) return part.output; + if ('result' in part) return part.result; + if ('content' in part) return part.content; + return ''; +} + +function stringifyToolPayload(value: unknown): string { + if (value === undefined || value === null) { + return ''; + } + + if (typeof value === 'string') { + return value; + } + + try { + return JSON.stringify(value) ?? 
''; + } catch { + return '[unserializable]'; + } +} + +function normalizeAssistantMessageForLangSmith(message: Record): unknown { + const content = message.content; + if (!Array.isArray(content)) { + return message; + } + + const textParts: string[] = []; + const toolCalls: Array> = []; + + for (const part of content) { + if (!isRecord(part)) continue; + + if (part.type === 'text' && typeof part.text === 'string') { + textParts.push(part.text); + continue; + } + + if (part.type !== 'tool-call') continue; + + const toolCallId = + typeof part.toolCallId === 'string' + ? part.toolCallId + : typeof part.id === 'string' + ? part.id + : undefined; + const toolName = + typeof part.toolName === 'string' + ? part.toolName + : typeof part.name === 'string' + ? part.name + : undefined; + if (!toolCallId || !toolName) continue; + + toolCalls.push({ + id: toolCallId, + type: 'function', + function: { + name: toolName, + arguments: stringifyToolPayload(readToolCallPayload(part)), + }, + }); + } + + if (toolCalls.length === 0) { + return message; + } + + return { + role: 'assistant', + content: textParts.join('\n'), + tool_calls: toolCalls, + }; +} + +function normalizeToolMessageForLangSmith(message: Record): unknown[] { + const content = message.content; + if (!Array.isArray(content)) { + return [message]; + } + + const normalizedMessages: unknown[] = []; + for (const part of content) { + if (!isRecord(part) || part.type !== 'tool-result') { + continue; + } + + const toolCallId = + typeof part.toolCallId === 'string' + ? part.toolCallId + : typeof part.id === 'string' + ? part.id + : undefined; + const toolName = + typeof part.toolName === 'string' + ? part.toolName + : typeof part.name === 'string' + ? part.name + : undefined; + if (!toolCallId) continue; + + normalizedMessages.push({ + role: 'tool', + tool_call_id: toolCallId, + ...(toolName ? 
{ name: toolName } : {}), + content: stringifyToolPayload(readToolResultPayload(part)), + }); + } + + return normalizedMessages.length > 0 ? normalizedMessages : [message]; +} + +function normalizeMessagesForLangSmith(messages: unknown[]): unknown[] { + const normalizedMessages: unknown[] = []; + + for (const message of messages) { + if (!isRecord(message) || typeof message.role !== 'string') { + normalizedMessages.push(message); + continue; + } + + if (message.role === 'assistant') { + normalizedMessages.push(normalizeAssistantMessageForLangSmith(message)); + continue; + } + + if (message.role === 'tool') { + normalizedMessages.push(...normalizeToolMessageForLangSmith(message)); + continue; + } + + normalizedMessages.push(message); + } + + return normalizedMessages; +} + +function stableStringify(value: unknown): string { + if (Array.isArray(value)) { + return `[${value.map((entry) => stableStringify(entry)).join(',')}]`; + } + + if (isRecord(value)) { + const entries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)); + return `{${entries + .map(([key, entryValue]) => `${JSON.stringify(key)}:${stableStringify(entryValue)}`) + .join(',')}}`; + } + + return JSON.stringify(value); +} + +function stableHash(value: unknown): string { + return createHash('sha256').update(stableStringify(value)).digest('hex'); +} + +function toolNameFromDefinition(tool: unknown): string | undefined { + if (!isRecord(tool)) return undefined; + if (typeof tool.name === 'string') return tool.name; + if (typeof tool.id === 'string') return tool.id; + if (isRecord(tool.function) && typeof tool.function.name === 'string') { + return tool.function.name; + } + return undefined; +} + +function enrichLangSmithToolAttributes(attributes: Record): unknown[] | undefined { + const tools = parseTelemetryTools(attributes['ai.prompt.tools']); + if (!tools) { + return undefined; + } + + const redactedTools = redactTelemetryJsonValue( + tools, + undefined, + 0, + MAX_PROMPT_SCHEMA_TRACE_DEPTH, 
+ ); + const normalizedTools: unknown[] = Array.isArray(redactedTools) ? redactedTools : tools; + const toolNames = normalizedTools + .map(toolNameFromDefinition) + .filter((name): name is string => name !== undefined); + const serializedTools = JSON.stringify(normalizedTools); + const schemaHash = stableHash(normalizedTools); + + attributes['llm.available_tool_count'] = normalizedTools.length; + attributes['llm.available_tool_names'] = toolNames; + attributes['llm.tool_manifest_ref'] = schemaHash; + attributes['llm.tool_schema_hash'] = schemaHash; + attributes.tools = serializedTools; + attributes['invocation_params.tools'] = serializedTools; + + const toolChoice = parseTelemetryJson(attributes['ai.prompt.toolChoice']); + if (toolChoice !== undefined) { + const redactedToolChoice = redactTelemetryJsonValue( + toolChoice, + undefined, + 0, + MAX_PROMPT_SCHEMA_TRACE_DEPTH, + ); + attributes['llm.tool_choice'] = JSON.stringify(redactedToolChoice); + attributes['invocation_params.tool_choice'] = JSON.stringify(redactedToolChoice); + } + + return normalizedTools; +} + +function enrichLangSmithPromptAttribute(attributes: Record): void { + const tools = enrichLangSmithToolAttributes(attributes); + + if (attributes['gen_ai.prompt'] !== undefined) { + return; + } + + const messages = parseTelemetryJson(attributes['ai.prompt.messages']); + if (!Array.isArray(messages)) { + return; + } + + const toolChoice = parseTelemetryJson(attributes['ai.prompt.toolChoice']); + if (!tools && toolChoice === undefined) { + return; + } + + const prompt: Record = { input: normalizeMessagesForLangSmith(messages) }; + if (tools) { + prompt.tools = tools; + } + + if (toolChoice !== undefined) { + prompt.tool_choice = toolChoice; + } + + attributes['gen_ai.prompt'] = JSON.stringify( + redactTelemetryJsonValue(prompt, undefined, 0, MAX_PROMPT_SCHEMA_TRACE_DEPTH), + ); +} + +function numberFromAttribute(value: unknown): number | undefined { + if (typeof value === 'number' && 
Number.isFinite(value)) { + return value; + } + if (typeof value === 'string' && value.trim()) { + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : undefined; + } + return undefined; +} + +function readTokenDetail(details: unknown, keys: string[]): number | undefined { + const parsedDetails = typeof details === 'string' ? parseTelemetryJson(details) : details; + if (!isRecord(parsedDetails)) { + return undefined; + } + + for (const key of keys) { + const value = numberFromAttribute(parsedDetails[key]); + if (value !== undefined) { + return value; + } + } + return undefined; +} + +function readNestedRecord( + record: Record, + keys: string[], +): Record { + let current: unknown = record; + for (const key of keys) { + if (!isRecord(current)) { + return {}; + } + current = current[key]; + } + return isRecord(current) ? current : {}; +} + +function readProviderAnthropicUsage(attributes: Record): Record { + const providerMetadata = parseTelemetryJson(attributes['ai.response.providerMetadata']); + return isRecord(providerMetadata) + ? readNestedRecord(providerMetadata, ['anthropic', 'usage']) + : {}; +} + +function firstNumberAttribute( + attributes: Record, + keys: string[], +): number | undefined { + for (const key of keys) { + const value = numberFromAttribute(attributes[key]); + if (value !== undefined) { + return value; + } + } + return undefined; +} + +function calculateInputTokenAccounting( + inputTokens: number | undefined, + cacheReadTokens: number, + cacheCreationTokens: number, +): { regularInputTokens: number; totalInputTokens: number } { + const cachedInputTokens = cacheReadTokens + cacheCreationTokens; + const observedInputTokens = inputTokens ?? 0; + const regularInputTokens = + observedInputTokens >= cachedInputTokens + ? 
Math.max(0, observedInputTokens - cachedInputTokens) + : observedInputTokens; + + return { + regularInputTokens, + totalInputTokens: regularInputTokens + cachedInputTokens, + }; +} + +function buildLangSmithUsageMetadata( + attributes: Record, +): Record | undefined { + const inputTokens = firstNumberAttribute(attributes, [ + GEN_AI_USAGE_INPUT_TOKENS, + 'ai.usage.inputTokens', + 'ai.usage.promptTokens', + ]); + + const outputTokens = + firstNumberAttribute(attributes, [ + GEN_AI_USAGE_OUTPUT_TOKENS, + 'ai.usage.outputTokens', + 'ai.usage.completionTokens', + ]) ?? 0; + if (inputTokens === undefined && outputTokens === 0) { + return; + } + + const providerAnthropicUsage = readProviderAnthropicUsage(attributes); + const inputDetails = attributes[GEN_AI_USAGE_INPUT_TOKEN_DETAILS]; + const cacheReadTokens = + firstNumberAttribute(attributes, [ + 'ai.usage.inputTokenDetails.cacheReadTokens', + 'ai.usage.cachedInputTokens', + 'ai.usage.cacheReadInputTokens', + ]) ?? + firstNumberAttribute(providerAnthropicUsage, ['cache_read_input_tokens']) ?? + readTokenDetail(inputDetails, [ + 'cache_read', + 'cache_read_tokens', + 'cache_read_input_tokens', + 'cached_tokens', + ]) ?? + 0; + const cacheCreationTokens = + firstNumberAttribute(attributes, [ + 'ai.usage.inputTokenDetails.cacheCreationTokens', + 'ai.usage.cacheCreationInputTokens', + 'ai.usage.inputTokenDetails.cacheWriteTokens', + 'ai.usage.cacheWriteInputTokens', + ]) ?? + firstNumberAttribute(providerAnthropicUsage, [ + 'cache_creation_input_tokens', + 'cache_write_input_tokens', + ]) ?? + readTokenDetail(inputDetails, [ + 'cache_creation', + 'cache_creation_tokens', + 'cache_creation_input_tokens', + 'cache_write', + 'cache_write_tokens', + 'cache_write_input_tokens', + ]) ?? 
+ 0; + + const { totalInputTokens } = calculateInputTokenAccounting( + inputTokens, + cacheReadTokens, + cacheCreationTokens, + ); + /* Only positive cache counts are reported, so input_token_details is omitted when there was no cache activity. */ const inputTokenDetails: Record = {}; + if (cacheReadTokens > 0) { + inputTokenDetails.cache_read = cacheReadTokens; + } + if (cacheCreationTokens > 0) { + inputTokenDetails.cache_creation = cacheCreationTokens; + inputTokenDetails.ephemeral_5m_input_tokens = cacheCreationTokens; /* NOTE(review): every cache-creation token is also reported as ephemeral_5m_input_tokens; presumably assumes the 5-minute cache tier - confirm. */ + } + + return { + input_tokens: totalInputTokens, + output_tokens: outputTokens, + total_tokens: totalInputTokens + outputTokens, + ...(Object.keys(inputTokenDetails).length > 0 + ? { input_token_details: inputTokenDetails } + : {}), + }; +} + +/** Rewrites the usage attributes of a span in place. Does nothing when buildLangSmithUsageMetadata returns undefined. Otherwise overwrites the input, output and total token attributes using the total from calculateInputTokenAccounting, stores the JSON-encoded usage object under LANGSMITH_USAGE_METADATA, and writes per-category counters under langsmith.metadata.* plus a JSON breakdown under GEN_AI_USAGE_INPUT_TOKEN_DETAILS. */ function normalizeUsageForLangSmith(attributes: Record): void { + const usageMetadata = buildLangSmithUsageMetadata(attributes); + if (!usageMetadata) { + return; + } + + /* Read the incoming input-token attribute before it is overwritten further down; it is preserved as original_input_tokens. */ const inputTokens = firstNumberAttribute(attributes, [ + GEN_AI_USAGE_INPUT_TOKENS, + 'ai.usage.inputTokens', + 'ai.usage.promptTokens', + ]); + const outputTokens = numberFromAttribute(usageMetadata.output_tokens) ?? 0; + const inputTokenDetails = isRecord(usageMetadata.input_token_details) + ? usageMetadata.input_token_details + : {}; + const cacheReadTokens = numberFromAttribute(inputTokenDetails.cache_read) ?? 0; + const cacheCreationTokens = + numberFromAttribute(inputTokenDetails.cache_creation) ?? + numberFromAttribute(inputTokenDetails.ephemeral_5m_input_tokens) ?? + 0; + const totalInputTokens = numberFromAttribute(usageMetadata.input_tokens) ?? 
0; + const { regularInputTokens } = calculateInputTokenAccounting( + inputTokens, + cacheReadTokens, + cacheCreationTokens, + ); + + /* From here on the standard usage attributes carry the recomputed total rather than the value that arrived on the span. */ attributes[GEN_AI_USAGE_INPUT_TOKENS] = totalInputTokens; + attributes[GEN_AI_USAGE_OUTPUT_TOKENS] = outputTokens; + attributes[GEN_AI_USAGE_TOTAL_TOKENS] = totalInputTokens + outputTokens; + attributes['ai.usage.inputTokens'] = totalInputTokens; + attributes[LANGSMITH_USAGE_METADATA] = JSON.stringify(usageMetadata); + if (inputTokens !== undefined) { + attributes['langsmith.metadata.original_input_tokens'] = inputTokens; + attributes['langsmith.metadata.anthropic_original_input_tokens'] = inputTokens; + } + /* Each counter is written twice: once under a generic key and once under an anthropic_-prefixed key. */ attributes['langsmith.metadata.total_input_tokens'] = totalInputTokens; + attributes['langsmith.metadata.regular_input_tokens'] = regularInputTokens; + attributes['langsmith.metadata.cache_read_input_tokens'] = cacheReadTokens; + attributes['langsmith.metadata.cache_creation_input_tokens'] = cacheCreationTokens; + attributes['langsmith.metadata.anthropic_total_input_tokens'] = totalInputTokens; + attributes['langsmith.metadata.anthropic_regular_input_tokens'] = regularInputTokens; + attributes['langsmith.metadata.anthropic_cache_read_input_tokens'] = cacheReadTokens; + attributes['langsmith.metadata.anthropic_cache_creation_input_tokens'] = cacheCreationTokens; + attributes[GEN_AI_USAGE_INPUT_TOKEN_DETAILS] = JSON.stringify({ + cache_read: cacheReadTokens, + cache_creation: cacheCreationTokens, + regular: regularInputTokens, + ...(inputTokens !== undefined ? 
{ original_input_tokens: inputTokens } : {}), + }); +} + +/** Returns the first non-empty string stored under any of the given keys (checked in array order), or undefined when none qualifies. */ function readStringAttribute( + attributes: Record, + keys: string[], +): string | undefined { + for (const key of keys) { + const value = attributes[key]; + if (typeof value === 'string' && value.length > 0) { + return value; + } + } + return undefined; +} + +/** Reads the agent role from the first of three candidate attribute keys that holds a non-empty string. */ function inferNativeLlmRole(attributes: Record): string | undefined { + return readStringAttribute(attributes, [ + 'ai.telemetry.metadata.agent_role', + 'langsmith.metadata.agent_role', + 'agent_role', + ]); +} + +/** Chooses the display name for an LLM span. The thread_title role gets a fixed label; any other role is formatted with formatAgentRoleLabel; without a role the function id (minus a leading instance-ai prefix and one optional separator) is formatted instead; the final fallback is the bare llm label. */ function displayNameForNativeLlmSpan(attributes: Record): string { + const role = inferNativeLlmRole(attributes); + if (role === 'thread_title') { + return 'llm: title'; + } + + if (role) { + return `llm: ${formatAgentRoleLabel(role)}`; + } + + const functionId = readStringAttribute(attributes, [ + 'ai.telemetry.functionId', + 'resource.name', + 'operation.name', + ]); + if (functionId) { + return `llm: ${formatAgentRoleLabel(functionId.replace(/^instance-ai[._-]?/, ''))}`; + } + + return 'llm'; +} + +/** Accepts either a JSON string (decoded through parseTelemetryJson) or an already-decoded value, and returns it only when isRecord accepts the result. */ function parseTelemetryObject(value: unknown): Record | undefined { + const parsed = typeof value === 'string' ? parseTelemetryJson(value) : value; + return isRecord(parsed) ? parsed : undefined; +} + +/** Makes a tool action safe to embed in a span name: trims it, caps it at MAX_TOOL_ACTION_DISPLAY_LENGTH characters, collapses each run of characters other than word characters, dot and hyphen into one underscore, and strips leading and trailing underscores. Yields undefined when nothing is left. */ function sanitizeToolActionForDisplay(action: string): string | undefined { + const sanitized = action + .trim() + .slice(0, MAX_TOOL_ACTION_DISPLAY_LENGTH) + .replace(/[^\w.-]+/g, '_') + .replace(/^_+|_+$/g, ''); + + return sanitized.length > 0 ? 
sanitized : undefined; +} + +/** Extracts the action field from the tool-call arguments attribute and sanitizes it. Returns undefined when the arguments do not decode to a record or action is not a string. */ function toolActionForNativeToolSpan(attributes: Record): string | undefined { + const args = parseTelemetryObject(attributes['ai.toolCall.args']); + if (!args || typeof args.action !== 'string') { + return undefined; + } + + return sanitizeToolActionForDisplay(args.action); +} + +/** Writes value under key and, unless key already carries the langsmith.metadata. prefix, mirrors it under that prefix as well. */ function setLangSmithMetadataAttribute( + attributes: Record, + key: string, + value: unknown, +): void { + attributes[key] = value; + if (!key.startsWith('langsmith.metadata.')) { + attributes[`langsmith.metadata.${key}`] = value; + } +} + +/** Relabels a span as an LLM run. Applies only when the AI_OPERATION_ID attribute is a member of LLM_AI_SDK_OPERATION_IDS; mutates both span.name and the attributes object, recording the original operation id in metadata. */ function renameNativeLlmSpanForLangSmith( + span: Record, + attributes: Record, +): void { + const operationId = readStringAttribute(attributes, [AI_OPERATION_ID]); + if (!operationId || !LLM_AI_SDK_OPERATION_IDS.has(operationId)) { + return; + } + + const displayName = displayNameForNativeLlmSpan(attributes); + span.name = displayName; + attributes[LANGSMITH_TRACE_NAME] = displayName; + attributes[LANGSMITH_SPAN_KIND] = 'llm'; + attributes[GEN_AI_OPERATION_NAME] = 'chat'; + /* The display group is the formatted agent role when one is present, otherwise the generic llm label. */ const displayGroup = inferNativeLlmRole(attributes); + setLangSmithMetadataAttribute(attributes, 'display_name', displayName); + setLangSmithMetadataAttribute(attributes, 'display_kind', 'llm'); + setLangSmithMetadataAttribute( + attributes, + 'display_group', + displayGroup ? 
formatAgentRoleLabel(displayGroup) : 'llm', + ); + const executionMode = readStringAttribute(attributes, [ + 'ai.telemetry.metadata.execution_mode', + 'langsmith.metadata.execution_mode', + 'execution_mode', + ]); + if (executionMode) { + setLangSmithMetadataAttribute(attributes, 'display_phase', formatTraceLabel(executionMode)); + } + setLangSmithMetadataAttribute(attributes, 'ai_sdk.operation', operationId); + setLangSmithMetadataAttribute(attributes, 'instance_ai.display_name', displayName); + setLangSmithMetadataAttribute(attributes, 'instance_ai.canonical_name', operationId); + setLangSmithMetadataAttribute(attributes, 'instance_ai.run_name', operationId); + /* The operation id was copied into metadata above; once it is removed, isLangSmithLlmSpan can only recognise this span through LANGSMITH_SPAN_KIND or display_kind. */ delete attributes[AI_OPERATION_ID]; +} + +/** Relabels a span as a tool run. Applies only when a tool-call name attribute is present and the operation id is either absent or equal to ai.toolCall. The display name is the tool name, with the sanitized action appended in square brackets when one is available. Mutates span.name and the attributes object. */ function renameNativeToolSpanForLangSmith( + span: Record, + attributes: Record, +): void { + const toolName = readStringAttribute(attributes, ['ai.toolCall.name']); + if (!toolName) { + return; + } + + /* A span that names a tool but reports a different operation is left untouched. */ const operationId = readStringAttribute(attributes, [AI_OPERATION_ID]); + if (operationId && operationId !== 'ai.toolCall') { + return; + } + + const action = toolActionForNativeToolSpan(attributes); + const displayName = action ? `${toolName}[${action}]` : toolName; + span.name = displayName; + attributes[LANGSMITH_TRACE_NAME] = displayName; + attributes[LANGSMITH_SPAN_KIND] = 'tool'; + attributes['ai.toolCall.display_name'] = displayName; + if (action) { + attributes['ai.toolCall.action'] = action; + setLangSmithMetadataAttribute(attributes, 'display_action', action); + } + setLangSmithMetadataAttribute(attributes, 'display_name', displayName); + setLangSmithMetadataAttribute(attributes, 'display_kind', 'tool'); + setLangSmithMetadataAttribute(attributes, 'display_group', toolName); + setLangSmithMetadataAttribute(attributes, 'ai_sdk.operation', operationId ?? 
'ai.toolCall'); + attributes['langsmith.metadata.ls_run_name'] = displayName; + setLangSmithMetadataAttribute(attributes, 'instance_ai.display_name', displayName); + setLangSmithMetadataAttribute(attributes, 'instance_ai.canonical_name', toolName); + setLangSmithMetadataAttribute(attributes, 'instance_ai.run_name', displayName); +} + +/** True when the attributes mark the span as an LLM run: by LANGSMITH_SPAN_KIND, by display_kind, or by an AI_OPERATION_ID that belongs to LLM_AI_SDK_OPERATION_IDS. */ function isLangSmithLlmSpan(attributes: Record): boolean { + const operationId = readStringAttribute(attributes, [AI_OPERATION_ID]); + return ( + attributes[LANGSMITH_SPAN_KIND] === 'llm' || + attributes.display_kind === 'llm' || + (typeof operationId === 'string' && LLM_AI_SDK_OPERATION_IDS.has(operationId)) + ); +} + +/** True for attribute keys treated as token-usage fields: a fixed list of exact names plus anything under the gen_ai.usage., ai.usage. or llm.usage. prefixes. */ function isCountedUsageAttribute(key: string): boolean { + return ( + key === LANGSMITH_USAGE_METADATA || + key === 'usage_metadata' || + key === 'prompt_tokens' || + key === 'completion_tokens' || + key === 'total_tokens' || + key === 'input_tokens' || + key === 'output_tokens' || + key.startsWith('gen_ai.usage.') || + key.startsWith('ai.usage.') || + key.startsWith('llm.usage.') + ); +} + +/** Maps a usage key to its renamed form under the instance_ai.usage. prefix. */ function neutralUsageAttributeKey(key: string): string { + return `instance_ai.usage.${key}`; +} + +/** On spans that are not LLM runs, moves every usage attribute to its instance_ai.usage.-prefixed key and deletes the original. LLM spans are left unchanged. Mutates attributes in place. */ function moveNonLlmUsageAttributes(attributes: Record): void { + if (isLangSmithLlmSpan(attributes)) { + return; + } + + /* Object.keys returns a snapshot array, so adding and deleting keys inside the loop does not disturb the iteration. */ for (const key of Object.keys(attributes)) { + if (!isCountedUsageAttribute(key)) { + continue; + } + + attributes[neutralUsageAttributeKey(key)] = attributes[key]; + delete attributes[key]; + } +} + +/** Export-time span transform. Returns the input untouched unless both the span and its attributes pass isRecord. Otherwise rebuilds the attributes through redactTelemetryAttribute, runs the prompt, usage and rename steps on the copy, assigns the copy back to span.attributes and returns the same span object (mutated, not cloned). */ export function redactLangSmithTelemetrySpan(span: unknown): unknown { + if (!isRecord(span) || !isRecord(span.attributes)) { + return span; + } + + const attributes: Record = {}; + for (const [key, value] of Object.entries(span.attributes)) { + attributes[key] = redactTelemetryAttribute(key, value); + } + /* Order matters: usage is normalized first, the rename steps then set the span kind, and only afterwards are usage keys moved off spans that did not end up as LLM runs. */ enrichLangSmithPromptAttribute(attributes); + normalizeUsageForLangSmith(attributes); + renameNativeLlmSpanForLangSmith(span, attributes); + renameNativeToolSpanForLangSmith(span, attributes); + 
moveNonLlmUsageAttributes(attributes); + span.attributes = attributes; + return span; +} + +/** Splits value into consecutive chunks of at most MAX_TRACE_STRING_LENGTH UTF-16 code units. A chunk ends just after the last newline in its window when that newline sits at or beyond the window midpoint; otherwise the window is cut at full length. Concatenating the chunks reproduces the input. NOTE(review): a full-length cut can land between the two halves of a surrogate pair. */ function splitTraceText(value: string): string[] { + if (value.length <= MAX_TRACE_STRING_LENGTH) { + return [value]; + } + + const chunks: string[] = []; + let remaining = value; + + while (remaining.length > MAX_TRACE_STRING_LENGTH) { + const candidate = remaining.slice(0, MAX_TRACE_STRING_LENGTH); + const splitIndex = candidate.lastIndexOf('\n'); + /* lastIndexOf yields -1 when there is no newline, which also falls through to the full-length cut. */ const chunkEnd = + splitIndex >= MAX_TRACE_STRING_LENGTH / 2 ? splitIndex + 1 : MAX_TRACE_STRING_LENGTH; + chunks.push(remaining.slice(0, chunkEnd)); + remaining = remaining.slice(chunkEnd); + } + + if (remaining.length > 0) { + chunks.push(remaining); + } + + return chunks; +} + +/** Returns the text unchanged when it fits in one chunk; otherwise returns an object whose keys are part_ followed by a 1-based index zero-padded to two digits, in chunk order. */ function serializeTraceText(value: string): string | Record { + const chunks = splitTraceText(value); + if (chunks.length === 1) { + return chunks[0]; + } + + return Object.fromEntries( + chunks.map((chunk, index) => [`part_${String(index + 1).padStart(2, '0')}`, chunk]), + ); +} + +/** Returns the description property of a tool when the tool passes isRecord and the property is a string; undefined otherwise. */ function summarizeToolDescription(tool: unknown): string | undefined { + if (!isRecord(tool)) { + return undefined; + } + + return typeof tool.description === 'string' ? tool.description : undefined; +} + +/** Classifies where a tool comes from. MCP tools are reported as local-mcp when their server name contains the word local (case-insensitive) and as mcp otherwise; workspace-prefixed names and three specific tool ids map to workspace; members of ORCHESTRATION_TOOL_NAMES map to orchestration; everything else is domain. */ function classifyToolSource(name: string, toolRecord: Record): string { + if (toolRecord.mcpTool === true) { + return typeof toolRecord.mcpServerName === 'string' && + toolRecord.mcpServerName.toLowerCase().includes('local') + ? 
'local-mcp' + : 'mcp'; + } + + /* The workspace test runs before the orchestration lookup, so APPLY_WORKFLOW_CREDENTIALS is reported as workspace even though its id lives on ORCHESTRATION_TOOL_IDS. */ if ( + name.startsWith('workspace_') || + name === WORKSPACE_TOOL_IDS.WRITE_FILE || + name === WORKSPACE_TOOL_IDS.SUBMIT_WORKFLOW || + name === ORCHESTRATION_TOOL_IDS.APPLY_WORKFLOW_CREDENTIALS + ) { + return 'workspace'; + } + + if (ORCHESTRATION_TOOL_NAMES.has(name)) { + return 'orchestration'; + } + + return 'domain'; +} + +/** Assigns a coarse category to a tool from its name. Rules are evaluated top to bottom and the first match wins, so a name containing both credential and workflow is a credential tool. Falls back to other. */ function classifyToolCategory(name: string): string { + if (name.includes('credential')) return 'credential'; + if (name.includes('browser')) return 'browser'; + if (name.includes('data-table')) return 'data-table'; + if ( + name.includes('workflow') || + name === DOMAIN_TOOL_IDS.BUILD_WORKFLOW || + name === WORKSPACE_TOOL_IDS.SUBMIT_WORKFLOW + ) { + return 'workflow'; + } + if (name === DOMAIN_TOOL_IDS.NODES || name === 'materialize-node-type') return 'node'; + if (name === DOMAIN_TOOL_IDS.EXECUTIONS) return 'execution'; + if (name.includes('research')) return 'research'; + if ( + name === ORCHESTRATION_TOOL_IDS.PLAN || + name.includes('plan') || + name === ORCHESTRATION_TOOL_IDS.CREATE_TASKS + ) { + return 'planning'; + } + if (name.startsWith('workspace_')) return 'workspace'; + /* NOTE(review): the filesystem test is subsumed by the file test, since any name containing filesystem also contains file. */ if (name.includes('file') || name.includes('filesystem')) return 'filesystem'; + return 'other'; +} + +/** Estimates the side effect of a tool from substrings of its name; the first matching rule wins and the default is read. NOTE(review): the matches are plain substring tests, so for example any name containing the letters run is classified as execute - confirm that this breadth is intended. */ function classifyToolSideEffect(name: string): string { + if (name.includes('browser')) return 'browser'; + if (name.includes('research')) return 'network'; + if (name === DOMAIN_TOOL_IDS.EXECUTIONS || name.includes('execute') || name.includes('run')) { + return 'execute'; + } + if ( + name.includes('write') || + name.includes('submit') || + name.includes('apply') || + name.includes('build') || + name.includes('create') || + name.includes('update') || + name.includes('delete') || + name.includes('remove') || + name.includes('complete') + ) { + return 'write'; + } + /* Reached only when none of the execute or write substrings matched above. */ if (name.includes(DOMAIN_TOOL_IDS.ASK_USER) || name.includes('pause-for-user')) return 'none'; + return 'read'; +} + +/** Returns a traceable form of the inputSchema property of a tool: the result of its zero-argument toJSONSchema method when it has one, a small placeholder object when that call throws or when only safeParse is available, and the raw inputSchema value otherwise. Yields undefined when the tool is not a record or has no inputSchema. */ function getToolInputSchema(tool: unknown): unknown { 
+ if (!isRecord(tool)) { + return undefined; + } + + const inputSchema = tool.inputSchema; + if (inputSchema === undefined) { + return undefined; + } + + /* Only a toJSONSchema function that declares no parameters is invoked; Reflect.apply binds the schema object as the receiver. */ const toJSONSchema = isRecord(inputSchema) ? inputSchema.toJSONSchema : undefined; + if (typeof toJSONSchema === 'function' && toJSONSchema.length === 0) { + try { + const schema: unknown = Reflect.apply(toJSONSchema, inputSchema, []); + return schema; + } catch { + return { type: 'zod', conversion: 'failed' }; + } + } + + /* A schema exposing safeParse but no usable toJSONSchema is reported by type only - presumably a zod schema that cannot be converted here. */ if (isRecord(inputSchema) && typeof inputSchema.safeParse === 'function') { + return { type: 'zod' }; + } + + return inputSchema; +} + +/** Produces the manifest entry for one tool: name, optional description, kind (mcp or local), the source, category and side-effect classifications, optional MCP server name, approval flags, and redacted copies of the input schema and provider options when present. */ function summarizeToolForManifest(name: string, tool: unknown): Record { + const schema = getToolInputSchema(tool); + /* The schema is redacted with its own depth limit, MAX_PROMPT_SCHEMA_TRACE_DEPTH. */ const redactedSchema = + schema === undefined + ? undefined + : redactTelemetryJsonValue(schema, undefined, 0, MAX_PROMPT_SCHEMA_TRACE_DEPTH); + const toolRecord = isRecord(tool) ? tool : {}; + const description = summarizeToolDescription(tool); + const providerOptions = isRecord(toolRecord.providerOptions) + ? redactTelemetryJsonValue(toolRecord.providerOptions) + : undefined; + + return { + name, + ...(description ? { description } : {}), + kind: toolRecord.mcpTool === true ? 'mcp' : 'local', + source: classifyToolSource(name, toolRecord), + category: classifyToolCategory(name), + side_effect: classifyToolSideEffect(name), + ...(typeof toolRecord.mcpServerName === 'string' + ? { mcp_server_name: toolRecord.mcpServerName } + : {}), + approval: { + default_approval: toolRecord.withDefaultApproval === true, + suspend: toolRecord.suspendSchema !== undefined, + resume: toolRecord.resumeSchema !== undefined, + }, + ...(redactedSchema !== undefined ? { input_schema: redactedSchema } : {}), + ...(providerOptions !== undefined ? 
{ provider_options: providerOptions } : {}), + }; +} + +/** Summarizes a tool registry as three fields: tool count, tool names, and a stableHash of the per-tool manifest entries. Returns an empty object for a missing or empty registry. The loaded prefix is emitted under assigned_* keys; the other prefixes are used verbatim in the key names. */ function summarizeToolSet( + fieldPrefix: 'loaded' | 'deferred' | 'runtime', + tools: InstanceAiToolRegistry | undefined, +): Record { + if (!tools || tools.size === 0) { + return {}; + } + + /* Entries are consumed as [name, tool] pairs in the iteration order of the registry. */ const toolEntries = Array.from(tools); + const summaries = toolEntries.map(([name, tool]) => summarizeToolForManifest(name, tool)); + const manifestHash = stableHash(summaries); + const toolNames = toolEntries.map(([name]) => name); + if (fieldPrefix === 'loaded') { + return { + assigned_tool_count: summaries.length, + assigned_tool_names: toolNames, + assigned_tool_schema_hash: manifestHash, + }; + } + return { + [`${fieldPrefix}_tool_count`]: summaries.length, + [`${fieldPrefix}_tool_names`]: toolNames, + [`${fieldPrefix}_tool_schema_hash`]: manifestHash, + }; +} + +/** Describes a memory binding for the trace. Returns an empty object when memory is not a record; otherwise marks memory as enabled and includes the resource and thread ids when they are strings. */ function summarizeMemoryBinding(memory: unknown): Record { + if (!isRecord(memory)) { + return {}; + } + + return { + memory_enabled: true, + ...(typeof memory.resource === 'string' ? { memory_resource_id: memory.resource } : {}), + ...(typeof memory.thread === 'string' ? 
{ memory_thread_id: memory.thread } : {}), + }; +} + +/** Converts an arbitrary value into a bounded, serializable form for tracing. Strings are passed through truncateString; numbers, booleans, null and undefined are returned as-is; bigint, symbol, function, Date, Error and Uint8Array values become short descriptive strings or objects; arrays and records are walked recursively up to MAX_TRACE_DEPTH and capped at MAX_TRACE_ARRAY_ITEMS and MAX_TRACE_OBJECT_KEYS. The depth parameter is the current nesting level and defaults to 0. */ export function sanitizeTraceValue(value: unknown, depth = 0): unknown { + if (value === null || value === undefined) { + return value; + } + + if (typeof value === 'string') { + return truncateString(value); + } + + if (typeof value === 'number' || typeof value === 'boolean') { + return value; + } + + if (typeof value === 'bigint') { + return value.toString(); + } + + if (typeof value === 'function') { + return `[function ${value.name || 'anonymous'}]`; + } + + /* The instanceof checks below run before the generic isRecord branch - presumably because isRecord would also accept these objects; confirm against its definition. */ if (value instanceof Date) { + return value.toISOString(); /* NOTE(review): toISOString throws a RangeError for an invalid Date, which would propagate out of this function. */ + } + + if (value instanceof Error) { + return { + name: value.name, + message: truncateString(value.message), + }; + } + + if (value instanceof Uint8Array) { + return `[binary ${value.byteLength} bytes]`; + } + + if (Array.isArray(value)) { + if (depth >= MAX_TRACE_DEPTH) { + return `[array(${value.length})]`; + } + + /* Items beyond MAX_TRACE_ARRAY_ITEMS are dropped without a marker, unlike records, which report __truncatedKeys. */ return value + .slice(0, MAX_TRACE_ARRAY_ITEMS) + .map((entry) => sanitizeTraceValue(entry, depth + 1)); + } + + if (isRecord(value)) { + if (depth >= MAX_TRACE_DEPTH) { + return `[object ${Object.keys(value).length} keys]`; + } + + const entries = Object.entries(value).slice(0, MAX_TRACE_OBJECT_KEYS); + const sanitized: Record = {}; + for (const [key, entryValue] of entries) { + sanitized[key] = sanitizeTraceValue(entryValue, depth + 1); + } + if (Object.keys(value).length > entries.length) { + sanitized.__truncatedKeys = Object.keys(value).length - entries.length; + } + return sanitized; + } + + if (typeof value === 'symbol') { + return value.toString(); + } + + return truncateString(Object.prototype.toString.call(value)); +} + +/** Sanitizes a top-level payload into a record. Each property of a record input is sanitized individually (starting at depth 0, with no cap on the number of top-level keys); undefined becomes an empty object; any other input is wrapped under a single value key. */ export function sanitizeTracePayload(value: unknown): Record { + if (isRecord(value)) { + const sanitized: Record = {}; + for (const [key, entryValue] of Object.entries(value)) { + sanitized[key] = sanitizeTraceValue(entryValue); + } + return sanitized; + } + + if (value === undefined) { + return {}; + } + + return { value: sanitizeTraceValue(value) }; +} 
+ +/** Reduces a model identifier to a traceable value: a non-empty string is truncated and returned; a record with a string id property yields that id, truncated; anything else goes through sanitizeTraceValue. */ export function serializeModelIdForTrace(modelId: unknown): unknown { + if (typeof modelId === 'string' && modelId.length > 0) { + return truncateString(modelId); + } + + if (isRecord(modelId) && typeof modelId.id === 'string') { + return truncateString(modelId.id); + } + + return sanitizeTraceValue(modelId); +} + +/** Shallow-merges inputs over baseInputs and returns a new object. baseInputs contributes only when it is a non-array record; keys in inputs win on conflict; a missing inputs argument contributes nothing. */ export function mergeTraceInputs( + baseInputs: unknown, + inputs: Record | undefined, +): Record { + const existingInputs = + isRecord(baseInputs) && !Array.isArray(baseInputs) ? { ...baseInputs } : {}; + + return { + ...existingInputs, + ...(inputs ?? {}), + }; +} + +/** Assembles the inputs recorded on an agent trace: system prompt (chunked by serializeTraceText), model id, tool-search flag, input processors, memory binding, and summaries of the loaded, deferred and runtime tool sets. Absent options are omitted rather than emitted as empty values, and the whole object is passed through sanitizeTracePayload. */ export function buildAgentTraceInputs(options: AgentTraceInputOptions): Record { + return sanitizeTracePayload({ + ...(options.systemPrompt ? { system_prompt: serializeTraceText(options.systemPrompt) } : {}), + ...(options.modelId !== undefined ? { model: serializeModelIdForTrace(options.modelId) } : {}), + ...(options.toolSearchEnabled !== undefined + ? { tool_search_enabled: options.toolSearchEnabled } + : {}), + ...(options.inputProcessors?.length ? 
{ input_processors: options.inputProcessors } : {}), + ...summarizeMemoryBinding(options.memory), + ...summarizeToolSet('loaded', options.tools), + ...summarizeToolSet('deferred', options.deferredTools), + ...summarizeToolSet('runtime', options.runtimeTools), + }); +} diff --git a/packages/@n8n/instance-ai/src/utils/scrub-secrets.ts b/packages/@n8n/instance-ai/src/utils/scrub-secrets.ts index 11759f6df44..ac9bafac144 100644 --- a/packages/@n8n/instance-ai/src/utils/scrub-secrets.ts +++ b/packages/@n8n/instance-ai/src/utils/scrub-secrets.ts @@ -22,7 +22,7 @@ const SECRET_VALUE_PATTERNS: readonly RegExp[] = [ // AWS access key id /\bAKIA[0-9A-Z]{16}\b/g, // Generic `password=...` / `api_key=...` / `secret=...` style assignments - /\b(?:password|passwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token)\s*[:=]\s*\S+/gi, + /* NOTE(review): the value part is a single run of non-whitespace, so for a two-word value such as a scheme followed by a token after authorization, only the first word is matched by this pattern - confirm another pattern covers the token itself. */ /\b(?:password|passwd|secret|api[_-]?key|authorization|access[_-]?token|refresh[_-]?token|id[_-]?token|session[_-]?token|auth[_-]?token)\s*[:=]\s*\S+/gi, ]; export function scrubSecretsInText(input: string): string { diff --git a/packages/cli/src/modules/instance-ai/__tests__/instance-ai.service.test.ts b/packages/cli/src/modules/instance-ai/__tests__/instance-ai.service.test.ts index be8f591af54..e6bc48e41ba 100644 --- a/packages/cli/src/modules/instance-ai/__tests__/instance-ai.service.test.ts +++ b/packages/cli/src/modules/instance-ai/__tests__/instance-ai.service.test.ts @@ -116,6 +116,7 @@ import type { InstanceAiAgentNode, InstanceAiEvent } from '@n8n/api-types'; import { resumeAgentRun, type ManagedBackgroundTask, + type InstanceAiTraceContext, type SpawnBackgroundTaskOptions, type SpawnBackgroundTaskResult, type SpawnManagedBackgroundTaskOptions, @@ -575,6 +576,7 @@ type TerminalGuardOrderServiceInternals = { signal: AbortSignal; abortController: AbortController; snapshotStorage: unknown; + tracing?: InstanceAiTraceContext; }, ) => Promise; }; @@ -1555,6 +1557,49 @@ describe('InstanceAiService — terminal response guard wiring', () => { 
thread_id: 'thread-a', }); }); + + /* Resume path: the telemetry setter of the agent must receive the object returned by tracing.getTelemetry (requested with executionMode resume), and the run must execute inside withActiveSpan on tracing.actorRun. */ it('rebinds resumed agents to resume trace telemetry', async () => { + const service = createTerminalGuardOrderService(); + const abortController = new AbortController(); + const telemetry = { enabled: true }; + const agent = { telemetry: jest.fn() }; + /* Minimal stand-in for the trace context; withActiveSpan simply invokes the callback it is given. */ const tracing = { + traceKind: 'orchestrator_resume', + actorRun: { id: 'actor-run' }, + getTelemetry: jest.fn(() => telemetry), + withActiveSpan: jest.fn(async (_run: unknown, fn: () => Promise) => await fn()), + } as unknown as InstanceAiTraceContext; + jest.mocked(resumeAgentRun).mockResolvedValueOnce({ + status: 'completed', + agentRunId: 'agent-run-1', + text: Promise.resolve('done'), + workSummary: { toolCalls: [], totalToolCalls: 0, totalToolErrors: 0 }, + }); + + await service.processResumedStream( + agent, + {}, + { + runId: 'run-1', + agentRunId: 'agent-run-1', + threadId: 'thread-a', + user: fakeUser, + toolCallId: 'tool-call-1', + signal: abortController.signal, + abortController, + snapshotStorage: {}, + tracing, + }, + ); + + expect(tracing.getTelemetry).toHaveBeenCalledWith({ + agentRole: 'orchestrator', + functionId: 'instance-ai.orchestrator', + executionMode: 'resume', + }); + expect(agent.telemetry).toHaveBeenCalledWith(telemetry); + expect(tracing.withActiveSpan).toHaveBeenCalledWith(tracing.actorRun, expect.any(Function)); + }); }); describe('InstanceAiService — AI temporary workflow cleanup', () => { diff --git a/packages/cli/src/modules/instance-ai/instance-ai.service.ts b/packages/cli/src/modules/instance-ai/instance-ai.service.ts index 8ef373a4afc..af508b92310 100644 --- a/packages/cli/src/modules/instance-ai/instance-ai.service.ts +++ b/packages/cli/src/modules/instance-ai/instance-ai.service.ts @@ -122,6 +122,16 @@ function getErrorMessage(error: unknown): string { return error instanceof Error ? 
error.message : String(error); } +/** Type guard: true when agent is a non-null object whose telemetry property, read through Reflect.get, is a function. */ function isTelemetryConfigurableAgent( + agent: unknown, +): agent is { telemetry: (telemetry: unknown) => void } { + return ( + typeof agent === 'object' && + agent !== null && + typeof Reflect.get(agent, 'telemetry') === 'function' + ); +} + const INSTANCE_AI_CHECKPOINT_PRUNE_RETRY_MS = 30 * 1000; function isTextMessagePart(part: unknown): part is { type: 'text'; text: string } { @@ -3809,6 +3819,25 @@ export class InstanceAiService { let messageTraceFinalization: MessageTraceFinalization | undefined; try { + /* Rebind telemetry on the agent before resuming; executionMode is resume only for the orchestrator_resume trace kind and foreground otherwise. */ if (opts.tracing?.getTelemetry && isTelemetryConfigurableAgent(agent)) { + try { + agent.telemetry( + opts.tracing.getTelemetry({ + agentRole: 'orchestrator', + functionId: 'instance-ai.orchestrator', + executionMode: + opts.tracing.traceKind === 'orchestrator_resume' ? 'resume' : 'foreground', + }), + ); + } catch (error) { + /* Best effort: a failure while attaching telemetry is logged as a warning and the resume still proceeds. */ this.logger.warn('Failed to configure Instance AI resume tracing', { + error: getErrorMessage(error), + threadId: opts.threadId, + runId: opts.runId, + }); + } + } + const result = opts.tracing ? await opts.tracing.withActiveSpan(opts.tracing.actorRun, async () => { return await resumeAgentRun(