fix(instance-ai): rely on native tool tracing

This commit is contained in:
Oleg Ivaniv 2026-05-06 09:34:29 +02:00
parent 23fa642e26
commit ef933cfc99
No known key found for this signature in database
3 changed files with 78 additions and 225 deletions

View File

@ -11,9 +11,9 @@ model.
We will stop mixing LangSmith `RunTree` spans with OTel spans for normal
execution. Product concepts such as message turns, orchestrator work,
sub-agent work, HITL, background jobs, and selected local tool executions must
be represented as OTel spans. Native AI SDK spans for model calls, provider
requests, messages, tool calls, and token usage stay in the same OTel tree.
sub-agent work, HITL, and background jobs must be represented as OTel spans.
Native AI SDK spans for model calls, provider requests, messages, tool calls,
and token usage stay in the same OTel tree.
`RunTree` should not be used as a live trace hierarchy once this migration is
complete. If LangSmith feedback or legacy replay needs a compatibility path,
@ -86,6 +86,9 @@ Implemented so far:
product actor span already represents the agent loop; native provider and
`ai.toolCall` spans remain enabled and are parented directly under the
product actor span.
- Normal tool execution no longer emits duplicate `instance-ai.tool.*` product
spans. The native `ai.toolCall` span is the canonical tool execution span;
Instance AI only adds product spans for HITL suspend/resume lifecycle events.
- Live LangSmith validation has proved feedback against an OTel-only product
root and full provider-span visibility with a real model turn.
- Detached sub-agent linking captures spawning trace/span metadata and model
@ -341,15 +344,10 @@ Product chain spans:
- `instance-ai.subagent.<role>.generate`
- `instance-ai.background.<kind>`
Product tool or side-effect spans:
Product side-effect spans:
- `instance-ai.hitl.suspend`
- `instance-ai.hitl.resume`
- `instance-ai.tool.workspace_edit`
- `instance-ai.tool.workflow_validation`
- `instance-ai.tool.workflow_submit`
- `instance-ai.tool.daytona`
- `instance-ai.tool.background_task`
Native AI SDK spans:
@ -432,9 +430,9 @@ Default local tool execution should use `ai.toolCall` spans with:
- `ai.toolCall.result`, when output recording is enabled
- `ai.telemetry.metadata.*`
Add additional product side-effect spans only when a normal `ai.toolCall` span
is not enough. Workspace edits, Daytona operations, workflow submission,
workflow validation, and HITL are valid examples.
Do not emit duplicate `instance-ai.tool.*` product spans for normal tool
execution. Add product side-effect spans only for lifecycle events that a normal
`ai.toolCall` span does not represent; currently the only such events are HITL
suspend and resume.
## Service Proxy Support
@ -510,7 +508,7 @@ must not require LangSmith to be available.
- product OTel trace context creation
- thread metadata construction
- product span helpers for message turns, context compaction, prompt building,
HITL, background tasks, workflow build loops, and selected side-effect tools
HITL, background tasks, and workflow build loops
- feedback snapshot persistence
- service proxy request metadata and headers
- detached sub-agent linking metadata
@ -558,7 +556,9 @@ must not require LangSmith to be available.
OTel spans.
- [x] Convert inline `subagent:*` spans to OTel spans under active context.
- [x] Convert HITL suspend/resume spans to OTel spans.
- [x] Convert selected side-effect-heavy tools to OTel product spans.
- [x] ~~Convert selected side-effect-heavy tools to OTel product spans.~~
Replaced by native `ai.toolCall` spans only; duplicate `instance-ai.tool.*`
spans are intentionally not emitted.
5. Preserve detached/background sub-agent linking

View File

@ -950,14 +950,14 @@ describe('createInstanceAiTraceContext', () => {
const spans = agentsMock.getSpans();
const spanNames = spans.map((span) => span.name);
expect(spanNames).toContain('instance-ai.tool.ask-user');
expect(spanNames).toContain('instance-ai.hitl.suspend');
expect(
spans.find((span) => span.name === 'instance-ai.tool.ask-user')?.attributes.tool_call_id,
spans.find((span) => span.name === 'instance-ai.hitl.suspend')?.attributes.tool_call_id,
).toBe('toolu-ask');
expect(spanNames.some((name) => name.startsWith('instance-ai.tool.'))).toBe(false);
});
it('does not wrap ordinary local tools for product-level LangSmith spans', async () => {
it('does not wrap local tools for duplicate product-level LangSmith spans', async () => {
const tracing = await createInstanceAiTraceContext({
threadId: 'thread-1',
messageId: 'message-1',
@ -985,7 +985,7 @@ describe('createInstanceAiTraceContext', () => {
});
expect(wrappedTools.templates).toBe(regularTool);
expect(wrappedTools.workspace_execute_command).not.toBe(workspaceTool);
expect(wrappedTools.workspace_execute_command).toBe(workspaceTool);
});
it('keeps ad-hoc child spans rooted under the active sub-agent run', async () => {
@ -1086,9 +1086,9 @@ describe('createInstanceAiTraceContext', () => {
});
const spanNames = agentsMock.getSpans().map((span) => span.name);
expect(spanNames).toContain('instance-ai.tool.ask-user');
expect(spanNames).toContain('instance-ai.hitl.resume');
expect(spanNames).not.toContain('instance-ai.hitl.suspend');
expect(spanNames.some((name) => name.startsWith('instance-ai.tool.'))).toBe(false);
});
it('creates ad-hoc child spans under the current run tree', async () => {
@ -1223,22 +1223,39 @@ describe('createInstanceAiTraceContext', () => {
};
type NativeTracer = {
startSpan(name: string, options?: { attributes?: Record<string, unknown> }): NativeSpan;
startActiveSpan<T>(
name: string,
options: { attributes?: Record<string, unknown> },
fn: (span: NativeSpan) => Promise<T>,
): Promise<T>;
};
const tracer = telemetryOrBuilder.tracer as NativeTracer;
const providerSpan = (telemetryOrBuilder.tracer as NativeTracer).startSpan(
'ai.streamText.doStream',
{
attributes: {
'ai.operationId': 'ai.streamText.doStream',
'langsmith.span.kind': 'llm',
},
const providerSpan = tracer.startSpan('ai.streamText.doStream', {
attributes: {
'ai.operationId': 'ai.streamText.doStream',
'langsmith.span.kind': 'llm',
},
);
});
providerSpan.end();
await workspaceWriteFile.handler(
{ path: 'workflow.json', content: '{}' },
{ toolCallId: 'toolu-write-file' },
await tracer.startActiveSpan(
'ai.toolCall',
{
attributes: {
'ai.operationId': 'ai.toolCall',
'langsmith.span.kind': 'tool',
'ai.toolCall.name': 'workspace_write_file',
'ai.toolCall.id': 'toolu-write-file',
},
},
async (span) => {
await workspaceWriteFile.handler(
{ path: 'workflow.json', content: '{}' },
{ toolCallId: 'toolu-write-file' },
);
span.end();
},
);
});
@ -1249,7 +1266,7 @@ describe('createInstanceAiTraceContext', () => {
const rootSpan = spans.find((span) => span.name === 'instance-ai.message_turn');
const orchestratorSpan = spans.find((span) => span.name === 'instance-ai.orchestrator.stream');
const providerSpan = spans.find((span) => span.name === 'ai.streamText.doStream');
const localToolSpan = spans.find((span) => span.name === 'instance-ai.tool.workspace_edit');
const localToolSpan = spans.find((span) => span.name === 'ai.toolCall');
expect(rootSpan).toBeDefined();
expect(orchestratorSpan).toBeDefined();
@ -1263,7 +1280,9 @@ describe('createInstanceAiTraceContext', () => {
expect(orchestratorSpan?.parentSpanId).toBe(rootSpan?.id);
expect(providerSpan?.parentSpanId).toBe(orchestratorSpan?.id);
expect(localToolSpan?.parentSpanId).toBe(orchestratorSpan?.id);
expect(localToolSpan?.attributes.tool_call_id).toBe('toolu-write-file');
expect(localToolSpan?.attributes['ai.toolCall.id']).toBe('toolu-write-file');
expect(localToolSpan?.attributes['ai.toolCall.name']).toBe('workspace_write_file');
expect(spans.some((span) => span.name.startsWith('instance-ai.tool.'))).toBe(false);
expect(langsmithMock.getCreatedRunTrees()).toHaveLength(0);
});

View File

@ -41,19 +41,6 @@ const MAX_TRACE_ARRAY_ITEMS = 20;
// Numeric limit associated with trace object keys.
// NOTE(review): the consumer is outside this excerpt — presumably the maximum
// number of keys kept per object when sanitizing trace payloads; confirm.
const MAX_TRACE_OBJECT_KEYS = 30;
// Case-insensitive pattern for credential-like key names. It matches any key
// that CONTAINS one of: api key / api_key / api-key, authorization, bearer,
// cookie, credential(s), password, secret, or an access/refresh/id/session/auth
// token (with optional `_` or `-` separator). The final alternative matches a
// key that is exactly `token` or that ends in `.token`, `_token`, or `-token`.
// NOTE(review): presumably used to redact values before they are recorded in
// telemetry — the call site is not visible here; confirm.
const SENSITIVE_TELEMETRY_KEY_PATTERN =
/(api[_-]?key|authorization|bearer|cookie|credentials?|password|secret|access[_-]?token|refresh[_-]?token|id[_-]?token|session[_-]?token|auth[_-]?token|(?:^|[._-])token$)/i;
/**
 * Exact tool names looked up via `LOCAL_TOOL_TRACE_NAMES.has(tool.name)` when
 * deciding whether a tool's local execution should be traced. Matching is by
 * full name only; prefix-based matching is handled separately by the caller.
 */
const LOCAL_TOOL_TRACE_NAMES: Set<string> = new Set<string>([
	'ask-user',
	'pause-for-user',

	'workspace',
	'write-file',

	'build-workflow',
	'submit-workflow',
	'apply-workflow-credentials',
	'verify-built-workflow',
	'report-verification-verdict',

	'task-control',
	'complete-checkpoint',
]);
const traceParentOverrideStorage = new AsyncLocalStorage<{ current: RunTree | null }>();
const productTraceStorage = new AsyncLocalStorage<{
runtime: ProductOtelTraceRuntime;
@ -456,11 +443,6 @@ interface AgentTraceInputOptions {
// Context value accepted by the tool-tracing helpers in this file: either a
// plain `ToolContext` or an `InterruptibleToolContext`.
type NativeToolContext = ToolContext | InterruptibleToolContext;
// A `BuiltTool` whose `handler` property is required and non-nullable, so the
// tracing wrappers can call `tool.handler(...)` without a presence check.
type TraceableNativeTool = BuiltTool & { handler: NonNullable<BuiltTool['handler']> };
/**
 * Provider/model pair produced by `normalizeModelMetadata` from a model
 * identifier. Both fields are optional; an unrecognized identifier yields `{}`.
 */
interface NormalizedModelMetadata {
// Part of the model id before the first `/`; set only when the id contains a `/`.
provider?: string;
// Part of the model id after the first `/`, or the whole id when it has no `/`.
modelName?: string;
}
function isLangSmithTracingEnabled(proxyAvailable?: boolean): boolean {
const tracingFlag =
process.env.LANGCHAIN_TRACING_V2 ?? process.env.LANGSMITH_TRACING ?? undefined;
@ -895,21 +877,6 @@ function sanitizeTracePayload(value: unknown): Record<string, unknown> {
return { value: sanitizeTraceValue(value) };
}
/**
 * Splits a model identifier into provider and model name.
 *
 * - A non-empty string containing `/` yields `{ provider, modelName }`, where
 *   `provider` is the text before the first `/` and `modelName` is everything
 *   after it (further `/` characters are kept in `modelName`).
 * - A non-empty string without `/` yields `{ modelName }` only.
 * - A record with a string `id` is normalized via that `id`.
 * - Anything else (including the empty string) yields `{}`.
 *
 * @param modelId - Model identifier of unknown shape.
 * @returns The normalized metadata; possibly an empty object.
 */
function normalizeModelMetadata(modelId: unknown): NormalizedModelMetadata {
	if (typeof modelId === 'string' && modelId.length > 0) {
		const firstSlash = modelId.indexOf('/');
		if (firstSlash === -1) {
			return { modelName: modelId };
		}

		return {
			provider: modelId.slice(0, firstSlash),
			modelName: modelId.slice(firstSlash + 1),
		};
	}

	// Object-shaped ids are unwrapped through their string `id` and re-normalized.
	const hasStringId = isRecord(modelId) && typeof modelId.id === 'string';
	return hasStringId ? normalizeModelMetadata(modelId.id) : {};
}
export function serializeModelIdForTrace(modelId: unknown): unknown {
if (typeof modelId === 'string' && modelId.length > 0) {
return truncateString(modelId);
@ -1270,25 +1237,6 @@ function getToolCallId(context: NativeToolContext): string | undefined {
: undefined;
}
/**
 * Maps a tool name to its `instance-ai.tool.*` product span name.
 *
 * Known tools are grouped under a shared span name (workspace edits, workflow
 * submit/validation/build, background tasks). Any other tool gets
 * `instance-ai.tool.<name>`, where each run of characters outside
 * `[a-zA-Z0-9._-]` in the name is collapsed into a single `-`.
 *
 * @param toolName - The tool's registered name.
 * @returns The span name to use for the tool.
 */
function getProductToolSpanName(toolName: string): string {
	// Prefix match must be checked before the exact-name dispatch below.
	if (toolName.startsWith('workspace_')) {
		return 'instance-ai.tool.workspace_edit';
	}

	switch (toolName) {
		case 'workspace':
		case 'write-file':
			return 'instance-ai.tool.workspace_edit';

		case 'submit-workflow':
			return 'instance-ai.tool.workflow_submit';

		case 'verify-built-workflow':
		case 'report-verification-verdict':
			return 'instance-ai.tool.workflow_validation';

		case 'build-workflow':
		case 'build-workflow-with-agent':
			return 'instance-ai.tool.workflow_build';

		case 'complete-checkpoint':
		case 'task-control':
			return 'instance-ai.tool.background_task';

		default: {
			const safeName = toolName.replace(/[^a-zA-Z0-9._-]+/g, '-');
			return `instance-ai.tool.${safeName}`;
		}
	}
}
async function startAndFinishProductChildSpan(
currentTrace: { runtime: ProductOtelTraceRuntime; currentRun: InstanceAiTraceRun },
options: {
@ -1321,44 +1269,15 @@ async function startAndFinishProductChildSpan(
});
}
async function traceProductToolExecute(
async function traceProductSuspendableToolExecute(
tool: TraceableNativeTool,
options: InstanceAiToolTraceOptions | undefined,
input: unknown,
context: NativeToolContext,
currentTrace: { runtime: ProductOtelTraceRuntime; currentRun: InstanceAiTraceRun },
): Promise<unknown> {
const resumeData = isInterruptibleToolContext(context) ? context.resumeData : undefined;
const isResume = resumeData !== undefined && resumeData !== null;
const activeParentContext = getActiveOtelContextWithSpan();
const toolCallId = getToolCallId(context);
const toolRun = startProductSpan(currentTrace.runtime, {
projectName: currentTrace.currentRun.projectName,
name: getProductToolSpanName(tool.name),
runType: 'tool',
tags: normalizeTags(['tool'], options?.tags),
metadata: mergeMetadata(options?.metadata, {
tool_name: tool.name,
...(toolCallId ? { tool_call_id: toolCallId } : {}),
...(options?.agentRole ? { agent_role: options.agentRole } : {}),
phase: isResume ? 'resume' : 'initial',
...(isResume
? mergeMetadata(buildSuspendMetadata(tool.name, resumeData), {
approved: isRecord(resumeData) ? resumeData.approved : undefined,
})
: {}),
}),
inputs: { input },
parentRun: currentTrace.currentRun,
...(activeParentContext ? { parentContext: activeParentContext } : {}),
});
let toolRunFinished = false;
const finishToolRun = async (finishOptions?: InstanceAiTraceRunFinishOptions) => {
if (toolRunFinished) return;
toolRunFinished = true;
await finishProductSpan(currentTrace.runtime, toolRun, finishOptions);
};
const originalSuspend = isInterruptibleToolContext(context) ? context.suspend : undefined;
const wrappedContext: NativeToolContext =
@ -1366,64 +1285,38 @@ async function traceProductToolExecute(
? {
...context,
suspend: async (suspendPayload: unknown) => {
await startAndFinishProductChildSpan(
{ runtime: currentTrace.runtime, currentRun: toolRun },
{
name: 'instance-ai.hitl.suspend',
runType: 'chain',
tags: ['hitl'],
metadata: buildSuspendMetadata(tool.name, suspendPayload),
inputs: suspendPayload,
outputs: suspendPayload,
},
);
await finishToolRun({
outputs: {
status: 'suspended',
suspendPayload,
},
await startAndFinishProductChildSpan(currentTrace, {
name: 'instance-ai.hitl.suspend',
runType: 'chain',
tags: ['hitl'],
metadata: mergeMetadata(buildSuspendMetadata(tool.name, suspendPayload), {
final_status: 'suspended',
...(toolCallId ? { tool_call_id: toolCallId } : {}),
}),
inputs: suspendPayload,
outputs: suspendPayload,
});
return await originalSuspend(suspendPayload);
},
}
: context;
try {
const result = await withProductSpanContext(currentTrace.runtime, toolRun, async () => {
if (isResume) {
await startAndFinishProductChildSpan(
{ runtime: currentTrace.runtime, currentRun: toolRun },
{
name: 'instance-ai.hitl.resume',
runType: 'chain',
tags: ['hitl', 'resume'],
metadata: mergeMetadata(buildSuspendMetadata(tool.name, resumeData), {
approved: isRecord(resumeData) ? resumeData.approved : undefined,
}),
inputs: resumeData,
outputs: {
status: 'resumed',
},
},
);
}
return await tool.handler(input, wrappedContext);
if (isResume) {
await startAndFinishProductChildSpan(currentTrace, {
name: 'instance-ai.hitl.resume',
runType: 'chain',
tags: ['hitl', 'resume'],
metadata: mergeMetadata(buildSuspendMetadata(tool.name, resumeData), {
approved: isRecord(resumeData) ? resumeData.approved : undefined,
...(toolCallId ? { tool_call_id: toolCallId } : {}),
}),
inputs: resumeData,
outputs: {
status: 'resumed',
},
});
await finishToolRun({
outputs: result,
metadata: { final_status: 'completed' },
});
return result;
} catch (error) {
await finishToolRun({
error: normalizeErrorMessage(error),
metadata: { final_status: 'error' },
});
throw error;
}
return await tool.handler(input, wrappedContext);
}
async function traceSuspendableToolExecute(
@ -1434,7 +1327,7 @@ async function traceSuspendableToolExecute(
): Promise<unknown> {
const currentProductTrace = getCurrentProductTrace();
if (currentProductTrace) {
return await traceProductToolExecute(tool, options, input, context, currentProductTrace);
return await traceProductSuspendableToolExecute(tool, input, context, currentProductTrace);
}
const parentRun = getTraceParentRun();
@ -1514,53 +1407,6 @@ async function traceSuspendableToolExecute(
}
}
/**
 * Executes a tool handler, recording the call as a trace run when a trace is
 * active.
 *
 * Resolution order:
 * 1. If a product trace is current, delegate to `traceProductToolExecute`.
 * 2. Otherwise, if there is no LangSmith parent run, call the handler untraced.
 * 3. Otherwise, post a `tool:<name>` child run under the parent, run the
 *    handler inside that run's context, and finish the run with either the
 *    result (`final_status: 'completed'`) or the normalized error
 *    (`final_status: 'error'`).
 *
 * @param tool - Tool whose handler is invoked.
 * @param options - Optional tags, metadata, and agent role attached to the run.
 * @param input - Input forwarded to the handler and recorded as the run input.
 * @param context - Tool context forwarded to the handler unchanged.
 * @returns Whatever the handler resolves to.
 * @throws Rethrows any error raised while running or finishing the traced call.
 */
async function traceToolExecute(
	tool: TraceableNativeTool,
	options: InstanceAiToolTraceOptions | undefined,
	input: unknown,
	context: NativeToolContext,
): Promise<unknown> {
	const productTrace = getCurrentProductTrace();
	if (productTrace) {
		return await traceProductToolExecute(tool, options, input, context, productTrace);
	}

	const langSmithParent = getTraceParentRun();
	if (!langSmithParent) {
		// No active trace of either kind: run the handler directly.
		return await tool.handler(input, context);
	}

	const runName = `tool:${tool.name}`;
	const runTags = normalizeTags(['tool'], options?.tags);
	const runMetadata = mergeMetadata(options?.metadata, {
		tool_name: tool.name,
		...(options?.agentRole ? { agent_role: options.agentRole } : {}),
		...normalizeModelMetadata(options?.metadata?.model_id),
	});
	const childRun = await postChildRun(langSmithParent, {
		name: runName,
		runType: 'tool',
		tags: runTags,
		metadata: runMetadata,
		inputs: { input },
	});

	try {
		const handlerResult = await withLangSmithRunTree(
			childRun,
			async () => await tool.handler(input, context),
		);
		// Finishing stays inside the try so a failure here is also recorded below.
		await finishRunTree(childRun, {
			outputs: handlerResult,
			metadata: { final_status: 'completed' },
		});
		return handlerResult;
	} catch (failure) {
		await finishRunTree(childRun, {
			error: normalizeErrorMessage(failure),
			metadata: { final_status: 'error' },
		});
		throw failure;
	}
}
function createTraceContext(
projectName: string,
traceKind: InstanceAiTraceContext['traceKind'],
@ -1737,27 +1583,15 @@ function wrapToolHandler(
tool: TraceableNativeTool,
options: InstanceAiToolTraceOptions | undefined,
): TraceableNativeTool {
if (tool.suspendSchema !== undefined || tool.resumeSchema !== undefined) {
return {
...tool,
handler: async (input, context) =>
await traceSuspendableToolExecute(tool, options, input, context),
};
}
return {
...tool,
handler: async (input, context) => await traceToolExecute(tool, options, input, context),
handler: async (input, context) =>
await traceSuspendableToolExecute(tool, options, input, context),
};
}
function shouldTraceLocalToolExecution(tool: TraceableNativeTool): boolean {
return (
tool.suspendSchema !== undefined ||
tool.resumeSchema !== undefined ||
LOCAL_TOOL_TRACE_NAMES.has(tool.name) ||
tool.name.startsWith('workspace_')
);
return tool.suspendSchema !== undefined || tool.resumeSchema !== undefined;
}
function wrapTools(