mirror of
https://github.com/n8n-io/n8n.git
synced 2026-05-30 00:07:02 +02:00
feat(core): Add eval-data populator tool (no-changelog) (#30680)
This commit is contained in:
parent
73eae7f1d4
commit
e13d4e0465
|
|
@ -56,6 +56,10 @@ jest.mock('../orchestration/eval-setup-agent.tool', () => ({
|
|||
createEvalSetupAgentTool: jest.fn(() => ({ id: 'eval-setup-with-agent' })),
|
||||
}));
|
||||
|
||||
// Replace the eval-data agent tool module with a stub whose factory returns a
// bare `{ id: 'eval-data' }` object instead of a real tool.
jest.mock('../orchestration/eval-data-agent.tool', () => ({
|
||||
createEvalDataAgentTool: jest.fn(() => ({ id: 'eval-data' })),
|
||||
}));
|
||||
|
||||
jest.mock('../orchestration/plan-with-agent.tool', () => ({
|
||||
createPlanWithAgentTool: jest.fn(() => ({ id: 'plan' })),
|
||||
}));
|
||||
|
|
|
|||
|
|
@ -275,6 +275,110 @@ describe('runBatch', () => {
|
|||
expect(rows).toEqual([]);
|
||||
expect(generate).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
describe('realExamples few-shot block', () => {
	/**
	 * Shared arrange/act step for every test in this group: stubs the eval agent
	 * with a fresh `generate` mock, makes `extractText` yield an empty JSON array,
	 * runs a single-row batch for the `user_query` column and returns whatever
	 * `getPromptText` reads back from the `generate` mock.
	 *
	 * `realExamples` is only forwarded to `runBatch` when the caller supplies it,
	 * so the "undefined" case exercises a call without the property at all.
	 */
	async function promptFor(
		realExamples?: ReadonlyArray<Record<string, unknown>>,
	): Promise<string> {
		const generate = createGenerateMock();
		mockCreateEvalAgent.mockReturnValue({ generate } as unknown as ReturnType<
			typeof createEvalAgent
		>);
		mockExtractText.mockReturnValue(JSON.stringify([]));

		await runBatch({
			facet: BATCH_FACET,
			rowCount: 1,
			context: BATCH_CONTEXT,
			columns: ['user_query'],
			...(realExamples === undefined ? {} : { realExamples }),
		});

		return getPromptText(generate);
	}

	it('injects a reference-not-seed block when examples are provided', async () => {
		const prompt = await promptFor([{ user_query: 'how do I refund an order?' }]);

		expect(prompt).toContain('Recent real inputs the agent has received in production');
		expect(prompt).toContain('REFERENCE, not seeds');
		expect(prompt).toContain('how do I refund an order?');
		expect(prompt).toMatch(/Do NOT copy or paraphrase them/);
	});

	it('omits the block entirely when realExamples is undefined', async () => {
		const prompt = await promptFor();

		expect(prompt).not.toContain('Recent real inputs');
	});

	it('filters examples to the requested columns and drops rows that lack all of them', async () => {
		const prompt = await promptFor([
			// Only `user_query` was requested, so the second key must not surface.
			{ user_query: 'real one', expected_response: 'should not leak' },
			// No requested column at all: the whole row must disappear.
			{ unrelated: 'dropped' },
		]);

		expect(prompt).toContain('real one');
		for (const forbidden of ['should not leak', 'dropped', 'unrelated']) {
			expect(prompt).not.toContain(forbidden);
		}
	});

	it('caps the example list at 10 entries', async () => {
		const thirteenExamples = Array.from({ length: 13 }, (_, i) => ({ user_query: `q${i}` }));

		const prompt = await promptFor(thirteenExamples);

		expect(prompt).toContain('q0');
		expect(prompt).toContain('q9');
		expect(prompt).not.toContain('q10');
		expect(prompt).not.toContain('q12');
	});

	it('truncates values longer than 300 characters with an ellipsis', async () => {
		const oversized = 'x'.repeat(500);

		const prompt = await promptFor([{ user_query: oversized }]);

		expect(prompt).toMatch(/x{300}…/);
		expect(prompt).not.toMatch(/x{301}/);
	});
});
|
||||
});
|
||||
|
||||
describe('extractAgentContext', () => {
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ import { createEvalAgent, extractText, HAIKU_MODEL } from '../../utils/eval-agen
|
|||
const FACET_COUNT = 5;
|
||||
const DEFAULT_ROW_COUNT = 25;
|
||||
const SYSTEM_PROMPT_MAX_CHARS = 2000;
|
||||
/** Upper bound on how many real example rows `buildRealExamplesBlock` puts into the prompt. */
const REAL_EXAMPLES_MAX_COUNT = 10;
|
||||
/** Longest example value (in string length units) kept by `truncateExampleValue` before an ellipsis is appended. */
const REAL_EXAMPLE_VALUE_MAX_CHARS = 300;
|
||||
|
||||
export interface SampleRowFacet {
|
||||
length: string;
|
||||
|
|
@ -145,6 +147,52 @@ function buildAgentContextBlock(context: AgentContext | undefined): string {
|
|||
/**
 * Prompt paragraph telling the model to infer, from the agent's own
 * configuration, what kind of text the agent receives at runtime. `runBatch`
 * adds it as one of the sections of the user prompt.
 */
const FORMAT_INFERENCE =
|
||||
"Inspect the agent's system prompt, prompt template, and connected tools to infer what kind of text this agent receives at runtime. It may be a user chat message, output from another tool, scraped web content, structured records (JSON/key-value), document chunks, log lines, code, etc. Generate inputs that look like what would arrive at the agent in production. Do not assume a human user when the agent suggests otherwise.";
|
||||
|
||||
/**
 * Shorten one example value so a single oversized input cannot dominate the
 * prompt.
 *
 * Values no longer than `maxChars` UTF-16 code units are returned unchanged.
 * Longer values are cut to `maxChars` code units and suffixed with `…`. If the
 * cut would land between the two halves of a surrogate pair (e.g. inside an
 * emoji), the dangling high surrogate is dropped too, so the result never ends
 * in half a character.
 *
 * @param value - Example value, already converted to a string.
 * @param maxChars - Maximum number of code units kept before the ellipsis.
 *   Defaults to `REAL_EXAMPLE_VALUE_MAX_CHARS`.
 * @returns `value` itself, or its truncated prefix followed by `…`.
 */
function truncateExampleValue(
	value: string,
	maxChars: number = REAL_EXAMPLE_VALUE_MAX_CHARS,
): string {
	if (value.length <= maxChars) return value;

	let end = Math.max(0, Math.trunc(maxChars));
	if (end > 0) {
		const lastKept = value.charCodeAt(end - 1);
		const firstDropped = value.charCodeAt(end);
		const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
		const continuesWithLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
		// Cutting here would split one code point in two; back off by one unit.
		if (endsOnHighSurrogate && continuesWithLowSurrogate) end -= 1;
	}

	return `${value.slice(0, end)}…`;
}
|
||||
|
||||
/**
 * Convert one raw example value into the string shown in the prompt.
 *
 * Strings pass through untouched; everything else is JSON-encoded. Returns
 * `undefined` when the value has no JSON representation: `JSON.stringify`
 * yields `undefined` for functions and symbols, and throws for circular
 * structures and BigInt values. Examples are best-effort reference material,
 * so such values are skipped rather than allowed to abort the batch.
 */
function stringifyExampleValue(raw: unknown): string | undefined {
	if (typeof raw === 'string') return raw;
	try {
		// Typed as `string` by lib.d.ts, but can be `undefined` at runtime.
		const encoded: string | undefined = JSON.stringify(raw);
		return encoded;
	} catch {
		return undefined;
	}
}

/**
 * Render a small block of recent real inputs (filtered to the requested
 * `columns`) as a reference for the LLM. Returns an empty string when no
 * usable examples exist — the caller injects this block only when
 * non-empty, so the generator keeps producing rows from agent context
 * alone when history is missing.
 *
 * The directive is explicit that these are flavour reference, not seed
 * data to copy: the generator must produce NEW inputs in the same domain
 * and tone, not paraphrase the examples.
 *
 * Per example, only the requested columns are kept; `null`/`undefined`,
 * empty, and non-serialisable values are skipped, and an example that ends
 * up with no value is dropped. The cap counts usable examples, so rows that
 * lack every requested column do not use up the budget.
 *
 * @param examples - Candidate example rows; may be absent.
 * @param columns - Column names to keep from each example.
 * @param maxCount - Maximum number of examples rendered. Defaults to
 *   `REAL_EXAMPLES_MAX_COUNT`.
 * @returns The prompt block (starting with a blank line), or `''`.
 */
function buildRealExamplesBlock(
	examples: ReadonlyArray<Record<string, unknown>> | undefined,
	columns: string[],
	maxCount: number = REAL_EXAMPLES_MAX_COUNT,
): string {
	if (!examples || examples.length === 0 || columns.length === 0) return '';

	const filtered: Array<Record<string, string>> = [];
	for (const example of examples) {
		if (filtered.length >= maxCount) break;

		const row: Record<string, string> = {};
		let hasValue = false;
		for (const col of columns) {
			const raw = example[col];
			if (raw === undefined || raw === null) continue;
			// A column named e.g. `constructor` resolves to an inherited function,
			// which has no JSON form — `stringifyExampleValue` reports that as undefined.
			const str = stringifyExampleValue(raw);
			if (str === undefined || str.length === 0) continue;
			row[col] = truncateExampleValue(str);
			hasValue = true;
		}
		if (hasValue) filtered.push(row);
	}
	if (filtered.length === 0) return '';

	const numbered = filtered.map((row, i) => `${i + 1}. ${JSON.stringify(row)}`).join('\n');
	return [
		'',
		'Recent real inputs the agent has received in production (REFERENCE, not seeds):',
		numbered,
		'Use these as a hint about the actual domain, tone and shape of inputs the agent sees. Do NOT copy or paraphrase them — produce NEW inputs that fit the same setting.',
	].join('\n');
}
|
||||
|
||||
const BATCH_SYSTEM_INSTRUCTIONS = `You generate realistic test inputs for an n8n workflow evaluation dataset.
|
||||
|
||||
Output: JSON array of objects. Keys = exactly the provided column names. Values = short strings. No prose outside the JSON.
|
||||
|
|
@ -158,6 +206,7 @@ export interface RunBatchInput {
|
|||
rowCount: number;
|
||||
context: AgentContext | undefined;
|
||||
columns: string[];
|
||||
realExamples?: ReadonlyArray<Record<string, unknown>>;
|
||||
logger?: Pick<Logger, 'warn'>;
|
||||
}
|
||||
|
||||
|
|
@ -194,17 +243,23 @@ export async function runBatch(input: RunBatchInput): Promise<Array<Record<strin
|
|||
model: HAIKU_MODEL,
|
||||
instructions: BATCH_SYSTEM_INSTRUCTIONS,
|
||||
});
|
||||
const userText = [
|
||||
buildAgentContextBlock(input.context),
|
||||
'',
|
||||
FORMAT_INFERENCE,
|
||||
'',
|
||||
`Variation focus for this batch: length = ${input.facet.length}; mode = ${input.facet.edgeMode}.`,
|
||||
input.facet.instructions,
|
||||
'',
|
||||
`Columns: ${generatedColumns.join(', ')}`,
|
||||
`Generate exactly ${requestedRowCount} rows.`,
|
||||
].join('\n');
|
||||
const realExamplesBlock = buildRealExamplesBlock(input.realExamples, generatedColumns);
|
||||
const sections = [buildAgentContextBlock(input.context)];
|
||||
if (realExamplesBlock) sections.push(realExamplesBlock);
|
||||
sections.push(FORMAT_INFERENCE);
|
||||
sections.push(
|
||||
[
|
||||
`Variation focus for this batch: length = ${input.facet.length}; mode = ${input.facet.edgeMode}.`,
|
||||
input.facet.instructions,
|
||||
].join('\n'),
|
||||
);
|
||||
sections.push(
|
||||
[
|
||||
`Columns: ${generatedColumns.join(', ')}`,
|
||||
`Generate exactly ${requestedRowCount} rows.`,
|
||||
].join('\n'),
|
||||
);
|
||||
const userText = sections.join('\n\n');
|
||||
const result = await agent.generate(userText);
|
||||
const text = extractText(result);
|
||||
const parsed: unknown = JSON.parse(stripMarkdownFences(text));
|
||||
|
|
@ -235,6 +290,15 @@ export interface GenerateSampleRowsInput {
|
|||
columns: string[];
|
||||
rowCount?: number;
|
||||
targetAgentNodeName?: string;
|
||||
/**
|
||||
* Recent real input rows extracted from the workflow's execution history.
|
||||
* When present (typically below the history threshold that would otherwise
|
||||
* have been used directly), they are passed to the LLM as a flavour
|
||||
* reference — rows are filtered to the requested `columns`, truncated, and
|
||||
* accompanied by an explicit "reference, not seed" directive so the
|
||||
* generator produces new in-domain inputs instead of paraphrasing them.
|
||||
*/
|
||||
realExamples?: ReadonlyArray<Record<string, unknown>>;
|
||||
logger?: Pick<Logger, 'warn'>;
|
||||
}
|
||||
|
||||
|
|
@ -270,6 +334,7 @@ export async function generateSampleRows(
|
|||
rowCount: counts[i],
|
||||
context,
|
||||
columns: input.columns,
|
||||
realExamples: input.realExamples,
|
||||
logger: input.logger,
|
||||
});
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import { createBrowserCredentialSetupTool } from './orchestration/browser-creden
|
|||
import { createBuildWorkflowAgentTool } from './orchestration/build-workflow-agent.tool';
|
||||
import { createCompleteCheckpointTool } from './orchestration/complete-checkpoint.tool';
|
||||
import { createDelegateTool } from './orchestration/delegate.tool';
|
||||
import { createEvalDataAgentTool } from './orchestration/eval-data-agent.tool';
|
||||
import { createEvalSetupAgentTool } from './orchestration/eval-setup-agent.tool';
|
||||
import { createPlanWithAgentTool } from './orchestration/plan-with-agent.tool';
|
||||
import { createPlanTool } from './orchestration/plan.tool';
|
||||
|
|
@ -89,6 +90,7 @@ export function createOrchestrationTools(context: OrchestrationContext): Instanc
|
|||
[ORCHESTRATION_TOOL_IDS.BUILD_WORKFLOW_WITH_AGENT, createBuildWorkflowAgentTool(context)],
|
||||
[ORCHESTRATION_TOOL_IDS.COMPLETE_CHECKPOINT, createCompleteCheckpointTool(context)],
|
||||
[ORCHESTRATION_TOOL_IDS.EVAL_SETUP_WITH_AGENT, createEvalSetupAgentTool(context)],
|
||||
[ORCHESTRATION_TOOL_IDS.EVAL_DATA, createEvalDataAgentTool(context)],
|
||||
];
|
||||
|
||||
if (context.browserMcpConfig || hasGatewayBrowserTools(context)) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,519 @@
|
|||
import type { WorkflowJSON } from '@n8n/workflow-sdk';
|
||||
|
||||
import * as sampleRowsService from '../../evals/generate-sample-rows.service';
|
||||
import { createEvalDataAgentTool } from '../eval-data-agent.tool';
|
||||
|
||||
/** Outcome reported by the tool under test. */
type EvalDataStatus = 'imported' | 'generated' | 'skipped';

/** Where the inserted rows came from. */
type EvalDataSource = 'history' | 'synthetic';

/** Shape of the `table` summary these tests read from the tool result. */
type EvalDataTableSummary = {
	id: string;
	name: string;
	projectId?: string;
	rowCount: number;
	inputColumns: string[];
	previewRows: Array<Record<string, unknown>>;
};

/** The subset of the eval-data tool's return value that the tests assert on. */
type EvalDataToolResult = {
	status: EvalDataStatus;
	source?: EvalDataSource;
	rowCount?: number;
	expectedOutputsNeedUserReview?: boolean;
	expectedOutputColumns?: string[];
	table?: EvalDataTableSummary;
};
|
||||
|
||||
/**
 * Builds the eval-data tool from the given stub context, invokes its handler
 * with `input` (and an empty second argument), and returns the result typed as
 * `EvalDataToolResult`.
 */
async function runEvalDataTool(
	ctx: ReturnType<typeof buildOrchestrationCtx>,
	input: { workflowId: string; projectId?: string },
): Promise<EvalDataToolResult> {
	const evalDataTool = createEvalDataAgentTool(ctx as never);
	// Call through the tool object so the handler keeps its receiver.
	const rawResult: unknown = await evalDataTool.handler!(input, {} as never);
	return rawResult as EvalDataToolResult;
}
|
||||
|
||||
/**
 * Minimal workflow fixture: an evaluation trigger bound to DataTable `dt-1`
 * wired into an agent node whose prompt reads `$json.user_query`. A fresh
 * object graph is produced on every call.
 */
const evalWf = (): WorkflowJSON => {
	const trigger = {
		name: 'EvalTrig',
		type: 'n8n-nodes-base.evaluationTrigger',
		typeVersion: 1,
		parameters: { dataTableId: { value: 'dt-1' } },
		position: [0, 0],
		id: 't',
	};
	const agent = {
		name: 'Agent',
		type: '@n8n/n8n-nodes-langchain.agent',
		typeVersion: 1,
		parameters: { text: '={{ $json.user_query }}' },
		position: [200, 0],
		id: 'a',
	};
	const workflow = {
		name: 't',
		nodes: [trigger, agent],
		connections: {
			EvalTrig: { main: [[{ node: 'Agent', type: 'main', index: 0 }]] },
		},
		pinData: {},
		settings: {},
	};
	return workflow as unknown as WorkflowJSON;
};
|
||||
|
||||
/**
 * Workflow fixture with the same trigger → agent pair as the basic fixture,
 * plus an evaluation node (`setMetrics`) fed by the agent. The metrics node's
 * `expectedAnswer` reads `expected_response` from the trigger item. A fresh
 * object graph is produced on every call.
 */
const evalWfWithMetrics = (): WorkflowJSON => {
	const trigger = {
		name: 'EvalTrig',
		type: 'n8n-nodes-base.evaluationTrigger',
		typeVersion: 1,
		parameters: { dataTableId: { value: 'dt-1' } },
		position: [0, 0],
		id: 't',
	};
	const agent = {
		name: 'Agent',
		type: '@n8n/n8n-nodes-langchain.agent',
		typeVersion: 1,
		parameters: { text: '={{ $json.user_query }}' },
		position: [200, 0],
		id: 'a',
	};
	const metrics = {
		name: 'MetricN',
		type: 'n8n-nodes-base.evaluation',
		typeVersion: 1,
		parameters: {
			operation: 'setMetrics',
			expectedAnswer: "={{ $('EvalTrig').item.json.expected_response }}",
			actualAnswer: '={{ $json.output }}',
		},
		position: [400, 0],
		id: 'm',
	};
	const workflow = {
		name: 't',
		nodes: [trigger, agent, metrics],
		connections: {
			EvalTrig: { main: [[{ node: 'Agent', type: 'main', index: 0 }]] },
			Agent: { main: [[{ node: 'MetricN', type: 'main', index: 0 }]] },
		},
		pinData: {},
		settings: {},
	};
	return workflow as unknown as WorkflowJSON;
};
|
||||
|
||||
/** Value the default `insertRows` stub resolves with. */
const defaultInsertResult = {
	insertedCount: 0,
	dataTableId: 'dt-1',
	tableName: 'eval_dataset',
	projectId: 'proj-1',
};

/** Creates a logger stub whose `info`, `warn` and `error` are fresh jest mocks. */
function silentLogger() {
	const info = jest.fn();
	const warn = jest.fn();
	const error = jest.fn();
	return { info, warn, error };
}
|
||||
|
||||
/**
 * DataTable service stub with benign defaults: `insertRows` resolves with
 * `defaultInsertResult`, `getSchema` with an empty schema, `addColumn` with
 * `undefined`, and `queryRows` with an empty result. Any mock passed in
 * `overrides` replaces the default of the same name.
 */
function defaultDataTableService(
	overrides: Partial<{
		insertRows: jest.Mock;
		getSchema: jest.Mock;
		addColumn: jest.Mock;
		queryRows: jest.Mock;
	}> = {},
) {
	const defaults = {
		insertRows: jest.fn().mockResolvedValue(defaultInsertResult),
		getSchema: jest.fn().mockResolvedValue([]),
		addColumn: jest.fn().mockResolvedValue(undefined),
		queryRows: jest.fn().mockResolvedValue({ count: 0, data: [] }),
	};
	// Later spread wins, so per-test mocks take precedence over the defaults.
	return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Execution service stub with nothing to read: the first two `list` calls each
 * resolve to an empty array, and `getNodeOutput` is a plain mock.
 */
function emptyExecutionService() {
	const list = jest.fn().mockResolvedValueOnce([]).mockResolvedValueOnce([]);
	const getNodeOutput = jest.fn();
	return { list, getNodeOutput };
}
|
||||
|
||||
/**
 * Execution service stub exposing `count` successful executions (`e0`, `e1`, …).
 * The first `list` call resolves to those summaries and the second to an empty
 * array. `getNodeOutput` always answers as the `EvalTrig` node with a single
 * item carrying `user_query: "real-<executionId>"`.
 */
function trigInputHistoryExecutionService(count: number) {
	const toSummary = (_: unknown, index: number) => ({ id: `e${index}`, status: 'success' });
	const summaries = Array.from({ length: count }, toSummary);

	const list = jest.fn().mockResolvedValueOnce(summaries).mockResolvedValueOnce([]);
	const getNodeOutput = jest.fn(async (id: string) => {
		const output = {
			nodeName: 'EvalTrig',
			items: [{ json: { user_query: `real-${id}` } }],
			totalItems: 1,
			returned: { from: 0, to: 0 },
		};
		return await Promise.resolve(output);
	});

	return { list, getNodeOutput };
}
|
||||
|
||||
/**
 * Execution service stub exposing `count` successful executions (`e0`, `e1`, …).
 * The first `list` call resolves to those summaries and the second to an empty
 * array. `getNodeOutput` echoes the requested node name and returns one item:
 * `user_query: "q-<executionId>"` for `EvalTrig`, `output: "a-<executionId>"`
 * for any other node.
 */
function trigInputAgentOutputExecutionService(count: number) {
	const toSummary = (_: unknown, index: number) => ({ id: `e${index}`, status: 'success' });
	const summaries = Array.from({ length: count }, toSummary);

	const list = jest.fn().mockResolvedValueOnce(summaries).mockResolvedValueOnce([]);
	const getNodeOutput = jest.fn(async (id: string, nodeName: string) => {
		if (nodeName === 'EvalTrig') {
			return await Promise.resolve({
				nodeName,
				items: [{ json: { user_query: `q-${id}` } }],
				totalItems: 1,
				returned: { from: 0, to: 0 },
			});
		}
		return await Promise.resolve({
			nodeName,
			items: [{ json: { output: `a-${id}` } }],
			totalItems: 1,
			returned: { from: 0, to: 0 },
		});
	});

	return { list, getNodeOutput };
}
|
||||
|
||||
/** Optional pieces a test can swap into the stub orchestration context. */
interface BuildCtxOptions {
	workflow?: WorkflowJSON;
	dataTableService?: ReturnType<typeof defaultDataTableService>;
	executionService?: ReturnType<typeof emptyExecutionService>;
}

/**
 * Assembles the stub context handed to the tool under test. Anything not
 * supplied falls back to: the basic eval workflow, the default DataTable
 * service stub, and an execution service with no history. The logger is always
 * a fresh silent stub.
 */
const buildOrchestrationCtx = (opts: BuildCtxOptions = {}) => {
	const workflow = opts.workflow ?? evalWf();
	const workflowService = {
		getAsWorkflowJSON: jest.fn().mockResolvedValue(workflow),
	};
	const dataTableService = opts.dataTableService ?? defaultDataTableService();
	const executionService = opts.executionService ?? emptyExecutionService();
	const logger = silentLogger();

	return {
		domainContext: { workflowService, dataTableService, executionService, logger },
	};
};
|
||||
|
||||
describe('eval-data tool', () => {
	/** `getSchema` mock resolving to one `{ name }` entry per given column name. */
	const schemaOf = (...columnNames: string[]) =>
		jest.fn().mockResolvedValue(columnNames.map((name) => ({ name })));

	/** Spies on `generateSampleRows` and makes it resolve to `rows`. */
	const stubGenerateSampleRows = (rows: Array<Record<string, string>>) =>
		jest.spyOn(sampleRowsService, 'generateSampleRows').mockResolvedValue(rows);

	beforeEach(() => {
		jest.restoreAllMocks();
	});

	it('imports rows from execution history when >= 10 valid rows are available', async () => {
		const tables = defaultDataTableService({ getSchema: schemaOf('user_query') });
		const ctx = buildOrchestrationCtx({
			dataTableService: tables,
			executionService: trigInputHistoryExecutionService(12),
		});

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.status).toBe('imported');
		expect(outcome.source).toBe('history');
		expect(outcome.rowCount).toBe(12);
		expect(tables.insertRows).toHaveBeenCalledWith('dt-1', expect.any(Array), undefined);
	});

	it('falls back to synthetic generation when fewer than 10 valid history rows are available', async () => {
		const tables = defaultDataTableService({ getSchema: schemaOf('user_query') });
		const ctx = buildOrchestrationCtx({ dataTableService: tables });
		stubGenerateSampleRows(Array.from({ length: 10 }, (_, i) => ({ user_query: `gen-${i}` })));

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.status).toBe('generated');
		expect(outcome.source).toBe('synthetic');
		expect(outcome.rowCount).toBe(10);
		expect(tables.insertRows).toHaveBeenCalled();
	});

	it('returns skipped when no eval target exists', async () => {
		const emptyWorkflow: WorkflowJSON = {
			name: 't',
			nodes: [],
			connections: {},
			pinData: {},
			settings: {},
		} as never;
		const ctx = buildOrchestrationCtx({ workflow: emptyWorkflow });

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.status).toBe('skipped');
	});

	it('populates with the fallback "input" column when agent has no $json refs', async () => {
		const trigger = {
			name: 'EvalTrig',
			type: 'n8n-nodes-base.evaluationTrigger',
			typeVersion: 1,
			parameters: { dataTableId: { value: 'dt-1' } },
			position: [0, 0],
			id: 't',
		};
		// The agent prompt is a literal: it references no `$json` field.
		const literalAgent = {
			name: 'Agent',
			type: '@n8n/n8n-nodes-langchain.agent',
			typeVersion: 1,
			parameters: { text: 'literal prompt' },
			position: [200, 0],
			id: 'a',
		};
		const workflow = {
			name: 't',
			nodes: [trigger, literalAgent],
			connections: {
				EvalTrig: { main: [[{ node: 'Agent', type: 'main', index: 0 }]] },
			},
			pinData: {},
			settings: {},
		} as unknown as WorkflowJSON;
		const tables = defaultDataTableService({ getSchema: schemaOf('input') });
		const ctx = buildOrchestrationCtx({ workflow, dataTableService: tables });
		stubGenerateSampleRows([{ input: 'sample' }]);

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.status).toBe('generated');
		expect(tables.insertRows).toHaveBeenCalledWith('dt-1', [{ input: 'sample' }], undefined);
	});

	it('populates expected_* columns from agent output in the history path', async () => {
		const tables = defaultDataTableService({
			getSchema: schemaOf('user_query', 'expected_response'),
		});
		const ctx = buildOrchestrationCtx({
			workflow: evalWfWithMetrics(),
			dataTableService: tables,
			executionService: trigInputAgentOutputExecutionService(12),
		});

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.status).toBe('imported');
		expect(outcome.rowCount).toBe(12);
		expect(tables.insertRows).toHaveBeenCalledWith('dt-1', expect.any(Array), undefined);
		expect(tables.insertRows).toHaveBeenCalledWith(
			'dt-1',
			expect.arrayContaining([{ user_query: 'q-e0', expected_response: 'a-e0' }]),
			undefined,
		);
	});

	it('synthetic path generates ONLY input columns and flags expected outputs for user review', async () => {
		const tables = defaultDataTableService({
			getSchema: schemaOf('user_query', 'expected_response'),
		});
		const ctx = buildOrchestrationCtx({
			workflow: evalWfWithMetrics(),
			dataTableService: tables,
		});
		const generateSpy = stubGenerateSampleRows([{ user_query: 'q' }]);

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(generateSpy).toHaveBeenCalledWith(
			expect.objectContaining({
				columns: ['user_query'],
				rowCount: 10,
			}),
		);
		expect(outcome).toMatchObject({
			status: 'generated',
			source: 'synthetic',
			expectedOutputsNeedUserReview: true,
			expectedOutputColumns: ['expected_response'],
		});
	});

	it('does not flag user review on the history path (real outputs are ground truth)', async () => {
		const tables = defaultDataTableService({
			getSchema: schemaOf('user_query', 'expected_response'),
		});
		const ctx = buildOrchestrationCtx({
			workflow: evalWfWithMetrics(),
			dataTableService: tables,
			executionService: trigInputAgentOutputExecutionService(12),
		});

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.source).toBe('history');
		expect(outcome.expectedOutputsNeedUserReview).toBeUndefined();
	});

	it('does not flag user review when there are no expected-output columns', async () => {
		const tables = defaultDataTableService({ getSchema: schemaOf('user_query') });
		const ctx = buildOrchestrationCtx({ dataTableService: tables });
		stubGenerateSampleRows([{ user_query: 'q' }]);

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.source).toBe('synthetic');
		expect(outcome.expectedOutputsNeedUserReview).toBeUndefined();
	});

	it('adds missing columns to the DataTable before inserting rows', async () => {
		// The table only knows `user_query`; `expected_response` has to be created.
		const tables = defaultDataTableService({ getSchema: schemaOf('user_query') });
		const ctx = buildOrchestrationCtx({
			workflow: evalWfWithMetrics(),
			dataTableService: tables,
		});
		stubGenerateSampleRows([{ user_query: 'q', expected_response: 'r' }]);

		await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(tables.getSchema).toHaveBeenCalledWith('dt-1', undefined);
		expect(tables.addColumn).toHaveBeenCalledTimes(1);
		expect(tables.addColumn).toHaveBeenCalledWith(
			'dt-1',
			{ name: 'expected_response', type: 'string' },
			undefined,
		);
		expect(tables.insertRows).toHaveBeenCalled();
		// The column must exist before any row is written.
		const [firstAddColumnCall] = tables.addColumn.mock.invocationCallOrder;
		const [firstInsertCall] = tables.insertRows.mock.invocationCallOrder;
		expect(firstAddColumnCall).toBeLessThan(firstInsertCall);
	});

	it('does not add columns that already exist in the DataTable schema', async () => {
		const tables = defaultDataTableService({
			getSchema: schemaOf('user_query', 'expected_response'),
		});
		const ctx = buildOrchestrationCtx({
			workflow: evalWfWithMetrics(),
			dataTableService: tables,
		});
		stubGenerateSampleRows([{ user_query: 'q', expected_response: 'r' }]);

		await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(tables.addColumn).not.toHaveBeenCalled();
		expect(tables.insertRows).toHaveBeenCalled();
	});

	it('forwards projectId to insertRows when present', async () => {
		const tables = defaultDataTableService({ getSchema: schemaOf('user_query') });
		const ctx = buildOrchestrationCtx({ dataTableService: tables });
		stubGenerateSampleRows([{ user_query: 'q' }]);

		await runEvalDataTool(ctx, { workflowId: 'w1', projectId: 'proj-1' });

		expect(tables.insertRows).toHaveBeenCalledWith('dt-1', expect.any(Array), {
			projectId: 'proj-1',
		});
	});

	it('returns a `table` summary so the agent can recap the populated dataset to the user', async () => {
		const storedRows = [
			{
				user_query:
					'first question with a really long body that should be truncated past eighty characters of content easily',
			},
			{ user_query: 'second' },
		];
		const tables = defaultDataTableService({
			getSchema: schemaOf('user_query'),
			queryRows: jest.fn().mockResolvedValue({ count: 2, data: storedRows }),
		});
		const ctx = buildOrchestrationCtx({ dataTableService: tables });
		stubGenerateSampleRows([{ user_query: 'q' }]);

		const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

		expect(outcome.table).toMatchObject({
			id: 'dt-1',
			name: 'eval_dataset',
			projectId: 'proj-1',
			rowCount: 1,
			inputColumns: ['user_query'],
		});
		expect(outcome.table?.previewRows).toHaveLength(2);
		// The long first value is expected to come back shortened with an ellipsis.
		expect(String(outcome.table?.previewRows[0]?.user_query)).toMatch(/…$/);
	});

	describe('few-shot seeding from history', () => {
		it('passes residual history rows to generateSampleRows when below threshold', async () => {
			// Three valid history rows is under the 10-row threshold: the tool goes
			// synthetic, but the generator should still receive those rows.
			const tables = defaultDataTableService({ getSchema: schemaOf('user_query') });
			const ctx = buildOrchestrationCtx({
				dataTableService: tables,
				executionService: trigInputHistoryExecutionService(3),
			});
			const generateSpy = stubGenerateSampleRows([{ user_query: 'synth' }]);

			const outcome = await runEvalDataTool(ctx, { workflowId: 'w1' });

			expect(outcome.source).toBe('synthetic');
			const generatorInput = generateSpy.mock.calls[0]?.[0];
			expect(generatorInput?.realExamples).toEqual(
				expect.arrayContaining([
					expect.objectContaining({ user_query: 'real-e0' }),
					expect.objectContaining({ user_query: 'real-e1' }),
					expect.objectContaining({ user_query: 'real-e2' }),
				]),
			);
		});

		it('does not pass realExamples when no history rows are available', async () => {
			const tables = defaultDataTableService({ getSchema: schemaOf('user_query') });
			const ctx = buildOrchestrationCtx({ dataTableService: tables });
			const generateSpy = stubGenerateSampleRows([{ user_query: 'synth' }]);

			await runEvalDataTool(ctx, { workflowId: 'w1' });

			const generatorInput = generateSpy.mock.calls[0]?.[0];
			expect(generatorInput).not.toHaveProperty('realExamples');
		});
	});
});
|
||||
|
|
@ -0,0 +1,244 @@
|
|||
import { Tool } from '@n8n/agents';
|
||||
import { z } from 'zod';
|
||||
|
||||
import type { InstanceAiDataTableService, OrchestrationContext } from '../../types';
|
||||
import { analyzeEvalDataRequirements } from '../evals/eval-data-requirements.service';
|
||||
import { extractRowsFromExecutionHistory } from '../evals/extract-rows-from-history.service';
|
||||
import { generateSampleRows } from '../evals/generate-sample-rows.service';
|
||||
|
||||
// NOTE(review): presumably the minimum number of valid history rows needed to
// import from execution history instead of generating — usage is outside this
// view; the accompanying tests describe a 10-row threshold.
const HISTORY_THRESHOLD = 10;
|
||||
// NOTE(review): presumably the row count requested from `generateSampleRows`
// on the synthetic path — usage is outside this view; the accompanying tests
// expect `rowCount: 10`.
const GENERATE_ROW_COUNT = 10;
|
||||
|
||||
/**
 * Makes sure the DataTable has a column for every name in `extraColumns` and
 * for every key used by `rows`.
 *
 * When at least one column name is referenced, the table schema is fetched
 * once and each name absent from it is added as a `string` column, one call at
 * a time, in first-seen order (`extraColumns` first, then row keys). When
 * nothing is referenced, the service is not called at all.
 *
 * @param dataTableService - Service used to read the schema and add columns.
 * @param dataTableId - ID of the DataTable to update.
 * @param rows - Rows whose keys must exist as columns.
 * @param extraColumns - Additional column names that must exist.
 * @param options - Passed through unchanged to `getSchema` and `addColumn`.
 */
async function ensureColumnsExist(
	dataTableService: InstanceAiDataTableService,
	dataTableId: string,
	rows: Array<Record<string, unknown>>,
	extraColumns: readonly string[],
	options: { projectId?: string } | undefined,
): Promise<void> {
	const rowKeys = rows.flatMap((row) => Object.keys(row));
	const wanted = new Set<string>([...extraColumns, ...rowKeys]);
	if (wanted.size === 0) return;

	const schema = await dataTableService.getSchema(dataTableId, options);
	const present = new Set(schema.map((column) => column.name));

	for (const name of wanted) {
		if (present.has(name)) continue;
		await dataTableService.addColumn(dataTableId, { name, type: 'string' }, options);
	}
}
|
||||
|
||||
/** Input accepted by the `eval-data` tool. */
const evalDataInputSchema = z.object({
|
||||
workflowId: z.string().describe('ID of the workflow whose eval DataTable should be populated'),
|
||||
// Optional. When set, the handler forwards it as the `projectId` option to
// the DataTable column-creation and row-insert calls.
projectId: z.string().optional(),
|
||||
});
|
||||
|
||||
/** Maximum number of rows included in the preview returned by the tool. */
const PREVIEW_ROW_COUNT = 3;
|
||||
/** String values longer than this are cut and suffixed with `…` in the preview. */
const PREVIEW_VALUE_MAX_LEN = 80;
|
||||
|
||||
/** Shape of the `table` snapshot attached to successful `eval-data` results. */
const tableSummarySchema = z.object({
|
||||
// The handler fills this with the eval target's DataTable id.
id: z.string(),
|
||||
// Taken from the insert result's `tableName`.
name: z.string(),
|
||||
projectId: z.string().optional(),
|
||||
// Number of rows handed to the insert call in this run (not a table total).
rowCount: z.number(),
|
||||
inputColumns: z.array(z.string()),
|
||||
expectedOutputColumns: z.array(z.string()),
|
||||
// Up to PREVIEW_ROW_COUNT rows with long strings truncated; empty when the
// preview query fails.
previewRows: z.array(z.record(z.string(), z.unknown())),
|
||||
});
|
||||
|
||||
/** Result returned by the `eval-data` tool handler. */
const outputSchema = z.object({
|
||||
// 'imported' = rows came from execution history, 'generated' = synthetic
// rows, 'skipped' = the handler returned early without inserting anything.
status: z.enum(['imported', 'generated', 'skipped']),
|
||||
// Number of rows handed to the insert call; absent on 'skipped'.
rowCount: z.number().optional(),
|
||||
source: z.enum(['history', 'synthetic']).optional(),
|
||||
// Only set on 'skipped' results.
reason: z.string().optional(),
|
||||
/**
|
||||
* True when synthetic rows were inserted with empty expected-output columns.
|
||||
* The agent must tell the user to fill those columns in before running the
|
||||
* evaluation, so the eval measures correctness instead of self-consistency
|
||||
* with the generator's own guess at the right answer.
|
||||
*/
|
||||
expectedOutputsNeedUserReview: z.boolean().optional(),
|
||||
expectedOutputColumns: z.array(z.string()).optional(),
|
||||
/**
|
||||
* Snapshot of the populated DataTable so the agent can show the user what
|
||||
* was generated alongside the metric setup, without making them dig through
|
||||
* the data-tables UI to verify. Includes the table id (for deep-linking) and
|
||||
* a short row preview. Only present on success paths.
|
||||
*/
|
||||
table: tableSummarySchema.optional(),
|
||||
});
|
||||
|
||||
/**
 * Shortens a single cell value for the row preview.
 *
 * Non-string values and strings of at most `maxLen` UTF-16 code units are
 * returned unchanged. Longer strings are cut to `maxLen` code units and
 * suffixed with `…`.
 *
 * @param value - Cell value of any type.
 * @param maxLen - Maximum number of code units kept before the ellipsis.
 *   Defaults to `PREVIEW_VALUE_MAX_LEN`.
 * @returns The original value, or the truncated string.
 */
function truncateForPreview(value: unknown, maxLen: number = PREVIEW_VALUE_MAX_LEN): unknown {
	if (typeof value !== 'string' || value.length <= maxLen) return value;

	// A plain slice can end between the two halves of a surrogate pair (e.g. an
	// emoji straddling the limit), leaving a lone high surrogate that renders
	// as a replacement character. Back off by one code unit in that case.
	let cut = maxLen;
	const lastKept = value.charCodeAt(cut - 1);
	if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;

	return `${value.slice(0, cut)}…`;
}
|
||||
|
||||
/**
 * Produces the short preview shown to the user: the first `PREVIEW_ROW_COUNT`
 * rows, each copied into a fresh object whose values have been passed through
 * `truncateForPreview`. The input rows are not modified.
 */
function buildPreviewRows(rows: Array<Record<string, unknown>>): Array<Record<string, unknown>> {
	const preview: Array<Record<string, unknown>> = [];
	for (const sourceRow of rows.slice(0, PREVIEW_ROW_COUNT)) {
		const clipped: Record<string, unknown> = {};
		for (const column of Object.keys(sourceRow)) {
			clipped[column] = truncateForPreview(sourceRow[column]);
		}
		preview.push(clipped);
	}
	return preview;
}
|
||||
|
||||
/**
 * Builds the `eval-data` orchestration tool, which fills a workflow's eval
 * DataTable with rows.
 *
 * Handler flow:
 * 1. Return `skipped` when the domain context is missing, the workflow has no
 *    eval target, or the target has no agent node name.
 * 2. Extract rows from execution history. With at least `HISTORY_THRESHOLD`
 *    rows they are inserted as-is (`imported`).
 * 3. Otherwise generate `GENERATE_ROW_COUNT` synthetic rows for the input
 *    columns only (`generated`), passing any history rows along as
 *    `realExamples`.
 * 4. Create missing columns, insert the rows, then query a small preview.
 *
 * Failures while creating columns or inserting rows are logged and rethrown;
 * a failing preview query is logged and otherwise ignored.
 *
 * NOTE(review): the description text hard-codes "10" (duplicating
 * `HISTORY_THRESHOLD`) and promises "at most 25 rows", which nothing in this
 * function enforces — presumably the history extractor caps it; confirm.
 * NOTE(review): the preview is read back with a plain `limit` query, so on a
 * table that already had rows it may not show the rows inserted here — verify.
 *
 * @param context - Orchestration context; only `domainContext` is read.
 */
export function createEvalDataAgentTool(context: OrchestrationContext) {
	const toolDescription = [
		'Populate an eval DataTable for a workflow that already has its eval setup wired. ',
		'First scans the workflow execution history for real rows (these include real expected ',
		'outputs); if fewer than 10 valid rows are available, generates synthetic rows with INPUT ',
		'columns only — expected-output columns are left empty so the user can fill them in with ',
		'the correct answers. We never auto-fill expected outputs with model-generated guesses, ',
		'because that would measure self-consistency rather than correctness. ',
		'Inserts at most 25 rows total. Synchronous — no sub-agent, no HITL.',
	].join('');

	return new Tool('eval-data')
		.description(toolDescription)
		.input(evalDataInputSchema)
		.output(outputSchema)
		.handler(async (input: z.infer<typeof evalDataInputSchema>) => {
			const domainCtx = context.domainContext;
			if (!domainCtx) {
				return { status: 'skipped' as const, reason: 'Domain context unavailable.' };
			}

			// Small logging helpers; every message is prefixed with the tool id.
			const emit = (level: 'info' | 'warn' | 'error', message: string): void => {
				domainCtx.logger?.[level]?.(`[eval-data] ${message}`);
			};
			const asJson = (value: unknown) => JSON.stringify(value);
			const describeError = (error: unknown) =>
				error instanceof Error ? error.message : String(error);

			emit('info', `start workflowId=${input.workflowId} projectId=${asJson(input.projectId)}`);

			const workflow = await domainCtx.workflowService.getAsWorkflowJSON(input.workflowId);
			const requirements = analyzeEvalDataRequirements(workflow);
			const target = requirements.targets[0];
			if (!target) {
				emit('warn', `skip:no-target reason=${asJson(requirements.reason)}`);
				return {
					status: 'skipped' as const,
					reason: requirements.reason ?? 'No eval target.',
				};
			}

			const { dataTableId, inputColumns, expectedOutputColumns, expectedToActualPairs } = target;
			const agentNodeName = target.targetAgentNodeName;
			emit(
				'info',
				[
					`target dataTableId=${dataTableId}`,
					`agent=${asJson(agentNodeName)}`,
					`inputColumns=${asJson(inputColumns)}`,
					`expectedOutputColumns=${asJson(expectedOutputColumns)}`,
					`pairs=${asJson(expectedToActualPairs)}`,
				].join(' '),
			);
			if (!agentNodeName) {
				emit('warn', 'skip:no-agent');
				return {
					status: 'skipped' as const,
					reason: 'No agent node reachable from EvaluationTrigger.',
				};
			}

			const history = await extractRowsFromExecutionHistory(domainCtx, {
				workflow,
				workflowId: input.workflowId,
				agentNodeName,
				inputColumns,
				expectedToActualPairs,
			});
			const historyRows = history.rows;
			emit('info', `history-extracted count=${historyRows.length}`);

			// Enough real rows: import them untouched. Otherwise generate inputs
			// only — expected-output columns stay empty so the user supplies the
			// ground truth. Whatever history exists below the threshold is handed
			// over as `realExamples`: a domain reference, not seeds to paraphrase.
			const hasEnoughHistory = historyRows.length >= HISTORY_THRESHOLD;
			const source: 'history' | 'synthetic' = hasEnoughHistory ? 'history' : 'synthetic';
			const rowsToInsert: Array<Record<string, unknown>> = hasEnoughHistory
				? historyRows
				: await generateSampleRows({
						workflow,
						columns: inputColumns,
						rowCount: GENERATE_ROW_COUNT,
						targetAgentNodeName: agentNodeName,
						...(historyRows.length > 0 ? { realExamples: historyRows } : {}),
					});

			const firstRow = rowsToInsert[0];
			const firstRowKeys = firstRow ? Object.keys(firstRow) : [];
			emit(
				'info',
				`rows-prepared source=${source} count=${rowsToInsert.length} firstRowKeys=${asJson(firstRowKeys)}`,
			);

			const dataTableOptions = input.projectId ? { projectId: input.projectId } : undefined;

			// Synthetic rows never mention the expected-output columns, so ask for
			// them explicitly: the user needs somewhere to type the correct answer.
			const extraColumns = hasEnoughHistory ? [] : expectedOutputColumns;

			try {
				await ensureColumnsExist(
					domainCtx.dataTableService,
					dataTableId,
					rowsToInsert,
					extraColumns,
					dataTableOptions,
				);
			} catch (error) {
				emit('error', `ensureColumnsExist-failed error=${asJson(describeError(error))}`);
				throw error;
			}

			let insertResult: Awaited<ReturnType<typeof domainCtx.dataTableService.insertRows>>;
			try {
				insertResult = await domainCtx.dataTableService.insertRows(
					dataTableId,
					rowsToInsert,
					dataTableOptions,
				);
				emit('info', `insertRows-ok result=${asJson(insertResult)}`);
			} catch (error) {
				emit('error', `insertRows-failed error=${asJson(describeError(error))}`);
				throw error;
			}

			// Best-effort preview so the agent can recap WHAT was written. The
			// insert has already succeeded, so a failure here only costs the recap.
			let previewRows: Array<Record<string, unknown>> = [];
			try {
				const previewQuery = {
					limit: PREVIEW_ROW_COUNT,
					...(insertResult.projectId ? { projectId: insertResult.projectId } : {}),
				};
				const preview = await domainCtx.dataTableService.queryRows(dataTableId, previewQuery);
				previewRows = buildPreviewRows(preview.data);
			} catch (error) {
				emit('warn', `preview-query-failed error=${asJson(describeError(error))}`);
			}

			emit('info', `done source=${source} rowCount=${rowsToInsert.length}`);

			const table = {
				id: dataTableId,
				name: insertResult.tableName,
				...(insertResult.projectId ? { projectId: insertResult.projectId } : {}),
				rowCount: rowsToInsert.length,
				inputColumns,
				expectedOutputColumns,
				previewRows,
			};

			// Only synthetic runs with expected-output columns need the user to
			// fill in ground truth before evaluating.
			const needsReview = !hasEnoughHistory && expectedOutputColumns.length > 0;
			const reviewFields = needsReview
				? { expectedOutputsNeedUserReview: true as const, expectedOutputColumns }
				: {};

			return {
				status: hasEnoughHistory ? ('imported' as const) : ('generated' as const),
				rowCount: rowsToInsert.length,
				source,
				...reviewFields,
				table,
			};
		})
		.build();
}
|
||||
|
|
@ -22,6 +22,7 @@ export const ORCHESTRATION_TOOL_IDS = {
|
|||
DELEGATE: 'delegate',
|
||||
BUILD_WORKFLOW_WITH_AGENT: 'build-workflow-with-agent',
|
||||
EVAL_SETUP_WITH_AGENT: 'eval-setup-with-agent',
|
||||
EVAL_DATA: 'eval-data',
|
||||
MANAGE_DATA_TABLES_WITH_AGENT: 'manage-data-tables-with-agent',
|
||||
RESEARCH_WITH_AGENT: 'research-with-agent',
|
||||
BROWSER_CREDENTIAL_SETUP: 'browser-credential-setup',
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user