mirror of
https://github.com/n8n-io/n8n.git
synced 2026-06-02 17:57:06 +02:00
121 lines
3.8 KiB
TypeScript
121 lines
3.8 KiB
TypeScript
// ---------------------------------------------------------------------------
// Binary checks evaluator for instance-ai
// ---------------------------------------------------------------------------
|
|
|
|
import type {
|
|
BinaryCheck,
|
|
BinaryCheckContext,
|
|
BinaryCheckResult,
|
|
CheckOutcome,
|
|
CheckStatus,
|
|
} from './types';
|
|
import type { WorkflowResponse } from '../clients/n8n-client';
|
|
import type { Feedback } from '../subagent/types';
|
|
import { DETERMINISTIC_CHECKS, LLM_CHECKS } from './checks/index';
|
|
|
|
/** Value written to the `evaluator` field of every feedback entry built by `runBinaryChecks`. */
const EVALUATOR_NAME = 'binary-checks';
|
|
|
|
/** Optional knobs accepted by `runBinaryChecks`. */
export interface BinaryChecksOptions {
	/**
	 * Allow-list of check names to execute.
	 * Leaving it out (or passing an empty list) runs every eligible check.
	 */
	only?: string[];
}
|
|
|
|
/** Result bundle produced by a single `runBinaryChecks` invocation. */
export interface BinaryChecksRun {
	/** One entry per scored (non-N/A) check, followed by the aggregate `pass_rate` entry. */
	feedback: Feedback[];
	/** One entry per executed check, including those reported as N/A. */
	outcomes: CheckOutcome[];
}
|
|
|
|
/**
 * Runs the selected binary checks against a workflow and summarises the results.
 *
 * All selected checks are started together and every one is awaited; a check that
 * throws or rejects is recorded as a `fail` outcome whose comment carries the error
 * message, so one broken check never aborts the others.
 *
 * LLM checks are left out entirely when `ctx.modelId` is not set — they do not
 * appear in `outcomes` at all.
 *
 * @param workflow - Workflow handed to each check's `run` method.
 * @param ctx - Context handed to each check; `modelId` decides whether LLM checks are eligible.
 * @param options - Optional filter restricting which checks run.
 * @returns Per-check outcomes plus feedback entries: one `metric` entry per non-N/A
 *   check and a final `pass_rate` score entry.
 * @throws Error (as a rejected promise) when `options.only` names a check that is not registered.
 */
export async function runBinaryChecks(
	workflow: WorkflowResponse,
	ctx: BinaryCheckContext,
	options?: BinaryChecksOptions,
): Promise<BinaryChecksRun> {
	const checks = resolveChecks(options?.only, ctx);

	// The async wrapper turns a synchronous throw inside `run` into a rejection,
	// so allSettled captures it like any other failure.
	const settledRuns = await Promise.allSettled(
		checks.map(async (check) => await check.run(workflow, ctx)),
	);

	// allSettled preserves input order, so index `position` pairs each result with its check.
	const outcomes: CheckOutcome[] = settledRuns.map((settled, position) => {
		const check = checks[position];

		if (settled.status === 'rejected') {
			const message =
				settled.reason instanceof Error ? settled.reason.message : String(settled.reason);
			return {
				name: check.name,
				description: check.description,
				kind: check.kind,
				dimension: check.dimension,
				status: 'fail',
				comment: `Error: ${message}`,
			};
		}

		return toOutcome(check, settled.value);
	});

	// N/A outcomes are excluded from both the per-check feedback and the pass rate.
	const scored = outcomes.filter((outcome) => outcome.status !== 'n_a');

	const feedback: Feedback[] = scored.map((outcome) => ({
		evaluator: EVALUATOR_NAME,
		metric: outcome.name,
		score: outcome.status === 'pass' ? 1 : 0,
		kind: 'metric' as const,
		...(outcome.comment ? { comment: outcome.comment } : {}),
	}));

	const totalScored = scored.length;
	const naCount = outcomes.length - totalScored;
	const passCount = scored.reduce(
		(count, outcome) => (outcome.status === 'pass' ? count + 1 : count),
		0,
	);
	// With nothing scored the rate is reported as 0 rather than dividing by zero.
	const passRate = totalScored === 0 ? 0 : passCount / totalScored;

	const naSuffix = naCount > 0 ? ` (${String(naCount)} N/A)` : '';
	const passRateComment = `${String(passCount)}/${String(totalScored)} checks passed${naSuffix}`;

	feedback.push({
		evaluator: EVALUATOR_NAME,
		metric: 'pass_rate',
		score: passRate,
		kind: 'score',
		comment: passRateComment,
	});

	return { feedback, outcomes };
}
|
|
|
|
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Combines a check's static metadata with the result of running it.
 *
 * Status is `n_a` when the result explicitly sets `applicable` to `false`;
 * otherwise it is `pass` or `fail` according to `result.pass`. The `comment`
 * key is only present when the result carries a non-empty comment.
 *
 * @param check - The check that was executed.
 * @param result - What the check's `run` resolved to.
 * @returns The outcome record for this check.
 */
function toOutcome(check: BinaryCheck, result: BinaryCheckResult): CheckOutcome {
	const { name, description, kind, dimension } = check;

	let status: CheckStatus;
	if (result.applicable === false) {
		status = 'n_a';
	} else if (result.pass) {
		status = 'pass';
	} else {
		status = 'fail';
	}

	const commentPart = result.comment ? { comment: result.comment } : {};

	return { name, description, kind, dimension, status, ...commentPart };
}
|
|
|
|
/**
 * Picks the checks to run for this invocation.
 *
 * Eligibility: every registered check when `ctx.modelId` is truthy, otherwise only
 * the deterministic ones. With no `only` filter (undefined or empty) the eligible
 * list is returned as-is.
 *
 * @param only - Optional allow-list of check names.
 * @param ctx - Context whose `modelId` gates the LLM checks.
 * @returns Eligible checks, narrowed to `only` when it is non-empty.
 * @throws Error when `only` contains a name that matches no registered check.
 */
function resolveChecks(only: string[] | undefined, ctx: BinaryCheckContext): BinaryCheck[] {
	const registered = [...DETERMINISTIC_CHECKS, ...LLM_CHECKS];
	const runnable = ctx.modelId ? registered : DETERMINISTIC_CHECKS;

	if (!only?.length) {
		return runnable;
	}

	// Names are validated against every registered check rather than just the
	// runnable ones: an LLM check requested without a modelId is silently
	// skipped instead of being rejected as unknown.
	const registeredNames = new Set(registered.map((check) => check.name));
	const unknown = only.filter((name) => !registeredNames.has(name));
	if (unknown.length > 0) {
		const available = Array.from(registeredNames).join(', ');
		throw new Error(`Unknown binary check(s): ${unknown.join(', ')}. Available: ${available}`);
	}

	const requested = new Set(only);
	return runnable.filter((check) => requested.has(check.name));
}
|