n8n/packages/@n8n/instance-ai/evaluations/index.ts
José Braulio González Valido 81ea56fa6b
test(ai-builder): Add multi-turn capability for IAI evals (no-changelog) (#30586)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 13:03:35 +00:00

77 lines
2.3 KiB
TypeScript

// ---------------------------------------------------------------------------
// Public API for the instance-ai workflow evaluation framework
//
// This module exports the domain logic used by the CLI (evaluations/cli/)
// and available for custom orchestration (e.g. LangSmith evaluate).
// ---------------------------------------------------------------------------
// -- Client & Auth --
// Re-exports `N8nClient` as a runtime value, plus three type-only exports
// from the same module (`./clients/n8n-client`).
// NOTE(review): no auth-specific symbol is re-exported in this section;
// presumably authentication is handled through `N8nClient` — confirm.
export { N8nClient } from './clients/n8n-client';
export type { WorkflowResponse, WorkflowNodeResponse, ExecutionDetail } from './clients/n8n-client';
// -- Test case data --
// Re-exports `loadWorkflowTestCasesWithFiles` (runtime) and
// `WorkflowTestCaseWithFile` (type-only) from `./data/workflows`.
// NOTE(review): presumably the type describes what the loader yields — confirm.
export { loadWorkflowTestCasesWithFiles } from './data/workflows';
export type { WorkflowTestCaseWithFile } from './data/workflows';
// -- Credentials --
// Re-exports `seedCredentials` and `cleanupCredentials` (runtime) and
// `SeedResult` (type-only) from `./credentials/seeder`.
// NOTE(review): presumably a setup/teardown pair, with `SeedResult` being what
// seeding returns — confirm against the seeder module.
export { seedCredentials, cleanupCredentials } from './credentials/seeder';
export type { SeedResult } from './credentials/seeder';
// -- Runner --
// Everything below comes from `./harness/runner`. Two ways of driving it are
// exposed: an all-in-one API, and a split API where the workflow is built once
// and scenarios are then run independently.
export {
	// all-in-one
	runWorkflowTestCase,
	runWithConcurrency,
	// split API: build once, run scenarios independently
	buildWorkflow,
	executeScenario,
	cleanupBuild,
} from './harness/runner';
export type { BuildResult, BuildWorkflowConfig } from './harness/runner';
// -- Workflow discovery --
// Re-exports `snapshotWorkflowIds` (runtime) from `./outcome/workflow-discovery`.
export { snapshotWorkflowIds } from './outcome/workflow-discovery';
// -- Logger --
// `createLogger` is the runtime export; `EvalLogger` is type-only. The two are
// kept in separate statements (value export, then `export type`) to follow the
// convention used by the other sections of this file, rather than mixing them
// with an inline `type` modifier.
export { createLogger } from './harness/logger';
export type { EvalLogger } from './harness/logger';
// -- Types --
// Type-only re-exports from `./types`; specifiers are listed alphabetically.
export type {
	ChecklistItem,
	ChecklistResult,
	ExecutionScenario,
	ExecutionScenarioResult,
	WorkflowTestCase,
	WorkflowTestCaseResult,
} from './types';
// -- Comparison (regression detection) --
// Re-exports from the four `./comparison/*` modules. Statement order is kept as
// compare -> statistics -> format -> fetch-baseline; specifiers inside each
// statement are listed alphabetically.

// ./comparison/compare
export {
	byVerdict,
	compareBuckets,
	hardRegressions,
	improvements,
	softRegressions,
	watchList,
} from './comparison/compare';
export type {
	AggregateComparison,
	ComparisonResult,
	ExperimentBucket,
	FailureCategoryComparison,
	ScenarioComparison,
	ScenarioCounts,
} from './comparison/compare';

// ./comparison/statistics
export {
	classifyScenario,
	fishersExactOneSidedLeft,
	wilsonInterval,
} from './comparison/statistics';
export type {
	ClassifyOptions,
	ScenarioClassification,
	ScenarioVerdict,
	TierThresholds,
} from './comparison/statistics';

// ./comparison/format
export { formatComparisonMarkdown, formatComparisonTerminal } from './comparison/format';

// ./comparison/fetch-baseline
export {
	BASELINE_EXPERIMENT_PREFIX,
	fetchBaselineBucket,
	findLatestBaseline,
} from './comparison/fetch-baseline';