// Mirror of https://github.com/n8n-io/n8n.git (synced 2026-06-03 10:17:00 +02:00).
// Original listing: 118 lines, 2.7 KiB, TypeScript.
/**
 * V2 Evaluation Harness
 *
 * A factory-based, testable evaluation system for AI workflow generation.
 *
 * Key features:
 * - Factory pattern for evaluator creation
 * - Parallel evaluator execution
 * - Both local and LangSmith modes
 * - Centralized lifecycle hooks for logging
 * - Pre-computed feedback pattern for LangSmith compatibility
 */

// Core runner
// Re-exports `runEvaluation` from the harness runner module.
export { runEvaluation } from './harness/runner';

// Types
// `isGenerationResult` is the only value (runtime) export in this list;
// every other name carries the inline `type` modifier and is erased on emit.
export {
	isGenerationResult,
	type Feedback,
	type EvaluationContext,
	type TestCaseContext,
	type GlobalRunContext,
	type Evaluator,
	type TestCase,
	type RunConfig,
	type ExampleResult,
	type RunSummary,
	type EvaluationLifecycle,
	type LangsmithOptions,
	type GenerationResult,
	type SubgraphExampleOutput,
} from './harness/harness-types';

// Lifecycle
// Lifecycle constructors and the merge helper, plus the options type for the
// console lifecycle, all re-exported from the harness lifecycle module.
export {
	createConsoleLifecycle,
	createQuietLifecycle,
	mergeLifecycles,
	type ConsoleLifecycleOptions,
} from './harness/lifecycle';

// Evaluator factories
// Six `create*Evaluator` factories followed by their associated option and
// context types, re-exported from the evaluators module.
export {
	createLLMJudgeEvaluator,
	createProgrammaticEvaluator,
	createPairwiseEvaluator,
	createSimilarityEvaluator,
	createResponderEvaluator,
	createExecutionEvaluator,
	type PairwiseEvaluatorOptions,
	type SimilarityEvaluatorOptions,
	type ResponderEvaluationContext,
} from './evaluators';

// Subgraph evaluation
// The runner factory and its types come from `subgraph-runner`; the two
// evaluation entry points live in separate modules.
export {
	createSubgraphRunner,
	type SubgraphName,
	type SubgraphRunFn,
} from './harness/subgraph-runner';
export { runSubgraphEvaluation } from './harness/subgraph-evaluation';
export { runLocalSubgraphEvaluation } from './harness/subgraph-evaluation-local';

// Introspection lifecycle
// Re-exports the introspection-analysis lifecycle factory and its options type.
export {
	createIntrospectionAnalysisLifecycle,
	type IntrospectionAnalysisOptions,
} from './lifecycles/introspection-analysis';

// Output
// Re-exports the artifact saver factory together with its instance and
// options types from the harness output module.
export {
	createArtifactSaver,
	type ArtifactSaver,
	type ArtifactSaverOptions,
} from './harness/output';

// Trace filtering (re-exported from v1 for convenience)
// Sourced from the `langsmith/trace-filters` module rather than `harness/`.
export {
	createTraceFilters,
	isMinimalTracingEnabled,
	type TraceFilters,
} from './langsmith/trace-filters';

// Score calculation utilities
// Helper functions first, then the two default-weight constants, then the
// types describing weights, aggregated scores and parsed feedback keys.
export {
	parseFeedbackKey,
	extractCategory,
	groupByEvaluator,
	calculateWeightedScore,
	aggregateScores,
	DEFAULT_EVALUATOR_WEIGHTS,
	DEFAULT_WEIGHTS,
	type ScoreWeights,
	type AggregatedScore,
	type FeedbackKeyParts,
} from './harness/score-calculator';

// Report generation
// Re-exports three report functions and their supporting types from the
// support/report-generator module.
export {
	extractViolationSeverity,
	calculateReportMetrics,
	generateMarkdownReport,
	type ViolationSeverity,
	type ReportOptions,
	type ReportMetrics,
} from './support/report-generator';

// Test case generation
// Re-exports the generator factory plus the types for its options, its
// generated cases and the generator itself.
export {
	createTestCaseGenerator,
	type TestCaseGeneratorOptions,
	type GeneratedTestCase,
	type TestCaseGenerator,
} from './support/test-case-generator';

// CSV loader utilities
// Both helpers are re-exported from the CLI's CSV prompt loader module.
export { loadDefaultTestCases, getDefaultTestCaseIds } from './cli/csv-prompt-loader';