// Mirror of https://github.com/n8n-io/n8n.git
// Synced 2026-05-26 14:25:35 +02:00
// 100 lines · 2.7 KiB · TypeScript
import type {
|
|
WorkflowTestCaseResult,
|
|
MultiRunEvaluation,
|
|
TestCaseAggregation,
|
|
ScenarioAggregation,
|
|
} from '../types';
|
|
|
|
/**
 * Binomial coefficient C(n, k): the number of ways to choose k items out of n.
 *
 * Returns 0 when k > n or k < 0, and 1 when k is 0 or equal to n.
 *
 * @param n - Size of the set being chosen from.
 * @param k - Number of items chosen.
 * @returns C(n, k), rounded to the nearest integer.
 */
function combinations(n: number, k: number): number {
	if (k > n || k < 0) return 0;
	if (k === 0 || k === n) return 1;

	// C(n, k) === C(n, n - k): iterating over the smaller side needs fewer
	// steps and keeps the intermediate products as small as possible.
	const steps = Math.min(k, n - k);

	let result = 1;
	for (let i = 0; i < steps; i++) {
		// Multiplicative formula: after this step, result equals C(n, i + 1).
		result = (result * (n - i)) / (i + 1);
	}

	// Rounding absorbs any floating-point drift introduced by the divisions.
	return Math.round(result);
}
|
|
|
|
/**
 * pass@k = 1 - C(n - c, k) / C(n, k)
 *
 * Probability that at least 1 of k randomly chosen samples passes,
 * given n total samples of which c passed.
 *
 * @param n - Total number of samples.
 * @param c - Number of samples that passed.
 * @param k - Number of samples drawn.
 * @returns The pass@k value; 0 when k > n or when C(n, k) evaluates to 0.
 */
export function passAtK(n: number, c: number, k: number): number {
	// More draws than samples: reported as 0 instead of raising an error.
	if (k > n) return 0;

	const denominator = combinations(n, k);
	// Avoid dividing by zero (C(n, k) is 0 for a negative k, for example).
	if (denominator === 0) return 0;

	// C(n - c, k) counts the draws that consist only of failing samples.
	// It is 0 when fewer than k samples failed, which makes the result 1.
	return 1 - combinations(n - c, k) / denominator;
}
|
|
|
|
/**
 * pass^k = (c/n)^k
 *
 * Probability that all k independent attempts pass,
 * given observed success rate p = c/n.
 *
 * @param n - Total number of samples.
 * @param c - Number of samples that passed.
 * @param k - Number of independent attempts.
 * @returns (c/n)^k, or 0 when n is 0 (no samples, so no observed success rate).
 */
export function passHatK(n: number, c: number, k: number): number {
	// Guard the division below: without samples the rate c/n is undefined.
	if (n === 0) return 0;
	return Math.pow(c / n, k);
}
|
|
|
|
/**
 * Computes pass@k and pass^k for every k from 1 up to n.
 *
 * @param n - Total number of samples.
 * @param c - Number of samples that passed.
 * @returns Two parallel arrays with one entry per k; the value for a given k
 *   is stored at index k - 1. Both arrays are empty when n is less than 1.
 */
function computePassMetrics(
	n: number,
	c: number,
): { passAtKValues: number[]; passHatKValues: number[] } {
	const passAtKValues: number[] = [];
	const passHatKValues: number[] = [];

	for (let k = 1; k <= n; k++) {
		passAtKValues.push(passAtK(n, c, k));
		passHatKValues.push(passHatK(n, c, k));
	}

	return { passAtKValues, passHatKValues };
}
|
|
|
|
/**
 * Aggregates the results of several evaluation runs into per-test-case and
 * per-scenario statistics.
 *
 * Results are matched across runs by position: the entry at index i of every
 * run is treated as the same test case, and the number of test cases as well
 * as each test case definition are taken from the first run.
 *
 * @param allRunResults - One array of test case results per run.
 * @param totalRuns - Number of runs used as the sample count for the pass
 *   rate and the pass@k / pass^k metrics.
 *   NOTE(review): presumably equal to allRunResults.length — confirm with callers.
 * @returns The aggregation for every test case, or an empty list of test
 *   cases when no runs were supplied.
 */
export function aggregateResults(
	allRunResults: WorkflowTestCaseResult[][],
	totalRuns: number,
): MultiRunEvaluation {
	const testCases: TestCaseAggregation[] = [];

	// Without any run there is nothing to aggregate; return early instead of
	// dereferencing the missing first run.
	const firstRun = allRunResults[0];
	if (firstRun === undefined) return { totalRuns, testCases };

	const testCaseCount = firstRun.length;

	for (let tcIdx = 0; tcIdx < testCaseCount; tcIdx++) {
		// Collect the result of this test case from every run.
		const runs = allRunResults.map((runResults) => runResults[tcIdx]);
		const testCase = runs[0].testCase;
		const buildSuccessCount = runs.filter((r) => r.workflowBuildSuccess).length;

		const scenarioCount = testCase.scenarios.length;
		const scenarios: ScenarioAggregation[] = [];

		for (let sIdx = 0; sIdx < scenarioCount; sIdx++) {
			const scenario = testCase.scenarios[sIdx];
			// A run without a result for this scenario is counted as a failure
			// with a score of 0.
			const scenarioRuns = runs.map(
				(r) =>
					r.scenarioResults[sIdx] ?? {
						scenario,
						success: false,
						score: 0,
						reasoning: 'Build failed — scenario not executed',
					},
			);
			const passCount = scenarioRuns.filter((sr) => sr.success).length;
			const { passAtKValues, passHatKValues } = computePassMetrics(totalRuns, passCount);

			scenarios.push({
				scenario,
				runs: scenarioRuns,
				passCount,
				// Guard against dividing by zero when no runs were requested.
				passRate: totalRuns > 0 ? passCount / totalRuns : 0,
				passAtK: passAtKValues,
				passHatK: passHatKValues,
			});
		}

		testCases.push({ testCase, runs, buildSuccessCount, scenarios });
	}

	return { totalRuns, testCases };
}
|