// ---------------------------------------------------------------------------
// Self-contained HTML report renderer for a RunReport.
//
// Drops a single static HTML file with inline CSS — no JS frameworks, no
// fetches, opens in any browser. Optimised for "what failed and why" at a
// glance, plus enough detail to debug a failed grader without opening the
// raw JSON.
// ---------------------------------------------------------------------------
import { escapeHtml, formatTokens, safeStringify } from './formatting';
import type {
CapturedConfirmation,
GraderResult,
RunManifest,
RunReport,
ScenarioResult,
} from './types';
/**
 * Renders a complete, standalone HTML document for one eval run.
 *
 * The page contains a header (run window plus the manifest's git ref, daemon
 * version and n8n version), a four-cell summary banner (pass count, summed
 * duration, summed tool calls, summed estimated result tokens) and, inside
 * `<main>`, the output of `renderScenario` for every result. The stylesheet
 * held in `STYLE` is embedded in a `<style>` element so the file needs no
 * external assets.
 *
 * @param report - The run report to render.
 * @returns The full HTML document as a string.
 */
export function renderHtml(report: RunReport): string {
  const manifest: RunManifest = report.manifest;
  const passRate = report.totalScenarios > 0 ? report.passCount / report.totalScenarios : 0;
  // A run with zero scenarios has passRate 0 and is therefore shown as "bad".
  const bannerClass = passRate === 1 ? 'banner-ok' : 'banner-bad';
  const totalDurationMs = report.results.reduce((acc, r) => acc + r.durationMs, 0);
  const totalToolCalls = report.results.reduce((acc, r) => acc + r.toolCallCount, 0);
  const totalResultTokens = report.results.reduce((acc, r) => acc + r.tokens.totalResultsEst, 0);

  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Computer-use eval — ${report.passCount}/${report.totalScenarios} passed</title>
<style>${STYLE}</style>
</head>
<body>
<header>
<h1>Computer-use eval</h1>
<div class="meta">${escapeHtml(report.startedAt)} → ${escapeHtml(report.finishedAt)}</div>
<div class="manifest">
<span class="manifest-item"><span class="manifest-label">git</span> <code>${escapeHtml(manifest.gitRef)}</code></span>
<span class="manifest-item"><span class="manifest-label">computer-use</span> <code>${escapeHtml(manifest.daemonVersion)}</code></span>
<span class="manifest-item"><span class="manifest-label">n8n</span> <code>${escapeHtml(manifest.n8nVersion)}</code></span>
</div>
</header>
<section class="banner ${bannerClass}">
<div class="banner-stat"><div class="num">${report.passCount}/${report.totalScenarios}</div><div class="label">scenarios passed</div></div>
<div class="banner-stat"><div class="num">${formatDuration(totalDurationMs)}</div><div class="label">total run time</div></div>
<div class="banner-stat"><div class="num">${totalToolCalls}</div><div class="label">tool calls</div></div>
<div class="banner-stat"><div class="num">${formatTokens(totalResultTokens)}</div><div class="label">result tokens (est)</div></div>
</section>
<main>
${report.results.map((result) => renderScenario(result)).join('\n')}
</main>
</body>
</html>
`;
}
// ---------------------------------------------------------------------------
// Per-scenario card
// ---------------------------------------------------------------------------
/**
 * Renders one scenario result as a collapsible `<details>` card.
 *
 * The `<summary>` row shows the PASS/FAIL status, scenario id, category,
 * per-scenario stats (tool calls, duration, estimated result tokens) and any
 * tags as chips. The body shows the run error (if any), the prompt, and then
 * the outputs of the failed-grader, all-grader, confirmation, tool-call and
 * final-text renderers, in that order.
 *
 * Failing scenarios are rendered with the `open` attribute so their details
 * are visible without a click; passing ones start collapsed.
 *
 * @param result - The scenario result to render.
 * @returns The HTML fragment for the card.
 */
function renderScenario(result: ScenarioResult): string {
  const failedGraders = result.graderResults.filter((g) => !g.pass);
  const tagChips = (result.scenario.tags ?? [])
    .map((t) => `<span class="chip">${escapeHtml(t)}</span>`)
    .join(' ');
  const statusClass = result.pass ? 'pass' : 'fail';
  const openAttr = result.pass ? '' : ' open';
  const errorBox = result.error
    ? `<div class="error-box"><strong>Run error:</strong> ${escapeHtml(result.error)}</div>`
    : '';

  // The prompt sits directly inside <pre> with no surrounding whitespace so
  // that the rendered text matches the prompt exactly.
  return `
<details class="scenario ${statusClass}"${openAttr}>
<summary>
<span class="status">${result.pass ? 'PASS' : 'FAIL'}</span>
<span class="id">${escapeHtml(result.scenario.id)}</span>
<span class="cat">${escapeHtml(result.scenario.category)}</span>
<span class="stats">
${result.toolCallCount} calls
· ${formatDuration(result.durationMs)}
· ${formatTokens(result.tokens.totalResultsEst)} result tokens est
</span>
${tagChips ? `<div class="tags">${tagChips}</div>` : ''}
</summary>
<div class="body">
${errorBox}
<div class="section-label">Prompt</div>
<pre>${escapeHtml(result.scenario.prompt)}</pre>
${failedGraders.length > 0 ? renderFailedGraders(failedGraders) : ''}
${renderAllGraders(result.graderResults)}
${renderConfirmations(result.confirmations)}
${renderToolCalls(result)}
${renderFinalText(result.finalText)}
</div>
</details>
`;
}
/**
 * Renders the confirmations captured during a scenario as a three-column
 * table: decision (`auto-approved` or `pending`), summary, and request id.
 *
 * @param confirmations - Confirmations captured for the scenario.
 * @returns The HTML fragment, or an empty string when there are none.
 */
function renderConfirmations(confirmations: CapturedConfirmation[]): string {
  if (confirmations.length === 0) return '';

  const rows = confirmations
    .map(
      (c: CapturedConfirmation) => `<tr>
<td class="conf-decision">${c.autoApproved ? 'auto-approved' : 'pending'}</td>
<td class="conf-summary">${escapeHtml(c.summary ?? '(no summary)')}</td>
<td class="conf-id">${escapeHtml(c.requestId)}</td>
</tr>`,
    )
    .join('\n');

  // The rows are interpolated into the table; previously they were built and
  // then dropped, leaving only the heading in the output.
  return `
<div class="confirmations">
<div class="section-label">Confirmations (${confirmations.length})</div>
<table>
<tbody>
${rows}
</tbody>
</table>
</div>
`;
}
/**
 * Renders the highlighted list of graders that did not pass, one list item
 * per grader showing its type and the failure reason.
 *
 * @param failed - Grader results whose `pass` flag is false.
 * @returns The HTML fragment, or an empty string when the list is empty.
 */
function renderFailedGraders(failed: GraderResult[]): string {
  if (failed.length === 0) return '';

  const items = failed
    .map(
      (g) =>
        `<li><span class="grader-type">${escapeHtml(g.grader.type)}</span>` +
        `<span class="reason">${escapeHtml(g.reason)}</span></li>`,
    )
    .join('\n');

  // The items are emitted inside the list; previously the function built them
  // and then returned an empty string.
  return `
<div class="failed-block">
<div class="section-label">Failed graders (${failed.length})</div>
<ul class="failed-list">
${items}
</ul>
</div>
`;
}
/**
 * Renders every grader result for a scenario as a table with three columns:
 * pass/fail status, grader type, and reason. Each row carries a `g-pass` or
 * `g-fail` class so the status cell can be coloured by the stylesheet.
 *
 * @param results - All grader results for the scenario.
 * @returns The HTML fragment, or an empty string when there are no results.
 */
function renderAllGraders(results: GraderResult[]): string {
  if (results.length === 0) return '';

  const rows = results
    .map(
      (g) => `<tr class="${g.pass ? 'g-pass' : 'g-fail'}">
<td class="g-status">${g.pass ? 'pass' : 'fail'}</td>
<td class="g-type">${escapeHtml(g.grader.type)}</td>
<td class="g-reason">${escapeHtml(g.reason)}</td>
</tr>`,
    )
    .join('\n');

  // The rows are emitted inside the table; previously the function built them
  // and then returned an empty string.
  return `
<div class="graders">
<div class="section-label">All graders (${results.length})</div>
<table>
<tbody>
${rows}
</tbody>
</table>
</div>
`;
}
/**
 * Renders the scenario's tool calls as a table: call index, tool name, a
 * truncated preview of the arguments, estimated argument tokens, and the
 * estimated result tokens shown both as a proportional bar and as a number.
 *
 * Bar widths are relative to the largest `resultTokensEst` in this scenario
 * and are clamped to at least 1% so that tiny results remain visible. When
 * `tokens.largestResultToolName` is set, a one-line note naming the biggest
 * result is shown above the table.
 *
 * @param r - The scenario result whose tool calls should be rendered.
 * @returns The HTML fragment, or an empty string when there were no calls.
 */
function renderToolCalls(r: ScenarioResult): string {
  if (r.toolCalls.length === 0) {
    return '';
  }

  // The floor of 1 prevents a division by zero when every estimate is 0.
  const maxResult = Math.max(1, ...r.toolCalls.map((tc) => tc.resultTokensEst));

  const rows = r.toolCalls
    .map((tc, i) => {
      const widthPct = Math.max(1, Math.round((tc.resultTokensEst / maxResult) * 100));
      const argsPreview = previewArgs(tc.args);
      return `<tr>
<td class="idx">#${i + 1}</td>
<td class="tool">${escapeHtml(tc.name)}</td>
<td class="args"><code>${escapeHtml(argsPreview)}</code></td>
<td class="num">${formatTokens(tc.argTokensEst)}</td>
<td class="resultBar"><div class="bar"><div class="fill" style="width: ${widthPct}%"></div></div><span class="num">${formatTokens(tc.resultTokensEst)}</span></td>
</tr>`;
    })
    .join('\n');

  const biggestNote = r.tokens.largestResultToolName
    ? `<div class="biggest">Biggest result: <strong>${escapeHtml(r.tokens.largestResultToolName)}</strong> ~${formatTokens(r.tokens.largestResultEst)} tokens (est)</div>`
    : '';

  // The note and rows are emitted; previously both were computed and the
  // function returned an empty string.
  return `
<div class="tool-calls">
<div class="section-label">Tool calls (${r.toolCalls.length})</div>
${biggestNote}
<table class="tool-table">
<thead>
<tr><th class="idx">#</th><th class="tool">Tool</th><th class="args">Args</th><th class="num">Arg tokens</th><th class="resultBar">Result tokens</th></tr>
</thead>
<tbody>
${rows}
</tbody>
</table>
</div>
`;
}
/**
 * Renders the agent's final text as a collapsed `<details>` section whose
 * summary shows the character count. The text is escaped and placed in a
 * `<pre>` so its line breaks and spacing survive in the browser.
 *
 * @param text - The final text produced by the agent.
 * @returns The HTML fragment, or an empty string when `text` is empty.
 */
function renderFinalText(text: string): string {
  if (!text) return '';
  // No whitespace between <pre> and the text, so nothing is prepended to it.
  return `
<details class="final-text">
<summary>Final agent text (${text.length} chars)</summary>
<pre>${escapeHtml(text)}</pre>
</details>
`;
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Produces a short single-string preview of a tool call's arguments.
 *
 * The arguments are serialised with `safeStringify`; results of up to 140
 * characters are returned unchanged, longer ones are cut to the first 137
 * characters followed by an ellipsis.
 *
 * @param args - The tool call arguments.
 * @returns The (possibly truncated) serialised arguments.
 */
function previewArgs(args: Record<string, unknown>): string {
  // `Record` needs explicit key and value types; a bare `Record` does not compile.
  const json = safeStringify(args);
  if (json.length <= 140) return json;
  return json.slice(0, 137) + '…';
}
/**
 * Formats a millisecond duration as a short human-readable string.
 *
 * - below 1 000 ms: the raw value followed by `ms`, e.g. `250ms`
 * - below one minute: seconds with one decimal place, e.g. `1.5s`
 * - otherwise: whole minutes and seconds, e.g. `2m5s`
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms: number): string {
  if (ms < 1_000) return `${ms}ms`;
  if (ms < 60_000) {
    const seconds = (ms / 1000).toFixed(1);
    // Inputs just under a minute (e.g. 59_990) round up to "60.0". Let those
    // fall through to the minutes branch so they read `1m0s`, not `60.0s`.
    if (seconds !== '60.0') return `${seconds}s`;
  }
  // Round the whole duration to seconds first, then split. Splitting before
  // rounding (e.g. `Math.round((ms % 60_000) / 1000)`) can carry the seconds
  // component up to 60 and emit invalid `Xm60s` values for inputs like 119_500.
  const totalSeconds = Math.round(ms / 1000);
  const m = Math.floor(totalSeconds / 60);
  const s = totalSeconds % 60;
  return `${m}m${s}s`;
}
// ---------------------------------------------------------------------------
// Style — kept inline so the file is portable
// ---------------------------------------------------------------------------
/**
 * CSS for the generated report, held as a single string.
 *
 * Declares a dark colour palette as custom properties on `:root` and the rules
 * for the page header and manifest line, the summary banner, the per-scenario
 * cards (`.scenario`, `summary`, `.body`), the failed-grader block, the grader
 * and tool-call tables, the confirmations table, and the final-text section.
 *
 * NOTE(review): this is meant to be interpolated into a `<style>` element of
 * the rendered page — verify that `renderHtml` actually does so.
 */
const STYLE = `
:root {
--bg: #0f1115;
--panel: #181b22;
--panel-2: #1f232c;
--muted: #8a93a3;
--text: #e6e9ef;
--pass: #39c97a;
--fail: #ef4f4f;
--pass-bg: rgba(57, 201, 122, 0.10);
--fail-bg: rgba(239, 79, 79, 0.12);
--accent: #6aa9ff;
--border: #2a2f3a;
}
* { box-sizing: border-box; }
body {
background: var(--bg);
color: var(--text);
font: 14px/1.45 -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif;
margin: 0;
padding: 24px;
max-width: 1200px;
margin-left: auto;
margin-right: auto;
}
header h1 { margin: 0 0 4px 0; font-weight: 600; letter-spacing: -0.01em; }
.meta { color: var(--muted); margin-bottom: 8px; font-size: 13px; }
.manifest { color: var(--muted); margin-bottom: 16px; font-size: 12px; display: flex; gap: 16px; flex-wrap: wrap; }
.manifest-item { display: inline-flex; gap: 6px; align-items: center; }
.manifest-label { text-transform: uppercase; letter-spacing: 0.04em; font-size: 11px; }
.manifest code { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; color: var(--text); background: var(--panel-2); padding: 1px 6px; border-radius: 3px; }
.confirmations table { width: 100%; border-collapse: collapse; font-size: 12.5px; margin-top: 4px; }
.confirmations td { padding: 6px 8px; border-bottom: 1px solid var(--border); vertical-align: top; }
.conf-decision { width: 110px; color: var(--accent); }
.conf-summary { color: var(--text); }
.conf-id { width: 280px; color: var(--muted); font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
.banner {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 16px;
padding: 18px 20px;
border-radius: 10px;
border: 1px solid var(--border);
background: var(--panel);
margin-bottom: 24px;
}
.banner-ok { border-color: var(--pass); }
.banner-bad { border-color: var(--fail); }
.banner-stat .num { font-size: 22px; font-weight: 600; letter-spacing: -0.01em; }
.banner-stat .label { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.04em; }
main { display: flex; flex-direction: column; gap: 12px; }
.scenario { border: 1px solid var(--border); border-radius: 8px; background: var(--panel); overflow: hidden; }
.scenario.pass { border-left: 3px solid var(--pass); }
.scenario.fail { border-left: 3px solid var(--fail); background: linear-gradient(180deg, var(--fail-bg), var(--panel) 60px); }
summary { list-style: none; cursor: pointer; padding: 12px 16px; display: flex; align-items: center; gap: 12px; flex-wrap: wrap; }
summary::-webkit-details-marker { display: none; }
summary:hover { background: var(--panel-2); }
.status { font-weight: 600; padding: 2px 8px; border-radius: 4px; font-size: 12px; letter-spacing: 0.04em; }
.scenario.pass .status { color: var(--pass); background: var(--pass-bg); }
.scenario.fail .status { color: var(--fail); background: var(--fail-bg); }
.id { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 13px; }
.cat { color: var(--muted); font-size: 12px; }
.stats { color: var(--muted); font-size: 12px; margin-left: auto; }
.tags { width: 100%; margin-top: 4px; }
.chip { display: inline-block; font-size: 11px; padding: 1px 6px; border-radius: 3px; background: var(--panel-2); color: var(--muted); margin-right: 4px; }
.body { padding: 0 16px 16px; border-top: 1px solid var(--border); }
.section-label { font-size: 11px; text-transform: uppercase; letter-spacing: 0.06em; color: var(--muted); margin: 14px 0 6px; }
pre {
background: var(--panel-2); border: 1px solid var(--border); border-radius: 6px;
padding: 10px 12px; overflow: auto; white-space: pre-wrap; word-break: break-word;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12.5px;
margin: 0;
}
.error-box { color: var(--fail); border: 1px solid var(--fail); border-radius: 6px; padding: 10px 12px; margin: 12px 0; background: var(--fail-bg); }
.failed-block { background: var(--fail-bg); border: 1px solid var(--fail); border-radius: 6px; padding: 8px 12px 12px; margin: 12px 0; }
.failed-list { margin: 0; padding-left: 18px; }
.failed-list .grader-type { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12.5px; color: var(--fail); margin-right: 8px; }
.failed-list .reason { color: var(--text); }
.graders table, .tool-table { width: 100%; border-collapse: collapse; font-size: 12.5px; }
.graders td, .tool-table td, .tool-table th { padding: 6px 8px; border-bottom: 1px solid var(--border); text-align: left; vertical-align: top; }
.tool-table th { color: var(--muted); font-weight: 500; font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; }
.g-status { width: 56px; font-weight: 600; }
.g-pass .g-status { color: var(--pass); }
.g-fail .g-status { color: var(--fail); }
.g-type { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; width: 220px; color: var(--accent); }
.tool-table .idx { width: 36px; color: var(--muted); }
.tool-table .tool { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; color: var(--accent); width: 180px; white-space: nowrap; }
.tool-table .args code { font-size: 11.5px; color: var(--text); white-space: pre-wrap; word-break: break-word; }
.tool-table .num { text-align: right; font-variant-numeric: tabular-nums; width: 80px; }
.tool-table .resultBar { width: 220px; }
.bar { width: 140px; height: 6px; background: var(--panel-2); border-radius: 3px; overflow: hidden; display: inline-block; vertical-align: middle; }
.bar .fill { height: 100%; background: var(--accent); }
.resultBar .num { display: inline-block; margin-left: 8px; }
.biggest { color: var(--muted); font-size: 12px; margin-bottom: 4px; }
.final-text summary { padding: 10px 0; color: var(--accent); }
.final-text pre { margin-top: 8px; }
.muted { color: var(--muted); font-size: 12px; }
footer { color: var(--muted); font-size: 12px; margin-top: 32px; padding-top: 16px; border-top: 1px solid var(--border); text-align: center; }
`;