// ---------------------------------------------------------------------------
// Self-contained HTML report renderer for a RunReport.
//
// Drops a single static HTML file with inline CSS — no JS frameworks, no
// fetches, opens in any browser. Optimised for "what failed and why" at a
// glance, plus enough detail to debug a failed grader without opening the
// raw JSON.
//
// NOTE(review): in the text under review the template literals below carry no
// markup, and `STYLE` plus a few computed values are never interpolated.
// Presumably the markup was lost before this file reached review — confirm
// against the original before relying on the rendered output.
// ---------------------------------------------------------------------------

import { escapeHtml, formatTokens, safeStringify } from './formatting';
import type {
	CapturedConfirmation,
	GraderResult,
	RunManifest,
	RunReport,
	ScenarioResult,
} from './types';

/**
 * Renders a whole run as a single string: a heading with the pass count, the
 * start/finish timestamps, the manifest versions, and then one section per
 * scenario (joined with newlines).
 *
 * @param report - The run to render. Every free-text field that is emitted is
 *   passed through `escapeHtml` first.
 * @returns The rendered report text.
 */
export function renderHtml(report: RunReport): string {
	const manifest: RunManifest = report.manifest;
	// NOTE(review): the four aggregates below are computed but not referenced by
	// the template as it appears here — presumably they feed a summary banner.
	const passRate = report.totalScenarios > 0 ? report.passCount / report.totalScenarios : 0;
	const totalDurationMs = report.results.reduce((acc, r) => acc + r.durationMs, 0);
	const totalToolCalls = report.results.reduce((acc, r) => acc + r.toolCallCount, 0);
	const totalResultTokens = report.results.reduce((acc, r) => acc + r.tokens.totalResultsEst, 0);

	return ` Computer-use eval — ${report.passCount}/${report.totalScenarios} passed

Computer-use eval

${escapeHtml(report.startedAt)} → ${escapeHtml(report.finishedAt)}
git ${escapeHtml(manifest.gitRef)} computer-use ${escapeHtml(manifest.daemonVersion)} n8n ${escapeHtml(manifest.n8nVersion)}
${report.results.map(renderScenario).join('\n')}
`;
}

// ---------------------------------------------------------------------------
// Per-scenario card
// ---------------------------------------------------------------------------

/**
 * Renders one scenario: status line (PASS/FAIL, id, category, call count,
 * duration, estimated result tokens, tags), optional run error, the prompt,
 * and then the failed-grader, all-grader, confirmation, tool-call and
 * final-text sections in that order.
 *
 * @param result - The scenario outcome to render.
 * @returns The rendered section text.
 */
function renderScenario(result: ScenarioResult): string {
	const failedGraders = result.graderResults.filter((g) => !g.pass);
	// Tags are optional on a scenario; treat a missing list as empty.
	const tagChips = (result.scenario.tags ?? [])
		.map((t) => `${escapeHtml(t)}`)
		.join(' ');

	return `
${result.pass ? 'PASS' : 'FAIL'} ${escapeHtml(result.scenario.id)} ${escapeHtml(result.scenario.category)} ${result.toolCallCount} calls · ${formatDuration(result.durationMs)} · ${formatTokens(result.tokens.totalResultsEst)} result tokens est ${tagChips ? `${tagChips}` : ''}
${result.error ? `
Run error: ${escapeHtml(result.error)}
` : ''}
${escapeHtml(result.scenario.prompt)}
${failedGraders.length > 0 ? renderFailedGraders(failedGraders) : ''} ${renderAllGraders(result.graderResults)} ${renderConfirmations(result.confirmations)} ${renderToolCalls(result)} ${renderFinalText(result.finalText)}
`;
}

/**
 * Renders the captured confirmations, one row per confirmation showing the
 * decision (`auto-approved` or `pending`), the summary (or `(no summary)` when
 * absent) and the request id.
 *
 * @param confirmations - Confirmations captured during the scenario.
 * @returns The rendered section, or an empty string when there are none.
 */
function renderConfirmations(confirmations: CapturedConfirmation[]): string {
	if (confirmations.length === 0) return '';

	const rows = confirmations
		.map(
			(c: CapturedConfirmation) => ` ${c.autoApproved ? 'auto-approved' : 'pending'} ${escapeHtml(c.summary ?? '(no summary)')} ${escapeHtml(c.requestId)} `,
		)
		.join('\n');

	return `
Confirmations (${confirmations.length})
${rows}
`;
}

/**
 * Renders the "Why it failed" section listing each failed grader's type and
 * reason.
 *
 * @param failed - Grader results that did not pass. The caller only invokes
 *   this with a non-empty list.
 * @returns The rendered section text.
 */
function renderFailedGraders(failed: GraderResult[]): string {
	const items = failed
		.map(
			(g) => `
  • ${escapeHtml(g.grader.type)} ${escapeHtml(g.reason)}
  • `,
		)
		.join('\n');

	// The list must be interpolated here; previously `items` was built and then
	// dropped, leaving the section with a heading and no content.
	return `
    Why it failed
    ${items}
    `;
}

/**
 * Renders every grader result (passed and failed) as one row each: status,
 * grader type, reason.
 *
 * @param results - All grader results for the scenario.
 * @returns The rendered section text.
 */
function renderAllGraders(results: GraderResult[]): string {
	const rows = results
		.map(
			(g) => ` ${g.pass ? 'pass' : 'fail'} ${escapeHtml(g.grader.type)} ${escapeHtml(g.reason)} `,
		)
		.join('\n');

	return `
    Graders
    ${rows}
    `;
}

/**
 * Renders the tool-call section: an optional "biggest result" note followed by
 * one row per call (index, tool name, truncated args preview, estimated
 * argument tokens, estimated result tokens).
 *
 * @param r - The scenario result whose `toolCalls` and `tokens` are rendered.
 * @returns The rendered section text; a short "none" section when the scenario
 *   made no tool calls.
 */
function renderToolCalls(r: ScenarioResult): string {
	if (r.toolCalls.length === 0) {
		// Template literal rather than a quoted string: the text spans lines.
		return `
    Tool calls
    none
    `;
	}

	// Largest result size, floored at 1 so the ratio below never divides by 0.
	// A fold is used instead of spreading into Math.max so that very long runs
	// cannot exceed the engine's argument-count limit.
	const maxResult = r.toolCalls.reduce((max, tc) => Math.max(max, tc.resultTokensEst), 1);

	const rows = r.toolCalls
		.map((tc, i) => {
			// Relative size of this result, clamped to at least 1%.
			// NOTE(review): not referenced by the row template as it appears here —
			// presumably it sets the width of a bar; confirm.
			const widthPct = Math.max(1, Math.round((tc.resultTokensEst / maxResult) * 100));
			const argsPreview = previewArgs(tc.args);
			return ` #${i + 1} ${escapeHtml(tc.name)} ${escapeHtml(argsPreview)} ${formatTokens(tc.argTokensEst)}
    ${formatTokens(tc.resultTokensEst)} `;
		})
		.join('\n');

	const biggestNote = r.tokens.largestResultToolName
		? `
    Biggest result: ${escapeHtml(r.tokens.largestResultToolName)} ~${formatTokens(r.tokens.largestResultEst)} tokens (est)
    `
		: '';

	return `
    Tool calls
    ${biggestNote} ${rows}
    # Tool Args Arg tok Result tok (est)
    `;
}

/**
 * Renders the agent's final text together with its length in characters.
 *
 * @param text - The final text produced by the agent.
 * @returns The rendered section, or an empty string when `text` is empty.
 */
function renderFinalText(text: string): string {
	if (!text) return '';
	return `
    Final agent text (${text.length} chars)
    ${escapeHtml(text)}
    `;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Serialises tool-call arguments for display, truncating long values.
 *
 * Values whose serialised form is at most 140 UTF-16 units are returned whole;
 * longer ones are cut to 137 units and suffixed with an ellipsis.
 *
 * NOTE(review): the parameter type appeared as a bare `Record` in the reviewed
 * text; `Record<string, unknown>` is assumed — confirm against the tool-call
 * type in `./types`.
 *
 * @param args - The tool-call arguments.
 * @returns The (possibly truncated) serialised arguments.
 */
function previewArgs(args: Record<string, unknown>): string {
	const json = safeStringify(args);
	if (json.length <= 140) return json;

	let end = 137;
	// Do not cut between the two halves of a surrogate pair: a dangling high
	// surrogate would render as a replacement character.
	const lastUnit = json.charCodeAt(end - 1);
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
	return json.slice(0, end) + '…';
}

/**
 * Formats a duration for display: `Nms` below one second, `S.Ss` below one
 * minute, `MmSs` otherwise.
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms: number): string {
	if (ms < 1_000) return `${ms}ms`;
	if (ms < 60_000) {
		const seconds = (ms / 1000).toFixed(1);
		// `toFixed` rounds, so values just under a minute (59_950–59_999) would
		// print as "60.0s". Let those fall through to the minutes branch.
		if (seconds !== '60.0') return `${seconds}s`;
	}
	// Round the whole duration to seconds first, then split. Splitting before
	// rounding (e.g. `Math.round((ms % 60_000) / 1000)`) can carry the seconds
	// component up to 60 and emit invalid `Xm60s` values for inputs like 119_500.
	const totalSeconds = Math.round(ms / 1000);
	const m = Math.floor(totalSeconds / 60);
	const s = totalSeconds % 60;
	return `${m}m${s}s`;
}

// ---------------------------------------------------------------------------
// Style — kept inline so the file is portable
// ---------------------------------------------------------------------------

// NOTE(review): not referenced by any template as they appear in this file —
// presumably meant to be inlined into the report's head; confirm.
const STYLE = ` :root { --bg: #0f1115; --panel: #181b22; --panel-2: #1f232c; --muted: #8a93a3; --text: #e6e9ef; --pass: #39c97a; --fail: #ef4f4f; --pass-bg: rgba(57, 201, 122, 0.10); --fail-bg: rgba(239, 79, 79, 0.12); --accent: #6aa9ff; --border: #2a2f3a; } * { box-sizing: border-box; } body { background: var(--bg); color: var(--text); font: 14px/1.45 -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif; margin: 0; padding: 24px; max-width: 1200px; margin-left: auto; margin-right: auto; } header h1 { margin: 0 0 4px 0; font-weight: 600; letter-spacing: -0.01em; } .meta { color: var(--muted); margin-bottom: 8px; font-size: 13px; } .manifest { color: var(--muted); margin-bottom: 16px; font-size: 12px; display: flex; gap: 16px; flex-wrap: wrap; } .manifest-item { display: inline-flex; gap: 6px; align-items: center; } .manifest-label { text-transform: uppercase; letter-spacing: 0.04em; font-size: 11px; } .manifest code { font-family: 
ui-monospace, SFMono-Regular, Menlo, monospace; color: var(--text); background: var(--panel-2); padding: 1px 6px; border-radius: 3px; } .confirmations table { width: 100%; border-collapse: collapse; font-size: 12.5px; margin-top: 4px; } .confirmations td { padding: 6px 8px; border-bottom: 1px solid var(--border); vertical-align: top; } .conf-decision { width: 110px; color: var(--accent); } .conf-summary { color: var(--text); } .conf-id { width: 280px; color: var(--muted); font-family: ui-monospace, SFMono-Regular, Menlo, monospace; } .banner { display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; padding: 18px 20px; border-radius: 10px; border: 1px solid var(--border); background: var(--panel); margin-bottom: 24px; } .banner-ok { border-color: var(--pass); } .banner-bad { border-color: var(--fail); } .banner-stat .num { font-size: 22px; font-weight: 600; letter-spacing: -0.01em; } .banner-stat .label { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.04em; } main { display: flex; flex-direction: column; gap: 12px; } .scenario { border: 1px solid var(--border); border-radius: 8px; background: var(--panel); overflow: hidden; } .scenario.pass { border-left: 3px solid var(--pass); } .scenario.fail { border-left: 3px solid var(--fail); background: linear-gradient(180deg, var(--fail-bg), var(--panel) 60px); } summary { list-style: none; cursor: pointer; padding: 12px 16px; display: flex; align-items: center; gap: 12px; flex-wrap: wrap; } summary::-webkit-details-marker { display: none; } summary:hover { background: var(--panel-2); } .status { font-weight: 600; padding: 2px 8px; border-radius: 4px; font-size: 12px; letter-spacing: 0.04em; } .scenario.pass .status { color: var(--pass); background: var(--pass-bg); } .scenario.fail .status { color: var(--fail); background: var(--fail-bg); } .id { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 13px; } .cat { color: var(--muted); font-size: 12px; } .stats { 
color: var(--muted); font-size: 12px; margin-left: auto; } .tags { width: 100%; margin-top: 4px; } .chip { display: inline-block; font-size: 11px; padding: 1px 6px; border-radius: 3px; background: var(--panel-2); color: var(--muted); margin-right: 4px; } .body { padding: 0 16px 16px; border-top: 1px solid var(--border); } .section-label { font-size: 11px; text-transform: uppercase; letter-spacing: 0.06em; color: var(--muted); margin: 14px 0 6px; } pre { background: var(--panel-2); border: 1px solid var(--border); border-radius: 6px; padding: 10px 12px; overflow: auto; white-space: pre-wrap; word-break: break-word; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12.5px; margin: 0; } .error-box { color: var(--fail); border: 1px solid var(--fail); border-radius: 6px; padding: 10px 12px; margin: 12px 0; background: var(--fail-bg); } .failed-block { background: var(--fail-bg); border: 1px solid var(--fail); border-radius: 6px; padding: 8px 12px 12px; margin: 12px 0; } .failed-list { margin: 0; padding-left: 18px; } .failed-list .grader-type { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12.5px; color: var(--fail); margin-right: 8px; } .failed-list .reason { color: var(--text); } .graders table, .tool-table { width: 100%; border-collapse: collapse; font-size: 12.5px; } .graders td, .tool-table td, .tool-table th { padding: 6px 8px; border-bottom: 1px solid var(--border); text-align: left; vertical-align: top; } .tool-table th { color: var(--muted); font-weight: 500; font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; } .g-status { width: 56px; font-weight: 600; } .g-pass .g-status { color: var(--pass); } .g-fail .g-status { color: var(--fail); } .g-type { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; width: 220px; color: var(--accent); } .tool-table .idx { width: 36px; color: var(--muted); } .tool-table .tool { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; color: 
var(--accent); width: 180px; white-space: nowrap; } .tool-table .args code { font-size: 11.5px; color: var(--text); white-space: pre-wrap; word-break: break-word; } .tool-table .num { text-align: right; font-variant-numeric: tabular-nums; width: 80px; } .tool-table .resultBar { width: 220px; } .bar { width: 140px; height: 6px; background: var(--panel-2); border-radius: 3px; overflow: hidden; display: inline-block; vertical-align: middle; } .bar .fill { height: 100%; background: var(--accent); } .resultBar .num { display: inline-block; margin-left: 8px; } .biggest { color: var(--muted); font-size: 12px; margin-bottom: 4px; } .final-text summary { padding: 10px 0; color: var(--accent); } .final-text pre { margin-top: 8px; } .muted { color: var(--muted); font-size: 12px; } footer { color: var(--muted); font-size: 12px; margin-top: 32px; padding-top: 16px; border-top: 1px solid var(--border); text-align: center; } `;