test(benchmark): Add Playwright UI scenario benchmark framework + customer-scale executions-list spec (#30561)

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 23:07:12 +02:00 · 2026-05-19 10:27:17 +01:00 · 2026-05-19 10:27:17 +01:00 · 60b5aa643d
commit 60b5aa643d
parent 1dd041588c
20 changed files with 418 additions and 7 deletions
--- a/.github/workflows/test-e2e-infrastructure-reusable.yml
+++ b/.github/workflows/test-e2e-infrastructure-reusable.yml
@ -3,6 +3,8 @@ name: 'Test: E2E Infrastructure'
 on:
  workflow_call:
  workflow_dispatch:
+  schedule:
+    - cron: '0 2 * * *'
  pull_request:
    paths:
      - 'packages/testing/playwright/tests/infrastructure/**'
@ -23,20 +25,27 @@ jobs:

  benchmark:
    needs: [prepare-docker]
-    name: benchmarking
+    name: ${{ matrix.lane }}
    strategy:
      fail-fast: false
      matrix:
        include:
-          - runner: blacksmith-8vcpu-ubuntu-2204
+          - lane: ui
+            grep: '@bench:ui'
+            shards: '[{"shard":1,"images":""}]'
+          - lane: webhook
+            grep: '@bench:webhook'
+            shards: '[{"shard":1,"images":""}]'
+          - lane: kafka
+            grep: '@bench:kafka'
+            shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]'
    uses: ./.github/workflows/test-e2e-reusable.yml
    with:
      test-mode: docker-artifact
-      # Runs the full benchmark suite. Each spec brings its own container via
-      # `test.use({ capability })`, so workers must be 1 (one container at a time).
-      test-command: 'pnpm --filter=n8n-playwright test:benchmark'
+      test-command: 'pnpm --filter=n8n-playwright test:benchmark --grep ${{ matrix.grep }}'
+      pre-generated-matrix: ${{ matrix.shards }}
      workers: '1'
-      runner: ${{ matrix.runner }}
+      runner: blacksmith-8vcpu-ubuntu-2204
      timeout-minutes: 120
-      artifact-prefix: benchmark
+      artifact-prefix: 'benchmark-${{ matrix.lane }}'
    secrets: inherit
--- a/packages/testing/playwright/composables/journeys/admin-views-executions-list.ts
+++ b/packages/testing/playwright/composables/journeys/admin-views-executions-list.ts
@ -0,0 +1,109 @@
+import { expect } from '@playwright/test';
+import type { IWorkflowBase } from 'n8n-workflow';
+import { nanoid } from 'nanoid';
+
+import { workflow, trigger, node } from '../../../../@n8n/workflow-sdk/src';
+import type { n8nPage } from '../../pages/n8nPage';
+import type { ApiHelpers } from '../../services/api-helper';
+import type { TestUser } from '../../services/user-api-helper';
+
+// Name given to the manual trigger node; also passed to `runManually` when seeding executions.
+const TRIGGER_NAME = 'Manual Trigger';
+// Name given to the Code node that follows the trigger.
+const NOOP_NODE_NAME = 'Code';
+// Number of workflows created in the journey project during setup.
+const WORKFLOWS_IN_PROJECT = 2;
+// Number of manual runs triggered for each seeded workflow during setup.
+const EXECUTIONS_PER_WORKFLOW = 1;
+
+/** Entities created by `setupAdminViewsExecutionsList` and read by `viewExecutionsListAsAdmin`. */
+export interface AdminViewsExecutionsListContext {
+	/** User created during setup and added to the project with the `project:admin` role. */
+	admin: TestUser;
+	/** Project the seeded workflows are created in. */
+	project: { id: string; name: string };
+	/** Workflows created in the project during setup. */
+	workflows: Array<{ id: string; name: string }>;
+}
+
+/**
+ * Builds a two-node workflow definition under the given name: a manual trigger
+ * connected to a Code node whose script returns a single `{ ok: true }` item.
+ * The serialized definition gets `settings.executionOrder` set to `v1`.
+ *
+ * @param name - Name passed to the workflow builder.
+ * @returns The serialized workflow definition.
+ */
+function buildJourneyWorkflow(name: string): Partial<IWorkflowBase> {
+	const entryPoint = trigger({
+		type: 'n8n-nodes-base.manualTrigger',
+		version: 1,
+		config: { name: TRIGGER_NAME, parameters: {} },
+	});
+	const codeStep = node({
+		type: 'n8n-nodes-base.code',
+		version: 1,
+		config: {
+			name: NOOP_NODE_NAME,
+			parameters: {
+				mode: 'runOnceForAllItems',
+				jsCode: 'return [{ json: { ok: true } }];',
+			},
+		},
+	});
+
+	// The workflow id is a fresh nanoid on every call.
+	const builder = workflow(nanoid(), name).add(entryPoint.to(codeStep));
+	const definition = builder.toJSON() as IWorkflowBase;
+	definition.settings = { executionOrder: 'v1' };
+	return definition;
+}
+
+/**
+ * Seeds the data the journey needs: a new user, a new project in which that
+ * user holds the `project:admin` role, WORKFLOWS_IN_PROJECT workflows in that
+ * project, and EXECUTIONS_PER_WORKFLOW manual runs of each workflow (each run
+ * is awaited via `waitForExecution` before the next one starts).
+ *
+ * @param api - API helpers used for every seeding call.
+ * @returns The created admin user, project and workflows.
+ */
+export async function setupAdminViewsExecutionsList(
+	api: ApiHelpers,
+): Promise<AdminViewsExecutionsListContext> {
+	const adminEmail = `journey-admin-${nanoid()}@test.com`.toLowerCase();
+	const admin = await api.publicApi.createUser({
+		email: adminEmail,
+		firstName: 'Journey',
+		lastName: 'Admin',
+	});
+
+	const projectName = `journey-${nanoid(8)}`;
+	const project = await api.projects.createProject(projectName);
+	await api.projects.addUserToProject(project.id, admin.id, 'project:admin');
+
+	// Workflows are created one at a time; each iteration appends exactly one entry.
+	const workflows: AdminViewsExecutionsListContext['workflows'] = [];
+	while (workflows.length < WORKFLOWS_IN_PROJECT) {
+		const name = `journey-wf-${nanoid(6)}`;
+		const definition = buildJourneyWorkflow(name);
+		(definition as IWorkflowBase & { projectId: string }).projectId = project.id;
+		const created = await api.workflows.createWorkflowFromDefinition(definition, {
+			makeUnique: false,
+		});
+		workflows.push({ id: created.workflowId, name });
+	}
+
+	// Runs of one workflow are sequential; different workflows proceed concurrently.
+	const runAllExecutions = async ({ id }: { id: string }): Promise<void> => {
+		for (let run = 0; run < EXECUTIONS_PER_WORKFLOW; run += 1) {
+			await api.workflows.runManually(id, TRIGGER_NAME);
+			await api.workflows.waitForExecution(id, 15_000, 'manual');
+		}
+	};
+	await Promise.all(workflows.map(async (wf) => await runAllExecutions(wf)));
+
+	return { admin, project: { id: project.id, name: project.name }, workflows };
+}
+
+/**
+ * Opens the project's executions page and waits until the list shows data:
+ * first for a 200 response whose URL contains `/rest/executions`, then for the
+ * first global execution list item to become visible.
+ *
+ * @param n8n - Page object for the browser session that performs the navigation.
+ * @param ctx - Seeded context; only `ctx.project.id` is read.
+ */
+export async function viewExecutionsListAsAdmin(
+	n8n: n8nPage,
+	ctx: AdminViewsExecutionsListContext,
+): Promise<void> {
+	const targetPath = `/projects/${ctx.project.id}/executions`;
+
+	// Register the response wait before navigating so the request cannot be missed.
+	const listResponse = n8n.page.waitForResponse(
+		(response) => response.url().includes('/rest/executions') && response.status() === 200,
+		{ timeout: 120_000 },
+	);
+
+	// `commit` resolves as soon as the navigation is committed; readiness is
+	// established by the two waits below instead.
+	await n8n.page.goto(targetPath, { waitUntil: 'commit', timeout: 120_000 });
+	await listResponse;
+
+	const firstRow = n8n.executions.getGlobalExecutionItems().first();
+	await expect(firstRow).toBeVisible({ timeout: 60_000 });
+}
+
+/**
+ * Full journey: seeds the data, starts a session as the seeded admin user, and
+ * views the project's executions list in that session.
+ *
+ * @param deps - `api` performs the seeding; `n8n` is used to start the admin session.
+ */
+export async function adminViewsExecutionsList(deps: {
+	n8n: n8nPage;
+	api: ApiHelpers;
+}): Promise<void> {
+	const seeded = await setupAdminViewsExecutionsList(deps.api);
+	const adminSession = await deps.n8n.start.withUser(seeded.admin);
+	await viewExecutionsListAsAdmin(adminSession, seeded);
+}
--- a/packages/testing/playwright/pages/ExecutionsPage.ts
+++ b/packages/testing/playwright/pages/ExecutionsPage.ts
@ -53,6 +53,10 @@ export class ExecutionsPage extends BasePage {
 		return this.page.getByTestId('current-executions-list');
 	}

+	/** Locator matching every element with the `global-execution-list-item` test id. */
+	getGlobalExecutionItems(): Locator {
+		return this.page.getByTestId('global-execution-list-item');
+	}
+
 	getExecutionsSidebar(): Locator {
 		return this.page.getByTestId('executions-sidebar');
 	}
--- a/packages/testing/playwright/tests/e2e/journeys/admin-views-executions-list.spec.ts
+++ b/packages/testing/playwright/tests/e2e/journeys/admin-views-executions-list.spec.ts
@ -0,0 +1,14 @@
+import { adminViewsExecutionsList } from '../../../composables/journeys/admin-views-executions-list';
+import { test } from '../../../fixtures/base';
+
+// Functional run of the admin-views-executions-list journey; seeding and the
+// page interaction both live in the `adminViewsExecutionsList` composable.
+test.describe(
+	'an admin can view the execution list',
+	{
+		annotation: [{ type: 'owner', description: 'Catalysts' }],
+	},
+	() => {
+		test('renders the project executions list with seeded executions', async ({ n8n, api }) => {
+			await adminViewsExecutionsList({ n8n, api });
+		});
+	},
+);
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/harness/bulk-seed-executions.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/harness/bulk-seed-executions.ts
@ -0,0 +1,43 @@
+import type { ServiceHelpers } from 'n8n-containers/services/types';
+
+/**
+ * Inserts `count` synthetic rows into `execution_entity` with one SQL
+ * statement, assigning them round-robin to the workflows that
+ * `shared_workflow` links to `projectId`.
+ *
+ * Bypasses TypeORM and the n8n execution lifecycle so 100k+ rows seed in seconds.
+ * Status mix (every 20th row `error`, the rest `success`) and 1ms-staggered
+ * timestamps mirror production shape so the executions list query's ORDER BY
+ * and access-control filter exercise realistic plans.
+ *
+ * @param services - Service helpers; `services.postgres` must be available.
+ * @param options - `projectId` selects the target workflows; `count` is the
+ *   number of rows to insert (values <= 0 make the call a no-op).
+ * @throws Error when `count` is positive but not a safe integer, or when the
+ *   postgres service is not available.
+ */
+export async function bulkSeedExecutions(
+	services: ServiceHelpers,
+	options: { projectId: string; count: number },
+): Promise<void> {
+	const { projectId, count } = options;
+	if (count <= 0) return;
+	// `count` is spliced into the statement text, so NaN, Infinity and fractional
+	// values must be rejected here rather than reaching generate_series().
+	if (!Number.isSafeInteger(count)) {
+		throw new Error(`bulk seed count must be a safe integer, got ${count}`);
+	}
+	if (!services.postgres) {
+		throw new Error('postgres service not available — bulk seed requires direct PG access');
+	}
+
+	// Double embedded single quotes so the id cannot terminate the SQL string literal.
+	const projectIdLiteral = projectId.replace(/'/g, "''");
+
+	const sql = `
+		WITH project_workflows AS (
+			SELECT w.id, ROW_NUMBER() OVER (ORDER BY w.id) AS wf_idx
+			FROM workflow_entity w
+			INNER JOIN shared_workflow sw ON sw."workflowId" = w.id
+			WHERE sw."projectId" = '${projectIdLiteral}'
+		),
+		wf_count AS (SELECT COUNT(*)::int AS n FROM project_workflows)
+		INSERT INTO execution_entity
+			(finished, mode, status, "createdAt", "startedAt", "stoppedAt", "workflowId", "storedAt")
+		SELECT
+			true,
+			'webhook',
+			CASE WHEN s.idx % 20 = 0 THEN 'error' ELSE 'success' END,
+			NOW() - (s.idx * interval '1 millisecond'),
+			NOW() - (s.idx * interval '1 millisecond'),
+			NOW() - (s.idx * interval '1 millisecond') + interval '50 ms',
+			pw.id,
+			'db'
+		FROM generate_series(1, ${count}) s(idx)
+		CROSS JOIN wf_count
+		JOIN project_workflows pw ON pw.wf_idx = ((s.idx - 1) % wf_count.n) + 1;
+	`;
+
+	await services.postgres.exec(sql);
+}
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/harness/loop-ui-scenario.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/harness/loop-ui-scenario.ts
@ -0,0 +1,24 @@
+import type { n8nPage } from '../../../../pages/n8nPage';
+
+/** Result returned by `loopUiScenario`. */
+export interface UiScenarioResult {
+	/** Literal tag identifying this result shape. */
+	kind: 'ui-scenario';
+	/** Duration of each scenario iteration in milliseconds, in execution order. */
+	latenciesMs: number[];
+}
+
+/** Inputs for `loopUiScenario`. */
+export interface LoopUiScenarioOptions {
+	/** Page object handed to `scenario` on every iteration. */
+	n8n: n8nPage;
+	/** The UI interaction to time; awaited once per iteration. */
+	scenario: (n8n: n8nPage) => Promise<void>;
+	/** Number of iterations to run. */
+	repeats: number;
+}
+
+/**
+ * Runs `scenario` sequentially `repeats` times against the same page object and
+ * records how long each run took, measured with `Date.now()`.
+ *
+ * @param options - Page object, scenario callback and iteration count.
+ * @returns A `ui-scenario` result holding one latency per iteration, in order.
+ */
+export async function loopUiScenario(options: LoopUiScenarioOptions): Promise<UiScenarioResult> {
+	const { n8n, scenario, repeats } = options;
+	console.log(`[UI] Running ${repeats} iterations`);
+
+	const samples: number[] = [];
+	// Each pass appends exactly one sample, so the array length doubles as the counter.
+	while (samples.length < repeats) {
+		const startedAt = Date.now();
+		await scenario(n8n);
+		const finishedAt = Date.now();
+		samples.push(finishedAt - startedAt);
+	}
+
+	return { kind: 'ui-scenario', latenciesMs: samples };
+}
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/harness/measure-load-impact.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/harness/measure-load-impact.ts
@ -0,0 +1,104 @@
+import type { TestInfo } from '@playwright/test';
+import type { ServiceHelpers } from 'n8n-containers/services/types';
+
+import { DockerStatsSampler } from './docker-stats-fallback';
+import type { UiScenarioResult } from './loop-ui-scenario';
+import {
+	attachReportMetrics,
+	buildAndAttachRunReport,
+	renderRunReport,
+	reportContainerStats,
+	reportDiagnostics,
+	reportJaegerTraces,
+	reportPgQueryBreakdown,
+	reportPgSaturation,
+} from './orchestration';
+import type { BenchmarkDimensions, ThroughputInfo } from '../../../../utils/benchmark';
+
+/** A named unit of load that `measureLoadImpact` runs while it collects metrics. */
+export interface LoadDriver<T = unknown> {
+	/** Label used in the log line and joined into the `drivers` report dimension. */
+	name: string;
+	/** Starts the load; `measureLoadImpact` aborts the signal once the first driver settles. */
+	run: (signal: AbortSignal) => Promise<T>;
+}
+
+/** Inputs for `measureLoadImpact`. */
+export interface MeasureLoadImpactOptions {
+	/** Service helpers used for the postgres statistics and the report collectors. */
+	services: ServiceHelpers;
+	/** Playwright test info; its timeout is raised and it is passed to the report helpers. */
+	testInfo: TestInfo;
+	/** Drivers to run concurrently; must contain at least one entry. */
+	drivers: LoadDriver[];
+	/** Extra report dimensions, merged with the generated `drivers` dimension. */
+	dimensions?: BenchmarkDimensions;
+}
+
+/**
+ * Runs the given load drivers concurrently and attaches a run report for the
+ * measured window to the test.
+ *
+ * Sequence: raise the test timeout to 15 minutes, reset pg_stat_statements,
+ * capture a WAL baseline, start the docker stats sampler, start every driver,
+ * wait for the first driver to settle, abort the shared signal, wait for all
+ * drivers to settle, then collect diagnostics and build, attach and render
+ * the report.
+ *
+ * @param options - Services, test info, drivers and optional extra dimensions.
+ * @throws Error when `drivers` is empty, or when the first driver to settle
+ *   rejects (the remaining drivers are aborted and awaited before the error
+ *   propagates).
+ */
+export async function measureLoadImpact(options: MeasureLoadImpactOptions): Promise<void> {
+	const { services, testInfo, drivers, dimensions: extraDims = {} } = options;
+	if (drivers.length === 0) throw new Error('measureLoadImpact requires at least one driver');
+	testInfo.setTimeout(15 * 60 * 1000);
+
+	const dimensions: BenchmarkDimensions = {
+		...extraDims,
+		drivers: drivers.map((d) => d.name).join('+'),
+	};
+
+	await services.postgres.resetStatStatements();
+	const walBaseline = await services.postgres.pgStatWal();
+	const sampler = new DockerStatsSampler();
+	sampler.start();
+
+	console.log(`[MEASURE] ${drivers.map((d) => d.name).join(' + ')}`);
+	const controller = new AbortController();
+	const start = Date.now();
+	const driverPromises = drivers.map(async (d) => await d.run(controller.signal));
+	let settled: Array<PromiseSettledResult<unknown>>;
+	try {
+		await Promise.race(driverPromises);
+	} finally {
+		// Always signal and drain the remaining drivers, even when the first one to
+		// settle rejected; otherwise they keep running unobserved after the failure.
+		controller.abort();
+		settled = await Promise.allSettled(driverPromises);
+	}
+	const durationMs = Date.now() - start;
+	const elapsedSec = durationMs / 1000;
+
+	const results = settled.map((s, i) => {
+		if (s.status === 'rejected') {
+			// A driver may reject because it was aborted; log it so a failed driver is
+			// distinguishable from one that simply produced no result.
+			console.warn(`[MEASURE] driver "${drivers[i].name}" rejected: ${String(s.reason)}`);
+		}
+		return {
+			name: drivers[i].name,
+			result: s.status === 'fulfilled' ? s.value : undefined,
+		};
+	});
+
+	const diagnostics = await reportDiagnostics({ testInfo, services, durationMs, dimensions });
+	const { containers, source: containersSource } = await reportContainerStats(diagnostics, sampler);
+	const pgQueries = await reportPgQueryBreakdown({ services, durationMs });
+	const pgSaturation = await reportPgSaturation({ services, durationMs });
+	await reportJaegerTraces({ testInfo, services, since: start });
+
+	const throughput = throughputFromUi(results, elapsedSec);
+	const report = await buildAndAttachRunReport({
+		testInfo,
+		scenario: { spec: testInfo.title, dimensions },
+		duration: { totalMs: durationMs, wallClockMs: durationMs },
+		throughput,
+		containers,
+		containersSource,
+		diagnostics,
+		pgQueries,
+		pgSaturation,
+		walBaseline,
+	});
+	await attachReportMetrics(testInfo, report, dimensions);
+	renderRunReport(report);
+}
+
+/** Type guard: true when `x` is a non-null object whose `kind` is `'ui-scenario'`. */
+function isUiScenarioResult(x: unknown): x is UiScenarioResult {
+	if (typeof x !== 'object' || x === null) return false;
+	const candidate = x as { kind?: string };
+	return candidate.kind === 'ui-scenario';
+}
+
+/**
+ * Derives throughput and latency percentiles from the first driver result that
+ * is a UI scenario result.
+ *
+ * @param results - Per-driver results; entries that are not UI scenario results are ignored.
+ * @param elapsedSec - Duration of the measured run in seconds.
+ * @returns An empty object when no UI result exists or it recorded no
+ *   latencies; otherwise iterations per second, the iteration count, and the
+ *   50th and 99th percentile latencies in milliseconds.
+ */
+function throughputFromUi(
+	results: Array<{ name: string; result: unknown }>,
+	elapsedSec: number,
+): ThroughputInfo {
+	const ui = results.find((r) => isUiScenarioResult(r.result))?.result as
+		| UiScenarioResult
+		| undefined;
+	if (!ui || ui.latenciesMs.length === 0) return {};
+	// Sort a copy so the recorded iteration order of the result is left untouched.
+	const sorted = [...ui.latenciesMs].sort((a, b) => a - b);
+	// The index is clamped to the last element, so a high percentile over a small
+	// sample resolves to the slowest iteration.
+	const pct = (p: number) =>
+		sorted[Math.min(sorted.length - 1, Math.floor(sorted.length * p))] ?? 0;
+	return {
+		execPerSec: ui.latenciesMs.length / elapsedSec,
+		totalCompleted: ui.latenciesMs.length,
+		p50Ms: pct(0.5),
+		// Must be 0.99 to match the field name; 0.95 would report p95 under the p99 label.
+		p99Ms: pct(0.99),
+	};
+}
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts
@ -8,6 +8,7 @@ test.use({ capability: benchConfig('burst-drain-capacity', { kafka: true, worker
 test.describe(
 	'How fast can we drain a backlog?',
 	{
+		tag: '@bench:kafka',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'burst-drain-capacity' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/node-count-scaling.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/node-count-scaling.spec.ts
@ -11,6 +11,7 @@ test.use({ capability: benchConfig('node-count-scaling', { kafka: true, workers:
 test.describe(
 	'How does throughput scale with workflow complexity?',
 	{
+		tag: '@bench:kafka',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'node-count-scaling' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/output-size-impact.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/output-size-impact.spec.ts
@ -16,6 +16,7 @@ test.use({ capability: benchConfig('output-size-impact', { kafka: true, workers:
 test.describe(
 	'What is the impact of node output size on throughput?',
 	{
+		tag: '@bench:kafka',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'output-size-impact' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/queue-mode-sustained-rate.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/queue-mode-sustained-rate.spec.ts
@ -8,6 +8,7 @@ test.use({ capability: benchConfig('queue-mode-sustained-rate', { kafka: true, w
 test.describe(
 	'Can queue mode sustain 250 msg/s steady?',
 	{
+		tag: '@bench:kafka',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'queue-mode-sustained-rate' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/single-instance-ceiling.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/single-instance-ceiling.spec.ts
@ -8,6 +8,7 @@ test.use({ capability: benchConfig('single-instance-ceiling', { kafka: true }) }
 test.describe(
 	'How much can we process on a single instance?',
 	{
+		tag: '@bench:kafka',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'single-instance-throughput-ceiling' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/steady-rate-breaking-point.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/kafka/steady-rate-breaking-point.spec.ts
@ -21,6 +21,7 @@ test.use({ capability: benchConfig('steady-rate-breaking-point', { kafka: true }
 test.describe(
 	'At what input rate does the system fall behind?',
 	{
+		tag: '@bench:kafka',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'steady-rate-breaking-point' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/ui/executions-list-customer-scale.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/ui/executions-list-customer-scale.spec.ts
@ -0,0 +1,92 @@
+import {
+	setupAdminViewsExecutionsList,
+	viewExecutionsListAsAdmin,
+} from '../../../../composables/journeys/admin-views-executions-list';
+import { test } from '../../../../fixtures/base';
+import { benchConfig } from '../../../../playwright-projects';
+import type { ApiHelpers } from '../../../../services/api-helper';
+import { bulkSeedExecutions } from '../harness/bulk-seed-executions';
+import { loopUiScenario } from '../harness/loop-ui-scenario';
+import { measureLoadImpact } from '../harness/measure-load-impact';
+
+// Number of timed page loads performed by the UI driver.
+const ITERATIONS = 30;
+// Target number of workflows in the project before measuring.
+const WORKFLOWS_IN_PROJECT = 400;
+// Number of execution rows bulk-inserted before measuring.
+const PRESEEDED_EXECUTIONS = 1_000_000;
+// Maximum number of workflow-creation requests issued concurrently while inflating the project.
+const CREATE_BATCH_SIZE = 20;
+
+// NOTE(review): presumably these env vars make the instance keep data for
+// successful executions and disable pruning — confirm against the n8n config docs.
+test.use({
+	capability: benchConfig('executions-list-customer-scale', {
+		env: {
+			EXECUTIONS_DATA_SAVE_ON_SUCCESS: 'all',
+			EXECUTIONS_DATA_PRUNE: 'false',
+		},
+	}),
+});
+
+/**
+ * Creates workflows in the project until it holds `target` of them, issuing at
+ * most CREATE_BATCH_SIZE creation requests concurrently. Does nothing when
+ * `existing` already meets or exceeds `target`.
+ *
+ * @param api - API helpers used to create the workflows.
+ * @param projectId - Project to create the workflows in.
+ * @param target - Desired total number of workflows.
+ * @param existing - Number of workflows the project already has.
+ */
+async function inflateProjectWorkflows(
+	api: ApiHelpers,
+	projectId: string,
+	target: number,
+	existing: number,
+): Promise<void> {
+	const missing = Math.max(0, target - existing);
+
+	let scheduled = 0;
+	while (scheduled < missing) {
+		// The final batch may be smaller than CREATE_BATCH_SIZE.
+		const batchSize = Math.min(CREATE_BATCH_SIZE, missing - scheduled);
+		const pending = Array.from(
+			{ length: batchSize },
+			async () => await api.workflows.createInProject(projectId),
+		);
+		await Promise.all(pending);
+		scheduled += CREATE_BATCH_SIZE;
+	}
+}
+
+// Benchmark: times repeated loads of the project executions page while
+// `measureLoadImpact` collects metrics for the run. Tagged `@bench:ui`.
+test.describe(
+	'What is the PG impact of opening the executions list on a customer-shaped instance?',
+	{
+		tag: '@bench:ui',
+		annotation: [
+			{ type: 'owner', description: 'Catalysts' },
+			{ type: 'question', description: 'executions-list-customer-scale' },
+		],
+	},
+	() => {
+		test(`Admin opens /projects/:id/executions ×${ITERATIONS} | ${WORKFLOWS_IN_PROJECT} wf | ${PRESEEDED_EXECUTIONS} execs`, async ({
+			services,
+			n8n,
+		}, testInfo) => {
+			// Seed the journey baseline (admin, project, a few workflows with real executions).
+			const ctx = await setupAdminViewsExecutionsList(n8n.api);
+			// Grow the project to the target workflow count.
+			await inflateProjectWorkflows(
+				n8n.api,
+				ctx.project.id,
+				WORKFLOWS_IN_PROJECT,
+				ctx.workflows.length,
+			);
+			// Insert the synthetic execution rows directly into postgres.
+			await bulkSeedExecutions(services, {
+				projectId: ctx.project.id,
+				count: PRESEEDED_EXECUTIONS,
+			});
+
+			// The session is started once, outside the measured window, and reused by every iteration.
+			const adminN8n = await n8n.start.withUser(ctx.admin);
+
+			await measureLoadImpact({
+				services,
+				testInfo,
+				drivers: [
+					{
+						name: 'ui',
+						// The abort signal is not used: the loop runs all ITERATIONS to completion.
+						run: () =>
+							loopUiScenario({
+								n8n: adminN8n,
+								scenario: (page) => viewExecutionsListAsAdmin(page, ctx),
+								repeats: ITERATIONS,
+							}),
+					},
+				],
+				dimensions: {
+					journey: 'admin-views-executions-list',
+					workflowsInProject: WORKFLOWS_IN_PROJECT,
+					preseededExecutions: PRESEEDED_EXECUTIONS,
+				},
+			});
+		});
+	},
+);
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-main-scaling.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-main-scaling.spec.ts
@ -24,6 +24,7 @@ test.use({ capability: benchConfig('webhook-main-scaling', { mains: MAINS, worke
 test.describe(
 	'Does webhook ingestion scale linearly with main count?',
 	{
+		tag: '@bench:webhook',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'webhook-main-scaling' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-otel-overhead.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-otel-overhead.spec.ts
@ -17,6 +17,7 @@ test.use({
 test.describe(
 	'What is the runtime cost of enabling OTEL?',
 	{
+		tag: '@bench:webhook',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'webhook-otel-overhead' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-queue-baseline.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-queue-baseline.spec.ts
@ -22,6 +22,7 @@ test.use({ capability: benchConfig('webhook-queue-baseline', { mains: MAINS, wor
 test.describe(
 	'What is the webhook ingestion ceiling in queue mode at 1 main + 1 worker?',
 	{
+		tag: '@bench:webhook',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'webhook-queue-baseline' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-save-data-overhead.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-save-data-overhead.spec.ts
@ -24,6 +24,7 @@ test.use({
 test.describe(
 	'What is the runtime cost of saving execution data on success?',
 	{
+		tag: '@bench:webhook',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'webhook-save-data-overhead' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts
@ -18,6 +18,7 @@ test.use({ capability: benchConfig('webhook-single-instance') });
 test.describe(
 	'What is the single-instance webhook ingestion ceiling?',
 	{
+		tag: '@bench:webhook',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'webhook-single-instance' },
--- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-sync-latency-floor.spec.ts
+++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-sync-latency-floor.spec.ts
@ -31,6 +31,7 @@ test.use({
 test.describe(
 	'What is the sync webhook latency floor at 1 main + 1 worker?',
 	{
+		tag: '@bench:webhook',
 		annotation: [
 			{ type: 'owner', description: 'Catalysts' },
 			{ type: 'question', description: 'webhook-sync-latency-floor' },