diff --git a/.github/workflows/test-e2e-infrastructure-reusable.yml b/.github/workflows/test-e2e-infrastructure-reusable.yml index f7da4f9c217..8955ff41e6d 100644 --- a/.github/workflows/test-e2e-infrastructure-reusable.yml +++ b/.github/workflows/test-e2e-infrastructure-reusable.yml @@ -26,7 +26,10 @@ jobs: shards: '[{"shard":1,"images":""}]' - lane: webhook grep: '@bench:webhook' - shards: '[{"shard":1,"images":""}]' + # 2 shards — webhook lane grew from 6 to 9 spec files with the + # dedicated-proc topology (DEVP-200). Single shard pushed wall time + # past 40 min; two shards halves that. Bump to 3 if specs grow further. + shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]' - lane: kafka grep: '@bench:kafka' shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]' diff --git a/packages/cli/src/webhooks/webhook-server.ts b/packages/cli/src/webhooks/webhook-server.ts index 648dcd7bfb5..87dd52cd1e5 100644 --- a/packages/cli/src/webhooks/webhook-server.ts +++ b/packages/cli/src/webhooks/webhook-server.ts @@ -1,6 +1,14 @@ -import { Service } from '@n8n/di'; +import { Container, Service } from '@n8n/di'; import { AbstractServer } from '@/abstract-server'; @Service() -export class WebhookServer extends AbstractServer {} +export class WebhookServer extends AbstractServer { + /** Mounts `/metrics` so dedicated webhook procs are scrapeable. */ + async configure(): Promise { + if (this.globalConfig.endpoints.metrics.enable) { + const { PrometheusMetricsService } = await import('@/metrics/prometheus-metrics.service'); + await Container.get(PrometheusMetricsService).init(this.app); + } + } +} diff --git a/packages/testing/containers/services/load-balancer.ts b/packages/testing/containers/services/load-balancer.ts index d88a44bdbee..6f254470a81 100644 --- a/packages/testing/containers/services/load-balancer.ts +++ b/packages/testing/containers/services/load-balancer.ts @@ -6,6 +6,8 @@ import type { LoadBalancerPolicy, Service, ServiceResult } from './types'; export interface LoadBalancerConfig { mainCount: number; + /** When > 0, Caddy path-routes `WEBHOOK_PROC_PATHS` to webhook procs. */ + webhookCount: number; hostPort?: number; policy: LoadBalancerPolicy; } @@ -17,13 +19,19 @@ export interface LoadBalancerMeta { export type LoadBalancerResult = ServiceResult; -function buildCaddyConfig(upstreamServers: string[], policy: LoadBalancerPolicy): string { - const backends = upstreamServers.join(' '); - return ` -:80 { - # Reverse proxy with load balancing - reverse_proxy ${backends} { - lb_policy ${policy} +// Production paths the `n8n webhook` proc serves. Test/waiting/form-test paths +// stay on main, per `packages/cli/src/commands/webhook.ts`. +const WEBHOOK_PROC_PATHS = ['/webhook/*', '/form/*'] as const; + +function buildCaddyConfig( + mainUpstreams: string[], + webhookUpstreams: string[], + policy: LoadBalancerPolicy, +): string { + const mainBackends = mainUpstreams.join(' '); + const webhookBackends = webhookUpstreams.join(' '); + + const sharedReverseProxyBlock = ` lb_policy ${policy} # Health check health_uri /healthz @@ -34,13 +42,42 @@ function buildCaddyConfig(upstreamServers: string[], policy: LoadBalancerPolicy) dial_timeout 60s read_timeout 60s write_timeout 60s - } + }`; + + if (webhookUpstreams.length === 0) { + return ` +:80 { + # Reverse proxy with load balancing + reverse_proxy ${mainBackends} { +${sharedReverseProxyBlock} } # Set max request body size request_body { max_size 50MB } +}`; + } + + const webhookMatcher = WEBHOOK_PROC_PATHS.join(' '); + return ` +:80 { + request_body { + max_size 50MB + } + + @webhooks path ${webhookMatcher} + handle @webhooks { + reverse_proxy ${webhookBackends} { +${sharedReverseProxyBlock} + } + } + + handle { + reverse_proxy ${mainBackends} { +${sharedReverseProxyBlock} + } + } }`; } @@ -51,6 +88,7 @@ export const loadBalancer: Service = { getOptions(ctx) { return { mainCount: ctx.mains, + webhookCount: ctx.webhooks, hostPort: ctx.allocatedPorts.loadBalancer, policy: ctx.config.lbPolicy ?? 'first', } as LoadBalancerConfig; @@ -64,16 +102,19 @@ export const loadBalancer: Service = { }, async start(network, projectName, config?: unknown): Promise { - const { mainCount, hostPort, policy } = config as LoadBalancerConfig; + const { mainCount, webhookCount, hostPort, policy } = config as LoadBalancerConfig; const { consumer, throwWithLogs } = createSilentLogConsumer(); - // Generate upstream server addresses - const upstreamServers = Array.from( - { length: mainCount }, - (_, index) => `${projectName}-n8n-main-${index + 1}:5678`, + // Single-main containers are named `${projectName}-n8n`, not `-n8n-main-1`. + const mainHostname = (index: number): string => + mainCount > 1 ? `${projectName}-n8n-main-${index}:5678` : `${projectName}-n8n:5678`; + const mainUpstreams = Array.from({ length: mainCount }, (_, index) => mainHostname(index + 1)); + const webhookUpstreams = Array.from( + { length: webhookCount }, + (_, index) => `${projectName}-n8n-webhook-${index + 1}:5678`, ); - const caddyConfig = buildCaddyConfig(upstreamServers, policy); + const caddyConfig = buildCaddyConfig(mainUpstreams, webhookUpstreams, policy); try { const container = await new GenericContainer(TEST_CONTAINER_IMAGES.caddy) diff --git a/packages/testing/containers/services/n8n.ts b/packages/testing/containers/services/n8n.ts index 850376b9bdf..d3a7c65ab78 100644 --- a/packages/testing/containers/services/n8n.ts +++ b/packages/testing/containers/services/n8n.ts @@ -59,6 +59,8 @@ const BASE_ENV: Record = { export interface N8NInstancesOptions { mains: number; workers: number; + /** Dedicated `n8n webhook` procs. Forces queue mode when > 0. */ + webhooks?: number; projectName: string; network: StartedNetwork; serviceEnvironment: Record; @@ -68,6 +70,8 @@ export interface N8NInstancesOptions { allocatedPort?: number; resourceQuota?: { memory?: number; cpu?: number }; workerResourceQuota?: { memory?: number; cpu?: number }; + /** Resource quota for webhook procs. Falls back to `resourceQuota` if omitted. */ + webhookResourceQuota?: { memory?: number; cpu?: number }; filesToMount?: FileToMount[]; } @@ -81,13 +85,14 @@ function computeEnvironment(options: N8NInstancesOptions): Record 1 || workers > 0; + const isQueueMode = mains > 1 || workers > 0 || webhooks > 0; const env: Record = { ...BASE_ENV, @@ -121,9 +126,11 @@ function computeEnvironment(options: N8NInstancesOptions): Record string | null; } +const SERVICE_LABEL: Record = { + main: 'n8n-main', + webhook: 'n8n-webhook', + worker: 'n8n-worker', +}; + async function createContainer( instance: InstanceConfig, shared: SharedConfig, diagnostics: N8NStartupDiagnostics, ): Promise { - const { name, isWorker, instanceNumber, networkAlias, hostPort } = instance; + const { name, role, instanceNumber, networkAlias, hostPort } = instance; const { projectName, environment, network, resourceQuota, filesToMount } = shared; const { consumer, throwWithLogs, getLogs } = createSilentLogConsumer(); const { strategy: waitStrategy, getLastBody: getLastReadinessBody } = createReadinessProbe( @@ -161,7 +174,7 @@ async function createContainer( .withEnvironment(environment) .withLabels({ 'com.docker.compose.project': projectName, - 'com.docker.compose.service': isWorker ? 'n8n-worker' : 'n8n-main', + 'com.docker.compose.service': SERVICE_LABEL[role], instance: instanceNumber.toString(), }) .withPullPolicy(new N8nImagePullPolicy(N8N_IMAGE)) @@ -185,14 +198,16 @@ async function createContainer( const ports: PortWithOptionalBinding[] = hostPort ? [{ container: N8N_READINESS_PORT, host: hostPort }] : [N8N_READINESS_PORT]; - if (isWorker) { + if (role === 'worker') { ports.push(5679); } container = container.withExposedPorts(...ports).withWaitStrategy(waitStrategy); - if (isWorker) { + if (role === 'worker') { container = container.withCommand(['worker']); + } else if (role === 'webhook') { + container = container.withCommand(['webhook']); } try { @@ -219,11 +234,13 @@ export async function createN8NInstances( const { mains, workers, + webhooks = 0, projectName, network, allocatedPort, resourceQuota, workerResourceQuota, + webhookResourceQuota, filesToMount, } = options; @@ -248,26 +265,52 @@ export async function createN8NInstances( filesToMount, }; + const webhookShared: SharedConfig = { + projectName, + environment, + network, + resourceQuota: webhookResourceQuota ?? resourceQuota, + filesToMount, + }; + + const sharedByRole: Record = { + main: mainShared, + webhook: webhookShared, + worker: workerShared, + }; + const instances: InstanceConfig[] = [ - ...Array.from({ length: mains }, (_, i) => { + ...Array.from({ length: mains }, (_, i): InstanceConfig => { const num = i + 1; const name = mains > 1 ? `${projectName}-n8n-main-${num}` : `${projectName}-n8n`; return { name, - isWorker: false, + role: 'main', instanceNumber: num, networkAlias: name, hostPort: num === 1 ? allocatedPort : undefined, }; }), - ...Array.from({ length: workers }, (_, i) => ({ - name: `${projectName}-n8n-worker-${i + 1}`, - isWorker: true, - instanceNumber: i + 1, - })), + ...Array.from({ length: webhooks }, (_, i): InstanceConfig => { + const num = i + 1; + const name = `${projectName}-n8n-webhook-${num}`; + return { + name, + role: 'webhook', + instanceNumber: num, + networkAlias: name, + }; + }), + ...Array.from( + { length: workers }, + (_, i): InstanceConfig => ({ + name: `${projectName}-n8n-worker-${i + 1}`, + role: 'worker', + instanceNumber: i + 1, + }), + ), ]; - // Service-only mode: no n8n containers needed if (instances.length === 0) { log('No n8n instances requested (service-only mode)'); return { containers, environment, diagnostics }; @@ -284,12 +327,12 @@ export async function createN8NInstances( throw new N8NStartupError(message, diagnostics, error); }; - // Start main 1 first (handles DB migrations/setup) + // Main 1 handles DB migrations and must finish before parallel starts. const [main1, ...remaining] = instances; log(`Starting main 1: ${main1.name} (DB setup)`); let main1Result: ContainerStartResult; try { - main1Result = await createContainer(main1, mainShared, diagnostics); + main1Result = await createContainer(main1, sharedByRole[main1.role], diagnostics); } catch (error) { return rethrowWithDiagnostics(error); } @@ -297,20 +340,14 @@ export async function createN8NInstances( containers.push(main1Result.container); log('main 1 ready'); - // Start remaining instances in parallel if (remaining.length > 0) { log(`Starting ${remaining.length} remaining instances in parallel...`); try { const parallelResults = await Promise.all( remaining.map(async (instance) => { - const type = instance.isWorker ? 'worker' : 'main'; - log(`Starting ${type} ${instance.instanceNumber}: ${instance.name}`); - const result = await createContainer( - instance, - instance.isWorker ? workerShared : mainShared, - diagnostics, - ); - log(`${type} ${instance.instanceNumber} ready`); + log(`Starting ${instance.role} ${instance.instanceNumber}: ${instance.name}`); + const result = await createContainer(instance, sharedByRole[instance.role], diagnostics); + log(`${instance.role} ${instance.instanceNumber} ready`); return { instance, result }; }), ); diff --git a/packages/testing/containers/services/types.ts b/packages/testing/containers/services/types.ts index c5864b8369d..9b6b53fad3f 100644 --- a/packages/testing/containers/services/types.ts +++ b/packages/testing/containers/services/types.ts @@ -53,6 +53,7 @@ export interface StartContext { projectName: string; mains: number; workers: number; + webhooks: number; isQueueMode: boolean; usePostgres: boolean; needsLoadBalancer: boolean; @@ -69,18 +70,23 @@ export type LoadBalancerPolicy = 'first' | 'round_robin' | 'random' | 'least_con export interface StackConfig { mains?: number; workers?: number; + /** Dedicated `n8n webhook` procs. Forces queue mode when > 0. */ + webhooks?: number; postgres?: boolean; env?: Record; projectName?: string; resourceQuota?: { memory?: number; cpu?: number }; workerResourceQuota?: { memory?: number; cpu?: number }; + /** Resource quota for webhook procs. Falls back to `resourceQuota` if omitted. */ + webhookResourceQuota?: { memory?: number; cpu?: number }; services?: readonly ServiceName[]; /** When true, services target host machine instead of Docker-internal n8n */ external?: boolean; /** When set, the Docker network uses this exact name instead of a random UUID. */ networkName?: string; /** - * Caddy load-balancer upstream-selection policy. Only applies when `mains > 1`. + * Caddy load-balancer upstream-selection policy. Only applies when `mains > 1` + * or `webhooks > 0` (anything that triggers the LB to start). * Defaults to `'first'` — sticky to main #1, useful for UI debuggability. * Benchmarks should set `'round_robin'` to actually distribute load. */ diff --git a/packages/testing/containers/services/victoria-metrics.ts b/packages/testing/containers/services/victoria-metrics.ts index 33c135be44f..b882c0c34a6 100644 --- a/packages/testing/containers/services/victoria-metrics.ts +++ b/packages/testing/containers/services/victoria-metrics.ts @@ -65,7 +65,7 @@ export const victoriaMetrics: Service = { description: 'VictoriaMetrics', getOptions(ctx: StartContext): VictoriaMetricsConfig { - const { mains, workers, projectName } = ctx; + const { mains, workers, webhooks, projectName } = ctx; const scrapeTargets: ScrapeTarget[] = []; for (let i = 1; i <= mains; i++) { @@ -77,6 +77,14 @@ export const victoriaMetrics: Service = { port: 5678, }); } + for (let i = 1; i <= webhooks; i++) { + scrapeTargets.push({ + job: 'n8n-webhook', + instance: `n8n-webhook-${i}`, + host: `${projectName}-n8n-webhook-${i}`, + port: 5678, + }); + } for (let i = 1; i <= workers; i++) { scrapeTargets.push({ job: 'n8n-worker', diff --git a/packages/testing/containers/stack.ts b/packages/testing/containers/stack.ts index f08cf0fc89e..e1ab17e64f8 100644 --- a/packages/testing/containers/stack.ts +++ b/packages/testing/containers/stack.ts @@ -79,11 +79,13 @@ export async function createN8NStack(config: N8NConfig = {}): Promise const { mains = 1, workers = 0, + webhooks = 0, postgres: usePostgresConfig = false, env = {}, projectName, resourceQuota, workerResourceQuota, + webhookResourceQuota, services: enabledServices = [], external = false, networkName, @@ -91,8 +93,8 @@ export async function createN8NStack(config: N8NConfig = {}): Promise const log = createElapsedLogger('stack'); - const isQueueMode = mains > 1 || workers > 0; - const needsLoadBalancer = mains > 1; + const isQueueMode = mains > 1 || workers > 0 || webhooks > 0; + const needsLoadBalancer = mains > 1 || webhooks > 0; const usePostgres = usePostgresConfig || isQueueMode || enabledServices.includes('keycloak'); const uniqueProjectName = projectName ?? `n8n-stack-${Math.random().toString(36).substring(7)}`; @@ -131,6 +133,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise projectName: uniqueProjectName, mains, workers, + webhooks, isQueueMode, usePostgres, needsLoadBalancer, @@ -213,6 +216,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise const n8nResult = await createN8NInstances({ mains, workers, + webhooks, projectName: uniqueProjectName, network, serviceEnvironment: environment, @@ -222,6 +226,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise allocatedPort: needsLoadBalancer ? undefined : allocatedMainPort, resourceQuota, workerResourceQuota, + webhookResourceQuota, filesToMount, }); containers.push(...n8nResult.containers); @@ -229,7 +234,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise Math.round(performance.now() - n8nStartupStart), n8nResult.containers.length, ); - log(`n8n ready: ${mains} main(s), ${workers} worker(s)`); + log(`n8n ready: ${mains} main(s), ${webhooks} webhook(s), ${workers} worker(s)`); if (lbResult) { await pollContainerHttpEndpoint(lbResult.container, '/healthz/readiness'); diff --git a/packages/testing/playwright/.janitor-baseline.json b/packages/testing/playwright/.janitor-baseline.json index 09d98266351..24b3b4f7142 100644 --- a/packages/testing/playwright/.janitor-baseline.json +++ b/packages/testing/playwright/.janitor-baseline.json @@ -1,6 +1,6 @@ { "version": 1, - "generated": "2026-05-18T14:07:20.233Z", + "generated": "2026-05-25T15:20:35.251Z", "totalViolations": 412, "violations": { "pages/AIAssistantPage.ts": [ @@ -50,13 +50,13 @@ }, { "rule": "scope-lockdown", - "line": 99, + "line": 65, "message": "InstanceAiPage: Unscoped locator - use this.container instead of this.page", "hash": "713916b2be0f" }, { "rule": "scope-lockdown", - "line": 144, + "line": 140, "message": "InstanceAiPage: Unscoped locator - use this.container instead of this.page", "hash": "713916b2be0f" } @@ -2556,7 +2556,7 @@ "tests/infrastructure/benchmarks/kafka/queue-mode-sustained-rate.spec.ts": [ { "rule": "duplicate-logic", - "line": 17, + "line": 18, "message": "Duplicate test logic: \"Kafka trigger + 1 noop, 1KB payload, 250 msg/s × 240s (1 main + 1 worker)\" has identical structure to tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts:\"Kafka trigger + 1 noop, 1KB payload, drain 100k preloaded backlog (1 main + 1 worker)\"", "hash": "388616eecbaa" } @@ -2564,7 +2564,7 @@ "tests/infrastructure/benchmarks/kafka/single-instance-ceiling.spec.ts": [ { "rule": "duplicate-logic", - "line": 17, + "line": 18, "message": "Duplicate test logic: \"Kafka trigger + 1 noop, 1KB payload, 150k msgs\" has identical structure to tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts:\"Kafka trigger + 1 noop, 1KB payload, drain 100k preloaded backlog (1 main + 1 worker)\"", "hash": "1c5033214063" } @@ -2572,7 +2572,7 @@ "tests/infrastructure/benchmarks/kafka/steady-rate-breaking-point.spec.ts": [ { "rule": "duplicate-logic", - "line": 34, + "line": 35, "message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts:\"Kafka trigger + 1 noop, 1KB payload, drain 100k preloaded backlog (1 main + 1 worker)\"", "hash": "5579d3e330e1" } @@ -2580,25 +2580,33 @@ "tests/infrastructure/benchmarks/kafka/output-size-impact.spec.ts": [ { "rule": "duplicate-logic", - "line": 25, + "line": 26, "message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/kafka/node-count-scaling.spec.ts:\"anonymous\"", "hash": "d676f700b46d" } ], - "tests/infrastructure/benchmarks/webhook/webhook-otel-overhead.spec.ts": [ + "tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-2w.spec.ts": [ { "rule": "duplicate-logic", - "line": 26, - "message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-main-scaling.spec.ts:\"anonymous\"", - "hash": "61c8c9b0f1ff" + "line": 44, + "message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts:\"anonymous\"", + "hash": "bc07737c363b" + } + ], + "tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-baseline.spec.ts": [ + { + "rule": "duplicate-logic", + "line": 38, + "message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts:\"anonymous\"", + "hash": "20c97ee82b2c" } ], "tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts": [ { "rule": "duplicate-logic", "line": 27, - "message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-queue-baseline.spec.ts:\"anonymous\"", - "hash": "51d2d4fe3fdb" + "message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts:\"anonymous\"", + "hash": "46d8dc14acc3" } ], "tests/infrastructure/benchmarks-local/instance-ai/thread-churn-delegation.spec.ts": [ @@ -2617,14 +2625,6 @@ "hash": "3ac5475c5814" } ], - "tests/performance/memory-consumption-instance-ai.spec.ts": [ - { - "rule": "duplicate-logic", - "line": 22, - "message": "Duplicate test logic: \"Idle baseline with Instance AI module loaded\" has identical structure to tests/performance/memory-consumption-agents.spec.ts:\"Idle baseline with Agents module loaded\"", - "hash": "ef1cde8770e1" - } - ], "tests/infrastructure/benchmarks-local/instance-ai/cancel-abort.spec.ts": [ { "rule": "no-direct-page-instantiation", diff --git a/packages/testing/playwright/playwright-projects.ts b/packages/testing/playwright/playwright-projects.ts index 8d9b23548f6..33632c94c45 100644 --- a/packages/testing/playwright/playwright-projects.ts +++ b/packages/testing/playwright/playwright-projects.ts @@ -50,6 +50,7 @@ const CONTAINER_CONFIGS: Array<{ name: string; config: N8NConfig }> = [ // postgres/kafka/redis/observability. export const BENCHMARK_MAIN_RESOURCES = { memory: 4, cpu: 2 }; export const BENCHMARK_WORKER_RESOURCES = { memory: 2, cpu: 1 }; +export const BENCHMARK_WEBHOOK_RESOURCES = { memory: 4, cpu: 2 }; export const OBSERVABILITY_SERVICES = ['victoriaLogs', 'victoriaMetrics', 'vector'] as const; @@ -67,9 +68,10 @@ const BENCHMARK_CONFIG: N8NConfig = { postgres: true, resourceQuota: BENCHMARK_MAIN_RESOURCES, workerResourceQuota: BENCHMARK_WORKER_RESOURCES, - // Distribute load across all mains. UI tests stick to the default `first` - // policy so debugging hits a single predictable backend. - lbPolicy: 'round_robin', + webhookResourceQuota: BENCHMARK_WEBHOOK_RESOURCES, + // `least_conn` avoids keep-alive affinity that skews round_robin 50/100% with + // autocannon's long-lived connections across 2+ procs. UI tests use `first`. + lbPolicy: 'least_conn', env: { N8N_LOG_LEVEL: 'error', N8N_DIAGNOSTICS_ENABLED: 'false', @@ -96,6 +98,8 @@ export interface BenchOptions { mains?: number; /** Number of worker pods. Default: 0 (direct mode). */ workers?: number; + /** Dedicated `n8n webhook` procs. Forces queue mode when > 0. */ + webhooks?: number; /** * Adds the `tracing` service (Jaeger + n8n-tracer) and turns on OTEL emission. * Adds ~5-10% per-request overhead — opt in only when measuring OTEL cost or @@ -118,8 +122,8 @@ export interface BenchOptions { * // Queue-mode kafka (1 main + 3 workers) * test.use({ capability: benchConfig('node-count-scaling', { kafka: true, workers: 3 }) }); * - * // Multi-main webhook - * test.use({ capability: benchConfig('webhook-main-scaling', { mains: 2, workers: 2 }) }); + * // Dedicated webhook proc + worker + * test.use({ capability: benchConfig('webhook-dedicated-proc', { webhooks: 1, workers: 1 }) }); */ export function benchConfig(isolation: string, opts: BenchOptions = {}): N8NConfig { const services = [...(BENCHMARK_CONFIG.services ?? [])]; @@ -144,6 +148,7 @@ export function benchConfig(isolation: string, opts: BenchOptions = {}): N8NConf services, ...(opts.mains !== undefined && { mains: opts.mains }), ...(opts.workers !== undefined && { workers: opts.workers }), + ...(opts.webhooks !== undefined && { webhooks: opts.webhooks }), env, }; } diff --git a/packages/testing/playwright/scripts/sizing-matrix-aggregate.ts b/packages/testing/playwright/scripts/sizing-matrix-aggregate.ts new file mode 100644 index 00000000000..8076b8d6871 --- /dev/null +++ b/packages/testing/playwright/scripts/sizing-matrix-aggregate.ts @@ -0,0 +1,414 @@ +/** + * Aggregates N `run-report.json` files into a `sizing-matrix.json`. + * + * npx tsx packages/testing/playwright/scripts/sizing-matrix-aggregate.ts \ + * --input [--out ] [--mapping ] + * + * CURRENTS_API_KEY=… ... --currents-run + * + * Hardware defaults to the Blacksmith CI runner (8 vCPU / 16 GB). Override + * via `--hardware-runner/--hardware-vcpu/--hardware-ram-gb` or + * `SIZING_MATRIX_RUNNER/VCPU/RAM_GB` env when running off-CI, or the matrix + * will mis-attribute the source. + */ + +import { readdirSync, readFileSync, writeFileSync, statSync, mkdirSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { dirname, join, resolve } from 'node:path'; + +const CURRENTS_API = 'https://api.currents.dev/v1'; + +import type { RunReport } from '../utils/benchmark/run-report'; +import { + aggregate, + renderMarkdown, + type AggregateInput, + type HardwareInfo, + type SpecMapping, + type Topology, +} from '../utils/benchmark/sizing-matrix'; + +const DEFAULT_HARDWARE: HardwareInfo = { + runner: 'blacksmith-8vcpu-ubuntu-2204', + vcpu: 8, + ramGb: 16, +}; + +const S0_SINGLE_MAIN: Topology = { + mains: 1, + webhookProcs: 0, + workers: 0, + mainVcpu: 2, + mainRamGb: 4, + pgVcpu: 2, + pgRamGb: 4, + redisVcpu: 1, + redisRamGb: 1, +}; + +const S1_QUEUE_BASELINE: Topology = { + ...S0_SINGLE_MAIN, + workers: 1, + workerVcpu: 2, + workerRamGb: 4, +}; + +const S1_DEDICATED_PROC_BASELINE: Topology = { + ...S1_QUEUE_BASELINE, + webhookProcs: 1, +}; + +const S2_DEDICATED_PROC_2WP_1W: Topology = { + ...S1_DEDICATED_PROC_BASELINE, + webhookProcs: 2, +}; + +const S2_DEDICATED_PROC_2WP_2W: Topology = { + ...S2_DEDICATED_PROC_2WP_1W, + workers: 2, +}; + +// Webhook and kafka triggers collapse into the same cell — shape is workload +// archetype, not ingress protocol. +const DEFAULT_MAPPING: SpecMapping = { + 'webhook/webhook-single-instance.spec.ts': { + scale: 'S0', + shape: 'L', + topology: S0_SINGLE_MAIN, + }, + 'webhook/webhook-dedicated-proc-baseline.spec.ts': { + scale: 'S1', + shape: 'L', + topology: S1_DEDICATED_PROC_BASELINE, + }, + 'webhook/webhook-dedicated-proc-2wp-1w.spec.ts': { + scale: 'S2', + shape: 'L', + topology: S2_DEDICATED_PROC_2WP_1W, + }, + 'webhook/webhook-dedicated-proc-2wp-2w.spec.ts': { + scale: 'S2', + shape: 'L', + topology: S2_DEDICATED_PROC_2WP_2W, + }, + 'webhook/webhook-save-data-overhead.spec.ts': { + scale: 'S1', + shape: 'D', + topology: S1_DEDICATED_PROC_BASELINE, + }, + // `webhook-sync-latency-floor` is deliberately unmapped — measures latency at + // fixed concurrency, not throughput, and distorts the S1-L distribution. + 'kafka/single-instance-ceiling.spec.ts': { + scale: 'S0', + shape: 'L', + topology: S0_SINGLE_MAIN, + }, + 'kafka/queue-mode-sustained-rate.spec.ts': { + scale: 'S1', + shape: 'L', + topology: S1_QUEUE_BASELINE, + }, + 'kafka/burst-drain-capacity.spec.ts': { + scale: 'S1', + shape: 'L', + topology: S1_QUEUE_BASELINE, + }, + 'kafka/node-count-scaling.spec.ts': { + scale: 'S1', + shape: 'X', + topology: S1_QUEUE_BASELINE, + }, + 'kafka/output-size-impact.spec.ts': { + scale: 'S1', + shape: 'D', + topology: S1_QUEUE_BASELINE, + }, + 'kafka/steady-rate-breaking-point.spec.ts': { + scale: 'S0', + shape: 'X', + topology: S0_SINGLE_MAIN, + }, +}; + +interface CliArgs { + input?: string; + currentsRun?: string; + mapping?: string; + out: string; + markdownOut?: string; + n8nVersion: string; + commitSha: string; + hardware: HardwareInfo; +} + +function parseArgs(argv: string[]): CliArgs { + const args: Record = {}; + for (let i = 0; i < argv.length; i++) { + const token = argv[i]; + if (!token?.startsWith('--')) continue; + const key = token.slice(2); + const value = argv[i + 1]; + if (value && !value.startsWith('--')) { + args[key] = value; + i++; + } else { + args[key] = 'true'; + } + } + if (!args.input && !args['currents-run']) { + throw new Error('Either --input or --currents-run is required'); + } + return { + input: args.input ? resolve(args.input) : undefined, + currentsRun: args['currents-run'], + mapping: args.mapping ? resolve(args.mapping) : undefined, + out: resolve(args.out ?? 'sizing-matrix.json'), + markdownOut: args['markdown-out'] ? resolve(args['markdown-out']) : undefined, + n8nVersion: args['n8n-version'] ?? readN8nVersion(), + commitSha: args['commit-sha'] ?? readGitSha(), + hardware: resolveHardware(args), + }; +} + +function resolveHardware(args: Record): HardwareInfo { + const runner = + args['hardware-runner'] ?? process.env.SIZING_MATRIX_RUNNER ?? DEFAULT_HARDWARE.runner; + const vcpu = args['hardware-vcpu'] + ? Number(args['hardware-vcpu']) + : process.env.SIZING_MATRIX_VCPU + ? Number(process.env.SIZING_MATRIX_VCPU) + : DEFAULT_HARDWARE.vcpu; + const ramGb = args['hardware-ram-gb'] + ? Number(args['hardware-ram-gb']) + : process.env.SIZING_MATRIX_RAM_GB + ? Number(process.env.SIZING_MATRIX_RAM_GB) + : DEFAULT_HARDWARE.ramGb; + if (!Number.isFinite(vcpu) || vcpu <= 0) { + throw new Error(`--hardware-vcpu must be a positive number, got ${args['hardware-vcpu']}`); + } + if (!Number.isFinite(ramGb) || ramGb <= 0) { + throw new Error(`--hardware-ram-gb must be a positive number, got ${args['hardware-ram-gb']}`); + } + return { runner, vcpu, ramGb }; +} + +function readN8nVersion(): string { + try { + const pkg = JSON.parse( + readFileSync(join(process.cwd(), 'packages/cli/package.json'), 'utf8'), + ) as { version?: string }; + return pkg.version ?? 'unknown'; + } catch { + return 'unknown'; + } +} + +function readGitSha(): string { + try { + const head = readFileSync(join(process.cwd(), '.git/HEAD'), 'utf8').trim(); + if (head.startsWith('ref: ')) { + const refPath = head.slice(5); + return readFileSync(join(process.cwd(), '.git', refPath), 'utf8') + .trim() + .slice(0, 12); + } + return head.slice(0, 12); + } catch { + return 'unknown'; + } +} + +function loadMapping(path?: string): SpecMapping { + if (!path) return DEFAULT_MAPPING; + const overrides = JSON.parse(readFileSync(path, 'utf8')) as SpecMapping; + return { ...DEFAULT_MAPPING, ...overrides }; +} + +interface CurrentsAttachment { + name: string; + readUrl: string; + contentType: string; +} + +interface CurrentsRunSpec { + instanceId: string; + spec: string; +} + +interface CurrentsRunDetails { + commit?: { sha?: string }; + specs?: CurrentsRunSpec[]; +} + +interface CurrentsInstanceDetails { + spec?: string; + commit?: { sha?: string }; + results?: { attachments?: CurrentsAttachment[] }; +} + +async function currentsGet(apiKey: string, path: string): Promise { + const res = await fetch(`${CURRENTS_API}${path}`, { + headers: { Authorization: `Bearer ${apiKey}` }, + }); + if (!res.ok) { + throw new Error(`Currents API ${path} failed: ${res.status} ${res.statusText}`); + } + const json = (await res.json()) as { data?: T }; + if (!json.data) throw new Error(`Currents API ${path} returned no data field`); + return json.data; +} + +async function fetchCurrentsRun( + runId: string, + cacheDir: string, +): Promise<{ reportPaths: string[]; commitSha?: string }> { + const apiKey = process.env.CURRENTS_API_KEY; + if (!apiKey) throw new Error('CURRENTS_API_KEY env var required for --currents-run'); + + console.log(`[sizing-matrix] Fetching Currents run ${runId}…`); + const run = await currentsGet(apiKey, `/runs/${runId}`); + const specs = run.specs ?? []; + if (specs.length === 0) { + throw new Error(`Currents run ${runId} has no specs.`); + } + console.log(`[sizing-matrix] Run contains ${specs.length} spec instances.`); + mkdirSync(cacheDir, { recursive: true }); + + const reportPaths: string[] = []; + for (const spec of specs) { + const instance = await currentsGet( + apiKey, + `/instances/${spec.instanceId}`, + ); + const attachment = instance.results?.attachments?.find((a) => a.name === 'run-report.json'); + if (!attachment) { + console.warn( + `[sizing-matrix] Spec ${spec.spec} (instance ${spec.instanceId}) has no run-report.json attachment — skipping.`, + ); + continue; + } + const filename = `${sanitiseFilename(spec.spec)}.${spec.instanceId}.json`; + const outPath = join(cacheDir, filename); + await downloadTo(attachment.readUrl, outPath); + reportPaths.push(outPath); + } + console.log( + `[sizing-matrix] Downloaded ${reportPaths.length} run-report.json file(s) → ${cacheDir}`, + ); + return { reportPaths, commitSha: run.commit?.sha?.slice(0, 12) }; +} + +async function downloadTo(url: string, outPath: string): Promise { + const res = await fetch(url); + if (!res.ok) throw new Error(`Download ${url} failed: ${res.status}`); + const buf = Buffer.from(await res.arrayBuffer()); + writeFileSync(outPath, buf); +} + +function sanitiseFilename(spec: string): string { + return spec.replace(/[^a-z0-9._-]+/gi, '-').slice(0, 120); +} + +function findRunReports(root: string): string[] { + const found: string[] = []; + const stack = [root]; + while (stack.length) { + const current = stack.pop(); + if (!current) continue; + const stat = statSync(current); + if (stat.isFile()) { + if (current.endsWith('.json') && current.includes('run-report')) found.push(current); + continue; + } + if (!stat.isDirectory()) continue; + for (const entry of readdirSync(current)) { + stack.push(join(current, entry)); + } + } + return found.sort(); +} + +function loadReport(path: string): { path: string; report: RunReport } | undefined { + try { + const report = JSON.parse(readFileSync(path, 'utf8')) as RunReport; + if (report.schemaVersion !== 1) { + console.warn(`[sizing-matrix] Skipping ${path}: schemaVersion ${report.schemaVersion} != 1`); + return undefined; + } + return { path, report }; + } catch (error) { + console.warn(`[sizing-matrix] Failed to parse ${path}: ${(error as Error).message}`); + return undefined; + } +} + +async function main(): Promise { + const args = parseArgs(process.argv.slice(2)); + const mapping = loadMapping(args.mapping); + + let reportPaths: string[]; + let commitSha = args.commitSha; + if (args.currentsRun) { + const cacheDir = join(tmpdir(), `sizing-aggregator-cache`, args.currentsRun); + const fetched = await fetchCurrentsRun(args.currentsRun, cacheDir); + reportPaths = fetched.reportPaths; + if (fetched.commitSha) commitSha = fetched.commitSha; + } else if (args.input) { + reportPaths = findRunReports(args.input); + } else { + throw new Error('Either --input or --currents-run is required'); + } + + const reports = reportPaths + .map(loadReport) + .filter((r): r is { path: string; report: RunReport } => r !== undefined); + + if (reports.length === 0) { + throw new Error(`No valid run-report.json files found`); + } + console.log(`[sizing-matrix] Aggregating ${reports.length} run-report.json files…`); + + const input: AggregateInput = { + reports, + mapping, + hardware: args.hardware, + n8nVersion: args.n8nVersion, + commitSha, + runDate: new Date().toISOString(), + }; + const matrix = aggregate(input); + + mkdirSync(dirname(args.out), { recursive: true }); + writeFileSync(args.out, JSON.stringify(matrix, null, 2)); + console.log(`[sizing-matrix] Wrote ${matrix.cells.length} cell(s) → ${args.out}`); + + if (args.markdownOut) { + mkdirSync(dirname(args.markdownOut), { recursive: true }); + writeFileSync(args.markdownOut, renderMarkdown(matrix)); + console.log(`[sizing-matrix] Rendered markdown guide → ${args.markdownOut}`); + } + + for (const cell of matrix.cells) { + const shapeKeys = Object.keys(cell.shapes); + console.log(` ${cell.scale}: shapes [${shapeKeys.join(', ')}]`); + for (const shape of shapeKeys) { + const r = cell.shapes[shape as keyof typeof cell.shapes]; + if (!r) continue; + console.log( + ` ${shape}: ceiling=${r.ceilingExecPerSec.p50.toFixed(1)}/s ` + + `green=${r.greenSustainedExecPerSec.toFixed(1)}/s ` + + `p99=${r.latency.p99.toFixed(0)}ms ` + + `workloadIops=${r.io.workloadIopsPerSec.toFixed(0)} ` + + `overhead=${r.io.overheadFactor.toFixed(2)}x ` + + `bottleneck=${r.bottleneck} ` + + `verdict=${r.verdict} ` + + `(n=${r.ceilingExecPerSec.n})`, + ); + } + } +} + +main().catch((error) => { + console.error(`[sizing-matrix] ${(error as Error).message}`); + process.exit(1); +}); diff --git a/packages/testing/playwright/tests/infrastructure/benchmarks/README.md b/packages/testing/playwright/tests/infrastructure/benchmarks/README.md index da73bfed012..6915e5a48d3 100644 --- a/packages/testing/playwright/tests/infrastructure/benchmarks/README.md +++ b/packages/testing/playwright/tests/infrastructure/benchmarks/README.md @@ -18,24 +18,26 @@ The architectural ceiling. No queue tax, no worker dispatch. What's the absolute | kafka | `steady-rate-breaking-point.spec.ts` | At what input rate does the system fall behind? | | webhook | `webhook-single-instance.spec.ts` | What is the single-instance webhook ingestion ceiling? | -### Actual — `1m + 1w queue mode` +### Actual — `1m + 1wp + 1w queue mode` -The real-world minimum HA topology. What does a basic production setup actually deliver? +The production-canonical queue-mode topology: dedicated `n8n webhook` proc fronted by Caddy path-routing, with one worker draining the queue. What does a real production setup actually deliver? | Trigger | Spec | Question | |---------|------|----------| +| webhook | `webhook-dedicated-proc-baseline.spec.ts` | What is the webhook ingestion ceiling with a dedicated webhook proc? | | kafka | `queue-mode-sustained-rate.spec.ts` | Can queue mode sustain 250 msg/s steady? | | kafka | `burst-drain-capacity.spec.ts` | How fast can we drain a backlog? | | kafka | `node-count-scaling.spec.ts` | How does throughput scale with workflow complexity? | | kafka | `output-size-impact.spec.ts` | What is the impact of node output size on throughput? | -### Scaling — `2m + 2w queue mode` +### Scaling — proc-axis and worker-axis at production topology -HA distribution check. Does doubling capacity ~double the actual baseline? +How does the production topology scale when you add a webhook proc, a worker, or both? -| Trigger | Spec | Question | -|---------|------|----------| -| webhook | `webhook-main-scaling.spec.ts` | Does webhook ingestion scale linearly with main count? | +| Trigger | Spec | Topology | Question | +|---------|------|----------|----------| +| webhook | `webhook-dedicated-proc-2wp-1w.spec.ts` | 1m + 2wp + 1w | Does doubling webhook procs (workers fixed) increase ingestion throughput? | +| webhook | `webhook-dedicated-proc-2wp-2w.spec.ts` | 1m + 2wp + 2w | What is the joint scale-up of doubling both webhook procs and workers? | ### Cost — feature toggles on the actual baseline @@ -46,16 +48,18 @@ What does turning on configuration X cost vs the baseline? | webhook | `webhook-otel-overhead.spec.ts` | What is the runtime cost of enabling OTEL? | | webhook | `webhook-save-data-overhead.spec.ts` | What is the runtime cost of saving execution data on success? | -Cost specs run the same workload as a baseline spec with one config knob flipped. Compare the `exec/s`/`p50` of a Cost spec against its baseline from the same CI run to read the cost. OTEL specs also attach `jaeger-traces.json` as a test artifact — replay locally for flamegraph inspection. +Cost specs run the same workload as the `Actual` baseline with one config knob flipped. Compare the `exec/s`/`p50` of a Cost spec against `webhook-dedicated-proc-baseline` from the same CI run to read the cost. OTEL specs also attach `jaeger-traces.json` as a test artifact — replay locally for flamegraph inspection. ## Standard topology -| Tier | Mains | Workers | Per-pod resources | -|------|-------|---------|-------------------| -| **Peak** | 1 | 0 | 4GB / 2 vCPU | -| **Actual** | 1 | 1 | main 4GB/2 vCPU, worker 2GB/1 vCPU | -| **Scaling** | 2 | 2 | main 4GB/2 vCPU, worker 2GB/1 vCPU | -| **Cost** | matches the baseline | matches the baseline | matches the baseline | +| Tier | Mains | Webhook procs | Workers | Per-pod resources | +|------|-------|---------------|---------|-------------------| +| **Peak** | 1 | 0 | 0 | 4GB / 2 vCPU | +| **Actual** | 1 | 0–1 | 1 | main 4GB/2 vCPU, webhook 4GB/2 vCPU, worker 2GB/1 vCPU | +| **Scaling** | 1 | 2 | 1–2 | main 4GB/2 vCPU, webhook 4GB/2 vCPU, worker 2GB/1 vCPU | +| **Cost** | matches the baseline | matches the baseline | matches the baseline | matches the baseline | + +Webhook-trigger specs in **Actual** and **Scaling** use the production-canonical topology (dedicated `n8n webhook` proc fronted by Caddy path-routing). Kafka-trigger specs in **Actual** use 1m + 1w queue mode (kafka doesn't ingress via HTTP — no dedicated webhook proc applicable). All specs share a single env profile aligned with internal n8n production defaults — connection-pool, lock-duration, and Bull/Redis tuning from real deployments. See `BENCHMARK_CONFIG` in `playwright-projects.ts`. @@ -65,7 +69,7 @@ All specs share a single env profile aligned with internal n8n production defaul # Build n8n image first (skip if you only changed test code). pnpm build:docker -# Full suite — all 9 specs sequentially (each spawns its own container). +# Full suite — all 14 specs sequentially (each spawns its own container). pnpm --filter=n8n-playwright test:benchmark # One spec. diff --git a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-queue-baseline.spec.ts b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts similarity index 55% rename from packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-queue-baseline.spec.ts rename to packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts index cb7416cd16b..5ec218fbe79 100644 --- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-queue-baseline.spec.ts +++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts @@ -3,33 +3,34 @@ import { benchConfig } from '../../../../playwright-projects'; import { setupWebhook } from '../../../../utils/benchmark/webhook-driver'; import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness'; -// Queue-mode baseline at 1 main + 1 worker. Pair with `webhook-main-scaling` -// (2 mains + 2 workers) to read the multi-main scaling factor isolated from -// the direct→queue architecture change. Without this baseline, comparing -// `webhook-single-instance` (1m, 0w direct) to `webhook-main-scaling` would -// conflate "adding mains" with "switching to queue mode". -// -// Parameters intentionally match `webhook-main-scaling.spec.ts` (200 × 180s) -// so the comparison is apples-to-apples. +// Workers fixed, webhook procs doubled — isolates the ingestion-axis ceiling +// vs the baseline. const MAINS = 1; +const WEBHOOKS = 2; const WORKERS = 1; const CONNECTIONS = 200; const DURATION_SECONDS = 180; -test.use({ capability: benchConfig('webhook-queue-baseline', { mains: MAINS, workers: WORKERS }) }); +test.use({ + capability: benchConfig('webhook-dedicated-proc-2wp-1w', { + mains: MAINS, + webhooks: WEBHOOKS, + workers: WORKERS, + }), +}); test.describe( - 'What is the webhook ingestion ceiling in queue mode at 1 main + 1 worker?', + 'Does doubling webhook procs (workers fixed) increase ingestion throughput?', { tag: '@bench:webhook', annotation: [ { type: 'owner', description: 'Catalysts' }, - { type: 'question', description: 'webhook-queue-baseline' }, + { type: 'question', description: 'webhook-dedicated-proc-2wp-1w' }, ], }, () => { - test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WORKERS} worker)`, async ({ + test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker)`, async ({ api, services, backendUrl, diff --git a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-2w.spec.ts b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-2w.spec.ts new file mode 100644 index 00000000000..026b53ecef6 --- /dev/null +++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-2w.spec.ts @@ -0,0 +1,58 @@ +import { test } from '../../../../fixtures/base'; +import { benchConfig } from '../../../../playwright-projects'; +import { setupWebhook } from '../../../../utils/benchmark/webhook-driver'; +import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness'; + +// Both knobs doubled. With baseline and 2wp-1w, factor marginal worker gain +// (E3 − E2) and marginal proc gain (E2 − E1). + +const MAINS = 1; +const WEBHOOKS = 2; +const WORKERS = 2; +const CONNECTIONS = 200; +const DURATION_SECONDS = 180; + +test.use({ + capability: benchConfig('webhook-dedicated-proc-2wp-2w', { + mains: MAINS, + webhooks: WEBHOOKS, + workers: WORKERS, + }), +}); + +test.describe( + 'What is the joint scale-up of doubling both webhook procs and workers?', + { + tag: '@bench:webhook', + annotation: [ + { type: 'owner', description: 'Catalysts' }, + { type: 'question', description: 'webhook-dedicated-proc-2wp-2w' }, + ], + }, + () => { + test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker)`, async ({ + api, + services, + backendUrl, + }, testInfo) => { + const handle = setupWebhook({ + scenario: { + nodeCount: 1, + payloadSize: '1KB', + nodeOutputSize: 'noop', + responseMode: 'onReceived', + }, + }); + await runWebhookThroughputTest({ + handle, + api, + services, + testInfo, + baseUrl: backendUrl, + connections: CONNECTIONS, + durationSeconds: DURATION_SECONDS, + timeoutMs: (DURATION_SECONDS + 60) * 1000, + }); + }); + }, +); diff --git a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-baseline.spec.ts b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-baseline.spec.ts new file mode 100644 index 00000000000..69490ea1827 --- /dev/null +++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-baseline.spec.ts @@ -0,0 +1,58 @@ +import { test } from '../../../../fixtures/base'; +import { benchConfig } from '../../../../playwright-projects'; +import { setupWebhook } from '../../../../utils/benchmark/webhook-driver'; +import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness'; + +// Production-canonical queue-mode baseline: 1m + 1wp + 1w. Pair with the +// 2wp-1w and 2wp-2w specs to factor proc-axis and worker-axis scaling. + +const MAINS = 1; +const WEBHOOKS = 1; +const WORKERS = 1; +const CONNECTIONS = 200; +const DURATION_SECONDS = 180; + +test.use({ + capability: benchConfig('webhook-dedicated-proc-baseline', { + mains: MAINS, + webhooks: WEBHOOKS, + workers: WORKERS, + }), +}); + +test.describe( + 'What is the webhook ingestion ceiling with a dedicated webhook proc?', + { + tag: '@bench:webhook', + annotation: [ + { type: 'owner', description: 'Catalysts' }, + { type: 'question', description: 'webhook-dedicated-proc-baseline' }, + ], + }, + () => { + test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker)`, async ({ + api, + services, + backendUrl, + }, testInfo) => { + const handle = setupWebhook({ + scenario: { + nodeCount: 1, + payloadSize: '1KB', + nodeOutputSize: 'noop', + responseMode: 'onReceived', + }, + }); + await runWebhookThroughputTest({ + handle, + api, + services, + testInfo, + baseUrl: backendUrl, + connections: CONNECTIONS, + durationSeconds: DURATION_SECONDS, + timeoutMs: (DURATION_SECONDS + 60) * 1000, + }); + }); + }, +); diff --git a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-main-scaling.spec.ts b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-main-scaling.spec.ts deleted file mode 100644 index be062db2fd2..00000000000 --- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-main-scaling.spec.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { test } from '../../../../fixtures/base'; -import { benchConfig } from '../../../../playwright-projects'; -import { setupWebhook } from '../../../../utils/benchmark/webhook-driver'; -import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness'; - -// 2 mains + 2 workers fits within the CI runner's CPU budget while still -// exercising multi-main HA. Pair with `webhook-queue-baseline.spec.ts` -// (1m + 1w queue mode) — NOT `webhook-single-instance.spec.ts` (1m + 0w -// direct mode) — to read the main-scaling factor cleanly. Comparing against -// the direct-mode spec would conflate "adding a main" with "switching from -// direct to queue execution" since the architectures differ. -// -// Multi-main HA requires N8N_MULTI_MAIN_SETUP_ENABLED and a license cert -// (picked up from N8N_LICENSE_ACTIVATION_KEY / N8N_LICENSE_CERT in the host -// env). `benchConfig()` enables the env var automatically when `mains > 1`. - -const MAINS = 2; -const WORKERS = 2; -const CONNECTIONS = 200; -const DURATION_SECONDS = 180; - -test.use({ capability: benchConfig('webhook-main-scaling', { mains: MAINS, workers: WORKERS }) }); - -test.describe( - 'Does webhook ingestion scale linearly with main count?', - { - tag: '@bench:webhook', - annotation: [ - { type: 'owner', description: 'Catalysts' }, - { type: 'question', description: 'webhook-main-scaling' }, - ], - }, - () => { - // Async webhook so HTTP req/s = ingestion ACK rate (independent of worker drain). - // Baseline (1m + 1w queue mode): see webhook-queue-baseline.spec.ts. - // At N mains, holding worker count proportional: should be ~N× baseline if - // load distributes evenly. Sub-linear means LB or shared resource - // contention; super-linear is impossible. - test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} mains + ${WORKERS} workers)`, async ({ - api, - services, - backendUrl, - }, testInfo) => { - const handle = setupWebhook({ - scenario: { - nodeCount: 1, - payloadSize: '1KB', - nodeOutputSize: 'noop', - responseMode: 'onReceived', - }, - }); - await runWebhookThroughputTest({ - handle, - api, - services, - testInfo, - baseUrl: backendUrl, - connections: CONNECTIONS, - durationSeconds: DURATION_SECONDS, - timeoutMs: (DURATION_SECONDS + 60) * 1000, - }); - - console.log( - `\n[MAIN SCALING] mains=${MAINS}, workers=${WORKERS} | Compare HTTP req/s vs webhook-queue-baseline (1m+1w).\n` + - ' Linear scaling = ingestion req/s grows ~N× with mains.\n' + - ' Sub-linear = load balancer / shared resource bottleneck.', - ); - }); - }, -); diff --git a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-save-data-overhead.spec.ts b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-save-data-overhead.spec.ts index 5d3cb7be44a..8af40285d70 100644 --- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-save-data-overhead.spec.ts +++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-save-data-overhead.spec.ts @@ -4,18 +4,20 @@ import { setupWebhook } from '../../../../utils/benchmark/webhook-driver'; import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness'; const MAINS = 1; +const WEBHOOKS = 1; const WORKERS = 1; const CONNECTIONS = 200; const DURATION_SECONDS = 180; -// Same scenario as `webhook-queue-baseline.spec.ts` but with execution data -// persisted on success. Compare this run's `exec/s` and `p50` against the -// queue-baseline from the same CI run to read the cost of saving execution -// data — the typical production default the rest of the bench suite suppresses -// for clean ceiling numbers. +// Same scenario as `webhook-dedicated-proc-baseline.spec.ts` but with execution +// data persisted on success. Compare this run's `exec/s` and `p50` against the +// dedicated-proc baseline from the same CI run to read the cost of saving +// execution data — the typical production default the rest of the bench suite +// suppresses for clean ceiling numbers. test.use({ capability: benchConfig('webhook-save-data-overhead', { mains: MAINS, + webhooks: WEBHOOKS, workers: WORKERS, env: { EXECUTIONS_DATA_SAVE_ON_SUCCESS: 'all' }, }), @@ -31,7 +33,7 @@ test.describe( ], }, () => { - test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WORKERS} worker, save-on-success)`, async ({ + test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker, save-on-success)`, async ({ api, services, backendUrl, @@ -56,7 +58,7 @@ test.describe( console.log( "\n[SAVE-DATA OVERHEAD] Compare this spec's exec/s and p50 against " + - 'webhook-queue-baseline from the same run for the save-execution-data cost delta.\n', + 'webhook-dedicated-proc-baseline from the same run for the save-execution-data cost delta.\n', ); }); }, diff --git a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts index 1f2379b64c5..98343a84951 100644 --- a/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts +++ b/packages/testing/playwright/tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts @@ -9,10 +9,9 @@ const DURATION_SECONDS = 120; // Direct mode: no Bull, no workers. Webhook receives → workflow runs inline on // the same Node.js process → respond. Async (`onReceived`) returns the 200 // before execution completes; the workflow runs as a detached promise on the -// same event loop. This is the canonical single-instance direct-mode ceiling. -// For multi-main scaling comparisons, use `webhook-queue-baseline.spec.ts` -// (1m + 1w queue mode) — comparing direct mode to queue mode mixes -// architecture changes with main-count scaling. +// same event loop. This is the canonical single-instance direct-mode ceiling +// — the community-edition / single-container deployment shape. For queue-mode +// shapes, see the `webhook-dedicated-proc-*` specs. test.use({ capability: benchConfig('webhook-single-instance') }); test.describe( diff --git a/packages/testing/playwright/utils/benchmark/sizing-matrix.ts b/packages/testing/playwright/utils/benchmark/sizing-matrix.ts new file mode 100644 index 00000000000..9bf749d891a --- /dev/null +++ b/packages/testing/playwright/utils/benchmark/sizing-matrix.ts @@ -0,0 +1,622 @@ +/** + * Aggregated sizing-matrix artifact emitted from N `run-report.json` files. + * + * Schema lock: DEVP-185. The matrix is two-axis — scale (S0–S4 throughput + * slices) × shape (L/D/A/X workload archetypes). Each cell carries a + * topology (output, not input) and a per-shape result that collapses ≥3 + * cold runs into `{p50, p95, max, n}`. v0 tolerates `n=1` cells and marks + * verdict='amber' so the renderer can flag low-confidence data. + */ + +import type { RunReport, ServiceMetrics } from './run-report'; + +export type Scale = 'S0' | 'S1' | 'S2' | 'S3' | 'S4'; +export type Shape = 'L' | 'D' | 'A' | 'X'; +export type Verdict = 'green' | 'amber' | 'red'; +export type Bottleneck = 'main-cpu' | 'pg-cpu' | 'worker-cpu' | 'queue' | 'network'; + +export interface Topology { + mains: number; + webhookProcs: number; + workers: number; + mainVcpu: number; + mainRamGb: number; + workerVcpu?: number; + workerRamGb?: number; + pgVcpu: number; + pgRamGb: number; + pgIops?: number; + redisVcpu: number; + redisRamGb: number; +} + +export interface HardwareInfo { + runner: string; + vcpu: number; + ramGb: number; +} + +export interface SourceRun { + runReportPath: string; + commitSha: string; + spec: string; +} + +export interface PercentileSummary { + p50: number; + p95: number; + max: number; + n: number; +} + +export interface ShapeResult { + ceilingExecPerSec: PercentileSummary; + /** Steady-state ceiling — closer to the architectural limit than total exec/sec. */ + tailExecPerSec?: PercentileSummary; + reqPerSec?: PercentileSummary; + latency: { p50: number; p975: number; p99: number }; + headroomAtCeiling: { + mainCpuPct: number; + workerCpuPct?: number; + pgCpuPct: number; + pgCpuPctPeak: number; + pgBufferHit: number; + eventLoopLagMs: number; + }; + io: { + /** `pg_stat_io` `client backend` (reads+writes+extends) / sec. */ + workloadIopsPerSec: number; + /** Sum of bgwriter + checkpointer + walwriter + autovacuum + standalone, per sec. */ + overheadIopsPerSec: number; + /** Ratio total / workload. ~1.0 means workload-dominated; >1.5 means overhead-heavy. */ + overheadFactor: number; + walMbPerSec: number; + walRecordsPerSec: number; + }; + bottleneck: Bottleneck; + verdict: Verdict; + /** Derived: throughput at which all headroom signals would stay green. */ + greenSustainedExecPerSec: number; + sourceRuns: SourceRun[]; +} + +export interface SizingCell { + scale: Scale; + topology: Topology; + shapes: Partial>; +} + +export interface SizingMatrix { + schemaVersion: 1; + n8nVersion: string; + commitSha: string; + runDate: string; + hardware: HardwareInfo; + cells: SizingCell[]; +} + +/** Maps each benchmark spec path to its cell coordinates. */ +export interface SpecMapping { + [specPath: string]: { + scale: Scale; + shape: Shape; + topology: Topology; + }; +} + +export interface AggregateInput { + reports: Array<{ path: string; report: RunReport }>; + mapping: SpecMapping; + hardware: HardwareInfo; + n8nVersion: string; + commitSha: string; + runDate?: string; +} + +const GREEN_THRESHOLDS = { + mainCpuPct: 75, + pgCpuPct: 70, + eventLoopLagMs: 25, + httpP99Ms: 100, +} as const; + +const AMBER_THRESHOLDS = { + mainCpuPct: 85, + pgCpuPct: 85, + eventLoopLagMs: 100, + httpP99Ms: 500, +} as const; + +export function aggregate(input: AggregateInput): SizingMatrix { + const groups = groupByCell(input.reports, input.mapping); + const cells = Array.from(groups.entries()) + .map(([key, entries]) => buildCell(key, entries)) + .sort(byScale); + + return { + schemaVersion: 1, + n8nVersion: input.n8nVersion, + commitSha: input.commitSha, + runDate: input.runDate ?? new Date().toISOString(), + hardware: input.hardware, + cells, + }; +} + +interface CellGroupEntry { + scale: Scale; + shape: Shape; + topology: Topology; + path: string; + report: RunReport; +} + +function groupByCell( + reports: Array<{ path: string; report: RunReport }>, + mapping: SpecMapping, +): Map { + const byScale = new Map(); + for (const { path, report } of reports) { + const cellCoord = findMapping(path, report, mapping); + if (!cellCoord) { + console.warn( + `[sizing-matrix] No mapping for spec "${report.scenario.spec}" (file: ${path}) — skipping.`, + ); + continue; + } + const entry: CellGroupEntry = { ...cellCoord, path, report }; + const existing = byScale.get(cellCoord.scale) ?? []; + existing.push(entry); + byScale.set(cellCoord.scale, existing); + } + return byScale; +} + +// First-match-wins: tries path, spec title, and normalised title against each key. +function findMapping(path: string, report: RunReport, mapping: SpecMapping) { + const candidates = [path, report.scenario.spec, normaliseSpecTitle(report.scenario.spec)]; + for (const [key, value] of Object.entries(mapping)) { + const fragment = key.toLowerCase(); + const fileStem = + key + .split('/') + .pop() + ?.replace(/\.spec\.ts$/, '') ?? ''; + const stemFragment = fileStem.toLowerCase(); + for (const candidate of candidates) { + const haystack = candidate.toLowerCase(); + if (haystack.includes(fragment)) return value; + if (stemFragment && haystack.includes(stemFragment)) return value; + } + } + return undefined; +} + +function normaliseSpecTitle(title: string): string { + return title.toLowerCase().replace(/[^a-z0-9]+/g, '-'); +} + +function buildCell(scale: Scale, entries: CellGroupEntry[]): SizingCell { + const topology = entries[0]?.topology; + if (!topology) throw new Error(`No entries for scale ${scale}`); + + const byShape = new Map(); + for (const entry of entries) { + const list = byShape.get(entry.shape) ?? []; + list.push(entry); + byShape.set(entry.shape, list); + } + + const shapes: SizingCell['shapes'] = {}; + for (const [shape, group] of byShape.entries()) { + shapes[shape] = buildShapeResult(group); + } + + return { scale, topology, shapes }; +} + +function buildShapeResult(group: CellGroupEntry[]): ShapeResult { + const perRun = group.map((entry) => projectRun(entry)); + const execs = perRun.map((r) => r.execPerSec).filter((v) => v > 0); + const tailExecs = perRun + .map((r) => r.tailExecPerSec) + .filter((v): v is number => v !== undefined && v > 0); + const reqs = perRun.map((r) => r.reqPerSec).filter((v): v is number => v !== undefined && v > 0); + const latP50 = mean(perRun.map((r) => r.latency.p50).filter((v) => v > 0)); + const latP975 = mean(perRun.map((r) => r.latency.p975).filter((v) => v > 0)); + const latP99 = mean(perRun.map((r) => r.latency.p99).filter((v) => v > 0)); + + const headroom = headroomFromGroup(group); + const ioMetrics = ioFromGroup(group); + const bottleneck = detectBottleneck(headroom, latP99); + const verdict = scoreVerdict(headroom, latP99, group.length); + + // Median of per-run green projections, capped at observed ceiling. + const ceilingMax = execs.length ? Math.max(...execs) : 0; + const perRunGreens = perRun.map((r) => r.greenProjection).filter((v) => v > 0); + const greenMedian = perRunGreens.length ? median(perRunGreens) : 0; + const greenSustainedExecPerSec = Math.min(greenMedian, ceilingMax); + + return { + ceilingExecPerSec: summarise(execs), + tailExecPerSec: tailExecs.length ? summarise(tailExecs) : undefined, + reqPerSec: reqs.length ? summarise(reqs) : undefined, + latency: { p50: latP50, p975: latP975, p99: latP99 }, + headroomAtCeiling: headroom, + io: ioMetrics, + bottleneck, + verdict, + greenSustainedExecPerSec, + sourceRuns: group.map((g) => ({ + runReportPath: g.path, + commitSha: extractSha(g.report) ?? 'unknown', + spec: g.report.scenario.spec, + })), + }; +} + +interface PerRunProjection { + execPerSec: number; + tailExecPerSec?: number; + reqPerSec?: number; + latency: { p50: number; p975: number; p99: number }; + greenProjection: number; +} + +// Projects "rate sustainable at green headroom" using this run's own CPU/lag. +function projectRun(entry: CellGroupEntry): PerRunProjection { + const t = entry.report.throughput; + const execPerSec = t.execPerSec ?? 0; + const mainCpu = entry.report.containers.find((c) => c.name === 'n8n-main')?.cpuPct ?? 0; + const pgCpu = entry.report.containers.find((c) => c.name === 'postgres')?.cpuPct ?? 0; + const lag = + entry.report.services + .filter((s): s is Extract => s.kind === 'n8n-main') + .map((s) => (s.eventLoopLagSec ?? 0) * 1000)[0] ?? 0; + + const mainFactor = mainCpu > 0 ? GREEN_THRESHOLDS.mainCpuPct / mainCpu : Infinity; + const pgFactor = pgCpu > 0 ? GREEN_THRESHOLDS.pgCpuPct / pgCpu : Infinity; + const lagFactor = lag > 0 ? GREEN_THRESHOLDS.eventLoopLagMs / lag : Infinity; + const limit = Math.min(mainFactor, pgFactor, lagFactor); + const greenProjection = Number.isFinite(limit) ? execPerSec * limit : execPerSec; + + return { + execPerSec, + tailExecPerSec: t.tailExecPerSec, + reqPerSec: t.reqPerSec, + latency: { + p50: t.p50Ms ?? 0, + p975: t.p97_5Ms ?? 0, + p99: t.p99Ms ?? 0, + }, + greenProjection, + }; +} + +function headroomFromGroup(group: CellGroupEntry[]): ShapeResult['headroomAtCeiling'] { + const mainCpu = mean(definedNumbers(group, (g) => containerValues(g, 'n8n-main', 'cpuPct'))); + const workerVals = group.flatMap((g) => containerValues(g, 'n8n-worker', 'cpuPct')); + const workerDefined = workerVals.filter(isNumber); + const workerCpu = workerDefined.length ? mean(workerDefined) : undefined; + const pgCpuAvg = mean(definedNumbers(group, (g) => containerValues(g, 'postgres', 'cpuPct'))); + const pgCpuPeak = mean( + definedNumbers(group, (g) => containerValues(g, 'postgres', 'cpuPctPeak')), + ); + const eventLoopLagMs = mean( + group.flatMap((g) => + g.report.services + .filter((s): s is Extract => s.kind === 'n8n-main') + .map((s) => (s.eventLoopLagSec ?? 0) * 1000), + ), + ); + const pgBufferHit = mean( + group.flatMap((g) => + g.report.services + .filter((s): s is Extract => s.kind === 'postgres') + .map((s) => s.saturation.bufferHitRatio ?? 0), + ), + ); + + return { + mainCpuPct: mainCpu, + workerCpuPct: workerCpu, + pgCpuPct: pgCpuAvg, + pgCpuPctPeak: pgCpuPeak, + pgBufferHit, + eventLoopLagMs, + }; +} + +function ioFromGroup(group: CellGroupEntry[]): ShapeResult['io'] { + const perRun = group.map((g) => derivePerRunIo(g.report)); + const workloadIopsPerSec = mean(perRun.map((r) => r.workload)); + const overheadIopsPerSec = mean(perRun.map((r) => r.overhead)); + const overheadFactor = + workloadIopsPerSec > 0 ? (workloadIopsPerSec + overheadIopsPerSec) / workloadIopsPerSec : 0; + const walMbPerSec = mean(perRun.map((r) => r.walMbPerSec)); + const walRecordsPerSec = mean(perRun.map((r) => r.walRecordsPerSec)); + return { + workloadIopsPerSec, + overheadIopsPerSec, + overheadFactor, + walMbPerSec, + walRecordsPerSec, + }; +} + +function derivePerRunIo(report: RunReport): { + workload: number; + overhead: number; + walMbPerSec: number; + walRecordsPerSec: number; +} { + const elapsedSec = (report.duration.totalMs ?? 1) / 1000; + const pg = report.services.find( + (s): s is Extract => s.kind === 'postgres', + ); + if (!pg) { + return { workload: 0, overhead: 0, walMbPerSec: 0, walRecordsPerSec: 0 }; + } + const sumIo = (filter: (b: string) => boolean) => + pg.io + .filter((row) => filter(row.backendType)) + .reduce((acc, row) => acc + row.reads + row.writes + row.extends, 0); + + const workloadOps = sumIo((b) => b === 'client backend'); + const overheadOps = sumIo( + (b) => + b === 'background writer' || + b === 'checkpointer' || + b === 'walwriter' || + b === 'autovacuum worker' || + b === 'autovacuum launcher' || + b === 'standalone backend', + ); + + return { + workload: workloadOps / elapsedSec, + overhead: overheadOps / elapsedSec, + walMbPerSec: pg.saturation.walMbPerSec ?? 0, + walRecordsPerSec: pg.saturation.walRecordsPerSec ?? 0, + }; +} + +function detectBottleneck(headroom: ShapeResult['headroomAtCeiling'], _p99: number): Bottleneck { + // Classify on resource saturation, not p99 — for async modes p99 is + // dominated by Bull queue wait and misranked PG-CPU as "network". + const candidates: Array<[Bottleneck, number]> = [ + ['main-cpu', headroom.mainCpuPct / AMBER_THRESHOLDS.mainCpuPct], + ['pg-cpu', headroom.pgCpuPct / AMBER_THRESHOLDS.pgCpuPct], + ]; + if (headroom.workerCpuPct !== undefined) { + candidates.push(['worker-cpu', headroom.workerCpuPct / AMBER_THRESHOLDS.mainCpuPct]); + } + candidates.sort((a, b) => b[1] - a[1]); + return candidates[0]?.[0] ?? 'main-cpu'; +} + +function scoreVerdict( + headroom: ShapeResult['headroomAtCeiling'], + _p99: number, + sampleCount: number, +): Verdict { + // Verdict gates on topology health (CPU/PG/event-loop), not p99 — same + // rationale as detectBottleneck. p99 still surfaces in the cell display. + if (sampleCount < 3) return 'amber'; + const anyRed = + headroom.mainCpuPct > AMBER_THRESHOLDS.mainCpuPct || + headroom.pgCpuPct > AMBER_THRESHOLDS.pgCpuPct || + headroom.eventLoopLagMs > AMBER_THRESHOLDS.eventLoopLagMs; + if (anyRed) return 'red'; + const anyAmber = + headroom.mainCpuPct > GREEN_THRESHOLDS.mainCpuPct || + headroom.pgCpuPct > GREEN_THRESHOLDS.pgCpuPct || + headroom.eventLoopLagMs > GREEN_THRESHOLDS.eventLoopLagMs; + return anyAmber ? 'amber' : 'green'; +} + +function median(values: number[]): number { + if (values.length === 0) return 0; + const sorted = [...values].sort((a, b) => a - b); + const mid = Math.floor(sorted.length / 2); + if (sorted.length % 2 === 1) return sorted[mid] ?? 0; + return ((sorted[mid - 1] ?? 0) + (sorted[mid] ?? 0)) / 2; +} + +function summarise(values: number[]): PercentileSummary { + if (values.length === 0) return { p50: 0, p95: 0, max: 0, n: 0 }; + const sorted = [...values].sort((a, b) => a - b); + return { + p50: percentile(sorted, 0.5), + p95: percentile(sorted, 0.95), + max: sorted[sorted.length - 1] ?? 0, + n: values.length, + }; +} + +function percentile(sortedAsc: number[], p: number): number { + if (sortedAsc.length === 0) return 0; + if (sortedAsc.length === 1) return sortedAsc[0] ?? 0; + const idx = Math.min(sortedAsc.length - 1, Math.floor(p * sortedAsc.length)); + return sortedAsc[idx] ?? 0; +} + +function mean(values: number[]): number { + if (values.length === 0) return 0; + return values.reduce((acc, v) => acc + v, 0) / values.length; +} + +function isNumber(value: number | undefined): value is number { + return typeof value === 'number' && !Number.isNaN(value); +} + +function containerValues( + entry: CellGroupEntry, + name: string, + field: 'cpuPct' | 'cpuPctPeak', +): Array { + return entry.report.containers.filter((c) => c.name === name).map((c) => c[field]); +} + +function definedNumbers( + group: CellGroupEntry[], + pick: (entry: CellGroupEntry) => Array, +): number[] { + return group.flatMap(pick).filter(isNumber); +} + +function byScale(a: SizingCell, b: SizingCell): number { + const order: Scale[] = ['S0', 'S1', 'S2', 'S3', 'S4']; + return order.indexOf(a.scale) - order.indexOf(b.scale); +} + +function extractSha(report: RunReport): string | undefined { + const value = report.scenario.dimensions['commitSha']; + return typeof value === 'string' ? value : undefined; +} + +/** Renders a `SizingMatrix` to the customer-facing markdown guide. */ +export function renderMarkdown(matrix: SizingMatrix): string { + const lines: string[] = []; + lines.push('# n8n Self-Hosted Sizing Guide'); + lines.push(''); + lines.push( + `*Generated for n8n \`${matrix.n8nVersion}\` · commit \`${matrix.commitSha}\` · ${matrix.runDate.slice(0, 10)}*`, + ); + lines.push(''); + lines.push( + `*Hardware baseline:* \`${matrix.hardware.runner}\` (${matrix.hardware.vcpu} vCPU / ${matrix.hardware.ramGb} GB).`, + ); + lines.push(''); + lines.push( + '> **Status:** generated from the benchmark substrate at every release. ' + + 'Cells with **verdict: amber** are single-run or low-sample — informational, ' + + 'not promotion-ready. Each cell ships **two throughput numbers** — the ' + + 'saturation **ceiling** observed in CI, and the **green-sustained** ' + + 'recommendation derived from headroom thresholds. Size to the green number.', + ); + lines.push(''); + + const shapes: Shape[] = ['L', 'D', 'A', 'X']; + for (const shape of shapes) { + const cellsWithShape = matrix.cells.filter((c) => c.shapes[shape]); + if (cellsWithShape.length === 0) continue; + + lines.push(`## Shape ${shape}`); + lines.push(''); + lines.push(renderShapeTable(cellsWithShape, shape)); + lines.push(''); + lines.push(renderShapeDetail(cellsWithShape, shape)); + lines.push(''); + } + + lines.push('---'); + lines.push(''); + lines.push('## Methodology'); + lines.push(''); + lines.push( + 'Cells are aggregated by the substrate at `packages/testing/playwright/utils/benchmark/sizing-matrix.ts` from per-run `run-report.json` files. ' + + "Each cell's headline **ceiling** is the **max** `execPerSec` observed across cold runs (saturation point); full p50/p95/max distribution is shown in cell detail. " + + '**green-sustained** is the ceiling scaled by headroom (main CPU < 75%, PG CPU < 70%, event-loop lag < 25 ms). ' + + '**Workload IOPS** is `pg_stat_io` `client backend` reads+writes+extends per second; ' + + '**overhead factor** is `(workload + bgwriter + checkpointer + walwriter + autovacuum) / workload`. ' + + 'WAL is reported separately (`MB/s`, `records/s`).', + ); + lines.push(''); + return lines.join('\n'); +} + +function renderShapeTable(cells: SizingCell[], shape: Shape): string { + const header = + '| Scale | Mains | Webhook procs | Workers | PG (vCPU/RAM) | Redis | Burst headroom (≤30 s) exec/s | **Green sustained** exec/s | Req/s | p99 ms | Verdict |'; + const sep = '|---|---:|---:|---:|---|---|---:|---:|---:|---:|---|'; + const rows: string[] = [header, sep]; + for (const cell of cells) { + const result = cell.shapes[shape]; + if (!result) continue; + rows.push( + [ + `**${cell.scale}**`, + `${cell.topology.mains}× (${cell.topology.mainVcpu} vCPU / ${cell.topology.mainRamGb} GB)`, + cell.topology.webhookProcs > 0 ? `${cell.topology.webhookProcs}×` : '(in main)', + cell.topology.workers > 0 && cell.topology.workerVcpu + ? `${cell.topology.workers}× (${cell.topology.workerVcpu} vCPU / ${cell.topology.workerRamGb} GB)` + : '—', + `${cell.topology.pgVcpu} vCPU / ${cell.topology.pgRamGb} GB`, + `${cell.topology.redisVcpu} vCPU / ${cell.topology.redisRamGb} GB`, + result.ceilingExecPerSec.max.toFixed(1), + result.greenSustainedExecPerSec.toFixed(1), + result.reqPerSec ? result.reqPerSec.p50.toFixed(1) : '—', + result.latency.p99.toFixed(0), + verdictBadge(result.verdict), + ].join(' | '), + ); + } + return rows.join('\n'); +} + +function renderShapeDetail(cells: SizingCell[], shape: Shape): string { + const lines: string[] = []; + lines.push('
'); + lines.push(`Cell detail — Shape ${shape}`); + lines.push(''); + for (const cell of cells) { + const result = cell.shapes[shape]; + if (!result) continue; + lines.push(`### ${cell.scale}-${shape} (n=${result.ceilingExecPerSec.n})`); + lines.push(''); + lines.push('| Field | Value |'); + lines.push('|---|---|'); + lines.push(`| Ceiling exec/s (p50/p95/max) | ${fmtPercentile(result.ceilingExecPerSec)} |`); + if (result.tailExecPerSec) { + lines.push(`| Tail exec/s (steady-state) | ${fmtPercentile(result.tailExecPerSec)} |`); + } + lines.push( + `| **Green sustained recommendation** | ${result.greenSustainedExecPerSec.toFixed(1)} exec/s |`, + ); + lines.push( + `| Latency p50 / p97.5 / p99 | ${result.latency.p50.toFixed(0)} / ${result.latency.p975.toFixed(0)} / ${result.latency.p99.toFixed(0)} ms |`, + ); + lines.push(`| Main CPU at ceiling | ${result.headroomAtCeiling.mainCpuPct.toFixed(1)}% |`); + if (result.headroomAtCeiling.workerCpuPct !== undefined) { + lines.push( + `| Worker CPU at ceiling | ${result.headroomAtCeiling.workerCpuPct.toFixed(1)}% |`, + ); + } + lines.push( + `| PG CPU avg / peak | ${result.headroomAtCeiling.pgCpuPct.toFixed(1)}% / ${result.headroomAtCeiling.pgCpuPctPeak.toFixed(1)}% |`, + ); + lines.push( + `| PG buffer-hit ratio | ${(result.headroomAtCeiling.pgBufferHit * 100).toFixed(2)}% |`, + ); + lines.push(`| Event-loop lag | ${result.headroomAtCeiling.eventLoopLagMs.toFixed(1)} ms |`); + lines.push(`| **Workload IOPS** | ${result.io.workloadIopsPerSec.toFixed(0)} ops/s |`); + lines.push( + `| Overhead IOPS (autovac + bgwriter + checkpointer + walwriter) | ${result.io.overheadIopsPerSec.toFixed(0)} ops/s |`, + ); + lines.push(`| **Overhead factor** | ${result.io.overheadFactor.toFixed(2)}× workload |`); + lines.push( + `| WAL throughput | ${result.io.walMbPerSec.toFixed(2)} MB/s · ${result.io.walRecordsPerSec.toFixed(0)} records/s |`, + ); + lines.push(`| Bottleneck | \`${result.bottleneck}\` |`); + lines.push(`| Verdict | ${verdictBadge(result.verdict)} |`); + lines.push(`| Source runs | ${result.sourceRuns.length} |`); + lines.push(''); + for (const src of result.sourceRuns) { + lines.push(`- \`${src.spec}\``); + } + lines.push(''); + } + lines.push('
'); + return lines.join('\n'); +} + +function verdictBadge(verdict: Verdict): string { + if (verdict === 'green') return '🟢 green'; + if (verdict === 'amber') return '🟠 amber (n<3, low confidence)'; + return '🔴 red'; +} + +function fmtPercentile(p: PercentileSummary): string { + return `${p.p50.toFixed(1)} / ${p.p95.toFixed(1)} / ${p.max.toFixed(1)}`; +} diff --git a/packages/testing/playwright/utils/benchmark/throughput-measure.ts b/packages/testing/playwright/utils/benchmark/throughput-measure.ts index 73cb185a165..d18fddc3144 100644 --- a/packages/testing/playwright/utils/benchmark/throughput-measure.ts +++ b/packages/testing/playwright/utils/benchmark/throughput-measure.ts @@ -64,16 +64,9 @@ export const QUEUE_JOBS_COMPLETED_QUERY = 'n8n_scaling_mode_queue_jobs_completed /** * Returns the completion metric for the current Playwright project. - * - * `n8n_workflow_success_total` is emitted by both main and workers; in queue mode - * each instance produces its own time series. The query wraps the metric in - * `sum(last_over_time(...[5m]))` so VictoriaMetrics aggregates across instances - * server-side, and the wide lookback tolerates transient scrape misses that - * would otherwise drop a series and make the summed counter appear to regress. - * - * `n8n_scaling_mode_queue_jobs_completed` is the designed queue-mode metric but - * it depends on ScalingService.scheduleQueueMetrics() emitting `job-counts-updated` - * events at regular intervals — currently observed as 0 in CI. + * `n8n_workflow_success_total` is per-receiver-instance; summed via + * `sum(last_over_time(...[5m]))` for the system-wide total. The wide lookback + * tolerates transient scrape misses. */ export function resolveMetricQuery(_testInfo: TestInfo): string { return WORKFLOW_SUCCESS_QUERY; @@ -196,8 +189,8 @@ export async function waitForThroughput( } /** - * Reads the current value of the workflow success counter from VictoriaMetrics. - * Returns 0 if the metric hasn't been scraped yet. + * Reads the current value of the workflow completion counter from + * VictoriaMetrics. Returns 0 if the metric hasn't been scraped yet. */ export async function getBaselineCounter( metrics: MetricsHelper,