mirror of
https://github.com/n8n-io/n8n.git
synced 2026-06-04 10:39:23 +02:00
test(benchmark): Add dedicated webhook procs + sizing matrix aggregator (DEVP-200 + DEVP-185) (#31037)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3e5dcce911
commit
7bd7b9943b
|
|
@ -26,7 +26,10 @@ jobs:
|
|||
shards: '[{"shard":1,"images":""}]'
|
||||
- lane: webhook
|
||||
grep: '@bench:webhook'
|
||||
shards: '[{"shard":1,"images":""}]'
|
||||
# 2 shards — webhook lane grew from 6 to 9 spec files with the
|
||||
# dedicated-proc topology (DEVP-200). Single shard pushed wall time
|
||||
# past 40 min; two shards halves that. Bump to 3 if specs grow further.
|
||||
shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]'
|
||||
- lane: kafka
|
||||
grep: '@bench:kafka'
|
||||
shards: '[{"shard":1,"images":""},{"shard":2,"images":""}]'
|
||||
|
|
|
|||
|
|
@ -1,6 +1,14 @@
|
|||
import { Service } from '@n8n/di';
|
||||
import { Container, Service } from '@n8n/di';
|
||||
|
||||
import { AbstractServer } from '@/abstract-server';
|
||||
|
||||
@Service()
|
||||
export class WebhookServer extends AbstractServer {}
|
||||
export class WebhookServer extends AbstractServer {
|
||||
/** Mounts `/metrics` so dedicated webhook procs are scrapeable. */
|
||||
async configure(): Promise<void> {
|
||||
if (this.globalConfig.endpoints.metrics.enable) {
|
||||
const { PrometheusMetricsService } = await import('@/metrics/prometheus-metrics.service');
|
||||
await Container.get(PrometheusMetricsService).init(this.app);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ import type { LoadBalancerPolicy, Service, ServiceResult } from './types';
|
|||
|
||||
export interface LoadBalancerConfig {
|
||||
mainCount: number;
|
||||
/** When > 0, Caddy path-routes `WEBHOOK_PROC_PATHS` to webhook procs. */
|
||||
webhookCount: number;
|
||||
hostPort?: number;
|
||||
policy: LoadBalancerPolicy;
|
||||
}
|
||||
|
|
@ -17,13 +19,19 @@ export interface LoadBalancerMeta {
|
|||
|
||||
export type LoadBalancerResult = ServiceResult<LoadBalancerMeta>;
|
||||
|
||||
function buildCaddyConfig(upstreamServers: string[], policy: LoadBalancerPolicy): string {
|
||||
const backends = upstreamServers.join(' ');
|
||||
return `
|
||||
:80 {
|
||||
# Reverse proxy with load balancing
|
||||
reverse_proxy ${backends} {
|
||||
lb_policy ${policy}
|
||||
// Production paths the `n8n webhook` proc serves. Test/waiting/form-test paths
|
||||
// stay on main, per `packages/cli/src/commands/webhook.ts`.
|
||||
const WEBHOOK_PROC_PATHS = ['/webhook/*', '/form/*'] as const;
|
||||
|
||||
function buildCaddyConfig(
|
||||
mainUpstreams: string[],
|
||||
webhookUpstreams: string[],
|
||||
policy: LoadBalancerPolicy,
|
||||
): string {
|
||||
const mainBackends = mainUpstreams.join(' ');
|
||||
const webhookBackends = webhookUpstreams.join(' ');
|
||||
|
||||
const sharedReverseProxyBlock = ` lb_policy ${policy}
|
||||
|
||||
# Health check
|
||||
health_uri /healthz
|
||||
|
|
@ -34,13 +42,42 @@ function buildCaddyConfig(upstreamServers: string[], policy: LoadBalancerPolicy)
|
|||
dial_timeout 60s
|
||||
read_timeout 60s
|
||||
write_timeout 60s
|
||||
}
|
||||
}`;
|
||||
|
||||
if (webhookUpstreams.length === 0) {
|
||||
return `
|
||||
:80 {
|
||||
# Reverse proxy with load balancing
|
||||
reverse_proxy ${mainBackends} {
|
||||
${sharedReverseProxyBlock}
|
||||
}
|
||||
|
||||
# Set max request body size
|
||||
request_body {
|
||||
max_size 50MB
|
||||
}
|
||||
}`;
|
||||
}
|
||||
|
||||
const webhookMatcher = WEBHOOK_PROC_PATHS.join(' ');
|
||||
return `
|
||||
:80 {
|
||||
request_body {
|
||||
max_size 50MB
|
||||
}
|
||||
|
||||
@webhooks path ${webhookMatcher}
|
||||
handle @webhooks {
|
||||
reverse_proxy ${webhookBackends} {
|
||||
${sharedReverseProxyBlock}
|
||||
}
|
||||
}
|
||||
|
||||
handle {
|
||||
reverse_proxy ${mainBackends} {
|
||||
${sharedReverseProxyBlock}
|
||||
}
|
||||
}
|
||||
}`;
|
||||
}
|
||||
|
||||
|
|
@ -51,6 +88,7 @@ export const loadBalancer: Service<LoadBalancerResult> = {
|
|||
getOptions(ctx) {
|
||||
return {
|
||||
mainCount: ctx.mains,
|
||||
webhookCount: ctx.webhooks,
|
||||
hostPort: ctx.allocatedPorts.loadBalancer,
|
||||
policy: ctx.config.lbPolicy ?? 'first',
|
||||
} as LoadBalancerConfig;
|
||||
|
|
@ -64,16 +102,19 @@ export const loadBalancer: Service<LoadBalancerResult> = {
|
|||
},
|
||||
|
||||
async start(network, projectName, config?: unknown): Promise<LoadBalancerResult> {
|
||||
const { mainCount, hostPort, policy } = config as LoadBalancerConfig;
|
||||
const { mainCount, webhookCount, hostPort, policy } = config as LoadBalancerConfig;
|
||||
const { consumer, throwWithLogs } = createSilentLogConsumer();
|
||||
|
||||
// Generate upstream server addresses
|
||||
const upstreamServers = Array.from(
|
||||
{ length: mainCount },
|
||||
(_, index) => `${projectName}-n8n-main-${index + 1}:5678`,
|
||||
// Single-main containers are named `${projectName}-n8n`, not `-n8n-main-1`.
|
||||
const mainHostname = (index: number): string =>
|
||||
mainCount > 1 ? `${projectName}-n8n-main-${index}:5678` : `${projectName}-n8n:5678`;
|
||||
const mainUpstreams = Array.from({ length: mainCount }, (_, index) => mainHostname(index + 1));
|
||||
const webhookUpstreams = Array.from(
|
||||
{ length: webhookCount },
|
||||
(_, index) => `${projectName}-n8n-webhook-${index + 1}:5678`,
|
||||
);
|
||||
|
||||
const caddyConfig = buildCaddyConfig(upstreamServers, policy);
|
||||
const caddyConfig = buildCaddyConfig(mainUpstreams, webhookUpstreams, policy);
|
||||
|
||||
try {
|
||||
const container = await new GenericContainer(TEST_CONTAINER_IMAGES.caddy)
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ const BASE_ENV: Record<string, string> = {
|
|||
export interface N8NInstancesOptions {
|
||||
mains: number;
|
||||
workers: number;
|
||||
/** Dedicated `n8n webhook` procs. Forces queue mode when > 0. */
|
||||
webhooks?: number;
|
||||
projectName: string;
|
||||
network: StartedNetwork;
|
||||
serviceEnvironment: Record<string, string>;
|
||||
|
|
@ -68,6 +70,8 @@ export interface N8NInstancesOptions {
|
|||
allocatedPort?: number;
|
||||
resourceQuota?: { memory?: number; cpu?: number };
|
||||
workerResourceQuota?: { memory?: number; cpu?: number };
|
||||
/** Resource quota for webhook procs. Falls back to `resourceQuota` if omitted. */
|
||||
webhookResourceQuota?: { memory?: number; cpu?: number };
|
||||
filesToMount?: FileToMount[];
|
||||
}
|
||||
|
||||
|
|
@ -81,13 +85,14 @@ function computeEnvironment(options: N8NInstancesOptions): Record<string, string
|
|||
const {
|
||||
mains,
|
||||
workers,
|
||||
webhooks = 0,
|
||||
usePostgres,
|
||||
baseUrl,
|
||||
serviceEnvironment,
|
||||
userEnvironment = {},
|
||||
} = options;
|
||||
|
||||
const isQueueMode = mains > 1 || workers > 0;
|
||||
const isQueueMode = mains > 1 || workers > 0 || webhooks > 0;
|
||||
|
||||
const env: Record<string, string> = {
|
||||
...BASE_ENV,
|
||||
|
|
@ -121,9 +126,11 @@ function computeEnvironment(options: N8NInstancesOptions): Record<string, string
|
|||
return env;
|
||||
}
|
||||
|
||||
type InstanceRole = 'main' | 'webhook' | 'worker';
|
||||
|
||||
interface InstanceConfig {
|
||||
name: string;
|
||||
isWorker: boolean;
|
||||
role: InstanceRole;
|
||||
instanceNumber: number;
|
||||
networkAlias?: string;
|
||||
hostPort?: number;
|
||||
|
|
@ -143,12 +150,18 @@ interface ContainerStartResult {
|
|||
getLastReadinessBody: () => string | null;
|
||||
}
|
||||
|
||||
const SERVICE_LABEL: Record<InstanceRole, string> = {
|
||||
main: 'n8n-main',
|
||||
webhook: 'n8n-webhook',
|
||||
worker: 'n8n-worker',
|
||||
};
|
||||
|
||||
async function createContainer(
|
||||
instance: InstanceConfig,
|
||||
shared: SharedConfig,
|
||||
diagnostics: N8NStartupDiagnostics,
|
||||
): Promise<ContainerStartResult> {
|
||||
const { name, isWorker, instanceNumber, networkAlias, hostPort } = instance;
|
||||
const { name, role, instanceNumber, networkAlias, hostPort } = instance;
|
||||
const { projectName, environment, network, resourceQuota, filesToMount } = shared;
|
||||
const { consumer, throwWithLogs, getLogs } = createSilentLogConsumer();
|
||||
const { strategy: waitStrategy, getLastBody: getLastReadinessBody } = createReadinessProbe(
|
||||
|
|
@ -161,7 +174,7 @@ async function createContainer(
|
|||
.withEnvironment(environment)
|
||||
.withLabels({
|
||||
'com.docker.compose.project': projectName,
|
||||
'com.docker.compose.service': isWorker ? 'n8n-worker' : 'n8n-main',
|
||||
'com.docker.compose.service': SERVICE_LABEL[role],
|
||||
instance: instanceNumber.toString(),
|
||||
})
|
||||
.withPullPolicy(new N8nImagePullPolicy(N8N_IMAGE))
|
||||
|
|
@ -185,14 +198,16 @@ async function createContainer(
|
|||
const ports: PortWithOptionalBinding[] = hostPort
|
||||
? [{ container: N8N_READINESS_PORT, host: hostPort }]
|
||||
: [N8N_READINESS_PORT];
|
||||
if (isWorker) {
|
||||
if (role === 'worker') {
|
||||
ports.push(5679);
|
||||
}
|
||||
|
||||
container = container.withExposedPorts(...ports).withWaitStrategy(waitStrategy);
|
||||
|
||||
if (isWorker) {
|
||||
if (role === 'worker') {
|
||||
container = container.withCommand(['worker']);
|
||||
} else if (role === 'webhook') {
|
||||
container = container.withCommand(['webhook']);
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
@ -219,11 +234,13 @@ export async function createN8NInstances(
|
|||
const {
|
||||
mains,
|
||||
workers,
|
||||
webhooks = 0,
|
||||
projectName,
|
||||
network,
|
||||
allocatedPort,
|
||||
resourceQuota,
|
||||
workerResourceQuota,
|
||||
webhookResourceQuota,
|
||||
filesToMount,
|
||||
} = options;
|
||||
|
||||
|
|
@ -248,26 +265,52 @@ export async function createN8NInstances(
|
|||
filesToMount,
|
||||
};
|
||||
|
||||
const webhookShared: SharedConfig = {
|
||||
projectName,
|
||||
environment,
|
||||
network,
|
||||
resourceQuota: webhookResourceQuota ?? resourceQuota,
|
||||
filesToMount,
|
||||
};
|
||||
|
||||
const sharedByRole: Record<InstanceRole, SharedConfig> = {
|
||||
main: mainShared,
|
||||
webhook: webhookShared,
|
||||
worker: workerShared,
|
||||
};
|
||||
|
||||
const instances: InstanceConfig[] = [
|
||||
...Array.from({ length: mains }, (_, i) => {
|
||||
...Array.from({ length: mains }, (_, i): InstanceConfig => {
|
||||
const num = i + 1;
|
||||
const name = mains > 1 ? `${projectName}-n8n-main-${num}` : `${projectName}-n8n`;
|
||||
return {
|
||||
name,
|
||||
isWorker: false,
|
||||
role: 'main',
|
||||
instanceNumber: num,
|
||||
networkAlias: name,
|
||||
hostPort: num === 1 ? allocatedPort : undefined,
|
||||
};
|
||||
}),
|
||||
...Array.from({ length: workers }, (_, i) => ({
|
||||
name: `${projectName}-n8n-worker-${i + 1}`,
|
||||
isWorker: true,
|
||||
instanceNumber: i + 1,
|
||||
})),
|
||||
...Array.from({ length: webhooks }, (_, i): InstanceConfig => {
|
||||
const num = i + 1;
|
||||
const name = `${projectName}-n8n-webhook-${num}`;
|
||||
return {
|
||||
name,
|
||||
role: 'webhook',
|
||||
instanceNumber: num,
|
||||
networkAlias: name,
|
||||
};
|
||||
}),
|
||||
...Array.from(
|
||||
{ length: workers },
|
||||
(_, i): InstanceConfig => ({
|
||||
name: `${projectName}-n8n-worker-${i + 1}`,
|
||||
role: 'worker',
|
||||
instanceNumber: i + 1,
|
||||
}),
|
||||
),
|
||||
];
|
||||
|
||||
// Service-only mode: no n8n containers needed
|
||||
if (instances.length === 0) {
|
||||
log('No n8n instances requested (service-only mode)');
|
||||
return { containers, environment, diagnostics };
|
||||
|
|
@ -284,12 +327,12 @@ export async function createN8NInstances(
|
|||
throw new N8NStartupError(message, diagnostics, error);
|
||||
};
|
||||
|
||||
// Start main 1 first (handles DB migrations/setup)
|
||||
// Main 1 handles DB migrations and must finish before parallel starts.
|
||||
const [main1, ...remaining] = instances;
|
||||
log(`Starting main 1: ${main1.name} (DB setup)`);
|
||||
let main1Result: ContainerStartResult;
|
||||
try {
|
||||
main1Result = await createContainer(main1, mainShared, diagnostics);
|
||||
main1Result = await createContainer(main1, sharedByRole[main1.role], diagnostics);
|
||||
} catch (error) {
|
||||
return rethrowWithDiagnostics(error);
|
||||
}
|
||||
|
|
@ -297,20 +340,14 @@ export async function createN8NInstances(
|
|||
containers.push(main1Result.container);
|
||||
log('main 1 ready');
|
||||
|
||||
// Start remaining instances in parallel
|
||||
if (remaining.length > 0) {
|
||||
log(`Starting ${remaining.length} remaining instances in parallel...`);
|
||||
try {
|
||||
const parallelResults = await Promise.all(
|
||||
remaining.map(async (instance) => {
|
||||
const type = instance.isWorker ? 'worker' : 'main';
|
||||
log(`Starting ${type} ${instance.instanceNumber}: ${instance.name}`);
|
||||
const result = await createContainer(
|
||||
instance,
|
||||
instance.isWorker ? workerShared : mainShared,
|
||||
diagnostics,
|
||||
);
|
||||
log(`${type} ${instance.instanceNumber} ready`);
|
||||
log(`Starting ${instance.role} ${instance.instanceNumber}: ${instance.name}`);
|
||||
const result = await createContainer(instance, sharedByRole[instance.role], diagnostics);
|
||||
log(`${instance.role} ${instance.instanceNumber} ready`);
|
||||
return { instance, result };
|
||||
}),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ export interface StartContext {
|
|||
projectName: string;
|
||||
mains: number;
|
||||
workers: number;
|
||||
webhooks: number;
|
||||
isQueueMode: boolean;
|
||||
usePostgres: boolean;
|
||||
needsLoadBalancer: boolean;
|
||||
|
|
@ -69,18 +70,23 @@ export type LoadBalancerPolicy = 'first' | 'round_robin' | 'random' | 'least_con
|
|||
export interface StackConfig {
|
||||
mains?: number;
|
||||
workers?: number;
|
||||
/** Dedicated `n8n webhook` procs. Forces queue mode when > 0. */
|
||||
webhooks?: number;
|
||||
postgres?: boolean;
|
||||
env?: Record<string, string>;
|
||||
projectName?: string;
|
||||
resourceQuota?: { memory?: number; cpu?: number };
|
||||
workerResourceQuota?: { memory?: number; cpu?: number };
|
||||
/** Resource quota for webhook procs. Falls back to `resourceQuota` if omitted. */
|
||||
webhookResourceQuota?: { memory?: number; cpu?: number };
|
||||
services?: readonly ServiceName[];
|
||||
/** When true, services target host machine instead of Docker-internal n8n */
|
||||
external?: boolean;
|
||||
/** When set, the Docker network uses this exact name instead of a random UUID. */
|
||||
networkName?: string;
|
||||
/**
|
||||
* Caddy load-balancer upstream-selection policy. Only applies when `mains > 1`.
|
||||
* Caddy load-balancer upstream-selection policy. Only applies when `mains > 1`
|
||||
* or `webhooks > 0` (anything that triggers the LB to start).
|
||||
* Defaults to `'first'` — sticky to main #1, useful for UI debuggability.
|
||||
* Benchmarks should set `'round_robin'` to actually distribute load.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ export const victoriaMetrics: Service<VictoriaMetricsResult> = {
|
|||
description: 'VictoriaMetrics',
|
||||
|
||||
getOptions(ctx: StartContext): VictoriaMetricsConfig {
|
||||
const { mains, workers, projectName } = ctx;
|
||||
const { mains, workers, webhooks, projectName } = ctx;
|
||||
const scrapeTargets: ScrapeTarget[] = [];
|
||||
|
||||
for (let i = 1; i <= mains; i++) {
|
||||
|
|
@ -77,6 +77,14 @@ export const victoriaMetrics: Service<VictoriaMetricsResult> = {
|
|||
port: 5678,
|
||||
});
|
||||
}
|
||||
for (let i = 1; i <= webhooks; i++) {
|
||||
scrapeTargets.push({
|
||||
job: 'n8n-webhook',
|
||||
instance: `n8n-webhook-${i}`,
|
||||
host: `${projectName}-n8n-webhook-${i}`,
|
||||
port: 5678,
|
||||
});
|
||||
}
|
||||
for (let i = 1; i <= workers; i++) {
|
||||
scrapeTargets.push({
|
||||
job: 'n8n-worker',
|
||||
|
|
|
|||
|
|
@ -79,11 +79,13 @@ export async function createN8NStack(config: N8NConfig = {}): Promise<N8NStack>
|
|||
const {
|
||||
mains = 1,
|
||||
workers = 0,
|
||||
webhooks = 0,
|
||||
postgres: usePostgresConfig = false,
|
||||
env = {},
|
||||
projectName,
|
||||
resourceQuota,
|
||||
workerResourceQuota,
|
||||
webhookResourceQuota,
|
||||
services: enabledServices = [],
|
||||
external = false,
|
||||
networkName,
|
||||
|
|
@ -91,8 +93,8 @@ export async function createN8NStack(config: N8NConfig = {}): Promise<N8NStack>
|
|||
|
||||
const log = createElapsedLogger('stack');
|
||||
|
||||
const isQueueMode = mains > 1 || workers > 0;
|
||||
const needsLoadBalancer = mains > 1;
|
||||
const isQueueMode = mains > 1 || workers > 0 || webhooks > 0;
|
||||
const needsLoadBalancer = mains > 1 || webhooks > 0;
|
||||
const usePostgres = usePostgresConfig || isQueueMode || enabledServices.includes('keycloak');
|
||||
const uniqueProjectName = projectName ?? `n8n-stack-${Math.random().toString(36).substring(7)}`;
|
||||
|
||||
|
|
@ -131,6 +133,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise<N8NStack>
|
|||
projectName: uniqueProjectName,
|
||||
mains,
|
||||
workers,
|
||||
webhooks,
|
||||
isQueueMode,
|
||||
usePostgres,
|
||||
needsLoadBalancer,
|
||||
|
|
@ -213,6 +216,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise<N8NStack>
|
|||
const n8nResult = await createN8NInstances({
|
||||
mains,
|
||||
workers,
|
||||
webhooks,
|
||||
projectName: uniqueProjectName,
|
||||
network,
|
||||
serviceEnvironment: environment,
|
||||
|
|
@ -222,6 +226,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise<N8NStack>
|
|||
allocatedPort: needsLoadBalancer ? undefined : allocatedMainPort,
|
||||
resourceQuota,
|
||||
workerResourceQuota,
|
||||
webhookResourceQuota,
|
||||
filesToMount,
|
||||
});
|
||||
containers.push(...n8nResult.containers);
|
||||
|
|
@ -229,7 +234,7 @@ export async function createN8NStack(config: N8NConfig = {}): Promise<N8NStack>
|
|||
Math.round(performance.now() - n8nStartupStart),
|
||||
n8nResult.containers.length,
|
||||
);
|
||||
log(`n8n ready: ${mains} main(s), ${workers} worker(s)`);
|
||||
log(`n8n ready: ${mains} main(s), ${webhooks} webhook(s), ${workers} worker(s)`);
|
||||
|
||||
if (lbResult) {
|
||||
await pollContainerHttpEndpoint(lbResult.container, '/healthz/readiness');
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"version": 1,
|
||||
"generated": "2026-05-18T14:07:20.233Z",
|
||||
"generated": "2026-05-25T15:20:35.251Z",
|
||||
"totalViolations": 412,
|
||||
"violations": {
|
||||
"pages/AIAssistantPage.ts": [
|
||||
|
|
@ -50,13 +50,13 @@
|
|||
},
|
||||
{
|
||||
"rule": "scope-lockdown",
|
||||
"line": 99,
|
||||
"line": 65,
|
||||
"message": "InstanceAiPage: Unscoped locator - use this.container instead of this.page",
|
||||
"hash": "713916b2be0f"
|
||||
},
|
||||
{
|
||||
"rule": "scope-lockdown",
|
||||
"line": 144,
|
||||
"line": 140,
|
||||
"message": "InstanceAiPage: Unscoped locator - use this.container instead of this.page",
|
||||
"hash": "713916b2be0f"
|
||||
}
|
||||
|
|
@ -2556,7 +2556,7 @@
|
|||
"tests/infrastructure/benchmarks/kafka/queue-mode-sustained-rate.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 17,
|
||||
"line": 18,
|
||||
"message": "Duplicate test logic: \"Kafka trigger + 1 noop, 1KB payload, 250 msg/s × 240s (1 main + 1 worker)\" has identical structure to tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts:\"Kafka trigger + 1 noop, 1KB payload, drain 100k preloaded backlog (1 main + 1 worker)\"",
|
||||
"hash": "388616eecbaa"
|
||||
}
|
||||
|
|
@ -2564,7 +2564,7 @@
|
|||
"tests/infrastructure/benchmarks/kafka/single-instance-ceiling.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 17,
|
||||
"line": 18,
|
||||
"message": "Duplicate test logic: \"Kafka trigger + 1 noop, 1KB payload, 150k msgs\" has identical structure to tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts:\"Kafka trigger + 1 noop, 1KB payload, drain 100k preloaded backlog (1 main + 1 worker)\"",
|
||||
"hash": "1c5033214063"
|
||||
}
|
||||
|
|
@ -2572,7 +2572,7 @@
|
|||
"tests/infrastructure/benchmarks/kafka/steady-rate-breaking-point.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 34,
|
||||
"line": 35,
|
||||
"message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/kafka/burst-drain-capacity.spec.ts:\"Kafka trigger + 1 noop, 1KB payload, drain 100k preloaded backlog (1 main + 1 worker)\"",
|
||||
"hash": "5579d3e330e1"
|
||||
}
|
||||
|
|
@ -2580,25 +2580,33 @@
|
|||
"tests/infrastructure/benchmarks/kafka/output-size-impact.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 25,
|
||||
"line": 26,
|
||||
"message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/kafka/node-count-scaling.spec.ts:\"anonymous\"",
|
||||
"hash": "d676f700b46d"
|
||||
}
|
||||
],
|
||||
"tests/infrastructure/benchmarks/webhook/webhook-otel-overhead.spec.ts": [
|
||||
"tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-2w.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 26,
|
||||
"message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-main-scaling.spec.ts:\"anonymous\"",
|
||||
"hash": "61c8c9b0f1ff"
|
||||
"line": 44,
|
||||
"message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts:\"anonymous\"",
|
||||
"hash": "bc07737c363b"
|
||||
}
|
||||
],
|
||||
"tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-baseline.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 38,
|
||||
"message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts:\"anonymous\"",
|
||||
"hash": "20c97ee82b2c"
|
||||
}
|
||||
],
|
||||
"tests/infrastructure/benchmarks/webhook/webhook-single-instance.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 27,
|
||||
"message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-queue-baseline.spec.ts:\"anonymous\"",
|
||||
"hash": "51d2d4fe3fdb"
|
||||
"message": "Duplicate test logic: \"anonymous\" has identical structure to tests/infrastructure/benchmarks/webhook/webhook-dedicated-proc-2wp-1w.spec.ts:\"anonymous\"",
|
||||
"hash": "46d8dc14acc3"
|
||||
}
|
||||
],
|
||||
"tests/infrastructure/benchmarks-local/instance-ai/thread-churn-delegation.spec.ts": [
|
||||
|
|
@ -2617,14 +2625,6 @@
|
|||
"hash": "3ac5475c5814"
|
||||
}
|
||||
],
|
||||
"tests/performance/memory-consumption-instance-ai.spec.ts": [
|
||||
{
|
||||
"rule": "duplicate-logic",
|
||||
"line": 22,
|
||||
"message": "Duplicate test logic: \"Idle baseline with Instance AI module loaded\" has identical structure to tests/performance/memory-consumption-agents.spec.ts:\"Idle baseline with Agents module loaded\"",
|
||||
"hash": "ef1cde8770e1"
|
||||
}
|
||||
],
|
||||
"tests/infrastructure/benchmarks-local/instance-ai/cancel-abort.spec.ts": [
|
||||
{
|
||||
"rule": "no-direct-page-instantiation",
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ const CONTAINER_CONFIGS: Array<{ name: string; config: N8NConfig }> = [
|
|||
// postgres/kafka/redis/observability.
|
||||
export const BENCHMARK_MAIN_RESOURCES = { memory: 4, cpu: 2 };
|
||||
export const BENCHMARK_WORKER_RESOURCES = { memory: 2, cpu: 1 };
|
||||
export const BENCHMARK_WEBHOOK_RESOURCES = { memory: 4, cpu: 2 };
|
||||
|
||||
export const OBSERVABILITY_SERVICES = ['victoriaLogs', 'victoriaMetrics', 'vector'] as const;
|
||||
|
||||
|
|
@ -67,9 +68,10 @@ const BENCHMARK_CONFIG: N8NConfig = {
|
|||
postgres: true,
|
||||
resourceQuota: BENCHMARK_MAIN_RESOURCES,
|
||||
workerResourceQuota: BENCHMARK_WORKER_RESOURCES,
|
||||
// Distribute load across all mains. UI tests stick to the default `first`
|
||||
// policy so debugging hits a single predictable backend.
|
||||
lbPolicy: 'round_robin',
|
||||
webhookResourceQuota: BENCHMARK_WEBHOOK_RESOURCES,
|
||||
// `least_conn` avoids keep-alive affinity that skews round_robin 50/100% with
|
||||
// autocannon's long-lived connections across 2+ procs. UI tests use `first`.
|
||||
lbPolicy: 'least_conn',
|
||||
env: {
|
||||
N8N_LOG_LEVEL: 'error',
|
||||
N8N_DIAGNOSTICS_ENABLED: 'false',
|
||||
|
|
@ -96,6 +98,8 @@ export interface BenchOptions {
|
|||
mains?: number;
|
||||
/** Number of worker pods. Default: 0 (direct mode). */
|
||||
workers?: number;
|
||||
/** Dedicated `n8n webhook` procs. Forces queue mode when > 0. */
|
||||
webhooks?: number;
|
||||
/**
|
||||
* Adds the `tracing` service (Jaeger + n8n-tracer) and turns on OTEL emission.
|
||||
* Adds ~5-10% per-request overhead — opt in only when measuring OTEL cost or
|
||||
|
|
@ -118,8 +122,8 @@ export interface BenchOptions {
|
|||
* // Queue-mode kafka (1 main + 3 workers)
|
||||
* test.use({ capability: benchConfig('node-count-scaling', { kafka: true, workers: 3 }) });
|
||||
*
|
||||
* // Multi-main webhook
|
||||
* test.use({ capability: benchConfig('webhook-main-scaling', { mains: 2, workers: 2 }) });
|
||||
* // Dedicated webhook proc + worker
|
||||
* test.use({ capability: benchConfig('webhook-dedicated-proc', { webhooks: 1, workers: 1 }) });
|
||||
*/
|
||||
export function benchConfig(isolation: string, opts: BenchOptions = {}): N8NConfig {
|
||||
const services = [...(BENCHMARK_CONFIG.services ?? [])];
|
||||
|
|
@ -144,6 +148,7 @@ export function benchConfig(isolation: string, opts: BenchOptions = {}): N8NConf
|
|||
services,
|
||||
...(opts.mains !== undefined && { mains: opts.mains }),
|
||||
...(opts.workers !== undefined && { workers: opts.workers }),
|
||||
...(opts.webhooks !== undefined && { webhooks: opts.webhooks }),
|
||||
env,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
414
packages/testing/playwright/scripts/sizing-matrix-aggregate.ts
Normal file
414
packages/testing/playwright/scripts/sizing-matrix-aggregate.ts
Normal file
|
|
@ -0,0 +1,414 @@
|
|||
/**
|
||||
* Aggregates N `run-report.json` files into a `sizing-matrix.json`.
|
||||
*
|
||||
* npx tsx packages/testing/playwright/scripts/sizing-matrix-aggregate.ts \
|
||||
* --input <dir> [--out <file>] [--mapping <file>]
|
||||
*
|
||||
* CURRENTS_API_KEY=… ... --currents-run <runId>
|
||||
*
|
||||
* Hardware defaults to the Blacksmith CI runner (8 vCPU / 16 GB). Override
|
||||
* via `--hardware-runner/--hardware-vcpu/--hardware-ram-gb` or
|
||||
* `SIZING_MATRIX_RUNNER/VCPU/RAM_GB` env when running off-CI, or the matrix
|
||||
* will mis-attribute the source.
|
||||
*/
|
||||
|
||||
import { readdirSync, readFileSync, writeFileSync, statSync, mkdirSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { dirname, join, resolve } from 'node:path';
|
||||
|
||||
const CURRENTS_API = 'https://api.currents.dev/v1';
|
||||
|
||||
import type { RunReport } from '../utils/benchmark/run-report';
|
||||
import {
|
||||
aggregate,
|
||||
renderMarkdown,
|
||||
type AggregateInput,
|
||||
type HardwareInfo,
|
||||
type SpecMapping,
|
||||
type Topology,
|
||||
} from '../utils/benchmark/sizing-matrix';
|
||||
|
||||
const DEFAULT_HARDWARE: HardwareInfo = {
|
||||
runner: 'blacksmith-8vcpu-ubuntu-2204',
|
||||
vcpu: 8,
|
||||
ramGb: 16,
|
||||
};
|
||||
|
||||
const S0_SINGLE_MAIN: Topology = {
|
||||
mains: 1,
|
||||
webhookProcs: 0,
|
||||
workers: 0,
|
||||
mainVcpu: 2,
|
||||
mainRamGb: 4,
|
||||
pgVcpu: 2,
|
||||
pgRamGb: 4,
|
||||
redisVcpu: 1,
|
||||
redisRamGb: 1,
|
||||
};
|
||||
|
||||
const S1_QUEUE_BASELINE: Topology = {
|
||||
...S0_SINGLE_MAIN,
|
||||
workers: 1,
|
||||
workerVcpu: 2,
|
||||
workerRamGb: 4,
|
||||
};
|
||||
|
||||
const S1_DEDICATED_PROC_BASELINE: Topology = {
|
||||
...S1_QUEUE_BASELINE,
|
||||
webhookProcs: 1,
|
||||
};
|
||||
|
||||
const S2_DEDICATED_PROC_2WP_1W: Topology = {
|
||||
...S1_DEDICATED_PROC_BASELINE,
|
||||
webhookProcs: 2,
|
||||
};
|
||||
|
||||
const S2_DEDICATED_PROC_2WP_2W: Topology = {
|
||||
...S2_DEDICATED_PROC_2WP_1W,
|
||||
workers: 2,
|
||||
};
|
||||
|
||||
// Webhook and kafka triggers collapse into the same cell — shape is workload
|
||||
// archetype, not ingress protocol.
|
||||
const DEFAULT_MAPPING: SpecMapping = {
|
||||
'webhook/webhook-single-instance.spec.ts': {
|
||||
scale: 'S0',
|
||||
shape: 'L',
|
||||
topology: S0_SINGLE_MAIN,
|
||||
},
|
||||
'webhook/webhook-dedicated-proc-baseline.spec.ts': {
|
||||
scale: 'S1',
|
||||
shape: 'L',
|
||||
topology: S1_DEDICATED_PROC_BASELINE,
|
||||
},
|
||||
'webhook/webhook-dedicated-proc-2wp-1w.spec.ts': {
|
||||
scale: 'S2',
|
||||
shape: 'L',
|
||||
topology: S2_DEDICATED_PROC_2WP_1W,
|
||||
},
|
||||
'webhook/webhook-dedicated-proc-2wp-2w.spec.ts': {
|
||||
scale: 'S2',
|
||||
shape: 'L',
|
||||
topology: S2_DEDICATED_PROC_2WP_2W,
|
||||
},
|
||||
'webhook/webhook-save-data-overhead.spec.ts': {
|
||||
scale: 'S1',
|
||||
shape: 'D',
|
||||
topology: S1_DEDICATED_PROC_BASELINE,
|
||||
},
|
||||
// `webhook-sync-latency-floor` is deliberately unmapped — measures latency at
|
||||
// fixed concurrency, not throughput, and distorts the S1-L distribution.
|
||||
'kafka/single-instance-ceiling.spec.ts': {
|
||||
scale: 'S0',
|
||||
shape: 'L',
|
||||
topology: S0_SINGLE_MAIN,
|
||||
},
|
||||
'kafka/queue-mode-sustained-rate.spec.ts': {
|
||||
scale: 'S1',
|
||||
shape: 'L',
|
||||
topology: S1_QUEUE_BASELINE,
|
||||
},
|
||||
'kafka/burst-drain-capacity.spec.ts': {
|
||||
scale: 'S1',
|
||||
shape: 'L',
|
||||
topology: S1_QUEUE_BASELINE,
|
||||
},
|
||||
'kafka/node-count-scaling.spec.ts': {
|
||||
scale: 'S1',
|
||||
shape: 'X',
|
||||
topology: S1_QUEUE_BASELINE,
|
||||
},
|
||||
'kafka/output-size-impact.spec.ts': {
|
||||
scale: 'S1',
|
||||
shape: 'D',
|
||||
topology: S1_QUEUE_BASELINE,
|
||||
},
|
||||
'kafka/steady-rate-breaking-point.spec.ts': {
|
||||
scale: 'S0',
|
||||
shape: 'X',
|
||||
topology: S0_SINGLE_MAIN,
|
||||
},
|
||||
};
|
||||
|
||||
interface CliArgs {
|
||||
input?: string;
|
||||
currentsRun?: string;
|
||||
mapping?: string;
|
||||
out: string;
|
||||
markdownOut?: string;
|
||||
n8nVersion: string;
|
||||
commitSha: string;
|
||||
hardware: HardwareInfo;
|
||||
}
|
||||
|
||||
function parseArgs(argv: string[]): CliArgs {
|
||||
const args: Record<string, string> = {};
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const token = argv[i];
|
||||
if (!token?.startsWith('--')) continue;
|
||||
const key = token.slice(2);
|
||||
const value = argv[i + 1];
|
||||
if (value && !value.startsWith('--')) {
|
||||
args[key] = value;
|
||||
i++;
|
||||
} else {
|
||||
args[key] = 'true';
|
||||
}
|
||||
}
|
||||
if (!args.input && !args['currents-run']) {
|
||||
throw new Error('Either --input <dir> or --currents-run <runId> is required');
|
||||
}
|
||||
return {
|
||||
input: args.input ? resolve(args.input) : undefined,
|
||||
currentsRun: args['currents-run'],
|
||||
mapping: args.mapping ? resolve(args.mapping) : undefined,
|
||||
out: resolve(args.out ?? 'sizing-matrix.json'),
|
||||
markdownOut: args['markdown-out'] ? resolve(args['markdown-out']) : undefined,
|
||||
n8nVersion: args['n8n-version'] ?? readN8nVersion(),
|
||||
commitSha: args['commit-sha'] ?? readGitSha(),
|
||||
hardware: resolveHardware(args),
|
||||
};
|
||||
}
|
||||
|
||||
function resolveHardware(args: Record<string, string>): HardwareInfo {
|
||||
const runner =
|
||||
args['hardware-runner'] ?? process.env.SIZING_MATRIX_RUNNER ?? DEFAULT_HARDWARE.runner;
|
||||
const vcpu = args['hardware-vcpu']
|
||||
? Number(args['hardware-vcpu'])
|
||||
: process.env.SIZING_MATRIX_VCPU
|
||||
? Number(process.env.SIZING_MATRIX_VCPU)
|
||||
: DEFAULT_HARDWARE.vcpu;
|
||||
const ramGb = args['hardware-ram-gb']
|
||||
? Number(args['hardware-ram-gb'])
|
||||
: process.env.SIZING_MATRIX_RAM_GB
|
||||
? Number(process.env.SIZING_MATRIX_RAM_GB)
|
||||
: DEFAULT_HARDWARE.ramGb;
|
||||
if (!Number.isFinite(vcpu) || vcpu <= 0) {
|
||||
throw new Error(`--hardware-vcpu must be a positive number, got ${args['hardware-vcpu']}`);
|
||||
}
|
||||
if (!Number.isFinite(ramGb) || ramGb <= 0) {
|
||||
throw new Error(`--hardware-ram-gb must be a positive number, got ${args['hardware-ram-gb']}`);
|
||||
}
|
||||
return { runner, vcpu, ramGb };
|
||||
}
|
||||
|
||||
function readN8nVersion(): string {
|
||||
try {
|
||||
const pkg = JSON.parse(
|
||||
readFileSync(join(process.cwd(), 'packages/cli/package.json'), 'utf8'),
|
||||
) as { version?: string };
|
||||
return pkg.version ?? 'unknown';
|
||||
} catch {
|
||||
return 'unknown';
|
||||
}
|
||||
}
|
||||
|
||||
function readGitSha(): string {
|
||||
try {
|
||||
const head = readFileSync(join(process.cwd(), '.git/HEAD'), 'utf8').trim();
|
||||
if (head.startsWith('ref: ')) {
|
||||
const refPath = head.slice(5);
|
||||
return readFileSync(join(process.cwd(), '.git', refPath), 'utf8')
|
||||
.trim()
|
||||
.slice(0, 12);
|
||||
}
|
||||
return head.slice(0, 12);
|
||||
} catch {
|
||||
return 'unknown';
|
||||
}
|
||||
}
|
||||
|
||||
function loadMapping(path?: string): SpecMapping {
|
||||
if (!path) return DEFAULT_MAPPING;
|
||||
const overrides = JSON.parse(readFileSync(path, 'utf8')) as SpecMapping;
|
||||
return { ...DEFAULT_MAPPING, ...overrides };
|
||||
}
|
||||
|
||||
interface CurrentsAttachment {
|
||||
name: string;
|
||||
readUrl: string;
|
||||
contentType: string;
|
||||
}
|
||||
|
||||
interface CurrentsRunSpec {
|
||||
instanceId: string;
|
||||
spec: string;
|
||||
}
|
||||
|
||||
interface CurrentsRunDetails {
|
||||
commit?: { sha?: string };
|
||||
specs?: CurrentsRunSpec[];
|
||||
}
|
||||
|
||||
interface CurrentsInstanceDetails {
|
||||
spec?: string;
|
||||
commit?: { sha?: string };
|
||||
results?: { attachments?: CurrentsAttachment[] };
|
||||
}
|
||||
|
||||
async function currentsGet<T>(apiKey: string, path: string): Promise<T> {
|
||||
const res = await fetch(`${CURRENTS_API}${path}`, {
|
||||
headers: { Authorization: `Bearer ${apiKey}` },
|
||||
});
|
||||
if (!res.ok) {
|
||||
throw new Error(`Currents API ${path} failed: ${res.status} ${res.statusText}`);
|
||||
}
|
||||
const json = (await res.json()) as { data?: T };
|
||||
if (!json.data) throw new Error(`Currents API ${path} returned no data field`);
|
||||
return json.data;
|
||||
}
|
||||
|
||||
async function fetchCurrentsRun(
|
||||
runId: string,
|
||||
cacheDir: string,
|
||||
): Promise<{ reportPaths: string[]; commitSha?: string }> {
|
||||
const apiKey = process.env.CURRENTS_API_KEY;
|
||||
if (!apiKey) throw new Error('CURRENTS_API_KEY env var required for --currents-run');
|
||||
|
||||
console.log(`[sizing-matrix] Fetching Currents run ${runId}…`);
|
||||
const run = await currentsGet<CurrentsRunDetails>(apiKey, `/runs/${runId}`);
|
||||
const specs = run.specs ?? [];
|
||||
if (specs.length === 0) {
|
||||
throw new Error(`Currents run ${runId} has no specs.`);
|
||||
}
|
||||
console.log(`[sizing-matrix] Run contains ${specs.length} spec instances.`);
|
||||
mkdirSync(cacheDir, { recursive: true });
|
||||
|
||||
const reportPaths: string[] = [];
|
||||
for (const spec of specs) {
|
||||
const instance = await currentsGet<CurrentsInstanceDetails>(
|
||||
apiKey,
|
||||
`/instances/${spec.instanceId}`,
|
||||
);
|
||||
const attachment = instance.results?.attachments?.find((a) => a.name === 'run-report.json');
|
||||
if (!attachment) {
|
||||
console.warn(
|
||||
`[sizing-matrix] Spec ${spec.spec} (instance ${spec.instanceId}) has no run-report.json attachment — skipping.`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
const filename = `${sanitiseFilename(spec.spec)}.${spec.instanceId}.json`;
|
||||
const outPath = join(cacheDir, filename);
|
||||
await downloadTo(attachment.readUrl, outPath);
|
||||
reportPaths.push(outPath);
|
||||
}
|
||||
console.log(
|
||||
`[sizing-matrix] Downloaded ${reportPaths.length} run-report.json file(s) → ${cacheDir}`,
|
||||
);
|
||||
return { reportPaths, commitSha: run.commit?.sha?.slice(0, 12) };
|
||||
}
|
||||
|
||||
async function downloadTo(url: string, outPath: string): Promise<void> {
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) throw new Error(`Download ${url} failed: ${res.status}`);
|
||||
const buf = Buffer.from(await res.arrayBuffer());
|
||||
writeFileSync(outPath, buf);
|
||||
}
|
||||
|
||||
function sanitiseFilename(spec: string): string {
|
||||
return spec.replace(/[^a-z0-9._-]+/gi, '-').slice(0, 120);
|
||||
}
|
||||
|
||||
function findRunReports(root: string): string[] {
|
||||
const found: string[] = [];
|
||||
const stack = [root];
|
||||
while (stack.length) {
|
||||
const current = stack.pop();
|
||||
if (!current) continue;
|
||||
const stat = statSync(current);
|
||||
if (stat.isFile()) {
|
||||
if (current.endsWith('.json') && current.includes('run-report')) found.push(current);
|
||||
continue;
|
||||
}
|
||||
if (!stat.isDirectory()) continue;
|
||||
for (const entry of readdirSync(current)) {
|
||||
stack.push(join(current, entry));
|
||||
}
|
||||
}
|
||||
return found.sort();
|
||||
}
|
||||
|
||||
function loadReport(path: string): { path: string; report: RunReport } | undefined {
|
||||
try {
|
||||
const report = JSON.parse(readFileSync(path, 'utf8')) as RunReport;
|
||||
if (report.schemaVersion !== 1) {
|
||||
console.warn(`[sizing-matrix] Skipping ${path}: schemaVersion ${report.schemaVersion} != 1`);
|
||||
return undefined;
|
||||
}
|
||||
return { path, report };
|
||||
} catch (error) {
|
||||
console.warn(`[sizing-matrix] Failed to parse ${path}: ${(error as Error).message}`);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const args = parseArgs(process.argv.slice(2));
|
||||
const mapping = loadMapping(args.mapping);
|
||||
|
||||
let reportPaths: string[];
|
||||
let commitSha = args.commitSha;
|
||||
if (args.currentsRun) {
|
||||
const cacheDir = join(tmpdir(), `sizing-aggregator-cache`, args.currentsRun);
|
||||
const fetched = await fetchCurrentsRun(args.currentsRun, cacheDir);
|
||||
reportPaths = fetched.reportPaths;
|
||||
if (fetched.commitSha) commitSha = fetched.commitSha;
|
||||
} else if (args.input) {
|
||||
reportPaths = findRunReports(args.input);
|
||||
} else {
|
||||
throw new Error('Either --input or --currents-run is required');
|
||||
}
|
||||
|
||||
const reports = reportPaths
|
||||
.map(loadReport)
|
||||
.filter((r): r is { path: string; report: RunReport } => r !== undefined);
|
||||
|
||||
if (reports.length === 0) {
|
||||
throw new Error(`No valid run-report.json files found`);
|
||||
}
|
||||
console.log(`[sizing-matrix] Aggregating ${reports.length} run-report.json files…`);
|
||||
|
||||
const input: AggregateInput = {
|
||||
reports,
|
||||
mapping,
|
||||
hardware: args.hardware,
|
||||
n8nVersion: args.n8nVersion,
|
||||
commitSha,
|
||||
runDate: new Date().toISOString(),
|
||||
};
|
||||
const matrix = aggregate(input);
|
||||
|
||||
mkdirSync(dirname(args.out), { recursive: true });
|
||||
writeFileSync(args.out, JSON.stringify(matrix, null, 2));
|
||||
console.log(`[sizing-matrix] Wrote ${matrix.cells.length} cell(s) → ${args.out}`);
|
||||
|
||||
if (args.markdownOut) {
|
||||
mkdirSync(dirname(args.markdownOut), { recursive: true });
|
||||
writeFileSync(args.markdownOut, renderMarkdown(matrix));
|
||||
console.log(`[sizing-matrix] Rendered markdown guide → ${args.markdownOut}`);
|
||||
}
|
||||
|
||||
for (const cell of matrix.cells) {
|
||||
const shapeKeys = Object.keys(cell.shapes);
|
||||
console.log(` ${cell.scale}: shapes [${shapeKeys.join(', ')}]`);
|
||||
for (const shape of shapeKeys) {
|
||||
const r = cell.shapes[shape as keyof typeof cell.shapes];
|
||||
if (!r) continue;
|
||||
console.log(
|
||||
` ${shape}: ceiling=${r.ceilingExecPerSec.p50.toFixed(1)}/s ` +
|
||||
`green=${r.greenSustainedExecPerSec.toFixed(1)}/s ` +
|
||||
`p99=${r.latency.p99.toFixed(0)}ms ` +
|
||||
`workloadIops=${r.io.workloadIopsPerSec.toFixed(0)} ` +
|
||||
`overhead=${r.io.overheadFactor.toFixed(2)}x ` +
|
||||
`bottleneck=${r.bottleneck} ` +
|
||||
`verdict=${r.verdict} ` +
|
||||
`(n=${r.ceilingExecPerSec.n})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((error) => {
|
||||
console.error(`[sizing-matrix] ${(error as Error).message}`);
|
||||
process.exit(1);
|
||||
});
|
||||
|
|
@ -18,24 +18,26 @@ The architectural ceiling. No queue tax, no worker dispatch. What's the absolute
|
|||
| kafka | `steady-rate-breaking-point.spec.ts` | At what input rate does the system fall behind? |
|
||||
| webhook | `webhook-single-instance.spec.ts` | What is the single-instance webhook ingestion ceiling? |
|
||||
|
||||
### Actual — `1m + 1w queue mode`
|
||||
### Actual — `1m + 1wp + 1w queue mode`
|
||||
|
||||
The real-world minimum HA topology. What does a basic production setup actually deliver?
|
||||
The production-canonical queue-mode topology: dedicated `n8n webhook` proc fronted by Caddy path-routing, with one worker draining the queue. What does a real production setup actually deliver?
|
||||
|
||||
| Trigger | Spec | Question |
|
||||
|---------|------|----------|
|
||||
| webhook | `webhook-dedicated-proc-baseline.spec.ts` | What is the webhook ingestion ceiling with a dedicated webhook proc? |
|
||||
| kafka | `queue-mode-sustained-rate.spec.ts` | Can queue mode sustain 250 msg/s steady? |
|
||||
| kafka | `burst-drain-capacity.spec.ts` | How fast can we drain a backlog? |
|
||||
| kafka | `node-count-scaling.spec.ts` | How does throughput scale with workflow complexity? |
|
||||
| kafka | `output-size-impact.spec.ts` | What is the impact of node output size on throughput? |
|
||||
|
||||
### Scaling — `2m + 2w queue mode`
|
||||
### Scaling — proc-axis and worker-axis at production topology
|
||||
|
||||
HA distribution check. Does doubling capacity ~double the actual baseline?
|
||||
How does the production topology scale when you add a webhook proc, a worker, or both?
|
||||
|
||||
| Trigger | Spec | Question |
|
||||
|---------|------|----------|
|
||||
| webhook | `webhook-main-scaling.spec.ts` | Does webhook ingestion scale linearly with main count? |
|
||||
| Trigger | Spec | Topology | Question |
|
||||
|---------|------|----------|----------|
|
||||
| webhook | `webhook-dedicated-proc-2wp-1w.spec.ts` | 1m + 2wp + 1w | Does doubling webhook procs (workers fixed) increase ingestion throughput? |
|
||||
| webhook | `webhook-dedicated-proc-2wp-2w.spec.ts` | 1m + 2wp + 2w | What is the joint scale-up of doubling both webhook procs and workers? |
|
||||
|
||||
### Cost — feature toggles on the actual baseline
|
||||
|
||||
|
|
@ -46,16 +48,18 @@ What does turning on configuration X cost vs the baseline?
|
|||
| webhook | `webhook-otel-overhead.spec.ts` | What is the runtime cost of enabling OTEL? |
|
||||
| webhook | `webhook-save-data-overhead.spec.ts` | What is the runtime cost of saving execution data on success? |
|
||||
|
||||
Cost specs run the same workload as a baseline spec with one config knob flipped. Compare the `exec/s`/`p50` of a Cost spec against its baseline from the same CI run to read the cost. OTEL specs also attach `jaeger-traces.json` as a test artifact — replay locally for flamegraph inspection.
|
||||
Cost specs run the same workload as the `Actual` baseline with one config knob flipped. Compare the `exec/s`/`p50` of a Cost spec against `webhook-dedicated-proc-baseline` from the same CI run to read the cost. OTEL specs also attach `jaeger-traces.json` as a test artifact — replay locally for flamegraph inspection.
|
||||
|
||||
## Standard topology
|
||||
|
||||
| Tier | Mains | Workers | Per-pod resources |
|
||||
|------|-------|---------|-------------------|
|
||||
| **Peak** | 1 | 0 | 4GB / 2 vCPU |
|
||||
| **Actual** | 1 | 1 | main 4GB/2 vCPU, worker 2GB/1 vCPU |
|
||||
| **Scaling** | 2 | 2 | main 4GB/2 vCPU, worker 2GB/1 vCPU |
|
||||
| **Cost** | matches the baseline | matches the baseline | matches the baseline |
|
||||
| Tier | Mains | Webhook procs | Workers | Per-pod resources |
|
||||
|------|-------|---------------|---------|-------------------|
|
||||
| **Peak** | 1 | 0 | 0 | 4GB / 2 vCPU |
|
||||
| **Actual** | 1 | 0–1 | 1 | main 4GB/2 vCPU, webhook 4GB/2 vCPU, worker 2GB/1 vCPU |
|
||||
| **Scaling** | 1 | 2 | 1–2 | main 4GB/2 vCPU, webhook 4GB/2 vCPU, worker 2GB/1 vCPU |
|
||||
| **Cost** | matches the baseline | matches the baseline | matches the baseline | matches the baseline |
|
||||
|
||||
Webhook-trigger specs in **Actual** and **Scaling** use the production-canonical topology (dedicated `n8n webhook` proc fronted by Caddy path-routing). Kafka-trigger specs in **Actual** use 1m + 1w queue mode (kafka doesn't ingress via HTTP — no dedicated webhook proc applicable).
|
||||
|
||||
All specs share a single env profile aligned with internal n8n production defaults — connection-pool, lock-duration, and Bull/Redis tuning from real deployments. See `BENCHMARK_CONFIG` in `playwright-projects.ts`.
|
||||
|
||||
|
|
@ -65,7 +69,7 @@ All specs share a single env profile aligned with internal n8n production defaul
|
|||
# Build n8n image first (skip if you only changed test code).
|
||||
pnpm build:docker
|
||||
|
||||
# Full suite — all 9 specs sequentially (each spawns its own container).
|
||||
# Full suite — all 14 specs sequentially (each spawns its own container).
|
||||
pnpm --filter=n8n-playwright test:benchmark
|
||||
|
||||
# One spec.
|
||||
|
|
|
|||
|
|
@ -3,33 +3,34 @@ import { benchConfig } from '../../../../playwright-projects';
|
|||
import { setupWebhook } from '../../../../utils/benchmark/webhook-driver';
|
||||
import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness';
|
||||
|
||||
// Queue-mode baseline at 1 main + 1 worker. Pair with `webhook-main-scaling`
|
||||
// (2 mains + 2 workers) to read the multi-main scaling factor isolated from
|
||||
// the direct→queue architecture change. Without this baseline, comparing
|
||||
// `webhook-single-instance` (1m, 0w direct) to `webhook-main-scaling` would
|
||||
// conflate "adding mains" with "switching to queue mode".
|
||||
//
|
||||
// Parameters intentionally match `webhook-main-scaling.spec.ts` (200 × 180s)
|
||||
// so the comparison is apples-to-apples.
|
||||
// Workers fixed, webhook procs doubled — isolates the ingestion-axis ceiling
|
||||
// vs the baseline.
|
||||
|
||||
const MAINS = 1;
|
||||
const WEBHOOKS = 2;
|
||||
const WORKERS = 1;
|
||||
const CONNECTIONS = 200;
|
||||
const DURATION_SECONDS = 180;
|
||||
|
||||
test.use({ capability: benchConfig('webhook-queue-baseline', { mains: MAINS, workers: WORKERS }) });
|
||||
test.use({
|
||||
capability: benchConfig('webhook-dedicated-proc-2wp-1w', {
|
||||
mains: MAINS,
|
||||
webhooks: WEBHOOKS,
|
||||
workers: WORKERS,
|
||||
}),
|
||||
});
|
||||
|
||||
test.describe(
|
||||
'What is the webhook ingestion ceiling in queue mode at 1 main + 1 worker?',
|
||||
'Does doubling webhook procs (workers fixed) increase ingestion throughput?',
|
||||
{
|
||||
tag: '@bench:webhook',
|
||||
annotation: [
|
||||
{ type: 'owner', description: 'Catalysts' },
|
||||
{ type: 'question', description: 'webhook-queue-baseline' },
|
||||
{ type: 'question', description: 'webhook-dedicated-proc-2wp-1w' },
|
||||
],
|
||||
},
|
||||
() => {
|
||||
test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WORKERS} worker)`, async ({
|
||||
test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker)`, async ({
|
||||
api,
|
||||
services,
|
||||
backendUrl,
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
import { test } from '../../../../fixtures/base';
|
||||
import { benchConfig } from '../../../../playwright-projects';
|
||||
import { setupWebhook } from '../../../../utils/benchmark/webhook-driver';
|
||||
import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness';
|
||||
|
||||
// Both knobs doubled. With baseline and 2wp-1w, factor marginal worker gain
|
||||
// (E3 − E2) and marginal proc gain (E2 − E1).
|
||||
|
||||
const MAINS = 1;
|
||||
const WEBHOOKS = 2;
|
||||
const WORKERS = 2;
|
||||
const CONNECTIONS = 200;
|
||||
const DURATION_SECONDS = 180;
|
||||
|
||||
test.use({
|
||||
capability: benchConfig('webhook-dedicated-proc-2wp-2w', {
|
||||
mains: MAINS,
|
||||
webhooks: WEBHOOKS,
|
||||
workers: WORKERS,
|
||||
}),
|
||||
});
|
||||
|
||||
test.describe(
|
||||
'What is the joint scale-up of doubling both webhook procs and workers?',
|
||||
{
|
||||
tag: '@bench:webhook',
|
||||
annotation: [
|
||||
{ type: 'owner', description: 'Catalysts' },
|
||||
{ type: 'question', description: 'webhook-dedicated-proc-2wp-2w' },
|
||||
],
|
||||
},
|
||||
() => {
|
||||
test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker)`, async ({
|
||||
api,
|
||||
services,
|
||||
backendUrl,
|
||||
}, testInfo) => {
|
||||
const handle = setupWebhook({
|
||||
scenario: {
|
||||
nodeCount: 1,
|
||||
payloadSize: '1KB',
|
||||
nodeOutputSize: 'noop',
|
||||
responseMode: 'onReceived',
|
||||
},
|
||||
});
|
||||
await runWebhookThroughputTest({
|
||||
handle,
|
||||
api,
|
||||
services,
|
||||
testInfo,
|
||||
baseUrl: backendUrl,
|
||||
connections: CONNECTIONS,
|
||||
durationSeconds: DURATION_SECONDS,
|
||||
timeoutMs: (DURATION_SECONDS + 60) * 1000,
|
||||
});
|
||||
});
|
||||
},
|
||||
);
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
import { test } from '../../../../fixtures/base';
|
||||
import { benchConfig } from '../../../../playwright-projects';
|
||||
import { setupWebhook } from '../../../../utils/benchmark/webhook-driver';
|
||||
import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness';
|
||||
|
||||
// Production-canonical queue-mode baseline: 1m + 1wp + 1w. Pair with the
|
||||
// 2wp-1w and 2wp-2w specs to factor proc-axis and worker-axis scaling.
|
||||
|
||||
const MAINS = 1;
|
||||
const WEBHOOKS = 1;
|
||||
const WORKERS = 1;
|
||||
const CONNECTIONS = 200;
|
||||
const DURATION_SECONDS = 180;
|
||||
|
||||
test.use({
|
||||
capability: benchConfig('webhook-dedicated-proc-baseline', {
|
||||
mains: MAINS,
|
||||
webhooks: WEBHOOKS,
|
||||
workers: WORKERS,
|
||||
}),
|
||||
});
|
||||
|
||||
test.describe(
|
||||
'What is the webhook ingestion ceiling with a dedicated webhook proc?',
|
||||
{
|
||||
tag: '@bench:webhook',
|
||||
annotation: [
|
||||
{ type: 'owner', description: 'Catalysts' },
|
||||
{ type: 'question', description: 'webhook-dedicated-proc-baseline' },
|
||||
],
|
||||
},
|
||||
() => {
|
||||
test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker)`, async ({
|
||||
api,
|
||||
services,
|
||||
backendUrl,
|
||||
}, testInfo) => {
|
||||
const handle = setupWebhook({
|
||||
scenario: {
|
||||
nodeCount: 1,
|
||||
payloadSize: '1KB',
|
||||
nodeOutputSize: 'noop',
|
||||
responseMode: 'onReceived',
|
||||
},
|
||||
});
|
||||
await runWebhookThroughputTest({
|
||||
handle,
|
||||
api,
|
||||
services,
|
||||
testInfo,
|
||||
baseUrl: backendUrl,
|
||||
connections: CONNECTIONS,
|
||||
durationSeconds: DURATION_SECONDS,
|
||||
timeoutMs: (DURATION_SECONDS + 60) * 1000,
|
||||
});
|
||||
});
|
||||
},
|
||||
);
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
import { test } from '../../../../fixtures/base';
|
||||
import { benchConfig } from '../../../../playwright-projects';
|
||||
import { setupWebhook } from '../../../../utils/benchmark/webhook-driver';
|
||||
import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness';
|
||||
|
||||
// 2 mains + 2 workers fits within the CI runner's CPU budget while still
|
||||
// exercising multi-main HA. Pair with `webhook-queue-baseline.spec.ts`
|
||||
// (1m + 1w queue mode) — NOT `webhook-single-instance.spec.ts` (1m + 0w
|
||||
// direct mode) — to read the main-scaling factor cleanly. Comparing against
|
||||
// the direct-mode spec would conflate "adding a main" with "switching from
|
||||
// direct to queue execution" since the architectures differ.
|
||||
//
|
||||
// Multi-main HA requires N8N_MULTI_MAIN_SETUP_ENABLED and a license cert
|
||||
// (picked up from N8N_LICENSE_ACTIVATION_KEY / N8N_LICENSE_CERT in the host
|
||||
// env). `benchConfig()` enables the env var automatically when `mains > 1`.
|
||||
|
||||
const MAINS = 2;
|
||||
const WORKERS = 2;
|
||||
const CONNECTIONS = 200;
|
||||
const DURATION_SECONDS = 180;
|
||||
|
||||
test.use({ capability: benchConfig('webhook-main-scaling', { mains: MAINS, workers: WORKERS }) });
|
||||
|
||||
test.describe(
|
||||
'Does webhook ingestion scale linearly with main count?',
|
||||
{
|
||||
tag: '@bench:webhook',
|
||||
annotation: [
|
||||
{ type: 'owner', description: 'Catalysts' },
|
||||
{ type: 'question', description: 'webhook-main-scaling' },
|
||||
],
|
||||
},
|
||||
() => {
|
||||
// Async webhook so HTTP req/s = ingestion ACK rate (independent of worker drain).
|
||||
// Baseline (1m + 1w queue mode): see webhook-queue-baseline.spec.ts.
|
||||
// At N mains, holding worker count proportional: should be ~N× baseline if
|
||||
// load distributes evenly. Sub-linear means LB or shared resource
|
||||
// contention; super-linear is impossible.
|
||||
test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} mains + ${WORKERS} workers)`, async ({
|
||||
api,
|
||||
services,
|
||||
backendUrl,
|
||||
}, testInfo) => {
|
||||
const handle = setupWebhook({
|
||||
scenario: {
|
||||
nodeCount: 1,
|
||||
payloadSize: '1KB',
|
||||
nodeOutputSize: 'noop',
|
||||
responseMode: 'onReceived',
|
||||
},
|
||||
});
|
||||
await runWebhookThroughputTest({
|
||||
handle,
|
||||
api,
|
||||
services,
|
||||
testInfo,
|
||||
baseUrl: backendUrl,
|
||||
connections: CONNECTIONS,
|
||||
durationSeconds: DURATION_SECONDS,
|
||||
timeoutMs: (DURATION_SECONDS + 60) * 1000,
|
||||
});
|
||||
|
||||
console.log(
|
||||
`\n[MAIN SCALING] mains=${MAINS}, workers=${WORKERS} | Compare HTTP req/s vs webhook-queue-baseline (1m+1w).\n` +
|
||||
' Linear scaling = ingestion req/s grows ~N× with mains.\n' +
|
||||
' Sub-linear = load balancer / shared resource bottleneck.',
|
||||
);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
|
@ -4,18 +4,20 @@ import { setupWebhook } from '../../../../utils/benchmark/webhook-driver';
|
|||
import { runWebhookThroughputTest } from '../harness/webhook-throughput-harness';
|
||||
|
||||
const MAINS = 1;
|
||||
const WEBHOOKS = 1;
|
||||
const WORKERS = 1;
|
||||
const CONNECTIONS = 200;
|
||||
const DURATION_SECONDS = 180;
|
||||
|
||||
// Same scenario as `webhook-queue-baseline.spec.ts` but with execution data
|
||||
// persisted on success. Compare this run's `exec/s` and `p50` against the
|
||||
// queue-baseline from the same CI run to read the cost of saving execution
|
||||
// data — the typical production default the rest of the bench suite suppresses
|
||||
// for clean ceiling numbers.
|
||||
// Same scenario as `webhook-dedicated-proc-baseline.spec.ts` but with execution
|
||||
// data persisted on success. Compare this run's `exec/s` and `p50` against the
|
||||
// dedicated-proc baseline from the same CI run to read the cost of saving
|
||||
// execution data — the typical production default the rest of the bench suite
|
||||
// suppresses for clean ceiling numbers.
|
||||
test.use({
|
||||
capability: benchConfig('webhook-save-data-overhead', {
|
||||
mains: MAINS,
|
||||
webhooks: WEBHOOKS,
|
||||
workers: WORKERS,
|
||||
env: { EXECUTIONS_DATA_SAVE_ON_SUCCESS: 'all' },
|
||||
}),
|
||||
|
|
@ -31,7 +33,7 @@ test.describe(
|
|||
],
|
||||
},
|
||||
() => {
|
||||
test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WORKERS} worker, save-on-success)`, async ({
|
||||
test(`Async webhook + 1 noop, 1KB payload, ${CONNECTIONS} connections × ${DURATION_SECONDS}s (${MAINS} main + ${WEBHOOKS} webhook + ${WORKERS} worker, save-on-success)`, async ({
|
||||
api,
|
||||
services,
|
||||
backendUrl,
|
||||
|
|
@ -56,7 +58,7 @@ test.describe(
|
|||
|
||||
console.log(
|
||||
"\n[SAVE-DATA OVERHEAD] Compare this spec's exec/s and p50 against " +
|
||||
'webhook-queue-baseline from the same run for the save-execution-data cost delta.\n',
|
||||
'webhook-dedicated-proc-baseline from the same run for the save-execution-data cost delta.\n',
|
||||
);
|
||||
});
|
||||
},
|
||||
|
|
|
|||
|
|
@ -9,10 +9,9 @@ const DURATION_SECONDS = 120;
|
|||
// Direct mode: no Bull, no workers. Webhook receives → workflow runs inline on
|
||||
// the same Node.js process → respond. Async (`onReceived`) returns the 200
|
||||
// before execution completes; the workflow runs as a detached promise on the
|
||||
// same event loop. This is the canonical single-instance direct-mode ceiling.
|
||||
// For multi-main scaling comparisons, use `webhook-queue-baseline.spec.ts`
|
||||
// (1m + 1w queue mode) — comparing direct mode to queue mode mixes
|
||||
// architecture changes with main-count scaling.
|
||||
// same event loop. This is the canonical single-instance direct-mode ceiling
|
||||
// — the community-edition / single-container deployment shape. For queue-mode
|
||||
// shapes, see the `webhook-dedicated-proc-*` specs.
|
||||
test.use({ capability: benchConfig('webhook-single-instance') });
|
||||
|
||||
test.describe(
|
||||
|
|
|
|||
622
packages/testing/playwright/utils/benchmark/sizing-matrix.ts
Normal file
622
packages/testing/playwright/utils/benchmark/sizing-matrix.ts
Normal file
|
|
@ -0,0 +1,622 @@
|
|||
/**
|
||||
* Aggregated sizing-matrix artifact emitted from N `run-report.json` files.
|
||||
*
|
||||
* Schema lock: DEVP-185. The matrix is two-axis — scale (S0–S4 throughput
|
||||
* slices) × shape (L/D/A/X workload archetypes). Each cell carries a
|
||||
* topology (output, not input) and a per-shape result that collapses ≥3
|
||||
* cold runs into `{p50, p95, max, n}`. v0 tolerates `n=1` cells and marks
|
||||
* verdict='amber' so the renderer can flag low-confidence data.
|
||||
*/
|
||||
|
||||
import type { RunReport, ServiceMetrics } from './run-report';
|
||||
|
||||
export type Scale = 'S0' | 'S1' | 'S2' | 'S3' | 'S4';
|
||||
export type Shape = 'L' | 'D' | 'A' | 'X';
|
||||
export type Verdict = 'green' | 'amber' | 'red';
|
||||
export type Bottleneck = 'main-cpu' | 'pg-cpu' | 'worker-cpu' | 'queue' | 'network';
|
||||
|
||||
export interface Topology {
|
||||
mains: number;
|
||||
webhookProcs: number;
|
||||
workers: number;
|
||||
mainVcpu: number;
|
||||
mainRamGb: number;
|
||||
workerVcpu?: number;
|
||||
workerRamGb?: number;
|
||||
pgVcpu: number;
|
||||
pgRamGb: number;
|
||||
pgIops?: number;
|
||||
redisVcpu: number;
|
||||
redisRamGb: number;
|
||||
}
|
||||
|
||||
export interface HardwareInfo {
|
||||
runner: string;
|
||||
vcpu: number;
|
||||
ramGb: number;
|
||||
}
|
||||
|
||||
export interface SourceRun {
|
||||
runReportPath: string;
|
||||
commitSha: string;
|
||||
spec: string;
|
||||
}
|
||||
|
||||
export interface PercentileSummary {
|
||||
p50: number;
|
||||
p95: number;
|
||||
max: number;
|
||||
n: number;
|
||||
}
|
||||
|
||||
export interface ShapeResult {
|
||||
ceilingExecPerSec: PercentileSummary;
|
||||
/** Steady-state ceiling — closer to the architectural limit than total exec/sec. */
|
||||
tailExecPerSec?: PercentileSummary;
|
||||
reqPerSec?: PercentileSummary;
|
||||
latency: { p50: number; p975: number; p99: number };
|
||||
headroomAtCeiling: {
|
||||
mainCpuPct: number;
|
||||
workerCpuPct?: number;
|
||||
pgCpuPct: number;
|
||||
pgCpuPctPeak: number;
|
||||
pgBufferHit: number;
|
||||
eventLoopLagMs: number;
|
||||
};
|
||||
io: {
|
||||
/** `pg_stat_io` `client backend` (reads+writes+extends) / sec. */
|
||||
workloadIopsPerSec: number;
|
||||
/** Sum of bgwriter + checkpointer + walwriter + autovacuum + standalone, per sec. */
|
||||
overheadIopsPerSec: number;
|
||||
/** Ratio total / workload. ~1.0 means workload-dominated; >1.5 means overhead-heavy. */
|
||||
overheadFactor: number;
|
||||
walMbPerSec: number;
|
||||
walRecordsPerSec: number;
|
||||
};
|
||||
bottleneck: Bottleneck;
|
||||
verdict: Verdict;
|
||||
/** Derived: throughput at which all headroom signals would stay green. */
|
||||
greenSustainedExecPerSec: number;
|
||||
sourceRuns: SourceRun[];
|
||||
}
|
||||
|
||||
export interface SizingCell {
|
||||
scale: Scale;
|
||||
topology: Topology;
|
||||
shapes: Partial<Record<Shape, ShapeResult>>;
|
||||
}
|
||||
|
||||
export interface SizingMatrix {
|
||||
schemaVersion: 1;
|
||||
n8nVersion: string;
|
||||
commitSha: string;
|
||||
runDate: string;
|
||||
hardware: HardwareInfo;
|
||||
cells: SizingCell[];
|
||||
}
|
||||
|
||||
/** Maps each benchmark spec path to its cell coordinates. */
|
||||
export interface SpecMapping {
|
||||
[specPath: string]: {
|
||||
scale: Scale;
|
||||
shape: Shape;
|
||||
topology: Topology;
|
||||
};
|
||||
}
|
||||
|
||||
export interface AggregateInput {
|
||||
reports: Array<{ path: string; report: RunReport }>;
|
||||
mapping: SpecMapping;
|
||||
hardware: HardwareInfo;
|
||||
n8nVersion: string;
|
||||
commitSha: string;
|
||||
runDate?: string;
|
||||
}
|
||||
|
||||
const GREEN_THRESHOLDS = {
|
||||
mainCpuPct: 75,
|
||||
pgCpuPct: 70,
|
||||
eventLoopLagMs: 25,
|
||||
httpP99Ms: 100,
|
||||
} as const;
|
||||
|
||||
const AMBER_THRESHOLDS = {
|
||||
mainCpuPct: 85,
|
||||
pgCpuPct: 85,
|
||||
eventLoopLagMs: 100,
|
||||
httpP99Ms: 500,
|
||||
} as const;
|
||||
|
||||
export function aggregate(input: AggregateInput): SizingMatrix {
|
||||
const groups = groupByCell(input.reports, input.mapping);
|
||||
const cells = Array.from(groups.entries())
|
||||
.map(([key, entries]) => buildCell(key, entries))
|
||||
.sort(byScale);
|
||||
|
||||
return {
|
||||
schemaVersion: 1,
|
||||
n8nVersion: input.n8nVersion,
|
||||
commitSha: input.commitSha,
|
||||
runDate: input.runDate ?? new Date().toISOString(),
|
||||
hardware: input.hardware,
|
||||
cells,
|
||||
};
|
||||
}
|
||||
|
||||
interface CellGroupEntry {
|
||||
scale: Scale;
|
||||
shape: Shape;
|
||||
topology: Topology;
|
||||
path: string;
|
||||
report: RunReport;
|
||||
}
|
||||
|
||||
function groupByCell(
|
||||
reports: Array<{ path: string; report: RunReport }>,
|
||||
mapping: SpecMapping,
|
||||
): Map<Scale, CellGroupEntry[]> {
|
||||
const byScale = new Map<Scale, CellGroupEntry[]>();
|
||||
for (const { path, report } of reports) {
|
||||
const cellCoord = findMapping(path, report, mapping);
|
||||
if (!cellCoord) {
|
||||
console.warn(
|
||||
`[sizing-matrix] No mapping for spec "${report.scenario.spec}" (file: ${path}) — skipping.`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
const entry: CellGroupEntry = { ...cellCoord, path, report };
|
||||
const existing = byScale.get(cellCoord.scale) ?? [];
|
||||
existing.push(entry);
|
||||
byScale.set(cellCoord.scale, existing);
|
||||
}
|
||||
return byScale;
|
||||
}
|
||||
|
||||
// First-match-wins: tries path, spec title, and normalised title against each key.
|
||||
function findMapping(path: string, report: RunReport, mapping: SpecMapping) {
|
||||
const candidates = [path, report.scenario.spec, normaliseSpecTitle(report.scenario.spec)];
|
||||
for (const [key, value] of Object.entries(mapping)) {
|
||||
const fragment = key.toLowerCase();
|
||||
const fileStem =
|
||||
key
|
||||
.split('/')
|
||||
.pop()
|
||||
?.replace(/\.spec\.ts$/, '') ?? '';
|
||||
const stemFragment = fileStem.toLowerCase();
|
||||
for (const candidate of candidates) {
|
||||
const haystack = candidate.toLowerCase();
|
||||
if (haystack.includes(fragment)) return value;
|
||||
if (stemFragment && haystack.includes(stemFragment)) return value;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function normaliseSpecTitle(title: string): string {
|
||||
return title.toLowerCase().replace(/[^a-z0-9]+/g, '-');
|
||||
}
|
||||
|
||||
function buildCell(scale: Scale, entries: CellGroupEntry[]): SizingCell {
|
||||
const topology = entries[0]?.topology;
|
||||
if (!topology) throw new Error(`No entries for scale ${scale}`);
|
||||
|
||||
const byShape = new Map<Shape, CellGroupEntry[]>();
|
||||
for (const entry of entries) {
|
||||
const list = byShape.get(entry.shape) ?? [];
|
||||
list.push(entry);
|
||||
byShape.set(entry.shape, list);
|
||||
}
|
||||
|
||||
const shapes: SizingCell['shapes'] = {};
|
||||
for (const [shape, group] of byShape.entries()) {
|
||||
shapes[shape] = buildShapeResult(group);
|
||||
}
|
||||
|
||||
return { scale, topology, shapes };
|
||||
}
|
||||
|
||||
function buildShapeResult(group: CellGroupEntry[]): ShapeResult {
|
||||
const perRun = group.map((entry) => projectRun(entry));
|
||||
const execs = perRun.map((r) => r.execPerSec).filter((v) => v > 0);
|
||||
const tailExecs = perRun
|
||||
.map((r) => r.tailExecPerSec)
|
||||
.filter((v): v is number => v !== undefined && v > 0);
|
||||
const reqs = perRun.map((r) => r.reqPerSec).filter((v): v is number => v !== undefined && v > 0);
|
||||
const latP50 = mean(perRun.map((r) => r.latency.p50).filter((v) => v > 0));
|
||||
const latP975 = mean(perRun.map((r) => r.latency.p975).filter((v) => v > 0));
|
||||
const latP99 = mean(perRun.map((r) => r.latency.p99).filter((v) => v > 0));
|
||||
|
||||
const headroom = headroomFromGroup(group);
|
||||
const ioMetrics = ioFromGroup(group);
|
||||
const bottleneck = detectBottleneck(headroom, latP99);
|
||||
const verdict = scoreVerdict(headroom, latP99, group.length);
|
||||
|
||||
// Median of per-run green projections, capped at observed ceiling.
|
||||
const ceilingMax = execs.length ? Math.max(...execs) : 0;
|
||||
const perRunGreens = perRun.map((r) => r.greenProjection).filter((v) => v > 0);
|
||||
const greenMedian = perRunGreens.length ? median(perRunGreens) : 0;
|
||||
const greenSustainedExecPerSec = Math.min(greenMedian, ceilingMax);
|
||||
|
||||
return {
|
||||
ceilingExecPerSec: summarise(execs),
|
||||
tailExecPerSec: tailExecs.length ? summarise(tailExecs) : undefined,
|
||||
reqPerSec: reqs.length ? summarise(reqs) : undefined,
|
||||
latency: { p50: latP50, p975: latP975, p99: latP99 },
|
||||
headroomAtCeiling: headroom,
|
||||
io: ioMetrics,
|
||||
bottleneck,
|
||||
verdict,
|
||||
greenSustainedExecPerSec,
|
||||
sourceRuns: group.map((g) => ({
|
||||
runReportPath: g.path,
|
||||
commitSha: extractSha(g.report) ?? 'unknown',
|
||||
spec: g.report.scenario.spec,
|
||||
})),
|
||||
};
|
||||
}
|
||||
|
||||
interface PerRunProjection {
|
||||
execPerSec: number;
|
||||
tailExecPerSec?: number;
|
||||
reqPerSec?: number;
|
||||
latency: { p50: number; p975: number; p99: number };
|
||||
greenProjection: number;
|
||||
}
|
||||
|
||||
// Projects "rate sustainable at green headroom" using this run's own CPU/lag.
|
||||
function projectRun(entry: CellGroupEntry): PerRunProjection {
|
||||
const t = entry.report.throughput;
|
||||
const execPerSec = t.execPerSec ?? 0;
|
||||
const mainCpu = entry.report.containers.find((c) => c.name === 'n8n-main')?.cpuPct ?? 0;
|
||||
const pgCpu = entry.report.containers.find((c) => c.name === 'postgres')?.cpuPct ?? 0;
|
||||
const lag =
|
||||
entry.report.services
|
||||
.filter((s): s is Extract<ServiceMetrics, { kind: 'n8n-main' }> => s.kind === 'n8n-main')
|
||||
.map((s) => (s.eventLoopLagSec ?? 0) * 1000)[0] ?? 0;
|
||||
|
||||
const mainFactor = mainCpu > 0 ? GREEN_THRESHOLDS.mainCpuPct / mainCpu : Infinity;
|
||||
const pgFactor = pgCpu > 0 ? GREEN_THRESHOLDS.pgCpuPct / pgCpu : Infinity;
|
||||
const lagFactor = lag > 0 ? GREEN_THRESHOLDS.eventLoopLagMs / lag : Infinity;
|
||||
const limit = Math.min(mainFactor, pgFactor, lagFactor);
|
||||
const greenProjection = Number.isFinite(limit) ? execPerSec * limit : execPerSec;
|
||||
|
||||
return {
|
||||
execPerSec,
|
||||
tailExecPerSec: t.tailExecPerSec,
|
||||
reqPerSec: t.reqPerSec,
|
||||
latency: {
|
||||
p50: t.p50Ms ?? 0,
|
||||
p975: t.p97_5Ms ?? 0,
|
||||
p99: t.p99Ms ?? 0,
|
||||
},
|
||||
greenProjection,
|
||||
};
|
||||
}
|
||||
|
||||
function headroomFromGroup(group: CellGroupEntry[]): ShapeResult['headroomAtCeiling'] {
|
||||
const mainCpu = mean(definedNumbers(group, (g) => containerValues(g, 'n8n-main', 'cpuPct')));
|
||||
const workerVals = group.flatMap((g) => containerValues(g, 'n8n-worker', 'cpuPct'));
|
||||
const workerDefined = workerVals.filter(isNumber);
|
||||
const workerCpu = workerDefined.length ? mean(workerDefined) : undefined;
|
||||
const pgCpuAvg = mean(definedNumbers(group, (g) => containerValues(g, 'postgres', 'cpuPct')));
|
||||
const pgCpuPeak = mean(
|
||||
definedNumbers(group, (g) => containerValues(g, 'postgres', 'cpuPctPeak')),
|
||||
);
|
||||
const eventLoopLagMs = mean(
|
||||
group.flatMap((g) =>
|
||||
g.report.services
|
||||
.filter((s): s is Extract<ServiceMetrics, { kind: 'n8n-main' }> => s.kind === 'n8n-main')
|
||||
.map((s) => (s.eventLoopLagSec ?? 0) * 1000),
|
||||
),
|
||||
);
|
||||
const pgBufferHit = mean(
|
||||
group.flatMap((g) =>
|
||||
g.report.services
|
||||
.filter((s): s is Extract<ServiceMetrics, { kind: 'postgres' }> => s.kind === 'postgres')
|
||||
.map((s) => s.saturation.bufferHitRatio ?? 0),
|
||||
),
|
||||
);
|
||||
|
||||
return {
|
||||
mainCpuPct: mainCpu,
|
||||
workerCpuPct: workerCpu,
|
||||
pgCpuPct: pgCpuAvg,
|
||||
pgCpuPctPeak: pgCpuPeak,
|
||||
pgBufferHit,
|
||||
eventLoopLagMs,
|
||||
};
|
||||
}
|
||||
|
||||
function ioFromGroup(group: CellGroupEntry[]): ShapeResult['io'] {
|
||||
const perRun = group.map((g) => derivePerRunIo(g.report));
|
||||
const workloadIopsPerSec = mean(perRun.map((r) => r.workload));
|
||||
const overheadIopsPerSec = mean(perRun.map((r) => r.overhead));
|
||||
const overheadFactor =
|
||||
workloadIopsPerSec > 0 ? (workloadIopsPerSec + overheadIopsPerSec) / workloadIopsPerSec : 0;
|
||||
const walMbPerSec = mean(perRun.map((r) => r.walMbPerSec));
|
||||
const walRecordsPerSec = mean(perRun.map((r) => r.walRecordsPerSec));
|
||||
return {
|
||||
workloadIopsPerSec,
|
||||
overheadIopsPerSec,
|
||||
overheadFactor,
|
||||
walMbPerSec,
|
||||
walRecordsPerSec,
|
||||
};
|
||||
}
|
||||
|
||||
function derivePerRunIo(report: RunReport): {
|
||||
workload: number;
|
||||
overhead: number;
|
||||
walMbPerSec: number;
|
||||
walRecordsPerSec: number;
|
||||
} {
|
||||
const elapsedSec = (report.duration.totalMs ?? 1) / 1000;
|
||||
const pg = report.services.find(
|
||||
(s): s is Extract<ServiceMetrics, { kind: 'postgres' }> => s.kind === 'postgres',
|
||||
);
|
||||
if (!pg) {
|
||||
return { workload: 0, overhead: 0, walMbPerSec: 0, walRecordsPerSec: 0 };
|
||||
}
|
||||
const sumIo = (filter: (b: string) => boolean) =>
|
||||
pg.io
|
||||
.filter((row) => filter(row.backendType))
|
||||
.reduce((acc, row) => acc + row.reads + row.writes + row.extends, 0);
|
||||
|
||||
const workloadOps = sumIo((b) => b === 'client backend');
|
||||
const overheadOps = sumIo(
|
||||
(b) =>
|
||||
b === 'background writer' ||
|
||||
b === 'checkpointer' ||
|
||||
b === 'walwriter' ||
|
||||
b === 'autovacuum worker' ||
|
||||
b === 'autovacuum launcher' ||
|
||||
b === 'standalone backend',
|
||||
);
|
||||
|
||||
return {
|
||||
workload: workloadOps / elapsedSec,
|
||||
overhead: overheadOps / elapsedSec,
|
||||
walMbPerSec: pg.saturation.walMbPerSec ?? 0,
|
||||
walRecordsPerSec: pg.saturation.walRecordsPerSec ?? 0,
|
||||
};
|
||||
}
|
||||
|
||||
function detectBottleneck(headroom: ShapeResult['headroomAtCeiling'], _p99: number): Bottleneck {
|
||||
// Classify on resource saturation, not p99 — for async modes p99 is
|
||||
// dominated by Bull queue wait and misranked PG-CPU as "network".
|
||||
const candidates: Array<[Bottleneck, number]> = [
|
||||
['main-cpu', headroom.mainCpuPct / AMBER_THRESHOLDS.mainCpuPct],
|
||||
['pg-cpu', headroom.pgCpuPct / AMBER_THRESHOLDS.pgCpuPct],
|
||||
];
|
||||
if (headroom.workerCpuPct !== undefined) {
|
||||
candidates.push(['worker-cpu', headroom.workerCpuPct / AMBER_THRESHOLDS.mainCpuPct]);
|
||||
}
|
||||
candidates.sort((a, b) => b[1] - a[1]);
|
||||
return candidates[0]?.[0] ?? 'main-cpu';
|
||||
}
|
||||
|
||||
function scoreVerdict(
|
||||
headroom: ShapeResult['headroomAtCeiling'],
|
||||
_p99: number,
|
||||
sampleCount: number,
|
||||
): Verdict {
|
||||
// Verdict gates on topology health (CPU/PG/event-loop), not p99 — same
|
||||
// rationale as detectBottleneck. p99 still surfaces in the cell display.
|
||||
if (sampleCount < 3) return 'amber';
|
||||
const anyRed =
|
||||
headroom.mainCpuPct > AMBER_THRESHOLDS.mainCpuPct ||
|
||||
headroom.pgCpuPct > AMBER_THRESHOLDS.pgCpuPct ||
|
||||
headroom.eventLoopLagMs > AMBER_THRESHOLDS.eventLoopLagMs;
|
||||
if (anyRed) return 'red';
|
||||
const anyAmber =
|
||||
headroom.mainCpuPct > GREEN_THRESHOLDS.mainCpuPct ||
|
||||
headroom.pgCpuPct > GREEN_THRESHOLDS.pgCpuPct ||
|
||||
headroom.eventLoopLagMs > GREEN_THRESHOLDS.eventLoopLagMs;
|
||||
return anyAmber ? 'amber' : 'green';
|
||||
}
|
||||
|
||||
function median(values: number[]): number {
|
||||
if (values.length === 0) return 0;
|
||||
const sorted = [...values].sort((a, b) => a - b);
|
||||
const mid = Math.floor(sorted.length / 2);
|
||||
if (sorted.length % 2 === 1) return sorted[mid] ?? 0;
|
||||
return ((sorted[mid - 1] ?? 0) + (sorted[mid] ?? 0)) / 2;
|
||||
}
|
||||
|
||||
function summarise(values: number[]): PercentileSummary {
|
||||
if (values.length === 0) return { p50: 0, p95: 0, max: 0, n: 0 };
|
||||
const sorted = [...values].sort((a, b) => a - b);
|
||||
return {
|
||||
p50: percentile(sorted, 0.5),
|
||||
p95: percentile(sorted, 0.95),
|
||||
max: sorted[sorted.length - 1] ?? 0,
|
||||
n: values.length,
|
||||
};
|
||||
}
|
||||
|
||||
function percentile(sortedAsc: number[], p: number): number {
|
||||
if (sortedAsc.length === 0) return 0;
|
||||
if (sortedAsc.length === 1) return sortedAsc[0] ?? 0;
|
||||
const idx = Math.min(sortedAsc.length - 1, Math.floor(p * sortedAsc.length));
|
||||
return sortedAsc[idx] ?? 0;
|
||||
}
|
||||
|
||||
function mean(values: number[]): number {
|
||||
if (values.length === 0) return 0;
|
||||
return values.reduce((acc, v) => acc + v, 0) / values.length;
|
||||
}
|
||||
|
||||
function isNumber(value: number | undefined): value is number {
|
||||
return typeof value === 'number' && !Number.isNaN(value);
|
||||
}
|
||||
|
||||
function containerValues(
|
||||
entry: CellGroupEntry,
|
||||
name: string,
|
||||
field: 'cpuPct' | 'cpuPctPeak',
|
||||
): Array<number | undefined> {
|
||||
return entry.report.containers.filter((c) => c.name === name).map((c) => c[field]);
|
||||
}
|
||||
|
||||
function definedNumbers(
|
||||
group: CellGroupEntry[],
|
||||
pick: (entry: CellGroupEntry) => Array<number | undefined>,
|
||||
): number[] {
|
||||
return group.flatMap(pick).filter(isNumber);
|
||||
}
|
||||
|
||||
function byScale(a: SizingCell, b: SizingCell): number {
|
||||
const order: Scale[] = ['S0', 'S1', 'S2', 'S3', 'S4'];
|
||||
return order.indexOf(a.scale) - order.indexOf(b.scale);
|
||||
}
|
||||
|
||||
function extractSha(report: RunReport): string | undefined {
|
||||
const value = report.scenario.dimensions['commitSha'];
|
||||
return typeof value === 'string' ? value : undefined;
|
||||
}
|
||||
|
||||
/** Renders a `SizingMatrix` to the customer-facing markdown guide. */
|
||||
export function renderMarkdown(matrix: SizingMatrix): string {
|
||||
const lines: string[] = [];
|
||||
lines.push('# n8n Self-Hosted Sizing Guide');
|
||||
lines.push('');
|
||||
lines.push(
|
||||
`*Generated for n8n \`${matrix.n8nVersion}\` · commit \`${matrix.commitSha}\` · ${matrix.runDate.slice(0, 10)}*`,
|
||||
);
|
||||
lines.push('');
|
||||
lines.push(
|
||||
`*Hardware baseline:* \`${matrix.hardware.runner}\` (${matrix.hardware.vcpu} vCPU / ${matrix.hardware.ramGb} GB).`,
|
||||
);
|
||||
lines.push('');
|
||||
lines.push(
|
||||
'> **Status:** generated from the benchmark substrate at every release. ' +
|
||||
'Cells with **verdict: amber** are single-run or low-sample — informational, ' +
|
||||
'not promotion-ready. Each cell ships **two throughput numbers** — the ' +
|
||||
'saturation **ceiling** observed in CI, and the **green-sustained** ' +
|
||||
'recommendation derived from headroom thresholds. Size to the green number.',
|
||||
);
|
||||
lines.push('');
|
||||
|
||||
const shapes: Shape[] = ['L', 'D', 'A', 'X'];
|
||||
for (const shape of shapes) {
|
||||
const cellsWithShape = matrix.cells.filter((c) => c.shapes[shape]);
|
||||
if (cellsWithShape.length === 0) continue;
|
||||
|
||||
lines.push(`## Shape ${shape}`);
|
||||
lines.push('');
|
||||
lines.push(renderShapeTable(cellsWithShape, shape));
|
||||
lines.push('');
|
||||
lines.push(renderShapeDetail(cellsWithShape, shape));
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
lines.push('---');
|
||||
lines.push('');
|
||||
lines.push('## Methodology');
|
||||
lines.push('');
|
||||
lines.push(
|
||||
'Cells are aggregated by the substrate at `packages/testing/playwright/utils/benchmark/sizing-matrix.ts` from per-run `run-report.json` files. ' +
|
||||
"Each cell's headline **ceiling** is the **max** `execPerSec` observed across cold runs (saturation point); full p50/p95/max distribution is shown in cell detail. " +
|
||||
'**green-sustained** is the ceiling scaled by headroom (main CPU < 75%, PG CPU < 70%, event-loop lag < 25 ms). ' +
|
||||
'**Workload IOPS** is `pg_stat_io` `client backend` reads+writes+extends per second; ' +
|
||||
'**overhead factor** is `(workload + bgwriter + checkpointer + walwriter + autovacuum) / workload`. ' +
|
||||
'WAL is reported separately (`MB/s`, `records/s`).',
|
||||
);
|
||||
lines.push('');
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
function renderShapeTable(cells: SizingCell[], shape: Shape): string {
|
||||
const header =
|
||||
'| Scale | Mains | Webhook procs | Workers | PG (vCPU/RAM) | Redis | Burst headroom (≤30 s) exec/s | **Green sustained** exec/s | Req/s | p99 ms | Verdict |';
|
||||
const sep = '|---|---:|---:|---:|---|---|---:|---:|---:|---:|---|';
|
||||
const rows: string[] = [header, sep];
|
||||
for (const cell of cells) {
|
||||
const result = cell.shapes[shape];
|
||||
if (!result) continue;
|
||||
rows.push(
|
||||
[
|
||||
`**${cell.scale}**`,
|
||||
`${cell.topology.mains}× (${cell.topology.mainVcpu} vCPU / ${cell.topology.mainRamGb} GB)`,
|
||||
cell.topology.webhookProcs > 0 ? `${cell.topology.webhookProcs}×` : '(in main)',
|
||||
cell.topology.workers > 0 && cell.topology.workerVcpu
|
||||
? `${cell.topology.workers}× (${cell.topology.workerVcpu} vCPU / ${cell.topology.workerRamGb} GB)`
|
||||
: '—',
|
||||
`${cell.topology.pgVcpu} vCPU / ${cell.topology.pgRamGb} GB`,
|
||||
`${cell.topology.redisVcpu} vCPU / ${cell.topology.redisRamGb} GB`,
|
||||
result.ceilingExecPerSec.max.toFixed(1),
|
||||
result.greenSustainedExecPerSec.toFixed(1),
|
||||
result.reqPerSec ? result.reqPerSec.p50.toFixed(1) : '—',
|
||||
result.latency.p99.toFixed(0),
|
||||
verdictBadge(result.verdict),
|
||||
].join(' | '),
|
||||
);
|
||||
}
|
||||
return rows.join('\n');
|
||||
}
|
||||
|
||||
function renderShapeDetail(cells: SizingCell[], shape: Shape): string {
|
||||
const lines: string[] = [];
|
||||
lines.push('<details>');
|
||||
lines.push(`<summary>Cell detail — Shape ${shape}</summary>`);
|
||||
lines.push('');
|
||||
for (const cell of cells) {
|
||||
const result = cell.shapes[shape];
|
||||
if (!result) continue;
|
||||
lines.push(`### ${cell.scale}-${shape} (n=${result.ceilingExecPerSec.n})`);
|
||||
lines.push('');
|
||||
lines.push('| Field | Value |');
|
||||
lines.push('|---|---|');
|
||||
lines.push(`| Ceiling exec/s (p50/p95/max) | ${fmtPercentile(result.ceilingExecPerSec)} |`);
|
||||
if (result.tailExecPerSec) {
|
||||
lines.push(`| Tail exec/s (steady-state) | ${fmtPercentile(result.tailExecPerSec)} |`);
|
||||
}
|
||||
lines.push(
|
||||
`| **Green sustained recommendation** | ${result.greenSustainedExecPerSec.toFixed(1)} exec/s |`,
|
||||
);
|
||||
lines.push(
|
||||
`| Latency p50 / p97.5 / p99 | ${result.latency.p50.toFixed(0)} / ${result.latency.p975.toFixed(0)} / ${result.latency.p99.toFixed(0)} ms |`,
|
||||
);
|
||||
lines.push(`| Main CPU at ceiling | ${result.headroomAtCeiling.mainCpuPct.toFixed(1)}% |`);
|
||||
if (result.headroomAtCeiling.workerCpuPct !== undefined) {
|
||||
lines.push(
|
||||
`| Worker CPU at ceiling | ${result.headroomAtCeiling.workerCpuPct.toFixed(1)}% |`,
|
||||
);
|
||||
}
|
||||
lines.push(
|
||||
`| PG CPU avg / peak | ${result.headroomAtCeiling.pgCpuPct.toFixed(1)}% / ${result.headroomAtCeiling.pgCpuPctPeak.toFixed(1)}% |`,
|
||||
);
|
||||
lines.push(
|
||||
`| PG buffer-hit ratio | ${(result.headroomAtCeiling.pgBufferHit * 100).toFixed(2)}% |`,
|
||||
);
|
||||
lines.push(`| Event-loop lag | ${result.headroomAtCeiling.eventLoopLagMs.toFixed(1)} ms |`);
|
||||
lines.push(`| **Workload IOPS** | ${result.io.workloadIopsPerSec.toFixed(0)} ops/s |`);
|
||||
lines.push(
|
||||
`| Overhead IOPS (autovac + bgwriter + checkpointer + walwriter) | ${result.io.overheadIopsPerSec.toFixed(0)} ops/s |`,
|
||||
);
|
||||
lines.push(`| **Overhead factor** | ${result.io.overheadFactor.toFixed(2)}× workload |`);
|
||||
lines.push(
|
||||
`| WAL throughput | ${result.io.walMbPerSec.toFixed(2)} MB/s · ${result.io.walRecordsPerSec.toFixed(0)} records/s |`,
|
||||
);
|
||||
lines.push(`| Bottleneck | \`${result.bottleneck}\` |`);
|
||||
lines.push(`| Verdict | ${verdictBadge(result.verdict)} |`);
|
||||
lines.push(`| Source runs | ${result.sourceRuns.length} |`);
|
||||
lines.push('');
|
||||
for (const src of result.sourceRuns) {
|
||||
lines.push(`- \`${src.spec}\``);
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
lines.push('</details>');
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
function verdictBadge(verdict: Verdict): string {
|
||||
if (verdict === 'green') return '🟢 green';
|
||||
if (verdict === 'amber') return '🟠 amber (n<3, low confidence)';
|
||||
return '🔴 red';
|
||||
}
|
||||
|
||||
function fmtPercentile(p: PercentileSummary): string {
|
||||
return `${p.p50.toFixed(1)} / ${p.p95.toFixed(1)} / ${p.max.toFixed(1)}`;
|
||||
}
|
||||
|
|
@ -64,16 +64,9 @@ export const QUEUE_JOBS_COMPLETED_QUERY = 'n8n_scaling_mode_queue_jobs_completed
|
|||
|
||||
/**
|
||||
* Returns the completion metric for the current Playwright project.
|
||||
*
|
||||
* `n8n_workflow_success_total` is emitted by both main and workers; in queue mode
|
||||
* each instance produces its own time series. The query wraps the metric in
|
||||
* `sum(last_over_time(...[5m]))` so VictoriaMetrics aggregates across instances
|
||||
* server-side, and the wide lookback tolerates transient scrape misses that
|
||||
* would otherwise drop a series and make the summed counter appear to regress.
|
||||
*
|
||||
* `n8n_scaling_mode_queue_jobs_completed` is the designed queue-mode metric but
|
||||
* it depends on ScalingService.scheduleQueueMetrics() emitting `job-counts-updated`
|
||||
* events at regular intervals — currently observed as 0 in CI.
|
||||
* `n8n_workflow_success_total` is per-receiver-instance; summed via
|
||||
* `sum(last_over_time(...[5m]))` for the system-wide total. The wide lookback
|
||||
* tolerates transient scrape misses.
|
||||
*/
|
||||
export function resolveMetricQuery(_testInfo: TestInfo): string {
|
||||
return WORKFLOW_SUCCESS_QUERY;
|
||||
|
|
@ -196,8 +189,8 @@ export async function waitForThroughput(
|
|||
}
|
||||
|
||||
/**
|
||||
* Reads the current value of the workflow success counter from VictoriaMetrics.
|
||||
* Returns 0 if the metric hasn't been scraped yet.
|
||||
* Reads the current value of the workflow completion counter from
|
||||
* VictoriaMetrics. Returns 0 if the metric hasn't been scraped yet.
|
||||
*/
|
||||
export async function getBaselineCounter(
|
||||
metrics: MetricsHelper,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user