From d848b812c084db8d73896d8d035fea979e8d271b Mon Sep 17 00:00:00 2001 From: "n8n-cat-bot[bot]" <283985454+n8n-cat-bot[bot]@users.noreply.github.com> Date: Thu, 28 May 2026 20:47:53 +0100 Subject: [PATCH] test: Surface n8n startup logs and readiness payload (#31020) Co-authored-by: n8n-cat-bot[bot] Co-authored-by: Claude Opus 4.7 Co-authored-by: Declan Carroll --- packages/testing/containers/helpers/utils.ts | 30 +++- packages/testing/containers/index.ts | 3 + packages/testing/containers/services/n8n.ts | 131 +++++++++++++----- packages/testing/containers/stack.ts | 9 +- .../testing/containers/startup-diagnostics.ts | 29 ++++ .../playwright/fixtures/observability.ts | 80 ++++++++++- 6 files changed, 241 insertions(+), 41 deletions(-) create mode 100644 packages/testing/containers/startup-diagnostics.ts diff --git a/packages/testing/containers/helpers/utils.ts b/packages/testing/containers/helpers/utils.ts index 90ce6914cf7..9e38625336e 100644 --- a/packages/testing/containers/helpers/utils.ts +++ b/packages/testing/containers/helpers/utils.ts @@ -1,6 +1,7 @@ import { setTimeout as wait } from 'node:timers/promises'; import type { Readable } from 'stream'; import type { StartedTestContainer } from 'testcontainers'; +import { Wait } from 'testcontainers'; /** * Create a logger that prefixes messages with elapsed time since creation. @@ -39,7 +40,34 @@ export function createSilentLogConsumer() { throw error; }; - return { consumer, throwWithLogs }; + const getLogs = (): string => logs.join('\n'); + + return { consumer, throwWithLogs, getLogs }; +} + +export function createReadinessProbe( + path: string, + port: number, + options: { startupTimeoutMs: number; readTimeoutMs: number }, +) { + let lastBody: string | null = null; + + // Body predicate must be registered before status predicate: HttpWaitStrategy + // short-circuits on the first `false`, so a status-first order would skip the + // body capture for the non-200 responses we want to record. + const strategy = Wait.forHttp(path, port) + .forResponsePredicate((body) => { + lastBody = body; + return true; + }) + .forStatusCode(200) + .withStartupTimeout(options.startupTimeoutMs) + .withReadTimeout(options.readTimeoutMs); + + return { + strategy, + getLastBody: (): string | null => lastBody, + }; } /** diff --git a/packages/testing/containers/index.ts b/packages/testing/containers/index.ts index c0c5bbfe250..5cf10cb61ec 100644 --- a/packages/testing/containers/index.ts +++ b/packages/testing/containers/index.ts @@ -9,6 +9,9 @@ export { createN8NStack } from './stack'; export type { N8NConfig, N8NStack } from './stack'; +export { consumeStartupFailure } from './startup-diagnostics'; +export type { N8NStartupDiagnostics } from './services/n8n'; + // K3s + Helm chart stack - for Kubernetes deployment validation export { createHelmStack } from './helm-stack'; export type { HelmStack, HelmStackConfig, HelmStackMode } from './helm-stack'; diff --git a/packages/testing/containers/services/n8n.ts b/packages/testing/containers/services/n8n.ts index 716b1a8e13c..850376b9bdf 100644 --- a/packages/testing/containers/services/n8n.ts +++ b/packages/testing/containers/services/n8n.ts @@ -1,13 +1,41 @@ import type { PortWithOptionalBinding, StartedNetwork, StartedTestContainer } from 'testcontainers'; -import { GenericContainer, Wait } from 'testcontainers'; +import { GenericContainer } from 'testcontainers'; import { DockerImageNotFoundError } from '../docker-image-not-found-error'; -import { createElapsedLogger, createSilentLogConsumer } from '../helpers/utils'; +import { + createElapsedLogger, + createReadinessProbe, + createSilentLogConsumer, +} from '../helpers/utils'; import { N8nImagePullPolicy } from '../n8n-image-pull-policy'; import { TEST_CONTAINER_IMAGES } from '../test-containers'; import type { FileToMount } from './types'; const N8N_IMAGE = TEST_CONTAINER_IMAGES.n8n; +// Must match N8N_PORT / QUEUE_HEALTH_CHECK_PORT defaults. +const N8N_READINESS_PORT = 5678; +const N8N_STARTUP_TIMEOUT_MS = 60_000; +// withReadTimeout doubles as the poll interval (testcontainers IntervalRetry); the +// default 1000ms leaves up to a second of stale-poll latency after the process is ready. +const N8N_READ_TIMEOUT_MS = 250; + +export interface N8NStartupDiagnostics { + logs: Record; + readinessPayloads: Record; +} + +export class N8NStartupError extends Error { + readonly diagnostics: N8NStartupDiagnostics; + + constructor(message: string, diagnostics: N8NStartupDiagnostics, cause?: unknown) { + super(message); + this.name = 'N8NStartupError'; + this.diagnostics = diagnostics; + if (cause !== undefined) { + (this as Error & { cause?: unknown }).cause = cause; + } + } +} const BASE_ENV: Record = { N8N_LOG_LEVEL: 'debug', @@ -28,17 +56,6 @@ const BASE_ENV: Record = { NODE_OPTIONS: '--expose-gc', }; -// Port 5678 must match N8N_PORT / QUEUE_HEALTH_CHECK_PORT defaults. -// If those defaults change, update the port here too. -// /healthz/readiness implies the port is listening, so a separate forListeningPorts is redundant. -// withReadTimeout doubles as the poll interval (testcontainers IntervalRetry); the default of 1000ms -// means we sit on a stale poll for up to a second after the process is actually ready. 250ms is -// tight enough to reclaim that latency without firing too many requests. -const N8N_WAIT_STRATEGY = Wait.forHttp('/healthz/readiness', 5678) - .forStatusCode(200) - .withStartupTimeout(60_000) - .withReadTimeout(250); - export interface N8NInstancesOptions { mains: number; workers: number; @@ -57,6 +74,7 @@ export interface N8NInstancesOptions { export interface N8NInstancesResult { containers: StartedTestContainer[]; environment: Record; + diagnostics: N8NStartupDiagnostics; } function computeEnvironment(options: N8NInstancesOptions): Record { @@ -119,13 +137,25 @@ interface SharedConfig { filesToMount?: FileToMount[]; } +interface ContainerStartResult { + container: StartedTestContainer; + getLogs: () => string; + getLastReadinessBody: () => string | null; +} + async function createContainer( instance: InstanceConfig, shared: SharedConfig, -): Promise { + diagnostics: N8NStartupDiagnostics, +): Promise { const { name, isWorker, instanceNumber, networkAlias, hostPort } = instance; const { projectName, environment, network, resourceQuota, filesToMount } = shared; - const { consumer, throwWithLogs } = createSilentLogConsumer(); + const { consumer, throwWithLogs, getLogs } = createSilentLogConsumer(); + const { strategy: waitStrategy, getLastBody: getLastReadinessBody } = createReadinessProbe( + '/healthz/readiness', + N8N_READINESS_PORT, + { startupTimeoutMs: N8N_STARTUP_TIMEOUT_MS, readTimeoutMs: N8N_READ_TIMEOUT_MS }, + ); let container = new GenericContainer(N8N_IMAGE) .withEnvironment(environment) @@ -153,21 +183,25 @@ async function createContainer( } const ports: PortWithOptionalBinding[] = hostPort - ? [{ container: 5678, host: hostPort }] - : [5678]; + ? [{ container: N8N_READINESS_PORT, host: hostPort }] + : [N8N_READINESS_PORT]; if (isWorker) { ports.push(5679); } - container = container.withExposedPorts(...ports).withWaitStrategy(N8N_WAIT_STRATEGY); + container = container.withExposedPorts(...ports).withWaitStrategy(waitStrategy); if (isWorker) { container = container.withCommand(['worker']); } try { - return await container.start(); + const started = await container.start(); + return { container: started, getLogs, getLastReadinessBody }; } catch (error: unknown) { + diagnostics.logs[name] = getLogs(); + diagnostics.readinessPayloads[name] = getLastReadinessBody(); + if (error instanceof Error && 'statusCode' in error) { const statusCode = (error as Error & { statusCode: number }).statusCode; if (statusCode === 404) { @@ -196,6 +230,7 @@ export async function createN8NInstances( const log = createElapsedLogger('n8n-instances'); const environment = computeEnvironment(options); const containers: StartedTestContainer[] = []; + const diagnostics: N8NStartupDiagnostics = { logs: {}, readinessPayloads: {} }; const mainShared: SharedConfig = { projectName, @@ -235,32 +270,58 @@ export async function createN8NInstances( // Service-only mode: no n8n containers needed if (instances.length === 0) { log('No n8n instances requested (service-only mode)'); - return { containers, environment }; + return { containers, environment, diagnostics }; } + const recordSuccess = (instance: InstanceConfig, result: ContainerStartResult) => { + diagnostics.logs[instance.name] = result.getLogs(); + diagnostics.readinessPayloads[instance.name] = result.getLastReadinessBody(); + }; + + const rethrowWithDiagnostics = (error: unknown): never => { + const message = + error instanceof Error ? error.message : `n8n instances failed to start: ${String(error)}`; + throw new N8NStartupError(message, diagnostics, error); + }; + // Start main 1 first (handles DB migrations/setup) const [main1, ...remaining] = instances; log(`Starting main 1: ${main1.name} (DB setup)`); - containers.push(await createContainer(main1, mainShared)); + let main1Result: ContainerStartResult; + try { + main1Result = await createContainer(main1, mainShared, diagnostics); + } catch (error) { + return rethrowWithDiagnostics(error); + } + recordSuccess(main1, main1Result); + containers.push(main1Result.container); log('main 1 ready'); // Start remaining instances in parallel if (remaining.length > 0) { log(`Starting ${remaining.length} remaining instances in parallel...`); - const parallelContainers = await Promise.all( - remaining.map(async (instance) => { - const type = instance.isWorker ? 'worker' : 'main'; - log(`Starting ${type} ${instance.instanceNumber}: ${instance.name}`); - const container = await createContainer( - instance, - instance.isWorker ? workerShared : mainShared, - ); - log(`${type} ${instance.instanceNumber} ready`); - return container; - }), - ); - containers.push(...parallelContainers); + try { + const parallelResults = await Promise.all( + remaining.map(async (instance) => { + const type = instance.isWorker ? 'worker' : 'main'; + log(`Starting ${type} ${instance.instanceNumber}: ${instance.name}`); + const result = await createContainer( + instance, + instance.isWorker ? workerShared : mainShared, + diagnostics, + ); + log(`${type} ${instance.instanceNumber} ready`); + return { instance, result }; + }), + ); + for (const { instance, result } of parallelResults) { + recordSuccess(instance, result); + containers.push(result.container); + } + } catch (error) { + return rethrowWithDiagnostics(error); + } } - return { containers, environment }; + return { containers, environment, diagnostics }; } diff --git a/packages/testing/containers/stack.ts b/packages/testing/containers/stack.ts index ee841d722a0..c269e246827 100644 --- a/packages/testing/containers/stack.ts +++ b/packages/testing/containers/stack.ts @@ -5,7 +5,8 @@ import { Network } from 'testcontainers'; import { createElapsedLogger, pollContainerHttpEndpoint } from './helpers/utils'; import { waitForNetworkQuiet } from './network-stabilization'; import type { LoadBalancerResult } from './services/load-balancer'; -import { createN8NInstances } from './services/n8n'; +import type { N8NStartupDiagnostics } from './services/n8n'; +import { createN8NInstances, N8NStartupError } from './services/n8n'; import { helperFactories, services } from './services/registry'; import type { FileToMount, @@ -18,6 +19,7 @@ import type { StackConfig, StartContext, } from './services/types'; +import { recordStartupFailure } from './startup-diagnostics'; import { createTelemetryRecorder } from './telemetry'; const SERVICE_REGISTRY: Record = services; @@ -37,6 +39,7 @@ export interface N8NStack { stopContainer: (namePattern: string | RegExp) => Promise; /** Direct URLs to each main instance (bypasses load balancer). Index 0 = main-1, etc. */ mainUrls: string[]; + startupDiagnostics: N8NStartupDiagnostics; } function shouldServiceStart(name: ServiceName, service: Service, ctx: StartContext): boolean { @@ -327,9 +330,13 @@ export async function createN8NStack(config: N8NConfig = {}): Promise return container ? await container.stop() : null; }, mainUrls, + startupDiagnostics: n8nResult.diagnostics, }; } catch (error) { const message = error instanceof Error ? error.message : String(error); + if (error instanceof N8NStartupError) { + recordStartupFailure(uniqueProjectName, error.diagnostics, message); + } telemetry.flush(false, message); throw error; } diff --git a/packages/testing/containers/startup-diagnostics.ts b/packages/testing/containers/startup-diagnostics.ts new file mode 100644 index 00000000000..51fb580bcf4 --- /dev/null +++ b/packages/testing/containers/startup-diagnostics.ts @@ -0,0 +1,29 @@ +import type { N8NStartupDiagnostics } from './services/n8n'; + +// Module-global because the playwright `n8nContainer` worker fixture re-throws +// when `createN8NStack` fails, so dependent fixtures only see `n8nContainer === +// undefined` and have no fixture-graph path to the captured diagnostics. Scoped +// per worker — playwright workers are separate processes. +let latestStartupFailure: { + projectName: string; + diagnostics: N8NStartupDiagnostics; + message: string; +} | null = null; + +export function recordStartupFailure( + projectName: string, + diagnostics: N8NStartupDiagnostics, + message: string, +): void { + latestStartupFailure = { projectName, diagnostics, message }; +} + +export function consumeStartupFailure(): { + projectName: string; + diagnostics: N8NStartupDiagnostics; + message: string; +} | null { + const failure = latestStartupFailure; + latestStartupFailure = null; + return failure; +} diff --git a/packages/testing/playwright/fixtures/observability.ts b/packages/testing/playwright/fixtures/observability.ts index f469ebcdf38..9e47f8b9a38 100644 --- a/packages/testing/playwright/fixtures/observability.ts +++ b/packages/testing/playwright/fixtures/observability.ts @@ -1,4 +1,6 @@ import type { Fixtures, TestInfo } from '@playwright/test'; +import type { N8NStartupDiagnostics } from 'n8n-containers'; +import { consumeStartupFailure } from 'n8n-containers'; import type { N8NStack } from 'n8n-containers/stack'; export type ObservabilityTestFixtures = { @@ -23,6 +25,53 @@ function tryGetObservability(stack: N8NStack | undefined) { } } +const STARTUP_PROFILE_TAG = '@startup-profile'; + +function shouldAlwaysAttachStartup(testInfo: TestInfo): boolean { + if (process.env.CONTAINER_TELEMETRY_VERBOSE === '1') return true; + return testInfo.tags.includes(STARTUP_PROFILE_TAG); +} + +function formatStartupLogs(diagnostics: N8NStartupDiagnostics): string { + const entries = Object.entries(diagnostics.logs).sort(([a], [b]) => a.localeCompare(b)); + if (entries.length === 0) return ''; + return entries.map(([name, body]) => `=== ${name} ===\n${body}`).join('\n\n'); +} + +function formatReadinessPayloads(diagnostics: N8NStartupDiagnostics): string { + const entries = Object.entries(diagnostics.readinessPayloads).sort(([a], [b]) => + a.localeCompare(b), + ); + if (entries.length === 0) return ''; + return entries + .map( + ([name, body]) => + `=== ${name} ===\n${body ?? '(no /healthz/readiness response observed before timeout)'}`, + ) + .join('\n\n'); +} + +async function attachStartupDiagnostics( + diagnostics: N8NStartupDiagnostics, + testInfo: TestInfo, +): Promise { + const startupLogs = formatStartupLogs(diagnostics); + if (startupLogs) { + await testInfo.attach('n8n-startup-logs.txt', { + body: startupLogs, + contentType: 'text/plain', + }); + } + + const readinessPayloads = formatReadinessPayloads(diagnostics); + if (readinessPayloads) { + await testInfo.attach('n8n-readiness-payload.txt', { + body: readinessPayloads, + contentType: 'text/plain', + }); + } +} + async function attachLogsOnFailure( stack: N8NStack, testInfo: TestInfo, @@ -105,10 +154,33 @@ export const observabilityFixtures: Fixtures< async ({ n8nContainer }, use, testInfo) => { await use(undefined); - // n8nContainer is undefined when fixture setup failed (e.g. postgres timeout); - // observability may be unconfigured for this project (sqlite:e2e). - // Both cases must be handled gracefully so teardown never masks the real failure. - if (testInfo.status === testInfo.expectedStatus) return; + const isFailure = testInfo.status !== testInfo.expectedStatus; + const alwaysAttach = shouldAlwaysAttachStartup(testInfo); + + // n8nContainer is undefined when createN8NStack threw before returning, + // so observability/metrics aren't queryable. Drain whatever diagnostics + // the container service stashed before re-throwing. + if (!n8nContainer) { + if (!isFailure) return; + const failure = consumeStartupFailure(); + if (!failure) return; + try { + await attachStartupDiagnostics(failure.diagnostics, testInfo); + } catch (error) { + console.warn('Failed to attach n8n startup diagnostics:', error); + } + return; + } + + if (alwaysAttach) { + try { + await attachStartupDiagnostics(n8nContainer.startupDiagnostics, testInfo); + } catch (error) { + console.warn('Failed to attach n8n startup diagnostics:', error); + } + } + + if (!isFailure) return; if (!tryGetObservability(n8nContainer)) return; await Promise.all([