feat(editor): Evaluations canvas info card (no-changelog) (#31318)

This commit is contained in:
Benjamin Schroth 2026-06-04 16:44:13 +02:00 committed by GitHub
parent ac4778bb5c
commit 52f0960cea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 560 additions and 0 deletions

View File

@ -5439,6 +5439,10 @@
"evaluations.wizardSidepanel.step3.outputPlaceholder": "Run the evaluation to view the captured output.",
"evaluations.wizardSidepanel.step3.noRun": "Run the evaluation to see results.",
"evaluations.wizardSidepanel.hydrate.error": "Couldn't load your previous setup. Starting with a blank wizard.",
"evaluations.canvasInfoCard.title": "Set up evaluations for workflows",
"evaluations.canvasInfoCard.description": "Catch issues early and build with confidence with automated tests using model-graded checks.",
"evaluations.canvasInfoCard.setup": "Setup",
"evaluations.canvasInfoCard.dismiss": "Dismiss",
"evaluations.emptyState.title": "Evaluations",
"evaluations.emptyState.description": "Evals are automated tests that evaluate agent workflow outputs using model-graded checks.",
"evaluations.emptyState.catchIssues.title": "Catch issues early",

View File

@ -34,3 +34,5 @@ export const LOCAL_STORAGE_BROWSER_NOTIFICATION_METADATA = 'N8N_BROWSER_NOTIFICA
export const LOCAL_STORAGE_FLOATING_CHAT_WINDOW = 'N8N_FLOATING_CHAT_WINDOW';
export const LOCAL_STORAGE_AGENT_BUILDER_CHAT_PANEL_WIDTH = 'N8N_AGENT_BUILDER_CHAT_PANEL_WIDTH';
export const LOCAL_STORAGE_PARALLEL_EVAL_BY_WORKFLOW = 'N8N_PARALLEL_EVAL_BY_WORKFLOW';
// localStorage key used by the evaluations canvas info card to remember, as a
// comma-separated list of workflow ids, where the card has been dismissed.
export const LOCAL_STORAGE_EVALUATIONS_CANVAS_INFO_CARD_DISMISSED =
'N8N_EVALUATIONS_CANVAS_INFO_CARD_DISMISSED';

View File

@ -131,6 +131,7 @@ import { useFocusPanelStore } from '@/app/stores/focusPanel.store';
import { useEmptyStateBuilderPromptStore } from '@/experiments/emptyStateBuilderPrompt/stores/emptyStateBuilderPrompt.store';
import { useEvaluationsWizardSidepanelStore } from '@/features/ai/evaluation.ee/wizardSidepanel.store';
import { useEvaluationsWizardSidepanelExperiment } from '@/experiments/evaluationsWizardSidepanel/useEvaluationsWizardSidepanelExperiment';
import EvaluationsCanvasInfoCard from '@/features/ai/evaluation.ee/components/EvaluationsCanvasInfoCard/EvaluationsCanvasInfoCard.vue';
import { useChatPanelStore } from '@/features/ai/assistant/chatPanel.store';
import { useChatHubPanelStore } from '@/features/ai/chatHub/chatHubPanel.store';
import { useKeybindings } from '@/app/composables/useKeybindings';
@ -2004,6 +2005,10 @@ onBeforeUnmount(() => {
<Suspense v-if="!isCanvasReadOnly">
<LazySetupWorkflowCredentialsButton :class="$style.setupCredentialsButtonWrapper" />
</Suspense>
<EvaluationsCanvasInfoCard
v-if="!isCanvasReadOnly"
:class="$style.evaluationsCanvasInfoCardWrapper"
/>
<div v-if="!isCanvasReadOnly || canExecuteOnCanvas" :class="$style.executionButtons">
<CanvasRunWorkflowButton
v-if="isRunWorkflowButtonVisible"
@ -2124,6 +2129,8 @@ onBeforeUnmount(() => {
</template>
<style lang="scss" module>
@use '@n8n/design-system/css/common/var';
.wrapper {
display: flex;
width: 100%;
@ -2157,6 +2164,13 @@ onBeforeUnmount(() => {
top: var(--spacing--sm);
}
// Anchors the evaluations info card to the bottom-left corner (inset by the
// `lg` spacing token) and stacks it at the design system's popper z-index.
.evaluationsCanvasInfoCardWrapper {
position: absolute;
left: var(--spacing--lg);
bottom: var(--spacing--lg);
z-index: var.$index-popper;
}
.readOnlyEnvironmentNotification {
position: absolute;
bottom: 16px;

View File

@ -0,0 +1,180 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { ref, nextTick } from 'vue';
import userEvent from '@testing-library/user-event';
import { createComponentRenderer } from '@/__tests__/render';
// Mutable refs backing the mocked workflow document store. Tests flip these
// to drive the component's gating conditions.
const mockAllNodes = ref<Array<{ name: string; type: string }>>([]);
const mockActive = ref(true);
const mockWorkflowId = ref('wf-1');
// Workflow document store: exposes only `workflowId`, `active` and `allNodes`,
// each as a getter so reads always reflect the current ref value.
vi.mock('@/app/stores/workflowDocument.store', () => ({
injectWorkflowDocumentStore: () => ({
value: {
get workflowId() {
return mockWorkflowId.value;
},
get active() {
return mockActive.value;
},
get allNodes() {
return mockAllNodes.value;
},
},
}),
}));
// i18n: keep the real module but make `baseText` return a predictable
// `mocked-<key>` string.
vi.mock('@n8n/i18n', async (importOriginal) => ({
...(await importOriginal()),
useI18n: () => ({ baseText: (key: string) => `mocked-${key}` }),
}));
// Root store: the component only forwards `restApiContext` to the API call.
vi.mock('@n8n/stores/useRootStore', () => ({
useRootStore: () => ({ restApiContext: {} }),
}));
// Wizard store: `open()` is exercised by the CTA; `isOpen` is read through a
// getter backed by `wizardIsOpen` so tests can toggle it.
const wizardOpen = vi.fn();
const wizardIsOpen = ref(false);
vi.mock('../../wizardSidepanel.store', () => ({
useEvaluationsWizardSidepanelStore: () => ({
open: wizardOpen,
get isOpen() {
return wizardIsOpen.value;
},
}),
}));
// Experiment flag: the same ref instance is handed to the component, so
// changing `isFeatureEnabled.value` in a test is observed reactively.
const isFeatureEnabled = ref(true);
vi.mock('@/experiments/evaluationsWizardSidepanel/useEvaluationsWizardSidepanelExperiment', () => ({
useEvaluationsWizardSidepanelExperiment: () => ({ isFeatureEnabled }),
}));
// Evaluation API: calls are forwarded to a spy so each test can set the
// resolved configs and assert on the arguments.
const listEvaluationConfigs = vi.fn();
vi.mock('../../evaluation.api', () => ({
listEvaluationConfigs: (...args: unknown[]) => listEvaluationConfigs(...args),
}));
import EvaluationsCanvasInfoCard from './EvaluationsCanvasInfoCard.vue';
const renderComponent = createComponentRenderer(EvaluationsCanvasInfoCard);
// Node fixtures. NOTE(review): the langchain agent type is presumably what
// `useAiRootNodes` (not mocked here) recognises as an AI root node — confirm.
const AI_NODE = { name: 'AI Agent', type: '@n8n/n8n-nodes-langchain.agent' };
const PLAIN_NODE = { name: 'Set', type: 'n8n-nodes-base.set' };
/**
 * Behavioural spec for `EvaluationsCanvasInfoCard`.
 *
 * The card only becomes visible after the (mocked) `listEvaluationConfigs`
 * promise resolves and Vue flushes the resulting re-render. A bare
 * `await nextTick()` resumes the test *before* that render, so a negative
 * assertion made right after it can pass even when the component is broken.
 * Tests whose only signal is "the card is absent" therefore wait with
 * `flushFetchAndRender()` instead.
 */
describe('EvaluationsCanvasInfoCard', () => {
	/** Lets the pending configs fetch settle (macrotask) and Vue re-render. */
	const flushFetchAndRender = async () => {
		await new Promise((resolve) => setTimeout(resolve, 0));
		await nextTick();
	};

	beforeEach(() => {
		mockAllNodes.value = [PLAIN_NODE, AI_NODE];
		mockActive.value = true;
		// Fresh id per test so dismiss state can never leak between tests.
		mockWorkflowId.value = `wf-${Math.random().toString(36).slice(2, 8)}`;
		isFeatureEnabled.value = true;
		wizardOpen.mockReset();
		wizardIsOpen.value = false;
		listEvaluationConfigs.mockReset();
		listEvaluationConfigs.mockResolvedValue([]);
		localStorage.clear();
	});

	it('renders when all conditions match (active + AI node + no configs + not dismissed)', async () => {
		const { findByTestId } = renderComponent();
		await findByTestId('evaluations-canvas-info-card');
		expect(listEvaluationConfigs).toHaveBeenCalled();
	});

	it('hides when the experiment flag is off', async () => {
		isFeatureEnabled.value = false;
		const { queryByTestId } = renderComponent();
		await nextTick();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();
		expect(listEvaluationConfigs).not.toHaveBeenCalled();
	});

	it('hides when the workflow is not active', async () => {
		mockActive.value = false;
		const { queryByTestId } = renderComponent();
		await nextTick();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();
		expect(listEvaluationConfigs).not.toHaveBeenCalled();
	});

	it('hides when the workflow has no AI root node', async () => {
		mockAllNodes.value = [PLAIN_NODE];
		const { queryByTestId } = renderComponent();
		await nextTick();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();
		expect(listEvaluationConfigs).not.toHaveBeenCalled();
	});

	it('hides once at least one evaluation config exists', async () => {
		listEvaluationConfigs.mockResolvedValue([{ id: 'c1' }]);
		const { queryByTestId } = renderComponent();
		await flushFetchAndRender();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();
	});

	it('dismisses per-workflow and persists across remounts', async () => {
		const wfId = mockWorkflowId.value;
		const { findByTestId, queryByTestId, unmount } = renderComponent();
		const dismissBtn = await findByTestId('evaluations-canvas-info-card-dismiss');
		await userEvent.click(dismissBtn);
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();
		unmount();

		// Same workflow id → stays dismissed.
		mockWorkflowId.value = wfId;
		const { queryByTestId: queryByTestIdRetry } = renderComponent();
		// Wait long enough that the card *would* have rendered had the dismissal
		// not been persisted; otherwise this assertion is vacuous.
		await flushFetchAndRender();
		expect(queryByTestIdRetry('evaluations-canvas-info-card')).not.toBeInTheDocument();
	});

	it('opens the wizard when the setup CTA is clicked', async () => {
		const { findByTestId } = renderComponent();
		const setupBtn = await findByTestId('evaluations-canvas-info-card-setup');
		await userEvent.click(setupBtn);
		expect(wizardOpen).toHaveBeenCalledWith(0);
	});

	it('hides while the evaluations wizard is already open', async () => {
		wizardIsOpen.value = true;
		const { queryByTestId } = renderComponent();
		// The configs fetch still runs here; let it resolve and render so the
		// assertion proves the open wizard (not timing) keeps the card hidden.
		await flushFetchAndRender();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();
	});

	it('re-checks configs when switching to another qualifying workflow', async () => {
		// First workflow has a config → card stays hidden.
		listEvaluationConfigs.mockResolvedValue([{ id: 'c1' }]);
		const { queryByTestId, findByTestId } = renderComponent();
		await flushFetchAndRender();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();

		// Switch to a second qualifying workflow that has no configs. The card
		// must re-fetch and surface, not reuse the first workflow's result.
		listEvaluationConfigs.mockResolvedValue([]);
		mockWorkflowId.value = 'wf-2';
		await findByTestId('evaluations-canvas-info-card');
		expect(listEvaluationConfigs).toHaveBeenCalledWith(expect.anything(), 'wf-2');
	});

	it('re-checks configs when the wizard closes after a successful run', async () => {
		const { findByTestId, queryByTestId } = renderComponent();
		await findByTestId('evaluations-canvas-info-card');

		// Open the wizard — card hides while it's open.
		wizardIsOpen.value = true;
		await nextTick();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();

		// User creates a config inside the wizard, then closes it. The next
		// configs fetch must reflect the new config, otherwise the card pops
		// back up.
		listEvaluationConfigs.mockResolvedValue([{ id: 'c1' }]);
		wizardIsOpen.value = false;
		await flushFetchAndRender();
		expect(queryByTestId('evaluations-canvas-info-card')).not.toBeInTheDocument();
	});
});

View File

@ -0,0 +1,360 @@
<script setup lang="ts">
import { computed, ref, watch } from 'vue';
import { useI18n } from '@n8n/i18n';
import { N8nButton, N8nIcon, N8nText } from '@n8n/design-system';
import { useEvaluationsWizardSidepanelExperiment } from '@/experiments/evaluationsWizardSidepanel/useEvaluationsWizardSidepanelExperiment';
import { useEvaluationsWizardSidepanelStore } from '../../wizardSidepanel.store';
import { useAiRootNodes } from '../../composables/useAiRootNodes';
import { listEvaluationConfigs } from '../../evaluation.api';
import { injectWorkflowDocumentStore } from '@/app/stores/workflowDocument.store';
import { useRootStore } from '@n8n/stores/useRootStore';
import { useStorage } from '@/app/composables/useStorage';
import { LOCAL_STORAGE_EVALUATIONS_CANVAS_INFO_CARD_DISMISSED } from '@/app/constants';
import { CANNED_METRICS, LLM_JUDGE_METRIC_KEYS } from '../../evaluation.constants';
import CheckCard from '../WizardSidepanel/CheckCard.vue';
// Data for the preview cards that scroll in the marquee at the top of the info
// card. The canned metric list is rendered twice back to back so the CSS
// marquee can wrap around — a single copy would visibly snap back to the top.
// Each entry gains an `isJudge` flag that is true only for keys present in
// `LLM_JUDGE_METRIC_KEYS`; the template uses it to decide whether to show the
// LLM-as-Judge badge, so deterministic metrics are never labelled judge-based.
const marqueeMetrics = computed(() => {
	const doubled = [...CANNED_METRICS, ...CANNED_METRICS];
	return doubled.map((metric) => {
		const isJudge = LLM_JUDGE_METRIC_KEYS.has(metric.key);
		return { ...metric, isJudge };
	});
});
// Floating onboarding card in the bottom-left of the canvas, nudging users
// with an active AI workflow toward setting up evaluations. Conditions:
//
// - experiment flag on
// - workflow is active (published)
// - workflow has at least one AI root node
// - no evaluation configs exist for this workflow
// - the card hasn't been dismissed for this workflow before
//
// All five must hold (and the card is additionally hidden while the wizard is
// open — see `isVisible`). The dismiss state is scoped per-workflow so
// different workflows surface the prompt independently — this matches the
// per-workflow nature of evaluation configs.
const locale = useI18n();
const wizardStore = useEvaluationsWizardSidepanelStore();
const workflowDocumentStore = injectWorkflowDocumentStore();
const rootStore = useRootStore();
const aiRootNodes = useAiRootNodes();
const { isFeatureEnabled: isEvaluationsWizardSidepanelEnabled } =
useEvaluationsWizardSidepanelExperiment();
// Tri-state result of the configs lookup: `null` = not known yet (nothing
// fetched, or reset), `false` = the fetch returned no configs, `true` = at
// least one config exists (also used as the fallback when the fetch fails).
const hasConfigs = ref<boolean | null>(null);
// Workflow id of the fetch currently in flight, used to (a) dedupe concurrent
// fetches for the same workflow and (b) still allow a fresh fetch when the
// user switches to a different workflow mid-request. Plain variable — it's a
// control flag, nothing in the template reacts to it.
let inFlightWorkflowId: string | null = null;
// Dismissals are kept under one localStorage key
// (LOCAL_STORAGE_EVALUATIONS_CANVAS_INFO_CARD_DISMISSED) as a comma-separated
// list of workflow ids. One key per workflow would also work, but it clutters
// localStorage's flat keyspace — a single key plus Set membership is cheaper.
const dismissedStorage = useStorage(LOCAL_STORAGE_EVALUATIONS_CANVAS_INFO_CARD_DISMISSED);

// Parsed view of the stored list. A missing/empty value yields an empty set,
// and blank segments (e.g. from stray commas) are discarded.
const dismissedSet = computed<Set<string>>(() => {
	const serialized = dismissedStorage.value;
	const ids = serialized ? serialized.split(',').filter((id) => id !== '') : [];
	return new Set(ids);
});
// Id of the workflow on the canvas; empty string when the store (or id) is missing.
const workflowId = computed(() => {
	const id = workflowDocumentStore.value?.workflowId;
	return id ?? '';
});

// Whether the workflow is active; treated as inactive when the store is missing.
const isWorkflowActive = computed(() => {
	const active = workflowDocumentStore.value?.active;
	return active ?? false;
});

// True when `useAiRootNodes` reports at least one node.
const hasAiNodes = computed(() => {
	const { length } = aiRootNodes.value;
	return length > 0;
});

// A workflow without an id counts as dismissed, so the card never shows for it.
const isDismissed = computed(
	() => !workflowId.value || dismissedSet.value.has(workflowId.value),
);
// Cheap, purely local eligibility check, evaluated before any network call —
// there is no point in hitting the API when one of these already rules the
// card out. Checked in order: experiment flag, active workflow, AI nodes
// present, not dismissed for this workflow.
const shouldRenderModuleQualifies = computed(() => {
	if (!isEvaluationsWizardSidepanelEnabled.value) return false;
	if (!isWorkflowActive.value) return false;
	if (!hasAiNodes.value) return false;
	return !isDismissed.value;
});
// Final visibility switch for the card. On top of the local eligibility check
// it requires a *confirmed* "no configs" answer (`hasConfigs === false`; the
// unknown `null` state keeps the card hidden) and a closed wizard — the card
// exists to surface that flow, so showing it under the open sidepanel would be
// redundant and visually noisy. `wizardStore.isOpen` is itself derived from the
// focus panel store, so this stays reactive across both the wizard's own
// open/close actions and direct focus-panel tab switches.
const isVisible = computed(() => {
	if (!shouldRenderModuleQualifies.value) return false;
	if (hasConfigs.value !== false) return false;
	return !wizardStore.isOpen;
});
/**
 * Fetches the evaluation configs of the current workflow and records in
 * `hasConfigs` whether any exist.
 *
 * - No-op when there is no workflow id or a fetch for the same workflow is
 *   already in flight (that call will settle the state).
 * - A result is only applied if the workflow on the canvas is still the one
 *   the request was made for, so a slow response for a previous workflow
 *   cannot clobber the current one's state.
 * - API failures never propagate to the canvas: they are treated as "configs
 *   exist", which hides the card rather than nagging the user while the
 *   server is misbehaving.
 */
async function checkConfigs() {
	const requestedId = workflowId.value;
	if (!requestedId || inFlightWorkflowId === requestedId) return;

	inFlightWorkflowId = requestedId;

	// Writes the outcome unless the user switched workflows in the meantime.
	const applyIfStillCurrent = (value: boolean) => {
		if (requestedId === workflowId.value) hasConfigs.value = value;
	};

	try {
		const configs = await listEvaluationConfigs(rootStore.restApiContext, requestedId);
		applyIfStillCurrent(configs.length > 0);
	} catch {
		applyIfStillCurrent(true);
	} finally {
		// Only release the marker if a newer request has not replaced it.
		if (inFlightWorkflowId === requestedId) inFlightWorkflowId = null;
	}
}
// Keeps `hasConfigs` in step with local eligibility. When the predicates flip
// false → true (and nothing is cached yet) the configs are fetched; when they
// flip back, the cached answer is discarded. This avoids a request on every
// workflow load where the card could not render anyway. `immediate` makes the
// initial mount go through the same path.
watch(
	shouldRenderModuleQualifies,
	(qualifies) => {
		if (qualifies) {
			if (hasConfigs.value === null) void checkConfigs();
		} else {
			hasConfigs.value = null;
		}
	},
	{ immediate: true },
);
// Handles switching between workflows. The eligibility watcher only reacts to
// transitions, so moving between two *qualifying* workflows would not trigger
// it — the card would then decide visibility from the previous workflow's
// configs. The cached answer is cleared first so the card stays hidden until
// the new workflow's configs have come back.
watch(workflowId, () => {
	hasConfigs.value = null;
	if (!shouldRenderModuleQualifies.value) return;
	void checkConfigs();
});
// Re-check configs when the wizard closes — the user may have just completed
// it and created a config. The cached `hasConfigs === false` from the initial
// fetch is reset to `null` *before* re-fetching: otherwise `isVisible` turns
// true the moment the wizard closes and the card flashes over a workflow that
// now has a config until the request resolves. With the reset the card stays
// hidden until the fresh answer arrives (and reappears only if there really
// are no configs). If a fetch for this workflow is already in flight,
// `checkConfigs` dedupes and that request settles the state instead.
watch(
	() => wizardStore.isOpen,
	(isOpen, wasOpen) => {
		const justClosed = wasOpen && !isOpen;
		if (!justClosed || !shouldRenderModuleQualifies.value) return;
		hasConfigs.value = null;
		void checkConfigs();
	},
);
/**
 * Marks the card as dismissed for the current workflow by adding its id to
 * the persisted comma-separated list. Does nothing without a workflow id.
 */
function dismiss() {
	const currentId = workflowId.value;
	if (currentId) {
		const ids = new Set([...dismissedSet.value, currentId]);
		dismissedStorage.value = Array.from(ids).join(',');
	}
}

/** Setup CTA handler: opens the evaluations wizard via the wizard store. */
function openWizard() {
	wizardStore.open(0);
}
</script>
<template>
<aside
v-if="isVisible"
:class="$style.card"
role="complementary"
data-test-id="evaluations-canvas-info-card"
>
<button
type="button"
:class="$style.dismiss"
:aria-label="locale.baseText('evaluations.canvasInfoCard.dismiss')"
data-test-id="evaluations-canvas-info-card-dismiss"
@click="dismiss"
>
<N8nIcon icon="x" size="small" />
</button>
<!--
Marquee preview of canned checks. Pure CSS marquee — the list is
duplicated in `marqueeMetrics` and the animation translates the
track by 50% so the loop reads as seamless. Hidden from assistive
tech (`aria-hidden`); the heading + description below carry the
actual message.
Renders `CheckCard` in `readonly` mode so the preview reads as the
same component family the user will see inside the wizard. The card
handles its own icon/title/badge layout so we only feed it data —
click handlers and selected marks stay off via `readonly`.
-->
<div :class="$style.marquee" aria-hidden="true">
<ul :class="$style.marqueeTrack">
<li
v-for="(metric, index) in marqueeMetrics"
:key="`${metric.key}-${index}`"
:class="$style.marqueeItem"
>
<CheckCard
:icon="metric.icon"
:icon-bg="metric.tileBg"
:icon-fg="metric.tileFg"
:title="locale.baseText(metric.labelKey)"
:description="locale.baseText(metric.descriptionKey)"
:badge="
metric.isJudge
? locale.baseText('evaluations.wizardSidepanel.metric.judgeTag')
: undefined
"
readonly
/>
</li>
</ul>
</div>
<div :class="$style.body">
<N8nText size="small" bold color="text-dark">
{{ locale.baseText('evaluations.canvasInfoCard.title') }}
</N8nText>
<N8nText size="small" color="text-base" :class="$style.description">
{{ locale.baseText('evaluations.canvasInfoCard.description') }}
</N8nText>
</div>
<div :class="$style.footer">
<N8nButton
variant="solid"
size="medium"
type="button"
:class="$style.setupButton"
data-test-id="evaluations-canvas-info-card-setup"
@click="openWizard"
>
{{ locale.baseText('evaluations.canvasInfoCard.setup') }}
</N8nButton>
</div>
</aside>
</template>
<style module lang="scss">
// Card shell: a fixed-width (280px) vertical stack. `position: relative` makes
// it the containing block for the absolutely positioned dismiss button, and
// `overflow: hidden` clips content to the card's box.
.card {
position: relative;
display: flex;
flex-direction: column;
gap: var(--spacing--xs);
width: 280px;
padding: var(--spacing--sm);
background-color: var(--background--surface);
border: var(--border);
border-radius: var(--border-radius--base);
box-shadow: var(--shadow--md);
overflow: hidden;
}
// Icon-only close button pinned to the card's top-right corner. Default button
// chrome (padding, background, border) is removed; hover and keyboard focus
// share the same highlight, and keyboard focus additionally gets an outline.
.dismiss {
position: absolute;
top: var(--spacing--3xs);
right: var(--spacing--3xs);
display: inline-flex;
align-items: center;
justify-content: center;
width: 22px;
height: 22px;
padding: 0;
background: transparent;
border: none;
border-radius: var(--border-radius--base);
color: var(--color--text--tint-1);
cursor: pointer;
&:hover,
&:focus-visible {
color: var(--color--text);
background-color: var(--background--subtle);
}
&:focus-visible {
outline: 1px solid var(--focus--border-color);
}
}
// Fixed-height viewport for the scrolling preview list.
// Mask the marquee at top/bottom so cards fade in/out instead of clipping.
// The track inside animates toward -50% so the duplicated half of the list
// takes the place of the first — the intent is no visible reset on each cycle.
// Sized to fit ~2 full CheckCard tiles at once. Smaller heights crop the
// description; bigger feels like the card overstays its welcome.
.marquee {
position: relative;
height: 180px;
overflow: hidden;
mask-image: linear-gradient(to bottom, transparent 0%, black 15%, black 85%, transparent 100%);
-webkit-mask-image: linear-gradient(
to bottom,
transparent 0%,
black 15%,
black 85%,
transparent 100%
);
}
// Vertical track holding the preview list (rendered twice by the component).
// It scrolls upward forever; list styling is reset so only the cards show.
.marqueeTrack {
	list-style: none;
	margin: 0;
	padding: 0;
	display: flex;
	flex-direction: column;
	gap: var(--spacing--xs);
	animation: marqueeScroll 24s linear infinite;
}

// One loop has to travel exactly one copy of the list *including* the gap that
// separates it from the second copy. The track holds 2n items but only 2n - 1
// gaps, so half of the track's height is half a gap short of that distance —
// a plain `-50%` makes the list jump by gap/2 every time the animation
// restarts. Adding the missing half gap makes the end frame coincide with the
// start frame. The gap value here must stay in sync with `.marqueeTrack`.
@keyframes marqueeScroll {
	from {
		transform: translateY(0);
	}
	to {
		transform: translateY(calc(-50% - var(--spacing--xs) / 2));
	}
}

// Respect the user's reduced-motion preference: show the list statically.
@media (prefers-reduced-motion: reduce) {
	.marqueeTrack {
		animation: none;
	}
}
// Each marquee item is a thin wrapper around CheckCard, used purely for the
// list semantics + the marquee track's `gap` — CheckCard owns its own
// border/padding/typography so there's nothing to style here.
.marqueeItem {
display: flex;
}
// Title + description stack. NOTE(review): the right padding presumably keeps
// the text clear of the dismiss button in the card's top-right — confirm.
.body {
display: flex;
flex-direction: column;
gap: var(--spacing--3xs);
padding-right: var(--spacing--md);
}
// Description text rendered as its own block with a slightly open line height.
.description {
display: block;
line-height: 1.4;
}
// Row holding the setup call-to-action.
.footer {
display: flex;
}
// The CTA button spans the full width of the footer.
.setupButton {
width: 100%;
}
</style>