n8n/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/cache-analyzer.ts
Jiri De Jonghe 3afb4db4ae
feat(ai-builder): Optimize Anthropic prompt caching with 4-breakpoint strategy (#20484)
Co-authored-by: Oleg Ivaniv <me@olegivaniv.com>
2025-10-14 08:43:23 +02:00

139 lines
4.6 KiB
TypeScript

import type { UsageMetadata } from '../types/langsmith.js';
import type { CacheStatistics } from '../types/test-result.js';
/**
 * Anthropic list prices for Claude Sonnet 4, in USD per one million tokens.
 * Source: https://www.anthropic.com/pricing
 *
 * These rates are a hard-coded snapshot (originally recorded as 2024 pricing —
 * TODO confirm they are still current before relying on absolute dollar figures).
 */
const PRICING = {
	// Regular (uncached) prompt tokens
	inputTokensPerMillion: 3.0,

	// Completion tokens
	outputTokensPerMillion: 15.0,

	// Writing prompt tokens into the cache: 25% markup over the input rate
	cacheWritePerMillion: 3.75,

	// Reading prompt tokens back from the cache: 90% discount on the input rate
	cacheReadPerMillion: 0.3,
};
/**
 * Derive cache statistics for a single request from its usage metadata.
 *
 * Any token counter that is missing from `usage` is treated as 0.
 * The hit rate is the fraction of all prompt tokens (regular input + cache
 * writes + cache reads) that was read from the cache; it is 0 when no prompt
 * tokens were reported at all.
 *
 * @param usage - Token usage counters; every field is optional
 * @returns Token counts, the cache hit rate and the estimated savings in dollars
 */
export function calculateCacheStats(usage: Partial<UsageMetadata>): CacheStatistics {
	const inputTokens = usage.input_tokens ?? 0;
	const outputTokens = usage.output_tokens ?? 0;
	const cacheCreationTokens = usage.cache_creation_input_tokens ?? 0;
	const cacheReadTokens = usage.cache_read_input_tokens ?? 0;

	// Everything that was sent as prompt, regardless of how it was billed
	const promptTokens = inputTokens + cacheCreationTokens + cacheReadTokens;

	// Guard against dividing by zero when the request reported no prompt tokens
	let cacheHitRate = 0;
	if (promptTokens > 0) {
		cacheHitRate = cacheReadTokens / promptTokens;
	}

	return {
		inputTokens,
		outputTokens,
		cacheCreationTokens,
		cacheReadTokens,
		cacheHitRate,
		estimatedCostSavings: calculateCostSavings(usage),
	};
}
/**
 * Estimate how many dollars prompt caching saved for one request.
 *
 * Compares the actual bill (regular input, output, cache writes, cache reads,
 * each at its own rate) against a hypothetical bill where every prompt token
 * is charged at the regular input rate. Output tokens are priced identically
 * in both scenarios.
 *
 * The result is negative when caching cost more than it saved, e.g. when
 * tokens were written to the cache (billed above the input rate) but never
 * read back.
 *
 * @param usage - Token usage counters; missing fields count as 0
 * @returns Savings in dollars (uncached cost minus cached cost)
 */
export function calculateCostSavings(usage: Partial<UsageMetadata>): number {
	// Dollar cost of `tokens` at a rate quoted per one million tokens
	const priceOf = (tokens: number, ratePerMillion: number): number =>
		(tokens / 1_000_000) * ratePerMillion;

	const regularInput = usage.input_tokens ?? 0;
	const output = usage.output_tokens ?? 0;
	const cacheWrites = usage.cache_creation_input_tokens ?? 0;
	const cacheReads = usage.cache_read_input_tokens ?? 0;

	// What the request actually cost with caching enabled
	const cachedBill =
		priceOf(regularInput, PRICING.inputTokensPerMillion) +
		priceOf(output, PRICING.outputTokensPerMillion) +
		priceOf(cacheWrites, PRICING.cacheWritePerMillion) +
		priceOf(cacheReads, PRICING.cacheReadPerMillion);

	// What it would have cost if every prompt token were a regular input token
	const allPromptTokens = regularInput + cacheWrites + cacheReads;
	const uncachedBill =
		priceOf(allPromptTokens, PRICING.inputTokensPerMillion) +
		priceOf(output, PRICING.outputTokensPerMillion);

	return uncachedBill - cachedBill;
}
/**
 * Combine the cache statistics of several test results into one summary.
 *
 * Token counts and estimated savings are summed. The hit rate is recomputed
 * from the summed token counts (cache reads divided by all prompt tokens)
 * rather than averaged across entries. An empty list yields all zeros.
 *
 * @param stats - Per-result cache statistics
 * @returns Aggregated statistics across all entries
 */
export function aggregateCacheStats(stats: CacheStatistics[]): CacheStatistics {
	let inputTokens = 0;
	let outputTokens = 0;
	let cacheCreationTokens = 0;
	let cacheReadTokens = 0;
	let estimatedCostSavings = 0;

	// Single pass over the entries; with no entries every total stays at 0
	for (const entry of stats) {
		inputTokens += entry.inputTokens;
		outputTokens += entry.outputTokens;
		cacheCreationTokens += entry.cacheCreationTokens;
		cacheReadTokens += entry.cacheReadTokens;
		estimatedCostSavings += entry.estimatedCostSavings;
	}

	// Hit rate over the combined prompt tokens; 0 when there are none
	const promptTokens = inputTokens + cacheCreationTokens + cacheReadTokens;
	const cacheHitRate = promptTokens > 0 ? cacheReadTokens / promptTokens : 0;

	return {
		inputTokens,
		outputTokens,
		cacheCreationTokens,
		cacheReadTokens,
		cacheHitRate,
		estimatedCostSavings,
	};
}
/**
 * Turn cache statistics into display-ready strings.
 *
 * Token counts are rendered with `toLocaleString()`, the hit rate as a
 * percentage with two decimals, and the savings as a dollar amount with four
 * decimals.
 *
 * @param stats - Statistics to format
 * @returns One formatted string per statistic
 */
export function formatCacheStats(stats: CacheStatistics): {
	inputTokens: string;
	outputTokens: string;
	cacheCreationTokens: string;
	cacheReadTokens: string;
	cacheHitRate: string;
	costSavings: string;
} {
	const asCount = (value: number): string => value.toLocaleString();

	// The hit rate is stored as a fraction; scale it to a percentage for display
	const hitRatePercent = (stats.cacheHitRate * 100).toFixed(2);
	const savingsDollars = stats.estimatedCostSavings.toFixed(4);

	return {
		inputTokens: asCount(stats.inputTokens),
		outputTokens: asCount(stats.outputTokens),
		cacheCreationTokens: asCount(stats.cacheCreationTokens),
		cacheReadTokens: asCount(stats.cacheReadTokens),
		cacheHitRate: hitRatePercent + '%',
		costSavings: '$' + savingsDollars,
	};
}
/**
 * Calculate a cache effectiveness score in the range 0-1.
 *
 * The score is a weighted blend of the cache hit rate (60%) and the
 * normalized cost savings (40%), where savings of $0.10 or more count as the
 * maximum. Both components are clamped to [0, 1] before weighting, so negative
 * savings (caching cost more than it saved) contribute 0 instead of pulling
 * the score below the documented range.
 *
 * @param stats - Cache statistics to score
 * @returns Effectiveness score between 0 (ineffective) and 1 (excellent)
 */
export function calculateCacheEffectiveness(stats: CacheStatistics): number {
	// Keep a component inside the unit interval
	const clampToUnit = (value: number): number => Math.min(Math.max(value, 0), 1);

	// Weight cache hit rate and cost savings
	const hitRateWeight = 0.6;
	const costSavingsWeight = 0.4;

	// Savings at or above this dollar amount are considered excellent
	const excellentSavings = 0.1;

	const hitRate = clampToUnit(stats.cacheHitRate);
	const normalizedSavings = clampToUnit(stats.estimatedCostSavings / excellentSavings);

	return hitRate * hitRateWeight + normalizedSavings * costSavingsWeight;
}