From b194dfa136aee0bfadcc4e5eb011de077de4c8ef Mon Sep 17 00:00:00 2001 From: chriscrosstalk <49691103+chriscrosstalk@users.noreply.github.com> Date: Mon, 27 Apr 2026 21:43:10 -0700 Subject: [PATCH] fix(RAG): pass num_ctx and truncate to Ollama embed call (#763) Some Ollama installs ship nomic-embed-text:v1.5 with the embedding model's default num_ctx=2048, which the RAG chunker (sized for ~1500 tokens of estimated content with ratio=2 chars/token) can exceed on dense PDFs. The result is `400 the input length exceeds the context length` from /api/embed, which then hits the OpenAI-compatible fallback (which also errors), and surfaces as a BadRequestError. Pass options.num_ctx=8192 (nomic-embed-text v1.5's RoPE-extrapolated max) and truncate=true (silent truncation safety net) on every embed call so we don't depend on the local modelfile defaults. Reported on #756 by @NC4WD; same root cause as #369 and #670 which were closed without an actual fix. Co-authored-by: Claude Opus 4.7 (1M context) --- admin/app/services/ollama_service.ts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/admin/app/services/ollama_service.ts b/admin/app/services/ollama_service.ts index 27f5cac..fe0cb1c 100644 --- a/admin/app/services/ollama_service.ts +++ b/admin/app/services/ollama_service.ts @@ -480,10 +480,21 @@ export class OllamaService { } try { - // Prefer Ollama native endpoint (supports batch input natively) + // Prefer Ollama native endpoint (supports batch input natively). + // Pass num_ctx explicitly so we don't depend on the embedding model's + // modelfile defaults. Some installs ship nomic-embed-text:v1.5 with + // num_ctx=2048, which our chunker (sized for ~1500 tokens) can exceed + // on dense content, causing "input length exceeds context length" errors. + // truncate:true is a runtime safety net for any chunk that still overshoots. + // 8192 matches nomic-embed-text:v1.5's RoPE-extrapolated max. const response = await axios.post( `${this.baseUrl}/api/embed`, - { model, input }, + { + model, + input, + truncate: true, + options: { num_ctx: 8192 }, + }, { timeout: 60000 } ) // Some backends (e.g. LM Studio) return HTTP 200 for unknown endpoints with an incompatible