From b194dfa136aee0bfadcc4e5eb011de077de4c8ef Mon Sep 17 00:00:00 2001
From: chriscrosstalk <49691103+chriscrosstalk@users.noreply.github.com>
Date: Mon, 27 Apr 2026 21:43:10 -0700
Subject: [PATCH] fix(RAG): pass num_ctx and truncate to Ollama embed call
 (#763)

Some Ollama installs ship nomic-embed-text:v1.5 with the embedding
model's default num_ctx=2048, which the RAG chunker (sized for ~1500
estimated tokens at an assumed ratio of 2 chars/token) can exceed on
dense PDFs. When that happens, /api/embed returns `400 the input length
exceeds the context length`; the request then falls through to the
OpenAI-compatible fallback (which also errors), and the failure
surfaces as a BadRequestError.

Pass options.num_ctx=8192 (nomic-embed-text v1.5's RoPE-extrapolated
max) and truncate=true (silent truncation safety net) on every
embed call so we don't depend on the local modelfile defaults.

Reported on #756 by @NC4WD; same root cause as #369 and #670, which
were closed without an actual fix.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 admin/app/services/ollama_service.ts | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/admin/app/services/ollama_service.ts b/admin/app/services/ollama_service.ts
index 27f5cac..fe0cb1c 100644
--- a/admin/app/services/ollama_service.ts
+++ b/admin/app/services/ollama_service.ts
@@ -480,10 +480,21 @@ export class OllamaService {
     }
 
     try {
-      // Prefer Ollama native endpoint (supports batch input natively)
+      // Prefer Ollama native endpoint (supports batch input natively).
+      // Pass num_ctx explicitly so we don't depend on the embedding model's
+      // modelfile defaults. Some installs ship nomic-embed-text:v1.5 with
+      // num_ctx=2048, which our chunker (sized for ~1500 tokens) can exceed
+      // on dense content, causing "input length exceeds context length" errors.
+      // truncate:true is a runtime safety net for any chunk that still overshoots.
+      // 8192 matches nomic-embed-text:v1.5's RoPE-extrapolated max.
       const response = await axios.post(
         `${this.baseUrl}/api/embed`,
-        { model, input },
+        {
+          model,
+          input,
+          truncate: true,
+          options: { num_ctx: 8192 },
+        },
         { timeout: 60000 }
       )
       // Some backends (e.g. LM Studio) return HTTP 200 for unknown endpoints with an incompatible