mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-04-09 02:06:16 +02:00
feat(ai-chat): collapse <think> sections produced by the OpenAI API
This commit is contained in:
parent
2350b79bb5
commit
f36a7594bd
|
|
@ -248,13 +248,63 @@ export class OllamaService {
|
||||||
|
|
||||||
const stream = (await this.openai.chat.completions.create(params)) as unknown as Stream<ChatCompletionChunk>
|
const stream = (await this.openai.chat.completions.create(params)) as unknown as Stream<ChatCompletionChunk>
|
||||||
|
|
||||||
|
// Returns how many trailing chars of `text` could be the start of `tag`
|
||||||
|
function partialTagSuffix(tag: string, text: string): number {
|
||||||
|
for (let len = Math.min(tag.length - 1, text.length); len >= 1; len--) {
|
||||||
|
if (text.endsWith(tag.slice(0, len))) return len
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
async function* normalize(): AsyncGenerator<NomadChatStreamChunk> {
|
async function* normalize(): AsyncGenerator<NomadChatStreamChunk> {
|
||||||
|
// Stateful parser for <think>...</think> tags that may be split across chunks.
|
||||||
|
// Ollama provides thinking natively via delta.thinking; OpenAI-compatible backends
|
||||||
|
// (LM Studio, llama.cpp, etc.) embed them inline in delta.content.
|
||||||
|
let tagBuffer = ''
|
||||||
|
let inThink = false
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
const delta = chunk.choices[0]?.delta
|
const delta = chunk.choices[0]?.delta
|
||||||
|
const nativeThinking: string = (delta as any)?.thinking ?? ''
|
||||||
|
const rawContent: string = delta?.content ?? ''
|
||||||
|
|
||||||
|
// Parse <think> tags out of the content stream
|
||||||
|
tagBuffer += rawContent
|
||||||
|
let parsedContent = ''
|
||||||
|
let parsedThinking = ''
|
||||||
|
|
||||||
|
while (tagBuffer.length > 0) {
|
||||||
|
if (inThink) {
|
||||||
|
const closeIdx = tagBuffer.indexOf('</think>')
|
||||||
|
if (closeIdx !== -1) {
|
||||||
|
parsedThinking += tagBuffer.slice(0, closeIdx)
|
||||||
|
tagBuffer = tagBuffer.slice(closeIdx + 8)
|
||||||
|
inThink = false
|
||||||
|
} else {
|
||||||
|
const hold = partialTagSuffix('</think>', tagBuffer)
|
||||||
|
parsedThinking += tagBuffer.slice(0, tagBuffer.length - hold)
|
||||||
|
tagBuffer = tagBuffer.slice(tagBuffer.length - hold)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const openIdx = tagBuffer.indexOf('<think>')
|
||||||
|
if (openIdx !== -1) {
|
||||||
|
parsedContent += tagBuffer.slice(0, openIdx)
|
||||||
|
tagBuffer = tagBuffer.slice(openIdx + 7)
|
||||||
|
inThink = true
|
||||||
|
} else {
|
||||||
|
const hold = partialTagSuffix('<think>', tagBuffer)
|
||||||
|
parsedContent += tagBuffer.slice(0, tagBuffer.length - hold)
|
||||||
|
tagBuffer = tagBuffer.slice(tagBuffer.length - hold)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
yield {
|
yield {
|
||||||
message: {
|
message: {
|
||||||
content: delta?.content ?? '',
|
content: parsedContent,
|
||||||
thinking: (delta as any)?.thinking ?? '',
|
thinking: nativeThinking + parsedThinking,
|
||||||
},
|
},
|
||||||
done: chunk.choices[0]?.finish_reason !== null && chunk.choices[0]?.finish_reason !== undefined,
|
done: chunk.choices[0]?.finish_reason !== null && chunk.choices[0]?.finish_reason !== undefined,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user