mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-04-09 02:06:16 +02:00
feat(ai-chat): collapse <think> sections produced by the OpenAI API
This commit is contained in:
parent
2350b79bb5
commit
f36a7594bd
|
|
@ -248,13 +248,63 @@ export class OllamaService {
|
||||||
|
|
||||||
const stream = (await this.openai.chat.completions.create(params)) as unknown as Stream<ChatCompletionChunk>
|
const stream = (await this.openai.chat.completions.create(params)) as unknown as Stream<ChatCompletionChunk>
|
||||||
|
|
||||||
|
// Returns how many trailing chars of `text` could be the start of `tag`
|
||||||
|
function partialTagSuffix(tag: string, text: string): number {
|
||||||
|
for (let len = Math.min(tag.length - 1, text.length); len >= 1; len--) {
|
||||||
|
if (text.endsWith(tag.slice(0, len))) return len
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
async function* normalize(): AsyncGenerator<NomadChatStreamChunk> {
|
async function* normalize(): AsyncGenerator<NomadChatStreamChunk> {
|
||||||
|
// Stateful parser for <think>...</think> tags that may be split across chunks.
|
||||||
|
// Ollama provides thinking natively via delta.thinking; OpenAI-compatible backends
|
||||||
|
// (LM Studio, llama.cpp, etc.) embed them inline in delta.content.
|
||||||
|
let tagBuffer = ''
|
||||||
|
let inThink = false
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
const delta = chunk.choices[0]?.delta
|
const delta = chunk.choices[0]?.delta
|
||||||
|
const nativeThinking: string = (delta as any)?.thinking ?? ''
|
||||||
|
const rawContent: string = delta?.content ?? ''
|
||||||
|
|
||||||
|
// Parse <think> tags out of the content stream
|
||||||
|
tagBuffer += rawContent
|
||||||
|
let parsedContent = ''
|
||||||
|
let parsedThinking = ''
|
||||||
|
|
||||||
|
while (tagBuffer.length > 0) {
|
||||||
|
if (inThink) {
|
||||||
|
const closeIdx = tagBuffer.indexOf('</think>')
|
||||||
|
if (closeIdx !== -1) {
|
||||||
|
parsedThinking += tagBuffer.slice(0, closeIdx)
|
||||||
|
tagBuffer = tagBuffer.slice(closeIdx + 8)
|
||||||
|
inThink = false
|
||||||
|
} else {
|
||||||
|
const hold = partialTagSuffix('</think>', tagBuffer)
|
||||||
|
parsedThinking += tagBuffer.slice(0, tagBuffer.length - hold)
|
||||||
|
tagBuffer = tagBuffer.slice(tagBuffer.length - hold)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const openIdx = tagBuffer.indexOf('<think>')
|
||||||
|
if (openIdx !== -1) {
|
||||||
|
parsedContent += tagBuffer.slice(0, openIdx)
|
||||||
|
tagBuffer = tagBuffer.slice(openIdx + 7)
|
||||||
|
inThink = true
|
||||||
|
} else {
|
||||||
|
const hold = partialTagSuffix('<think>', tagBuffer)
|
||||||
|
parsedContent += tagBuffer.slice(0, tagBuffer.length - hold)
|
||||||
|
tagBuffer = tagBuffer.slice(tagBuffer.length - hold)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
yield {
|
yield {
|
||||||
message: {
|
message: {
|
||||||
content: delta?.content ?? '',
|
content: parsedContent,
|
||||||
thinking: (delta as any)?.thinking ?? '',
|
thinking: nativeThinking + parsedThinking,
|
||||||
},
|
},
|
||||||
done: chunk.choices[0]?.finish_reason !== null && chunk.choices[0]?.finish_reason !== undefined,
|
done: chunk.choices[0]?.finish_reason !== null && chunk.choices[0]?.finish_reason !== undefined,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user