mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
feat(AI): thinking and response streaming
This commit is contained in:
parent
16ce1e2945
commit
98b65c421c
|
|
@ -24,7 +24,7 @@ export default class OllamaController {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async chat({ request }: HttpContext) {
|
async chat({ request, response }: HttpContext) {
|
||||||
const reqData = await request.validateUsing(chatSchema)
|
const reqData = await request.validateUsing(chatSchema)
|
||||||
|
|
||||||
// If there are no system messages in the chat inject system prompts
|
// If there are no system messages in the chat inject system prompts
|
||||||
|
|
@ -73,7 +73,34 @@ export default class OllamaController {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return await this.ollamaService.chat(reqData)
|
// Check if the model supports "thinking" capability for enhanced response generation
|
||||||
|
// If gpt-oss model, it requires a text param for "think" https://docs.ollama.com/api/chat
|
||||||
|
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
|
||||||
|
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
|
||||||
|
|
||||||
|
if (reqData.stream) {
|
||||||
|
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
|
||||||
|
// SSE streaming path
|
||||||
|
response.response.setHeader('Content-Type', 'text/event-stream')
|
||||||
|
response.response.setHeader('Cache-Control', 'no-cache')
|
||||||
|
response.response.setHeader('Connection', 'keep-alive')
|
||||||
|
response.response.flushHeaders()
|
||||||
|
|
||||||
|
try {
|
||||||
|
const stream = await this.ollamaService.chatStream({ ...reqData, think })
|
||||||
|
for await (const chunk of stream) {
|
||||||
|
response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)
|
||||||
|
} finally {
|
||||||
|
response.response.end()
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-streaming (legacy) path
|
||||||
|
return await this.ollamaService.chat({ ...reqData, think })
|
||||||
}
|
}
|
||||||
|
|
||||||
async deleteModel({ request }: HttpContext) {
|
async deleteModel({ request }: HttpContext) {
|
||||||
|
|
|
||||||
|
|
@ -134,6 +134,30 @@ export class OllamaService {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Opens a streaming chat against the Ollama backend.
 *
 * Mirrors `chat()` but pins `stream: true`, so the caller receives the
 * client's chunked response (consumed with `for await` by the controller)
 * rather than a single completed reply.
 *
 * @param chatRequest - Chat payload (model, messages, options) to forward.
 * @returns The streaming response produced by the Ollama client.
 * @throws Error when the Ollama client has not been initialized.
 */
public async chatStream(chatRequest: ChatRequest) {
  await this._ensureDependencies()

  const client = this.ollama
  if (!client) {
    throw new Error('Ollama client is not initialized.')
  }

  // Forward the caller's request unchanged, forcing streaming mode on.
  return await client.chat({ ...chatRequest, stream: true })
}
|
||||||
|
|
||||||
|
public async checkModelHasThinking(modelName: string): Promise<boolean> {
|
||||||
|
await this._ensureDependencies()
|
||||||
|
if (!this.ollama) {
|
||||||
|
throw new Error('Ollama client is not initialized.')
|
||||||
|
}
|
||||||
|
|
||||||
|
const modelInfo = await this.ollama.show({
|
||||||
|
model: modelName,
|
||||||
|
})
|
||||||
|
|
||||||
|
return modelInfo.capabilities.includes('thinking')
|
||||||
|
}
|
||||||
|
|
||||||
public async deleteModel(modelName: string) {
|
public async deleteModel(modelName: string) {
|
||||||
await this._ensureDependencies()
|
await this._ensureDependencies()
|
||||||
if (!this.ollama) {
|
if (!this.ollama) {
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,16 @@
|
||||||
# Release Notes
|
# Release Notes
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
### Features
|
||||||
|
- **AI Assistant**: Added support for showing the reasoning stream for models with thinking capabilities
|
||||||
|
- **AI Assistant**: Added support for response streaming for improved UX
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
### Improvements
|
||||||
|
|
||||||
|
|
||||||
## Version 1.25.2 - February 18, 2026
|
## Version 1.25.2 - February 18, 2026
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,27 @@ export default function ChatMessageBubble({ message }: ChatMessageBubbleProps) {
|
||||||
message.role === 'user' ? 'bg-desert-green text-white' : 'bg-gray-100 text-gray-800'
|
message.role === 'user' ? 'bg-desert-green text-white' : 'bg-gray-100 text-gray-800'
|
||||||
)}
|
)}
|
||||||
>
|
>
|
||||||
|
{message.isThinking && message.thinking && (
|
||||||
|
<div className="mb-3 rounded border border-amber-200 bg-amber-50 px-3 py-2 text-xs">
|
||||||
|
<div className="mb-1 flex items-center gap-1.5 font-medium text-amber-700">
|
||||||
|
<span>Reasoning</span>
|
||||||
|
<span className="h-1.5 w-1.5 rounded-full bg-amber-500 animate-pulse inline-block" />
|
||||||
|
</div>
|
||||||
|
<div className="prose prose-xs max-w-none text-amber-900/80 max-h-32 overflow-y-auto">
|
||||||
|
<ReactMarkdown remarkPlugins={[remarkGfm]}>{message.thinking}</ReactMarkdown>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{!message.isThinking && message.thinking && (
|
||||||
|
<details className="mb-3 rounded border border-gray-200 bg-gray-50 text-xs">
|
||||||
|
<summary className="cursor-pointer px-3 py-2 font-medium text-gray-500 hover:text-gray-700 select-none">
|
||||||
|
Reasoning
|
||||||
|
</summary>
|
||||||
|
<div className="px-3 pb-3 prose prose-xs max-w-none text-gray-600 max-h-48 overflow-y-auto border-t border-gray-200 pt-2">
|
||||||
|
<ReactMarkdown remarkPlugins={[remarkGfm]}>{message.thinking}</ReactMarkdown>
|
||||||
|
</div>
|
||||||
|
</details>
|
||||||
|
)}
|
||||||
<div
|
<div
|
||||||
className={classNames(
|
className={classNames(
|
||||||
'break-words',
|
'break-words',
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
import { useState, useCallback, useEffect } from 'react'
|
import { useState, useCallback, useEffect, useRef } from 'react'
|
||||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
|
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
|
||||||
import ChatSidebar from './ChatSidebar'
|
import ChatSidebar from './ChatSidebar'
|
||||||
import ChatInterface from './ChatInterface'
|
import ChatInterface from './ChatInterface'
|
||||||
|
|
@ -15,6 +15,7 @@ interface ChatProps {
|
||||||
isInModal?: boolean
|
isInModal?: boolean
|
||||||
onClose?: () => void
|
onClose?: () => void
|
||||||
suggestionsEnabled?: boolean
|
suggestionsEnabled?: boolean
|
||||||
|
streamingEnabled?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function Chat({
|
export default function Chat({
|
||||||
|
|
@ -22,12 +23,15 @@ export default function Chat({
|
||||||
isInModal,
|
isInModal,
|
||||||
onClose,
|
onClose,
|
||||||
suggestionsEnabled = false,
|
suggestionsEnabled = false,
|
||||||
|
streamingEnabled = true,
|
||||||
}: ChatProps) {
|
}: ChatProps) {
|
||||||
const queryClient = useQueryClient()
|
const queryClient = useQueryClient()
|
||||||
const { openModal, closeAllModals } = useModals()
|
const { openModal, closeAllModals } = useModals()
|
||||||
const [activeSessionId, setActiveSessionId] = useState<string | null>(null)
|
const [activeSessionId, setActiveSessionId] = useState<string | null>(null)
|
||||||
const [messages, setMessages] = useState<ChatMessage[]>([])
|
const [messages, setMessages] = useState<ChatMessage[]>([])
|
||||||
const [selectedModel, setSelectedModel] = useState<string>('')
|
const [selectedModel, setSelectedModel] = useState<string>('')
|
||||||
|
const [isStreamingResponse, setIsStreamingResponse] = useState(false)
|
||||||
|
const streamAbortRef = useRef<AbortController | null>(null)
|
||||||
|
|
||||||
// Fetch all sessions
|
// Fetch all sessions
|
||||||
const { data: sessions = [] } = useQuery({
|
const { data: sessions = [] } = useQuery({
|
||||||
|
|
@ -209,16 +213,117 @@ export default function Chat({
|
||||||
// Save user message to backend
|
// Save user message to backend
|
||||||
await api.addChatMessage(sessionId, 'user', content)
|
await api.addChatMessage(sessionId, 'user', content)
|
||||||
|
|
||||||
// Send chat request using mutation
|
const chatMessages = [
|
||||||
chatMutation.mutate({
|
...messages.map((m) => ({ role: m.role, content: m.content })),
|
||||||
model: selectedModel || 'llama3.2',
|
{ role: 'user' as const, content },
|
||||||
messages: [
|
]
|
||||||
...messages.map((m) => ({ role: m.role, content: m.content })),
|
|
||||||
{ role: 'user', content },
|
if (streamingEnabled !== false) {
|
||||||
],
|
// Streaming path
|
||||||
})
|
const abortController = new AbortController()
|
||||||
|
streamAbortRef.current = abortController
|
||||||
|
|
||||||
|
setIsStreamingResponse(true)
|
||||||
|
|
||||||
|
const assistantMsgId = `msg-${Date.now()}-assistant`
|
||||||
|
let isFirstChunk = true
|
||||||
|
let fullContent = ''
|
||||||
|
let thinkingContent = ''
|
||||||
|
let isThinkingPhase = true
|
||||||
|
|
||||||
|
try {
|
||||||
|
await api.streamChatMessage(
|
||||||
|
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true },
|
||||||
|
(chunkContent, chunkThinking, done) => {
|
||||||
|
if (isFirstChunk) {
|
||||||
|
isFirstChunk = false
|
||||||
|
setIsStreamingResponse(false)
|
||||||
|
setMessages((prev) => [
|
||||||
|
...prev,
|
||||||
|
{
|
||||||
|
id: assistantMsgId,
|
||||||
|
role: 'assistant',
|
||||||
|
content: chunkContent,
|
||||||
|
thinking: chunkThinking,
|
||||||
|
timestamp: new Date(),
|
||||||
|
isStreaming: true,
|
||||||
|
isThinking: chunkThinking.length > 0 && chunkContent.length === 0,
|
||||||
|
},
|
||||||
|
])
|
||||||
|
} else {
|
||||||
|
if (isThinkingPhase && chunkContent.length > 0) {
|
||||||
|
isThinkingPhase = false
|
||||||
|
}
|
||||||
|
setMessages((prev) =>
|
||||||
|
prev.map((m) =>
|
||||||
|
m.id === assistantMsgId
|
||||||
|
? {
|
||||||
|
...m,
|
||||||
|
content: m.content + chunkContent,
|
||||||
|
thinking: (m.thinking ?? '') + chunkThinking,
|
||||||
|
isStreaming: !done,
|
||||||
|
isThinking: isThinkingPhase,
|
||||||
|
}
|
||||||
|
: m
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
fullContent += chunkContent
|
||||||
|
thinkingContent += chunkThinking
|
||||||
|
},
|
||||||
|
abortController.signal
|
||||||
|
)
|
||||||
|
} catch (error: any) {
|
||||||
|
if (error?.name !== 'AbortError') {
|
||||||
|
setMessages((prev) => {
|
||||||
|
const hasAssistantMsg = prev.some((m) => m.id === assistantMsgId)
|
||||||
|
if (hasAssistantMsg) {
|
||||||
|
return prev.map((m) =>
|
||||||
|
m.id === assistantMsgId ? { ...m, isStreaming: false } : m
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return [
|
||||||
|
...prev,
|
||||||
|
{
|
||||||
|
id: assistantMsgId,
|
||||||
|
role: 'assistant',
|
||||||
|
content: 'Sorry, there was an error processing your request. Please try again.',
|
||||||
|
timestamp: new Date(),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
setIsStreamingResponse(false)
|
||||||
|
streamAbortRef.current = null
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fullContent && sessionId) {
|
||||||
|
// Ensure the streaming cursor is removed
|
||||||
|
setMessages((prev) =>
|
||||||
|
prev.map((m) =>
|
||||||
|
m.id === assistantMsgId ? { ...m, isStreaming: false } : m
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
await api.addChatMessage(sessionId, 'assistant', fullContent)
|
||||||
|
|
||||||
|
const currentSession = sessions.find((s) => s.id === sessionId)
|
||||||
|
if (currentSession && currentSession.title === 'New Chat') {
|
||||||
|
const newTitle = content.slice(0, 50) + (content.length > 50 ? '...' : '')
|
||||||
|
await api.updateChatSession(sessionId, { title: newTitle })
|
||||||
|
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Non-streaming (legacy) path
|
||||||
|
chatMutation.mutate({
|
||||||
|
model: selectedModel || 'llama3.2',
|
||||||
|
messages: chatMessages,
|
||||||
|
})
|
||||||
|
}
|
||||||
},
|
},
|
||||||
[activeSessionId, messages, selectedModel, chatMutation, queryClient]
|
[activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled, sessions]
|
||||||
)
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
@ -282,7 +387,7 @@ export default function Chat({
|
||||||
<ChatInterface
|
<ChatInterface
|
||||||
messages={messages}
|
messages={messages}
|
||||||
onSendMessage={handleSendMessage}
|
onSendMessage={handleSendMessage}
|
||||||
isLoading={chatMutation.isPending}
|
isLoading={isStreamingResponse || chatMutation.isPending}
|
||||||
chatSuggestions={chatSuggestions}
|
chatSuggestions={chatSuggestions}
|
||||||
chatSuggestionsEnabled={suggestionsEnabled}
|
chatSuggestionsEnabled={suggestionsEnabled}
|
||||||
chatSuggestionsLoading={chatSuggestionsLoading}
|
chatSuggestionsLoading={chatSuggestionsLoading}
|
||||||
|
|
|
||||||
|
|
@ -212,6 +212,57 @@ class API {
|
||||||
})()
|
})()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async streamChatMessage(
|
||||||
|
chatRequest: OllamaChatRequest,
|
||||||
|
onChunk: (content: string, thinking: string, done: boolean) => void,
|
||||||
|
signal?: AbortSignal
|
||||||
|
): Promise<void> {
|
||||||
|
// Axios doesn't support ReadableStream in browser, so need to use fetch
|
||||||
|
const response = await fetch('/api/ollama/chat', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ ...chatRequest, stream: true }),
|
||||||
|
signal,
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!response.ok || !response.body) {
|
||||||
|
throw new Error(`HTTP error: ${response.status}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
const reader = response.body.getReader()
|
||||||
|
const decoder = new TextDecoder()
|
||||||
|
let buffer = ''
|
||||||
|
|
||||||
|
try {
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read()
|
||||||
|
if (done) break
|
||||||
|
|
||||||
|
buffer += decoder.decode(value, { stream: true })
|
||||||
|
const lines = buffer.split('\n')
|
||||||
|
buffer = lines.pop() || ''
|
||||||
|
|
||||||
|
for (const line of lines) {
|
||||||
|
if (!line.startsWith('data: ')) continue
|
||||||
|
let data: any
|
||||||
|
try {
|
||||||
|
data = JSON.parse(line.slice(6))
|
||||||
|
} catch { continue /* skip malformed chunks */ }
|
||||||
|
|
||||||
|
if (data.error) throw new Error('The model encountered an error. Please try again.')
|
||||||
|
|
||||||
|
onChunk(
|
||||||
|
data.message?.content ?? '',
|
||||||
|
data.message?.thinking ?? '',
|
||||||
|
data.done ?? false
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
reader.releaseLock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async getBenchmarkResults() {
|
async getBenchmarkResults() {
|
||||||
return catchInternal(async () => {
|
return catchInternal(async () => {
|
||||||
const response = await this.client.get<{ results: BenchmarkResult[], total: number }>('/benchmark/results')
|
const response = await this.client.get<{ results: BenchmarkResult[], total: number }>('/benchmark/results')
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ export interface ChatMessage {
|
||||||
content: string
|
content: string
|
||||||
timestamp: Date
|
timestamp: Date
|
||||||
isStreaming?: boolean
|
isStreaming?: boolean
|
||||||
|
thinking?: string
|
||||||
|
isThinking?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ChatSession {
|
export interface ChatSession {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ export type NomadOllamaModelTag = {
|
||||||
context: string
|
context: string
|
||||||
input: string
|
input: string
|
||||||
cloud: boolean
|
cloud: boolean
|
||||||
|
thinking: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export type NomadOllamaModelAPIResponse = {
|
export type NomadOllamaModelAPIResponse = {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user