feat(AI Assistant): improved state management and performance

This commit is contained in:
Jake Turner 2026-03-10 23:50:27 +00:00
parent 07560d82e6
commit 8305e07c9f
No known key found for this signature in database
GPG Key ID: 6DCBBAE4FEAB53EB
6 changed files with 114 additions and 30 deletions

View File

@ -1,3 +1,4 @@
import { ChatService } from '#services/chat_service'
import { OllamaService } from '#services/ollama_service'
import { RagService } from '#services/rag_service'
import { modelNameSchema } from '#validators/download'
@ -11,6 +12,7 @@ import type { Message } from 'ollama'
@inject()
export default class OllamaController {
// DI constructor: AdonisJS's @inject() resolves and supplies the three
// services; parameter properties store them for use by route handlers.
constructor(
private chatService: ChatService,
private ollamaService: OllamaService,
private ragService: RagService
) { }
@ -87,19 +89,59 @@ export default class OllamaController {
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
// Separate sessionId from the Ollama request payload — Ollama rejects unknown fields
const { sessionId, ...ollamaRequest } = reqData
// Save user message to DB before streaming if sessionId provided
let userContent: string | null = null
if (sessionId) {
const lastUserMsg = [...reqData.messages].reverse().find((m) => m.role === 'user')
if (lastUserMsg) {
userContent = lastUserMsg.content
await this.chatService.addMessage(sessionId, 'user', userContent)
}
}
if (reqData.stream) {
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
// Headers already flushed above
const stream = await this.ollamaService.chatStream({ ...reqData, think })
const stream = await this.ollamaService.chatStream({ ...ollamaRequest, think })
let fullContent = ''
for await (const chunk of stream) {
if (chunk.message?.content) {
fullContent += chunk.message.content
}
response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
}
response.response.end()
// Save assistant message and optionally generate title
if (sessionId && fullContent) {
await this.chatService.addMessage(sessionId, 'assistant', fullContent)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, fullContent).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
}
return
}
// Non-streaming (legacy) path
return await this.ollamaService.chat({ ...reqData, think })
const result = await this.ollamaService.chat({ ...ollamaRequest, think })
if (sessionId && result?.message?.content) {
await this.chatService.addMessage(sessionId, 'assistant', result.message.content)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, result.message.content).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
}
return result
} catch (error) {
if (reqData.stream) {
response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)

View File

@ -4,7 +4,7 @@ import logger from '@adonisjs/core/services/logger'
import { DateTime } from 'luxon'
import { inject } from '@adonisjs/core'
import { OllamaService } from './ollama_service.js'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { toTitleCase } from '../utils/misc.js'
@inject()
@ -220,6 +220,59 @@ export class ChatService {
}
}
/**
 * Count the messages stored for a chat session.
 *
 * @param sessionId - primary key of the chat session
 * @returns the number of rows in chat_messages for that session; 0 on
 *          any query failure (errors are logged, never propagated)
 */
async getMessageCount(sessionId: number): Promise<number> {
  try {
    const rows = await ChatMessage.query().where('session_id', sessionId).count('* as total')
    // Lucid exposes aggregate columns via $extras; drivers may return the
    // count as a string, so coerce to a number before returning.
    const [row] = rows
    return Number(row.$extras.total)
  } catch (error) {
    logger.error(
      `[ChatService] Failed to get message count for session ${sessionId}: ${error instanceof Error ? error.message : error}`
    )
    return 0
  }
}
/**
 * Generate and persist a short title for a chat session.
 *
 * Uses DEFAULT_QUERY_REWRITE_MODEL (when pulled locally) with the
 * title_generation system prompt; otherwise falls back to a truncated
 * copy of the user's message. Never throws — on total failure the
 * session simply keeps its existing title.
 *
 * @param sessionId - session to title
 * @param userMessage - first user message of the conversation
 * @param assistantMessage - first assistant reply, given as extra context
 */
async generateTitle(sessionId: number, userMessage: string, assistantMessage: string) {
  // Single source of truth for the fallback title: at most 60 chars
  // (57 + '...'). Previously this expression was duplicated three times.
  const fallbackTitle = () =>
    userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
  try {
    const models = await this.ollamaService.getModels()
    const titleModelAvailable = models?.some((m) => m.name === DEFAULT_QUERY_REWRITE_MODEL)
    let title: string
    if (!titleModelAvailable) {
      title = fallbackTitle()
    } else {
      const response = await this.ollamaService.chat({
        model: DEFAULT_QUERY_REWRITE_MODEL,
        messages: [
          { role: 'system', content: SYSTEM_PROMPTS.title_generation },
          { role: 'user', content: userMessage },
          { role: 'assistant', content: assistantMessage },
        ],
      })
      // content?.trim() is string | undefined — assigning it directly to a
      // `string` variable was a strict-mode type error. `||` also rejects
      // an empty string, matching the original `if (!title)` fallback.
      title = response?.message?.content?.trim() || fallbackTitle()
    }
    await this.updateSession(sessionId, { title })
    logger.info(`[ChatService] Generated title for session ${sessionId}: "${title}"`)
  } catch (error) {
    logger.error(
      `[ChatService] Failed to generate title for session ${sessionId}: ${error instanceof Error ? error.message : error}`
    )
    // Fall back to truncated user message
    try {
      await this.updateSession(sessionId, { title: fallbackTitle() })
    } catch {
      // Silently fail - session keeps "New Chat" title
    }
  }
}
async deleteAllSessions() {
try {
await ChatSession.query().delete()

View File

@ -10,6 +10,7 @@ export const chatSchema = vine.compile(
})
),
stream: vine.boolean().optional(),
sessionId: vine.number().positive().optional(),
})
)

View File

@ -83,9 +83,9 @@ IMPORTANT INSTRUCTIONS:
1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer.
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. Do not mention the context if it's not relevant.
5. Never fabricate information that isn't in the context or your training data.
6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations.
6. If you're unsure or you don't have enough information to answer the user's question, acknowledge the limitations.
Format your response using markdown for readability.
`,
@ -113,6 +113,7 @@ Ensure that your suggestions are comma-seperated with no conjunctions like "and"
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
Format: suggestion1, suggestion2, suggestion3
`,
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
query_rewrite: `
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.

View File

@ -90,8 +90,9 @@ export default function Chat({
mutationFn: (request: {
model: string
messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>
sessionId?: number
}) => api.sendChatMessage({ ...request, stream: false }),
onSuccess: async (data, variables) => {
onSuccess: async (data) => {
if (!data || !activeSessionId) {
throw new Error('No response from Ollama')
}
@ -106,17 +107,9 @@ export default function Chat({
setMessages((prev) => [...prev, assistantMessage])
// Save assistant message to backend
await api.addChatMessage(activeSessionId, 'assistant', assistantMessage.content)
// Update session title if it's a new chat
const currentSession = sessions.find((s) => s.id === activeSessionId)
if (currentSession && currentSession.title === 'New Chat') {
const userContent = variables.messages[variables.messages.length - 1].content
const newTitle = userContent.slice(0, 50) + (userContent.length > 50 ? '...' : '')
await api.updateChatSession(activeSessionId, { title: newTitle })
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
}
// Refresh sessions to pick up backend-persisted messages and title
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
},
onError: (error) => {
console.error('Error sending message:', error)
@ -230,9 +223,6 @@ export default function Chat({
setMessages((prev) => [...prev, userMessage])
// Save user message to backend
await api.addChatMessage(sessionId, 'user', content)
const chatMessages = [
...messages.map((m) => ({ role: m.role, content: m.content })),
{ role: 'user' as const, content },
@ -255,7 +245,7 @@ export default function Chat({
try {
await api.streamChatMessage(
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true },
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true, sessionId: sessionId ? Number(sessionId) : undefined },
(chunkContent, chunkThinking, done) => {
if (chunkThinking.length > 0 && thinkingStartTime === null) {
thinkingStartTime = Date.now()
@ -336,24 +326,20 @@ export default function Chat({
)
)
await api.addChatMessage(sessionId, 'assistant', fullContent)
const currentSession = sessions.find((s) => s.id === sessionId)
if (currentSession && currentSession.title === 'New Chat') {
const newTitle = content.slice(0, 50) + (content.length > 50 ? '...' : '')
await api.updateChatSession(sessionId, { title: newTitle })
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
}
// Refresh sessions to pick up backend-persisted messages and title
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
}
} else {
// Non-streaming (legacy) path
chatMutation.mutate({
model: selectedModel || 'llama3.2',
messages: chatMessages,
sessionId: sessionId ? Number(sessionId) : undefined,
})
}
},
[activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled, sessions]
[activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled]
)
return (

View File

@ -32,6 +32,7 @@ export type OllamaChatRequest = {
model: string
messages: OllamaChatMessage[]
stream?: boolean
sessionId?: number
}
export type OllamaChatResponse = {