feat(AI Assistant): improved state management and performance

This commit is contained in:
Jake Turner 2026-03-10 23:50:27 +00:00 committed by Jake Turner
parent 6f0fae0033
commit 460756f581
6 changed files with 114 additions and 30 deletions

View File

@ -1,3 +1,4 @@
import { ChatService } from '#services/chat_service'
import { OllamaService } from '#services/ollama_service' import { OllamaService } from '#services/ollama_service'
import { RagService } from '#services/rag_service' import { RagService } from '#services/rag_service'
import { modelNameSchema } from '#validators/download' import { modelNameSchema } from '#validators/download'
@ -11,6 +12,7 @@ import type { Message } from 'ollama'
@inject() @inject()
export default class OllamaController { export default class OllamaController {
constructor( constructor(
private chatService: ChatService,
private ollamaService: OllamaService, private ollamaService: OllamaService,
private ragService: RagService private ragService: RagService
) { } ) { }
@ -87,19 +89,59 @@ export default class OllamaController {
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model) const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
// Separate sessionId from the Ollama request payload — Ollama rejects unknown fields
const { sessionId, ...ollamaRequest } = reqData
// Save user message to DB before streaming if sessionId provided
let userContent: string | null = null
if (sessionId) {
const lastUserMsg = [...reqData.messages].reverse().find((m) => m.role === 'user')
if (lastUserMsg) {
userContent = lastUserMsg.content
await this.chatService.addMessage(sessionId, 'user', userContent)
}
}
if (reqData.stream) { if (reqData.stream) {
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`) logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
// Headers already flushed above // Headers already flushed above
const stream = await this.ollamaService.chatStream({ ...reqData, think }) const stream = await this.ollamaService.chatStream({ ...ollamaRequest, think })
let fullContent = ''
for await (const chunk of stream) { for await (const chunk of stream) {
if (chunk.message?.content) {
fullContent += chunk.message.content
}
response.response.write(`data: ${JSON.stringify(chunk)}\n\n`) response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
} }
response.response.end() response.response.end()
// Save assistant message and optionally generate title
if (sessionId && fullContent) {
await this.chatService.addMessage(sessionId, 'assistant', fullContent)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, fullContent).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
}
return return
} }
// Non-streaming (legacy) path // Non-streaming (legacy) path
return await this.ollamaService.chat({ ...reqData, think }) const result = await this.ollamaService.chat({ ...ollamaRequest, think })
if (sessionId && result?.message?.content) {
await this.chatService.addMessage(sessionId, 'assistant', result.message.content)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, result.message.content).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
}
return result
} catch (error) { } catch (error) {
if (reqData.stream) { if (reqData.stream) {
response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`) response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)

View File

@ -4,7 +4,7 @@ import logger from '@adonisjs/core/services/logger'
import { DateTime } from 'luxon' import { DateTime } from 'luxon'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import { OllamaService } from './ollama_service.js' import { OllamaService } from './ollama_service.js'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js' import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { toTitleCase } from '../utils/misc.js' import { toTitleCase } from '../utils/misc.js'
@inject() @inject()
@ -220,6 +220,59 @@ export class ChatService {
} }
} }
/**
 * Count the persisted messages belonging to a chat session.
 * Returns 0 (rather than throwing) when the query fails, so callers
 * can treat the count as best-effort.
 */
async getMessageCount(sessionId: number): Promise<number> {
  try {
    const rows = await ChatMessage.query().where('session_id', sessionId).count('* as total')
    const [first] = rows
    return Number(first.$extras.total)
  } catch (error) {
    const reason = error instanceof Error ? error.message : error
    logger.error(`[ChatService] Failed to get message count for session ${sessionId}: ${reason}`)
    return 0
  }
}
async generateTitle(sessionId: number, userMessage: string, assistantMessage: string) {
try {
const models = await this.ollamaService.getModels()
const titleModelAvailable = models?.some((m) => m.name === DEFAULT_QUERY_REWRITE_MODEL)
let title: string
if (!titleModelAvailable) {
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
} else {
const response = await this.ollamaService.chat({
model: DEFAULT_QUERY_REWRITE_MODEL,
messages: [
{ role: 'system', content: SYSTEM_PROMPTS.title_generation },
{ role: 'user', content: userMessage },
{ role: 'assistant', content: assistantMessage },
],
})
title = response?.message?.content?.trim()
if (!title) {
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
}
}
await this.updateSession(sessionId, { title })
logger.info(`[ChatService] Generated title for session ${sessionId}: "${title}"`)
} catch (error) {
logger.error(
`[ChatService] Failed to generate title for session ${sessionId}: ${error instanceof Error ? error.message : error}`
)
// Fall back to truncated user message
try {
const fallbackTitle = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
await this.updateSession(sessionId, { title: fallbackTitle })
} catch {
// Silently fail - session keeps "New Chat" title
}
}
}
async deleteAllSessions() { async deleteAllSessions() {
try { try {
await ChatSession.query().delete() await ChatSession.query().delete()

View File

@ -10,6 +10,7 @@ export const chatSchema = vine.compile(
}) })
), ),
stream: vine.boolean().optional(), stream: vine.boolean().optional(),
sessionId: vine.number().positive().optional(),
}) })
) )

View File

@ -83,9 +83,9 @@ IMPORTANT INSTRUCTIONS:
1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers. 1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base..."). 2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base. 3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. 4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. Do not mention the context if it's not relevant.
5. Never fabricate information that isn't in the context or your training data. 5. Never fabricate information that isn't in the context or your training data.
6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations. 6. If you're unsure or you don't have enough information to answer the user's question, acknowledge the limitations.
Format your response using markdown for readability. Format your response using markdown for readability.
`, `,
@ -113,6 +113,7 @@ Ensure that your suggestions are comma-seperated with no conjunctions like "and"
Do not use line breaks, new lines, or extra spacing to separate the suggestions. Do not use line breaks, new lines, or extra spacing to separate the suggestions.
Format: suggestion1, suggestion2, suggestion3 Format: suggestion1, suggestion2, suggestion3
`, `,
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
query_rewrite: ` query_rewrite: `
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history. You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.

View File

@ -90,8 +90,9 @@ export default function Chat({
mutationFn: (request: { mutationFn: (request: {
model: string model: string
messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>
sessionId?: number
}) => api.sendChatMessage({ ...request, stream: false }), }) => api.sendChatMessage({ ...request, stream: false }),
onSuccess: async (data, variables) => { onSuccess: async (data) => {
if (!data || !activeSessionId) { if (!data || !activeSessionId) {
throw new Error('No response from Ollama') throw new Error('No response from Ollama')
} }
@ -106,17 +107,9 @@ export default function Chat({
setMessages((prev) => [...prev, assistantMessage]) setMessages((prev) => [...prev, assistantMessage])
// Save assistant message to backend // Refresh sessions to pick up backend-persisted messages and title
await api.addChatMessage(activeSessionId, 'assistant', assistantMessage.content) queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
// Update session title if it's a new chat
const currentSession = sessions.find((s) => s.id === activeSessionId)
if (currentSession && currentSession.title === 'New Chat') {
const userContent = variables.messages[variables.messages.length - 1].content
const newTitle = userContent.slice(0, 50) + (userContent.length > 50 ? '...' : '')
await api.updateChatSession(activeSessionId, { title: newTitle })
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
}
}, },
onError: (error) => { onError: (error) => {
console.error('Error sending message:', error) console.error('Error sending message:', error)
@ -230,9 +223,6 @@ export default function Chat({
setMessages((prev) => [...prev, userMessage]) setMessages((prev) => [...prev, userMessage])
// Save user message to backend
await api.addChatMessage(sessionId, 'user', content)
const chatMessages = [ const chatMessages = [
...messages.map((m) => ({ role: m.role, content: m.content })), ...messages.map((m) => ({ role: m.role, content: m.content })),
{ role: 'user' as const, content }, { role: 'user' as const, content },
@ -255,7 +245,7 @@ export default function Chat({
try { try {
await api.streamChatMessage( await api.streamChatMessage(
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true }, { model: selectedModel || 'llama3.2', messages: chatMessages, stream: true, sessionId: sessionId ? Number(sessionId) : undefined },
(chunkContent, chunkThinking, done) => { (chunkContent, chunkThinking, done) => {
if (chunkThinking.length > 0 && thinkingStartTime === null) { if (chunkThinking.length > 0 && thinkingStartTime === null) {
thinkingStartTime = Date.now() thinkingStartTime = Date.now()
@ -336,24 +326,20 @@ export default function Chat({
) )
) )
await api.addChatMessage(sessionId, 'assistant', fullContent) // Refresh sessions to pick up backend-persisted messages and title
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
const currentSession = sessions.find((s) => s.id === sessionId) setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
if (currentSession && currentSession.title === 'New Chat') {
const newTitle = content.slice(0, 50) + (content.length > 50 ? '...' : '')
await api.updateChatSession(sessionId, { title: newTitle })
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
}
} }
} else { } else {
// Non-streaming (legacy) path // Non-streaming (legacy) path
chatMutation.mutate({ chatMutation.mutate({
model: selectedModel || 'llama3.2', model: selectedModel || 'llama3.2',
messages: chatMessages, messages: chatMessages,
sessionId: sessionId ? Number(sessionId) : undefined,
}) })
} }
}, },
[activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled, sessions] [activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled]
) )
return ( return (

View File

@ -32,6 +32,7 @@ export type OllamaChatRequest = {
model: string model: string
messages: OllamaChatMessage[] messages: OllamaChatMessage[]
stream?: boolean stream?: boolean
sessionId?: number
} }
export type OllamaChatResponse = { export type OllamaChatResponse = {