feat(AI Assistant): improved state management and performance

This commit is contained in:
Jake Turner 2026-03-10 23:50:27 +00:00 committed by Jake Turner
parent 6f0fae0033
commit 460756f581
6 changed files with 114 additions and 30 deletions

View File

@ -1,3 +1,4 @@
import { ChatService } from '#services/chat_service'
import { OllamaService } from '#services/ollama_service' import { OllamaService } from '#services/ollama_service'
import { RagService } from '#services/rag_service' import { RagService } from '#services/rag_service'
import { modelNameSchema } from '#validators/download' import { modelNameSchema } from '#validators/download'
@ -11,6 +12,7 @@ import type { Message } from 'ollama'
@inject() @inject()
export default class OllamaController { export default class OllamaController {
constructor( constructor(
private chatService: ChatService,
private ollamaService: OllamaService, private ollamaService: OllamaService,
private ragService: RagService private ragService: RagService
) { } ) { }
@ -87,19 +89,59 @@ export default class OllamaController {
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model) const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
// Separate sessionId from the Ollama request payload — Ollama rejects unknown fields
const { sessionId, ...ollamaRequest } = reqData
// Save user message to DB before streaming if sessionId provided
let userContent: string | null = null
if (sessionId) {
const lastUserMsg = [...reqData.messages].reverse().find((m) => m.role === 'user')
if (lastUserMsg) {
userContent = lastUserMsg.content
await this.chatService.addMessage(sessionId, 'user', userContent)
}
}
if (reqData.stream) { if (reqData.stream) {
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`) logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
// Headers already flushed above // Headers already flushed above
const stream = await this.ollamaService.chatStream({ ...reqData, think }) const stream = await this.ollamaService.chatStream({ ...ollamaRequest, think })
let fullContent = ''
for await (const chunk of stream) { for await (const chunk of stream) {
if (chunk.message?.content) {
fullContent += chunk.message.content
}
response.response.write(`data: ${JSON.stringify(chunk)}\n\n`) response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
} }
response.response.end() response.response.end()
// Save assistant message and optionally generate title
if (sessionId && fullContent) {
await this.chatService.addMessage(sessionId, 'assistant', fullContent)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, fullContent).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
}
return return
} }
// Non-streaming (legacy) path // Non-streaming (legacy) path
return await this.ollamaService.chat({ ...reqData, think }) const result = await this.ollamaService.chat({ ...ollamaRequest, think })
if (sessionId && result?.message?.content) {
await this.chatService.addMessage(sessionId, 'assistant', result.message.content)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, result.message.content).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
}
return result
} catch (error) { } catch (error) {
if (reqData.stream) { if (reqData.stream) {
response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`) response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)

View File

@ -4,7 +4,7 @@ import logger from '@adonisjs/core/services/logger'
import { DateTime } from 'luxon' import { DateTime } from 'luxon'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import { OllamaService } from './ollama_service.js' import { OllamaService } from './ollama_service.js'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js' import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { toTitleCase } from '../utils/misc.js' import { toTitleCase } from '../utils/misc.js'
@inject() @inject()
@ -220,6 +220,59 @@ export class ChatService {
} }
} }
/**
 * Count the persisted messages belonging to a chat session.
 * Returns 0 (rather than throwing) when the query fails, so callers
 * can treat the count as best-effort.
 */
async getMessageCount(sessionId: number): Promise<number> {
  try {
    const rows = await ChatMessage.query().where('session_id', sessionId).count('* as total')
    const [first] = rows
    return Number(first.$extras.total)
  } catch (error) {
    const reason = error instanceof Error ? error.message : error
    logger.error(`[ChatService] Failed to get message count for session ${sessionId}: ${reason}`)
    return 0
  }
}
async generateTitle(sessionId: number, userMessage: string, assistantMessage: string) {
try {
const models = await this.ollamaService.getModels()
const titleModelAvailable = models?.some((m) => m.name === DEFAULT_QUERY_REWRITE_MODEL)
let title: string
if (!titleModelAvailable) {
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
} else {
const response = await this.ollamaService.chat({
model: DEFAULT_QUERY_REWRITE_MODEL,
messages: [
{ role: 'system', content: SYSTEM_PROMPTS.title_generation },
{ role: 'user', content: userMessage },
{ role: 'assistant', content: assistantMessage },
],
})
title = response?.message?.content?.trim()
if (!title) {
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
}
}
await this.updateSession(sessionId, { title })
logger.info(`[ChatService] Generated title for session ${sessionId}: "${title}"`)
} catch (error) {
logger.error(
`[ChatService] Failed to generate title for session ${sessionId}: ${error instanceof Error ? error.message : error}`
)
// Fall back to truncated user message
try {
const fallbackTitle = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
await this.updateSession(sessionId, { title: fallbackTitle })
} catch {
// Silently fail - session keeps "New Chat" title
}
}
}
async deleteAllSessions() { async deleteAllSessions() {
try { try {
await ChatSession.query().delete() await ChatSession.query().delete()

View File

@ -10,6 +10,7 @@ export const chatSchema = vine.compile(
}) })
), ),
stream: vine.boolean().optional(), stream: vine.boolean().optional(),
sessionId: vine.number().positive().optional(),
}) })
) )

View File

@ -83,9 +83,9 @@ IMPORTANT INSTRUCTIONS:
1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers. 1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base..."). 2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base. 3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. 4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. Do not mention the context if it's not relevant.
5. Never fabricate information that isn't in the context or your training data. 5. Never fabricate information that isn't in the context or your training data.
6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations. 6. If you're unsure or you don't have enough information to answer the user's question, acknowledge the limitations.
Format your response using markdown for readability. Format your response using markdown for readability.
`, `,
@ -113,6 +113,7 @@ Ensure that your suggestions are comma-seperated with no conjunctions like "and"
Do not use line breaks, new lines, or extra spacing to separate the suggestions. Do not use line breaks, new lines, or extra spacing to separate the suggestions.
Format: suggestion1, suggestion2, suggestion3 Format: suggestion1, suggestion2, suggestion3
`, `,
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
query_rewrite: ` query_rewrite: `
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history. You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.

View File

@ -90,8 +90,9 @@ export default function Chat({
mutationFn: (request: { mutationFn: (request: {
model: string model: string
messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>
sessionId?: number
}) => api.sendChatMessage({ ...request, stream: false }), }) => api.sendChatMessage({ ...request, stream: false }),
onSuccess: async (data, variables) => { onSuccess: async (data) => {
if (!data || !activeSessionId) { if (!data || !activeSessionId) {
throw new Error('No response from Ollama') throw new Error('No response from Ollama')
} }
@ -106,17 +107,9 @@ export default function Chat({
setMessages((prev) => [...prev, assistantMessage]) setMessages((prev) => [...prev, assistantMessage])
// Save assistant message to backend // Refresh sessions to pick up backend-persisted messages and title
await api.addChatMessage(activeSessionId, 'assistant', assistantMessage.content) queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
// Update session title if it's a new chat
const currentSession = sessions.find((s) => s.id === activeSessionId)
if (currentSession && currentSession.title === 'New Chat') {
const userContent = variables.messages[variables.messages.length - 1].content
const newTitle = userContent.slice(0, 50) + (userContent.length > 50 ? '...' : '')
await api.updateChatSession(activeSessionId, { title: newTitle })
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
}
}, },
onError: (error) => { onError: (error) => {
console.error('Error sending message:', error) console.error('Error sending message:', error)
@ -230,9 +223,6 @@ export default function Chat({
setMessages((prev) => [...prev, userMessage]) setMessages((prev) => [...prev, userMessage])
// Save user message to backend
await api.addChatMessage(sessionId, 'user', content)
const chatMessages = [ const chatMessages = [
...messages.map((m) => ({ role: m.role, content: m.content })), ...messages.map((m) => ({ role: m.role, content: m.content })),
{ role: 'user' as const, content }, { role: 'user' as const, content },
@ -255,7 +245,7 @@ export default function Chat({
try { try {
await api.streamChatMessage( await api.streamChatMessage(
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true }, { model: selectedModel || 'llama3.2', messages: chatMessages, stream: true, sessionId: sessionId ? Number(sessionId) : undefined },
(chunkContent, chunkThinking, done) => { (chunkContent, chunkThinking, done) => {
if (chunkThinking.length > 0 && thinkingStartTime === null) { if (chunkThinking.length > 0 && thinkingStartTime === null) {
thinkingStartTime = Date.now() thinkingStartTime = Date.now()
@ -336,24 +326,20 @@ export default function Chat({
) )
) )
await api.addChatMessage(sessionId, 'assistant', fullContent) // Refresh sessions to pick up backend-persisted messages and title
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
const currentSession = sessions.find((s) => s.id === sessionId) setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
if (currentSession && currentSession.title === 'New Chat') {
const newTitle = content.slice(0, 50) + (content.length > 50 ? '...' : '')
await api.updateChatSession(sessionId, { title: newTitle })
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
}
} }
} else { } else {
// Non-streaming (legacy) path // Non-streaming (legacy) path
chatMutation.mutate({ chatMutation.mutate({
model: selectedModel || 'llama3.2', model: selectedModel || 'llama3.2',
messages: chatMessages, messages: chatMessages,
sessionId: sessionId ? Number(sessionId) : undefined,
}) })
} }
}, },
[activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled, sessions] [activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled]
) )
return ( return (

View File

@ -32,6 +32,7 @@ export type OllamaChatRequest = {
model: string model: string
messages: OllamaChatMessage[] messages: OllamaChatMessage[]
stream?: boolean stream?: boolean
sessionId?: number
} }
export type OllamaChatResponse = { export type OllamaChatResponse = {