From 932f60cdb63c9af20420867274d27f8f9fbd4d15 Mon Sep 17 00:00:00 2001
From: Jake Turner <jturner@cosmistack.com>
Date: Tue, 10 Mar 2026 23:50:27 +0000
Subject: [PATCH] feat(AI Assistant): improved state management and performance

---
 admin/app/controllers/ollama_controller.ts | 46 +++++++++++++++++-
 admin/app/services/chat_service.ts         | 55 +++++++++++++++++++++-
 admin/app/validators/ollama.ts             |  1 +
 admin/constants/ollama.ts                  |  5 +-
 admin/inertia/components/chat/index.tsx    | 36 +++++---------
 admin/types/ollama.ts                      |  1 +
 6 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/admin/app/controllers/ollama_controller.ts b/admin/app/controllers/ollama_controller.ts
index 2b6cee3..1557062 100644
--- a/admin/app/controllers/ollama_controller.ts
+++ b/admin/app/controllers/ollama_controller.ts
@@ -1,3 +1,4 @@
+import { ChatService } from '#services/chat_service'
 import { OllamaService } from '#services/ollama_service'
 import { RagService } from '#services/rag_service'
 import { modelNameSchema } from '#validators/download'
@@ -11,6 +12,7 @@ import type { Message } from 'ollama'
 @inject()
 export default class OllamaController {
   constructor(
+    private chatService: ChatService,
     private ollamaService: OllamaService,
     private ragService: RagService
   ) { }
@@ -87,19 +89,59 @@ export default class OllamaController {
       const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
       const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
 
+      // Separate sessionId from the Ollama request payload — Ollama rejects unknown fields
+      const { sessionId, ...ollamaRequest } = reqData
+
+      // Save user message to DB before streaming if sessionId provided
+      let userContent: string | null = null
+      if (sessionId) {
+        const lastUserMsg = [...reqData.messages].reverse().find((m) => m.role === 'user')
+        if (lastUserMsg) {
+          userContent = lastUserMsg.content
+          await this.chatService.addMessage(sessionId, 'user', userContent)
+        }
+      }
+
       if (reqData.stream) {
         logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
         // Headers already flushed above
-        const stream = await this.ollamaService.chatStream({ ...reqData, think })
+        const stream = await this.ollamaService.chatStream({ ...ollamaRequest, think })
+        let fullContent = ''
         for await (const chunk of stream) {
+          if (chunk.message?.content) {
+            fullContent += chunk.message.content
+          }
           response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
         }
         response.response.end()
+
+        // Save assistant message and optionally generate title
+        if (sessionId && fullContent) {
+          await this.chatService.addMessage(sessionId, 'assistant', fullContent)
+          const messageCount = await this.chatService.getMessageCount(sessionId)
+          if (messageCount <= 2 && userContent) {
+            this.chatService.generateTitle(sessionId, userContent, fullContent).catch((err) => {
+              logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
+            })
+          }
+        }
         return
       }
 
       // Non-streaming (legacy) path
-      return await this.ollamaService.chat({ ...reqData, think })
+      const result = await this.ollamaService.chat({ ...ollamaRequest, think })
+
+      if (sessionId && result?.message?.content) {
+        await this.chatService.addMessage(sessionId, 'assistant', result.message.content)
+        const messageCount = await this.chatService.getMessageCount(sessionId)
+        if (messageCount <= 2 && userContent) {
+          this.chatService.generateTitle(sessionId, userContent, result.message.content).catch((err) => {
+            logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
+          })
+        }
+      }
+
+      return result
     } catch (error) {
       if (reqData.stream) {
         response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)
diff --git a/admin/app/services/chat_service.ts b/admin/app/services/chat_service.ts
index d395e53..18b0108 100644
--- a/admin/app/services/chat_service.ts
+++ b/admin/app/services/chat_service.ts
@@ -4,7 +4,7 @@ import logger from '@adonisjs/core/services/logger'
 import { DateTime } from 'luxon'
 import { inject } from '@adonisjs/core'
 import { OllamaService } from './ollama_service.js'
-import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
+import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
 import { toTitleCase } from '../utils/misc.js'
 
 @inject()
@@ -220,6 +220,59 @@ export class ChatService {
     }
   }
 
+  /** Counts the messages stored for a session; logs the error and returns 0 if the query fails. */
+  async getMessageCount(sessionId: number): Promise<number> {
+    try {
+      const [row] = await ChatMessage.query().where('session_id', sessionId).count('* as total')
+      return Number(row.$extras.total)
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : err
+      logger.error(`[ChatService] Failed to get message count for session ${sessionId}: ${reason}`)
+      return 0
+    }
+  }
+
+  /**
+   * Generates a title for a chat session from its first exchange and stores it via updateSession.
+   * Asks DEFAULT_QUERY_REWRITE_MODEL when it is listed by getModels(); if it is not listed, or it
+   * replies with nothing, the user message truncated to 57 chars plus '...' is used instead.
+   * Failures from Ollama or the session update are logged here rather than rethrown.
+   */
+  async generateTitle(sessionId: number, userMessage: string, assistantMessage: string) {
+    // Computed once: shared by the no-model, empty-reply and error paths
+    const fallbackTitle = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
+    try {
+      const models = await this.ollamaService.getModels()
+      const titleModelAvailable = models?.some((m) => m.name === DEFAULT_QUERY_REWRITE_MODEL)
+
+      let title = fallbackTitle
+      if (titleModelAvailable) {
+        const response = await this.ollamaService.chat({
+          model: DEFAULT_QUERY_REWRITE_MODEL,
+          messages: [
+            { role: 'system', content: SYSTEM_PROMPTS.title_generation },
+            { role: 'user', content: userMessage },
+            { role: 'assistant', content: assistantMessage },
+          ],
+        })
+        // `||` rather than `??` so an empty or whitespace-only reply also falls back
+        title = response?.message?.content?.trim() || fallbackTitle
+      }
+
+      await this.updateSession(sessionId, { title })
+      logger.info(`[ChatService] Generated title for session ${sessionId}: "${title}"`)
+    } catch (error) {
+      logger.error(
+        `[ChatService] Failed to generate title for session ${sessionId}: ${error instanceof Error ? error.message : error}`
+      )
+      try {
+        await this.updateSession(sessionId, { title: fallbackTitle })
+      } catch {
+        // Deliberately best-effort: if this also fails the session keeps its current title
+      }
+    }
+  }
+
   async deleteAllSessions() {
     try {
       await ChatSession.query().delete()
diff --git a/admin/app/validators/ollama.ts b/admin/app/validators/ollama.ts
index 2b754e8..d83ed3b 100644
--- a/admin/app/validators/ollama.ts
+++ b/admin/app/validators/ollama.ts
@@ -10,6 +10,7 @@ export const chatSchema = vine.compile(
       })
     ),
     stream: vine.boolean().optional(),
+    sessionId: vine.number().positive().optional(),
   })
 )
 
diff --git a/admin/constants/ollama.ts b/admin/constants/ollama.ts
index daf628b..dd0a1a6 100644
--- a/admin/constants/ollama.ts
+++ b/admin/constants/ollama.ts
@@ -83,9 +83,9 @@ IMPORTANT INSTRUCTIONS:
 1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
 2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
 3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
-4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer.
+4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. Do not mention the context if it's not relevant.
 5. Never fabricate information that isn't in the context or your training data.
-6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations.
+6. If you're unsure or you don't have enough information to answer the user's question, acknowledge the limitations.
 
 Format your response using markdown for readability.
 `,
@@ -113,6 +113,7 @@ Ensure that your suggestions are comma-seperated with no conjunctions like "and"
 Do not use line breaks, new lines, or extra spacing to separate the suggestions.
 Format: suggestion1, suggestion2, suggestion3
 `,
+  title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
   query_rewrite: `
 You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
 
diff --git a/admin/inertia/components/chat/index.tsx b/admin/inertia/components/chat/index.tsx
index 994a0ae..799877c 100644
--- a/admin/inertia/components/chat/index.tsx
+++ b/admin/inertia/components/chat/index.tsx
@@ -90,8 +90,9 @@ export default function Chat({
     mutationFn: (request: {
       model: string
       messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>
+      sessionId?: number
     }) => api.sendChatMessage({ ...request, stream: false }),
-    onSuccess: async (data, variables) => {
+    onSuccess: async (data) => {
       if (!data || !activeSessionId) {
         throw new Error('No response from Ollama')
       }
@@ -106,17 +107,9 @@ export default function Chat({
 
       setMessages((prev) => [...prev, assistantMessage])
 
-      // Save assistant message to backend
-      await api.addChatMessage(activeSessionId, 'assistant', assistantMessage.content)
-
-      // Update session title if it's a new chat
-      const currentSession = sessions.find((s) => s.id === activeSessionId)
-      if (currentSession && currentSession.title === 'New Chat') {
-        const userContent = variables.messages[variables.messages.length - 1].content
-        const newTitle = userContent.slice(0, 50) + (userContent.length > 50 ? '...' : '')
-        await api.updateChatSession(activeSessionId, { title: newTitle })
-        queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
-      }
+      // Refresh sessions to pick up backend-persisted messages and title
+      queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
+      setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
     },
     onError: (error) => {
       console.error('Error sending message:', error)
@@ -230,9 +223,6 @@ export default function Chat({
 
       setMessages((prev) => [...prev, userMessage])
 
-      // Save user message to backend
-      await api.addChatMessage(sessionId, 'user', content)
-
       const chatMessages = [
         ...messages.map((m) => ({ role: m.role, content: m.content })),
         { role: 'user' as const, content },
@@ -255,7 +245,7 @@ export default function Chat({
 
         try {
           await api.streamChatMessage(
-            { model: selectedModel || 'llama3.2', messages: chatMessages, stream: true },
+            { model: selectedModel || 'llama3.2', messages: chatMessages, stream: true, sessionId: sessionId ? Number(sessionId) : undefined },
             (chunkContent, chunkThinking, done) => {
               if (chunkThinking.length > 0 && thinkingStartTime === null) {
                 thinkingStartTime = Date.now()
@@ -336,24 +326,20 @@ export default function Chat({
             )
           )
 
-          await api.addChatMessage(sessionId, 'assistant', fullContent)
-
-          const currentSession = sessions.find((s) => s.id === sessionId)
-          if (currentSession && currentSession.title === 'New Chat') {
-            const newTitle = content.slice(0, 50) + (content.length > 50 ? '...' : '')
-            await api.updateChatSession(sessionId, { title: newTitle })
-            queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
-          }
+          // Refresh sessions to pick up backend-persisted messages and title
+          queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
+          setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
         }
       } else {
         // Non-streaming (legacy) path
         chatMutation.mutate({
           model: selectedModel || 'llama3.2',
           messages: chatMessages,
+          sessionId: sessionId ? Number(sessionId) : undefined,
         })
       }
     },
-    [activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled, sessions]
+    [activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled]
   )
 
   return (
diff --git a/admin/types/ollama.ts b/admin/types/ollama.ts
index 5d3e7c3..27b7e86 100644
--- a/admin/types/ollama.ts
+++ b/admin/types/ollama.ts
@@ -32,6 +32,7 @@ export type OllamaChatRequest = {
   model: string
   messages: OllamaChatMessage[]
   stream?: boolean
+  sessionId?: number
 }
 
 export type OllamaChatResponse = {