From 276bdcd0b254d40084d4d7b2ac8f4f278c02b475 Mon Sep 17 00:00:00 2001
From: Jake Turner
Date: Sun, 8 Feb 2026 16:05:16 -0800
Subject: [PATCH] feat(AI Assistant): query rewriting for enhanced context retrieval

---
Review notes (not part of the commit message):
- rewriteQueryWithContext builds its transcript from user/assistant messages
  only, so the injected system prompt is no longer sent to the rewriter
  labelled as an "Assistant" turn.
- An empty rewrite now falls back to the latest user message instead of
  silently skipping retrieval in chat().
- Blob ids on the "index" lines are those of the originally generated patch.

 admin/app/controllers/ollama_controller.ts | 101 ++++++++++++++++++---
 admin/app/services/chat_service.ts         |  13 ---
 admin/constants/ollama.ts                  |  30 ++++++
 3 files changed, 118 insertions(+), 26 deletions(-)

diff --git a/admin/app/controllers/ollama_controller.ts b/admin/app/controllers/ollama_controller.ts
index 81675c6..723759d 100644
--- a/admin/app/controllers/ollama_controller.ts
+++ b/admin/app/controllers/ollama_controller.ts
@@ -5,13 +5,15 @@ import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
 import { inject } from '@adonisjs/core'
 import type { HttpContext } from '@adonisjs/core/http'
 import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
+import logger from '@adonisjs/core/services/logger'
+import type { Message } from 'ollama'
 
 @inject()
 export default class OllamaController {
   constructor(
     private ollamaService: OllamaService,
     private ragService: RagService
-  ) {}
+  ) { }
 
   async availableModels({ request }: HttpContext) {
     const reqData = await request.validateUsing(getAvailableModelsSchema)
@@ -25,9 +27,8 @@ export default class OllamaController {
   async chat({ request }: HttpContext) {
     const reqData = await request.validateUsing(chatSchema)
 
-    /**If there are no system messages in the chat
-     *(i.e. first message from the user)inject system prompts
-     **/
+    // If there are no system messages in the chat
+    // (i.e. first message from the user) inject system prompts
     const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
     if (!hasSystemMessage) {
       const systemPrompt = {
@@ -37,18 +38,22 @@ export default class OllamaController {
       reqData.messages.unshift(systemPrompt)
     }
 
-    // Get the last user message to use for RAG context retrieval
-    const lastUserMessage = [...reqData.messages].reverse().find((msg) => msg.role === 'user')
+    // Query rewriting for better RAG retrieval with manageable context
+    // Will return user's latest message if no rewriting is needed
+    const rewrittenQuery = await this.rewriteQueryWithContext(
+      reqData.messages,
+      reqData.model
+    )
 
-    if (lastUserMessage) {
-      // Search for relevant context in the knowledge base
-      // Using lower threshold (0.3) with improved hybrid search
+    if (rewrittenQuery) {
       const relevantDocs = await this.ragService.searchSimilarDocuments(
-        lastUserMessage.content,
-        5, // Retrieve top 5 most relevant chunks
-        0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
+        rewrittenQuery,
+        5, // Top 5 most relevant chunks
+        0.3 // Minimum similarity score of 0.3
       )
 
+      logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
+
       // If relevant context is found, inject as a system message
       if (relevantDocs.length > 0) {
         const contextText = relevantDocs
@@ -88,7 +93,77 @@ export default class OllamaController {
     }
   }
 
-  async installedModels({}: HttpContext) {
+  async installedModels({ }: HttpContext) {
     return await this.ollamaService.getModels()
   }
+
+  /**
+   * Rewrites the latest user question into a standalone search query for RAG
+   * retrieval, using recent user/assistant turns as context. Returns the latest
+   * user message unchanged when there is no earlier turn to draw on, when the
+   * model returns an empty rewrite, or when the rewrite call fails. Returns
+   * null when the conversation contains no user message.
+   */
+  private async rewriteQueryWithContext(
+    messages: Message[],
+    model: string
+  ): Promise<string | null> {
+    // Fallback used whenever rewriting is skipped, fails, or yields nothing
+    const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
+    const fallbackQuery = lastUserMessage?.content || null
+
+    try {
+      // Get recent conversation history (last 6 user/assistant messages for 3 turns).
+      // System messages are skipped so prompts are not sent to the rewriter as "Assistant" turns
+      const recentMessages = messages
+        .filter(msg => msg.role === 'user' || msg.role === 'assistant')
+        .slice(-6)
+
+      // If there's only one user message, no rewriting needed
+      const userMessages = recentMessages.filter(msg => msg.role === 'user')
+      if (userMessages.length <= 1) {
+        return fallbackQuery
+      }
+
+      const conversationContext = recentMessages
+        .map(msg => {
+          const role = msg.role === 'user' ? 'User' : 'Assistant'
+          // Truncate assistant messages to first 200 chars to keep context manageable
+          const content = msg.role === 'assistant'
+            ? msg.content.slice(0, 200) + (msg.content.length > 200 ? '...' : '')
+            : msg.content
+          return `${role}: "${content}"`
+        })
+        .join('\n')
+
+      const response = await this.ollamaService.chat({
+        model,
+        messages: [
+          {
+            role: 'system',
+            content: SYSTEM_PROMPTS.query_rewrite,
+          },
+          {
+            role: 'user',
+            content: `Conversation:\n${conversationContext}\n\nRewritten Query:`,
+          },
+        ],
+      })
+
+      const rewrittenQuery = response.message.content.trim()
+      if (!rewrittenQuery) {
+        // An empty rewrite would make chat() skip retrieval entirely; use the raw question
+        logger.warn('[RAG] Query rewriting returned an empty result, using last user message')
+        return fallbackQuery
+      }
+      logger.info(`[RAG] Query rewritten: "${rewrittenQuery}"`)
+      return rewrittenQuery
+    } catch (error) {
+      logger.error(
+        `[RAG] Query rewriting failed: ${error instanceof Error ? error.message : error}`
+      )
+      // Fallback to last user message if rewriting fails
+      return fallbackQuery
+    }
+  }
 }
diff --git a/admin/app/services/chat_service.ts b/admin/app/services/chat_service.ts
index 48a1328..3bfb5d0 100644
--- a/admin/app/services/chat_service.ts
+++ b/admin/app/services/chat_service.ts
@@ -4,7 +4,6 @@ import logger from '@adonisjs/core/services/logger'
 import { DateTime } from 'luxon'
 import { inject } from '@adonisjs/core'
 import { OllamaService } from './ollama_service.js'
-import { ChatRequest } from 'ollama'
 import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
 import { toTitleCase } from '../utils/misc.js'
 
@@ -12,15 +11,6 @@ import { toTitleCase } from '../utils/misc.js'
 export class ChatService {
   constructor(private ollamaService: OllamaService) {}
 
-  async chat(chatRequest: ChatRequest & { stream?: false }) {
-    try {
-      return await this.ollamaService.chat(chatRequest)
-    } catch (error) {
-      logger.error(`[ChatService] Chat error: ${error instanceof Error ? error.message : error}`)
-      throw new Error('Chat processing failed')
-    }
-  }
-
   async getAllSessions() {
     try {
       const sessions = await ChatSession.query().orderBy('updated_at', 'desc')
@@ -230,9 +220,6 @@ export class ChatService {
     }
   }
 
-  /**
-   * Delete all chat sessions and messages
-   */
   async deleteAllSessions() {
     try {
       await ChatSession.query().delete()
diff --git a/admin/constants/ollama.ts b/admin/constants/ollama.ts
index 2e42fd8..1b65c80 100644
--- a/admin/constants/ollama.ts
+++ b/admin/constants/ollama.ts
@@ -107,5 +107,35 @@ The suggestions should be in title case.
 Ensure that your suggestions are comma-seperated with no conjunctions like "and" or "or".
 Do not use line breaks, new lines, or extra spacing to separate the suggestions.
 Format: suggestion1, suggestion2, suggestion3
+`,
+  query_rewrite: `
+You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
+
+Given the conversation history, rewrite the user's latest question to be a standalone, context-aware search query that will retrieve the most relevant information.
+
+Rules:
+1. Keep the rewritten query concise (under 150 words)
+2. Include key entities, topics, and context from previous messages
+3. Make it a clear, searchable query
+4. Do NOT answer the question - only rewrite the user's query to be more effective for retrieval
+5. Output ONLY the rewritten query, nothing else
+
+Examples:
+
+Conversation:
+User: "How do I install Gentoo?"
+Assistant: [detailed installation guide]
+User: "Is an internet connection required to install?"
+
+Rewritten Query: "Is an internet connection required to install Gentoo Linux?"
+
+---
+
+Conversation:
+User: "What's the best way to preserve meat?"
+Assistant: [preservation methods]
+User: "How long does it last?"
+
+Rewritten Query: "How long does preserved meat last using curing or smoking methods?"
 `,
 }