feat(AI Assistant): query rewriting for enhanced context retrieval

This commit is contained in:
Jake Turner 2026-02-08 16:05:16 -08:00 committed by Jake Turner
parent 921eef30d6
commit 276bdcd0b2
3 changed files with 100 additions and 26 deletions

View File

@ -5,13 +5,15 @@ import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
import logger from '@adonisjs/core/services/logger'
import type { Message } from 'ollama'
@inject()
export default class OllamaController {
constructor(
private ollamaService: OllamaService,
private ragService: RagService
) {}
) { }
async availableModels({ request }: HttpContext) {
const reqData = await request.validateUsing(getAvailableModelsSchema)
@ -25,9 +27,8 @@ export default class OllamaController {
async chat({ request }: HttpContext) {
const reqData = await request.validateUsing(chatSchema)
/**If there are no system messages in the chat
*(i.e. first message from the user)inject system prompts
**/
// If there are no system messages in the chat
// (i.e. first message from the user) inject system prompts
const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
if (!hasSystemMessage) {
const systemPrompt = {
@ -37,18 +38,22 @@ export default class OllamaController {
reqData.messages.unshift(systemPrompt)
}
// Get the last user message to use for RAG context retrieval
const lastUserMessage = [...reqData.messages].reverse().find((msg) => msg.role === 'user')
// Query rewriting for better RAG retrieval with manageable context
// Will return user's latest message if no rewriting is needed
const rewrittenQuery = await this.rewriteQueryWithContext(
reqData.messages,
reqData.model
)
if (lastUserMessage) {
// Search for relevant context in the knowledge base
// Using lower threshold (0.3) with improved hybrid search
if (rewrittenQuery) {
const relevantDocs = await this.ragService.searchSimilarDocuments(
lastUserMessage.content,
5, // Retrieve top 5 most relevant chunks
0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
rewrittenQuery,
5, // Top 5 most relevant chunks
0.3 // Minimum similarity score of 0.3
)
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
// If relevant context is found, inject as a system message
if (relevantDocs.length > 0) {
const contextText = relevantDocs
@ -88,7 +93,59 @@ export default class OllamaController {
}
}
async installedModels({}: HttpContext) {
/**
 * Returns the result of `ollamaService.getModels()`.
 * The HTTP context is accepted but none of its properties are read.
 */
async installedModels({ }: HttpContext) {
  const models = await this.ollamaService.getModels()
  return models
}
/**
 * Builds a standalone retrieval query from the user's latest message and the
 * recent conversation by asking the given model to fold in earlier context.
 *
 * Falls back to the latest user message when there is nothing to rewrite
 * (at most one user turn in the recent window), when the model returns an
 * empty rewrite, or when the rewrite call throws.
 *
 * @param messages - Chat history; may contain system messages, which are
 *   excluded from the conversation sent to the rewrite model.
 * @param model - Model name passed to `ollamaService.chat` for the rewrite.
 * @returns The query to search with, or `null` when no non-empty user
 *   message exists.
 */
private async rewriteQueryWithContext(
  messages: Message[],
  model: string
): Promise<string | null> {
  // Single fallback shared by every "no usable rewrite" path below.
  // `||` is intentional: an empty user message is treated as "no query".
  const latestUserContent =
    [...messages].reverse().find((msg) => msg.role === 'user')?.content || null

  try {
    // Only user/assistant turns are conversation. System messages would
    // otherwise be labelled "Assistant" and sent in full to the rewrite model.
    // Keep the last 6 turns (roughly 3 exchanges).
    const recentMessages = messages
      .filter((msg) => msg.role === 'user' || msg.role === 'assistant')
      .slice(-6)

    // With at most one user turn there is no earlier context to fold in
    const userTurnCount = recentMessages.filter((msg) => msg.role === 'user').length
    if (userTurnCount <= 1) {
      return latestUserContent
    }

    const conversationContext = recentMessages
      .map((msg) => {
        const role = msg.role === 'user' ? 'User' : 'Assistant'
        // Truncate assistant messages to first 200 chars to keep context manageable
        const content =
          msg.role === 'assistant'
            ? msg.content.slice(0, 200) + (msg.content.length > 200 ? '...' : '')
            : msg.content
        return `${role}: "${content}"`
      })
      .join('\n')

    const response = await this.ollamaService.chat({
      model,
      messages: [
        {
          role: 'system',
          content: SYSTEM_PROMPTS.query_rewrite,
        },
        {
          role: 'user',
          content: `Conversation:\n${conversationContext}\n\nRewritten Query:`,
        },
      ],
    })

    const rewrittenQuery = response.message.content.trim()
    if (!rewrittenQuery) {
      // An empty rewrite would make the caller skip retrieval entirely;
      // searching with the user's own words is the better degradation.
      logger.warn('[RAG] Query rewriting returned an empty result; using last user message')
      return latestUserContent
    }

    logger.info(`[RAG] Query rewritten: "${rewrittenQuery}"`)
    return rewrittenQuery
  } catch (error) {
    logger.error(
      `[RAG] Query rewriting failed: ${error instanceof Error ? error.message : error}`
    )
    // Fallback to last user message if rewriting fails
    return latestUserContent
  }
}
}

View File

@ -4,7 +4,6 @@ import logger from '@adonisjs/core/services/logger'
import { DateTime } from 'luxon'
import { inject } from '@adonisjs/core'
import { OllamaService } from './ollama_service.js'
import { ChatRequest } from 'ollama'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { toTitleCase } from '../utils/misc.js'
@ -12,15 +11,6 @@ import { toTitleCase } from '../utils/misc.js'
export class ChatService {
constructor(private ollamaService: OllamaService) {}
/**
 * Forwards a non-streaming chat request to `ollamaService.chat`.
 *
 * @param chatRequest - Request forwarded unchanged to the Ollama service.
 * @returns Whatever `ollamaService.chat` resolves to.
 * @throws Error with the message 'Chat processing failed' when the call
 *   fails; the underlying failure is logged before being replaced.
 */
async chat(chatRequest: ChatRequest & { stream?: false }) {
  try {
    const response = await this.ollamaService.chat(chatRequest)
    return response
  } catch (err) {
    const reason = err instanceof Error ? err.message : err
    logger.error(`[ChatService] Chat error: ${reason}`)
    throw new Error('Chat processing failed')
  }
}
async getAllSessions() {
try {
const sessions = await ChatSession.query().orderBy('updated_at', 'desc')
@ -230,9 +220,6 @@ export class ChatService {
}
}
/**
* Delete all chat sessions and messages
*/
async deleteAllSessions() {
try {
await ChatSession.query().delete()

View File

@ -107,5 +107,35 @@ The suggestions should be in title case.
Ensure that your suggestions are comma-seperated with no conjunctions like "and" or "or".
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
Format: suggestion1, suggestion2, suggestion3
`,
query_rewrite: `
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
Given the conversation history, rewrite the user's latest question to be a standalone, context-aware search query that will retrieve the most relevant information.
Rules:
1. Keep the rewritten query concise (under 150 words)
2. Include key entities, topics, and context from previous messages
3. Make it a clear, searchable query
4. Do NOT answer the question - only rewrite the user's query to be more effective for retrieval
5. Output ONLY the rewritten query, nothing else
Examples:
Conversation:
User: "How do I install Gentoo?"
Assistant: [detailed installation guide]
User: "Is an internet connection required to install?"
Rewritten Query: "Is an internet connection required to install Gentoo Linux?"
---
Conversation:
User: "What's the best way to preserve meat?"
Assistant: [preservation methods]
User: "How long does it last?"
Rewritten Query: "How long does preserved meat last using curing or smoking methods?"
`,
}