feat(AI Assistant): query rewriting for enhanced context retrieval

This commit is contained in:
Jake Turner 2026-02-08 16:05:16 -08:00 committed by Jake Turner
parent 921eef30d6
commit 276bdcd0b2
3 changed files with 100 additions and 26 deletions

View File

@ -5,13 +5,15 @@ import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http' import type { HttpContext } from '@adonisjs/core/http'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js' import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
import logger from '@adonisjs/core/services/logger'
import type { Message } from 'ollama'
@inject() @inject()
export default class OllamaController { export default class OllamaController {
constructor( constructor(
private ollamaService: OllamaService, private ollamaService: OllamaService,
private ragService: RagService private ragService: RagService
) {} ) { }
async availableModels({ request }: HttpContext) { async availableModels({ request }: HttpContext) {
const reqData = await request.validateUsing(getAvailableModelsSchema) const reqData = await request.validateUsing(getAvailableModelsSchema)
@ -25,9 +27,8 @@ export default class OllamaController {
async chat({ request }: HttpContext) { async chat({ request }: HttpContext) {
const reqData = await request.validateUsing(chatSchema) const reqData = await request.validateUsing(chatSchema)
/**If there are no system messages in the chat // If there are no system messages in the chat
*(i.e. first message from the user)inject system prompts // (i.e. first message from the user) inject system prompts
**/
const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system') const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
if (!hasSystemMessage) { if (!hasSystemMessage) {
const systemPrompt = { const systemPrompt = {
@ -37,18 +38,22 @@ export default class OllamaController {
reqData.messages.unshift(systemPrompt) reqData.messages.unshift(systemPrompt)
} }
// Get the last user message to use for RAG context retrieval // Query rewriting for better RAG retrieval with manageable context
const lastUserMessage = [...reqData.messages].reverse().find((msg) => msg.role === 'user') // Will return user's latest message if no rewriting is needed
const rewrittenQuery = await this.rewriteQueryWithContext(
reqData.messages,
reqData.model
)
if (lastUserMessage) { if (rewrittenQuery) {
// Search for relevant context in the knowledge base
// Using lower threshold (0.3) with improved hybrid search
const relevantDocs = await this.ragService.searchSimilarDocuments( const relevantDocs = await this.ragService.searchSimilarDocuments(
lastUserMessage.content, rewrittenQuery,
5, // Retrieve top 5 most relevant chunks 5, // Top 5 most relevant chunks
0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall) 0.3 // Minimum similarity score of 0.3
) )
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
// If relevant context is found, inject as a system message // If relevant context is found, inject as a system message
if (relevantDocs.length > 0) { if (relevantDocs.length > 0) {
const contextText = relevantDocs const contextText = relevantDocs
@ -88,7 +93,59 @@ export default class OllamaController {
} }
} }
async installedModels({}: HttpContext) { async installedModels({ }: HttpContext) {
return await this.ollamaService.getModels() return await this.ollamaService.getModels()
} }
private async rewriteQueryWithContext(
messages: Message[],
model: string
): Promise<string | null> {
try {
// Get recent conversation history (last 6 messages for 3 turns)
const recentMessages = messages.slice(-6)
// If there's only one user message, no rewriting needed
const userMessages = recentMessages.filter(msg => msg.role === 'user')
if (userMessages.length <= 1) {
return userMessages[0]?.content || null
}
const conversationContext = recentMessages
.map(msg => {
const role = msg.role === 'user' ? 'User' : 'Assistant'
// Truncate assistant messages to first 200 chars to keep context manageable
const content = msg.role === 'assistant'
? msg.content.slice(0, 200) + (msg.content.length > 200 ? '...' : '')
: msg.content
return `${role}: "${content}"`
})
.join('\n')
const response = await this.ollamaService.chat({
model,
messages: [
{
role: 'system',
content: SYSTEM_PROMPTS.query_rewrite,
},
{
role: 'user',
content: `Conversation:\n${conversationContext}\n\nRewritten Query:`,
},
],
})
const rewrittenQuery = response.message.content.trim()
logger.info(`[RAG] Query rewritten: "${rewrittenQuery}"`)
return rewrittenQuery
} catch (error) {
logger.error(
`[RAG] Query rewriting failed: ${error instanceof Error ? error.message : error}`
)
// Fallback to last user message if rewriting fails
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
return lastUserMessage?.content || null
}
}
} }

View File

@ -4,7 +4,6 @@ import logger from '@adonisjs/core/services/logger'
import { DateTime } from 'luxon' import { DateTime } from 'luxon'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import { OllamaService } from './ollama_service.js' import { OllamaService } from './ollama_service.js'
import { ChatRequest } from 'ollama'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js' import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { toTitleCase } from '../utils/misc.js' import { toTitleCase } from '../utils/misc.js'
@ -12,15 +11,6 @@ import { toTitleCase } from '../utils/misc.js'
export class ChatService { export class ChatService {
constructor(private ollamaService: OllamaService) {} constructor(private ollamaService: OllamaService) {}
async chat(chatRequest: ChatRequest & { stream?: false }) {
try {
return await this.ollamaService.chat(chatRequest)
} catch (error) {
logger.error(`[ChatService] Chat error: ${error instanceof Error ? error.message : error}`)
throw new Error('Chat processing failed')
}
}
async getAllSessions() { async getAllSessions() {
try { try {
const sessions = await ChatSession.query().orderBy('updated_at', 'desc') const sessions = await ChatSession.query().orderBy('updated_at', 'desc')
@ -230,9 +220,6 @@ export class ChatService {
} }
} }
/**
* Delete all chat sessions and messages
*/
async deleteAllSessions() { async deleteAllSessions() {
try { try {
await ChatSession.query().delete() await ChatSession.query().delete()

View File

@ -107,5 +107,35 @@ The suggestions should be in title case.
Ensure that your suggestions are comma-separated with no conjunctions like "and" or "or". Ensure that your suggestions are comma-separated with no conjunctions like "and" or "or".
Do not use line breaks, new lines, or extra spacing to separate the suggestions. Do not use line breaks, new lines, or extra spacing to separate the suggestions.
Format: suggestion1, suggestion2, suggestion3 Format: suggestion1, suggestion2, suggestion3
`,
query_rewrite: `
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
Given the conversation history, rewrite the user's latest question to be a standalone, context-aware search query that will retrieve the most relevant information.
Rules:
1. Keep the rewritten query concise (under 150 words)
2. Include key entities, topics, and context from previous messages
3. Make it a clear, searchable query
4. Do NOT answer the question - only rewrite the user's query to be more effective for retrieval
5. Output ONLY the rewritten query, nothing else
Examples:
Conversation:
User: "How do I install Gentoo?"
Assistant: [detailed installation guide]
User: "Is an internet connection required to install?"
Rewritten Query: "Is an internet connection required to install Gentoo Linux?"
---
Conversation:
User: "What's the best way to preserve meat?"
Assistant: [preservation methods]
User: "How long does it last?"
Rewritten Query: "How long does preserved meat last using curing or smoking methods?"
`, `,
} }