mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
feat(AI Assistant): query rewriting for enhanced context retrieval
This commit is contained in:
parent
921eef30d6
commit
276bdcd0b2
|
|
@ -5,13 +5,15 @@ import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
|
||||||
import { inject } from '@adonisjs/core'
|
import { inject } from '@adonisjs/core'
|
||||||
import type { HttpContext } from '@adonisjs/core/http'
|
import type { HttpContext } from '@adonisjs/core/http'
|
||||||
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
||||||
|
import logger from '@adonisjs/core/services/logger'
|
||||||
|
import type { Message } from 'ollama'
|
||||||
|
|
||||||
@inject()
|
@inject()
|
||||||
export default class OllamaController {
|
export default class OllamaController {
|
||||||
constructor(
|
constructor(
|
||||||
private ollamaService: OllamaService,
|
private ollamaService: OllamaService,
|
||||||
private ragService: RagService
|
private ragService: RagService
|
||||||
) {}
|
) { }
|
||||||
|
|
||||||
async availableModels({ request }: HttpContext) {
|
async availableModels({ request }: HttpContext) {
|
||||||
const reqData = await request.validateUsing(getAvailableModelsSchema)
|
const reqData = await request.validateUsing(getAvailableModelsSchema)
|
||||||
|
|
@ -25,9 +27,8 @@ export default class OllamaController {
|
||||||
async chat({ request }: HttpContext) {
|
async chat({ request }: HttpContext) {
|
||||||
const reqData = await request.validateUsing(chatSchema)
|
const reqData = await request.validateUsing(chatSchema)
|
||||||
|
|
||||||
/**If there are no system messages in the chat
|
// If there are no system messages in the chat
|
||||||
*(i.e. first message from the user)inject system prompts
|
// (i.e. first message from the user) inject system prompts
|
||||||
**/
|
|
||||||
const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
|
const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
|
||||||
if (!hasSystemMessage) {
|
if (!hasSystemMessage) {
|
||||||
const systemPrompt = {
|
const systemPrompt = {
|
||||||
|
|
@ -37,18 +38,22 @@ export default class OllamaController {
|
||||||
reqData.messages.unshift(systemPrompt)
|
reqData.messages.unshift(systemPrompt)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the last user message to use for RAG context retrieval
|
// Query rewriting for better RAG retrieval with manageable context
|
||||||
const lastUserMessage = [...reqData.messages].reverse().find((msg) => msg.role === 'user')
|
// Will return user's latest message if no rewriting is needed
|
||||||
|
const rewrittenQuery = await this.rewriteQueryWithContext(
|
||||||
|
reqData.messages,
|
||||||
|
reqData.model
|
||||||
|
)
|
||||||
|
|
||||||
if (lastUserMessage) {
|
if (rewrittenQuery) {
|
||||||
// Search for relevant context in the knowledge base
|
|
||||||
// Using lower threshold (0.3) with improved hybrid search
|
|
||||||
const relevantDocs = await this.ragService.searchSimilarDocuments(
|
const relevantDocs = await this.ragService.searchSimilarDocuments(
|
||||||
lastUserMessage.content,
|
rewrittenQuery,
|
||||||
5, // Retrieve top 5 most relevant chunks
|
5, // Top 5 most relevant chunks
|
||||||
0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
|
0.3 // Minimum similarity score of 0.3
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
|
||||||
|
|
||||||
// If relevant context is found, inject as a system message
|
// If relevant context is found, inject as a system message
|
||||||
if (relevantDocs.length > 0) {
|
if (relevantDocs.length > 0) {
|
||||||
const contextText = relevantDocs
|
const contextText = relevantDocs
|
||||||
|
|
@ -88,7 +93,59 @@ export default class OllamaController {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async installedModels({}: HttpContext) {
|
async installedModels({ }: HttpContext) {
|
||||||
return await this.ollamaService.getModels()
|
return await this.ollamaService.getModels()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reformulates the user's latest question into a standalone search query
 * by asking the model to fold in context from the recent conversation.
 *
 * Only `user` and `assistant` messages are treated as conversation history;
 * system messages (e.g. injected system prompts) are ignored so they are
 * neither mislabeled as assistant replies nor consume the history window.
 *
 * @param messages Full chat history as sent to the chat endpoint.
 * @param model Name of the model used to perform the rewrite.
 * @returns The rewritten query; the latest user message verbatim when no
 *   rewrite is needed, the model returns an empty reply, or the rewrite
 *   call fails; `null` when there is no usable user message.
 */
private async rewriteQueryWithContext(
  messages: Message[],
  model: string
): Promise<string | null> {
  // 6 messages roughly corresponds to the last 3 user/assistant turns
  const historyWindow = 6
  // Assistant replies are cut to this many characters to keep the prompt small
  const assistantPreviewLength = 200

  // Drop system (and any other non-dialogue) messages up front
  const dialogue = messages.filter((msg) => msg.role === 'user' || msg.role === 'assistant')

  // Shared fallback for every path where rewriting yields nothing usable
  const latestUserContent =
    [...dialogue].reverse().find((msg) => msg.role === 'user')?.content || null

  try {
    const recentMessages = dialogue.slice(-historyWindow)

    // With at most one user message there is no prior context to fold in
    const userMessages = recentMessages.filter((msg) => msg.role === 'user')
    if (userMessages.length <= 1) {
      return userMessages[0]?.content || null
    }

    const conversationContext = recentMessages
      .map((msg) => {
        if (msg.role === 'user') {
          return `User: "${msg.content}"`
        }
        const isTruncated = msg.content.length > assistantPreviewLength
        const preview = msg.content.slice(0, assistantPreviewLength) + (isTruncated ? '...' : '')
        return `Assistant: "${preview}"`
      })
      .join('\n')

    const response = await this.ollamaService.chat({
      model,
      messages: [
        {
          role: 'system',
          content: SYSTEM_PROMPTS.query_rewrite,
        },
        {
          role: 'user',
          content: `Conversation:\n${conversationContext}\n\nRewritten Query:`,
        },
      ],
    })

    const rewrittenQuery = response.message.content.trim()
    if (!rewrittenQuery) {
      // An empty rewrite would make the caller skip retrieval entirely;
      // searching with the user's own words is strictly better than nothing.
      logger.warn('[RAG] Query rewriting returned an empty result, using last user message')
      return latestUserContent
    }

    logger.info(`[RAG] Query rewritten: "${rewrittenQuery}"`)
    return rewrittenQuery
  } catch (error) {
    logger.error(
      `[RAG] Query rewriting failed: ${error instanceof Error ? error.message : error}`
    )
    // Fallback to last user message if rewriting fails
    return latestUserContent
  }
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@ import logger from '@adonisjs/core/services/logger'
|
||||||
import { DateTime } from 'luxon'
|
import { DateTime } from 'luxon'
|
||||||
import { inject } from '@adonisjs/core'
|
import { inject } from '@adonisjs/core'
|
||||||
import { OllamaService } from './ollama_service.js'
|
import { OllamaService } from './ollama_service.js'
|
||||||
import { ChatRequest } from 'ollama'
|
|
||||||
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
||||||
import { toTitleCase } from '../utils/misc.js'
|
import { toTitleCase } from '../utils/misc.js'
|
||||||
|
|
||||||
|
|
@ -12,15 +11,6 @@ import { toTitleCase } from '../utils/misc.js'
|
||||||
export class ChatService {
|
export class ChatService {
|
||||||
constructor(private ollamaService: OllamaService) {}
|
constructor(private ollamaService: OllamaService) {}
|
||||||
|
|
||||||
async chat(chatRequest: ChatRequest & { stream?: false }) {
|
|
||||||
try {
|
|
||||||
return await this.ollamaService.chat(chatRequest)
|
|
||||||
} catch (error) {
|
|
||||||
logger.error(`[ChatService] Chat error: ${error instanceof Error ? error.message : error}`)
|
|
||||||
throw new Error('Chat processing failed')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async getAllSessions() {
|
async getAllSessions() {
|
||||||
try {
|
try {
|
||||||
const sessions = await ChatSession.query().orderBy('updated_at', 'desc')
|
const sessions = await ChatSession.query().orderBy('updated_at', 'desc')
|
||||||
|
|
@ -230,9 +220,6 @@ export class ChatService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Delete all chat sessions and messages
|
|
||||||
*/
|
|
||||||
async deleteAllSessions() {
|
async deleteAllSessions() {
|
||||||
try {
|
try {
|
||||||
await ChatSession.query().delete()
|
await ChatSession.query().delete()
|
||||||
|
|
|
||||||
|
|
@ -107,5 +107,35 @@ The suggestions should be in title case.
|
||||||
Ensure that your suggestions are comma-seperated with no conjunctions like "and" or "or".
|
Ensure that your suggestions are comma-seperated with no conjunctions like "and" or "or".
|
||||||
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
|
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
|
||||||
Format: suggestion1, suggestion2, suggestion3
|
Format: suggestion1, suggestion2, suggestion3
|
||||||
|
`,
|
||||||
|
query_rewrite: `
|
||||||
|
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
|
||||||
|
|
||||||
|
Given the conversation history, rewrite the user's latest question to be a standalone, context-aware search query that will retrieve the most relevant information.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
1. Keep the rewritten query concise (under 150 words)
|
||||||
|
2. Include key entities, topics, and context from previous messages
|
||||||
|
3. Make it a clear, searchable query
|
||||||
|
4. Do NOT answer the question - only rewrite the user's query to be more effective for retrieval
|
||||||
|
5. Output ONLY the rewritten query, nothing else
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
Conversation:
|
||||||
|
User: "How do I install Gentoo?"
|
||||||
|
Assistant: [detailed installation guide]
|
||||||
|
User: "Is an internet connection required to install?"
|
||||||
|
|
||||||
|
Rewritten Query: "Is an internet connection required to install Gentoo Linux?"
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Conversation:
|
||||||
|
User: "What's the best way to preserve meat?"
|
||||||
|
Assistant: [preservation methods]
|
||||||
|
User: "How long does it last?"
|
||||||
|
|
||||||
|
Rewritten Query: "How long does preserved meat last using curing or smoking methods?"
|
||||||
`,
|
`,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user