fix(ai-chat): qwen2.5 loading on every chat message

qwen was getting loaded to generate the title for the chat but it was
also getting loaded on each new message even if you manually unloaded
the model.
This commit is contained in:
Henry Estela 2026-04-03 21:15:21 -07:00
parent fa93ed51e2
commit a6a5ea0d04
No known key found for this signature in database
GPG Key ID: 90439853E9E235BA
3 changed files with 39 additions and 37 deletions

View File

@ -8,7 +8,7 @@ import { modelNameSchema } from '#validators/download'
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http'
import { DEFAULT_QUERY_REWRITE_MODEL, RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { SERVICE_NAMES } from '../../constants/service_names.js'
import logger from '@adonisjs/core/services/logger'
type Message = { role: 'system' | 'user' | 'assistant'; content: string }
@ -59,7 +59,7 @@ export default class OllamaController {
// Query rewriting for better RAG retrieval with manageable context
// Will return user's latest message if no rewriting is needed
const rewrittenQuery = await this.rewriteQueryWithContext(reqData.messages)
const rewrittenQuery = await this.rewriteQueryWithContext(reqData.messages, reqData.model)
logger.debug(`[OllamaController] Rewritten query for RAG: "${rewrittenQuery}"`)
if (rewrittenQuery) {
@ -157,7 +157,7 @@ export default class OllamaController {
await this.chatService.addMessage(sessionId, 'assistant', fullContent)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, fullContent).catch((err) => {
this.chatService.generateTitle(sessionId, userContent, fullContent, reqData.model).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
@ -172,7 +172,7 @@ export default class OllamaController {
await this.chatService.addMessage(sessionId, 'assistant', result.message.content)
const messageCount = await this.chatService.getMessageCount(sessionId)
if (messageCount <= 2 && userContent) {
this.chatService.generateTitle(sessionId, userContent, result.message.content).catch((err) => {
this.chatService.generateTitle(sessionId, userContent, result.message.content, reqData.model).catch((err) => {
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
})
}
@ -312,9 +312,18 @@ export default class OllamaController {
}
private async rewriteQueryWithContext(
messages: Message[]
messages: Message[],
model: string
): Promise<string | null> {
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
try {
// Skip the entire RAG pipeline if there are no documents to search
const hasDocuments = await this.ragService.hasDocuments()
if (!hasDocuments) {
return null
}
// Get recent conversation history (last 6 messages for 3 turns)
const recentMessages = messages.slice(-6)
@ -322,7 +331,7 @@ export default class OllamaController {
// little RAG benefit until there is enough context to matter.
const userMessages = recentMessages.filter(msg => msg.role === 'user')
if (userMessages.length <= 2) {
return userMessages[userMessages.length - 1]?.content || null
return lastUserMessage?.content || null
}
const conversationContext = recentMessages
@ -336,17 +345,8 @@ export default class OllamaController {
})
.join('\n')
const installedModels = await this.ollamaService.getModels(true)
const rewriteModelAvailable = installedModels?.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
if (!rewriteModelAvailable) {
logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
return lastUserMessage?.content || null
}
// FUTURE ENHANCEMENT: allow the user to specify which model to use for rewriting
const response = await this.ollamaService.chat({
model: DEFAULT_QUERY_REWRITE_MODEL,
model,
messages: [
{
role: 'system',
@ -367,7 +367,6 @@ export default class OllamaController {
`[RAG] Query rewriting failed: ${error instanceof Error ? error.message : error}`
)
// Fallback to last user message if rewriting fails
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
return lastUserMessage?.content || null
}
}

View File

@ -4,7 +4,7 @@ import logger from '@adonisjs/core/services/logger'
import { DateTime } from 'luxon'
import { inject } from '@adonisjs/core'
import { OllamaService } from './ollama_service.js'
import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
import { toTitleCase } from '../utils/misc.js'
@inject()
@ -232,29 +232,22 @@ export class ChatService {
}
}
async generateTitle(sessionId: number, userMessage: string, assistantMessage: string) {
async generateTitle(sessionId: number, userMessage: string, assistantMessage: string, model: string) {
try {
const models = await this.ollamaService.getModels()
const titleModelAvailable = models?.some((m) => m.name === DEFAULT_QUERY_REWRITE_MODEL)
let title: string
if (!titleModelAvailable) {
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
} else {
const response = await this.ollamaService.chat({
model: DEFAULT_QUERY_REWRITE_MODEL,
messages: [
{ role: 'system', content: SYSTEM_PROMPTS.title_generation },
{ role: 'user', content: userMessage },
{ role: 'assistant', content: assistantMessage },
],
})
const response = await this.ollamaService.chat({
model,
messages: [
{ role: 'system', content: SYSTEM_PROMPTS.title_generation },
{ role: 'user', content: userMessage },
{ role: 'assistant', content: assistantMessage },
],
})
title = response?.message?.content?.trim()
if (!title) {
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
}
title = response?.message?.content?.trim()
if (!title) {
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
}
await this.updateSession(sessionId, { title })

View File

@ -1013,6 +1013,16 @@ export class RagService {
* Retrieve all unique source files that have been stored in the knowledge base.
* @returns Array of unique full source paths
*/
public async hasDocuments(): Promise<boolean> {
try {
await this._ensureCollection(RagService.CONTENT_COLLECTION_NAME, RagService.EMBEDDING_DIMENSION)
const collectionInfo = await this.qdrant!.getCollection(RagService.CONTENT_COLLECTION_NAME)
return (collectionInfo.points_count ?? 0) > 0
} catch {
return false
}
}
public async getStoredFiles(): Promise<string[]> {
try {
await this._ensureCollection(