mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
feat(AI Assistant): performance improvements and smarter RAG context usage
This commit is contained in:
parent
460756f581
commit
96e5027055
|
|
@ -5,7 +5,7 @@ import { modelNameSchema } from '#validators/download'
|
||||||
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
|
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
|
||||||
import { inject } from '@adonisjs/core'
|
import { inject } from '@adonisjs/core'
|
||||||
import type { HttpContext } from '@adonisjs/core/http'
|
import type { HttpContext } from '@adonisjs/core/http'
|
||||||
import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
import { DEFAULT_QUERY_REWRITE_MODEL, RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
||||||
import logger from '@adonisjs/core/services/logger'
|
import logger from '@adonisjs/core/services/logger'
|
||||||
import type { Message } from 'ollama'
|
import type { Message } from 'ollama'
|
||||||
|
|
||||||
|
|
@ -66,9 +66,28 @@ export default class OllamaController {
|
||||||
|
|
||||||
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
|
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
|
||||||
|
|
||||||
// If relevant context is found, inject as a system message
|
// If relevant context is found, inject as a system message with adaptive limits
|
||||||
if (relevantDocs.length > 0) {
|
if (relevantDocs.length > 0) {
|
||||||
const contextText = relevantDocs
|
// Determine context budget based on model size
|
||||||
|
const { maxResults, maxTokens } = this.getContextLimitsForModel(reqData.model)
|
||||||
|
let trimmedDocs = relevantDocs.slice(0, maxResults)
|
||||||
|
|
||||||
|
// Apply token cap if set (estimate ~4 chars per token)
|
||||||
|
// Always include the first (most relevant) result — the cap only gates subsequent results
|
||||||
|
if (maxTokens > 0) {
|
||||||
|
const charCap = maxTokens * 4
|
||||||
|
let totalChars = 0
|
||||||
|
trimmedDocs = trimmedDocs.filter((doc, idx) => {
|
||||||
|
totalChars += doc.text.length
|
||||||
|
return idx === 0 || totalChars <= charCap
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
`[RAG] Injecting ${trimmedDocs.length}/${relevantDocs.length} results (model: ${reqData.model}, maxResults: ${maxResults}, maxTokens: ${maxTokens || 'unlimited'})`
|
||||||
|
)
|
||||||
|
|
||||||
|
const contextText = trimmedDocs
|
||||||
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
|
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
|
||||||
.join('\n\n')
|
.join('\n\n')
|
||||||
|
|
||||||
|
|
@ -174,6 +193,25 @@ export default class OllamaController {
|
||||||
return await this.ollamaService.getModels()
|
return await this.ollamaService.getModels()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Picks the RAG context budget for a model from the parameter count encoded in its name.
 *
 * Size detection, in priority order:
 *  1. A billions marker such as "3b", "1.5b" or "70B". A mixture-of-experts style
 *     prefix ("8x7b") is multiplied out so the model is ranked by its combined size.
 *  2. A millions marker such as "135m", converted to billions. It is only consulted
 *     when no billions marker exists, so an unrelated "...m" suffix elsewhere in the
 *     name cannot override an explicit billions marker.
 *  3. Otherwise the model is assumed to be 8B.
 *
 * A marker only counts when its unit letter is not followed by another letter or
 * digit, so fragments like "4bit" are not mistaken for a 4B model.
 *
 * @param modelName Model name/tag, e.g. "llama3.2:3b", "qwen2.5:1.5b", "gemma:7b".
 * @returns The limits of the first RAG_CONTEXT_LIMITS tier whose `maxParams` is at
 *          least the detected size. A `maxTokens` of 0 means "no token cap".
 */
private getContextLimitsForModel(modelName: string): { maxResults: number; maxTokens: number } {
  const DEFAULT_PARAM_BILLIONS = 8 // used when the name carries no size marker

  // Optional "<n>x" multiplier, the size itself, then a "b" that ends the marker.
  const billionsMatch = modelName.match(/(?:(\d+)x)?(\d+(?:\.\d+)?)b(?![a-z0-9])/i)
  // Sub-billion models are tagged in millions (e.g. "135m").
  const millionsMatch = billionsMatch ? null : modelName.match(/(\d+(?:\.\d+)?)m(?![a-z0-9])/i)

  let paramBillions = DEFAULT_PARAM_BILLIONS
  if (billionsMatch) {
    const multiplier = billionsMatch[1] ? parseInt(billionsMatch[1], 10) : 1
    paramBillions = multiplier * parseFloat(billionsMatch[2])
  } else if (millionsMatch) {
    paramBillions = parseFloat(millionsMatch[1]) / 1000
  }

  // Tiers are ordered by ascending maxParams, so the first one that fits wins.
  const tier = RAG_CONTEXT_LIMITS.find(({ maxParams }) => paramBillions <= maxParams)
  if (tier) {
    return { maxResults: tier.maxResults, maxTokens: tier.maxTokens }
  }

  // Fallback: no token cap (only reachable if no tier matched at all)
  return { maxResults: 5, maxTokens: 0 }
}
|
||||||
|
|
||||||
private async rewriteQueryWithContext(
|
private async rewriteQueryWithContext(
|
||||||
messages: Message[]
|
messages: Message[]
|
||||||
): Promise<string | null> {
|
): Promise<string | null> {
|
||||||
|
|
@ -199,8 +237,8 @@ export default class OllamaController {
|
||||||
})
|
})
|
||||||
.join('\n')
|
.join('\n')
|
||||||
|
|
||||||
const availableModels = await this.ollamaService.getAvailableModels({ query: null, limit: 500 })
|
const installedModels = await this.ollamaService.getModels(true)
|
||||||
const rewriteModelAvailable = availableModels?.models.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
|
const rewriteModelAvailable = installedModels?.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
|
||||||
if (!rewriteModelAvailable) {
|
if (!rewriteModelAvailable) {
|
||||||
logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
|
logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
|
||||||
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
|
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,13 @@ import { join, resolve, sep } from 'node:path'
|
||||||
import KVStore from '#models/kv_store'
|
import KVStore from '#models/kv_store'
|
||||||
import { ZIMExtractionService } from './zim_extraction_service.js'
|
import { ZIMExtractionService } from './zim_extraction_service.js'
|
||||||
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
|
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
|
||||||
|
import { ProcessAndEmbedFileResponse, ProcessZIMFileResponse, RAGResult, RerankedRAGResult } from '../../types/rag.js'
|
||||||
|
|
||||||
@inject()
|
@inject()
|
||||||
export class RagService {
|
export class RagService {
|
||||||
private qdrant: QdrantClient | null = null
|
private qdrant: QdrantClient | null = null
|
||||||
private qdrantInitPromise: Promise<void> | null = null
|
private qdrantInitPromise: Promise<void> | null = null
|
||||||
|
private embeddingModelVerified = false
|
||||||
public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
|
public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
|
||||||
public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
|
public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
|
||||||
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
|
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
|
||||||
|
|
@ -33,6 +35,7 @@ export class RagService {
|
||||||
// Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
|
// Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
|
||||||
public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
|
public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
|
||||||
public static SEARCH_QUERY_PREFIX = 'search_query: '
|
public static SEARCH_QUERY_PREFIX = 'search_query: '
|
||||||
|
public static EMBEDDING_BATCH_SIZE = 8 // Conservative batch size for low-end hardware
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private dockerService: DockerService,
|
private dockerService: DockerService,
|
||||||
|
|
@ -75,6 +78,16 @@ export class RagService {
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create payload indexes for faster filtering (idempotent — Qdrant ignores duplicates)
|
||||||
|
await this.qdrant!.createPayloadIndex(collectionName, {
|
||||||
|
field_name: 'source',
|
||||||
|
field_schema: 'keyword',
|
||||||
|
})
|
||||||
|
await this.qdrant!.createPayloadIndex(collectionName, {
|
||||||
|
field_name: 'content_type',
|
||||||
|
field_schema: 'keyword',
|
||||||
|
})
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('Error ensuring Qdrant collection:', error)
|
logger.error('Error ensuring Qdrant collection:', error)
|
||||||
throw error
|
throw error
|
||||||
|
|
@ -148,14 +161,57 @@ export class RagService {
|
||||||
/**
|
/**
|
||||||
* Preprocesses a query to improve retrieval by expanding it with context.
|
* Preprocesses a query to improve retrieval by expanding it with context.
|
||||||
* This helps match documents even when using different terminology.
|
* This helps match documents even when using different terminology.
|
||||||
|
* TODO: We could probably move this to a separate QueryPreprocessor class if it grows more complex, but for now it's manageable here.
|
||||||
*/
|
*/
|
||||||
|
/**
 * Domain abbreviations/acronyms (lower-case keys) mapped to the text appended to a
 * query when that word appears in it.
 *
 * The table is built on a null-prototype object on purpose: it is indexed with
 * arbitrary words taken from user queries, and on a plain object literal words such
 * as "constructor", "toString" or "__proto__" would resolve to inherited
 * Object.prototype members (truthy, non-string values) and be treated as expansions.
 *
 * NOTE(review): the 'ghi' expansion ("get home in") looks truncated — confirm the intended phrase.
 */
private static QUERY_EXPANSION_DICTIONARY: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    'bob': 'bug out bag',
    'bov': 'bug out vehicle',
    'bol': 'bug out location',
    'edc': 'every day carry',
    'mre': 'meal ready to eat',
    'shtf': 'shit hits the fan',
    'teotwawki': 'the end of the world as we know it',
    'opsec': 'operational security',
    'ifak': 'individual first aid kit',
    'ghb': 'get home bag',
    'ghi': 'get home in',
    'wrol': 'without rule of law',
    'emp': 'electromagnetic pulse',
    'ham': 'ham amateur radio',
    'nbr': 'nuclear biological radiological',
    'cbrn': 'chemical biological radiological nuclear',
    'sar': 'search and rescue',
    'comms': 'communications radio',
    'fifo': 'first in first out',
    'mylar': 'mylar bag food storage',
    'paracord': 'paracord 550 cord',
    'ferro': 'ferro rod fire starter',
    'bivvy': 'bivvy bivy emergency shelter',
    'bdu': 'battle dress uniform',
    'gmrs': 'general mobile radio service',
    'frs': 'family radio service',
    'nbc': 'nuclear biological chemical',
  }
)
|
||||||
|
|
||||||
private preprocessQuery(query: string): string {
|
private preprocessQuery(query: string): string {
|
||||||
// Future: this is a placeholder for more advanced query expansion techniques.
|
let expanded = query.trim()
|
||||||
// For now, we simply trim whitespace. Improvements could include:
|
|
||||||
// - Synonym expansion using a thesaurus
|
// Expand known domain abbreviations/acronyms
|
||||||
// - Adding related terms based on domain knowledge
|
const words = expanded.toLowerCase().split(/\s+/)
|
||||||
// - Using a language model to rephrase or elaborate the query
|
const expansions: string[] = []
|
||||||
const expanded = query.trim()
|
|
||||||
|
for (const word of words) {
|
||||||
|
const cleaned = word.replace(/[^\w]/g, '')
|
||||||
|
if (RagService.QUERY_EXPANSION_DICTIONARY[cleaned]) {
|
||||||
|
expansions.push(RagService.QUERY_EXPANSION_DICTIONARY[cleaned])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (expansions.length > 0) {
|
||||||
|
expanded = `${expanded} ${expansions.join(' ')}`
|
||||||
|
logger.debug(`[RAG] Query expanded with domain terms: "${expanded}"`)
|
||||||
|
}
|
||||||
|
|
||||||
logger.debug(`[RAG] Original query: "${query}"`)
|
logger.debug(`[RAG] Original query: "${query}"`)
|
||||||
logger.debug(`[RAG] Preprocessed query: "${expanded}"`)
|
logger.debug(`[RAG] Preprocessed query: "${expanded}"`)
|
||||||
return expanded
|
return expanded
|
||||||
|
|
@ -187,22 +243,26 @@ export class RagService {
|
||||||
RagService.EMBEDDING_DIMENSION
|
RagService.EMBEDDING_DIMENSION
|
||||||
)
|
)
|
||||||
|
|
||||||
const allModels = await this.ollamaService.getModels(true)
|
if (!this.embeddingModelVerified) {
|
||||||
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
const allModels = await this.ollamaService.getModels(true)
|
||||||
|
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
||||||
|
|
||||||
if (!embeddingModel) {
|
if (!embeddingModel) {
|
||||||
try {
|
try {
|
||||||
const downloadResult = await this.ollamaService.downloadModel(RagService.EMBEDDING_MODEL)
|
const downloadResult = await this.ollamaService.downloadModel(RagService.EMBEDDING_MODEL)
|
||||||
if (!downloadResult.success) {
|
if (!downloadResult.success) {
|
||||||
throw new Error(downloadResult.message || 'Unknown error during model download')
|
throw new Error(downloadResult.message || 'Unknown error during model download')
|
||||||
|
}
|
||||||
|
} catch (modelError) {
|
||||||
|
logger.error(
|
||||||
|
`[RAG] Embedding model ${RagService.EMBEDDING_MODEL} not found locally and failed to download:`,
|
||||||
|
modelError
|
||||||
|
)
|
||||||
|
this.embeddingModelVerified = false
|
||||||
|
return null
|
||||||
}
|
}
|
||||||
} catch (modelError) {
|
|
||||||
logger.error(
|
|
||||||
`[RAG] Embedding model ${RagService.EMBEDDING_MODEL} not found locally and failed to download:`,
|
|
||||||
modelError
|
|
||||||
)
|
|
||||||
return null
|
|
||||||
}
|
}
|
||||||
|
this.embeddingModelVerified = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// TokenChunker uses character-based tokenization (1 char = 1 token)
|
// TokenChunker uses character-based tokenization (1 char = 1 token)
|
||||||
|
|
@ -227,7 +287,8 @@ export class RagService {
|
||||||
|
|
||||||
const ollamaClient = await this.ollamaService.getClient()
|
const ollamaClient = await this.ollamaService.getClient()
|
||||||
|
|
||||||
const embeddings: number[][] = []
|
// Prepare all chunk texts with prefix and truncation
|
||||||
|
const prefixedChunks: string[] = []
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
let chunkText = chunks[i]
|
let chunkText = chunks[i]
|
||||||
|
|
||||||
|
|
@ -237,7 +298,6 @@ export class RagService {
|
||||||
const estimatedTokens = this.estimateTokenCount(withPrefix)
|
const estimatedTokens = this.estimateTokenCount(withPrefix)
|
||||||
|
|
||||||
if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
|
if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
|
||||||
// This should be rare - log for debugging if it's occurring frequently
|
|
||||||
const prefixTokens = this.estimateTokenCount(prefixText)
|
const prefixTokens = this.estimateTokenCount(prefixText)
|
||||||
const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
|
const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
|
||||||
logger.warn(
|
logger.warn(
|
||||||
|
|
@ -246,17 +306,30 @@ export class RagService {
|
||||||
chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
|
chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`)
|
prefixedChunks.push(RagService.SEARCH_DOCUMENT_PREFIX + chunkText)
|
||||||
|
}
|
||||||
|
|
||||||
const response = await ollamaClient.embeddings({
|
// Batch embed chunks for performance
|
||||||
|
const embeddings: number[][] = []
|
||||||
|
const batchSize = RagService.EMBEDDING_BATCH_SIZE
|
||||||
|
const totalBatches = Math.ceil(prefixedChunks.length / batchSize)
|
||||||
|
|
||||||
|
for (let batchIdx = 0; batchIdx < totalBatches; batchIdx++) {
|
||||||
|
const batchStart = batchIdx * batchSize
|
||||||
|
const batch = prefixedChunks.slice(batchStart, batchStart + batchSize)
|
||||||
|
|
||||||
|
logger.debug(`[RAG] Embedding batch ${batchIdx + 1}/${totalBatches} (${batch.length} chunks)`)
|
||||||
|
|
||||||
|
const response = await ollamaClient.embed({
|
||||||
model: RagService.EMBEDDING_MODEL,
|
model: RagService.EMBEDDING_MODEL,
|
||||||
prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText,
|
input: batch,
|
||||||
})
|
})
|
||||||
|
|
||||||
embeddings.push(response.embedding)
|
embeddings.push(...response.embeddings)
|
||||||
|
|
||||||
if (onProgress) {
|
if (onProgress) {
|
||||||
await onProgress(((i + 1) / chunks.length) * 100)
|
const progress = ((batchStart + batch.length) / prefixedChunks.length) * 100
|
||||||
|
await onProgress(progress)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -395,14 +468,7 @@ export class RagService {
|
||||||
deleteAfterEmbedding: boolean,
|
deleteAfterEmbedding: boolean,
|
||||||
batchOffset?: number,
|
batchOffset?: number,
|
||||||
onProgress?: (percent: number) => Promise<void>
|
onProgress?: (percent: number) => Promise<void>
|
||||||
): Promise<{
|
): Promise<ProcessZIMFileResponse> {
|
||||||
success: boolean
|
|
||||||
message: string
|
|
||||||
chunks?: number
|
|
||||||
hasMoreBatches?: boolean
|
|
||||||
articlesProcessed?: number
|
|
||||||
totalArticles?: number
|
|
||||||
}> {
|
|
||||||
const zimExtractionService = new ZIMExtractionService()
|
const zimExtractionService = new ZIMExtractionService()
|
||||||
|
|
||||||
// Process in batches to avoid lock timeout
|
// Process in batches to avoid lock timeout
|
||||||
|
|
@ -540,14 +606,7 @@ export class RagService {
|
||||||
deleteAfterEmbedding: boolean = false,
|
deleteAfterEmbedding: boolean = false,
|
||||||
batchOffset?: number,
|
batchOffset?: number,
|
||||||
onProgress?: (percent: number) => Promise<void>
|
onProgress?: (percent: number) => Promise<void>
|
||||||
): Promise<{
|
): Promise<ProcessAndEmbedFileResponse> {
|
||||||
success: boolean
|
|
||||||
message: string
|
|
||||||
chunks?: number
|
|
||||||
hasMoreBatches?: boolean
|
|
||||||
articlesProcessed?: number
|
|
||||||
totalArticles?: number
|
|
||||||
}> {
|
|
||||||
try {
|
try {
|
||||||
const fileType = determineFileType(filepath)
|
const fileType = determineFileType(filepath)
|
||||||
logger.debug(`[RAG] Processing file: ${filepath} (detected type: ${fileType})`)
|
logger.debug(`[RAG] Processing file: ${filepath} (detected type: ${fileType})`)
|
||||||
|
|
@ -631,14 +690,18 @@ export class RagService {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
|
|
||||||
const allModels = await this.ollamaService.getModels(true)
|
if (!this.embeddingModelVerified) {
|
||||||
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
const allModels = await this.ollamaService.getModels(true)
|
||||||
|
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
||||||
|
|
||||||
if (!embeddingModel) {
|
if (!embeddingModel) {
|
||||||
logger.warn(
|
logger.warn(
|
||||||
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
|
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
|
||||||
)
|
)
|
||||||
return []
|
this.embeddingModelVerified = false
|
||||||
|
return []
|
||||||
|
}
|
||||||
|
this.embeddingModelVerified = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Preprocess query for better matching
|
// Preprocess query for better matching
|
||||||
|
|
@ -666,9 +729,9 @@ export class RagService {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await ollamaClient.embeddings({
|
const response = await ollamaClient.embed({
|
||||||
model: RagService.EMBEDDING_MODEL,
|
model: RagService.EMBEDDING_MODEL,
|
||||||
prompt: prefixedQuery,
|
input: [prefixedQuery],
|
||||||
})
|
})
|
||||||
|
|
||||||
// Perform semantic search with a higher limit to enable reranking
|
// Perform semantic search with a higher limit to enable reranking
|
||||||
|
|
@ -678,7 +741,7 @@ export class RagService {
|
||||||
)
|
)
|
||||||
|
|
||||||
const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
|
const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
|
||||||
vector: response.embedding,
|
vector: response.embeddings[0],
|
||||||
limit: searchLimit,
|
limit: searchLimit,
|
||||||
score_threshold: scoreThreshold,
|
score_threshold: scoreThreshold,
|
||||||
with_payload: true,
|
with_payload: true,
|
||||||
|
|
@ -687,7 +750,7 @@ export class RagService {
|
||||||
logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
|
logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
|
||||||
|
|
||||||
// Map results with metadata for reranking
|
// Map results with metadata for reranking
|
||||||
const resultsWithMetadata = searchResults.map((result) => ({
|
const resultsWithMetadata: RAGResult[] = searchResults.map((result) => ({
|
||||||
text: (result.payload?.text as string) || '',
|
text: (result.payload?.text as string) || '',
|
||||||
score: result.score,
|
score: result.score,
|
||||||
keywords: (result.payload?.keywords as string) || '',
|
keywords: (result.payload?.keywords as string) || '',
|
||||||
|
|
@ -700,6 +763,7 @@ export class RagService {
|
||||||
hierarchy: result.payload?.hierarchy as string | undefined,
|
hierarchy: result.payload?.hierarchy as string | undefined,
|
||||||
document_id: result.payload?.document_id as string | undefined,
|
document_id: result.payload?.document_id as string | undefined,
|
||||||
content_type: result.payload?.content_type as string | undefined,
|
content_type: result.payload?.content_type as string | undefined,
|
||||||
|
source: result.payload?.source as string | undefined,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
|
const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
|
||||||
|
|
@ -711,8 +775,11 @@ export class RagService {
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Apply source diversity penalty to avoid all results from the same document
|
||||||
|
const diverseResults = this.applySourceDiversity(rerankedResults)
|
||||||
|
|
||||||
// Return top N results with enhanced metadata
|
// Return top N results with enhanced metadata
|
||||||
return rerankedResults.slice(0, limit).map((result) => ({
|
return diverseResults.slice(0, limit).map((result) => ({
|
||||||
text: result.text,
|
text: result.text,
|
||||||
score: result.finalScore,
|
score: result.finalScore,
|
||||||
metadata: {
|
metadata: {
|
||||||
|
|
@ -748,34 +815,10 @@ export class RagService {
|
||||||
* outweigh the overhead.
|
* outweigh the overhead.
|
||||||
*/
|
*/
|
||||||
private rerankResults(
|
private rerankResults(
|
||||||
results: Array<{
|
results: Array<RAGResult>,
|
||||||
text: string
|
|
||||||
score: number
|
|
||||||
keywords: string
|
|
||||||
chunk_index: number
|
|
||||||
created_at: number
|
|
||||||
article_title?: string
|
|
||||||
section_title?: string
|
|
||||||
full_title?: string
|
|
||||||
hierarchy?: string
|
|
||||||
document_id?: string
|
|
||||||
content_type?: string
|
|
||||||
}>,
|
|
||||||
queryKeywords: string[],
|
queryKeywords: string[],
|
||||||
originalQuery: string
|
originalQuery: string
|
||||||
): Array<{
|
): Array<RerankedRAGResult> {
|
||||||
text: string
|
|
||||||
score: number
|
|
||||||
finalScore: number
|
|
||||||
chunk_index: number
|
|
||||||
created_at: number
|
|
||||||
article_title?: string
|
|
||||||
section_title?: string
|
|
||||||
full_title?: string
|
|
||||||
hierarchy?: string
|
|
||||||
document_id?: string
|
|
||||||
content_type?: string
|
|
||||||
}> {
|
|
||||||
return results
|
return results
|
||||||
.map((result) => {
|
.map((result) => {
|
||||||
let finalScore = result.score
|
let finalScore = result.score
|
||||||
|
|
@ -851,6 +894,37 @@ export class RagService {
|
||||||
.sort((a, b) => b.finalScore - a.finalScore)
|
.sort((a, b) => b.finalScore - a.finalScore)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Down-weights results that come from a source which already contributed a
 * higher-ranked result, so one document cannot fill every slot.
 *
 * Results are visited best-first (by `finalScore`). The n-th additional result from
 * the same source has its `finalScore` multiplied by 0.85^n, where n is the number
 * of results already seen from that source. The source is identified by
 * `document_id`, falling back to `source`.
 *
 * Results that carry neither identifier are left unpenalized: there is no evidence
 * they share a document, so they must not be lumped into one bucket and penalize
 * each other.
 *
 * @param results Reranked results; the input array and its items are not mutated.
 * @returns Copies of the results with adjusted `finalScore`, sorted by it descending.
 */
private applySourceDiversity(
  results: Array<RerankedRAGResult>
): Array<RerankedRAGResult> {
  const DIVERSITY_PENALTY = 0.85
  const sourceCounts = new Map<string, number>()

  // The penalty depends on rank order, so enforce best-first instead of assuming it.
  const ranked = [...results].sort((a, b) => b.finalScore - a.finalScore)

  return ranked
    .map((result) => {
      const sourceKey = result.document_id || result.source
      if (!sourceKey) {
        // Unknown origin: cannot be attributed to any source, so no penalty applies.
        return { ...result }
      }

      const count = sourceCounts.get(sourceKey) || 0
      sourceCounts.set(sourceKey, count + 1)

      const diverseScore = result.finalScore * Math.pow(DIVERSITY_PENALTY, count)

      if (count > 0) {
        logger.debug(
          `[RAG] Source diversity penalty for "${sourceKey}": ${result.finalScore.toFixed(4)} → ${diverseScore.toFixed(4)} (seen ${count}x)`
        )
      }

      return { ...result, finalScore: diverseScore }
    })
    .sort((a, b) => b.finalScore - a.finalScore)
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve all unique source files that have been stored in the knowledge base.
|
* Retrieve all unique source files that have been stored in the knowledge base.
|
||||||
* @returns Array of unique full source paths
|
* @returns Array of unique full source paths
|
||||||
|
|
@ -866,12 +940,12 @@ export class RagService {
|
||||||
let offset: string | number | null | Record<string, unknown> = null
|
let offset: string | number | null | Record<string, unknown> = null
|
||||||
const batchSize = 100
|
const batchSize = 100
|
||||||
|
|
||||||
// Scroll through all points in the collection
|
// Scroll through all points in the collection (only fetch source field)
|
||||||
do {
|
do {
|
||||||
const scrollResult = await this.qdrant!.scroll(RagService.CONTENT_COLLECTION_NAME, {
|
const scrollResult = await this.qdrant!.scroll(RagService.CONTENT_COLLECTION_NAME, {
|
||||||
limit: batchSize,
|
limit: batchSize,
|
||||||
offset: offset,
|
offset: offset,
|
||||||
with_payload: true,
|
with_payload: ['source'],
|
||||||
with_vector: false,
|
with_vector: false,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,16 @@ export const FALLBACK_RECOMMENDED_OLLAMA_MODELS: NomadOllamaModel[] = [
|
||||||
|
|
||||||
export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // default to qwen2.5 for query rewriting with good balance of text task performance and resource usage
|
export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // default to qwen2.5 for query rewriting with good balance of text task performance and resource usage
|
||||||
|
|
||||||
|
/**
 * Tiered RAG context budgets, keyed by model size in billions of parameters.
 *
 * Small models are easily overwhelmed by large amounts of injected context, so
 * they receive fewer results and a tighter token budget. Entries are listed by
 * ascending `maxParams`; a model uses the first tier whose `maxParams` is at least
 * its size. A `maxTokens` of 0 disables the token cap.
 */
export const RAG_CONTEXT_LIMITS: Array<{
  maxParams: number
  maxResults: number
  maxTokens: number
}> = [
  // Models up to 3B parameters
  { maxParams: 3, maxResults: 2, maxTokens: 1000 },
  // Models above 3B and up to 8B parameters
  { maxParams: 8, maxResults: 4, maxTokens: 2500 },
  // Everything larger than 8B: no token cap
  { maxParams: Infinity, maxResults: 5, maxTokens: 0 },
]
|
||||||
|
|
||||||
export const SYSTEM_PROMPTS = {
|
export const SYSTEM_PROMPTS = {
|
||||||
default: `
|
default: `
|
||||||
Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred.
|
Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred.
|
||||||
|
|
@ -113,7 +123,7 @@ Ensure that your suggestions are comma-seperated with no conjunctions like "and"
|
||||||
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
|
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
|
||||||
Format: suggestion1, suggestion2, suggestion3
|
Format: suggestion1, suggestion2, suggestion3
|
||||||
`,
|
`,
|
||||||
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
|
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 50 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
|
||||||
query_rewrite: `
|
query_rewrite: `
|
||||||
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
|
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,16 @@
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
- **AI Assistant**: Added improved user guidance for troubleshooting GPU pass-through issues
|
- **AI Assistant**: Added improved user guidance for troubleshooting GPU pass-through issues
|
||||||
|
- **AI Assistant**: The last used model is now automatically selected when a new chat is started
|
||||||
- **Settings**: Nomad now automatically performs nightly checks for available app updates, and users can select and apply updates from the Apps page in Settings
|
- **Settings**: Nomad now automatically performs nightly checks for available app updates, and users can select and apply updates from the Apps page in Settings
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
- **Settings**: Fixed an issue where the AI Assistant settings page would be shown in navigation even if the AI Assistant was not installed, thus causing 404 errors when clicked
|
- **Settings**: Fixed an issue where the AI Assistant settings page would be shown in navigation even if the AI Assistant was not installed, thus causing 404 errors when clicked
|
||||||
- **Security**: Path traversal and SSRF mitigations
|
- **Security**: Path traversal and SSRF mitigations
|
||||||
|
- **AI Assistant**: Fixed an issue that was causing intermittent failures saving chat session titles
|
||||||
|
|
||||||
### Improvements
|
### Improvements
|
||||||
|
- **AI Assistant**: Extensive performance improvements and improved RAG intelligence/context usage
|
||||||
|
|
||||||
## Version 1.28.0 - March 5, 2026
|
## Version 1.28.0 - March 5, 2026
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,3 +5,32 @@ export type EmbedJobWithProgress = {
|
||||||
progress: number
|
progress: number
|
||||||
status: string
|
status: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Result returned by the RAG service after processing a file for embedding.
 * Only `success` and `message` are always present; the remaining fields are
 * optional progress details.
 */
export type ProcessAndEmbedFileResponse = {
  /** Whether processing completed without error. */
  success: boolean
  /** Human-readable outcome description. */
  message: string
  /** Chunk count reported for this call, when available. */
  chunks?: number
  /** Set when further batches remain to be processed. */
  hasMoreBatches?: boolean
  /** Articles handled so far (presumably ZIM batch processing — confirm against caller). */
  articlesProcessed?: number
  /** Total articles in the file, when known. */
  totalArticles?: number
}

/** ZIM processing reports the same shape as generic file processing. */
export type ProcessZIMFileResponse = ProcessAndEmbedFileResponse
|
||||||
|
|
||||||
|
/**
 * A single vector-search hit together with the payload metadata used for reranking.
 */
export type RAGResult = {
  /** Chunk text stored in the point payload. */
  text: string
  /** Raw score reported by the vector search. */
  score: number
  /** Keywords stored alongside the chunk. */
  keywords: string
  chunk_index: number
  created_at: number

  // Optional structural metadata; present only when the payload carries it.
  article_title?: string
  section_title?: string
  full_title?: string
  hierarchy?: string
  document_id?: string
  content_type?: string
  /** Source recorded in the payload, if any. */
  source?: string
}

/**
 * A search hit after reranking: identical to RAGResult minus `keywords`, plus the
 * adjusted score used for final ordering.
 */
export type RerankedRAGResult = Omit<RAGResult, 'keywords'> & {
  /** Score after reranking adjustments. */
  finalScore: number
}
|
||||||
Loading…
Reference in New Issue
Block a user