mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
feat(RAG): initial beta with preprocessing, embedding, semantic retrieval, and context passing
This commit is contained in:
parent
1923cd4cde
commit
d1f40663d3
|
|
@ -41,16 +41,17 @@ export default class OllamaController {
|
|||
|
||||
if (lastUserMessage) {
|
||||
// Search for relevant context in the knowledge base
|
||||
// Using lower threshold (0.3) with improved hybrid search
|
||||
const relevantDocs = await this.ragService.searchSimilarDocuments(
|
||||
lastUserMessage.content,
|
||||
5, // Retrieve top 5 most relevant chunks
|
||||
0.7 // Minimum similarity score of 0.7
|
||||
0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
|
||||
)
|
||||
|
||||
// If relevant context is found, inject as a system message
|
||||
if (relevantDocs.length > 0) {
|
||||
const contextText = relevantDocs
|
||||
.map((doc, idx) => `[Context ${idx + 1}]\n${doc.text}`)
|
||||
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
|
||||
.join('\n\n')
|
||||
|
||||
const systemMessage = {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
import { RagService } from '#services/rag_service'
|
||||
import { EmbedFileJob } from '#jobs/embed_file_job'
|
||||
import { inject } from '@adonisjs/core'
|
||||
import type { HttpContext } from '@adonisjs/core/http'
|
||||
import app from '@adonisjs/core/services/app'
|
||||
import { randomBytes } from 'node:crypto'
|
||||
import { sanitizeFilename } from '../utils/fs.js'
|
||||
import { stat } from 'node:fs/promises'
|
||||
import { getJobStatusSchema } from '#validators/rag'
|
||||
|
||||
@inject()
|
||||
export default class RagController {
|
||||
|
|
@ -19,19 +22,48 @@ export default class RagController {
|
|||
const sanitizedName = sanitizeFilename(uploadedFile.clientName)
|
||||
|
||||
const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}`
|
||||
const fullPath = app.makePath('storage/uploads', fileName)
|
||||
const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)
|
||||
|
||||
await uploadedFile.move(app.makePath('storage/uploads'), {
|
||||
await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), {
|
||||
name: fileName,
|
||||
})
|
||||
|
||||
// Don't await this - process in background
|
||||
this.ragService.processAndEmbedFile(fullPath)
|
||||
// Get file size for tracking
|
||||
let fileSize: number | undefined = undefined
|
||||
try {
|
||||
const stats = await stat(fullPath)
|
||||
fileSize = stats.size
|
||||
} catch (error) {
|
||||
// Not critical if we can't get file size, just swallow the error
|
||||
}
|
||||
|
||||
return response.status(200).json({
|
||||
message: 'File has been uploaded and queued for processing.',
|
||||
file_path: `/uploads/${fileName}`,
|
||||
// Dispatch background job for embedding
|
||||
const result = await EmbedFileJob.dispatch({
|
||||
filePath: fullPath,
|
||||
fileName,
|
||||
fileSize,
|
||||
})
|
||||
|
||||
return response.status(202).json({
|
||||
message: result.message,
|
||||
jobId: result.jobId,
|
||||
fileName,
|
||||
filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`,
|
||||
alreadyProcessing: !result.created,
|
||||
})
|
||||
}
|
||||
|
||||
public async getJobStatus({ request, response }: HttpContext) {
|
||||
const reqData = await request.validateUsing(getJobStatusSchema)
|
||||
|
||||
const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, reqData.filePath)
|
||||
const status = await EmbedFileJob.getStatus(fullPath)
|
||||
|
||||
if (!status.exists) {
|
||||
return response.status(404).json({ error: 'Job not found for this file' })
|
||||
}
|
||||
|
||||
return response.status(200).json(status)
|
||||
}
|
||||
|
||||
public async getStoredFiles({ response }: HttpContext) {
|
||||
|
|
|
|||
161
admin/app/jobs/embed_file_job.ts
Normal file
161
admin/app/jobs/embed_file_job.ts
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
import { Job } from 'bullmq'
|
||||
import { QueueService } from '#services/queue_service'
|
||||
import { RagService } from '#services/rag_service'
|
||||
import { DockerService } from '#services/docker_service'
|
||||
import { OllamaService } from '#services/ollama_service'
|
||||
import { createHash } from 'crypto'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
|
||||
export interface EmbedFileJobParams {
  /** Full path on disk to the uploaded file; also the input hashed to derive the deterministic job id. */
  filePath: string
  /** Sanitized client file name, used for logging and user-facing status messages. */
  fileName: string
  /** File size in bytes, when it could be determined at upload time. */
  fileSize?: number
}
|
||||
|
||||
export class EmbedFileJob {
|
||||
static get queue() {
|
||||
return 'file-embeddings'
|
||||
}
|
||||
|
||||
static get key() {
|
||||
return 'embed-file'
|
||||
}
|
||||
|
||||
static getJobId(filePath: string): string {
|
||||
return createHash('sha256').update(filePath).digest('hex').slice(0, 16)
|
||||
}
|
||||
|
||||
async handle(job: Job) {
|
||||
const { filePath, fileName } = job.data as EmbedFileJobParams
|
||||
|
||||
logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}`)
|
||||
|
||||
const dockerService = new DockerService()
|
||||
const ollamaService = new OllamaService()
|
||||
const ragService = new RagService(dockerService, ollamaService)
|
||||
|
||||
try {
|
||||
// Update progress starting
|
||||
await job.updateProgress(0)
|
||||
await job.updateData({
|
||||
...job.data,
|
||||
status: 'processing',
|
||||
startedAt: Date.now(),
|
||||
})
|
||||
|
||||
logger.info(`[EmbedFileJob] Processing file: ${filePath}`)
|
||||
|
||||
// Process and embed the file
|
||||
const result = await ragService.processAndEmbedFile(filePath)
|
||||
|
||||
if (!result.success) {
|
||||
logger.error(`[EmbedFileJob] Failed to process file ${fileName}: ${result.message}`)
|
||||
throw new Error(result.message)
|
||||
}
|
||||
|
||||
// Update progress complete
|
||||
await job.updateProgress(100)
|
||||
await job.updateData({
|
||||
...job.data,
|
||||
status: 'completed',
|
||||
completedAt: Date.now(),
|
||||
chunks: result.chunks,
|
||||
})
|
||||
|
||||
logger.info(
|
||||
`[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}`
|
||||
)
|
||||
|
||||
return {
|
||||
success: true,
|
||||
fileName,
|
||||
filePath,
|
||||
chunks: result.chunks,
|
||||
message: `Successfully embedded ${result.chunks} chunks`,
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`[EmbedFileJob] Error embedding file ${fileName}:`, error)
|
||||
|
||||
await job.updateData({
|
||||
...job.data,
|
||||
status: 'failed',
|
||||
failedAt: Date.now(),
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
})
|
||||
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
static async getByFilePath(filePath: string): Promise<Job | undefined> {
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(filePath)
|
||||
return await queue.getJob(jobId)
|
||||
}
|
||||
|
||||
static async dispatch(params: EmbedFileJobParams) {
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(params.filePath)
|
||||
|
||||
try {
|
||||
const job = await queue.add(this.key, params, {
|
||||
jobId,
|
||||
attempts: 3,
|
||||
backoff: {
|
||||
type: 'exponential',
|
||||
delay: 5000, // Delay 5 seconds before retrying
|
||||
},
|
||||
removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history
|
||||
removeOnFail: { count: 20 } // Keep last 20 failed jobs for debugging
|
||||
})
|
||||
|
||||
logger.info(`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}`)
|
||||
|
||||
return {
|
||||
job,
|
||||
created: true,
|
||||
jobId,
|
||||
message: `File queued for embedding: ${params.fileName}`,
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.message && error.message.includes('job already exists')) {
|
||||
const existing = await queue.getJob(jobId)
|
||||
logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
|
||||
return {
|
||||
job: existing,
|
||||
created: false,
|
||||
jobId,
|
||||
message: `Embedding job already exists for: ${params.fileName}`,
|
||||
}
|
||||
}
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
static async getStatus(filePath: string): Promise<{
|
||||
exists: boolean
|
||||
status?: string
|
||||
progress?: number
|
||||
chunks?: number
|
||||
error?: string
|
||||
}> {
|
||||
const job = await this.getByFilePath(filePath)
|
||||
|
||||
if (!job) {
|
||||
return { exists: false }
|
||||
}
|
||||
|
||||
const state = await job.getState()
|
||||
const data = job.data
|
||||
|
||||
return {
|
||||
exists: true,
|
||||
status: data.status || state,
|
||||
progress: typeof job.progress === 'number' ? job.progress : undefined,
|
||||
chunks: data.chunks,
|
||||
error: data.error,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2,28 +2,115 @@ import { QdrantClient } from '@qdrant/js-client-rest'
|
|||
import { DockerService } from './docker_service.js'
|
||||
import { inject } from '@adonisjs/core'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
import { chunk } from 'llm-chunk'
|
||||
import { TokenChunker } from '@chonkiejs/core'
|
||||
import sharp from 'sharp'
|
||||
import { determineFileType, getFile } from '../utils/fs.js'
|
||||
import { deleteFileIfExists, determineFileType, getFile } from '../utils/fs.js'
|
||||
import { PDFParse } from 'pdf-parse'
|
||||
import { createWorker } from 'tesseract.js'
|
||||
import { fromBuffer } from 'pdf2pic'
|
||||
import { OllamaService } from './ollama_service.js'
|
||||
import { SERVICE_NAMES } from '../../constants/service_names.js'
|
||||
import { removeStopwords } from 'stopword'
|
||||
import { randomUUID } from 'node:crypto'
|
||||
|
||||
@inject()
|
||||
export class RagService {
|
||||
private qdrant: QdrantClient | null = null
|
||||
private qdrantInitPromise: Promise<void> | null = null
|
||||
public static CONTENT_COLLECTION_NAME = 'open-webui_knowledge' // This is the collection name OWUI uses for uploaded knowledge
|
||||
public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
|
||||
public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
|
||||
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
|
||||
public static EMBEDDING_DIMENSION = 768 // Nomic Embed Text v1.5 dimension is 768
|
||||
public static MODEL_CONTEXT_LENGTH = 2048 // nomic-embed-text has 2K token context
|
||||
public static MAX_SAFE_TOKENS = 1800 // Leave buffer for prefix and tokenization variance
|
||||
public static TARGET_TOKENS_PER_CHUNK = 1700 // Target 1700 tokens per chunk for embedding
|
||||
public static PREFIX_TOKEN_BUDGET = 10 // Reserve ~10 tokens for prefixes
|
||||
public static CHAR_TO_TOKEN_RATIO = 3 // Approximate chars per token
|
||||
// Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
|
||||
public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
|
||||
public static SEARCH_QUERY_PREFIX = 'search_query: '
|
||||
|
||||
constructor(
|
||||
private dockerService: DockerService,
|
||||
private ollamaService: OllamaService
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Estimates token count for text. This is a conservative approximation:
|
||||
* - English text: ~1 token per 3 characters
|
||||
* - Adds buffer for special characters and tokenization variance
|
||||
*
|
||||
* Note: This is approximate and realistic english
|
||||
* tokenization is ~4 chars/token, but we use 3 here to be safe.
|
||||
* Actual tokenization may differ, but being
|
||||
* conservative prevents context length errors.
|
||||
*/
|
||||
private estimateTokenCount(text: string): number {
|
||||
// This accounts for special characters, numbers, and punctuation
|
||||
return Math.ceil(text.length / RagService.CHAR_TO_TOKEN_RATIO)
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncates text to fit within token limit, preserving word boundaries.
|
||||
* Ensures the text + prefix won't exceed the model's context window.
|
||||
*/
|
||||
private truncateToTokenLimit(text: string, maxTokens: number): string {
|
||||
const estimatedTokens = this.estimateTokenCount(text)
|
||||
|
||||
if (estimatedTokens <= maxTokens) {
|
||||
return text
|
||||
}
|
||||
|
||||
// Calculate how many characters we can keep using our ratio
|
||||
const maxChars = Math.floor(maxTokens * RagService.CHAR_TO_TOKEN_RATIO)
|
||||
|
||||
// Truncate at word boundary
|
||||
let truncated = text.substring(0, maxChars)
|
||||
const lastSpace = truncated.lastIndexOf(' ')
|
||||
|
||||
if (lastSpace > maxChars * 0.8) {
|
||||
// If we found a space in the last 20%, use it
|
||||
truncated = truncated.substring(0, lastSpace)
|
||||
}
|
||||
|
||||
logger.warn(
|
||||
`[RAG] Truncated text from ${text.length} to ${truncated.length} chars (est. ${estimatedTokens} → ${this.estimateTokenCount(truncated)} tokens)`
|
||||
)
|
||||
|
||||
return truncated
|
||||
}
|
||||
|
||||
/**
|
||||
* Preprocesses a query to improve retrieval by expanding it with context.
|
||||
* This helps match documents even when using different terminology.
|
||||
*/
|
||||
private preprocessQuery(query: string): string {
|
||||
// Future: this is a placeholder for more advanced query expansion techniques.
|
||||
// For now, we simply trim whitespace. Improvements could include:
|
||||
// - Synonym expansion using a thesaurus
|
||||
// - Adding related terms based on domain knowledge
|
||||
// - Using a language model to rephrase or elaborate the query
|
||||
const expanded = query.trim()
|
||||
logger.debug(`[RAG] Original query: "${query}"`)
|
||||
logger.debug(`[RAG] Preprocessed query: "${expanded}"`)
|
||||
return expanded
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract keywords from query for hybrid search
|
||||
*/
|
||||
private extractKeywords(query: string): string[] {
|
||||
const split = query.split(' ')
|
||||
const noStopWords = removeStopwords(split)
|
||||
|
||||
// Future: This is basic normalization, could be improved with stemming/lemmatization later
|
||||
const keywords = noStopWords
|
||||
.map((word) => word.replace(/[^\w]/g, '').toLowerCase())
|
||||
.filter((word) => word.length > 2)
|
||||
|
||||
return [...new Set(keywords)]
|
||||
}
|
||||
|
||||
private async _initializeQdrantClient() {
|
||||
if (!this.qdrantInitPromise) {
|
||||
this.qdrantInitPromise = (async () => {
|
||||
|
|
@ -84,43 +171,87 @@ export class RagService {
|
|||
throw new Error(`${RagService.EMBEDDING_MODEL} does not exist and could not be downloaded.`)
|
||||
}
|
||||
|
||||
const chunks = chunk(text, {
|
||||
// These settings should provide a good balance between context and precision
|
||||
minLength: 512,
|
||||
maxLength: 1024,
|
||||
overlap: 200,
|
||||
// TokenChunker uses character-based tokenization (1 char = 1 token)
|
||||
// We need to convert our embedding model's token counts to character counts
|
||||
// since nomic-embed-text tokenizer uses ~3 chars per token
|
||||
const targetCharsPerChunk = Math.floor(RagService.TARGET_TOKENS_PER_CHUNK * RagService.CHAR_TO_TOKEN_RATIO)
|
||||
const overlapChars = Math.floor(150 * RagService.CHAR_TO_TOKEN_RATIO)
|
||||
|
||||
const chunker = await TokenChunker.create({
|
||||
chunkSize: targetCharsPerChunk,
|
||||
chunkOverlap: overlapChars,
|
||||
})
|
||||
|
||||
if (!chunks || chunks.length === 0) {
|
||||
const chunkResults = await chunker.chunk(text)
|
||||
|
||||
if (!chunkResults || chunkResults.length === 0) {
|
||||
throw new Error('No text chunks generated for embedding.')
|
||||
}
|
||||
|
||||
const embeddings: number[][] = []
|
||||
// Extract text from chunk results
|
||||
const chunks = chunkResults.map((chunk) => chunk.text)
|
||||
|
||||
const ollamaClient = await this.ollamaService.getClient()
|
||||
for (const chunkText of chunks) {
|
||||
|
||||
const embeddings: number[][] = []
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
let chunkText = chunks[i]
|
||||
|
||||
// Final safety check: ensure chunk + prefix fits
|
||||
const prefixText = RagService.SEARCH_DOCUMENT_PREFIX
|
||||
const withPrefix = prefixText + chunkText
|
||||
const estimatedTokens = this.estimateTokenCount(withPrefix)
|
||||
|
||||
if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
|
||||
// This should be rare - log for debugging if it's occurring frequently
|
||||
const prefixTokens = this.estimateTokenCount(prefixText)
|
||||
const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
|
||||
logger.warn(
|
||||
`[RAG] Chunk ${i} estimated at ${estimatedTokens} tokens (${chunkText.length} chars), truncating to ${maxTokensForText} tokens`
|
||||
)
|
||||
chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
|
||||
}
|
||||
|
||||
logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`)
|
||||
|
||||
const response = await ollamaClient.embeddings({
|
||||
model: RagService.EMBEDDING_MODEL,
|
||||
prompt: chunkText,
|
||||
prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText,
|
||||
})
|
||||
|
||||
embeddings.push(response.embedding)
|
||||
}
|
||||
|
||||
const points = chunks.map((chunkText, index) => ({
|
||||
id: `${Date.now()}_${index}`,
|
||||
const timestamp = Date.now()
|
||||
const points = chunks.map((chunkText, index) => {
|
||||
// Extract keywords for hybrid search
|
||||
const keywords = this.extractKeywords(chunkText)
|
||||
logger.debug(`[RAG] Extracted keywords for chunk ${index}: [${keywords.join(', ')}]`)
|
||||
return {
|
||||
id: randomUUID(), // qdrant requires either uuid or unsigned int
|
||||
vector: embeddings[index],
|
||||
payload: {
|
||||
...metadata,
|
||||
text: chunkText,
|
||||
chunk_index: index,
|
||||
total_chunks: chunks.length,
|
||||
keywords: keywords.join(' '), // Store as space-separated string for text search
|
||||
char_count: chunkText.length,
|
||||
created_at: timestamp,
|
||||
source: metadata.source || 'unknown'
|
||||
},
|
||||
}))
|
||||
}
|
||||
})
|
||||
|
||||
await this.qdrant!.upsert(RagService.CONTENT_COLLECTION_NAME, { points })
|
||||
|
||||
logger.debug(`[RAG] Successfully embedded and stored ${chunks.length} chunks`)
|
||||
logger.debug(`[RAG] First chunk preview: "${chunks[0].substring(0, 100)}..."`)
|
||||
|
||||
return { chunks: chunks.length }
|
||||
} catch (error) {
|
||||
logger.error('Error embedding text:', error)
|
||||
console.error(error)
|
||||
logger.error('[RAG] Error embedding text:', error)
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
|
@ -195,7 +326,7 @@ export class RagService {
|
|||
* This includes text extraction, chunking, embedding, and storing in Qdrant.
|
||||
*/
|
||||
public async processAndEmbedFile(
|
||||
filepath: string
|
||||
filepath: string // Should already be the full path to the uploaded file
|
||||
): Promise<{ success: boolean; message: string; chunks?: number }> {
|
||||
try {
|
||||
const fileType = determineFileType(filepath)
|
||||
|
|
@ -233,7 +364,13 @@ export class RagService {
|
|||
return { success: false, message: 'No text could be extracted from the file.' }
|
||||
}
|
||||
|
||||
const embedResult = await this.embedAndStoreText(extractedText, {})
|
||||
const embedResult = await this.embedAndStoreText(extractedText, {
|
||||
source: filepath
|
||||
})
|
||||
|
||||
// Cleanup the file from disk
|
||||
logger.info(`[RAG] Embedding complete, deleting uploaded file: ${filepath}`)
|
||||
await deleteFileIfExists(filepath)
|
||||
|
||||
return {
|
||||
success: true,
|
||||
|
|
@ -248,60 +385,230 @@ export class RagService {
|
|||
|
||||
/**
|
||||
* Search for documents similar to the query text in the Qdrant knowledge base.
|
||||
* Returns the most relevant text chunks based on semantic similarity.
|
||||
* Uses a hybrid approach combining semantic similarity and keyword matching.
|
||||
* Implements adaptive thresholds and result reranking for optimal retrieval.
|
||||
* @param query - The search query text
|
||||
* @param limit - Maximum number of results to return (default: 5)
|
||||
* @param scoreThreshold - Minimum similarity score threshold (default: 0.7)
|
||||
* @param scoreThreshold - Minimum similarity score threshold (default: 0.3, much lower than before)
|
||||
* @returns Array of relevant text chunks with their scores
|
||||
*/
|
||||
public async searchSimilarDocuments(
|
||||
query: string,
|
||||
limit: number = 5,
|
||||
scoreThreshold: number = 0.7
|
||||
): Promise<Array<{ text: string; score: number }>> {
|
||||
scoreThreshold: number = 0.3 // Lower default threshold - was 0.7, now 0.3
|
||||
): Promise<Array<{ text: string; score: number; metadata?: Record<string, any> }>> {
|
||||
try {
|
||||
logger.debug(`[RAG] Starting similarity search for query: "${query}"`)
|
||||
|
||||
await this._ensureCollection(
|
||||
RagService.CONTENT_COLLECTION_NAME,
|
||||
RagService.EMBEDDING_DIMENSION
|
||||
)
|
||||
|
||||
// Check if collection has any points
|
||||
const collectionInfo = await this.qdrant!.getCollection(RagService.CONTENT_COLLECTION_NAME)
|
||||
const pointCount = collectionInfo.points_count || 0
|
||||
logger.debug(`[RAG] Knowledge base contains ${pointCount} document chunks`)
|
||||
|
||||
if (pointCount === 0) {
|
||||
logger.debug('[RAG] Knowledge base is empty. Could not perform search.')
|
||||
return []
|
||||
}
|
||||
|
||||
const allModels = await this.ollamaService.getModels(true)
|
||||
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
||||
|
||||
if (!embeddingModel) {
|
||||
logger.warn(
|
||||
`${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
|
||||
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
|
||||
)
|
||||
return []
|
||||
}
|
||||
|
||||
// Generate embedding for the query
|
||||
// Preprocess query for better matching
|
||||
const processedQuery = this.preprocessQuery(query)
|
||||
const keywords = this.extractKeywords(processedQuery)
|
||||
logger.debug(`[RAG] Extracted keywords: [${keywords.join(', ')}]`)
|
||||
|
||||
// Generate embedding for the query with search_query prefix
|
||||
const ollamaClient = await this.ollamaService.getClient()
|
||||
|
||||
// Ensure query doesn't exceed token limit
|
||||
const prefixTokens = this.estimateTokenCount(RagService.SEARCH_QUERY_PREFIX)
|
||||
const maxQueryTokens = RagService.MAX_SAFE_TOKENS - prefixTokens
|
||||
const truncatedQuery = this.truncateToTokenLimit(processedQuery, maxQueryTokens)
|
||||
|
||||
const prefixedQuery = RagService.SEARCH_QUERY_PREFIX + truncatedQuery
|
||||
logger.debug(`[RAG] Generating embedding with prefix: "${RagService.SEARCH_QUERY_PREFIX}"`)
|
||||
|
||||
// Validate final token count
|
||||
const queryTokenCount = this.estimateTokenCount(prefixedQuery)
|
||||
if (queryTokenCount > RagService.MAX_SAFE_TOKENS) {
|
||||
logger.error(
|
||||
`[RAG] Query too long even after truncation: ${queryTokenCount} tokens (max: ${RagService.MAX_SAFE_TOKENS})`
|
||||
)
|
||||
return []
|
||||
}
|
||||
|
||||
const response = await ollamaClient.embeddings({
|
||||
model: RagService.EMBEDDING_MODEL,
|
||||
prompt: query,
|
||||
prompt: prefixedQuery,
|
||||
})
|
||||
|
||||
// Search for similar vectors in Qdrant
|
||||
// Perform semantic search with a higher limit to enable reranking
|
||||
const searchLimit = limit * 3 // Get more results for reranking
|
||||
logger.debug(
|
||||
`[RAG] Searching for top ${searchLimit} semantic matches (threshold: ${scoreThreshold})`
|
||||
)
|
||||
|
||||
const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
|
||||
vector: response.embedding,
|
||||
limit: limit,
|
||||
limit: searchLimit,
|
||||
score_threshold: scoreThreshold,
|
||||
with_payload: true,
|
||||
})
|
||||
|
||||
console.log("Got search results:", searchResults);
|
||||
logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
|
||||
|
||||
return searchResults.map((result) => ({
|
||||
// Map results with metadata for reranking
|
||||
const resultsWithMetadata = searchResults.map((result) => ({
|
||||
text: (result.payload?.text as string) || '',
|
||||
score: result.score,
|
||||
keywords: (result.payload?.keywords as string) || '',
|
||||
chunk_index: (result.payload?.chunk_index as number) || 0,
|
||||
created_at: (result.payload?.created_at as number) || 0,
|
||||
}))
|
||||
|
||||
const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
|
||||
|
||||
logger.debug(`[RAG] Top 3 results after reranking:`)
|
||||
rerankedResults.slice(0, 3).forEach((result, idx) => {
|
||||
logger.debug(
|
||||
`[RAG] ${idx + 1}. Score: ${result.finalScore.toFixed(4)} (semantic: ${result.score.toFixed(4)}) - "${result.text.substring(0, 100)}..."`
|
||||
)
|
||||
})
|
||||
|
||||
// Return top N results
|
||||
return rerankedResults.slice(0, limit).map((result) => ({
|
||||
text: result.text,
|
||||
score: result.finalScore,
|
||||
metadata: {
|
||||
chunk_index: result.chunk_index,
|
||||
created_at: result.created_at,
|
||||
semantic_score: result.score,
|
||||
},
|
||||
}))
|
||||
} catch (error) {
|
||||
logger.error('Error searching similar documents:', error)
|
||||
logger.error('[RAG] Error searching similar documents:', error)
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Rerank search results using hybrid scoring that combines:
 * 1. Semantic similarity score (primary signal)
 * 2. Keyword overlap bonus (conservative, quality-gated)
 * 3. Direct term matches (conservative)
 *
 * Tries to boost only already-relevant results, not promote
 * low-quality results just because they have keyword matches.
 *
 * Does not mutate the input objects' `score`; each returned result carries
 * the original semantic score plus a `finalScore`, and the returned array
 * is sorted by descending finalScore.
 *
 * Future: this is a decent feature-based approach, but we could
 * switch to a python-based reranker in the future if the benefits
 * outweigh the overhead.
 */
private rerankResults(
  results: Array<{
    text: string
    score: number
    keywords: string
    chunk_index: number
    created_at: number
  }>,
  queryKeywords: string[],
  originalQuery: string
): Array<{
  text: string
  score: number
  finalScore: number
  chunk_index: number
  created_at: number
}> {
  return results
    .map((result) => {
      let finalScore = result.score

      // Quality gate: Only apply boosts if semantic score is reasonable
      // Try to prevent promoting irrelevant results that just happen to have keyword matches
      const MIN_SEMANTIC_THRESHOLD = 0.35

      if (result.score < MIN_SEMANTIC_THRESHOLD) {
        // For low-scoring results, use semantic score as-is
        // This prevents false positives from keyword gaming
        logger.debug(
          `[RAG] Skipping boost for low semantic score: ${result.score.toFixed(3)} (threshold: ${MIN_SEMANTIC_THRESHOLD})`
        )
        return {
          ...result,
          finalScore,
        }
      }

      // Boost score based on keyword overlap (diminishing returns - overlap goes down, so does boost)
      // A query keyword counts as matched if it appears in the stored keyword
      // payload OR anywhere in the chunk text (case-insensitive).
      const docKeywords = result.keywords
        .toLowerCase()
        .split(' ')
        .filter((k) => k.length > 0)
      const matchingKeywords = queryKeywords.filter(
        (kw) =>
          docKeywords.includes(kw.toLowerCase()) ||
          result.text.toLowerCase().includes(kw.toLowerCase())
      )
      // Math.max guards against division by zero when the query had no keywords.
      const keywordOverlap = matchingKeywords.length / Math.max(queryKeywords.length, 1)

      // Use square root for diminishing returns: 100% overlap = sqrt(1.0) = 1.0, 25% = 0.5
      // Then scale conservatively (max 10% boost instead of 20%)
      const keywordBoost = Math.sqrt(keywordOverlap) * 0.1 * result.score

      if (keywordOverlap > 0) {
        logger.debug(
          `[RAG] Keyword overlap: ${matchingKeywords.length}/${queryKeywords.length} - Boost: ${keywordBoost.toFixed(3)}`
        )
      }

      // Boost if original query terms appear in text (case-insensitive)
      // Scale boost proportionally to base score to avoid over-promoting weak matches
      const queryTerms = originalQuery
        .toLowerCase()
        .split(/\s+/)
        .filter((t) => t.length > 3)
      const directMatches = queryTerms.filter((term) =>
        result.text.toLowerCase().includes(term)
      ).length

      if (queryTerms.length > 0) {
        const directMatchRatio = directMatches / queryTerms.length
        // Conservative boost: max 7.5% of the base score
        const directMatchBoost = Math.sqrt(directMatchRatio) * 0.075 * result.score

        if (directMatches > 0) {
          logger.debug(
            `[RAG] Direct term matches: ${directMatches}/${queryTerms.length} - Boost: ${directMatchBoost.toFixed(3)}`
          )
          finalScore += directMatchBoost
        }
      }

      // Apply the keyword boost last and cap the combined hybrid score at 1.0.
      finalScore = Math.min(1.0, finalScore + keywordBoost)

      return {
        ...result,
        finalScore,
      }
    })
    .sort((a, b) => b.finalScore - a.finalScore)
}
|
||||
|
||||
/**
|
||||
* Retrieve all unique source files that have been stored in the knowledge base.
|
||||
* @returns Array of unique source file identifiers
|
||||
|
|
@ -328,9 +635,8 @@ export class RagService {
|
|||
|
||||
// Extract unique source values from payloads
|
||||
scrollResult.points.forEach((point) => {
|
||||
const metadata = point.payload?.metadata
|
||||
if (metadata && typeof metadata === 'object' && 'source' in metadata) {
|
||||
const source = metadata.source as string
|
||||
const source = point.payload?.source
|
||||
if (source && typeof source === 'string') {
|
||||
sources.add(source)
|
||||
}
|
||||
})
|
||||
|
|
@ -338,7 +644,13 @@ export class RagService {
|
|||
offset = scrollResult.next_page_offset || null
|
||||
} while (offset !== null)
|
||||
|
||||
return Array.from(sources)
|
||||
const sourcesArr = Array.from(sources)
|
||||
|
||||
// The source is a full path - only extract the filename for display
|
||||
return sourcesArr.map((src) => {
|
||||
const parts = src.split(/[/\\]/)
|
||||
return parts[parts.length - 1] // Return the last part as filename
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error('Error retrieving stored files:', error)
|
||||
return []
|
||||
|
|
|
|||
7
admin/app/validators/rag.ts
Normal file
7
admin/app/validators/rag.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import vine from '@vinejs/vine'
|
||||
|
||||
export const getJobStatusSchema = vine.compile(
|
||||
vine.object({
|
||||
filePath: vine.string(),
|
||||
})
|
||||
)
|
||||
|
|
@ -5,6 +5,7 @@ import queueConfig from '#config/queue'
|
|||
import { RunDownloadJob } from '#jobs/run_download_job'
|
||||
import { DownloadModelJob } from '#jobs/download_model_job'
|
||||
import { RunBenchmarkJob } from '#jobs/run_benchmark_job'
|
||||
import { EmbedFileJob } from '#jobs/embed_file_job'
|
||||
|
||||
export default class QueueWork extends BaseCommand {
|
||||
static commandName = 'queue:work'
|
||||
|
|
@ -90,10 +91,12 @@ export default class QueueWork extends BaseCommand {
|
|||
handlers.set(RunDownloadJob.key, new RunDownloadJob())
|
||||
handlers.set(DownloadModelJob.key, new DownloadModelJob())
|
||||
handlers.set(RunBenchmarkJob.key, new RunBenchmarkJob())
|
||||
handlers.set(EmbedFileJob.key, new EmbedFileJob())
|
||||
|
||||
queues.set(RunDownloadJob.key, RunDownloadJob.queue)
|
||||
queues.set(DownloadModelJob.key, DownloadModelJob.queue)
|
||||
queues.set(RunBenchmarkJob.key, RunBenchmarkJob.queue)
|
||||
queues.set(EmbedFileJob.key, EmbedFileJob.queue)
|
||||
|
||||
return [handlers, queues]
|
||||
}
|
||||
|
|
@ -107,6 +110,7 @@ export default class QueueWork extends BaseCommand {
|
|||
[RunDownloadJob.queue]: 3,
|
||||
[DownloadModelJob.queue]: 2, // Lower concurrency for resource-intensive model downloads
|
||||
[RunBenchmarkJob.queue]: 1, // Run benchmarks one at a time for accurate results
|
||||
[EmbedFileJob.queue]: 2, // Lower concurrency for embedding jobs, can be resource intensive
|
||||
default: 3,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -66,12 +66,20 @@ export const SYSTEM_PROMPTS = {
|
|||
- Use tables when presenting structured data.
|
||||
`,
|
||||
rag_context: (context: string) => `
|
||||
You have access to the following relevant information from the knowledge base. Use this context to provide accurate and informed responses when relevant:
|
||||
You have access to relevant information from the knowledge base. This context has been retrieved based on semantic similarity to the user's question.
|
||||
|
||||
[Context]
|
||||
[Knowledge Base Context]
|
||||
${context}
|
||||
|
||||
If the user's question is related to this context, incorporate it into your response. Otherwise, respond normally.
|
||||
IMPORTANT INSTRUCTIONS:
|
||||
1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
|
||||
2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
|
||||
3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
|
||||
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer.
|
||||
5. Never fabricate information that isn't in the context or your training data.
|
||||
6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations.
|
||||
|
||||
Format your response using markdown for readability.
|
||||
`,
|
||||
chat_suggestions: `
|
||||
You are a helpful assistant that generates conversation starter suggestions for a survivalist/prepper using an AI assistant.
|
||||
|
|
|
|||
38
admin/package-lock.json
generated
38
admin/package-lock.json
generated
|
|
@ -20,6 +20,7 @@
|
|||
"@adonisjs/transmit": "^2.0.2",
|
||||
"@adonisjs/transmit-client": "^1.0.0",
|
||||
"@adonisjs/vite": "^4.0.0",
|
||||
"@chonkiejs/core": "^0.0.7",
|
||||
"@headlessui/react": "^2.2.4",
|
||||
"@inertiajs/react": "^2.0.13",
|
||||
"@markdoc/markdoc": "^0.5.2",
|
||||
|
|
@ -42,7 +43,6 @@
|
|||
"dockerode": "^4.0.7",
|
||||
"edge.js": "^6.2.1",
|
||||
"fast-xml-parser": "^5.2.5",
|
||||
"llm-chunk": "^0.0.1",
|
||||
"luxon": "^3.6.1",
|
||||
"maplibre-gl": "^4.7.1",
|
||||
"mysql2": "^3.14.1",
|
||||
|
|
@ -60,6 +60,7 @@
|
|||
"reflect-metadata": "^0.2.2",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"sharp": "^0.34.5",
|
||||
"stopword": "^3.1.5",
|
||||
"systeminformation": "^5.27.14",
|
||||
"tailwindcss": "^4.1.10",
|
||||
"tar": "^7.5.6",
|
||||
|
|
@ -83,6 +84,7 @@
|
|||
"@types/node": "^22.15.18",
|
||||
"@types/react": "^19.1.8",
|
||||
"@types/react-dom": "^19.1.6",
|
||||
"@types/stopword": "^2.0.3",
|
||||
"eslint": "^9.26.0",
|
||||
"hot-hook": "^0.4.0",
|
||||
"prettier": "^3.5.3",
|
||||
|
|
@ -1230,6 +1232,21 @@
|
|||
"dev": true,
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@chonkiejs/chunk": {
|
||||
"version": "0.9.3",
|
||||
"resolved": "https://registry.npmjs.org/@chonkiejs/chunk/-/chunk-0.9.3.tgz",
|
||||
"integrity": "sha512-uUOeoFGY3s6kzAoKskI50weZN0zvW3oLwUijA1uX7Wxuy9yZStF2IvGuXRigMgP2g/L85lsotYGkjpBMLjQnrg==",
|
||||
"license": "MIT OR Apache-2.0"
|
||||
},
|
||||
"node_modules/@chonkiejs/core": {
|
||||
"version": "0.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@chonkiejs/core/-/core-0.0.7.tgz",
|
||||
"integrity": "sha512-R17OW9TT1x7B6lDKTCaMd6NluAObleN/cCQtUbMK2UcFOguJtQz/cL0n1t0AzJWBFMVgYP8EcqTFn/fcKhzPiA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@chonkiejs/chunk": "^0.9.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@colors/colors": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.5.0.tgz",
|
||||
|
|
@ -5084,6 +5101,13 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/stopword": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/stopword/-/stopword-2.0.3.tgz",
|
||||
"integrity": "sha512-hioMj0lOvISM+EDevf7ijG8EMbU+J3pj4SstCyfQC1t39uPYpAe7beSfBdU6c1d9jeECTQQtR3UJWtVoUO8Weg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/supercluster": {
|
||||
"version": "7.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
|
||||
|
|
@ -9866,12 +9890,6 @@
|
|||
"url": "https://opencollective.com/parcel"
|
||||
}
|
||||
},
|
||||
"node_modules/llm-chunk": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/llm-chunk/-/llm-chunk-0.0.1.tgz",
|
||||
"integrity": "sha512-n9fHgsSiJb7vXZiC5c4XV6rme+tC7WX/cWH6EJvPPmMOMwOZ9xdg/U9LY5Qhmixd3K1PdRB0FVOdzoJF2HUZbg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/locate-path": {
|
||||
"version": "7.2.0",
|
||||
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-7.2.0.tgz",
|
||||
|
|
@ -13619,6 +13637,12 @@
|
|||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/stopword": {
|
||||
"version": "3.1.5",
|
||||
"resolved": "https://registry.npmjs.org/stopword/-/stopword-3.1.5.tgz",
|
||||
"integrity": "sha512-OgLYGVFCNa430WOrj9tYZhQge5yg6vd6JsKredveAqEhdLVQkfrpnQIGjx0L9lLqzL4Kq4J8yNTcfQR/MpBwhg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/string_decoder": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
"@types/node": "^22.15.18",
|
||||
"@types/react": "^19.1.8",
|
||||
"@types/react-dom": "^19.1.6",
|
||||
"@types/stopword": "^2.0.3",
|
||||
"eslint": "^9.26.0",
|
||||
"hot-hook": "^0.4.0",
|
||||
"prettier": "^3.5.3",
|
||||
|
|
@ -71,6 +72,7 @@
|
|||
"@adonisjs/transmit": "^2.0.2",
|
||||
"@adonisjs/transmit-client": "^1.0.0",
|
||||
"@adonisjs/vite": "^4.0.0",
|
||||
"@chonkiejs/core": "^0.0.7",
|
||||
"@headlessui/react": "^2.2.4",
|
||||
"@inertiajs/react": "^2.0.13",
|
||||
"@markdoc/markdoc": "^0.5.2",
|
||||
|
|
@ -93,7 +95,6 @@
|
|||
"dockerode": "^4.0.7",
|
||||
"edge.js": "^6.2.1",
|
||||
"fast-xml-parser": "^5.2.5",
|
||||
"llm-chunk": "^0.0.1",
|
||||
"luxon": "^3.6.1",
|
||||
"maplibre-gl": "^4.7.1",
|
||||
"mysql2": "^3.14.1",
|
||||
|
|
@ -111,6 +112,7 @@
|
|||
"reflect-metadata": "^0.2.2",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"sharp": "^0.34.5",
|
||||
"stopword": "^3.1.5",
|
||||
"systeminformation": "^5.27.14",
|
||||
"tailwindcss": "^4.1.10",
|
||||
"tar": "^7.5.6",
|
||||
|
|
|
|||
|
|
@ -119,6 +119,7 @@ router
|
|||
.group(() => {
|
||||
router.post('/upload', [RagController, 'upload'])
|
||||
router.get('/files', [RagController, 'getStoredFiles'])
|
||||
router.get('/job-status', [RagController, 'getJobStatus'])
|
||||
})
|
||||
.prefix('/api/rag')
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user