diff --git a/admin/app/controllers/ollama_controller.ts b/admin/app/controllers/ollama_controller.ts index 0148672..311c5ff 100644 --- a/admin/app/controllers/ollama_controller.ts +++ b/admin/app/controllers/ollama_controller.ts @@ -41,16 +41,17 @@ export default class OllamaController { if (lastUserMessage) { // Search for relevant context in the knowledge base + // Using lower threshold (0.3) with improved hybrid search const relevantDocs = await this.ragService.searchSimilarDocuments( lastUserMessage.content, 5, // Retrieve top 5 most relevant chunks - 0.7 // Minimum similarity score of 0.7 + 0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall) ) // If relevant context is found, inject as a system message if (relevantDocs.length > 0) { const contextText = relevantDocs - .map((doc, idx) => `[Context ${idx + 1}]\n${doc.text}`) + .map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`) .join('\n\n') const systemMessage = { diff --git a/admin/app/controllers/rag_controller.ts b/admin/app/controllers/rag_controller.ts index 73d6906..7af9cc4 100644 --- a/admin/app/controllers/rag_controller.ts +++ b/admin/app/controllers/rag_controller.ts @@ -1,9 +1,12 @@ import { RagService } from '#services/rag_service' +import { EmbedFileJob } from '#jobs/embed_file_job' import { inject } from '@adonisjs/core' import type { HttpContext } from '@adonisjs/core/http' import app from '@adonisjs/core/services/app' import { randomBytes } from 'node:crypto' import { sanitizeFilename } from '../utils/fs.js' +import { stat } from 'node:fs/promises' +import { getJobStatusSchema } from '#validators/rag' @inject() export default class RagController { @@ -19,19 +22,48 @@ export default class RagController { const sanitizedName = sanitizeFilename(uploadedFile.clientName) const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}` - const fullPath = app.makePath('storage/uploads', fileName) + const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName) - await uploadedFile.move(app.makePath('storage/uploads'), { + await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), { name: fileName, }) - // Don't await this - process in background - this.ragService.processAndEmbedFile(fullPath) + // Get file size for tracking + let fileSize: number | undefined = undefined + try { + const stats = await stat(fullPath) + fileSize = stats.size + } catch (error) { + // Not critical if we can't get file size, just swallow the error + } - return response.status(200).json({ - message: 'File has been uploaded and queued for processing.', - file_path: `/uploads/${fileName}`, + // Dispatch background job for embedding + const result = await EmbedFileJob.dispatch({ + filePath: fullPath, + fileName, + fileSize, }) + + return response.status(202).json({ + message: result.message, + jobId: result.jobId, + fileName, + filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`, + alreadyProcessing: !result.created, + }) + } + + public async getJobStatus({ request, response }: HttpContext) { + const reqData = await request.validateUsing(getJobStatusSchema) + + const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, reqData.filePath) + const status = await EmbedFileJob.getStatus(fullPath) + + if (!status.exists) { + return response.status(404).json({ error: 'Job not found for this file' }) + } + + return response.status(200).json(status) } public async getStoredFiles({ response }: HttpContext) { diff --git a/admin/app/jobs/embed_file_job.ts b/admin/app/jobs/embed_file_job.ts new file mode 100644 index 0000000..f7cd6c8 --- /dev/null +++ b/admin/app/jobs/embed_file_job.ts @@ -0,0 +1,161 @@ +import { Job } from 'bullmq' +import { QueueService } from '#services/queue_service' +import { RagService } from '#services/rag_service' +import { DockerService } from '#services/docker_service' +import { OllamaService } from '#services/ollama_service' +import { createHash } from 'crypto' +import logger from '@adonisjs/core/services/logger' + +export interface EmbedFileJobParams { + filePath: string + fileName: string + fileSize?: number +} + +export class EmbedFileJob { + static get queue() { + return 'file-embeddings' + } + + static get key() { + return 'embed-file' + } + + static getJobId(filePath: string): string { + return createHash('sha256').update(filePath).digest('hex').slice(0, 16) + } + + async handle(job: Job) { + const { filePath, fileName } = job.data as EmbedFileJobParams + + logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}`) + + const dockerService = new DockerService() + const ollamaService = new OllamaService() + const ragService = new RagService(dockerService, ollamaService) + + try { + // Update progress starting + await job.updateProgress(0) + await job.updateData({ + ...job.data, + status: 'processing', + startedAt: Date.now(), + }) + + logger.info(`[EmbedFileJob] Processing file: ${filePath}`) + + // Process and embed the file + const result = await ragService.processAndEmbedFile(filePath) + + if (!result.success) { + logger.error(`[EmbedFileJob] Failed to process file ${fileName}: ${result.message}`) + throw new Error(result.message) + } + + // Update progress complete + await job.updateProgress(100) + await job.updateData({ + ...job.data, + status: 'completed', + completedAt: Date.now(), + chunks: result.chunks, + }) + + logger.info( + `[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}` + ) + + return { + success: true, + fileName, + filePath, + chunks: result.chunks, + message: `Successfully embedded ${result.chunks} chunks`, + } + } catch (error) { + logger.error(`[EmbedFileJob] Error embedding file ${fileName}:`, error) + + await job.updateData({ + ...job.data, + status: 'failed', + failedAt: Date.now(), + error: error instanceof Error ? error.message : 'Unknown error', + }) + + throw error + } + } + + static async getByFilePath(filePath: string): Promise { + const queueService = new QueueService() + const queue = queueService.getQueue(this.queue) + const jobId = this.getJobId(filePath) + return await queue.getJob(jobId) + } + + static async dispatch(params: EmbedFileJobParams) { + const queueService = new QueueService() + const queue = queueService.getQueue(this.queue) + const jobId = this.getJobId(params.filePath) + + try { + const job = await queue.add(this.key, params, { + jobId, + attempts: 3, + backoff: { + type: 'exponential', + delay: 5000, // Delay 5 seconds before retrying + }, + removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history + removeOnFail: { count: 20 } // Keep last 20 failed jobs for debugging + }) + + logger.info(`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}`) + + return { + job, + created: true, + jobId, + message: `File queued for embedding: ${params.fileName}`, + } + } catch (error) { + if (error.message && error.message.includes('job already exists')) { + const existing = await queue.getJob(jobId) + logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`) + return { + job: existing, + created: false, + jobId, + message: `Embedding job already exists for: ${params.fileName}`, + } + } + throw error + } + } + + static async getStatus(filePath: string): Promise<{ + exists: boolean + status?: string + progress?: number + chunks?: number + error?: string + }> { + const job = await this.getByFilePath(filePath) + + if (!job) { + return { exists: false } + } + + const state = await job.getState() + const data = job.data + + return { + exists: true, + status: data.status || state, + progress: typeof job.progress === 'number' ? job.progress : undefined, + chunks: data.chunks, + error: data.error, + } + } +} diff --git a/admin/app/services/rag_service.ts b/admin/app/services/rag_service.ts index 7573fcd..b558eb1 100644 --- a/admin/app/services/rag_service.ts +++ b/admin/app/services/rag_service.ts @@ -2,28 +2,115 @@ import { QdrantClient } from '@qdrant/js-client-rest' import { DockerService } from './docker_service.js' import { inject } from '@adonisjs/core' import logger from '@adonisjs/core/services/logger' -import { chunk } from 'llm-chunk' +import { TokenChunker } from '@chonkiejs/core' import sharp from 'sharp' -import { determineFileType, getFile } from '../utils/fs.js' +import { deleteFileIfExists, determineFileType, getFile } from '../utils/fs.js' import { PDFParse } from 'pdf-parse' import { createWorker } from 'tesseract.js' import { fromBuffer } from 'pdf2pic' import { OllamaService } from './ollama_service.js' import { SERVICE_NAMES } from '../../constants/service_names.js' +import { removeStopwords } from 'stopword' +import { randomUUID } from 'node:crypto' @inject() export class RagService { private qdrant: QdrantClient | null = null private qdrantInitPromise: Promise | null = null - public static CONTENT_COLLECTION_NAME = 'open-webui_knowledge' // This is the collection name OWUI uses for uploaded knowledge + public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads' + public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base' public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5' public static EMBEDDING_DIMENSION = 768 // Nomic Embed Text v1.5 dimension is 768 + public static MODEL_CONTEXT_LENGTH = 2048 // nomic-embed-text has 2K token context + public static MAX_SAFE_TOKENS = 1800 // Leave buffer for prefix and tokenization variance + public static TARGET_TOKENS_PER_CHUNK = 1700 // Target 1700 tokens per chunk for embedding + public static PREFIX_TOKEN_BUDGET = 10 // Reserve ~10 tokens for prefixes + public static CHAR_TO_TOKEN_RATIO = 3 // Approximate chars per token + // Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance + public static SEARCH_DOCUMENT_PREFIX = 'search_document: ' + public static SEARCH_QUERY_PREFIX = 'search_query: ' constructor( private dockerService: DockerService, private ollamaService: OllamaService ) {} + /** + * Estimates token count for text. This is a conservative approximation: + * - English text: ~1 token per 3 characters + * - Adds buffer for special characters and tokenization variance + * + * Note: This is approximate and realistic english + * tokenization is ~4 chars/token, but we use 3 here to be safe. + * Actual tokenization may differ, but being + * conservative prevents context length errors. + */ + private estimateTokenCount(text: string): number { + // This accounts for special characters, numbers, and punctuation + return Math.ceil(text.length / RagService.CHAR_TO_TOKEN_RATIO) + } + + /** + * Truncates text to fit within token limit, preserving word boundaries. + * Ensures the text + prefix won't exceed the model's context window. + */ + private truncateToTokenLimit(text: string, maxTokens: number): string { + const estimatedTokens = this.estimateTokenCount(text) + + if (estimatedTokens <= maxTokens) { + return text + } + + // Calculate how many characters we can keep using our ratio + const maxChars = Math.floor(maxTokens * RagService.CHAR_TO_TOKEN_RATIO) + + // Truncate at word boundary + let truncated = text.substring(0, maxChars) + const lastSpace = truncated.lastIndexOf(' ') + + if (lastSpace > maxChars * 0.8) { + // If we found a space in the last 20%, use it + truncated = truncated.substring(0, lastSpace) + } + + logger.warn( + `[RAG] Truncated text from ${text.length} to ${truncated.length} chars (est. ${estimatedTokens} → ${this.estimateTokenCount(truncated)} tokens)` + ) + + return truncated + } + + /** + * Preprocesses a query to improve retrieval by expanding it with context. + * This helps match documents even when using different terminology. + */ + private preprocessQuery(query: string): string { + // Future: this is a placeholder for more advanced query expansion techniques. + // For now, we simply trim whitespace. Improvements could include: + // - Synonym expansion using a thesaurus + // - Adding related terms based on domain knowledge + // - Using a language model to rephrase or elaborate the query + const expanded = query.trim() + logger.debug(`[RAG] Original query: "${query}"`) + logger.debug(`[RAG] Preprocessed query: "${expanded}"`) + return expanded + } + + /** + * Extract keywords from query for hybrid search + */ + private extractKeywords(query: string): string[] { + const split = query.split(' ') + const noStopWords = removeStopwords(split) + + // Future: This is basic normalization, could be improved with stemming/lemmatization later + const keywords = noStopWords + .map((word) => word.replace(/[^\w]/g, '').toLowerCase()) + .filter((word) => word.length > 2) + + return [...new Set(keywords)] + } + private async _initializeQdrantClient() { if (!this.qdrantInitPromise) { this.qdrantInitPromise = (async () => { @@ -84,43 +171,87 @@ export class RagService { throw new Error(`${RagService.EMBEDDING_MODEL} does not exist and could not be downloaded.`) } - const chunks = chunk(text, { - // These settings should provide a good balance between context and precision - minLength: 512, - maxLength: 1024, - overlap: 200, + // TokenChunker uses character-based tokenization (1 char = 1 token) + // We need to convert our embedding model's token counts to character counts + // since nomic-embed-text tokenizer uses ~3 chars per token + const targetCharsPerChunk = Math.floor(RagService.TARGET_TOKENS_PER_CHUNK * RagService.CHAR_TO_TOKEN_RATIO) + const overlapChars = Math.floor(150 * RagService.CHAR_TO_TOKEN_RATIO) + + const chunker = await TokenChunker.create({ + chunkSize: targetCharsPerChunk, + chunkOverlap: overlapChars, }) - if (!chunks || chunks.length === 0) { + const chunkResults = await chunker.chunk(text) + + if (!chunkResults || chunkResults.length === 0) { throw new Error('No text chunks generated for embedding.') } - const embeddings: number[][] = [] + // Extract text from chunk results + const chunks = chunkResults.map((chunk) => chunk.text) + const ollamaClient = await this.ollamaService.getClient() - for (const chunkText of chunks) { + + const embeddings: number[][] = [] + for (let i = 0; i < chunks.length; i++) { + let chunkText = chunks[i] + + // Final safety check: ensure chunk + prefix fits + const prefixText = RagService.SEARCH_DOCUMENT_PREFIX + const withPrefix = prefixText + chunkText + const estimatedTokens = this.estimateTokenCount(withPrefix) + + if (estimatedTokens > RagService.MAX_SAFE_TOKENS) { + // This should be rare - log for debugging if it's occurring frequently + const prefixTokens = this.estimateTokenCount(prefixText) + const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens + logger.warn( + `[RAG] Chunk ${i} estimated at ${estimatedTokens} tokens (${chunkText.length} chars), truncating to ${maxTokensForText} tokens` + ) + chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText) + } + + logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`) + const response = await ollamaClient.embeddings({ model: RagService.EMBEDDING_MODEL, - prompt: chunkText, + prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText, }) embeddings.push(response.embedding) } - const points = chunks.map((chunkText, index) => ({ - id: `${Date.now()}_${index}`, - vector: embeddings[index], - payload: { - ...metadata, - text: chunkText, - chunk_index: index, - }, - })) + const timestamp = Date.now() + const points = chunks.map((chunkText, index) => { + // Extract keywords for hybrid search + const keywords = this.extractKeywords(chunkText) + logger.debug(`[RAG] Extracted keywords for chunk ${index}: [${keywords.join(', ')}]`) + return { + id: randomUUID(), // qdrant requires either uuid or unsigned int + vector: embeddings[index], + payload: { + ...metadata, + text: chunkText, + chunk_index: index, + total_chunks: chunks.length, + keywords: keywords.join(' '), // Store as space-separated string for text search + char_count: chunkText.length, + created_at: timestamp, + source: metadata.source || 'unknown' + }, + } + }) await this.qdrant!.upsert(RagService.CONTENT_COLLECTION_NAME, { points }) + logger.debug(`[RAG] Successfully embedded and stored ${chunks.length} chunks`) + logger.debug(`[RAG] First chunk preview: "${chunks[0].substring(0, 100)}..."`) + return { chunks: chunks.length } } catch (error) { - logger.error('Error embedding text:', error) + console.error(error) + logger.error('[RAG] Error embedding text:', error) return null } } @@ -195,7 +326,7 @@ export class RagService { * This includes text extraction, chunking, embedding, and storing in Qdrant. */ public async processAndEmbedFile( - filepath: string + filepath: string // Should already be the full path to the uploaded file ): Promise<{ success: boolean; message: string; chunks?: number }> { try { const fileType = determineFileType(filepath) @@ -233,7 +364,13 @@ export class RagService { return { success: false, message: 'No text could be extracted from the file.' } } - const embedResult = await this.embedAndStoreText(extractedText, {}) + const embedResult = await this.embedAndStoreText(extractedText, { + source: filepath + }) + + // Cleanup the file from disk + logger.info(`[RAG] Embedding complete, deleting uploaded file: ${filepath}`) + await deleteFileIfExists(filepath) return { success: true, @@ -248,60 +385,230 @@ export class RagService { /** * Search for documents similar to the query text in the Qdrant knowledge base. - * Returns the most relevant text chunks based on semantic similarity. + * Uses a hybrid approach combining semantic similarity and keyword matching. + * Implements adaptive thresholds and result reranking for optimal retrieval. * @param query - The search query text * @param limit - Maximum number of results to return (default: 5) - * @param scoreThreshold - Minimum similarity score threshold (default: 0.7) + * @param scoreThreshold - Minimum similarity score threshold (default: 0.3, much lower than before) * @returns Array of relevant text chunks with their scores */ public async searchSimilarDocuments( query: string, limit: number = 5, - scoreThreshold: number = 0.7 - ): Promise> { + scoreThreshold: number = 0.3 // Lower default threshold - was 0.7, now 0.3 + ): Promise }>> { try { + logger.debug(`[RAG] Starting similarity search for query: "${query}"`) + await this._ensureCollection( RagService.CONTENT_COLLECTION_NAME, RagService.EMBEDDING_DIMENSION ) + // Check if collection has any points + const collectionInfo = await this.qdrant!.getCollection(RagService.CONTENT_COLLECTION_NAME) + const pointCount = collectionInfo.points_count || 0 + logger.debug(`[RAG] Knowledge base contains ${pointCount} document chunks`) + + if (pointCount === 0) { + logger.debug('[RAG] Knowledge base is empty. Could not perform search.') + return [] + } + const allModels = await this.ollamaService.getModels(true) const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL) if (!embeddingModel) { logger.warn( - `${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.` + `[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.` ) return [] } - // Generate embedding for the query + // Preprocess query for better matching + const processedQuery = this.preprocessQuery(query) + const keywords = this.extractKeywords(processedQuery) + logger.debug(`[RAG] Extracted keywords: [${keywords.join(', ')}]`) + + // Generate embedding for the query with search_query prefix const ollamaClient = await this.ollamaService.getClient() + + // Ensure query doesn't exceed token limit + const prefixTokens = this.estimateTokenCount(RagService.SEARCH_QUERY_PREFIX) + const maxQueryTokens = RagService.MAX_SAFE_TOKENS - prefixTokens + const truncatedQuery = this.truncateToTokenLimit(processedQuery, maxQueryTokens) + + const prefixedQuery = RagService.SEARCH_QUERY_PREFIX + truncatedQuery + logger.debug(`[RAG] Generating embedding with prefix: "${RagService.SEARCH_QUERY_PREFIX}"`) + + // Validate final token count + const queryTokenCount = this.estimateTokenCount(prefixedQuery) + if (queryTokenCount > RagService.MAX_SAFE_TOKENS) { + logger.error( + `[RAG] Query too long even after truncation: ${queryTokenCount} tokens (max: ${RagService.MAX_SAFE_TOKENS})` + ) + return [] + } + const response = await ollamaClient.embeddings({ model: RagService.EMBEDDING_MODEL, - prompt: query, + prompt: prefixedQuery, }) - // Search for similar vectors in Qdrant + // Perform semantic search with a higher limit to enable reranking + const searchLimit = limit * 3 // Get more results for reranking + logger.debug( + `[RAG] Searching for top ${searchLimit} semantic matches (threshold: ${scoreThreshold})` + ) + const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, { vector: response.embedding, - limit: limit, + limit: searchLimit, score_threshold: scoreThreshold, with_payload: true, }) - console.log("Got search results:", searchResults); + logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`) - return searchResults.map((result) => ({ + // Map results with metadata for reranking + const resultsWithMetadata = searchResults.map((result) => ({ text: (result.payload?.text as string) || '', score: result.score, + keywords: (result.payload?.keywords as string) || '', + chunk_index: (result.payload?.chunk_index as number) || 0, + created_at: (result.payload?.created_at as number) || 0, + })) + + const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query) + + logger.debug(`[RAG] Top 3 results after reranking:`) + rerankedResults.slice(0, 3).forEach((result, idx) => { + logger.debug( + `[RAG] ${idx + 1}. Score: ${result.finalScore.toFixed(4)} (semantic: ${result.score.toFixed(4)}) - "${result.text.substring(0, 100)}..."` + ) + }) + + // Return top N results + return rerankedResults.slice(0, limit).map((result) => ({ + text: result.text, + score: result.finalScore, + metadata: { + chunk_index: result.chunk_index, + created_at: result.created_at, + semantic_score: result.score, + }, })) } catch (error) { - logger.error('Error searching similar documents:', error) + logger.error('[RAG] Error searching similar documents:', error) return [] } } + /** + * Rerank search results using hybrid scoring that combines: + * 1. Semantic similarity score (primary signal) + * 2. Keyword overlap bonus (conservative, quality-gated) + * 3. Direct term matches (conservative) + * + * Tries to boost only already-relevant results, not promote + * low-quality results just because they have keyword matches. + * + * Future: this is a decent feature-based approach, but we could + * switch to a python-based reranker in the future if the benefits + * outweigh the overhead. + */ + private rerankResults( + results: Array<{ + text: string + score: number + keywords: string + chunk_index: number + created_at: number + }>, + queryKeywords: string[], + originalQuery: string + ): Array<{ + text: string + score: number + finalScore: number + chunk_index: number + created_at: number + }> { + return results + .map((result) => { + let finalScore = result.score + + // Quality gate: Only apply boosts if semantic score is reasonable + // Try to prevent promoting irrelevant results that just happen to have keyword matches + const MIN_SEMANTIC_THRESHOLD = 0.35 + + if (result.score < MIN_SEMANTIC_THRESHOLD) { + // For low-scoring results, use semantic score as-is + // This prevents false positives from keyword gaming + logger.debug( + `[RAG] Skipping boost for low semantic score: ${result.score.toFixed(3)} (threshold: ${MIN_SEMANTIC_THRESHOLD})` + ) + return { + ...result, + finalScore, + } + } + + // Boost score based on keyword overlap (diminishing returns - overlap goes down, so does boost) + const docKeywords = result.keywords + .toLowerCase() + .split(' ') + .filter((k) => k.length > 0) + const matchingKeywords = queryKeywords.filter( + (kw) => + docKeywords.includes(kw.toLowerCase()) || + result.text.toLowerCase().includes(kw.toLowerCase()) + ) + const keywordOverlap = matchingKeywords.length / Math.max(queryKeywords.length, 1) + + // Use square root for diminishing returns: 100% overlap = sqrt(1.0) = 1.0, 25% = 0.5 + // Then scale conservatively (max 10% boost instead of 20%) + const keywordBoost = Math.sqrt(keywordOverlap) * 0.1 * result.score + + if (keywordOverlap > 0) { + logger.debug( + `[RAG] Keyword overlap: ${matchingKeywords.length}/${queryKeywords.length} - Boost: ${keywordBoost.toFixed(3)}` + ) + } + + // Boost if original query terms appear in text (case-insensitive) + // Scale boost proportionally to base score to avoid over-promoting weak matches + const queryTerms = originalQuery + .toLowerCase() + .split(/\s+/) + .filter((t) => t.length > 3) + const directMatches = queryTerms.filter((term) => + result.text.toLowerCase().includes(term) + ).length + + if (queryTerms.length > 0) { + const directMatchRatio = directMatches / queryTerms.length + // Conservative boost: max 7.5% of the base score + const directMatchBoost = Math.sqrt(directMatchRatio) * 0.075 * result.score + + if (directMatches > 0) { + logger.debug( + `[RAG] Direct term matches: ${directMatches}/${queryTerms.length} - Boost: ${directMatchBoost.toFixed(3)}` + ) + finalScore += directMatchBoost + } + } + + finalScore = Math.min(1.0, finalScore + keywordBoost) + + return { + ...result, + finalScore, + } + }) + .sort((a, b) => b.finalScore - a.finalScore) + } + /** * Retrieve all unique source files that have been stored in the knowledge base. * @returns Array of unique source file identifiers @@ -328,9 +635,8 @@ export class RagService { // Extract unique source values from payloads scrollResult.points.forEach((point) => { - const metadata = point.payload?.metadata - if (metadata && typeof metadata === 'object' && 'source' in metadata) { - const source = metadata.source as string + const source = point.payload?.source + if (source && typeof source === 'string') { sources.add(source) } }) @@ -338,7 +644,13 @@ export class RagService { offset = scrollResult.next_page_offset || null } while (offset !== null) - return Array.from(sources) + const sourcesArr = Array.from(sources) + + // The source is a full path - only extract the filename for display + return sourcesArr.map((src) => { + const parts = src.split(/[/\\]/) + return parts[parts.length - 1] // Return the last part as filename + }) } catch (error) { logger.error('Error retrieving stored files:', error) return [] diff --git a/admin/app/validators/rag.ts b/admin/app/validators/rag.ts new file mode 100644 index 0000000..92799bf --- /dev/null +++ b/admin/app/validators/rag.ts @@ -0,0 +1,7 @@ +import vine from '@vinejs/vine' + +export const getJobStatusSchema = vine.compile( + vine.object({ + filePath: vine.string(), + }) +) diff --git a/admin/commands/queue/work.ts b/admin/commands/queue/work.ts index 2ee93b5..e381aa0 100644 --- a/admin/commands/queue/work.ts +++ b/admin/commands/queue/work.ts @@ -5,6 +5,7 @@ import queueConfig from '#config/queue' import { RunDownloadJob } from '#jobs/run_download_job' import { DownloadModelJob } from '#jobs/download_model_job' import { RunBenchmarkJob } from '#jobs/run_benchmark_job' +import { EmbedFileJob } from '#jobs/embed_file_job' export default class QueueWork extends BaseCommand { static commandName = 'queue:work' @@ -90,10 +91,12 @@ export default class QueueWork extends BaseCommand { handlers.set(RunDownloadJob.key, new RunDownloadJob()) handlers.set(DownloadModelJob.key, new DownloadModelJob()) handlers.set(RunBenchmarkJob.key, new RunBenchmarkJob()) + handlers.set(EmbedFileJob.key, new EmbedFileJob()) queues.set(RunDownloadJob.key, RunDownloadJob.queue) queues.set(DownloadModelJob.key, DownloadModelJob.queue) queues.set(RunBenchmarkJob.key, RunBenchmarkJob.queue) + queues.set(EmbedFileJob.key, EmbedFileJob.queue) return [handlers, queues] } @@ -107,6 +110,7 @@ export default class QueueWork extends BaseCommand { [RunDownloadJob.queue]: 3, [DownloadModelJob.queue]: 2, // Lower concurrency for resource-intensive model downloads [RunBenchmarkJob.queue]: 1, // Run benchmarks one at a time for accurate results + [EmbedFileJob.queue]: 2, // Lower concurrency for embedding jobs, can be resource intensive default: 3, } diff --git a/admin/constants/ollama.ts b/admin/constants/ollama.ts index 3bf9914..6bac8bd 100644 --- a/admin/constants/ollama.ts +++ b/admin/constants/ollama.ts @@ -66,12 +66,20 @@ export const SYSTEM_PROMPTS = { - Use tables when presenting structured data. `, rag_context: (context: string) => ` -You have access to the following relevant information from the knowledge base. Use this context to provide accurate and informed responses when relevant: +You have access to relevant information from the knowledge base. This context has been retrieved based on semantic similarity to the user's question. -[Context] +[Knowledge Base Context] ${context} -If the user's question is related to this context, incorporate it into your response. Otherwise, respond normally. +IMPORTANT INSTRUCTIONS: +1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers. +2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base..."). +3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base. +4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. +5. Never fabricate information that isn't in the context or your training data. +6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations. + +Format your response using markdown for readability. `, chat_suggestions: ` You are a helpful assistant that generates conversation starter suggestions for a survivalist/prepper using an AI assistant. diff --git a/admin/package-lock.json b/admin/package-lock.json index 62fa906..ef367bb 100644 --- a/admin/package-lock.json +++ b/admin/package-lock.json @@ -20,6 +20,7 @@ "@adonisjs/transmit": "^2.0.2", "@adonisjs/transmit-client": "^1.0.0", "@adonisjs/vite": "^4.0.0", + "@chonkiejs/core": "^0.0.7", "@headlessui/react": "^2.2.4", "@inertiajs/react": "^2.0.13", "@markdoc/markdoc": "^0.5.2", @@ -42,7 +43,6 @@ "dockerode": "^4.0.7", "edge.js": "^6.2.1", "fast-xml-parser": "^5.2.5", - "llm-chunk": "^0.0.1", "luxon": "^3.6.1", "maplibre-gl": "^4.7.1", "mysql2": "^3.14.1", @@ -60,6 +60,7 @@ "reflect-metadata": "^0.2.2", "remark-gfm": "^4.0.1", "sharp": "^0.34.5", + "stopword": "^3.1.5", "systeminformation": "^5.27.14", "tailwindcss": "^4.1.10", "tar": "^7.5.6", @@ -83,6 +84,7 @@ "@types/node": "^22.15.18", "@types/react": "^19.1.8", "@types/react-dom": "^19.1.6", + "@types/stopword": "^2.0.3", "eslint": "^9.26.0", "hot-hook": "^0.4.0", "prettier": "^3.5.3", @@ -1230,6 +1232,21 @@ "dev": true, "license": "Apache-2.0" }, + "node_modules/@chonkiejs/chunk": { + "version": "0.9.3", + "resolved": "https://registry.npmjs.org/@chonkiejs/chunk/-/chunk-0.9.3.tgz", + "integrity": "sha512-uUOeoFGY3s6kzAoKskI50weZN0zvW3oLwUijA1uX7Wxuy9yZStF2IvGuXRigMgP2g/L85lsotYGkjpBMLjQnrg==", + "license": "MIT OR Apache-2.0" + }, + "node_modules/@chonkiejs/core": { + "version": "0.0.7", + "resolved": "https://registry.npmjs.org/@chonkiejs/core/-/core-0.0.7.tgz", + "integrity": "sha512-R17OW9TT1x7B6lDKTCaMd6NluAObleN/cCQtUbMK2UcFOguJtQz/cL0n1t0AzJWBFMVgYP8EcqTFn/fcKhzPiA==", + "license": "MIT", + "dependencies": { + "@chonkiejs/chunk": "^0.9.3" + } + }, "node_modules/@colors/colors": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.5.0.tgz", @@ -5084,6 +5101,13 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/stopword": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/stopword/-/stopword-2.0.3.tgz", + "integrity": "sha512-hioMj0lOvISM+EDevf7ijG8EMbU+J3pj4SstCyfQC1t39uPYpAe7beSfBdU6c1d9jeECTQQtR3UJWtVoUO8Weg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/supercluster": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz", @@ -9866,12 +9890,6 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/llm-chunk": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/llm-chunk/-/llm-chunk-0.0.1.tgz", - "integrity": "sha512-n9fHgsSiJb7vXZiC5c4XV6rme+tC7WX/cWH6EJvPPmMOMwOZ9xdg/U9LY5Qhmixd3K1PdRB0FVOdzoJF2HUZbg==", - "license": "MIT" - }, "node_modules/locate-path": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-7.2.0.tgz", @@ -13619,6 +13637,12 @@ "node": ">= 0.8" } }, + "node_modules/stopword": { + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/stopword/-/stopword-3.1.5.tgz", + "integrity": "sha512-OgLYGVFCNa430WOrj9tYZhQge5yg6vd6JsKredveAqEhdLVQkfrpnQIGjx0L9lLqzL4Kq4J8yNTcfQR/MpBwhg==", + "license": "MIT" + }, "node_modules/string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", diff --git a/admin/package.json b/admin/package.json index e42690e..a7141a1 100644 --- a/admin/package.json +++ b/admin/package.json @@ -52,6 +52,7 @@ "@types/node": "^22.15.18", "@types/react": "^19.1.8", "@types/react-dom": "^19.1.6", + "@types/stopword": "^2.0.3", "eslint": "^9.26.0", "hot-hook": "^0.4.0", "prettier": "^3.5.3", @@ -71,6 +72,7 @@ "@adonisjs/transmit": "^2.0.2", "@adonisjs/transmit-client": "^1.0.0", "@adonisjs/vite": "^4.0.0", + "@chonkiejs/core": "^0.0.7", "@headlessui/react": "^2.2.4", "@inertiajs/react": "^2.0.13", "@markdoc/markdoc": "^0.5.2", @@ -93,7 +95,6 @@ "dockerode": "^4.0.7", "edge.js": "^6.2.1", "fast-xml-parser": "^5.2.5", - "llm-chunk": "^0.0.1", "luxon": "^3.6.1", "maplibre-gl": "^4.7.1", "mysql2": "^3.14.1", @@ -111,6 +112,7 @@ "reflect-metadata": "^0.2.2", "remark-gfm": "^4.0.1", "sharp": "^0.34.5", + "stopword": "^3.1.5", "systeminformation": "^5.27.14", "tailwindcss": "^4.1.10", "tar": "^7.5.6", diff --git a/admin/start/routes.ts b/admin/start/routes.ts index d88ae14..0a552d2 100644 --- a/admin/start/routes.ts +++ b/admin/start/routes.ts @@ -119,6 +119,7 @@ router .group(() => { router.post('/upload', [RagController, 'upload']) router.get('/files', [RagController, 'getStoredFiles']) + router.get('/job-status', [RagController, 'getJobStatus']) }) .prefix('/api/rag')