feat(RAG): initial beta with preprocessing, embedding, semantic retrieval, and ctx passage

2026-04-03 23:36:17 +02:00 · 2026-02-01 23:59:21 +00:00 · 2026-02-01 23:59:21 +00:00 · d1f40663d3
commit d1f40663d3
parent 1923cd4cde
10 changed files with 612 additions and 60 deletions
--- a/admin/app/controllers/ollama_controller.ts
+++ b/admin/app/controllers/ollama_controller.ts
@ -41,16 +41,17 @@ export default class OllamaController {
    if (lastUserMessage) {
      // Search for relevant context in the knowledge base
      // Using lower threshold (0.3) with improved hybrid search
      const relevantDocs = await this.ragService.searchSimilarDocuments(
        lastUserMessage.content,
        5, // Retrieve top 5 most relevant chunks
-        0.7 // Minimum similarity score of 0.7
+        0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
      )
      // If relevant context is found, inject as a system message
      if (relevantDocs.length > 0) {
        const contextText = relevantDocs
-          .map((doc, idx) => `[Context ${idx + 1}]\n${doc.text}`)
+          .map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
          .join('\n\n')
        const systemMessage = {
--- a/admin/app/controllers/rag_controller.ts
+++ b/admin/app/controllers/rag_controller.ts
@ -1,9 +1,12 @@
 import { RagService } from '#services/rag_service'
 import { EmbedFileJob } from '#jobs/embed_file_job'
 import { inject } from '@adonisjs/core'
 import type { HttpContext } from '@adonisjs/core/http'
 import app from '@adonisjs/core/services/app'
 import { randomBytes } from 'node:crypto'
 import { sanitizeFilename } from '../utils/fs.js'
 import { stat } from 'node:fs/promises'
 import { getJobStatusSchema } from '#validators/rag'
@inject()
 export default class RagController {
@ -19,19 +22,48 @@ export default class RagController {
    const sanitizedName = sanitizeFilename(uploadedFile.clientName)
    const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}`
-    const fullPath = app.makePath('storage/uploads', fileName)
+    const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)
-    await uploadedFile.move(app.makePath('storage/uploads'), {
+    await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), {
      name: fileName,
    })
-    // Don't await this - process in background
+    // Get file size for tracking
-    this.ragService.processAndEmbedFile(fullPath)
+    let fileSize: number | undefined = undefined
    try {
      const stats = await stat(fullPath)
      fileSize = stats.size
    } catch (error) {
      // Not critical if we can't get file size, just swallow the error
    }
-    return response.status(200).json({
+    // Dispatch background job for embedding
-      message: 'File has been uploaded and queued for processing.',
+    const result = await EmbedFileJob.dispatch({
-      file_path: `/uploads/${fileName}`,
+      filePath: fullPath,
      fileName,
      fileSize,
    })
    return response.status(202).json({
      message: result.message,
      jobId: result.jobId,
      fileName,
      filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`,
      alreadyProcessing: !result.created,
    })
  }
  /**
   * Reports the state of the embedding job for a previously uploaded file.
   *
   * The request's `filePath` may be either the bare stored file name or the
   * `/<uploads path>/<file name>` value returned by the upload action. Only the
   * last path segment is used, so both forms resolve to the same absolute path
   * that was hashed into the job id at dispatch time, and `..` segments cannot
   * point the lookup outside the uploads directory.
   *
   * Responds 404 when no job exists for the file, otherwise 200 with the
   * status object from EmbedFileJob.getStatus.
   */
  public async getJobStatus({ request, response }: HttpContext) {
    const reqData = await request.validateUsing(getJobStatusSchema)

    // Keep only the file name; joining a value that already contains the
    // uploads prefix would produce a doubled path that never matches a job.
    const segments = reqData.filePath.split(/[/\\]/)
    const fileName = segments[segments.length - 1] ?? ''

    const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)
    const status = await EmbedFileJob.getStatus(fullPath)
    if (!status.exists) {
      return response.status(404).json({ error: 'Job not found for this file' })
    }
    return response.status(200).json(status)
  }
  public async getStoredFiles({ response }: HttpContext) {
--- a/admin/app/jobs/embed_file_job.ts
+++ b/admin/app/jobs/embed_file_job.ts
@ -0,0 +1,161 @@
 import { Job } from 'bullmq'
 import { QueueService } from '#services/queue_service'
 import { RagService } from '#services/rag_service'
 import { DockerService } from '#services/docker_service'
 import { OllamaService } from '#services/ollama_service'
 import { createHash } from 'crypto'
 import logger from '@adonisjs/core/services/logger'
 /** Payload stored on a queued embedding job. */
 export interface EmbedFileJobParams {
  // Path of the uploaded file; passed to RagService.processAndEmbedFile and hashed into the job id
  filePath: string
  // Stored file name, used in log and status messages
  fileName: string
  // Size in bytes when the uploader could stat the file; not read by the job handler itself
  fileSize?: number
 }
 /**
  * Queue job that extracts, chunks and embeds one uploaded file through
  * RagService, recording its progress and outcome on the job's data.
  *
  * Jobs are keyed by a hash of the file path, so a given path maps to at most
  * one job in the queue.
  */
 export class EmbedFileJob {
  /** Name of the queue these jobs are placed on. */
  static get queue() {
    return 'file-embeddings'
  }

  /** Job name used when adding to the queue and when registering the handler. */
  static get key() {
    return 'embed-file'
  }

  /**
   * Derives the deterministic job id for a file path: the first 16 hex
   * characters of the path's SHA-256 digest.
   */
  static getJobId(filePath: string): string {
    return createHash('sha256').update(filePath).digest('hex').slice(0, 16)
  }

  /**
   * Processes one job: marks it as processing, runs the embedding pipeline and
   * stores the resulting status on the job data.
   *
   * @returns A summary containing the file name, path and number of chunks embedded
   * @throws The underlying error when processing fails, so the queue can apply
   *         its retry policy
   */
  async handle(job: Job) {
    const { filePath, fileName } = job.data as EmbedFileJobParams
    logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}`)
    const dockerService = new DockerService()
    const ollamaService = new OllamaService()
    const ragService = new RagService(dockerService, ollamaService)
    try {
      // Update progress starting
      await job.updateProgress(0)
      await job.updateData({
        ...job.data,
        status: 'processing',
        startedAt: Date.now(),
      })
      logger.info(`[EmbedFileJob] Processing file: ${filePath}`)
      // Process and embed the file
      const result = await ragService.processAndEmbedFile(filePath)
      if (!result.success) {
        logger.error(`[EmbedFileJob] Failed to process file ${fileName}: ${result.message}`)
        throw new Error(result.message)
      }
      // Update progress complete
      await job.updateProgress(100)
      await job.updateData({
        ...job.data,
        status: 'completed',
        completedAt: Date.now(),
        chunks: result.chunks,
      })
      logger.info(
        `[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}`
      )
      return {
        success: true,
        fileName,
        filePath,
        chunks: result.chunks,
        message: `Successfully embedded ${result.chunks} chunks`,
      }
    } catch (error) {
      // Pass the error as the merging object so it is serialized into the log
      // entry instead of being dropped as an unused format argument.
      logger.error({ err: error }, `[EmbedFileJob] Error embedding file ${fileName}:`)
      try {
        await job.updateData({
          ...job.data,
          status: 'failed',
          failedAt: Date.now(),
          error: error instanceof Error ? error.message : 'Unknown error',
        })
      } catch (updateError) {
        // Recording the failure is best-effort; the original error must still
        // be the one that reaches the queue.
        logger.error(
          { err: updateError },
          `[EmbedFileJob] Could not record failure state for file ${fileName}`
        )
      }
      throw error
    }
  }

  /** Looks up the job associated with a file path, if one exists. */
  static async getByFilePath(filePath: string): Promise<Job | undefined> {
    const queueService = new QueueService()
    const queue = queueService.getQueue(this.queue)
    const jobId = this.getJobId(filePath)
    return await queue.getJob(jobId)
  }

  /**
   * Queues an embedding job for a file unless one already exists for the same
   * path.
   *
   * @returns `created: true` with the new job, or `created: false` with the
   *          existing job when the path was already queued
   */
  static async dispatch(params: EmbedFileJobParams) {
    const queueService = new QueueService()
    const queue = queueService.getQueue(this.queue)
    const jobId = this.getJobId(params.filePath)

    // Adding a job whose id is already present is silently ignored by the
    // queue rather than rejected, so duplicates have to be detected up front.
    const existingJob = await queue.getJob(jobId)
    if (existingJob) {
      logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
      return {
        job: existingJob,
        created: false,
        jobId,
        message: `Embedding job already exists for: ${params.fileName}`,
      }
    }

    try {
      const job = await queue.add(this.key, params, {
        jobId,
        attempts: 3,
        backoff: {
          type: 'exponential',
          delay: 5000, // Delay 5 seconds before retrying
        },
        removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history
        removeOnFail: { count: 20 }, // Keep last 20 failed jobs for debugging
      })
      logger.info(`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}`)
      return {
        job,
        created: true,
        jobId,
        message: `File queued for embedding: ${params.fileName}`,
      }
    } catch (error) {
      // Kept as a fallback for queue implementations that do reject duplicates.
      const reason = error instanceof Error ? error.message : ''
      if (reason.includes('job already exists')) {
        const existing = await queue.getJob(jobId)
        logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
        return {
          job: existing,
          created: false,
          jobId,
          message: `Embedding job already exists for: ${params.fileName}`,
        }
      }
      throw error
    }
  }

  /**
   * Summarizes the job for a file path.
   *
   * `status` prefers the value the handler wrote into the job data and falls
   * back to the queue's own state; `progress` is only reported when numeric.
   */
  static async getStatus(filePath: string): Promise<{
    exists: boolean
    status?: string
    progress?: number
    chunks?: number
    error?: string
  }> {
    const job = await this.getByFilePath(filePath)
    if (!job) {
      return { exists: false }
    }
    const state = await job.getState()
    const data = job.data
    return {
      exists: true,
      status: data.status || state,
      progress: typeof job.progress === 'number' ? job.progress : undefined,
      chunks: data.chunks,
      error: data.error,
    }
  }
 }
--- a/admin/app/services/rag_service.ts
+++ b/admin/app/services/rag_service.ts
@ -2,28 +2,115 @@ import { QdrantClient } from '@qdrant/js-client-rest'
 import { DockerService } from './docker_service.js'
 import { inject } from '@adonisjs/core'
 import logger from '@adonisjs/core/services/logger'
-import { chunk } from 'llm-chunk'
+import { TokenChunker } from '@chonkiejs/core'
 import sharp from 'sharp'
-import { determineFileType, getFile } from '../utils/fs.js'
+import { deleteFileIfExists, determineFileType, getFile } from '../utils/fs.js'
 import { PDFParse } from 'pdf-parse'
 import { createWorker } from 'tesseract.js'
 import { fromBuffer } from 'pdf2pic'
 import { OllamaService } from './ollama_service.js'
 import { SERVICE_NAMES } from '../../constants/service_names.js'
 import { removeStopwords } from 'stopword'
 import { randomUUID } from 'node:crypto'
@inject()
 export class RagService {
  private qdrant: QdrantClient | null = null
  private qdrantInitPromise: Promise<void> | null = null
-  public static CONTENT_COLLECTION_NAME = 'open-webui_knowledge' // This is the collection name OWUI uses for uploaded knowledge
+  public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
  public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
  public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
  public static EMBEDDING_DIMENSION = 768 // Nomic Embed Text v1.5 dimension is 768
  public static MODEL_CONTEXT_LENGTH = 2048 // nomic-embed-text has 2K token context
  public static MAX_SAFE_TOKENS = 1800 // Leave buffer for prefix and tokenization variance
  public static TARGET_TOKENS_PER_CHUNK = 1700 // Target 1700 tokens per chunk for embedding
  public static PREFIX_TOKEN_BUDGET = 10 // Reserve ~10 tokens for prefixes
  public static CHAR_TO_TOKEN_RATIO = 3 // Approximate chars per token
  // Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
  public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
  public static SEARCH_QUERY_PREFIX = 'search_query: '
  constructor(
    private dockerService: DockerService,
    private ollamaService: OllamaService
  ) {}
  /**
   * Rough, deliberately pessimistic token estimate for a piece of text.
   *
   * The character count is divided by RagService.CHAR_TO_TOKEN_RATIO and
   * rounded up. Typical English text is closer to ~4 characters per token, so
   * using the smaller ratio over-estimates on purpose: special characters,
   * numbers and punctuation tokenize less efficiently, and over-estimating is
   * what keeps inputs from exceeding the embedding model's context length.
   */
  private estimateTokenCount(text: string): number {
    const charsPerToken = RagService.CHAR_TO_TOKEN_RATIO
    const rawEstimate = text.length / charsPerToken
    return Math.ceil(rawEstimate)
  }
  /**
   * Shortens text so that its estimated token count fits within maxTokens.
   *
   * Text already within the limit is returned untouched. Otherwise it is cut
   * to the character budget implied by the limit and, when a space falls in
   * the final 20% of that budget, cut back to that space so the result does
   * not end mid-word. Every truncation is logged as a warning.
   */
  private truncateToTokenLimit(text: string, maxTokens: number): string {
    const tokensBefore = this.estimateTokenCount(text)
    if (tokensBefore <= maxTokens) {
      return text
    }

    // Character budget derived from the same ratio used for estimation
    const charBudget = Math.floor(maxTokens * RagService.CHAR_TO_TOKEN_RATIO)
    const hardCut = text.substring(0, charBudget)

    // Prefer ending on a word boundary, but only if it costs less than 20% of the budget
    const boundary = hardCut.lastIndexOf(' ')
    const shortened = boundary > charBudget * 0.8 ? hardCut.substring(0, boundary) : hardCut

    logger.warn(
      `[RAG] Truncated text from ${text.length} to ${shortened.length} chars (est. ${tokensBefore} → ${this.estimateTokenCount(shortened)} tokens)`
    )
    return shortened
  }
  /**
   * Normalizes a search query before keyword extraction and embedding.
   *
   * Currently this only trims surrounding whitespace and logs the before/after
   * values. It is the intended hook for future query expansion, for example
   * synonym expansion, domain-specific related terms, or model-based
   * rephrasing of the query.
   */
  private preprocessQuery(query: string): string {
    logger.debug(`[RAG] Original query: "${query}"`)
    const trimmedQuery = query.trim()
    logger.debug(`[RAG] Preprocessed query: "${trimmedQuery}"`)
    return trimmedQuery
  }
  /**
   * Extracts a de-duplicated list of lowercase keywords from text, for use in
   * hybrid (semantic + keyword) search.
   *
   * The text is tokenized on whitespace, stop words are removed, non-word
   * characters are stripped from each token, and tokens of two characters or
   * fewer are discarded.
   *
   * @param query - A search query or a document chunk
   * @returns Unique keywords in first-seen order
   */
  private extractKeywords(query: string): string[] {
    // Split on any run of whitespace, not just single spaces: chunk text
    // contains newlines and tabs, and words separated only by those would
    // otherwise be fused into one token when non-word characters are stripped.
    const tokens = query.split(/\s+/)
    const withoutStopWords = removeStopwords(tokens)
    // Future: This is basic normalization, could be improved with stemming/lemmatization later
    const keywords = withoutStopWords
      .map((word) => word.replace(/[^\w]/g, '').toLowerCase())
      .filter((word) => word.length > 2)
    return [...new Set(keywords)]
  }
  private async _initializeQdrantClient() {
    if (!this.qdrantInitPromise) {
      this.qdrantInitPromise = (async () => {
@ -84,43 +171,87 @@ export class RagService {
        throw new Error(`${RagService.EMBEDDING_MODEL} does not exist and could not be downloaded.`)
      }
-      const chunks = chunk(text, {
+      // TokenChunker uses character-based tokenization (1 char = 1 token)
-        // These settings should provide a good balance between context and precision
+      // We need to convert our embedding model's token counts to character counts
-        minLength: 512,
+      // since nomic-embed-text tokenizer uses ~3 chars per token
-        maxLength: 1024,
+      const targetCharsPerChunk = Math.floor(RagService.TARGET_TOKENS_PER_CHUNK * RagService.CHAR_TO_TOKEN_RATIO)
-        overlap: 200,
+      const overlapChars = Math.floor(150 * RagService.CHAR_TO_TOKEN_RATIO)
      const chunker = await TokenChunker.create({
        chunkSize: targetCharsPerChunk,
        chunkOverlap: overlapChars,
      })
-      if (!chunks || chunks.length === 0) {
+      const chunkResults = await chunker.chunk(text)
      if (!chunkResults || chunkResults.length === 0) {
        throw new Error('No text chunks generated for embedding.')
      }
-      const embeddings: number[][] = []
+      // Extract text from chunk results
      const chunks = chunkResults.map((chunk) => chunk.text)
      const ollamaClient = await this.ollamaService.getClient()
-      for (const chunkText of chunks) {
+
      const embeddings: number[][] = []
      for (let i = 0; i < chunks.length; i++) {
        let chunkText = chunks[i]
        // Final safety check: ensure chunk + prefix fits
        const prefixText = RagService.SEARCH_DOCUMENT_PREFIX
        const withPrefix = prefixText + chunkText
        const estimatedTokens = this.estimateTokenCount(withPrefix)
        if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
          // This should be rare - log for debugging if it's occurring frequently
          const prefixTokens = this.estimateTokenCount(prefixText)
          const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
          logger.warn(
            `[RAG] Chunk ${i} estimated at ${estimatedTokens} tokens (${chunkText.length} chars), truncating to ${maxTokensForText} tokens`
          )
          chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
        }
        logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`)
        const response = await ollamaClient.embeddings({
          model: RagService.EMBEDDING_MODEL,
-          prompt: chunkText,
+          prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText,
        })
        embeddings.push(response.embedding)
      }
-      const points = chunks.map((chunkText, index) => ({
+      const timestamp = Date.now()
-        id: `${Date.now()}_${index}`,
+      const points = chunks.map((chunkText, index) => {
-        vector: embeddings[index],
+        // Extract keywords for hybrid search
-        payload: {
+        const keywords = this.extractKeywords(chunkText)
-          ...metadata,
+        logger.debug(`[RAG] Extracted keywords for chunk ${index}: [${keywords.join(', ')}]`)
-          text: chunkText,
+        return {
-          chunk_index: index,
+          id: randomUUID(), // qdrant requires either uuid or unsigned int
-        },
+          vector: embeddings[index],
-      }))
+          payload: {
            ...metadata,
            text: chunkText,
            chunk_index: index,
            total_chunks: chunks.length,
            keywords: keywords.join(' '), // Store as space-separated string for text search
            char_count: chunkText.length,
            created_at: timestamp,
            source: metadata.source || 'unknown'
          },
        }
      })
      await this.qdrant!.upsert(RagService.CONTENT_COLLECTION_NAME, { points })
      logger.debug(`[RAG] Successfully embedded and stored ${chunks.length} chunks`)
      logger.debug(`[RAG] First chunk preview: "${chunks[0].substring(0, 100)}..."`)
      return { chunks: chunks.length }
    } catch (error) {
-      logger.error('Error embedding text:', error)
+      console.error(error)
      logger.error('[RAG] Error embedding text:', error)
      return null
    }
  }
@ -195,7 +326,7 @@ export class RagService {
   * This includes text extraction, chunking, embedding, and storing in Qdrant.
   */
  public async processAndEmbedFile(
-    filepath: string
+    filepath: string // Should already be the full path to the uploaded file
  ): Promise<{ success: boolean; message: string; chunks?: number }> {
    try {
      const fileType = determineFileType(filepath)
@ -233,7 +364,13 @@ export class RagService {
        return { success: false, message: 'No text could be extracted from the file.' }
      }
-      const embedResult = await this.embedAndStoreText(extractedText, {})
+      const embedResult = await this.embedAndStoreText(extractedText, {
        source: filepath
      })
      // Cleanup the file from disk
      logger.info(`[RAG] Embedding complete, deleting uploaded file: ${filepath}`)
      await deleteFileIfExists(filepath)
      return {
        success: true,
@ -248,60 +385,230 @@ export class RagService {
  /**
   * Search for documents similar to the query text in the Qdrant knowledge base.
-   * Returns the most relevant text chunks based on semantic similarity.
+   * Uses a hybrid approach combining semantic similarity and keyword matching.
   * Implements adaptive thresholds and result reranking for optimal retrieval.
   * @param query - The search query text
   * @param limit - Maximum number of results to return (default: 5)
-   * @param scoreThreshold - Minimum similarity score threshold (default: 0.7)
+   * @param scoreThreshold - Minimum similarity score threshold (default: 0.3, much lower than before)
   * @returns Array of relevant text chunks with their scores
   */
  public async searchSimilarDocuments(
    query: string,
    limit: number = 5,
-    scoreThreshold: number = 0.7
+    scoreThreshold: number = 0.3 // Lower default threshold - was 0.7, now 0.3
-  ): Promise<Array<{ text: string; score: number }>> {
+  ): Promise<Array<{ text: string; score: number; metadata?: Record<string, any> }>> {
    try {
      logger.debug(`[RAG] Starting similarity search for query: "${query}"`)
      await this._ensureCollection(
        RagService.CONTENT_COLLECTION_NAME,
        RagService.EMBEDDING_DIMENSION
      )
      // Check if collection has any points
      const collectionInfo = await this.qdrant!.getCollection(RagService.CONTENT_COLLECTION_NAME)
      const pointCount = collectionInfo.points_count || 0
      logger.debug(`[RAG] Knowledge base contains ${pointCount} document chunks`)
      if (pointCount === 0) {
        logger.debug('[RAG] Knowledge base is empty. Could not perform search.')
        return []
      }
      const allModels = await this.ollamaService.getModels(true)
      const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
      if (!embeddingModel) {
        logger.warn(
-          `${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
+          `[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
        )
        return []
      }
-      // Generate embedding for the query
+      // Preprocess query for better matching
      const processedQuery = this.preprocessQuery(query)
      const keywords = this.extractKeywords(processedQuery)
      logger.debug(`[RAG] Extracted keywords: [${keywords.join(', ')}]`)
      // Generate embedding for the query with search_query prefix
      const ollamaClient = await this.ollamaService.getClient()
      // Ensure query doesn't exceed token limit
      const prefixTokens = this.estimateTokenCount(RagService.SEARCH_QUERY_PREFIX)
      const maxQueryTokens = RagService.MAX_SAFE_TOKENS - prefixTokens
      const truncatedQuery = this.truncateToTokenLimit(processedQuery, maxQueryTokens)
      const prefixedQuery = RagService.SEARCH_QUERY_PREFIX + truncatedQuery
      logger.debug(`[RAG] Generating embedding with prefix: "${RagService.SEARCH_QUERY_PREFIX}"`)
      // Validate final token count
      const queryTokenCount = this.estimateTokenCount(prefixedQuery)
      if (queryTokenCount > RagService.MAX_SAFE_TOKENS) {
        logger.error(
          `[RAG] Query too long even after truncation: ${queryTokenCount} tokens (max: ${RagService.MAX_SAFE_TOKENS})`
        )
        return []
      }
      const response = await ollamaClient.embeddings({
        model: RagService.EMBEDDING_MODEL,
-        prompt: query,
+        prompt: prefixedQuery,
      })
-      // Search for similar vectors in Qdrant
+      // Perform semantic search with a higher limit to enable reranking
      const searchLimit = limit * 3 // Get more results for reranking
      logger.debug(
        `[RAG] Searching for top ${searchLimit} semantic matches (threshold: ${scoreThreshold})`
      )
      const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
        vector: response.embedding,
-        limit: limit,
+        limit: searchLimit,
        score_threshold: scoreThreshold,
        with_payload: true,
      })
-      console.log("Got search results:", searchResults);
+      logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
-      return searchResults.map((result) => ({
+      // Map results with metadata for reranking
      const resultsWithMetadata = searchResults.map((result) => ({
        text: (result.payload?.text as string) || '',
        score: result.score,
        keywords: (result.payload?.keywords as string) || '',
        chunk_index: (result.payload?.chunk_index as number) || 0,
        created_at: (result.payload?.created_at as number) || 0,
      }))
      const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
      logger.debug(`[RAG] Top 3 results after reranking:`)
      rerankedResults.slice(0, 3).forEach((result, idx) => {
        logger.debug(
          `[RAG]   ${idx + 1}. Score: ${result.finalScore.toFixed(4)} (semantic: ${result.score.toFixed(4)}) - "${result.text.substring(0, 100)}..."`
        )
      })
      // Return top N results
      return rerankedResults.slice(0, limit).map((result) => ({
        text: result.text,
        score: result.finalScore,
        metadata: {
          chunk_index: result.chunk_index,
          created_at: result.created_at,
          semantic_score: result.score,
        },
      }))
    } catch (error) {
-      logger.error('Error searching similar documents:', error)
+      logger.error('[RAG] Error searching similar documents:', error)
      return []
    }
  }
  /**
   * Reorders semantic search hits using a small, bounded lexical bonus.
   *
   * Every hit starts from its semantic score. Hits scoring below 0.35 are
   * passed through unchanged, so lexical overlap alone cannot promote a weak
   * match. For the remaining hits two bonuses, each proportional to the
   * semantic score, are added:
   *  - keyword overlap: sqrt(share of query keywords found in the stored
   *    keywords or in the text) * 10% of the semantic score
   *  - direct term matches: sqrt(share of query words longer than three
   *    characters found in the text) * 7.5% of the semantic score
   * The square root gives diminishing returns as overlap grows. The total is
   * capped at 1.0 and hits are returned sorted by final score, highest first.
   *
   * Future: a dedicated (e.g. python-based) reranker could replace this
   * feature-based approach if the benefits outweigh the overhead.
   *
   * @param results - Hits from the vector search, including their stored keywords
   * @param queryKeywords - Keywords extracted from the query
   * @param originalQuery - The raw query text
   * @returns The hits with an added finalScore, sorted in descending order
   */
  private rerankResults(
    results: Array<{
      text: string
      score: number
      keywords: string
      chunk_index: number
      created_at: number
    }>,
    queryKeywords: string[],
    originalQuery: string
  ): Array<{
    text: string
    score: number
    finalScore: number
    chunk_index: number
    created_at: number
  }> {
    // Quality gate: boosts are only applied at or above this semantic score
    const MIN_SEMANTIC_THRESHOLD = 0.35

    // Query-derived values are the same for every hit, so compute them once
    const loweredQueryKeywords = queryKeywords.map((kw) => kw.toLowerCase())
    const keywordDenominator = Math.max(queryKeywords.length, 1)
    const queryTerms = originalQuery
      .toLowerCase()
      .split(/\s+/)
      .filter((term) => term.length > 3)

    const scored = results.map((hit) => {
      const semantic = hit.score

      if (semantic < MIN_SEMANTIC_THRESHOLD) {
        // Weak semantic matches keep their score untouched
        logger.debug(
          `[RAG] Skipping boost for low semantic score: ${semantic.toFixed(3)} (threshold: ${MIN_SEMANTIC_THRESHOLD})`
        )
        return { ...hit, finalScore: semantic }
      }

      const loweredText = hit.text.toLowerCase()
      const storedKeywords = new Set(
        hit.keywords
          .toLowerCase()
          .split(' ')
          .filter((k) => k.length > 0)
      )

      // Keyword overlap bonus: a keyword counts if it is stored or appears in the text
      let keywordHits = 0
      for (const kw of loweredQueryKeywords) {
        if (storedKeywords.has(kw) || loweredText.includes(kw)) {
          keywordHits += 1
        }
      }
      const keywordOverlap = keywordHits / keywordDenominator
      const keywordBoost = Math.sqrt(keywordOverlap) * 0.1 * semantic
      if (keywordOverlap > 0) {
        logger.debug(
          `[RAG] Keyword overlap: ${keywordHits}/${queryKeywords.length} - Boost: ${keywordBoost.toFixed(3)}`
        )
      }

      // Direct term bonus: longer words of the raw query appearing in the text
      let boosted = semantic
      if (queryTerms.length > 0) {
        let directMatches = 0
        for (const term of queryTerms) {
          if (loweredText.includes(term)) {
            directMatches += 1
          }
        }
        if (directMatches > 0) {
          const directMatchBoost = Math.sqrt(directMatches / queryTerms.length) * 0.075 * semantic
          logger.debug(
            `[RAG] Direct term matches: ${directMatches}/${queryTerms.length} - Boost: ${directMatchBoost.toFixed(3)}`
          )
          boosted += directMatchBoost
        }
      }

      return { ...hit, finalScore: Math.min(1.0, boosted + keywordBoost) }
    })

    scored.sort((a, b) => b.finalScore - a.finalScore)
    return scored
  }
  /**
   * Retrieve all unique source files that have been stored in the knowledge base.
   * @returns Array of unique source file identifiers
@ -328,9 +635,8 @@ export class RagService {
        // Extract unique source values from payloads
        scrollResult.points.forEach((point) => {
-          const metadata = point.payload?.metadata
+          const source = point.payload?.source
-          if (metadata && typeof metadata === 'object' && 'source' in metadata) {
+          if (source && typeof source === 'string') {
            const source = metadata.source as string
            sources.add(source)
          }
        })
@ -338,7 +644,13 @@ export class RagService {
        offset = scrollResult.next_page_offset || null
      } while (offset !== null)
-      return Array.from(sources)
+      const sourcesArr = Array.from(sources)
      // The source is a full path - only extract the filename for display
      return sourcesArr.map((src) => {
        const parts = src.split(/[/\\]/)
        return parts[parts.length - 1] // Return the last part as filename
      })
    } catch (error) {
      logger.error('Error retrieving stored files:', error)
      return []
--- a/admin/app/validators/rag.ts
+++ b/admin/app/validators/rag.ts
@ -0,0 +1,7 @@
 import vine from '@vinejs/vine'
 // Compiled validator for the job-status lookup request: an object with a required string `filePath`.
 export const getJobStatusSchema = vine.compile(
  vine.object({
    filePath: vine.string(),
  })
 )
--- a/admin/commands/queue/work.ts
+++ b/admin/commands/queue/work.ts
@ -5,6 +5,7 @@ import queueConfig from '#config/queue'
 import { RunDownloadJob } from '#jobs/run_download_job'
 import { DownloadModelJob } from '#jobs/download_model_job'
 import { RunBenchmarkJob } from '#jobs/run_benchmark_job'
 import { EmbedFileJob } from '#jobs/embed_file_job'
 export default class QueueWork extends BaseCommand {
  static commandName = 'queue:work'
@ -90,10 +91,12 @@ export default class QueueWork extends BaseCommand {
    handlers.set(RunDownloadJob.key, new RunDownloadJob())
    handlers.set(DownloadModelJob.key, new DownloadModelJob())
    handlers.set(RunBenchmarkJob.key, new RunBenchmarkJob())
    handlers.set(EmbedFileJob.key, new EmbedFileJob())
    queues.set(RunDownloadJob.key, RunDownloadJob.queue)
    queues.set(DownloadModelJob.key, DownloadModelJob.queue)
    queues.set(RunBenchmarkJob.key, RunBenchmarkJob.queue)
    queues.set(EmbedFileJob.key, EmbedFileJob.queue)
    return [handlers, queues]
  }
@ -107,6 +110,7 @@ export default class QueueWork extends BaseCommand {
      [RunDownloadJob.queue]: 3,
      [DownloadModelJob.queue]: 2, // Lower concurrency for resource-intensive model downloads
      [RunBenchmarkJob.queue]: 1, // Run benchmarks one at a time for accurate results
      [EmbedFileJob.queue]: 2, // Lower concurrency for embedding jobs, can be resource intensive
      default: 3,
    }
--- a/admin/constants/ollama.ts
+++ b/admin/constants/ollama.ts
@ -66,12 +66,20 @@ export const SYSTEM_PROMPTS = {
 - Use tables when presenting structured data.
 `,
  rag_context: (context: string) => `
-You have access to the following relevant information from the knowledge base. Use this context to provide accurate and informed responses when relevant:
+You have access to relevant information from the knowledge base. This context has been retrieved based on semantic similarity to the user's question.
-[Context]
+[Knowledge Base Context]
 ${context}
-If the user's question is related to this context, incorporate it into your response. Otherwise, respond normally.
+IMPORTANT INSTRUCTIONS:
 1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
 2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
 3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
 4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer.
 5. Never fabricate information that isn't in the context or your training data.
 6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations.
 Format your response using markdown for readability.
 `,
  chat_suggestions: `
 You are a helpful assistant that generates conversation starter suggestions for a survivalist/prepper using an AI assistant.
--- a/admin/package-lock.json
+++ b/admin/package-lock.json
@ -20,6 +20,7 @@
        "@adonisjs/transmit": "^2.0.2",
        "@adonisjs/transmit-client": "^1.0.0",
        "@adonisjs/vite": "^4.0.0",
        "@chonkiejs/core": "^0.0.7",
        "@headlessui/react": "^2.2.4",
        "@inertiajs/react": "^2.0.13",
        "@markdoc/markdoc": "^0.5.2",
@ -42,7 +43,6 @@
        "dockerode": "^4.0.7",
        "edge.js": "^6.2.1",
        "fast-xml-parser": "^5.2.5",
        "llm-chunk": "^0.0.1",
        "luxon": "^3.6.1",
        "maplibre-gl": "^4.7.1",
        "mysql2": "^3.14.1",
@ -60,6 +60,7 @@
        "reflect-metadata": "^0.2.2",
        "remark-gfm": "^4.0.1",
        "sharp": "^0.34.5",
        "stopword": "^3.1.5",
        "systeminformation": "^5.27.14",
        "tailwindcss": "^4.1.10",
        "tar": "^7.5.6",
@ -83,6 +84,7 @@
        "@types/node": "^22.15.18",
        "@types/react": "^19.1.8",
        "@types/react-dom": "^19.1.6",
        "@types/stopword": "^2.0.3",
        "eslint": "^9.26.0",
        "hot-hook": "^0.4.0",
        "prettier": "^3.5.3",
@ -1230,6 +1232,21 @@
      "dev": true,
      "license": "Apache-2.0"
    },
    "node_modules/@chonkiejs/chunk": {
      "version": "0.9.3",
      "resolved": "https://registry.npmjs.org/@chonkiejs/chunk/-/chunk-0.9.3.tgz",
      "integrity": "sha512-uUOeoFGY3s6kzAoKskI50weZN0zvW3oLwUijA1uX7Wxuy9yZStF2IvGuXRigMgP2g/L85lsotYGkjpBMLjQnrg==",
      "license": "MIT OR Apache-2.0"
    },
    "node_modules/@chonkiejs/core": {
      "version": "0.0.7",
      "resolved": "https://registry.npmjs.org/@chonkiejs/core/-/core-0.0.7.tgz",
      "integrity": "sha512-R17OW9TT1x7B6lDKTCaMd6NluAObleN/cCQtUbMK2UcFOguJtQz/cL0n1t0AzJWBFMVgYP8EcqTFn/fcKhzPiA==",
      "license": "MIT",
      "dependencies": {
        "@chonkiejs/chunk": "^0.9.3"
      }
    },
    "node_modules/@colors/colors": {
      "version": "1.5.0",
      "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.5.0.tgz",
@ -5084,6 +5101,13 @@
      "dev": true,
      "license": "MIT"
    },
    "node_modules/@types/stopword": {
      "version": "2.0.3",
      "resolved": "https://registry.npmjs.org/@types/stopword/-/stopword-2.0.3.tgz",
      "integrity": "sha512-hioMj0lOvISM+EDevf7ijG8EMbU+J3pj4SstCyfQC1t39uPYpAe7beSfBdU6c1d9jeECTQQtR3UJWtVoUO8Weg==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/@types/supercluster": {
      "version": "7.1.3",
      "resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
@ -9866,12 +9890,6 @@
        "url": "https://opencollective.com/parcel"
      }
    },
    "node_modules/llm-chunk": {
      "version": "0.0.1",
      "resolved": "https://registry.npmjs.org/llm-chunk/-/llm-chunk-0.0.1.tgz",
      "integrity": "sha512-n9fHgsSiJb7vXZiC5c4XV6rme+tC7WX/cWH6EJvPPmMOMwOZ9xdg/U9LY5Qhmixd3K1PdRB0FVOdzoJF2HUZbg==",
      "license": "MIT"
    },
    "node_modules/locate-path": {
      "version": "7.2.0",
      "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-7.2.0.tgz",
@ -13619,6 +13637,12 @@
        "node": ">= 0.8"
      }
    },
    "node_modules/stopword": {
      "version": "3.1.5",
      "resolved": "https://registry.npmjs.org/stopword/-/stopword-3.1.5.tgz",
      "integrity": "sha512-OgLYGVFCNa430WOrj9tYZhQge5yg6vd6JsKredveAqEhdLVQkfrpnQIGjx0L9lLqzL4Kq4J8yNTcfQR/MpBwhg==",
      "license": "MIT"
    },
    "node_modules/string_decoder": {
      "version": "1.3.0",
      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
--- a/admin/package.json
+++ b/admin/package.json
@ -52,6 +52,7 @@
    "@types/node": "^22.15.18",
    "@types/react": "^19.1.8",
    "@types/react-dom": "^19.1.6",
    "@types/stopword": "^2.0.3",
    "eslint": "^9.26.0",
    "hot-hook": "^0.4.0",
    "prettier": "^3.5.3",
@ -71,6 +72,7 @@
    "@adonisjs/transmit": "^2.0.2",
    "@adonisjs/transmit-client": "^1.0.0",
    "@adonisjs/vite": "^4.0.0",
    "@chonkiejs/core": "^0.0.7",
    "@headlessui/react": "^2.2.4",
    "@inertiajs/react": "^2.0.13",
    "@markdoc/markdoc": "^0.5.2",
@ -93,7 +95,6 @@
    "dockerode": "^4.0.7",
    "edge.js": "^6.2.1",
    "fast-xml-parser": "^5.2.5",
    "llm-chunk": "^0.0.1",
    "luxon": "^3.6.1",
    "maplibre-gl": "^4.7.1",
    "mysql2": "^3.14.1",
@ -111,6 +112,7 @@
    "reflect-metadata": "^0.2.2",
    "remark-gfm": "^4.0.1",
    "sharp": "^0.34.5",
    "stopword": "^3.1.5",
    "systeminformation": "^5.27.14",
    "tailwindcss": "^4.1.10",
    "tar": "^7.5.6",
--- a/admin/start/routes.ts
+++ b/admin/start/routes.ts
@ -119,6 +119,7 @@ router
  .group(() => {
    router.post('/upload', [RagController, 'upload'])
    router.get('/files', [RagController, 'getStoredFiles'])
    router.get('/job-status', [RagController, 'getJobStatus'])
  })
  .prefix('/api/rag')