diff --git a/admin/app/controllers/ollama_controller.ts b/admin/app/controllers/ollama_controller.ts
index 0148672..311c5ff 100644
--- a/admin/app/controllers/ollama_controller.ts
+++ b/admin/app/controllers/ollama_controller.ts
@@ -41,16 +41,17 @@ export default class OllamaController {
 
     if (lastUserMessage) {
       // Search for relevant context in the knowledge base
+      // Using lower threshold (0.3) with improved hybrid search
       const relevantDocs = await this.ragService.searchSimilarDocuments(
         lastUserMessage.content,
         5, // Retrieve top 5 most relevant chunks
-        0.7 // Minimum similarity score of 0.7
+        0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
       )
 
       // If relevant context is found, inject as a system message
       if (relevantDocs.length > 0) {
         const contextText = relevantDocs
-          .map((doc, idx) => `[Context ${idx + 1}]\n${doc.text}`)
+          .map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
           .join('\n\n')
 
         const systemMessage = {
diff --git a/admin/app/controllers/rag_controller.ts b/admin/app/controllers/rag_controller.ts
index 73d6906..7af9cc4 100644
--- a/admin/app/controllers/rag_controller.ts
+++ b/admin/app/controllers/rag_controller.ts
@@ -1,9 +1,12 @@
 import { RagService } from '#services/rag_service'
+import { EmbedFileJob } from '#jobs/embed_file_job'
 import { inject } from '@adonisjs/core'
 import type { HttpContext } from '@adonisjs/core/http'
 import app from '@adonisjs/core/services/app'
 import { randomBytes } from 'node:crypto'
 import { sanitizeFilename } from '../utils/fs.js'
+import { stat } from 'node:fs/promises'
+import { getJobStatusSchema } from '#validators/rag'
 
 @inject()
 export default class RagController {
@@ -19,19 +22,48 @@ export default class RagController {
     const sanitizedName = sanitizeFilename(uploadedFile.clientName)
 
     const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}`
-    const fullPath = app.makePath('storage/uploads', fileName)
+    const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)
 
-    await uploadedFile.move(app.makePath('storage/uploads'), {
+    await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), {
       name: fileName,
     })
 
-    // Don't await this - process in background
-    this.ragService.processAndEmbedFile(fullPath)
+    // Get file size for tracking
+    let fileSize: number | undefined = undefined
+    try {
+      const stats = await stat(fullPath)
+      fileSize = stats.size
+    } catch (error) {
+      // Not critical if we can't get file size, just swallow the error
+    }
 
-    return response.status(200).json({
-      message: 'File has been uploaded and queued for processing.',
-      file_path: `/uploads/${fileName}`,
+    // Dispatch background job for embedding
+    const result = await EmbedFileJob.dispatch({
+      filePath: fullPath,
+      fileName,
+      fileSize,
     })
+
+    return response.status(202).json({
+      message: result.message,
+      jobId: result.jobId,
+      fileName,
+      filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`,
+      alreadyProcessing: !result.created,
+    })
+  }
+
+  public async getJobStatus({ request, response }: HttpContext) {
+    const reqData = await request.validateUsing(getJobStatusSchema)
+
+    const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, reqData.filePath)
+    const status = await EmbedFileJob.getStatus(fullPath)
+
+    if (!status.exists) {
+      return response.status(404).json({ error: 'Job not found for this file' })
+    }
+
+    return response.status(200).json(status)
   }
 
   public async getStoredFiles({ response }: HttpContext) {
diff --git a/admin/app/jobs/embed_file_job.ts b/admin/app/jobs/embed_file_job.ts
new file mode 100644
index 0000000..f7cd6c8
--- /dev/null
+++ b/admin/app/jobs/embed_file_job.ts
@@ -0,0 +1,161 @@
+import { Job } from 'bullmq'
+import { QueueService } from '#services/queue_service'
+import { RagService } from '#services/rag_service'
+import { DockerService } from '#services/docker_service'
+import { OllamaService } from '#services/ollama_service'
+import { createHash } from 'crypto'
+import logger from '@adonisjs/core/services/logger'
+
+export interface EmbedFileJobParams {
+  filePath: string
+  fileName: string
+  fileSize?: number
+}
+
+export class EmbedFileJob {
+  static get queue() {
+    return 'file-embeddings'
+  }
+
+  static get key() {
+    return 'embed-file'
+  }
+
+  static getJobId(filePath: string): string {
+    return createHash('sha256').update(filePath).digest('hex').slice(0, 16)
+  }
+
+  async handle(job: Job) {
+    const { filePath, fileName } = job.data as EmbedFileJobParams
+
+    logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}`)
+
+    const dockerService = new DockerService()
+    const ollamaService = new OllamaService()
+    const ragService = new RagService(dockerService, ollamaService)
+
+    try {
+      // Update progress starting
+      await job.updateProgress(0)
+      await job.updateData({
+        ...job.data,
+        status: 'processing',
+        startedAt: Date.now(),
+      })
+
+      logger.info(`[EmbedFileJob] Processing file: ${filePath}`)
+
+      // Process and embed the file
+      const result = await ragService.processAndEmbedFile(filePath)
+
+      if (!result.success) {
+        logger.error(`[EmbedFileJob] Failed to process file ${fileName}: ${result.message}`)
+        throw new Error(result.message)
+      }
+
+      // Update progress complete
+      await job.updateProgress(100)
+      await job.updateData({
+        ...job.data,
+        status: 'completed',
+        completedAt: Date.now(),
+        chunks: result.chunks,
+      })
+
+      logger.info(
+        `[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}`
+      )
+
+      return {
+        success: true,
+        fileName,
+        filePath,
+        chunks: result.chunks,
+        message: `Successfully embedded ${result.chunks} chunks`,
+      }
+    } catch (error) {
+      logger.error(`[EmbedFileJob] Error embedding file ${fileName}:`, error)
+
+      await job.updateData({
+        ...job.data,
+        status: 'failed',
+        failedAt: Date.now(),
+        error: error instanceof Error ? error.message : 'Unknown error',
+      })
+
+      throw error
+    }
+  }
+
+  static async getByFilePath(filePath: string): Promise<Job | undefined> {
+    const queueService = new QueueService()
+    const queue = queueService.getQueue(this.queue)
+    const jobId = this.getJobId(filePath)
+    return await queue.getJob(jobId)
+  }
+
+  static async dispatch(params: EmbedFileJobParams) {
+    const queueService = new QueueService()
+    const queue = queueService.getQueue(this.queue)
+    const jobId = this.getJobId(params.filePath)
+
+    try {
+      const job = await queue.add(this.key, params, {
+        jobId,
+        attempts: 3,
+        backoff: {
+          type: 'exponential',
+          delay: 5000, // Delay 5 seconds before retrying
+        },
+        removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history
+        removeOnFail: { count: 20 } // Keep last 20 failed jobs for debugging
+      })
+
+      logger.info(`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}`)
+
+      return {
+        job,
+        created: true,
+        jobId,
+        message: `File queued for embedding: ${params.fileName}`,
+      }
+    } catch (error) {
+      if (error.message && error.message.includes('job already exists')) {
+        const existing = await queue.getJob(jobId)
+        logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
+        return {
+          job: existing,
+          created: false,
+          jobId,
+          message: `Embedding job already exists for: ${params.fileName}`,
+        }
+      }
+      throw error
+    }
+  }
+
+  static async getStatus(filePath: string): Promise<{
+    exists: boolean
+    status?: string
+    progress?: number
+    chunks?: number
+    error?: string
+  }> {
+    const job = await this.getByFilePath(filePath)
+
+    if (!job) {
+      return { exists: false }
+    }
+
+    const state = await job.getState()
+    const data = job.data
+
+    return {
+      exists: true,
+      status: data.status || state,
+      progress: typeof job.progress === 'number' ? job.progress : undefined,
+      chunks: data.chunks,
+      error: data.error,
+    }
+  }
+}
diff --git a/admin/app/services/rag_service.ts b/admin/app/services/rag_service.ts
index 7573fcd..b558eb1 100644
--- a/admin/app/services/rag_service.ts
+++ b/admin/app/services/rag_service.ts
@@ -2,28 +2,115 @@ import { QdrantClient } from '@qdrant/js-client-rest'
 import { DockerService } from './docker_service.js'
 import { inject } from '@adonisjs/core'
 import logger from '@adonisjs/core/services/logger'
-import { chunk } from 'llm-chunk'
+import { TokenChunker } from '@chonkiejs/core'
 import sharp from 'sharp'
-import { determineFileType, getFile } from '../utils/fs.js'
+import { deleteFileIfExists, determineFileType, getFile } from '../utils/fs.js'
 import { PDFParse } from 'pdf-parse'
 import { createWorker } from 'tesseract.js'
 import { fromBuffer } from 'pdf2pic'
 import { OllamaService } from './ollama_service.js'
 import { SERVICE_NAMES } from '../../constants/service_names.js'
+import { removeStopwords } from 'stopword'
+import { randomUUID } from 'node:crypto'
 
 @inject()
 export class RagService {
   private qdrant: QdrantClient | null = null
   private qdrantInitPromise: Promise<void> | null = null
-  public static CONTENT_COLLECTION_NAME = 'open-webui_knowledge' // This is the collection name OWUI uses for uploaded knowledge
+  public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
+  public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
   public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
   public static EMBEDDING_DIMENSION = 768 // Nomic Embed Text v1.5 dimension is 768
+  public static MODEL_CONTEXT_LENGTH = 2048 // nomic-embed-text has 2K token context
+  public static MAX_SAFE_TOKENS = 1800 // Leave buffer for prefix and tokenization variance
+  public static TARGET_TOKENS_PER_CHUNK = 1700 // Target 1700 tokens per chunk for embedding
+  public static PREFIX_TOKEN_BUDGET = 10 // Reserve ~10 tokens for prefixes
+  public static CHAR_TO_TOKEN_RATIO = 3 // Approximate chars per token
+  // Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
+  public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
+  public static SEARCH_QUERY_PREFIX = 'search_query: '
 
   constructor(
     private dockerService: DockerService,
     private ollamaService: OllamaService
   ) {}
 
+  /**
+   * Estimates token count for text. This is a conservative approximation:
+   * - English text: ~1 token per 3 characters
+   * - Adds buffer for special characters and tokenization variance
+   *
+   * Note: This is approximate and realistic english
+   * tokenization is ~4 chars/token, but we use 3 here to be safe.
+   * Actual tokenization may differ, but being
+   * conservative prevents context length errors.
+   */
+  private estimateTokenCount(text: string): number {
+    // This accounts for special characters, numbers, and punctuation
+    return Math.ceil(text.length / RagService.CHAR_TO_TOKEN_RATIO)
+  }
+
+  /**
+   * Truncates text to fit within token limit, preserving word boundaries.
+   * Ensures the text + prefix won't exceed the model's context window.
+   */
+  private truncateToTokenLimit(text: string, maxTokens: number): string {
+    const estimatedTokens = this.estimateTokenCount(text)
+
+    if (estimatedTokens <= maxTokens) {
+      return text
+    }
+
+    // Calculate how many characters we can keep using our ratio
+    const maxChars = Math.floor(maxTokens * RagService.CHAR_TO_TOKEN_RATIO)
+
+    // Truncate at word boundary
+    let truncated = text.substring(0, maxChars)
+    const lastSpace = truncated.lastIndexOf(' ')
+
+    if (lastSpace > maxChars * 0.8) {
+      // If we found a space in the last 20%, use it
+      truncated = truncated.substring(0, lastSpace)
+    }
+
+    logger.warn(
+      `[RAG] Truncated text from ${text.length} to ${truncated.length} chars (est. ${estimatedTokens} → ${this.estimateTokenCount(truncated)} tokens)`
+    )
+
+    return truncated
+  }
+
+  /**
+   * Preprocesses a query to improve retrieval by expanding it with context.
+   * This helps match documents even when using different terminology.
+   */
+  private preprocessQuery(query: string): string {
+    // Future: this is a placeholder for more advanced query expansion techniques.
+    // For now, we simply trim whitespace. Improvements could include:
+    // - Synonym expansion using a thesaurus
+    // - Adding related terms based on domain knowledge
+    // - Using a language model to rephrase or elaborate the query
+    const expanded = query.trim()
+    logger.debug(`[RAG] Original query: "${query}"`)
+    logger.debug(`[RAG] Preprocessed query: "${expanded}"`)
+    return expanded
+  }
+
+  /**
+   * Extract keywords from query for hybrid search
+   */
+  private extractKeywords(query: string): string[] {
+    const split = query.split(' ')
+    const noStopWords = removeStopwords(split)
+
+    // Future: This is basic normalization, could be improved with stemming/lemmatization later
+    const keywords = noStopWords
+      .map((word) => word.replace(/[^\w]/g, '').toLowerCase())
+      .filter((word) => word.length > 2)
+
+    return [...new Set(keywords)]
+  }
+
   private async _initializeQdrantClient() {
     if (!this.qdrantInitPromise) {
       this.qdrantInitPromise = (async () => {
@@ -84,43 +171,87 @@ export class RagService {
         throw new Error(`${RagService.EMBEDDING_MODEL} does not exist and could not be downloaded.`)
       }
 
-      const chunks = chunk(text, {
-        // These settings should provide a good balance between context and precision
-        minLength: 512,
-        maxLength: 1024,
-        overlap: 200,
+      // TokenChunker uses character-based tokenization (1 char = 1 token)
+      // We need to convert our embedding model's token counts to character counts
+      // since nomic-embed-text tokenizer uses ~3 chars per token
+      const targetCharsPerChunk = Math.floor(RagService.TARGET_TOKENS_PER_CHUNK * RagService.CHAR_TO_TOKEN_RATIO)
+      const overlapChars = Math.floor(150 * RagService.CHAR_TO_TOKEN_RATIO)
+
+      const chunker = await TokenChunker.create({
+        chunkSize: targetCharsPerChunk,
+        chunkOverlap: overlapChars,
       })
 
-      if (!chunks || chunks.length === 0) {
+      const chunkResults = await chunker.chunk(text)
+
+      if (!chunkResults || chunkResults.length === 0) {
         throw new Error('No text chunks generated for embedding.')
       }
 
-      const embeddings: number[][] = []
+      // Extract text from chunk results
+      const chunks = chunkResults.map((chunk) => chunk.text)
+
       const ollamaClient = await this.ollamaService.getClient()
-      for (const chunkText of chunks) {
+
+      const embeddings: number[][] = []
+      for (let i = 0; i < chunks.length; i++) {
+        let chunkText = chunks[i]
+
+        // Final safety check: ensure chunk + prefix fits
+        const prefixText = RagService.SEARCH_DOCUMENT_PREFIX
+        const withPrefix = prefixText + chunkText
+        const estimatedTokens = this.estimateTokenCount(withPrefix)
+
+        if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
+          // This should be rare - log for debugging if it's occurring frequently
+          const prefixTokens = this.estimateTokenCount(prefixText)
+          const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
+          logger.warn(
+            `[RAG] Chunk ${i} estimated at ${estimatedTokens} tokens (${chunkText.length} chars), truncating to ${maxTokensForText} tokens`
+          )
+          chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
+        }
+
+        logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`)
+
         const response = await ollamaClient.embeddings({
           model: RagService.EMBEDDING_MODEL,
-          prompt: chunkText,
+          prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText,
         })
 
         embeddings.push(response.embedding)
       }
 
-      const points = chunks.map((chunkText, index) => ({
-        id: `${Date.now()}_${index}`,
-        vector: embeddings[index],
-        payload: {
-          ...metadata,
-          text: chunkText,
-          chunk_index: index,
-        },
-      }))
+      const timestamp = Date.now()
+      const points = chunks.map((chunkText, index) => {
+        // Extract keywords for hybrid search
+        const keywords = this.extractKeywords(chunkText)
+        logger.debug(`[RAG] Extracted keywords for chunk ${index}: [${keywords.join(', ')}]`)
+        return {
+          id: randomUUID(), // qdrant requires either uuid or unsigned int
+          vector: embeddings[index],
+          payload: {
+            ...metadata,
+            text: chunkText,
+            chunk_index: index,
+            total_chunks: chunks.length,
+            keywords: keywords.join(' '), // Store as space-separated string for text search
+            char_count: chunkText.length,
+            created_at: timestamp,
+            source: metadata.source || 'unknown'
+          },
+        }
+      })
 
       await this.qdrant!.upsert(RagService.CONTENT_COLLECTION_NAME, { points })
 
+      logger.debug(`[RAG] Successfully embedded and stored ${chunks.length} chunks`)
+      logger.debug(`[RAG] First chunk preview: "${chunks[0].substring(0, 100)}..."`)
+
       return { chunks: chunks.length }
     } catch (error) {
-      logger.error('Error embedding text:', error)
+      console.error(error)
+      logger.error('[RAG] Error embedding text:', error)
       return null
     }
   }
@@ -195,7 +326,7 @@ export class RagService {
    * This includes text extraction, chunking, embedding, and storing in Qdrant.
    */
   public async processAndEmbedFile(
-    filepath: string
+    filepath: string // Should already be the full path to the uploaded file
   ): Promise<{ success: boolean; message: string; chunks?: number }> {
     try {
       const fileType = determineFileType(filepath)
@@ -233,7 +364,13 @@ export class RagService {
         return { success: false, message: 'No text could be extracted from the file.' }
       }
 
-      const embedResult = await this.embedAndStoreText(extractedText, {})
+      const embedResult = await this.embedAndStoreText(extractedText, {
+        source: filepath
+      })
+
+      // Cleanup the file from disk
+      logger.info(`[RAG] Embedding complete, deleting uploaded file: ${filepath}`)
+      await deleteFileIfExists(filepath)
 
       return {
         success: true,
@@ -248,60 +385,230 @@ export class RagService {
 
   /**
    * Search for documents similar to the query text in the Qdrant knowledge base.
-   * Returns the most relevant text chunks based on semantic similarity.
+   * Uses a hybrid approach combining semantic similarity and keyword matching.
+   * Implements adaptive thresholds and result reranking for optimal retrieval.
    * @param query - The search query text
    * @param limit - Maximum number of results to return (default: 5)
-   * @param scoreThreshold - Minimum similarity score threshold (default: 0.7)
+   * @param scoreThreshold - Minimum similarity score threshold (default: 0.3, much lower than before)
    * @returns Array of relevant text chunks with their scores
    */
   public async searchSimilarDocuments(
     query: string,
     limit: number = 5,
-    scoreThreshold: number = 0.7
-  ): Promise<Array<{ text: string; score: number }>> {
+    scoreThreshold: number = 0.3 // Lower default threshold - was 0.7, now 0.3
+  ): Promise<Array<{ text: string; score: number; metadata?: Record<string, any> }>> {
     try {
+      logger.debug(`[RAG] Starting similarity search for query: "${query}"`)
+
       await this._ensureCollection(
         RagService.CONTENT_COLLECTION_NAME,
         RagService.EMBEDDING_DIMENSION
       )
 
+      // Check if collection has any points
+      const collectionInfo = await this.qdrant!.getCollection(RagService.CONTENT_COLLECTION_NAME)
+      const pointCount = collectionInfo.points_count || 0
+      logger.debug(`[RAG] Knowledge base contains ${pointCount} document chunks`)
+
+      if (pointCount === 0) {
+        logger.debug('[RAG] Knowledge base is empty. Could not perform search.')
+        return []
+      }
+
       const allModels = await this.ollamaService.getModels(true)
       const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
 
       if (!embeddingModel) {
         logger.warn(
-          `${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
+          `[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
         )
         return []
       }
 
-      // Generate embedding for the query
+      // Preprocess query for better matching
+      const processedQuery = this.preprocessQuery(query)
+      const keywords = this.extractKeywords(processedQuery)
+      logger.debug(`[RAG] Extracted keywords: [${keywords.join(', ')}]`)
+
+      // Generate embedding for the query with search_query prefix
       const ollamaClient = await this.ollamaService.getClient()
+
+      // Ensure query doesn't exceed token limit
+      const prefixTokens = this.estimateTokenCount(RagService.SEARCH_QUERY_PREFIX)
+      const maxQueryTokens = RagService.MAX_SAFE_TOKENS - prefixTokens
+      const truncatedQuery = this.truncateToTokenLimit(processedQuery, maxQueryTokens)
+
+      const prefixedQuery = RagService.SEARCH_QUERY_PREFIX + truncatedQuery
+      logger.debug(`[RAG] Generating embedding with prefix: "${RagService.SEARCH_QUERY_PREFIX}"`)
+
+      // Validate final token count
+      const queryTokenCount = this.estimateTokenCount(prefixedQuery)
+      if (queryTokenCount > RagService.MAX_SAFE_TOKENS) {
+        logger.error(
+          `[RAG] Query too long even after truncation: ${queryTokenCount} tokens (max: ${RagService.MAX_SAFE_TOKENS})`
+        )
+        return []
+      }
+
       const response = await ollamaClient.embeddings({
         model: RagService.EMBEDDING_MODEL,
-        prompt: query,
+        prompt: prefixedQuery,
       })
 
-      // Search for similar vectors in Qdrant
+      // Perform semantic search with a higher limit to enable reranking
+      const searchLimit = limit * 3 // Get more results for reranking
+      logger.debug(
+        `[RAG] Searching for top ${searchLimit} semantic matches (threshold: ${scoreThreshold})`
+      )
+
       const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
         vector: response.embedding,
-        limit: limit,
+        limit: searchLimit,
         score_threshold: scoreThreshold,
         with_payload: true,
       })
 
-      console.log("Got search results:", searchResults);
+      logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
 
-      return searchResults.map((result) => ({
+      // Map results with metadata for reranking
+      const resultsWithMetadata = searchResults.map((result) => ({
         text: (result.payload?.text as string) || '',
         score: result.score,
+        keywords: (result.payload?.keywords as string) || '',
+        chunk_index: (result.payload?.chunk_index as number) || 0,
+        created_at: (result.payload?.created_at as number) || 0,
+      }))
+
+      const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
+
+      logger.debug(`[RAG] Top 3 results after reranking:`)
+      rerankedResults.slice(0, 3).forEach((result, idx) => {
+        logger.debug(
+          `[RAG]   ${idx + 1}. Score: ${result.finalScore.toFixed(4)} (semantic: ${result.score.toFixed(4)}) - "${result.text.substring(0, 100)}..."`
+        )
+      })
+
+      // Return top N results
+      return rerankedResults.slice(0, limit).map((result) => ({
+        text: result.text,
+        score: result.finalScore,
+        metadata: {
+          chunk_index: result.chunk_index,
+          created_at: result.created_at,
+          semantic_score: result.score,
+        },
       }))
     } catch (error) {
-      logger.error('Error searching similar documents:', error)
+      logger.error('[RAG] Error searching similar documents:', error)
       return []
     }
   }
 
+  /**
+   * Rerank search results using hybrid scoring that combines:
+   * 1. Semantic similarity score (primary signal)
+   * 2. Keyword overlap bonus (conservative, quality-gated)
+   * 3. Direct term matches (conservative)
+   *
+   * Tries to boost only already-relevant results, not promote
+   * low-quality results just because they have keyword matches.
+   *
+   * Future: this is a decent feature-based approach, but we could
+   * switch to a python-based reranker in the future if the benefits
+   * outweigh the overhead.
+   */
+  private rerankResults(
+    results: Array<{
+      text: string
+      score: number
+      keywords: string
+      chunk_index: number
+      created_at: number
+    }>,
+    queryKeywords: string[],
+    originalQuery: string
+  ): Array<{
+    text: string
+    score: number
+    finalScore: number
+    chunk_index: number
+    created_at: number
+  }> {
+    return results
+      .map((result) => {
+        let finalScore = result.score
+
+        // Quality gate: Only apply boosts if semantic score is reasonable
+        // Try to prevent promoting irrelevant results that just happen to have keyword matches
+        const MIN_SEMANTIC_THRESHOLD = 0.35
+
+        if (result.score < MIN_SEMANTIC_THRESHOLD) {
+          // For low-scoring results, use semantic score as-is
+          // This prevents false positives from keyword gaming
+          logger.debug(
+            `[RAG] Skipping boost for low semantic score: ${result.score.toFixed(3)} (threshold: ${MIN_SEMANTIC_THRESHOLD})`
+          )
+          return {
+            ...result,
+            finalScore,
+          }
+        }
+
+        // Boost score based on keyword overlap (diminishing returns - overlap goes down, so does boost)
+        const docKeywords = result.keywords
+          .toLowerCase()
+          .split(' ')
+          .filter((k) => k.length > 0)
+        const matchingKeywords = queryKeywords.filter(
+          (kw) =>
+            docKeywords.includes(kw.toLowerCase()) ||
+            result.text.toLowerCase().includes(kw.toLowerCase())
+        )
+        const keywordOverlap = matchingKeywords.length / Math.max(queryKeywords.length, 1)
+
+        // Use square root for diminishing returns: 100% overlap = sqrt(1.0) = 1.0, 25% = 0.5
+        // Then scale conservatively (max 10% boost instead of 20%)
+        const keywordBoost = Math.sqrt(keywordOverlap) * 0.1 * result.score
+
+        if (keywordOverlap > 0) {
+          logger.debug(
+            `[RAG] Keyword overlap: ${matchingKeywords.length}/${queryKeywords.length} - Boost: ${keywordBoost.toFixed(3)}`
+          )
+        }
+
+        // Boost if original query terms appear in text (case-insensitive)
+        // Scale boost proportionally to base score to avoid over-promoting weak matches
+        const queryTerms = originalQuery
+          .toLowerCase()
+          .split(/\s+/)
+          .filter((t) => t.length > 3)
+        const directMatches = queryTerms.filter((term) =>
+          result.text.toLowerCase().includes(term)
+        ).length
+
+        if (queryTerms.length > 0) {
+          const directMatchRatio = directMatches / queryTerms.length
+          // Conservative boost: max 7.5% of the base score
+          const directMatchBoost = Math.sqrt(directMatchRatio) * 0.075 * result.score
+
+          if (directMatches > 0) {
+            logger.debug(
+              `[RAG] Direct term matches: ${directMatches}/${queryTerms.length} - Boost: ${directMatchBoost.toFixed(3)}`
+            )
+            finalScore += directMatchBoost
+          }
+        }
+
+        finalScore = Math.min(1.0, finalScore + keywordBoost)
+
+        return {
+          ...result,
+          finalScore,
+        }
+      })
+      .sort((a, b) => b.finalScore - a.finalScore)
+  }
+
   /**
    * Retrieve all unique source files that have been stored in the knowledge base.
    * @returns Array of unique source file identifiers
@@ -328,9 +635,8 @@ export class RagService {
 
         // Extract unique source values from payloads
         scrollResult.points.forEach((point) => {
-          const metadata = point.payload?.metadata
-          if (metadata && typeof metadata === 'object' && 'source' in metadata) {
-            const source = metadata.source as string
+          const source = point.payload?.source
+          if (source && typeof source === 'string') {
             sources.add(source)
           }
         })
@@ -338,7 +644,13 @@ export class RagService {
         offset = scrollResult.next_page_offset || null
       } while (offset !== null)
 
-      return Array.from(sources)
+      const sourcesArr = Array.from(sources)
+
+      // The source is a full path - only extract the filename for display
+      return sourcesArr.map((src) => {
+        const parts = src.split(/[/\\]/)
+        return parts[parts.length - 1] // Return the last part as filename
+      })
     } catch (error) {
       logger.error('Error retrieving stored files:', error)
       return []
diff --git a/admin/app/validators/rag.ts b/admin/app/validators/rag.ts
new file mode 100644
index 0000000..92799bf
--- /dev/null
+++ b/admin/app/validators/rag.ts
@@ -0,0 +1,7 @@
+import vine from '@vinejs/vine'
+
+export const getJobStatusSchema = vine.compile(
+  vine.object({
+    filePath: vine.string(),
+  })
+)
diff --git a/admin/commands/queue/work.ts b/admin/commands/queue/work.ts
index 2ee93b5..e381aa0 100644
--- a/admin/commands/queue/work.ts
+++ b/admin/commands/queue/work.ts
@@ -5,6 +5,7 @@ import queueConfig from '#config/queue'
 import { RunDownloadJob } from '#jobs/run_download_job'
 import { DownloadModelJob } from '#jobs/download_model_job'
 import { RunBenchmarkJob } from '#jobs/run_benchmark_job'
+import { EmbedFileJob } from '#jobs/embed_file_job'
 
 export default class QueueWork extends BaseCommand {
   static commandName = 'queue:work'
@@ -90,10 +91,12 @@ export default class QueueWork extends BaseCommand {
     handlers.set(RunDownloadJob.key, new RunDownloadJob())
     handlers.set(DownloadModelJob.key, new DownloadModelJob())
     handlers.set(RunBenchmarkJob.key, new RunBenchmarkJob())
+    handlers.set(EmbedFileJob.key, new EmbedFileJob())
 
     queues.set(RunDownloadJob.key, RunDownloadJob.queue)
     queues.set(DownloadModelJob.key, DownloadModelJob.queue)
     queues.set(RunBenchmarkJob.key, RunBenchmarkJob.queue)
+    queues.set(EmbedFileJob.key, EmbedFileJob.queue)
 
     return [handlers, queues]
   }
@@ -107,6 +110,7 @@ export default class QueueWork extends BaseCommand {
       [RunDownloadJob.queue]: 3,
       [DownloadModelJob.queue]: 2, // Lower concurrency for resource-intensive model downloads
       [RunBenchmarkJob.queue]: 1, // Run benchmarks one at a time for accurate results
+      [EmbedFileJob.queue]: 2, // Lower concurrency for embedding jobs, can be resource intensive
       default: 3,
     }
 
diff --git a/admin/constants/ollama.ts b/admin/constants/ollama.ts
index 3bf9914..6bac8bd 100644
--- a/admin/constants/ollama.ts
+++ b/admin/constants/ollama.ts
@@ -66,12 +66,20 @@ export const SYSTEM_PROMPTS = {
  - Use tables when presenting structured data.
 `,
   rag_context: (context: string) => `
-You have access to the following relevant information from the knowledge base. Use this context to provide accurate and informed responses when relevant:
+You have access to relevant information from the knowledge base. This context has been retrieved based on semantic similarity to the user's question.
 
-[Context]
+[Knowledge Base Context]
 ${context}
 
-If the user's question is related to this context, incorporate it into your response. Otherwise, respond normally.
+IMPORTANT INSTRUCTIONS:
+1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
+2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
+3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
+4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer.
+5. Never fabricate information that isn't in the context or your training data.
+6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations.
+
+Format your response using markdown for readability.
 `,
   chat_suggestions: `
 You are a helpful assistant that generates conversation starter suggestions for a survivalist/prepper using an AI assistant.
diff --git a/admin/package-lock.json b/admin/package-lock.json
index 62fa906..ef367bb 100644
--- a/admin/package-lock.json
+++ b/admin/package-lock.json
@@ -20,6 +20,7 @@
         "@adonisjs/transmit": "^2.0.2",
         "@adonisjs/transmit-client": "^1.0.0",
         "@adonisjs/vite": "^4.0.0",
+        "@chonkiejs/core": "^0.0.7",
         "@headlessui/react": "^2.2.4",
         "@inertiajs/react": "^2.0.13",
         "@markdoc/markdoc": "^0.5.2",
@@ -42,7 +43,6 @@
         "dockerode": "^4.0.7",
         "edge.js": "^6.2.1",
         "fast-xml-parser": "^5.2.5",
-        "llm-chunk": "^0.0.1",
         "luxon": "^3.6.1",
         "maplibre-gl": "^4.7.1",
         "mysql2": "^3.14.1",
@@ -60,6 +60,7 @@
         "reflect-metadata": "^0.2.2",
         "remark-gfm": "^4.0.1",
         "sharp": "^0.34.5",
+        "stopword": "^3.1.5",
         "systeminformation": "^5.27.14",
         "tailwindcss": "^4.1.10",
         "tar": "^7.5.6",
@@ -83,6 +84,7 @@
         "@types/node": "^22.15.18",
         "@types/react": "^19.1.8",
         "@types/react-dom": "^19.1.6",
+        "@types/stopword": "^2.0.3",
         "eslint": "^9.26.0",
         "hot-hook": "^0.4.0",
         "prettier": "^3.5.3",
@@ -1230,6 +1232,21 @@
       "dev": true,
       "license": "Apache-2.0"
     },
+    "node_modules/@chonkiejs/chunk": {
+      "version": "0.9.3",
+      "resolved": "https://registry.npmjs.org/@chonkiejs/chunk/-/chunk-0.9.3.tgz",
+      "integrity": "sha512-uUOeoFGY3s6kzAoKskI50weZN0zvW3oLwUijA1uX7Wxuy9yZStF2IvGuXRigMgP2g/L85lsotYGkjpBMLjQnrg==",
+      "license": "MIT OR Apache-2.0"
+    },
+    "node_modules/@chonkiejs/core": {
+      "version": "0.0.7",
+      "resolved": "https://registry.npmjs.org/@chonkiejs/core/-/core-0.0.7.tgz",
+      "integrity": "sha512-R17OW9TT1x7B6lDKTCaMd6NluAObleN/cCQtUbMK2UcFOguJtQz/cL0n1t0AzJWBFMVgYP8EcqTFn/fcKhzPiA==",
+      "license": "MIT",
+      "dependencies": {
+        "@chonkiejs/chunk": "^0.9.3"
+      }
+    },
     "node_modules/@colors/colors": {
       "version": "1.5.0",
       "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.5.0.tgz",
@@ -5084,6 +5101,13 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/@types/stopword": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/@types/stopword/-/stopword-2.0.3.tgz",
+      "integrity": "sha512-hioMj0lOvISM+EDevf7ijG8EMbU+J3pj4SstCyfQC1t39uPYpAe7beSfBdU6c1d9jeECTQQtR3UJWtVoUO8Weg==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/@types/supercluster": {
       "version": "7.1.3",
       "resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
@@ -9866,12 +9890,6 @@
         "url": "https://opencollective.com/parcel"
       }
     },
-    "node_modules/llm-chunk": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/llm-chunk/-/llm-chunk-0.0.1.tgz",
-      "integrity": "sha512-n9fHgsSiJb7vXZiC5c4XV6rme+tC7WX/cWH6EJvPPmMOMwOZ9xdg/U9LY5Qhmixd3K1PdRB0FVOdzoJF2HUZbg==",
-      "license": "MIT"
-    },
     "node_modules/locate-path": {
       "version": "7.2.0",
       "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-7.2.0.tgz",
@@ -13619,6 +13637,12 @@
         "node": ">= 0.8"
       }
     },
+    "node_modules/stopword": {
+      "version": "3.1.5",
+      "resolved": "https://registry.npmjs.org/stopword/-/stopword-3.1.5.tgz",
+      "integrity": "sha512-OgLYGVFCNa430WOrj9tYZhQge5yg6vd6JsKredveAqEhdLVQkfrpnQIGjx0L9lLqzL4Kq4J8yNTcfQR/MpBwhg==",
+      "license": "MIT"
+    },
     "node_modules/string_decoder": {
       "version": "1.3.0",
       "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
diff --git a/admin/package.json b/admin/package.json
index e42690e..a7141a1 100644
--- a/admin/package.json
+++ b/admin/package.json
@@ -52,6 +52,7 @@
     "@types/node": "^22.15.18",
     "@types/react": "^19.1.8",
     "@types/react-dom": "^19.1.6",
+    "@types/stopword": "^2.0.3",
     "eslint": "^9.26.0",
     "hot-hook": "^0.4.0",
     "prettier": "^3.5.3",
@@ -71,6 +72,7 @@
     "@adonisjs/transmit": "^2.0.2",
     "@adonisjs/transmit-client": "^1.0.0",
     "@adonisjs/vite": "^4.0.0",
+    "@chonkiejs/core": "^0.0.7",
     "@headlessui/react": "^2.2.4",
     "@inertiajs/react": "^2.0.13",
     "@markdoc/markdoc": "^0.5.2",
@@ -93,7 +95,6 @@
     "dockerode": "^4.0.7",
     "edge.js": "^6.2.1",
     "fast-xml-parser": "^5.2.5",
-    "llm-chunk": "^0.0.1",
     "luxon": "^3.6.1",
     "maplibre-gl": "^4.7.1",
     "mysql2": "^3.14.1",
@@ -111,6 +112,7 @@
     "reflect-metadata": "^0.2.2",
     "remark-gfm": "^4.0.1",
     "sharp": "^0.34.5",
+    "stopword": "^3.1.5",
     "systeminformation": "^5.27.14",
     "tailwindcss": "^4.1.10",
     "tar": "^7.5.6",
diff --git a/admin/start/routes.ts b/admin/start/routes.ts
index d88ae14..0a552d2 100644
--- a/admin/start/routes.ts
+++ b/admin/start/routes.ts
@@ -119,6 +119,7 @@ router
   .group(() => {
     router.post('/upload', [RagController, 'upload'])
     router.get('/files', [RagController, 'getStoredFiles'])
+    router.get('/job-status', [RagController, 'getJobStatus'])
   })
   .prefix('/api/rag')