feat(RAG): initial beta with preprocessing, embedding, semantic retrieval, and ctx passage

This commit is contained in:
Jake Turner 2026-02-01 23:59:21 +00:00
parent 1923cd4cde
commit d1f40663d3
10 changed files with 612 additions and 60 deletions

View File

@ -41,16 +41,17 @@ export default class OllamaController {
if (lastUserMessage) {
// Search for relevant context in the knowledge base
// Using lower threshold (0.3) with improved hybrid search
const relevantDocs = await this.ragService.searchSimilarDocuments(
lastUserMessage.content,
5, // Retrieve top 5 most relevant chunks
0.7 // Minimum similarity score of 0.7
0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
)
// If relevant context is found, inject as a system message
if (relevantDocs.length > 0) {
const contextText = relevantDocs
.map((doc, idx) => `[Context ${idx + 1}]\n${doc.text}`)
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
.join('\n\n')
const systemMessage = {

View File

@ -1,9 +1,12 @@
import { RagService } from '#services/rag_service'
import { EmbedFileJob } from '#jobs/embed_file_job'
import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http'
import app from '@adonisjs/core/services/app'
import { randomBytes } from 'node:crypto'
import { sanitizeFilename } from '../utils/fs.js'
import { stat } from 'node:fs/promises'
import { getJobStatusSchema } from '#validators/rag'
@inject()
export default class RagController {
@ -19,19 +22,48 @@ export default class RagController {
const sanitizedName = sanitizeFilename(uploadedFile.clientName)
const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}`
const fullPath = app.makePath('storage/uploads', fileName)
const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)
await uploadedFile.move(app.makePath('storage/uploads'), {
await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), {
name: fileName,
})
// Don't await this - process in background
this.ragService.processAndEmbedFile(fullPath)
// Get file size for tracking
let fileSize: number | undefined = undefined
try {
const stats = await stat(fullPath)
fileSize = stats.size
} catch (error) {
// Not critical if we can't get file size, just swallow the error
}
return response.status(200).json({
message: 'File has been uploaded and queued for processing.',
file_path: `/uploads/${fileName}`,
// Dispatch background job for embedding
const result = await EmbedFileJob.dispatch({
filePath: fullPath,
fileName,
fileSize,
})
return response.status(202).json({
message: result.message,
jobId: result.jobId,
fileName,
filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`,
alreadyProcessing: !result.created,
})
}
public async getJobStatus({ request, response }: HttpContext) {
const reqData = await request.validateUsing(getJobStatusSchema)
const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, reqData.filePath)
const status = await EmbedFileJob.getStatus(fullPath)
if (!status.exists) {
return response.status(404).json({ error: 'Job not found for this file' })
}
return response.status(200).json(status)
}
public async getStoredFiles({ response }: HttpContext) {

View File

@ -0,0 +1,161 @@
import { Job } from 'bullmq'
import { QueueService } from '#services/queue_service'
import { RagService } from '#services/rag_service'
import { DockerService } from '#services/docker_service'
import { OllamaService } from '#services/ollama_service'
import { createHash } from 'crypto'
import logger from '@adonisjs/core/services/logger'
export interface EmbedFileJobParams {
  filePath: string
  fileName: string
  fileSize?: number
}

/**
 * Background job that chunks and embeds an uploaded file into the
 * knowledge base via RagService.
 *
 * Job ids are derived deterministically from the file path, so the same
 * file cannot be queued twice concurrently (BullMQ deduplicates on jobId).
 */
export class EmbedFileJob {
  /** BullMQ queue this job runs on. */
  static get queue() {
    return 'file-embeddings'
  }

  /** Job name used when registering the worker handler. */
  static get key() {
    return 'embed-file'
  }

  /** Derive a stable 16-hex-char job id from the file path. */
  static getJobId(filePath: string): string {
    return createHash('sha256').update(filePath).digest('hex').slice(0, 16)
  }

  /**
   * Worker entry point: embeds the file and records progress/status on the
   * job's data so getStatus() can report it. Rethrows on failure so BullMQ
   * applies the retry/backoff policy configured in dispatch().
   */
  async handle(job: Job) {
    const { filePath, fileName } = job.data as EmbedFileJobParams
    logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}`)

    // Services are constructed manually because the worker process runs
    // outside the HTTP request's IoC container.
    const dockerService = new DockerService()
    const ollamaService = new OllamaService()
    const ragService = new RagService(dockerService, ollamaService)

    try {
      // Mark the job as started before the (potentially long) embedding run
      await job.updateProgress(0)
      await job.updateData({
        ...job.data,
        status: 'processing',
        startedAt: Date.now(),
      })

      logger.info(`[EmbedFileJob] Processing file: ${filePath}`)
      // Process and embed the file
      const result = await ragService.processAndEmbedFile(filePath)
      if (!result.success) {
        logger.error(`[EmbedFileJob] Failed to process file ${fileName}: ${result.message}`)
        throw new Error(result.message)
      }

      // Update progress complete
      await job.updateProgress(100)
      await job.updateData({
        ...job.data,
        status: 'completed',
        completedAt: Date.now(),
        chunks: result.chunks,
      })

      logger.info(
        `[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}`
      )
      return {
        success: true,
        fileName,
        filePath,
        chunks: result.chunks,
        message: `Successfully embedded ${result.chunks} chunks`,
      }
    } catch (error) {
      logger.error(`[EmbedFileJob] Error embedding file ${fileName}:`, error)
      await job.updateData({
        ...job.data,
        status: 'failed',
        failedAt: Date.now(),
        error: error instanceof Error ? error.message : 'Unknown error',
      })
      throw error
    }
  }

  /** Look up the job (if any) previously dispatched for this file path. */
  static async getByFilePath(filePath: string): Promise<Job | undefined> {
    const queueService = new QueueService()
    const queue = queueService.getQueue(this.queue)
    const jobId = this.getJobId(filePath)
    return await queue.getJob(jobId)
  }

  /**
   * Enqueue an embedding job for a file. Returns the created (or already
   * existing) job plus a `created` flag the controller uses to report
   * "already processing".
   */
  static async dispatch(params: EmbedFileJobParams) {
    const queueService = new QueueService()
    const queue = queueService.getQueue(this.queue)
    const jobId = this.getJobId(params.filePath)

    try {
      const job = await queue.add(this.key, params, {
        jobId,
        attempts: 3,
        backoff: {
          type: 'exponential',
          delay: 5000, // Delay 5 seconds before retrying
        },
        removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history
        removeOnFail: { count: 20 }, // Keep last 20 failed jobs for debugging
      })
      logger.info(`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}`)
      return {
        job,
        created: true,
        jobId,
        message: `File queued for embedding: ${params.fileName}`,
      }
    } catch (error) {
      // `error` is `unknown` in a catch clause under strict TS; narrow before
      // touching `.message` (the original accessed error.message unguarded,
      // which fails under useUnknownInCatchVariables and throws on non-Error
      // rejection values).
      const message = error instanceof Error ? error.message : String(error)
      if (message.includes('job already exists')) {
        const existing = await queue.getJob(jobId)
        logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
        return {
          job: existing,
          created: false,
          jobId,
          message: `Embedding job already exists for: ${params.fileName}`,
        }
      }
      throw error
    }
  }

  /**
   * Report the status of the embedding job for a file, merging BullMQ's
   * state with the richer status string stored on job data.
   */
  static async getStatus(filePath: string): Promise<{
    exists: boolean
    status?: string
    progress?: number
    chunks?: number
    error?: string
  }> {
    const job = await this.getByFilePath(filePath)
    if (!job) {
      return { exists: false }
    }
    const state = await job.getState()
    const data = job.data
    return {
      exists: true,
      status: data.status || state,
      progress: typeof job.progress === 'number' ? job.progress : undefined,
      chunks: data.chunks,
      error: data.error,
    }
  }
}

View File

@ -2,28 +2,115 @@ import { QdrantClient } from '@qdrant/js-client-rest'
import { DockerService } from './docker_service.js'
import { inject } from '@adonisjs/core'
import logger from '@adonisjs/core/services/logger'
import { chunk } from 'llm-chunk'
import { TokenChunker } from '@chonkiejs/core'
import sharp from 'sharp'
import { determineFileType, getFile } from '../utils/fs.js'
import { deleteFileIfExists, determineFileType, getFile } from '../utils/fs.js'
import { PDFParse } from 'pdf-parse'
import { createWorker } from 'tesseract.js'
import { fromBuffer } from 'pdf2pic'
import { OllamaService } from './ollama_service.js'
import { SERVICE_NAMES } from '../../constants/service_names.js'
import { removeStopwords } from 'stopword'
import { randomUUID } from 'node:crypto'
@inject()
export class RagService {
private qdrant: QdrantClient | null = null
private qdrantInitPromise: Promise<void> | null = null
public static CONTENT_COLLECTION_NAME = 'open-webui_knowledge' // This is the collection name OWUI uses for uploaded knowledge
public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
public static EMBEDDING_DIMENSION = 768 // Nomic Embed Text v1.5 dimension is 768
public static MODEL_CONTEXT_LENGTH = 2048 // nomic-embed-text has 2K token context
public static MAX_SAFE_TOKENS = 1800 // Leave buffer for prefix and tokenization variance
public static TARGET_TOKENS_PER_CHUNK = 1700 // Target 1700 tokens per chunk for embedding
public static PREFIX_TOKEN_BUDGET = 10 // Reserve ~10 tokens for prefixes
public static CHAR_TO_TOKEN_RATIO = 3 // Approximate chars per token
// Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
public static SEARCH_QUERY_PREFIX = 'search_query: '
constructor(
private dockerService: DockerService,
private ollamaService: OllamaService
) {}
/**
* Estimates token count for text. This is a conservative approximation:
* - English text: ~1 token per 3 characters
* - Adds buffer for special characters and tokenization variance
*
* Note: This is approximate and realistic english
* tokenization is ~4 chars/token, but we use 3 here to be safe.
* Actual tokenization may differ, but being
* conservative prevents context length errors.
*/
private estimateTokenCount(text: string): number {
// This accounts for special characters, numbers, and punctuation
return Math.ceil(text.length / RagService.CHAR_TO_TOKEN_RATIO)
}
/**
* Truncates text to fit within token limit, preserving word boundaries.
* Ensures the text + prefix won't exceed the model's context window.
*/
private truncateToTokenLimit(text: string, maxTokens: number): string {
const estimatedTokens = this.estimateTokenCount(text)
if (estimatedTokens <= maxTokens) {
return text
}
// Calculate how many characters we can keep using our ratio
const maxChars = Math.floor(maxTokens * RagService.CHAR_TO_TOKEN_RATIO)
// Truncate at word boundary
let truncated = text.substring(0, maxChars)
const lastSpace = truncated.lastIndexOf(' ')
if (lastSpace > maxChars * 0.8) {
// If we found a space in the last 20%, use it
truncated = truncated.substring(0, lastSpace)
}
logger.warn(
`[RAG] Truncated text from ${text.length} to ${truncated.length} chars (est. ${estimatedTokens}${this.estimateTokenCount(truncated)} tokens)`
)
return truncated
}
/**
* Preprocesses a query to improve retrieval by expanding it with context.
* This helps match documents even when using different terminology.
*/
private preprocessQuery(query: string): string {
// Future: this is a placeholder for more advanced query expansion techniques.
// For now, we simply trim whitespace. Improvements could include:
// - Synonym expansion using a thesaurus
// - Adding related terms based on domain knowledge
// - Using a language model to rephrase or elaborate the query
const expanded = query.trim()
logger.debug(`[RAG] Original query: "${query}"`)
logger.debug(`[RAG] Preprocessed query: "${expanded}"`)
return expanded
}
/**
* Extract keywords from query for hybrid search
*/
private extractKeywords(query: string): string[] {
const split = query.split(' ')
const noStopWords = removeStopwords(split)
// Future: This is basic normalization, could be improved with stemming/lemmatization later
const keywords = noStopWords
.map((word) => word.replace(/[^\w]/g, '').toLowerCase())
.filter((word) => word.length > 2)
return [...new Set(keywords)]
}
private async _initializeQdrantClient() {
if (!this.qdrantInitPromise) {
this.qdrantInitPromise = (async () => {
@ -84,43 +171,87 @@ export class RagService {
throw new Error(`${RagService.EMBEDDING_MODEL} does not exist and could not be downloaded.`)
}
const chunks = chunk(text, {
// These settings should provide a good balance between context and precision
minLength: 512,
maxLength: 1024,
overlap: 200,
// TokenChunker uses character-based tokenization (1 char = 1 token)
// We need to convert our embedding model's token counts to character counts
// since nomic-embed-text tokenizer uses ~3 chars per token
const targetCharsPerChunk = Math.floor(RagService.TARGET_TOKENS_PER_CHUNK * RagService.CHAR_TO_TOKEN_RATIO)
const overlapChars = Math.floor(150 * RagService.CHAR_TO_TOKEN_RATIO)
const chunker = await TokenChunker.create({
chunkSize: targetCharsPerChunk,
chunkOverlap: overlapChars,
})
if (!chunks || chunks.length === 0) {
const chunkResults = await chunker.chunk(text)
if (!chunkResults || chunkResults.length === 0) {
throw new Error('No text chunks generated for embedding.')
}
const embeddings: number[][] = []
// Extract text from chunk results
const chunks = chunkResults.map((chunk) => chunk.text)
const ollamaClient = await this.ollamaService.getClient()
for (const chunkText of chunks) {
const embeddings: number[][] = []
for (let i = 0; i < chunks.length; i++) {
let chunkText = chunks[i]
// Final safety check: ensure chunk + prefix fits
const prefixText = RagService.SEARCH_DOCUMENT_PREFIX
const withPrefix = prefixText + chunkText
const estimatedTokens = this.estimateTokenCount(withPrefix)
if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
// This should be rare - log for debugging if it's occurring frequently
const prefixTokens = this.estimateTokenCount(prefixText)
const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
logger.warn(
`[RAG] Chunk ${i} estimated at ${estimatedTokens} tokens (${chunkText.length} chars), truncating to ${maxTokensForText} tokens`
)
chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
}
logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`)
const response = await ollamaClient.embeddings({
model: RagService.EMBEDDING_MODEL,
prompt: chunkText,
prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText,
})
embeddings.push(response.embedding)
}
const points = chunks.map((chunkText, index) => ({
id: `${Date.now()}_${index}`,
vector: embeddings[index],
payload: {
...metadata,
text: chunkText,
chunk_index: index,
},
}))
const timestamp = Date.now()
const points = chunks.map((chunkText, index) => {
// Extract keywords for hybrid search
const keywords = this.extractKeywords(chunkText)
logger.debug(`[RAG] Extracted keywords for chunk ${index}: [${keywords.join(', ')}]`)
return {
id: randomUUID(), // qdrant requires either uuid or unsigned int
vector: embeddings[index],
payload: {
...metadata,
text: chunkText,
chunk_index: index,
total_chunks: chunks.length,
keywords: keywords.join(' '), // Store as space-separated string for text search
char_count: chunkText.length,
created_at: timestamp,
source: metadata.source || 'unknown'
},
}
})
await this.qdrant!.upsert(RagService.CONTENT_COLLECTION_NAME, { points })
logger.debug(`[RAG] Successfully embedded and stored ${chunks.length} chunks`)
logger.debug(`[RAG] First chunk preview: "${chunks[0].substring(0, 100)}..."`)
return { chunks: chunks.length }
} catch (error) {
logger.error('Error embedding text:', error)
console.error(error)
logger.error('[RAG] Error embedding text:', error)
return null
}
}
@ -195,7 +326,7 @@ export class RagService {
* This includes text extraction, chunking, embedding, and storing in Qdrant.
*/
public async processAndEmbedFile(
filepath: string
filepath: string // Should already be the full path to the uploaded file
): Promise<{ success: boolean; message: string; chunks?: number }> {
try {
const fileType = determineFileType(filepath)
@ -233,7 +364,13 @@ export class RagService {
return { success: false, message: 'No text could be extracted from the file.' }
}
const embedResult = await this.embedAndStoreText(extractedText, {})
const embedResult = await this.embedAndStoreText(extractedText, {
source: filepath
})
// Cleanup the file from disk
logger.info(`[RAG] Embedding complete, deleting uploaded file: ${filepath}`)
await deleteFileIfExists(filepath)
return {
success: true,
@ -248,60 +385,230 @@ export class RagService {
/**
* Search for documents similar to the query text in the Qdrant knowledge base.
* Returns the most relevant text chunks based on semantic similarity.
* Uses a hybrid approach combining semantic similarity and keyword matching.
* Implements adaptive thresholds and result reranking for optimal retrieval.
* @param query - The search query text
* @param limit - Maximum number of results to return (default: 5)
* @param scoreThreshold - Minimum similarity score threshold (default: 0.7)
* @param scoreThreshold - Minimum similarity score threshold (default: 0.3, much lower than before)
* @returns Array of relevant text chunks with their scores
*/
public async searchSimilarDocuments(
query: string,
limit: number = 5,
scoreThreshold: number = 0.7
): Promise<Array<{ text: string; score: number }>> {
scoreThreshold: number = 0.3 // Lower default threshold - was 0.7, now 0.3
): Promise<Array<{ text: string; score: number; metadata?: Record<string, any> }>> {
try {
logger.debug(`[RAG] Starting similarity search for query: "${query}"`)
await this._ensureCollection(
RagService.CONTENT_COLLECTION_NAME,
RagService.EMBEDDING_DIMENSION
)
// Check if collection has any points
const collectionInfo = await this.qdrant!.getCollection(RagService.CONTENT_COLLECTION_NAME)
const pointCount = collectionInfo.points_count || 0
logger.debug(`[RAG] Knowledge base contains ${pointCount} document chunks`)
if (pointCount === 0) {
logger.debug('[RAG] Knowledge base is empty. Could not perform search.')
return []
}
const allModels = await this.ollamaService.getModels(true)
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
if (!embeddingModel) {
logger.warn(
`${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
)
return []
}
// Generate embedding for the query
// Preprocess query for better matching
const processedQuery = this.preprocessQuery(query)
const keywords = this.extractKeywords(processedQuery)
logger.debug(`[RAG] Extracted keywords: [${keywords.join(', ')}]`)
// Generate embedding for the query with search_query prefix
const ollamaClient = await this.ollamaService.getClient()
// Ensure query doesn't exceed token limit
const prefixTokens = this.estimateTokenCount(RagService.SEARCH_QUERY_PREFIX)
const maxQueryTokens = RagService.MAX_SAFE_TOKENS - prefixTokens
const truncatedQuery = this.truncateToTokenLimit(processedQuery, maxQueryTokens)
const prefixedQuery = RagService.SEARCH_QUERY_PREFIX + truncatedQuery
logger.debug(`[RAG] Generating embedding with prefix: "${RagService.SEARCH_QUERY_PREFIX}"`)
// Validate final token count
const queryTokenCount = this.estimateTokenCount(prefixedQuery)
if (queryTokenCount > RagService.MAX_SAFE_TOKENS) {
logger.error(
`[RAG] Query too long even after truncation: ${queryTokenCount} tokens (max: ${RagService.MAX_SAFE_TOKENS})`
)
return []
}
const response = await ollamaClient.embeddings({
model: RagService.EMBEDDING_MODEL,
prompt: query,
prompt: prefixedQuery,
})
// Search for similar vectors in Qdrant
// Perform semantic search with a higher limit to enable reranking
const searchLimit = limit * 3 // Get more results for reranking
logger.debug(
`[RAG] Searching for top ${searchLimit} semantic matches (threshold: ${scoreThreshold})`
)
const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
vector: response.embedding,
limit: limit,
limit: searchLimit,
score_threshold: scoreThreshold,
with_payload: true,
})
console.log("Got search results:", searchResults);
logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
return searchResults.map((result) => ({
// Map results with metadata for reranking
const resultsWithMetadata = searchResults.map((result) => ({
text: (result.payload?.text as string) || '',
score: result.score,
keywords: (result.payload?.keywords as string) || '',
chunk_index: (result.payload?.chunk_index as number) || 0,
created_at: (result.payload?.created_at as number) || 0,
}))
const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
logger.debug(`[RAG] Top 3 results after reranking:`)
rerankedResults.slice(0, 3).forEach((result, idx) => {
logger.debug(
`[RAG] ${idx + 1}. Score: ${result.finalScore.toFixed(4)} (semantic: ${result.score.toFixed(4)}) - "${result.text.substring(0, 100)}..."`
)
})
// Return top N results
return rerankedResults.slice(0, limit).map((result) => ({
text: result.text,
score: result.finalScore,
metadata: {
chunk_index: result.chunk_index,
created_at: result.created_at,
semantic_score: result.score,
},
}))
} catch (error) {
logger.error('Error searching similar documents:', error)
logger.error('[RAG] Error searching similar documents:', error)
return []
}
}
  /**
   * Rerank search results using hybrid scoring that combines:
   * 1. Semantic similarity score (primary signal)
   * 2. Keyword overlap bonus (conservative, quality-gated)
   * 3. Direct term matches (conservative)
   *
   * Tries to boost only already-relevant results, not promote
   * low-quality results just because they have keyword matches.
   * Boosts are scaled by the base semantic score and the combined
   * total is clamped to 1.0, so keyword matches can reorder near
   * ties but never dominate the semantic signal.
   *
   * Future: this is a decent feature-based approach, but we could
   * switch to a python-based reranker in the future if the benefits
   * outweigh the overhead.
   *
   * @param results - Semantic search hits (text, score, stored keywords, chunk metadata)
   * @param queryKeywords - Keywords extracted from the preprocessed query
   * @param originalQuery - The raw user query, used for direct term matching
   * @returns Results annotated with `finalScore`, sorted descending by it
   */
  private rerankResults(
    results: Array<{
      text: string
      score: number
      keywords: string
      chunk_index: number
      created_at: number
    }>,
    queryKeywords: string[],
    originalQuery: string
  ): Array<{
    text: string
    score: number
    finalScore: number
    chunk_index: number
    created_at: number
  }> {
    return results
      .map((result) => {
        let finalScore = result.score
        // Quality gate: Only apply boosts if semantic score is reasonable
        // Try to prevent promoting irrelevant results that just happen to have keyword matches
        const MIN_SEMANTIC_THRESHOLD = 0.35
        if (result.score < MIN_SEMANTIC_THRESHOLD) {
          // For low-scoring results, use semantic score as-is
          // This prevents false positives from keyword gaming
          logger.debug(
            `[RAG] Skipping boost for low semantic score: ${result.score.toFixed(3)} (threshold: ${MIN_SEMANTIC_THRESHOLD})`
          )
          return {
            ...result,
            finalScore,
          }
        }
        // Boost score based on keyword overlap (diminishing returns - overlap goes down, so does boost)
        // A keyword "matches" if it appears in the chunk's stored keyword list
        // OR anywhere in the chunk text itself.
        const docKeywords = result.keywords
          .toLowerCase()
          .split(' ')
          .filter((k) => k.length > 0)
        const matchingKeywords = queryKeywords.filter(
          (kw) =>
            docKeywords.includes(kw.toLowerCase()) ||
            result.text.toLowerCase().includes(kw.toLowerCase())
        )
        const keywordOverlap = matchingKeywords.length / Math.max(queryKeywords.length, 1)
        // Use square root for diminishing returns: 100% overlap = sqrt(1.0) = 1.0, 25% = 0.5
        // Then scale conservatively (max 10% boost instead of 20%)
        const keywordBoost = Math.sqrt(keywordOverlap) * 0.1 * result.score
        if (keywordOverlap > 0) {
          logger.debug(
            `[RAG] Keyword overlap: ${matchingKeywords.length}/${queryKeywords.length} - Boost: ${keywordBoost.toFixed(3)}`
          )
        }
        // Boost if original query terms appear in text (case-insensitive)
        // Scale boost proportionally to base score to avoid over-promoting weak matches
        const queryTerms = originalQuery
          .toLowerCase()
          .split(/\s+/)
          .filter((t) => t.length > 3)
        const directMatches = queryTerms.filter((term) =>
          result.text.toLowerCase().includes(term)
        ).length
        if (queryTerms.length > 0) {
          const directMatchRatio = directMatches / queryTerms.length
          // Conservative boost: max 7.5% of the base score
          const directMatchBoost = Math.sqrt(directMatchRatio) * 0.075 * result.score
          if (directMatches > 0) {
            logger.debug(
              `[RAG] Direct term matches: ${directMatches}/${queryTerms.length} - Boost: ${directMatchBoost.toFixed(3)}`
            )
            finalScore += directMatchBoost
          }
        }
        // keywordBoost is applied last (the direct-match boost was added above)
        // and the combined score is clamped to the valid similarity range [0, 1]
        finalScore = Math.min(1.0, finalScore + keywordBoost)
        return {
          ...result,
          finalScore,
        }
      })
      .sort((a, b) => b.finalScore - a.finalScore)
  }
/**
* Retrieve all unique source files that have been stored in the knowledge base.
* @returns Array of unique source file identifiers
@ -328,9 +635,8 @@ export class RagService {
// Extract unique source values from payloads
scrollResult.points.forEach((point) => {
const metadata = point.payload?.metadata
if (metadata && typeof metadata === 'object' && 'source' in metadata) {
const source = metadata.source as string
const source = point.payload?.source
if (source && typeof source === 'string') {
sources.add(source)
}
})
@ -338,7 +644,13 @@ export class RagService {
offset = scrollResult.next_page_offset || null
} while (offset !== null)
return Array.from(sources)
const sourcesArr = Array.from(sources)
// The source is a full path - only extract the filename for display
return sourcesArr.map((src) => {
const parts = src.split(/[/\\]/)
return parts[parts.length - 1] // Return the last part as filename
})
} catch (error) {
logger.error('Error retrieving stored files:', error)
return []

View File

@ -0,0 +1,7 @@
import vine from '@vinejs/vine'
/**
 * Validates the payload for the RAG job-status endpoint.
 */
export const getJobStatusSchema = vine.compile(
  vine.object({
    // Restrict to a bare file name: the controller joins this value onto the
    // uploads directory with app.makePath, so allowing path separators or a
    // dots-only name would let a caller probe arbitrary paths (path traversal).
    filePath: vine.string().regex(/^(?!\.+$)[^/\\]+$/),
  })
)

View File

@ -5,6 +5,7 @@ import queueConfig from '#config/queue'
import { RunDownloadJob } from '#jobs/run_download_job'
import { DownloadModelJob } from '#jobs/download_model_job'
import { RunBenchmarkJob } from '#jobs/run_benchmark_job'
import { EmbedFileJob } from '#jobs/embed_file_job'
export default class QueueWork extends BaseCommand {
static commandName = 'queue:work'
@ -90,10 +91,12 @@ export default class QueueWork extends BaseCommand {
handlers.set(RunDownloadJob.key, new RunDownloadJob())
handlers.set(DownloadModelJob.key, new DownloadModelJob())
handlers.set(RunBenchmarkJob.key, new RunBenchmarkJob())
handlers.set(EmbedFileJob.key, new EmbedFileJob())
queues.set(RunDownloadJob.key, RunDownloadJob.queue)
queues.set(DownloadModelJob.key, DownloadModelJob.queue)
queues.set(RunBenchmarkJob.key, RunBenchmarkJob.queue)
queues.set(EmbedFileJob.key, EmbedFileJob.queue)
return [handlers, queues]
}
@ -107,6 +110,7 @@ export default class QueueWork extends BaseCommand {
[RunDownloadJob.queue]: 3,
[DownloadModelJob.queue]: 2, // Lower concurrency for resource-intensive model downloads
[RunBenchmarkJob.queue]: 1, // Run benchmarks one at a time for accurate results
[EmbedFileJob.queue]: 2, // Lower concurrency for embedding jobs, can be resource intensive
default: 3,
}

View File

@ -66,12 +66,20 @@ export const SYSTEM_PROMPTS = {
- Use tables when presenting structured data.
`,
rag_context: (context: string) => `
You have access to the following relevant information from the knowledge base. Use this context to provide accurate and informed responses when relevant:
You have access to relevant information from the knowledge base. This context has been retrieved based on semantic similarity to the user's question.
[Context]
[Knowledge Base Context]
${context}
If the user's question is related to this context, incorporate it into your response. Otherwise, respond normally.
IMPORTANT INSTRUCTIONS:
1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer.
5. Never fabricate information that isn't in the context or your training data.
6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations.
Format your response using markdown for readability.
`,
chat_suggestions: `
You are a helpful assistant that generates conversation starter suggestions for a survivalist/prepper using an AI assistant.

View File

@ -20,6 +20,7 @@
"@adonisjs/transmit": "^2.0.2",
"@adonisjs/transmit-client": "^1.0.0",
"@adonisjs/vite": "^4.0.0",
"@chonkiejs/core": "^0.0.7",
"@headlessui/react": "^2.2.4",
"@inertiajs/react": "^2.0.13",
"@markdoc/markdoc": "^0.5.2",
@ -42,7 +43,6 @@
"dockerode": "^4.0.7",
"edge.js": "^6.2.1",
"fast-xml-parser": "^5.2.5",
"llm-chunk": "^0.0.1",
"luxon": "^3.6.1",
"maplibre-gl": "^4.7.1",
"mysql2": "^3.14.1",
@ -60,6 +60,7 @@
"reflect-metadata": "^0.2.2",
"remark-gfm": "^4.0.1",
"sharp": "^0.34.5",
"stopword": "^3.1.5",
"systeminformation": "^5.27.14",
"tailwindcss": "^4.1.10",
"tar": "^7.5.6",
@ -83,6 +84,7 @@
"@types/node": "^22.15.18",
"@types/react": "^19.1.8",
"@types/react-dom": "^19.1.6",
"@types/stopword": "^2.0.3",
"eslint": "^9.26.0",
"hot-hook": "^0.4.0",
"prettier": "^3.5.3",
@ -1230,6 +1232,21 @@
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@chonkiejs/chunk": {
"version": "0.9.3",
"resolved": "https://registry.npmjs.org/@chonkiejs/chunk/-/chunk-0.9.3.tgz",
"integrity": "sha512-uUOeoFGY3s6kzAoKskI50weZN0zvW3oLwUijA1uX7Wxuy9yZStF2IvGuXRigMgP2g/L85lsotYGkjpBMLjQnrg==",
"license": "MIT OR Apache-2.0"
},
"node_modules/@chonkiejs/core": {
"version": "0.0.7",
"resolved": "https://registry.npmjs.org/@chonkiejs/core/-/core-0.0.7.tgz",
"integrity": "sha512-R17OW9TT1x7B6lDKTCaMd6NluAObleN/cCQtUbMK2UcFOguJtQz/cL0n1t0AzJWBFMVgYP8EcqTFn/fcKhzPiA==",
"license": "MIT",
"dependencies": {
"@chonkiejs/chunk": "^0.9.3"
}
},
"node_modules/@colors/colors": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.5.0.tgz",
@ -5084,6 +5101,13 @@
"dev": true,
"license": "MIT"
},
"node_modules/@types/stopword": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/@types/stopword/-/stopword-2.0.3.tgz",
"integrity": "sha512-hioMj0lOvISM+EDevf7ijG8EMbU+J3pj4SstCyfQC1t39uPYpAe7beSfBdU6c1d9jeECTQQtR3UJWtVoUO8Weg==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/supercluster": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
@ -9866,12 +9890,6 @@
"url": "https://opencollective.com/parcel"
}
},
"node_modules/llm-chunk": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/llm-chunk/-/llm-chunk-0.0.1.tgz",
"integrity": "sha512-n9fHgsSiJb7vXZiC5c4XV6rme+tC7WX/cWH6EJvPPmMOMwOZ9xdg/U9LY5Qhmixd3K1PdRB0FVOdzoJF2HUZbg==",
"license": "MIT"
},
"node_modules/locate-path": {
"version": "7.2.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-7.2.0.tgz",
@ -13619,6 +13637,12 @@
"node": ">= 0.8"
}
},
"node_modules/stopword": {
"version": "3.1.5",
"resolved": "https://registry.npmjs.org/stopword/-/stopword-3.1.5.tgz",
"integrity": "sha512-OgLYGVFCNa430WOrj9tYZhQge5yg6vd6JsKredveAqEhdLVQkfrpnQIGjx0L9lLqzL4Kq4J8yNTcfQR/MpBwhg==",
"license": "MIT"
},
"node_modules/string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",

View File

@ -52,6 +52,7 @@
"@types/node": "^22.15.18",
"@types/react": "^19.1.8",
"@types/react-dom": "^19.1.6",
"@types/stopword": "^2.0.3",
"eslint": "^9.26.0",
"hot-hook": "^0.4.0",
"prettier": "^3.5.3",
@ -71,6 +72,7 @@
"@adonisjs/transmit": "^2.0.2",
"@adonisjs/transmit-client": "^1.0.0",
"@adonisjs/vite": "^4.0.0",
"@chonkiejs/core": "^0.0.7",
"@headlessui/react": "^2.2.4",
"@inertiajs/react": "^2.0.13",
"@markdoc/markdoc": "^0.5.2",
@ -93,7 +95,6 @@
"dockerode": "^4.0.7",
"edge.js": "^6.2.1",
"fast-xml-parser": "^5.2.5",
"llm-chunk": "^0.0.1",
"luxon": "^3.6.1",
"maplibre-gl": "^4.7.1",
"mysql2": "^3.14.1",
@ -111,6 +112,7 @@
"reflect-metadata": "^0.2.2",
"remark-gfm": "^4.0.1",
"sharp": "^0.34.5",
"stopword": "^3.1.5",
"systeminformation": "^5.27.14",
"tailwindcss": "^4.1.10",
"tar": "^7.5.6",

View File

@ -119,6 +119,7 @@ router
.group(() => {
router.post('/upload', [RagController, 'upload'])
router.get('/files', [RagController, 'getStoredFiles'])
router.get('/job-status', [RagController, 'getJobStatus'])
})
.prefix('/api/rag')