feat(AI Assistant): performance improvements and smarter RAG context usage

This commit is contained in:
Jake Turner 2026-03-11 05:52:46 +00:00 committed by Jake Turner
parent 460756f581
commit 96e5027055
5 changed files with 242 additions and 88 deletions

View File

@ -5,7 +5,7 @@ import { modelNameSchema } from '#validators/download'
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama' import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http' import type { HttpContext } from '@adonisjs/core/http'
import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js' import { DEFAULT_QUERY_REWRITE_MODEL, RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import logger from '@adonisjs/core/services/logger' import logger from '@adonisjs/core/services/logger'
import type { Message } from 'ollama' import type { Message } from 'ollama'
@ -66,9 +66,28 @@ export default class OllamaController {
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`) logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
// If relevant context is found, inject as a system message // If relevant context is found, inject as a system message with adaptive limits
if (relevantDocs.length > 0) { if (relevantDocs.length > 0) {
const contextText = relevantDocs // Determine context budget based on model size
const { maxResults, maxTokens } = this.getContextLimitsForModel(reqData.model)
let trimmedDocs = relevantDocs.slice(0, maxResults)
// Apply token cap if set (estimate ~4 chars per token)
// Always include the first (most relevant) result — the cap only gates subsequent results
if (maxTokens > 0) {
const charCap = maxTokens * 4
let totalChars = 0
trimmedDocs = trimmedDocs.filter((doc, idx) => {
totalChars += doc.text.length
return idx === 0 || totalChars <= charCap
})
}
logger.debug(
`[RAG] Injecting ${trimmedDocs.length}/${relevantDocs.length} results (model: ${reqData.model}, maxResults: ${maxResults}, maxTokens: ${maxTokens || 'unlimited'})`
)
const contextText = trimmedDocs
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`) .map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
.join('\n\n') .join('\n\n')
@ -174,6 +193,25 @@ export default class OllamaController {
return await this.ollamaService.getModels() return await this.ollamaService.getModels()
} }
/**
 * Determines RAG context limits based on model size extracted from the model name.
 * Parses size indicators like "1b", "3b", "8b", "70b" from model names/tags
 * (e.g. "llama3.2:3b", "qwen2.5:1.5b", "gemma:7b").
 *
 * @param modelName Ollama model name/tag to inspect
 * @returns maxResults (number of RAG results to inject) and maxTokens (0 = unlimited)
 */
private getContextLimitsForModel(modelName: string): { maxResults: number; maxTokens: number } {
  // Extract parameter count from the model name. The lookahead requires the
  // "b" to terminate the token so digits inside words don't false-match
  // (e.g. "v2beta" must not be parsed as a 2B model).
  const sizeMatch = modelName.match(/(\d+(?:\.\d+)?)[bB](?![a-zA-Z0-9])/)
  const paramBillions = sizeMatch ? Number.parseFloat(sizeMatch[1]) : 8 // default to 8B if unknown

  // Tiers are ordered smallest-first; pick the first tier the model fits into
  for (const tier of RAG_CONTEXT_LIMITS) {
    if (paramBillions <= tier.maxParams) {
      return { maxResults: tier.maxResults, maxTokens: tier.maxTokens }
    }
  }

  // Fallback: no limits (unreachable while RAG_CONTEXT_LIMITS ends with an Infinity tier)
  return { maxResults: 5, maxTokens: 0 }
}
private async rewriteQueryWithContext( private async rewriteQueryWithContext(
messages: Message[] messages: Message[]
): Promise<string | null> { ): Promise<string | null> {
@ -199,8 +237,8 @@ export default class OllamaController {
}) })
.join('\n') .join('\n')
const availableModels = await this.ollamaService.getAvailableModels({ query: null, limit: 500 }) const installedModels = await this.ollamaService.getModels(true)
const rewriteModelAvailable = availableModels?.models.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL) const rewriteModelAvailable = installedModels?.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
if (!rewriteModelAvailable) { if (!rewriteModelAvailable) {
logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`) logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user') const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')

View File

@ -16,11 +16,13 @@ import { join, resolve, sep } from 'node:path'
import KVStore from '#models/kv_store' import KVStore from '#models/kv_store'
import { ZIMExtractionService } from './zim_extraction_service.js' import { ZIMExtractionService } from './zim_extraction_service.js'
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js' import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
import { ProcessAndEmbedFileResponse, ProcessZIMFileResponse, RAGResult, RerankedRAGResult } from '../../types/rag.js'
@inject() @inject()
export class RagService { export class RagService {
private qdrant: QdrantClient | null = null private qdrant: QdrantClient | null = null
private qdrantInitPromise: Promise<void> | null = null private qdrantInitPromise: Promise<void> | null = null
private embeddingModelVerified = false
public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads' public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base' public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5' public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
@ -33,6 +35,7 @@ export class RagService {
// Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance // Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
public static SEARCH_DOCUMENT_PREFIX = 'search_document: ' public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
public static SEARCH_QUERY_PREFIX = 'search_query: ' public static SEARCH_QUERY_PREFIX = 'search_query: '
public static EMBEDDING_BATCH_SIZE = 8 // Conservative batch size for low-end hardware
constructor( constructor(
private dockerService: DockerService, private dockerService: DockerService,
@ -75,6 +78,16 @@ export class RagService {
}, },
}) })
} }
// Create payload indexes for faster filtering (idempotent — Qdrant ignores duplicates)
await this.qdrant!.createPayloadIndex(collectionName, {
field_name: 'source',
field_schema: 'keyword',
})
await this.qdrant!.createPayloadIndex(collectionName, {
field_name: 'content_type',
field_schema: 'keyword',
})
} catch (error) { } catch (error) {
logger.error('Error ensuring Qdrant collection:', error) logger.error('Error ensuring Qdrant collection:', error)
throw error throw error
@ -148,14 +161,57 @@ export class RagService {
/** /**
* Preprocesses a query to improve retrieval by expanding it with context. * Preprocesses a query to improve retrieval by expanding it with context.
* This helps match documents even when using different terminology. * This helps match documents even when using different terminology.
* TODO: We could probably move this to a separate QueryPreprocessor class if it grows more complex, but for now it's manageable here.
*/ */
/**
 * Prepper/survivalist domain abbreviations expanded into full phrases so
 * vector search can match documents that spell the terms out.
 * Contract: keys are lowercase; lookups strip punctuation from query words
 * before matching (see preprocessQuery). Expansions are appended to the
 * query, so repeating the key term (e.g. 'mylar', 'paracord') is deliberate
 * to reinforce it alongside related terms.
 */
private static QUERY_EXPANSION_DICTIONARY: Record<string, string> = {
'bob': 'bug out bag',
'bov': 'bug out vehicle',
'bol': 'bug out location',
'edc': 'every day carry',
'mre': 'meal ready to eat',
'shtf': 'shit hits the fan',
'teotwawki': 'the end of the world as we know it',
'opsec': 'operational security',
'ifak': 'individual first aid kit',
'ghb': 'get home bag',
'ghi': 'get home in',
'wrol': 'without rule of law',
'emp': 'electromagnetic pulse',
'ham': 'ham amateur radio',
'nbr': 'nuclear biological radiological',
'cbrn': 'chemical biological radiological nuclear',
'sar': 'search and rescue',
'comms': 'communications radio',
'fifo': 'first in first out',
'mylar': 'mylar bag food storage',
'paracord': 'paracord 550 cord',
'ferro': 'ferro rod fire starter',
'bivvy': 'bivvy bivy emergency shelter',
'bdu': 'battle dress uniform',
'gmrs': 'general mobile radio service',
'frs': 'family radio service',
'nbc': 'nuclear biological chemical',
}
private preprocessQuery(query: string): string { private preprocessQuery(query: string): string {
// Future: this is a placeholder for more advanced query expansion techniques. let expanded = query.trim()
// For now, we simply trim whitespace. Improvements could include:
// - Synonym expansion using a thesaurus // Expand known domain abbreviations/acronyms
// - Adding related terms based on domain knowledge const words = expanded.toLowerCase().split(/\s+/)
// - Using a language model to rephrase or elaborate the query const expansions: string[] = []
const expanded = query.trim()
for (const word of words) {
const cleaned = word.replace(/[^\w]/g, '')
if (RagService.QUERY_EXPANSION_DICTIONARY[cleaned]) {
expansions.push(RagService.QUERY_EXPANSION_DICTIONARY[cleaned])
}
}
if (expansions.length > 0) {
expanded = `${expanded} ${expansions.join(' ')}`
logger.debug(`[RAG] Query expanded with domain terms: "${expanded}"`)
}
logger.debug(`[RAG] Original query: "${query}"`) logger.debug(`[RAG] Original query: "${query}"`)
logger.debug(`[RAG] Preprocessed query: "${expanded}"`) logger.debug(`[RAG] Preprocessed query: "${expanded}"`)
return expanded return expanded
@ -187,22 +243,26 @@ export class RagService {
RagService.EMBEDDING_DIMENSION RagService.EMBEDDING_DIMENSION
) )
const allModels = await this.ollamaService.getModels(true) if (!this.embeddingModelVerified) {
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL) const allModels = await this.ollamaService.getModels(true)
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
if (!embeddingModel) { if (!embeddingModel) {
try { try {
const downloadResult = await this.ollamaService.downloadModel(RagService.EMBEDDING_MODEL) const downloadResult = await this.ollamaService.downloadModel(RagService.EMBEDDING_MODEL)
if (!downloadResult.success) { if (!downloadResult.success) {
throw new Error(downloadResult.message || 'Unknown error during model download') throw new Error(downloadResult.message || 'Unknown error during model download')
}
} catch (modelError) {
logger.error(
`[RAG] Embedding model ${RagService.EMBEDDING_MODEL} not found locally and failed to download:`,
modelError
)
this.embeddingModelVerified = false
return null
} }
} catch (modelError) {
logger.error(
`[RAG] Embedding model ${RagService.EMBEDDING_MODEL} not found locally and failed to download:`,
modelError
)
return null
} }
this.embeddingModelVerified = true
} }
// TokenChunker uses character-based tokenization (1 char = 1 token) // TokenChunker uses character-based tokenization (1 char = 1 token)
@ -227,7 +287,8 @@ export class RagService {
const ollamaClient = await this.ollamaService.getClient() const ollamaClient = await this.ollamaService.getClient()
const embeddings: number[][] = [] // Prepare all chunk texts with prefix and truncation
const prefixedChunks: string[] = []
for (let i = 0; i < chunks.length; i++) { for (let i = 0; i < chunks.length; i++) {
let chunkText = chunks[i] let chunkText = chunks[i]
@ -237,7 +298,6 @@ export class RagService {
const estimatedTokens = this.estimateTokenCount(withPrefix) const estimatedTokens = this.estimateTokenCount(withPrefix)
if (estimatedTokens > RagService.MAX_SAFE_TOKENS) { if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
// This should be rare - log for debugging if it's occurring frequently
const prefixTokens = this.estimateTokenCount(prefixText) const prefixTokens = this.estimateTokenCount(prefixText)
const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
logger.warn( logger.warn(
@ -246,17 +306,30 @@ export class RagService {
chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText) chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
} }
logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`) prefixedChunks.push(RagService.SEARCH_DOCUMENT_PREFIX + chunkText)
}
const response = await ollamaClient.embeddings({ // Batch embed chunks for performance
const embeddings: number[][] = []
const batchSize = RagService.EMBEDDING_BATCH_SIZE
const totalBatches = Math.ceil(prefixedChunks.length / batchSize)
for (let batchIdx = 0; batchIdx < totalBatches; batchIdx++) {
const batchStart = batchIdx * batchSize
const batch = prefixedChunks.slice(batchStart, batchStart + batchSize)
logger.debug(`[RAG] Embedding batch ${batchIdx + 1}/${totalBatches} (${batch.length} chunks)`)
const response = await ollamaClient.embed({
model: RagService.EMBEDDING_MODEL, model: RagService.EMBEDDING_MODEL,
prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText, input: batch,
}) })
embeddings.push(response.embedding) embeddings.push(...response.embeddings)
if (onProgress) { if (onProgress) {
await onProgress(((i + 1) / chunks.length) * 100) const progress = ((batchStart + batch.length) / prefixedChunks.length) * 100
await onProgress(progress)
} }
} }
@ -395,14 +468,7 @@ export class RagService {
deleteAfterEmbedding: boolean, deleteAfterEmbedding: boolean,
batchOffset?: number, batchOffset?: number,
onProgress?: (percent: number) => Promise<void> onProgress?: (percent: number) => Promise<void>
): Promise<{ ): Promise<ProcessZIMFileResponse> {
success: boolean
message: string
chunks?: number
hasMoreBatches?: boolean
articlesProcessed?: number
totalArticles?: number
}> {
const zimExtractionService = new ZIMExtractionService() const zimExtractionService = new ZIMExtractionService()
// Process in batches to avoid lock timeout // Process in batches to avoid lock timeout
@ -540,14 +606,7 @@ export class RagService {
deleteAfterEmbedding: boolean = false, deleteAfterEmbedding: boolean = false,
batchOffset?: number, batchOffset?: number,
onProgress?: (percent: number) => Promise<void> onProgress?: (percent: number) => Promise<void>
): Promise<{ ): Promise<ProcessAndEmbedFileResponse> {
success: boolean
message: string
chunks?: number
hasMoreBatches?: boolean
articlesProcessed?: number
totalArticles?: number
}> {
try { try {
const fileType = determineFileType(filepath) const fileType = determineFileType(filepath)
logger.debug(`[RAG] Processing file: ${filepath} (detected type: ${fileType})`) logger.debug(`[RAG] Processing file: ${filepath} (detected type: ${fileType})`)
@ -631,14 +690,18 @@ export class RagService {
return [] return []
} }
const allModels = await this.ollamaService.getModels(true) if (!this.embeddingModelVerified) {
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL) const allModels = await this.ollamaService.getModels(true)
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
if (!embeddingModel) { if (!embeddingModel) {
logger.warn( logger.warn(
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.` `[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
) )
return [] this.embeddingModelVerified = false
return []
}
this.embeddingModelVerified = true
} }
// Preprocess query for better matching // Preprocess query for better matching
@ -666,9 +729,9 @@ export class RagService {
return [] return []
} }
const response = await ollamaClient.embeddings({ const response = await ollamaClient.embed({
model: RagService.EMBEDDING_MODEL, model: RagService.EMBEDDING_MODEL,
prompt: prefixedQuery, input: [prefixedQuery],
}) })
// Perform semantic search with a higher limit to enable reranking // Perform semantic search with a higher limit to enable reranking
@ -678,7 +741,7 @@ export class RagService {
) )
const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, { const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
vector: response.embedding, vector: response.embeddings[0],
limit: searchLimit, limit: searchLimit,
score_threshold: scoreThreshold, score_threshold: scoreThreshold,
with_payload: true, with_payload: true,
@ -687,7 +750,7 @@ export class RagService {
logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`) logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
// Map results with metadata for reranking // Map results with metadata for reranking
const resultsWithMetadata = searchResults.map((result) => ({ const resultsWithMetadata: RAGResult[] = searchResults.map((result) => ({
text: (result.payload?.text as string) || '', text: (result.payload?.text as string) || '',
score: result.score, score: result.score,
keywords: (result.payload?.keywords as string) || '', keywords: (result.payload?.keywords as string) || '',
@ -700,6 +763,7 @@ export class RagService {
hierarchy: result.payload?.hierarchy as string | undefined, hierarchy: result.payload?.hierarchy as string | undefined,
document_id: result.payload?.document_id as string | undefined, document_id: result.payload?.document_id as string | undefined,
content_type: result.payload?.content_type as string | undefined, content_type: result.payload?.content_type as string | undefined,
source: result.payload?.source as string | undefined,
})) }))
const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query) const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
@ -711,8 +775,11 @@ export class RagService {
) )
}) })
// Apply source diversity penalty to avoid all results from the same document
const diverseResults = this.applySourceDiversity(rerankedResults)
// Return top N results with enhanced metadata // Return top N results with enhanced metadata
return rerankedResults.slice(0, limit).map((result) => ({ return diverseResults.slice(0, limit).map((result) => ({
text: result.text, text: result.text,
score: result.finalScore, score: result.finalScore,
metadata: { metadata: {
@ -748,34 +815,10 @@ export class RagService {
* outweigh the overhead. * outweigh the overhead.
*/ */
private rerankResults( private rerankResults(
results: Array<{ results: Array<RAGResult>,
text: string
score: number
keywords: string
chunk_index: number
created_at: number
article_title?: string
section_title?: string
full_title?: string
hierarchy?: string
document_id?: string
content_type?: string
}>,
queryKeywords: string[], queryKeywords: string[],
originalQuery: string originalQuery: string
): Array<{ ): Array<RerankedRAGResult> {
text: string
score: number
finalScore: number
chunk_index: number
created_at: number
article_title?: string
section_title?: string
full_title?: string
hierarchy?: string
document_id?: string
content_type?: string
}> {
return results return results
.map((result) => { .map((result) => {
let finalScore = result.score let finalScore = result.score
@ -851,6 +894,37 @@ export class RagService {
.sort((a, b) => b.finalScore - a.finalScore) .sort((a, b) => b.finalScore - a.finalScore)
} }
/**
* Applies a diversity penalty so results from the same source are down-weighted.
* Uses greedy selection: for each result, apply 0.85^n penalty where n is the
* number of results already selected from the same source.
*/
private applySourceDiversity(
results: Array<RerankedRAGResult>
) {
const sourceCounts = new Map<string, number>()
const DIVERSITY_PENALTY = 0.85
return results
.map((result) => {
const sourceKey = result.document_id || result.source || 'unknown'
const count = sourceCounts.get(sourceKey) || 0
const penalty = Math.pow(DIVERSITY_PENALTY, count)
const diverseScore = result.finalScore * penalty
sourceCounts.set(sourceKey, count + 1)
if (count > 0) {
logger.debug(
`[RAG] Source diversity penalty for "${sourceKey}": ${result.finalScore.toFixed(4)}${diverseScore.toFixed(4)} (seen ${count}x)`
)
}
return { ...result, finalScore: diverseScore }
})
.sort((a, b) => b.finalScore - a.finalScore)
}
/** /**
* Retrieve all unique source files that have been stored in the knowledge base. * Retrieve all unique source files that have been stored in the knowledge base.
* @returns Array of unique full source paths * @returns Array of unique full source paths
@ -866,12 +940,12 @@ export class RagService {
let offset: string | number | null | Record<string, unknown> = null let offset: string | number | null | Record<string, unknown> = null
const batchSize = 100 const batchSize = 100
// Scroll through all points in the collection // Scroll through all points in the collection (only fetch source field)
do { do {
const scrollResult = await this.qdrant!.scroll(RagService.CONTENT_COLLECTION_NAME, { const scrollResult = await this.qdrant!.scroll(RagService.CONTENT_COLLECTION_NAME, {
limit: batchSize, limit: batchSize,
offset: offset, offset: offset,
with_payload: true, with_payload: ['source'],
with_vector: false, with_vector: false,
}) })

View File

@ -64,6 +64,16 @@ export const FALLBACK_RECOMMENDED_OLLAMA_MODELS: NomadOllamaModel[] = [
export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // default to qwen2.5 for query rewriting with good balance of text task performance and resource usage export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // default to qwen2.5 for query rewriting with good balance of text task performance and resource usage
/**
 * Adaptive RAG context limits based on model size.
 * Smaller models get overwhelmed with too much context, so we cap it.
 * Tiers MUST stay ordered by ascending maxParams — consumers pick the first
 * tier where the model's parameter count (in billions) is <= maxParams.
 * maxTokens of 0 means "no token cap".
 */
export const RAG_CONTEXT_LIMITS: { maxParams: number; maxResults: number; maxTokens: number }[] = [
{ maxParams: 3, maxResults: 2, maxTokens: 1000 }, // 1-3B models
{ maxParams: 8, maxResults: 4, maxTokens: 2500 }, // 4-8B models
{ maxParams: Infinity, maxResults: 5, maxTokens: 0 }, // larger than 8B (no cap)
]
export const SYSTEM_PROMPTS = { export const SYSTEM_PROMPTS = {
default: ` default: `
Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred. Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred.
@ -113,7 +123,7 @@ Ensure that your suggestions are comma-seperated with no conjunctions like "and"
Do not use line breaks, new lines, or extra spacing to separate the suggestions. Do not use line breaks, new lines, or extra spacing to separate the suggestions.
Format: suggestion1, suggestion2, suggestion3 Format: suggestion1, suggestion2, suggestion3
`, `,
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`, title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 50 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
query_rewrite: ` query_rewrite: `
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history. You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.

View File

@ -4,13 +4,16 @@
### Features ### Features
- **AI Assistant**: Added improved user guidance for troubleshooting GPU pass-through issues - **AI Assistant**: Added improved user guidance for troubleshooting GPU pass-through issues
- **AI Assistant**: The last used model is now automatically selected when a new chat is started
- **Settings**: Nomad now automatically performs nightly checks for available app updates, and users can select and apply updates from the Apps page in Settings - **Settings**: Nomad now automatically performs nightly checks for available app updates, and users can select and apply updates from the Apps page in Settings
### Bug Fixes ### Bug Fixes
- **Settings**: Fixed an issue where the AI Assistant settings page would be shown in navigation even if the AI Assistant was not installed, thus causing 404 errors when clicked - **Settings**: Fixed an issue where the AI Assistant settings page would be shown in navigation even if the AI Assistant was not installed, thus causing 404 errors when clicked
- **Security**: Path traversal and SSRF mitigations - **Security**: Path traversal and SSRF mitigations
- **AI Assistant**: Fixed an issue that was causing intermittent failures saving chat session titles
### Improvements ### Improvements
- **AI Assistant**: Extensive performance improvements and improved RAG intelligence/context usage
## Version 1.28.0 - March 5, 2026 ## Version 1.28.0 - March 5, 2026

View File

@ -5,3 +5,32 @@ export type EmbedJobWithProgress = {
progress: number progress: number
status: string status: string
} }
/**
 * Result of processing + embedding a file into the knowledge base.
 * Batch-related fields (hasMoreBatches, articlesProcessed, totalArticles)
 * are only populated for multi-batch processing such as ZIM archives.
 */
export type ProcessAndEmbedFileResponse = {
success: boolean
message: string
chunks?: number
hasMoreBatches?: boolean
articlesProcessed?: number
totalArticles?: number
}

/** ZIM archive processing returns the same shape as generic file processing. */
export type ProcessZIMFileResponse = ProcessAndEmbedFileResponse

/** A single semantic-search hit, with its stored payload metadata. Optional fields depend on the source document type. */
export type RAGResult = {
text: string
score: number
keywords: string
chunk_index: number
created_at: number
article_title?: string
section_title?: string
full_title?: string
hierarchy?: string
document_id?: string
content_type?: string
source?: string
}

/** RAGResult after reranking: `keywords` is dropped and the combined `finalScore` is added. */
export type RerankedRAGResult = Omit<RAGResult, 'keywords'> & {
finalScore: number
}