mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
feat(AI Assistant): performance improvements and smarter RAG context usage
This commit is contained in:
parent
932f60cdb6
commit
6600c03a71
|
|
@ -5,7 +5,7 @@ import { modelNameSchema } from '#validators/download'
|
|||
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
|
||||
import { inject } from '@adonisjs/core'
|
||||
import type { HttpContext } from '@adonisjs/core/http'
|
||||
import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
||||
import { DEFAULT_QUERY_REWRITE_MODEL, RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
import type { Message } from 'ollama'
|
||||
|
||||
|
|
@ -66,9 +66,28 @@ export default class OllamaController {
|
|||
|
||||
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
|
||||
|
||||
// If relevant context is found, inject as a system message
|
||||
// If relevant context is found, inject as a system message with adaptive limits
|
||||
if (relevantDocs.length > 0) {
|
||||
const contextText = relevantDocs
|
||||
// Determine context budget based on model size
|
||||
const { maxResults, maxTokens } = this.getContextLimitsForModel(reqData.model)
|
||||
let trimmedDocs = relevantDocs.slice(0, maxResults)
|
||||
|
||||
// Apply token cap if set (estimate ~4 chars per token)
|
||||
// Always include the first (most relevant) result — the cap only gates subsequent results
|
||||
if (maxTokens > 0) {
|
||||
const charCap = maxTokens * 4
|
||||
let totalChars = 0
|
||||
trimmedDocs = trimmedDocs.filter((doc, idx) => {
|
||||
totalChars += doc.text.length
|
||||
return idx === 0 || totalChars <= charCap
|
||||
})
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
`[RAG] Injecting ${trimmedDocs.length}/${relevantDocs.length} results (model: ${reqData.model}, maxResults: ${maxResults}, maxTokens: ${maxTokens || 'unlimited'})`
|
||||
)
|
||||
|
||||
const contextText = trimmedDocs
|
||||
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
|
||||
.join('\n\n')
|
||||
|
||||
|
|
@ -174,6 +193,25 @@ export default class OllamaController {
|
|||
return await this.ollamaService.getModels()
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines RAG context limits based on model size extracted from the model name.
|
||||
* Parses size indicators like "1b", "3b", "8b", "70b" from model names/tags.
|
||||
*/
|
||||
private getContextLimitsForModel(modelName: string): { maxResults: number; maxTokens: number } {
|
||||
// Extract parameter count from model name (e.g., "llama3.2:3b", "qwen2.5:1.5b", "gemma:7b")
|
||||
const sizeMatch = modelName.match(/(\d+\.?\d*)[bB]/)
|
||||
const paramBillions = sizeMatch ? parseFloat(sizeMatch[1]) : 8 // default to 8B if unknown
|
||||
|
||||
for (const tier of RAG_CONTEXT_LIMITS) {
|
||||
if (paramBillions <= tier.maxParams) {
|
||||
return { maxResults: tier.maxResults, maxTokens: tier.maxTokens }
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: no limits
|
||||
return { maxResults: 5, maxTokens: 0 }
|
||||
}
|
||||
|
||||
private async rewriteQueryWithContext(
|
||||
messages: Message[]
|
||||
): Promise<string | null> {
|
||||
|
|
@ -199,8 +237,8 @@ export default class OllamaController {
|
|||
})
|
||||
.join('\n')
|
||||
|
||||
const availableModels = await this.ollamaService.getAvailableModels({ query: null, limit: 500 })
|
||||
const rewriteModelAvailable = availableModels?.models.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
|
||||
const installedModels = await this.ollamaService.getModels(true)
|
||||
const rewriteModelAvailable = installedModels?.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
|
||||
if (!rewriteModelAvailable) {
|
||||
logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
|
||||
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
|
||||
|
|
|
|||
|
|
@ -16,11 +16,13 @@ import { join, resolve, sep } from 'node:path'
|
|||
import KVStore from '#models/kv_store'
|
||||
import { ZIMExtractionService } from './zim_extraction_service.js'
|
||||
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
|
||||
import { ProcessAndEmbedFileResponse, ProcessZIMFileResponse, RAGResult, RerankedRAGResult } from '../../types/rag.js'
|
||||
|
||||
@inject()
|
||||
export class RagService {
|
||||
private qdrant: QdrantClient | null = null
|
||||
private qdrantInitPromise: Promise<void> | null = null
|
||||
private embeddingModelVerified = false
|
||||
public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
|
||||
public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
|
||||
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
|
||||
|
|
@ -33,6 +35,7 @@ export class RagService {
|
|||
// Nomic Embed Text v1.5 uses task-specific prefixes for optimal performance
|
||||
public static SEARCH_DOCUMENT_PREFIX = 'search_document: '
|
||||
public static SEARCH_QUERY_PREFIX = 'search_query: '
|
||||
public static EMBEDDING_BATCH_SIZE = 8 // Conservative batch size for low-end hardware
|
||||
|
||||
constructor(
|
||||
private dockerService: DockerService,
|
||||
|
|
@ -75,6 +78,16 @@ export class RagService {
|
|||
},
|
||||
})
|
||||
}
|
||||
|
||||
// Create payload indexes for faster filtering (idempotent — Qdrant ignores duplicates)
|
||||
await this.qdrant!.createPayloadIndex(collectionName, {
|
||||
field_name: 'source',
|
||||
field_schema: 'keyword',
|
||||
})
|
||||
await this.qdrant!.createPayloadIndex(collectionName, {
|
||||
field_name: 'content_type',
|
||||
field_schema: 'keyword',
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error('Error ensuring Qdrant collection:', error)
|
||||
throw error
|
||||
|
|
@ -148,14 +161,57 @@ export class RagService {
|
|||
/**
|
||||
* Preprocesses a query to improve retrieval by expanding it with context.
|
||||
* This helps match documents even when using different terminology.
|
||||
* TODO: We could probably move this to a separate QueryPreprocessor class if it grows more complex, but for now it's manageable here.
|
||||
*/
|
||||
private static QUERY_EXPANSION_DICTIONARY: Record<string, string> = {
|
||||
'bob': 'bug out bag',
|
||||
'bov': 'bug out vehicle',
|
||||
'bol': 'bug out location',
|
||||
'edc': 'every day carry',
|
||||
'mre': 'meal ready to eat',
|
||||
'shtf': 'shit hits the fan',
|
||||
'teotwawki': 'the end of the world as we know it',
|
||||
'opsec': 'operational security',
|
||||
'ifak': 'individual first aid kit',
|
||||
'ghb': 'get home bag',
|
||||
'ghi': 'get home in',
|
||||
'wrol': 'without rule of law',
|
||||
'emp': 'electromagnetic pulse',
|
||||
'ham': 'ham amateur radio',
|
||||
'nbr': 'nuclear biological radiological',
|
||||
'cbrn': 'chemical biological radiological nuclear',
|
||||
'sar': 'search and rescue',
|
||||
'comms': 'communications radio',
|
||||
'fifo': 'first in first out',
|
||||
'mylar': 'mylar bag food storage',
|
||||
'paracord': 'paracord 550 cord',
|
||||
'ferro': 'ferro rod fire starter',
|
||||
'bivvy': 'bivvy bivy emergency shelter',
|
||||
'bdu': 'battle dress uniform',
|
||||
'gmrs': 'general mobile radio service',
|
||||
'frs': 'family radio service',
|
||||
'nbc': 'nuclear biological chemical',
|
||||
}
|
||||
|
||||
private preprocessQuery(query: string): string {
|
||||
// Future: this is a placeholder for more advanced query expansion techniques.
|
||||
// For now, we simply trim whitespace. Improvements could include:
|
||||
// - Synonym expansion using a thesaurus
|
||||
// - Adding related terms based on domain knowledge
|
||||
// - Using a language model to rephrase or elaborate the query
|
||||
const expanded = query.trim()
|
||||
let expanded = query.trim()
|
||||
|
||||
// Expand known domain abbreviations/acronyms
|
||||
const words = expanded.toLowerCase().split(/\s+/)
|
||||
const expansions: string[] = []
|
||||
|
||||
for (const word of words) {
|
||||
const cleaned = word.replace(/[^\w]/g, '')
|
||||
if (RagService.QUERY_EXPANSION_DICTIONARY[cleaned]) {
|
||||
expansions.push(RagService.QUERY_EXPANSION_DICTIONARY[cleaned])
|
||||
}
|
||||
}
|
||||
|
||||
if (expansions.length > 0) {
|
||||
expanded = `${expanded} ${expansions.join(' ')}`
|
||||
logger.debug(`[RAG] Query expanded with domain terms: "${expanded}"`)
|
||||
}
|
||||
|
||||
logger.debug(`[RAG] Original query: "${query}"`)
|
||||
logger.debug(`[RAG] Preprocessed query: "${expanded}"`)
|
||||
return expanded
|
||||
|
|
@ -187,22 +243,26 @@ export class RagService {
|
|||
RagService.EMBEDDING_DIMENSION
|
||||
)
|
||||
|
||||
const allModels = await this.ollamaService.getModels(true)
|
||||
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
||||
if (!this.embeddingModelVerified) {
|
||||
const allModels = await this.ollamaService.getModels(true)
|
||||
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
||||
|
||||
if (!embeddingModel) {
|
||||
try {
|
||||
const downloadResult = await this.ollamaService.downloadModel(RagService.EMBEDDING_MODEL)
|
||||
if (!downloadResult.success) {
|
||||
throw new Error(downloadResult.message || 'Unknown error during model download')
|
||||
if (!embeddingModel) {
|
||||
try {
|
||||
const downloadResult = await this.ollamaService.downloadModel(RagService.EMBEDDING_MODEL)
|
||||
if (!downloadResult.success) {
|
||||
throw new Error(downloadResult.message || 'Unknown error during model download')
|
||||
}
|
||||
} catch (modelError) {
|
||||
logger.error(
|
||||
`[RAG] Embedding model ${RagService.EMBEDDING_MODEL} not found locally and failed to download:`,
|
||||
modelError
|
||||
)
|
||||
this.embeddingModelVerified = false
|
||||
return null
|
||||
}
|
||||
} catch (modelError) {
|
||||
logger.error(
|
||||
`[RAG] Embedding model ${RagService.EMBEDDING_MODEL} not found locally and failed to download:`,
|
||||
modelError
|
||||
)
|
||||
return null
|
||||
}
|
||||
this.embeddingModelVerified = true
|
||||
}
|
||||
|
||||
// TokenChunker uses character-based tokenization (1 char = 1 token)
|
||||
|
|
@ -227,7 +287,8 @@ export class RagService {
|
|||
|
||||
const ollamaClient = await this.ollamaService.getClient()
|
||||
|
||||
const embeddings: number[][] = []
|
||||
// Prepare all chunk texts with prefix and truncation
|
||||
const prefixedChunks: string[] = []
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
let chunkText = chunks[i]
|
||||
|
||||
|
|
@ -237,7 +298,6 @@ export class RagService {
|
|||
const estimatedTokens = this.estimateTokenCount(withPrefix)
|
||||
|
||||
if (estimatedTokens > RagService.MAX_SAFE_TOKENS) {
|
||||
// This should be rare - log for debugging if it's occurring frequently
|
||||
const prefixTokens = this.estimateTokenCount(prefixText)
|
||||
const maxTokensForText = RagService.MAX_SAFE_TOKENS - prefixTokens
|
||||
logger.warn(
|
||||
|
|
@ -246,17 +306,30 @@ export class RagService {
|
|||
chunkText = this.truncateToTokenLimit(chunkText, maxTokensForText)
|
||||
}
|
||||
|
||||
logger.debug(`[RAG] Generating embedding for chunk ${i + 1}/${chunks.length}`)
|
||||
prefixedChunks.push(RagService.SEARCH_DOCUMENT_PREFIX + chunkText)
|
||||
}
|
||||
|
||||
const response = await ollamaClient.embeddings({
|
||||
// Batch embed chunks for performance
|
||||
const embeddings: number[][] = []
|
||||
const batchSize = RagService.EMBEDDING_BATCH_SIZE
|
||||
const totalBatches = Math.ceil(prefixedChunks.length / batchSize)
|
||||
|
||||
for (let batchIdx = 0; batchIdx < totalBatches; batchIdx++) {
|
||||
const batchStart = batchIdx * batchSize
|
||||
const batch = prefixedChunks.slice(batchStart, batchStart + batchSize)
|
||||
|
||||
logger.debug(`[RAG] Embedding batch ${batchIdx + 1}/${totalBatches} (${batch.length} chunks)`)
|
||||
|
||||
const response = await ollamaClient.embed({
|
||||
model: RagService.EMBEDDING_MODEL,
|
||||
prompt: RagService.SEARCH_DOCUMENT_PREFIX + chunkText,
|
||||
input: batch,
|
||||
})
|
||||
|
||||
embeddings.push(response.embedding)
|
||||
embeddings.push(...response.embeddings)
|
||||
|
||||
if (onProgress) {
|
||||
await onProgress(((i + 1) / chunks.length) * 100)
|
||||
const progress = ((batchStart + batch.length) / prefixedChunks.length) * 100
|
||||
await onProgress(progress)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -395,14 +468,7 @@ export class RagService {
|
|||
deleteAfterEmbedding: boolean,
|
||||
batchOffset?: number,
|
||||
onProgress?: (percent: number) => Promise<void>
|
||||
): Promise<{
|
||||
success: boolean
|
||||
message: string
|
||||
chunks?: number
|
||||
hasMoreBatches?: boolean
|
||||
articlesProcessed?: number
|
||||
totalArticles?: number
|
||||
}> {
|
||||
): Promise<ProcessZIMFileResponse> {
|
||||
const zimExtractionService = new ZIMExtractionService()
|
||||
|
||||
// Process in batches to avoid lock timeout
|
||||
|
|
@ -540,14 +606,7 @@ export class RagService {
|
|||
deleteAfterEmbedding: boolean = false,
|
||||
batchOffset?: number,
|
||||
onProgress?: (percent: number) => Promise<void>
|
||||
): Promise<{
|
||||
success: boolean
|
||||
message: string
|
||||
chunks?: number
|
||||
hasMoreBatches?: boolean
|
||||
articlesProcessed?: number
|
||||
totalArticles?: number
|
||||
}> {
|
||||
): Promise<ProcessAndEmbedFileResponse> {
|
||||
try {
|
||||
const fileType = determineFileType(filepath)
|
||||
logger.debug(`[RAG] Processing file: ${filepath} (detected type: ${fileType})`)
|
||||
|
|
@ -631,14 +690,18 @@ export class RagService {
|
|||
return []
|
||||
}
|
||||
|
||||
const allModels = await this.ollamaService.getModels(true)
|
||||
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
||||
if (!this.embeddingModelVerified) {
|
||||
const allModels = await this.ollamaService.getModels(true)
|
||||
const embeddingModel = allModels.find((model) => model.name === RagService.EMBEDDING_MODEL)
|
||||
|
||||
if (!embeddingModel) {
|
||||
logger.warn(
|
||||
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
|
||||
)
|
||||
return []
|
||||
if (!embeddingModel) {
|
||||
logger.warn(
|
||||
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
|
||||
)
|
||||
this.embeddingModelVerified = false
|
||||
return []
|
||||
}
|
||||
this.embeddingModelVerified = true
|
||||
}
|
||||
|
||||
// Preprocess query for better matching
|
||||
|
|
@ -666,9 +729,9 @@ export class RagService {
|
|||
return []
|
||||
}
|
||||
|
||||
const response = await ollamaClient.embeddings({
|
||||
const response = await ollamaClient.embed({
|
||||
model: RagService.EMBEDDING_MODEL,
|
||||
prompt: prefixedQuery,
|
||||
input: [prefixedQuery],
|
||||
})
|
||||
|
||||
// Perform semantic search with a higher limit to enable reranking
|
||||
|
|
@ -678,7 +741,7 @@ export class RagService {
|
|||
)
|
||||
|
||||
const searchResults = await this.qdrant!.search(RagService.CONTENT_COLLECTION_NAME, {
|
||||
vector: response.embedding,
|
||||
vector: response.embeddings[0],
|
||||
limit: searchLimit,
|
||||
score_threshold: scoreThreshold,
|
||||
with_payload: true,
|
||||
|
|
@ -687,7 +750,7 @@ export class RagService {
|
|||
logger.debug(`[RAG] Found ${searchResults.length} results above threshold ${scoreThreshold}`)
|
||||
|
||||
// Map results with metadata for reranking
|
||||
const resultsWithMetadata = searchResults.map((result) => ({
|
||||
const resultsWithMetadata: RAGResult[] = searchResults.map((result) => ({
|
||||
text: (result.payload?.text as string) || '',
|
||||
score: result.score,
|
||||
keywords: (result.payload?.keywords as string) || '',
|
||||
|
|
@ -700,6 +763,7 @@ export class RagService {
|
|||
hierarchy: result.payload?.hierarchy as string | undefined,
|
||||
document_id: result.payload?.document_id as string | undefined,
|
||||
content_type: result.payload?.content_type as string | undefined,
|
||||
source: result.payload?.source as string | undefined,
|
||||
}))
|
||||
|
||||
const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query)
|
||||
|
|
@ -711,8 +775,11 @@ export class RagService {
|
|||
)
|
||||
})
|
||||
|
||||
// Apply source diversity penalty to avoid all results from the same document
|
||||
const diverseResults = this.applySourceDiversity(rerankedResults)
|
||||
|
||||
// Return top N results with enhanced metadata
|
||||
return rerankedResults.slice(0, limit).map((result) => ({
|
||||
return diverseResults.slice(0, limit).map((result) => ({
|
||||
text: result.text,
|
||||
score: result.finalScore,
|
||||
metadata: {
|
||||
|
|
@ -748,34 +815,10 @@ export class RagService {
|
|||
* outweigh the overhead.
|
||||
*/
|
||||
private rerankResults(
|
||||
results: Array<{
|
||||
text: string
|
||||
score: number
|
||||
keywords: string
|
||||
chunk_index: number
|
||||
created_at: number
|
||||
article_title?: string
|
||||
section_title?: string
|
||||
full_title?: string
|
||||
hierarchy?: string
|
||||
document_id?: string
|
||||
content_type?: string
|
||||
}>,
|
||||
results: Array<RAGResult>,
|
||||
queryKeywords: string[],
|
||||
originalQuery: string
|
||||
): Array<{
|
||||
text: string
|
||||
score: number
|
||||
finalScore: number
|
||||
chunk_index: number
|
||||
created_at: number
|
||||
article_title?: string
|
||||
section_title?: string
|
||||
full_title?: string
|
||||
hierarchy?: string
|
||||
document_id?: string
|
||||
content_type?: string
|
||||
}> {
|
||||
): Array<RerankedRAGResult> {
|
||||
return results
|
||||
.map((result) => {
|
||||
let finalScore = result.score
|
||||
|
|
@ -851,6 +894,37 @@ export class RagService {
|
|||
.sort((a, b) => b.finalScore - a.finalScore)
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies a diversity penalty so results from the same source are down-weighted.
|
||||
* Uses greedy selection: for each result, apply 0.85^n penalty where n is the
|
||||
* number of results already selected from the same source.
|
||||
*/
|
||||
private applySourceDiversity(
|
||||
results: Array<RerankedRAGResult>
|
||||
) {
|
||||
const sourceCounts = new Map<string, number>()
|
||||
const DIVERSITY_PENALTY = 0.85
|
||||
|
||||
return results
|
||||
.map((result) => {
|
||||
const sourceKey = result.document_id || result.source || 'unknown'
|
||||
const count = sourceCounts.get(sourceKey) || 0
|
||||
const penalty = Math.pow(DIVERSITY_PENALTY, count)
|
||||
const diverseScore = result.finalScore * penalty
|
||||
|
||||
sourceCounts.set(sourceKey, count + 1)
|
||||
|
||||
if (count > 0) {
|
||||
logger.debug(
|
||||
`[RAG] Source diversity penalty for "${sourceKey}": ${result.finalScore.toFixed(4)} → ${diverseScore.toFixed(4)} (seen ${count}x)`
|
||||
)
|
||||
}
|
||||
|
||||
return { ...result, finalScore: diverseScore }
|
||||
})
|
||||
.sort((a, b) => b.finalScore - a.finalScore)
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve all unique source files that have been stored in the knowledge base.
|
||||
* @returns Array of unique full source paths
|
||||
|
|
@ -866,12 +940,12 @@ export class RagService {
|
|||
let offset: string | number | null | Record<string, unknown> = null
|
||||
const batchSize = 100
|
||||
|
||||
// Scroll through all points in the collection
|
||||
// Scroll through all points in the collection (only fetch source field)
|
||||
do {
|
||||
const scrollResult = await this.qdrant!.scroll(RagService.CONTENT_COLLECTION_NAME, {
|
||||
limit: batchSize,
|
||||
offset: offset,
|
||||
with_payload: true,
|
||||
with_payload: ['source'],
|
||||
with_vector: false,
|
||||
})
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,16 @@ export const FALLBACK_RECOMMENDED_OLLAMA_MODELS: NomadOllamaModel[] = [
|
|||
|
||||
export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // default to qwen2.5 for query rewriting with good balance of text task performance and resource usage
|
||||
|
||||
/**
|
||||
* Adaptive RAG context limits based on model size.
|
||||
* Smaller models get overwhelmed with too much context, so we cap it.
|
||||
*/
|
||||
export const RAG_CONTEXT_LIMITS: { maxParams: number; maxResults: number; maxTokens: number }[] = [
|
||||
{ maxParams: 3, maxResults: 2, maxTokens: 1000 }, // 1-3B models
|
||||
{ maxParams: 8, maxResults: 4, maxTokens: 2500 }, // 4-8B models
|
||||
{ maxParams: Infinity, maxResults: 5, maxTokens: 0 }, // 13B+ (no cap)
|
||||
]
|
||||
|
||||
export const SYSTEM_PROMPTS = {
|
||||
default: `
|
||||
Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred.
|
||||
|
|
@ -113,7 +123,7 @@ Ensure that your suggestions are comma-seperated with no conjunctions like "and"
|
|||
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
|
||||
Format: suggestion1, suggestion2, suggestion3
|
||||
`,
|
||||
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
|
||||
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 50 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
|
||||
query_rewrite: `
|
||||
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
|
||||
|
||||
|
|
|
|||
|
|
@ -4,13 +4,16 @@
|
|||
|
||||
### Features
|
||||
- **AI Assistant**: Added improved user guidance for troubleshooting GPU pass-through issues
|
||||
- **AI Assistant**: The last used model is now automatically selected when a new chat is started
|
||||
- **Settings**: Nomad now automatically performs nightly checks for available app updates, and users can select and apply updates from the Apps page in Settings
|
||||
|
||||
### Bug Fixes
|
||||
- **Settings**: Fixed an issue where the AI Assistant settings page would be shown in navigation even if the AI Assistant was not installed, thus causing 404 errors when clicked
|
||||
- **Security**: Path traversal and SSRF mitigations
|
||||
- **AI Assistant**: Fixed an issue that was causing intermittent failures saving chat session titles
|
||||
|
||||
### Improvements
|
||||
- **AI Assistant**: Extensive performance improvements and improved RAG intelligence/context usage
|
||||
|
||||
## Version 1.28.0 - March 5, 2026
|
||||
|
||||
|
|
|
|||
|
|
@ -5,3 +5,32 @@ export type EmbedJobWithProgress = {
|
|||
progress: number
|
||||
status: string
|
||||
}
|
||||
|
||||
export type ProcessAndEmbedFileResponse = {
|
||||
success: boolean
|
||||
message: string
|
||||
chunks?: number
|
||||
hasMoreBatches?: boolean
|
||||
articlesProcessed?: number
|
||||
totalArticles?: number
|
||||
}
|
||||
export type ProcessZIMFileResponse = ProcessAndEmbedFileResponse
|
||||
|
||||
export type RAGResult = {
|
||||
text: string
|
||||
score: number
|
||||
keywords: string
|
||||
chunk_index: number
|
||||
created_at: number
|
||||
article_title?: string
|
||||
section_title?: string
|
||||
full_title?: string
|
||||
hierarchy?: string
|
||||
document_id?: string
|
||||
content_type?: string
|
||||
source?: string
|
||||
}
|
||||
|
||||
export type RerankedRAGResult = Omit<RAGResult, 'keywords'> & {
|
||||
finalScore: number
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user