mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-05-25 22:05:07 +02:00
Compare commits
48 Commits
main
...
v1.32.0-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb47e1e4e3 | ||
|
|
1ad898bc8b | ||
|
|
0fdf31c2e4 | ||
|
|
0ddcfe9011 | ||
|
|
a7dbee55c4 | ||
|
|
d66eaa3d42 | ||
|
|
d81b66bb14 | ||
|
|
8ef2c69f56 | ||
|
|
a2f3a84446 | ||
|
|
822b94629c | ||
|
|
27cd803090 | ||
|
|
360e7a0af4 | ||
|
|
bb1834a364 | ||
|
|
0836d84bb2 | ||
|
|
5924056502 | ||
|
|
322087c1b7 | ||
|
|
cc789c1863 | ||
|
|
fe57d59868 | ||
|
|
269c7ce695 | ||
|
|
b194dfa136 | ||
|
|
00b4b26224 | ||
|
|
3bacd14dbd | ||
|
|
b168001450 | ||
|
|
90946ecf5a | ||
|
|
08d14473d2 | ||
|
|
9c98d8225b | ||
|
|
d22c0b202c | ||
|
|
36b7613f85 | ||
|
|
644170ed6b | ||
|
|
776d099c4a | ||
|
|
c4aa23a9b6 | ||
|
|
6e4795f0d8 | ||
|
|
dcd9f4b238 | ||
|
|
4497e36100 | ||
|
|
1aa26011b1 | ||
|
|
38dfb19f18 | ||
|
|
5ee4e1187c | ||
|
|
2075a62b60 | ||
|
|
d8ee6f5ceb | ||
|
|
53d143bb22 | ||
|
|
0d5b6f7927 | ||
|
|
f1dd184f4d | ||
|
|
b5d4804d57 | ||
|
|
898c4441b9 | ||
|
|
b365130e76 | ||
|
|
10e8957b78 | ||
|
|
10ba8000cf | ||
|
|
462afae4ec |
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
|
|
@ -26,8 +26,6 @@ jobs:
|
|||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Sync tags
|
||||
run: git fetch --tags --force
|
||||
- name: semantic-release
|
||||
uses: cycjimmy/semantic-release-action@v6
|
||||
id: semver
|
||||
|
|
|
|||
|
|
@ -57,7 +57,6 @@ export default defineConfig({
|
|||
() => import('#providers/kiwix_migration_provider'),
|
||||
() => import('#providers/qdrant_restart_policy_provider'),
|
||||
() => import('#providers/version_check_provider'),
|
||||
() => import('#providers/gpu_passthrough_remediation_provider'),
|
||||
],
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -5,8 +5,7 @@ import { RagService } from '#services/rag_service'
|
|||
import Service from '#models/service'
|
||||
import KVStore from '#models/kv_store'
|
||||
import { modelNameSchema } from '#validators/download'
|
||||
import { chatSchema, getAvailableModelsSchema, unloadChatModelsSchema } from '#validators/ollama'
|
||||
import { assertNotCloudMetadataUrl } from '#validators/common'
|
||||
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
|
||||
import { inject } from '@adonisjs/core'
|
||||
import type { HttpContext } from '@adonisjs/core/http'
|
||||
import { RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
||||
|
|
@ -34,19 +33,6 @@ export default class OllamaController {
|
|||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Send Ollama `keep_alive: 0` hints to every currently-loaded chat model
|
||||
* except the embedding model and (optionally) a target model to preserve.
|
||||
* Used by the chat UI to enforce the "one chat model at a time" invariant
|
||||
* on model-switch, session-switch, and page-load. Best-effort: a failure
|
||||
* here should not block the calling flow.
|
||||
*/
|
||||
async unloadChatModels({ request, response }: HttpContext) {
|
||||
const { targetModel } = await request.validateUsing(unloadChatModelsSchema)
|
||||
const unloaded = await this.ollamaService.unloadAllChatModelsExcept(targetModel ?? null)
|
||||
return response.status(200).json({ unloaded })
|
||||
}
|
||||
|
||||
async chat({ request, response }: HttpContext) {
|
||||
const reqData = await request.validateUsing(chatSchema)
|
||||
|
||||
|
|
@ -243,12 +229,11 @@ export default class OllamaController {
|
|||
}
|
||||
}
|
||||
|
||||
try {
|
||||
assertNotCloudMetadataUrl(remoteUrl)
|
||||
} catch (err) {
|
||||
// Validate URL format
|
||||
if (!remoteUrl.startsWith('http')) {
|
||||
return response.status(400).send({
|
||||
success: false,
|
||||
message: err instanceof Error ? err.message : 'Invalid URL.',
|
||||
message: 'Invalid URL. Must start with http:// or https://',
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -398,15 +383,10 @@ export default class OllamaController {
|
|||
// Get recent conversation history (last 6 messages for 3 turns)
|
||||
const recentMessages = messages.slice(-6)
|
||||
|
||||
// Skip rewriting on the very first turn — with only one user message
|
||||
// there is no prior context to fold in, so the rewrite would just echo
|
||||
// the message back at the cost of an extra LLM round-trip. From the
|
||||
// first follow-up onward we need the rewrite so the RAG query carries
|
||||
// entities and topics from earlier turns ("the bars" → "Hershey's bars
|
||||
// chocolate poisoning dog"); without it, embeddings match nothing and
|
||||
// the assistant loses the thread.
|
||||
// Skip rewriting for short conversations. Rewriting adds latency with
|
||||
// little RAG benefit until there is enough context to matter.
|
||||
const userMessages = recentMessages.filter(msg => msg.role === 'user')
|
||||
if (userMessages.length < 2) {
|
||||
if (userMessages.length <= 2) {
|
||||
return lastUserMessage?.content || null
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
import { RagService } from '#services/rag_service'
|
||||
import { EmbedFileJob } from '#jobs/embed_file_job'
|
||||
import KbRatioRegistry from '#models/kb_ratio_registry'
|
||||
import { inject } from '@adonisjs/core'
|
||||
import type { HttpContext } from '@adonisjs/core/http'
|
||||
import app from '@adonisjs/core/services/app'
|
||||
import { randomBytes } from 'node:crypto'
|
||||
import { sanitizeFilename } from '../utils/fs.js'
|
||||
import { basename } from 'node:path'
|
||||
import { deleteFileSchema, embedFileSchema, estimateBatchSchema, getJobStatusSchema } from '#validators/rag'
|
||||
import { deleteFileSchema, getJobStatusSchema } from '#validators/rag'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
|
||||
@inject()
|
||||
|
|
@ -68,11 +66,6 @@ export default class RagController {
|
|||
return response.status(200).json({ files })
|
||||
}
|
||||
|
||||
public async getFileWarnings({ response }: HttpContext) {
|
||||
const result = await this.ragService.computeFileWarnings()
|
||||
return response.status(200).json(result)
|
||||
}
|
||||
|
||||
public async deleteFile({ request, response }: HttpContext) {
|
||||
const { source } = await request.validateUsing(deleteFileSchema)
|
||||
const result = await this.ragService.deleteFileBySource(source)
|
||||
|
|
@ -82,21 +75,6 @@ export default class RagController {
|
|||
return response.status(200).json({ message: result.message })
|
||||
}
|
||||
|
||||
public async embedFile({ request, response }: HttpContext) {
|
||||
const { source, force } = await request.validateUsing(embedFileSchema)
|
||||
const result = await this.ragService.embedSingleFile(source, force ?? false)
|
||||
if (!result.success) {
|
||||
const status = {
|
||||
not_found: 404,
|
||||
inflight: 409,
|
||||
delete_failed: 500,
|
||||
dispatch_failed: 500,
|
||||
}[result.code]
|
||||
return response.status(status).json({ error: result.message, code: result.code })
|
||||
}
|
||||
return response.status(202).json({ message: result.message })
|
||||
}
|
||||
|
||||
public async getFailedJobs({ response }: HttpContext) {
|
||||
const jobs = await EmbedFileJob.listFailedJobs()
|
||||
return response.status(200).json(jobs)
|
||||
|
|
@ -110,11 +88,6 @@ export default class RagController {
|
|||
})
|
||||
}
|
||||
|
||||
public async policyPromptState({ response }: HttpContext) {
|
||||
const result = await this.ragService.getPolicyPromptState()
|
||||
return response.status(200).json(result)
|
||||
}
|
||||
|
||||
public async scanAndSync({ response }: HttpContext) {
|
||||
try {
|
||||
const syncResult = await this.ragService.scanAndSyncStorage()
|
||||
|
|
@ -125,41 +98,8 @@ export default class RagController {
|
|||
}
|
||||
}
|
||||
|
||||
public async reembedAll({ response }: HttpContext) {
|
||||
try {
|
||||
const result = await this.ragService.reembedAll()
|
||||
return response.status(200).json(result)
|
||||
} catch (error) {
|
||||
logger.error({ err: error }, '[RagController] Error during re-embed all')
|
||||
return response.status(500).json({ error: 'Error during re-embed all' })
|
||||
}
|
||||
}
|
||||
|
||||
public async resetAndRebuild({ response }: HttpContext) {
|
||||
try {
|
||||
const result = await this.ragService.resetAndRebuild()
|
||||
return response.status(200).json(result)
|
||||
} catch (error) {
|
||||
logger.error({ err: error }, '[RagController] Error during reset and rebuild')
|
||||
return response.status(500).json({ error: 'Error during reset and rebuild' })
|
||||
}
|
||||
}
|
||||
|
||||
public async health({ response }: HttpContext) {
|
||||
const result = await this.ragService.checkQdrantHealth()
|
||||
return response.status(200).json(result)
|
||||
}
|
||||
|
||||
public async estimateBatch({ request, response }: HttpContext) {
|
||||
const { files } = await request.validateUsing(estimateBatchSchema)
|
||||
// The registry matches on basename prefixes; if a caller passes a full path
|
||||
// (e.g. /app/storage/zim/wikipedia_en_simple_…), strip directories first so
|
||||
// patterns like `wikipedia_en_simple_` still match.
|
||||
const normalized = files.map((f) => ({
|
||||
filename: basename(f.filename),
|
||||
sizeBytes: f.sizeBytes,
|
||||
}))
|
||||
const result = await KbRatioRegistry.estimateBatch(normalized)
|
||||
return response.status(200).json(result)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ export class CheckServiceUpdatesJob {
|
|||
}
|
||||
|
||||
static async scheduleNightly() {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
|
||||
await queue.upsertJobScheduler(
|
||||
|
|
@ -114,7 +114,7 @@ export class CheckServiceUpdatesJob {
|
|||
}
|
||||
|
||||
static async dispatch() {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
|
||||
const job = await queue.add(
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ export class CheckUpdateJob {
|
|||
}
|
||||
|
||||
static async scheduleNightly() {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
|
||||
await queue.upsertJobScheduler(
|
||||
|
|
@ -61,7 +61,7 @@ export class CheckUpdateJob {
|
|||
}
|
||||
|
||||
static async dispatch() {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
|
||||
const job = await queue.add(this.key, {}, {
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ export class DownloadModelJob {
|
|||
|
||||
/** Signal cancellation via Redis so the worker process can pick it up on its next poll tick */
|
||||
static async signalCancel(jobId: string): Promise<void> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const client = await queue.client
|
||||
await client.set(this.cancelKey(jobId), '1', 'EX', 300) // 5 min TTL
|
||||
|
|
@ -66,7 +66,7 @@ export class DownloadModelJob {
|
|||
DownloadModelJob.abortControllers.set(job.id!, abortController)
|
||||
|
||||
// Get Redis client for checking cancel signals from the API process
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const cancelRedis = await queueService.getQueue(DownloadModelJob.queue).client
|
||||
|
||||
// Track whether cancellation was explicitly requested by the user. Only user-initiated
|
||||
|
|
@ -154,14 +154,14 @@ export class DownloadModelJob {
|
|||
}
|
||||
|
||||
static async getByModelName(modelName: string): Promise<Job | undefined> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(modelName)
|
||||
return await queue.getJob(jobId)
|
||||
}
|
||||
|
||||
static async dispatch(params: DownloadModelJobParams) {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(params.modelName)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,11 +4,9 @@ import { EmbedJobWithProgress } from '../../types/rag.js'
|
|||
import { RagService } from '#services/rag_service'
|
||||
import { DockerService } from '#services/docker_service'
|
||||
import { OllamaService } from '#services/ollama_service'
|
||||
import KbIngestState from '#models/kb_ingest_state'
|
||||
import { createHash } from 'crypto'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
import fs from 'node:fs/promises'
|
||||
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
|
||||
|
||||
export interface EmbedFileJobParams {
|
||||
filePath: string
|
||||
|
|
@ -29,12 +27,6 @@ export class EmbedFileJob {
|
|||
return 'embed-file'
|
||||
}
|
||||
|
||||
// Delay between continuation batches when embedding runs CPU-only. Gives the OS
|
||||
// scheduler a brief idle window so sshd / disk-collector / other services don't
|
||||
// starve during long multi-batch ZIM ingestions. Skipped entirely when the
|
||||
// embedding model is GPU-offloaded — see OllamaService.isEmbeddingGpuAccelerated().
|
||||
static readonly CPU_BATCH_DELAY_MS = 1000
|
||||
|
||||
static getJobId(filePath: string): string {
|
||||
return createHash('sha256').update(filePath).digest('hex').slice(0, 16)
|
||||
}
|
||||
|
|
@ -85,16 +77,8 @@ export class EmbedFileJob {
|
|||
|
||||
logger.info(`[EmbedFileJob] Services ready. Processing file: ${fileName}`)
|
||||
|
||||
// Anchor initial progress to where we are in the overall file. For a
|
||||
// continuation batch midway through a multi-batch ZIM (e.g. offset 100k of
|
||||
// 600k), the hardcoded 5 used to make the gauge briefly flash 0→5→real,
|
||||
// which read as a backward jump. Fall back to 5 for single-batch files
|
||||
// where totalArticles isn't set.
|
||||
const initialPercent =
|
||||
totalArticles && totalArticles > 0
|
||||
? Math.min(99, Math.round(((batchOffset || 0) / totalArticles) * 100))
|
||||
: 5
|
||||
await this.safeUpdateProgress(job, initialPercent)
|
||||
// Update progress starting
|
||||
await this.safeUpdateProgress(job, 5)
|
||||
await job.updateData({
|
||||
...job.data,
|
||||
status: 'processing',
|
||||
|
|
@ -103,25 +87,9 @@ export class EmbedFileJob {
|
|||
|
||||
logger.info(`[EmbedFileJob] Processing file: ${filePath}`)
|
||||
|
||||
// Progress callback. For multi-batch ZIM ingestions, scale the service-reported
|
||||
// 0-100% (which is % through the current batch's chunks) into the overall-file
|
||||
// frame so the UI gauge climbs monotonically across the many continuation jobs
|
||||
// BullMQ creates per file. Without this, every new continuation jobId resets the
|
||||
// gauge to ~5% and the user sees ingestion progress "jumping around" between
|
||||
// each batch's local frame and the end-of-batch overall-file overwrite below.
|
||||
//
|
||||
// For single-batch files (uploaded PDFs, txts) totalArticles is undefined and
|
||||
// we fall back to the original 5-95% per-job range, which is what the UI expects
|
||||
// for a one-shot file with no continuations.
|
||||
// Progress callback: maps service-reported 0-100% into the 5-95% job range
|
||||
const onProgress = async (percent: number) => {
|
||||
const useOverallFrame = totalArticles && totalArticles > 0
|
||||
if (useOverallFrame) {
|
||||
const articlesDone = (batchOffset || 0) + (percent / 100) * ZIM_BATCH_SIZE
|
||||
const overallPercent = Math.min(99, Math.round((articlesDone / totalArticles) * 100))
|
||||
await this.safeUpdateProgress(job, overallPercent)
|
||||
} else {
|
||||
await this.safeUpdateProgress(job, Math.min(95, Math.round(5 + percent * 0.9)))
|
||||
}
|
||||
await this.safeUpdateProgress(job, Math.min(95, Math.round(5 + percent * 0.9)))
|
||||
}
|
||||
|
||||
// Process and embed the file
|
||||
|
|
@ -146,19 +114,6 @@ export class EmbedFileJob {
|
|||
`[EmbedFileJob] Batch complete. Dispatching next batch at offset ${nextOffset}`
|
||||
)
|
||||
|
||||
// Pace continuation batches when embedding is CPU-bound. Sustained 100% CPU
|
||||
// saturation across all cores during multi-batch ZIM ingestion can starve
|
||||
// other services (sshd has been seen to lose responsiveness hard enough to
|
||||
// require a power-cycle). When GPU-accelerated, embeddings stream through
|
||||
// the GPU and CPUs stay free — no pacing needed.
|
||||
const isGpuAccelerated = await ollamaService.isEmbeddingGpuAccelerated()
|
||||
if (!isGpuAccelerated) {
|
||||
logger.info(
|
||||
`[EmbedFileJob] Embedding is CPU-only — pacing ${EmbedFileJob.CPU_BATCH_DELAY_MS}ms before dispatching next batch`
|
||||
)
|
||||
await new Promise((resolve) => setTimeout(resolve, EmbedFileJob.CPU_BATCH_DELAY_MS))
|
||||
}
|
||||
|
||||
// Dispatch next batch (not final yet)
|
||||
await EmbedFileJob.dispatch({
|
||||
filePath,
|
||||
|
|
@ -202,18 +157,6 @@ export class EmbedFileJob {
|
|||
chunks: totalChunks,
|
||||
})
|
||||
|
||||
// Persist the post-job state so scanAndSyncStorage knows this file is done.
|
||||
// BullMQ's :completed retention (50 jobs) ages out, so the state row is
|
||||
// the only durable record of "this file finished embedding".
|
||||
try {
|
||||
await KbIngestState.markIndexed(filePath, totalChunks)
|
||||
} catch (stateErr) {
|
||||
logger.warn(
|
||||
`[EmbedFileJob] Failed to persist ingest state for ${fileName}: %s`,
|
||||
stateErr instanceof Error ? stateErr.message : String(stateErr)
|
||||
)
|
||||
}
|
||||
|
||||
const batchMsg = isZimBatch ? ` (final batch, total chunks: ${totalChunks})` : ''
|
||||
logger.info(
|
||||
`[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}${batchMsg}`
|
||||
|
|
@ -236,125 +179,64 @@ export class EmbedFileJob {
|
|||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
})
|
||||
|
||||
// Only persist `failed` for unrecoverable errors. Retryable errors get
|
||||
// automatic BullMQ retries (30 attempts); marking state failed on every
|
||||
// transient blip would suppress the retry-driven recovery path.
|
||||
if (error instanceof UnrecoverableError) {
|
||||
try {
|
||||
await KbIngestState.markFailed(
|
||||
filePath,
|
||||
error instanceof Error ? error.message : 'Unknown error'
|
||||
)
|
||||
} catch (stateErr) {
|
||||
logger.warn(
|
||||
`[EmbedFileJob] Failed to persist failed state for ${fileName}: %s`,
|
||||
stateErr instanceof Error ? stateErr.message : String(stateErr)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
static async listActiveJobs(): Promise<EmbedJobWithProgress[]> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobs = await queue.getJobs(['waiting', 'active', 'delayed'])
|
||||
|
||||
return jobs.map((job) => {
|
||||
const data = job.data as EmbedFileJobParams & {
|
||||
status?: string
|
||||
lastBatchAt?: number
|
||||
startedAt?: number
|
||||
chunks?: number
|
||||
}
|
||||
return {
|
||||
jobId: job.id!.toString(),
|
||||
fileName: data.fileName,
|
||||
filePath: data.filePath,
|
||||
progress: typeof job.progress === 'number' ? job.progress : 0,
|
||||
status: data.status ?? 'waiting',
|
||||
lastBatchAt: data.lastBatchAt,
|
||||
startedAt: data.startedAt,
|
||||
chunks: data.chunks,
|
||||
}
|
||||
})
|
||||
return jobs.map((job) => ({
|
||||
jobId: job.id!.toString(),
|
||||
fileName: (job.data as EmbedFileJobParams).fileName,
|
||||
filePath: (job.data as EmbedFileJobParams).filePath,
|
||||
progress: typeof job.progress === 'number' ? job.progress : 0,
|
||||
status: ((job.data as any).status as string) ?? 'waiting',
|
||||
}))
|
||||
}
|
||||
|
||||
static async getByFilePath(filePath: string): Promise<Job | undefined> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(filePath)
|
||||
return await queue.getJob(jobId)
|
||||
}
|
||||
|
||||
static async dispatch(params: EmbedFileJobParams, options?: { force?: boolean }) {
|
||||
const queueService = QueueService.getInstance()
|
||||
static async dispatch(params: EmbedFileJobParams) {
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
|
||||
// Continuation batches (batchOffset > 0) must NOT reuse the deterministic
|
||||
// per-file jobId. Two BullMQ dedupe paths would otherwise silently swallow them:
|
||||
// 1) The parent batch's handle() calls dispatch() before returning, so the
|
||||
// parent job is still `active` and locked — queue.add() with the same
|
||||
// jobId returns the locked parent rather than enqueueing the new batch.
|
||||
// 2) After the parent completes, its entry stays in `completed` (held by
|
||||
// `removeOnComplete: { count: 50 }`), still tripping jobId dedupe.
|
||||
// Letting BullMQ auto-generate a unique jobId for continuation batches stacks
|
||||
// them as independent queue entries that each process via handle().
|
||||
// Initial dispatches keep the deterministic jobId so re-triggering an install
|
||||
// (UI re-click, sync rescan, etc.) is still idempotent.
|
||||
// `force` skips the deterministic jobId for bulk callers (reembedAll /
|
||||
// resetAndRebuild) where historical entries in :completed would otherwise
|
||||
// silently swallow the new dispatch.
|
||||
const isContinuation = !!(params.batchOffset && params.batchOffset > 0)
|
||||
const force = !!options?.force
|
||||
const initialJobId = this.getJobId(params.filePath)
|
||||
|
||||
const jobOptions: Parameters<typeof queue.add>[2] = {
|
||||
attempts: 30,
|
||||
backoff: {
|
||||
type: 'fixed',
|
||||
delay: 60000, // Check every 60 seconds for service readiness
|
||||
},
|
||||
removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history
|
||||
removeOnFail: { count: 20 }, // Keep last 20 failed jobs for debugging
|
||||
}
|
||||
if (!isContinuation && !force) {
|
||||
jobOptions.jobId = initialJobId
|
||||
}
|
||||
const jobId = this.getJobId(params.filePath)
|
||||
|
||||
try {
|
||||
const job = await queue.add(this.key, params, jobOptions)
|
||||
const job = await queue.add(this.key, params, {
|
||||
jobId,
|
||||
attempts: 30,
|
||||
backoff: {
|
||||
type: 'fixed',
|
||||
delay: 60000, // Check every 60 seconds for service readiness
|
||||
},
|
||||
removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history
|
||||
removeOnFail: { count: 20 } // Keep last 20 failed jobs for debugging
|
||||
})
|
||||
|
||||
const label = isContinuation
|
||||
? ` (continuation @ offset ${params.batchOffset})`
|
||||
: force
|
||||
? ' (forced re-dispatch)'
|
||||
: ''
|
||||
logger.info(
|
||||
`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}${label}`
|
||||
)
|
||||
logger.info(`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}`)
|
||||
|
||||
return {
|
||||
job,
|
||||
created: true,
|
||||
jobId: job.id ?? initialJobId,
|
||||
jobId,
|
||||
message: `File queued for embedding: ${params.fileName}`,
|
||||
}
|
||||
} catch (error) {
|
||||
if (
|
||||
!isContinuation &&
|
||||
!force &&
|
||||
error.message &&
|
||||
error.message.includes('job already exists')
|
||||
) {
|
||||
const existing = await queue.getJob(initialJobId)
|
||||
if (error.message && error.message.includes('job already exists')) {
|
||||
const existing = await queue.getJob(jobId)
|
||||
logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
|
||||
return {
|
||||
job: existing,
|
||||
created: false,
|
||||
jobId: initialJobId,
|
||||
jobId,
|
||||
message: `Embedding job already exists for: ${params.fileName}`,
|
||||
}
|
||||
}
|
||||
|
|
@ -363,7 +245,7 @@ export class EmbedFileJob {
|
|||
}
|
||||
|
||||
static async listFailedJobs(): Promise<EmbedJobWithProgress[]> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
// Jobs that have failed at least once are in 'delayed' (retrying) or terminal 'failed' state.
|
||||
// We identify them by job.data.status === 'failed' set in the catch block of handle().
|
||||
|
|
@ -382,7 +264,7 @@ export class EmbedFileJob {
|
|||
}
|
||||
|
||||
static async cleanupFailedJobs(): Promise<{ cleaned: number; filesDeleted: number }> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const allJobs = await queue.getJobs(['waiting', 'delayed', 'failed'])
|
||||
const failedJobs = allJobs.filter((job) => (job.data as any).status === 'failed')
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ export class RunBenchmarkJob {
|
|||
}
|
||||
|
||||
static async dispatch(params: RunBenchmarkJobParams) {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
|
||||
try {
|
||||
|
|
@ -89,7 +89,7 @@ export class RunBenchmarkJob {
|
|||
}
|
||||
|
||||
static async getJob(benchmarkId: string): Promise<Job | undefined> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
return await queue.getJob(benchmarkId)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ export class RunDownloadJob {
|
|||
|
||||
/** Signal cancellation via Redis so the worker process can pick it up */
|
||||
static async signalCancel(jobId: string): Promise<void> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const client = await queue.client
|
||||
await client.set(this.cancelKey(jobId), '1', 'EX', 300) // 5 min TTL
|
||||
|
|
@ -46,7 +46,7 @@ export class RunDownloadJob {
|
|||
RunDownloadJob.abortControllers.set(job.id!, abortController)
|
||||
|
||||
// Get Redis client for checking cancel signals from the API process
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const cancelRedis = await queueService.getQueue(RunDownloadJob.queue).client
|
||||
|
||||
let lastKnownProgress: Pick<DownloadProgressData, 'downloadedBytes' | 'totalBytes'> = {
|
||||
|
|
@ -147,40 +147,13 @@ export class RunDownloadJob {
|
|||
// Only dispatch embedding job if AI Assistant (Ollama) is installed
|
||||
const ollamaUrl = await dockerService.getServiceURL('nomad_ollama')
|
||||
if (ollamaUrl) {
|
||||
// Respect the global ingest policy. Under Manual, record the file
|
||||
// as pending_decision so the KB panel surfaces the per-file Index
|
||||
// affordance (PR #909) instead of silently auto-embedding behind
|
||||
// the user's back. Unset is treated as Always to preserve legacy
|
||||
// behavior — mirrors rag_service.ts:1587-1588.
|
||||
const { default: KVStore } = await import('#models/kv_store')
|
||||
const { default: KbIngestState } = await import('#models/kb_ingest_state')
|
||||
const policyRaw = await KVStore.getValue('rag.defaultIngestPolicy')
|
||||
const policy: 'Always' | 'Manual' = policyRaw === 'Manual' ? 'Manual' : 'Always'
|
||||
|
||||
if (policy === 'Manual') {
|
||||
try {
|
||||
// firstOrCreate so a re-download doesn't demote an existing
|
||||
// indexed/failed row — user keeps prior state and can re-index
|
||||
// explicitly from the KB panel if they want fresh content.
|
||||
await KbIngestState.firstOrCreate(
|
||||
{ file_path: filepath },
|
||||
{ file_path: filepath, state: 'pending_decision', chunks_embedded: 0 }
|
||||
)
|
||||
} catch (error) {
|
||||
console.error(
|
||||
`[RunDownloadJob] Error recording pending_decision state for ${filepath}:`,
|
||||
error
|
||||
)
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
await EmbedFileJob.dispatch({
|
||||
fileName: url.split('/').pop() || '',
|
||||
filePath: filepath,
|
||||
})
|
||||
} catch (error) {
|
||||
console.error(`[RunDownloadJob] Error dispatching EmbedFileJob for URL ${url}:`, error)
|
||||
}
|
||||
try {
|
||||
await EmbedFileJob.dispatch({
|
||||
fileName: url.split('/').pop() || '',
|
||||
filePath: filepath,
|
||||
})
|
||||
} catch (error) {
|
||||
console.error(`[RunDownloadJob] Error dispatching EmbedFileJob for URL ${url}:`, error)
|
||||
}
|
||||
}
|
||||
} else if (filetype === 'map') {
|
||||
|
|
@ -226,7 +199,7 @@ export class RunDownloadJob {
|
|||
}
|
||||
|
||||
static async getByUrl(url: string): Promise<Job | undefined> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(url)
|
||||
return await queue.getJob(jobId)
|
||||
|
|
@ -256,7 +229,7 @@ export class RunDownloadJob {
|
|||
}
|
||||
|
||||
static async dispatch(params: RunDownloadJobParams) {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(params.url)
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ export class RunExtractPmtilesJob {
|
|||
}
|
||||
|
||||
static async signalCancel(jobId: string): Promise<void> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const client = await queue.client
|
||||
await client.set(this.cancelKey(jobId), '1', 'EX', 300)
|
||||
|
|
@ -77,7 +77,7 @@ export class RunExtractPmtilesJob {
|
|||
`maxzoom=${maxzoom ?? 'source-max'} out=${outputFilepath}`
|
||||
)
|
||||
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const cancelRedis = await queueService.getQueue(RunExtractPmtilesJob.queue).client
|
||||
|
||||
let userCancelled = false
|
||||
|
|
@ -249,13 +249,13 @@ export class RunExtractPmtilesJob {
|
|||
}
|
||||
|
||||
static async getById(jobId: string): Promise<Job | undefined> {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
return await queue.getJob(jobId)
|
||||
}
|
||||
|
||||
static async dispatch(params: RunExtractPmtilesJobParams) {
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(params.sourceUrl, params.regionFilepath, params.maxzoom)
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ export default class ChatMessage extends BaseModel {
|
|||
@column()
|
||||
declare content: string
|
||||
|
||||
@belongsTo(() => ChatSession, { foreignKey: 'session_id', localKey: 'id' })
|
||||
@belongsTo(() => ChatSession, { foreignKey: 'id', localKey: 'session_id' })
|
||||
declare session: BelongsTo<typeof ChatSession>
|
||||
|
||||
@column.dateTime({ autoCreate: true })
|
||||
|
|
|
|||
|
|
@ -1,77 +0,0 @@
|
|||
import { DateTime } from 'luxon'
|
||||
import { BaseModel, column, SnakeCaseNamingStrategy } from '@adonisjs/lucid/orm'
|
||||
import type { KbIngestStateValue } from '../../types/kb_ingest_state.js'
|
||||
|
||||
const LAST_ERROR_MAX_LEN = 1024
|
||||
|
||||
/**
|
||||
* Tracks the per-file decision and outcome of AI knowledge-base ingestion.
|
||||
*
|
||||
* The row exists for any embeddable file the scanner has seen and is independent
|
||||
* of `installed_resources` (which only covers curated downloads). Replaces the
|
||||
* earlier "any chunks in qdrant ⇒ embedded" binary check, which conflated
|
||||
* partially-stalled ingestions with fully-indexed files. See RFC #883.
|
||||
*/
|
||||
export default class KbIngestState extends BaseModel {
|
||||
static table = 'kb_ingest_state'
|
||||
static namingStrategy = new SnakeCaseNamingStrategy()
|
||||
|
||||
@column({ isPrimary: true })
|
||||
declare id: number
|
||||
|
||||
@column()
|
||||
declare file_path: string
|
||||
|
||||
@column()
|
||||
declare state: KbIngestStateValue
|
||||
|
||||
@column()
|
||||
declare chunks_embedded: number
|
||||
|
||||
@column()
|
||||
declare last_error: string | null
|
||||
|
||||
@column.dateTime({ autoCreate: true })
|
||||
declare created_at: DateTime
|
||||
|
||||
@column.dateTime({ autoCreate: true, autoUpdate: true })
|
||||
declare updated_at: DateTime
|
||||
|
||||
static async getOrCreate(filePath: string): Promise<KbIngestState> {
|
||||
return this.firstOrCreate(
|
||||
{ file_path: filePath },
|
||||
{ file_path: filePath, state: 'pending_decision', chunks_embedded: 0 }
|
||||
)
|
||||
}
|
||||
|
||||
static async markIndexed(filePath: string, chunksEmbedded: number): Promise<void> {
|
||||
const row = await this.getOrCreate(filePath)
|
||||
row.state = 'indexed'
|
||||
row.chunks_embedded = chunksEmbedded
|
||||
row.last_error = null
|
||||
await row.save()
|
||||
}
|
||||
|
||||
static async markFailed(filePath: string, errorMessage: string): Promise<void> {
|
||||
const row = await this.getOrCreate(filePath)
|
||||
row.state = 'failed'
|
||||
row.last_error = errorMessage.slice(0, LAST_ERROR_MAX_LEN)
|
||||
await row.save()
|
||||
}
|
||||
|
||||
static async markBrowseOnly(filePath: string): Promise<void> {
|
||||
const row = await this.getOrCreate(filePath)
|
||||
row.state = 'browse_only'
|
||||
await row.save()
|
||||
}
|
||||
|
||||
static async markStalled(filePath: string): Promise<void> {
|
||||
const row = await this.getOrCreate(filePath)
|
||||
row.state = 'stalled'
|
||||
await row.save()
|
||||
}
|
||||
|
||||
static async remove(filePath: string): Promise<void> {
|
||||
await this.query().where('file_path', filePath).delete()
|
||||
}
|
||||
}
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
import { DateTime } from 'luxon'
|
||||
import { BaseModel, column, SnakeCaseNamingStrategy } from '@adonisjs/lucid/orm'
|
||||
import {
|
||||
findChunksPerMb,
|
||||
estimateChunkCount,
|
||||
estimateBatch,
|
||||
type BatchEstimate,
|
||||
type BatchEstimateInput,
|
||||
} from '../utils/kb_ratio_lookup.js'
|
||||
|
||||
/**
|
||||
* Self-calibrating registry of `{filename-prefix → chunks_per_mb}` ratios used
|
||||
* for disk-footprint and time-to-embed estimates surfaced in the KB panel.
|
||||
*
|
||||
* Migration seeds the registry with heuristic defaults from the RFC #883
|
||||
* appendix; Phase 4 self-calibration will update rows in place as ZIMs finish
|
||||
* ingesting and the real ratio becomes known. Lookup is longest-prefix-match
|
||||
* (see `kb_ratio_lookup.ts`) so a specific entry (`wikipedia_en_simple_`)
|
||||
* overrides a broader one (`wikipedia_en_`).
|
||||
*/
|
||||
export default class KbRatioRegistry extends BaseModel {
|
||||
static table = 'kb_ratio_registry'
|
||||
static namingStrategy = new SnakeCaseNamingStrategy()
|
||||
|
||||
@column({ isPrimary: true })
|
||||
declare id: number
|
||||
|
||||
@column()
|
||||
declare pattern: string
|
||||
|
||||
@column()
|
||||
declare chunks_per_mb: number
|
||||
|
||||
@column()
|
||||
declare sample_count: number
|
||||
|
||||
@column()
|
||||
declare notes: string | null
|
||||
|
||||
@column.dateTime({ autoCreate: true })
|
||||
declare created_at: DateTime
|
||||
|
||||
@column.dateTime({ autoCreate: true, autoUpdate: true })
|
||||
declare updated_at: DateTime
|
||||
|
||||
/** Look up chunks_per_mb for a filename by longest-prefix match. */
|
||||
static async lookup(filename: string): Promise<number | null> {
|
||||
const rows = await this.all()
|
||||
return findChunksPerMb(filename, rows)
|
||||
}
|
||||
|
||||
/** Estimate total chunks for a file of the given size on disk. */
|
||||
static async estimateChunks(filename: string, fileSizeBytes: number): Promise<number | null> {
|
||||
const rows = await this.all()
|
||||
return estimateChunkCount(filename, fileSizeBytes, rows)
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate an embedding-disk-cost estimate across a batch of files. Used by
|
||||
* the curated-tier-change UI to show "you're about to add ~X GB of
|
||||
* embeddings on top of the ZIM downloads" before the user commits.
|
||||
*/
|
||||
static async estimateBatch(files: BatchEstimateInput[]): Promise<BatchEstimate> {
|
||||
const rows = await this.all()
|
||||
return estimateBatch(files, rows)
|
||||
}
|
||||
}
|
||||
|
|
@ -317,23 +317,6 @@ export class BenchmarkService {
|
|||
}
|
||||
}
|
||||
|
||||
// Fallback: AMD discrete cards. si.graphics() returns empty inside Docker for AMD,
|
||||
// the nvidia-smi path doesn't apply, and the APU regex only catches integrated parts.
|
||||
// SystemService.getSystemInfo() already handles AMD via the marker file + Ollama log
|
||||
// probe added in PR #804, so reuse that plumbing rather than duplicating it here.
|
||||
if (!gpuModel) {
|
||||
try {
|
||||
const systemService = new (await import('./system_service.js')).SystemService(this.dockerService)
|
||||
const sysInfo = await systemService.getSystemInfo()
|
||||
const sysGpuModel = sysInfo?.graphics?.controllers?.[0]?.model
|
||||
if (sysGpuModel) {
|
||||
gpuModel = sysGpuModel
|
||||
}
|
||||
} catch (sysError: any) {
|
||||
logger.warn(`[BenchmarkService] system_service AMD fallback failed: ${sysError.message}`)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
cpu_model: `${cpu.manufacturer} ${cpu.brand}`,
|
||||
cpu_cores: cpu.physicalCores,
|
||||
|
|
|
|||
|
|
@ -5,8 +5,6 @@ import { DateTime } from 'luxon'
|
|||
import { join } from 'path'
|
||||
import CollectionManifest from '#models/collection_manifest'
|
||||
import InstalledResource from '#models/installed_resource'
|
||||
import { QueueService } from './queue_service.js'
|
||||
import { RunDownloadJob } from '#jobs/run_download_job'
|
||||
import { zimCategoriesSpecSchema, mapsSpecSchema, wikipediaSpecSchema } from '#validators/curated_collections'
|
||||
import {
|
||||
ensureDirectoryExists,
|
||||
|
|
@ -100,74 +98,10 @@ export class CollectionManifestService {
|
|||
const installedResources = await InstalledResource.query().where('resource_type', 'zim')
|
||||
const installedMap = new Map(installedResources.map((r) => [r.resource_id, r]))
|
||||
|
||||
// In-flight ZIM download resource IDs from the BullMQ queue. Used to
|
||||
// surface the user's tier intent immediately on submit, before any single
|
||||
// file has finished downloading. Failed jobs are excluded so a stuck
|
||||
// queue entry doesn't keep claiming the user's pick forever.
|
||||
const inFlightIds = await this.getInFlightZimResourceIds()
|
||||
|
||||
return spec.categories.map((category) => {
|
||||
const installedTierSlug = this.getInstalledTierForCategory(category.tiers, installedMap)
|
||||
const downloadingTierSlug = this.getDownloadingTierForCategory(
|
||||
category.tiers,
|
||||
installedMap,
|
||||
inFlightIds,
|
||||
installedTierSlug
|
||||
)
|
||||
return { ...category, installedTierSlug, downloadingTierSlug }
|
||||
})
|
||||
}
|
||||
|
||||
private async getInFlightZimResourceIds(): Promise<Set<string>> {
|
||||
const ids = new Set<string>()
|
||||
try {
|
||||
const queue = QueueService.getInstance().getQueue(RunDownloadJob.queue)
|
||||
const jobs = await queue.getJobs(['waiting', 'active', 'delayed'])
|
||||
for (const job of jobs) {
|
||||
if (job.data?.filetype !== 'zim') continue
|
||||
const resourceId = job.data?.resourceMetadata?.resource_id
|
||||
if (typeof resourceId === 'string') ids.add(resourceId)
|
||||
}
|
||||
} catch (error) {
|
||||
// Don't fail the whole categories endpoint if the queue is briefly
|
||||
// unreachable — just report no in-flight downloads.
|
||||
logger.warn('[CollectionManifestService] Could not read download queue:', error?.message || error)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
/**
|
||||
* Highest tier whose every resource is installed OR has an in-flight
|
||||
* download. Returns undefined when there are no in-flight downloads for this
|
||||
* category, or when the result would just duplicate installedTierSlug (i.e.
|
||||
* everything that's downloading is already installed — nothing new to show).
|
||||
*/
|
||||
getDownloadingTierForCategory(
|
||||
tiers: SpecTier[],
|
||||
installedMap: Map<string, InstalledResource>,
|
||||
inFlightIds: Set<string>,
|
||||
installedTierSlug: string | undefined
|
||||
): string | undefined {
|
||||
if (inFlightIds.size === 0) return undefined
|
||||
|
||||
// Cheap pre-check: any of this category's resources actually in flight?
|
||||
const anyInFlight = tiers.some((tier) =>
|
||||
CollectionManifestService.resolveTierResources(tier, tiers).some((r) => inFlightIds.has(r.id))
|
||||
)
|
||||
if (!anyInFlight) return undefined
|
||||
|
||||
const reversedTiers = [...tiers].reverse()
|
||||
for (const tier of reversedTiers) {
|
||||
const resolved = CollectionManifestService.resolveTierResources(tier, tiers)
|
||||
if (resolved.length === 0) continue
|
||||
const allAccountedFor = resolved.every(
|
||||
(r) => installedMap.has(r.id) || inFlightIds.has(r.id)
|
||||
)
|
||||
if (allAccountedFor) {
|
||||
return tier.slug === installedTierSlug ? undefined : tier.slug
|
||||
}
|
||||
}
|
||||
return undefined
|
||||
return spec.categories.map((category) => ({
|
||||
...category,
|
||||
installedTierSlug: this.getInstalledTierForCategory(category.tiers, installedMap),
|
||||
}))
|
||||
}
|
||||
|
||||
async getMapCollectionsWithStatus(): Promise<CollectionWithStatus[]> {
|
||||
|
|
|
|||
|
|
@ -291,12 +291,8 @@ export class DockerService {
|
|||
|
||||
/**
|
||||
* Force reinstall a service by stopping, removing, and recreating its container.
|
||||
*
|
||||
* Volume handling: removes Docker-managed named volumes whose name equals
|
||||
* `serviceName`, starts with `${serviceName}_`, or carries a `service=${serviceName}`
|
||||
* label. Host bind mounts are NOT touched — any data living on a bind-mounted
|
||||
* host path (ZIM stores, model caches, MySQL data dir, etc.) survives the reinstall.
|
||||
* Anonymous volumes (random hash names) are also not matched.
|
||||
* This method will also clear any associated volumes/data.
|
||||
* Handles edge cases gracefully (e.g., container not running, container not found).
|
||||
*/
|
||||
async forceReinstall(serviceName: string): Promise<{ success: boolean; message: string }> {
|
||||
try {
|
||||
|
|
@ -369,10 +365,7 @@ export class DockerService {
|
|||
const volumes = await this.docker.listVolumes()
|
||||
const serviceVolumes =
|
||||
volumes.Volumes?.filter(
|
||||
(v) =>
|
||||
v.Name === serviceName ||
|
||||
v.Name.startsWith(`${serviceName}_`) ||
|
||||
v.Labels?.service === serviceName
|
||||
(v) => v.Name.includes(serviceName) || v.Labels?.service === serviceName
|
||||
) || []
|
||||
|
||||
for (const vol of serviceVolumes) {
|
||||
|
|
@ -599,12 +592,10 @@ export class DockerService {
|
|||
ollamaEnv.push('OLLAMA_FLASH_ATTENTION=1')
|
||||
}
|
||||
if (amdGpuConfigured) {
|
||||
// gfx-aware HSA override — only set for cards that actually need it. See
|
||||
// _resolveAmdHsaOverride() for the resolution order and gfx → version mapping.
|
||||
const hsaOverride = await this._resolveAmdHsaOverride()
|
||||
if (hsaOverride) {
|
||||
ollamaEnv.push(`HSA_OVERRIDE_GFX_VERSION=${hsaOverride}`)
|
||||
}
|
||||
// RDNA3 iGPUs (gfx1103: 780M, 880M, 890M, ...) aren't on AMD's official ROCm
|
||||
// allowlist but work when forced to identify as gfx1100 via HSA_OVERRIDE_GFX_VERSION.
|
||||
// Harmless on supported discrete cards (gfx1030 RX 6800, etc.) — they ignore the override.
|
||||
ollamaEnv.push('HSA_OVERRIDE_GFX_VERSION=11.0.0')
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1008,67 +999,6 @@ export class DockerService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the HSA_OVERRIDE_GFX_VERSION value for the host's AMD GPU.
|
||||
*
|
||||
* gfx1030 (RX 6800/6700/etc.), gfx1100/1101/1102 (RX 7900/7800/7600) are on AMD's
|
||||
* official ROCm allowlist — forcing an override on these breaks GPU discovery.
|
||||
* gfx1035 / gfx1036 (RDNA 2 iGPUs like 680M) need 10.3.0 to coerce to gfx1030.
|
||||
* gfx1103 / gfx1150 / gfx1151 (RDNA 3/3.5 iGPUs like 780M / 890M / Strix Halo) need 11.0.0.
|
||||
*
|
||||
* Resolution order:
|
||||
* 1. KV `ai.amdHsaOverride` — manual user override; accepts 'none' (disable) or a semver-style value.
|
||||
* 2. Marker file `/app/storage/.nomad-amd-gfx` written by install_nomad.sh.
|
||||
* 3. Default: '11.0.0' — preserves prior behavior so existing iGPU users don't regress on
|
||||
* upgrade. Discrete-card users on existing installs can opt out via the KV.
|
||||
*
|
||||
* Returns null when no override should be applied.
|
||||
*/
|
||||
private async _resolveAmdHsaOverride(): Promise<string | null> {
|
||||
const manualRaw = await KVStore.getValue('ai.amdHsaOverride')
|
||||
if (manualRaw !== null && manualRaw !== undefined && String(manualRaw).trim() !== '') {
|
||||
const manual = String(manualRaw).trim().toLowerCase()
|
||||
if (manual === 'none' || manual === 'off' || manual === 'false') {
|
||||
logger.info('[DockerService] HSA override disabled via ai.amdHsaOverride')
|
||||
return null
|
||||
}
|
||||
if (/^\d+\.\d+\.\d+$/.test(manual)) {
|
||||
logger.info(`[DockerService] HSA override forced to ${manual} via ai.amdHsaOverride`)
|
||||
return manual
|
||||
}
|
||||
logger.warn(`[DockerService] Ignoring invalid ai.amdHsaOverride value: ${manualRaw}`)
|
||||
}
|
||||
|
||||
try {
|
||||
const gfx = (await readFile('/app/storage/.nomad-amd-gfx', 'utf8')).trim()
|
||||
const mapped = this._mapGfxToHsaOverride(gfx)
|
||||
logger.info(`[DockerService] AMD gfx marker '${gfx}' → HSA override ${mapped ?? 'none'}`)
|
||||
return mapped
|
||||
} catch {
|
||||
// Marker absent — most likely an existing install upgraded without re-running
|
||||
// install_nomad.sh. Fall through to the default.
|
||||
}
|
||||
|
||||
logger.info('[DockerService] No AMD gfx marker; defaulting HSA override to 11.0.0 for backward compatibility')
|
||||
return '11.0.0'
|
||||
}
|
||||
|
||||
private _mapGfxToHsaOverride(gfx: string): string | null {
|
||||
// Officially supported by ROCm — no override needed
|
||||
if (gfx === 'gfx1030' || gfx === 'gfx1100' || gfx === 'gfx1101' || gfx === 'gfx1102') {
|
||||
return null
|
||||
}
|
||||
// RDNA 2 variants + iGPUs (gfx1031..gfx1036, e.g. Rembrandt 680M)
|
||||
if (/^gfx103[1-6]$/.test(gfx)) {
|
||||
return '10.3.0'
|
||||
}
|
||||
// RDNA 3 / 3.5 mobile parts (Phoenix 780M = gfx1103, Strix 890M = gfx1150, Strix Halo = gfx1151)
|
||||
if (gfx === 'gfx1103' || gfx === 'gfx1150' || gfx === 'gfx1151') {
|
||||
return '11.0.0'
|
||||
}
|
||||
return '11.0.0'
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the Docker Devices array for AMD GPU passthrough.
|
||||
*
|
||||
|
|
@ -1110,17 +1040,13 @@ export class DockerService {
|
|||
|
||||
this.activeInstallations.add(serviceName)
|
||||
|
||||
// newImage = the semver tag we record in the DB after the update (e.g. ollama/ollama:0.23.2).
|
||||
// runtimeImage = the tag we actually pull and run. For AMD-on-Ollama these diverge: we run
|
||||
// the rolling :rocm tag because per-version ROCm tags aren't always published, but the DB
|
||||
// must keep the semver tag so the Apps page shows the actual version (not literally "rocm")
|
||||
// and the registry update-check parses a valid tag (instead of looping on the same update).
|
||||
// Compute new image string. AMD-on-Ollama overrides this to the rolling :rocm tag
|
||||
// (set during GPU detection below) since per-version ROCm tags aren't always published.
|
||||
const currentImage = service.container_image
|
||||
const imageBase = currentImage.includes(':')
|
||||
? currentImage.substring(0, currentImage.lastIndexOf(':'))
|
||||
: currentImage
|
||||
const newImage = `${imageBase}:${targetVersion}`
|
||||
let runtimeImage = newImage
|
||||
let newImage = `${imageBase}:${targetVersion}`
|
||||
|
||||
// GPU detection runs before the pull so AMD updates pull ollama/ollama:rocm rather
|
||||
// than the standard tag. Detection result is reused below when building the new
|
||||
|
|
@ -1148,7 +1074,7 @@ export class DockerService {
|
|||
'update-gpu-config',
|
||||
`AMD GPU detected. Using ROCm image with /dev/kfd and /dev/dri passthrough...`
|
||||
)
|
||||
runtimeImage = 'ollama/ollama:rocm'
|
||||
newImage = 'ollama/ollama:rocm'
|
||||
updatedAmdDevices = await this._discoverAMDDevices()
|
||||
updatedAmdGpuConfigured = true
|
||||
} else {
|
||||
|
|
@ -1169,9 +1095,9 @@ export class DockerService {
|
|||
}
|
||||
}
|
||||
|
||||
// Step 1: Pull new image (runtimeImage diverges from newImage for AMD, see above)
|
||||
this._broadcast(serviceName, 'update-pulling', `Pulling image ${runtimeImage}...`)
|
||||
const pullStream = await this.docker.pull(runtimeImage)
|
||||
// Step 1: Pull new image
|
||||
this._broadcast(serviceName, 'update-pulling', `Pulling image ${newImage}...`)
|
||||
const pullStream = await this.docker.pull(newImage)
|
||||
await new Promise((res) => this.docker.modem.followProgress(pullStream, res))
|
||||
|
||||
// Step 2: Find and stop existing container
|
||||
|
|
@ -1206,17 +1132,15 @@ export class DockerService {
|
|||
// and whether HSA_OVERRIDE needs injection. For AMD, replace any prior HSA_OVERRIDE in
|
||||
// the inspect-captured env so updates from older containers pick up the current value.
|
||||
const baseEnv = inspectData.Config?.Env || []
|
||||
let finalEnv = baseEnv
|
||||
if (updatedAmdGpuConfigured) {
|
||||
const hsaOverride = await this._resolveAmdHsaOverride()
|
||||
finalEnv = baseEnv.filter((e: string) => !e.startsWith('HSA_OVERRIDE_GFX_VERSION='))
|
||||
if (hsaOverride) {
|
||||
finalEnv.push(`HSA_OVERRIDE_GFX_VERSION=${hsaOverride}`)
|
||||
}
|
||||
}
|
||||
const finalEnv = updatedAmdGpuConfigured
|
||||
? [
|
||||
...baseEnv.filter((e: string) => !e.startsWith('HSA_OVERRIDE_GFX_VERSION=')),
|
||||
'HSA_OVERRIDE_GFX_VERSION=11.0.0',
|
||||
]
|
||||
: baseEnv
|
||||
|
||||
const newContainerConfig: any = {
|
||||
Image: runtimeImage,
|
||||
Image: newImage,
|
||||
name: serviceName,
|
||||
Env: finalEnv.length > 0 ? finalEnv : undefined,
|
||||
Cmd: inspectData.Config?.Cmd || undefined,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import OpenAI from 'openai'
|
|||
import type { ChatCompletionChunk, ChatCompletionMessageParam } from 'openai/resources/chat/completions.js'
|
||||
import type { Stream } from 'openai/streaming.js'
|
||||
import { NomadOllamaModel } from '../../types/ollama.js'
|
||||
import { EMBEDDING_MODEL_NAME, FALLBACK_RECOMMENDED_OLLAMA_MODELS } from '../../constants/ollama.js'
|
||||
import { FALLBACK_RECOMMENDED_OLLAMA_MODELS } from '../../constants/ollama.js'
|
||||
import fs from 'node:fs/promises'
|
||||
import path from 'node:path'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
|
|
@ -469,18 +469,6 @@ export class OllamaService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hard char cap per embed input, applied as a runtime safety net regardless of
|
||||
* which backend path runs. The chunker in RagService caps at MAX_SAFE_TOKENS=1600
|
||||
* (3200 chars at the conservative 2 chars/token estimate), but dense technical
|
||||
* content has been observed to slip past on multi-batch ZIM ingestion (#881).
|
||||
*
|
||||
* 4000 chars ≈ 1000–2000 tokens depending on density, which keeps us comfortably
|
||||
* under nomic-embed-text:v1.5's default 2048-token context even on the OpenAI-compat
|
||||
* fallback path (which can't pass `truncate:true`/`num_ctx` to the model).
|
||||
*/
|
||||
public static readonly EMBED_MAX_INPUT_CHARS = 4000
|
||||
|
||||
/**
|
||||
* Generate embeddings for the given input strings.
|
||||
* Tries the Ollama native /api/embed endpoint first, falls back to /v1/embeddings.
|
||||
|
|
@ -491,28 +479,6 @@ export class OllamaService {
|
|||
throw new Error('AI service is not initialized.')
|
||||
}
|
||||
|
||||
// Runtime safety net (#881). The OpenAI-compat fallback has no equivalent of
|
||||
// truncate:true, so a chunk that exceeds the model's loaded context_length
|
||||
// (often 2048 for nomic-embed-text:v1.5) returns 400 and the chunk is silently
|
||||
// dropped from Qdrant. Pre-capping at the input layer protects both paths.
|
||||
const safeInput = input.map((s) =>
|
||||
s.length > OllamaService.EMBED_MAX_INPUT_CHARS
|
||||
? s.slice(0, OllamaService.EMBED_MAX_INPUT_CHARS)
|
||||
: s
|
||||
)
|
||||
const truncatedCount = input.reduce(
|
||||
(n, s) => (s.length > OllamaService.EMBED_MAX_INPUT_CHARS ? n + 1 : n),
|
||||
0
|
||||
)
|
||||
if (truncatedCount > 0) {
|
||||
logger.debug(
|
||||
'[OllamaService] embed: pre-capped %d/%d inputs at %d chars',
|
||||
truncatedCount,
|
||||
input.length,
|
||||
OllamaService.EMBED_MAX_INPUT_CHARS
|
||||
)
|
||||
}
|
||||
|
||||
try {
|
||||
// Prefer Ollama native endpoint (supports batch input natively).
|
||||
// Pass num_ctx explicitly so we don't depend on the embedding model's
|
||||
|
|
@ -525,7 +491,7 @@ export class OllamaService {
|
|||
`${this.baseUrl}/api/embed`,
|
||||
{
|
||||
model,
|
||||
input: safeInput,
|
||||
input,
|
||||
truncate: true,
|
||||
options: { num_ctx: 8192 },
|
||||
},
|
||||
|
|
@ -537,130 +503,16 @@ export class OllamaService {
|
|||
throw new Error('Invalid /api/embed response — missing embeddings array')
|
||||
}
|
||||
return { embeddings: response.data.embeddings }
|
||||
} catch (err) {
|
||||
// Capture the original error so we know *why* we fell back. Earlier bare
|
||||
// catches here masked recurring "input length exceeds context length"
|
||||
// failures for months (#369, #670, #881) — without this log we have no
|
||||
// signal that /api/embed is the broken path vs the fallback.
|
||||
logger.warn(
|
||||
'[OllamaService] /api/embed failed, falling back to /v1/embeddings: %s',
|
||||
err instanceof Error ? err.message : String(err)
|
||||
)
|
||||
// Fall back to OpenAI-compatible /v1/embeddings.
|
||||
} catch {
|
||||
// Fall back to OpenAI-compatible /v1/embeddings
|
||||
// Explicitly request float format — some backends (e.g. LM Studio) don't reliably
|
||||
// implement the base64 encoding the OpenAI SDK requests by default.
|
||||
const results = await this.openai.embeddings.create({
|
||||
model,
|
||||
input: safeInput,
|
||||
encoding_format: 'float',
|
||||
})
|
||||
logger.info('[OllamaService] /api/embed unavailable, falling back to /v1/embeddings')
|
||||
const results = await this.openai.embeddings.create({ model, input, encoding_format: 'float' })
|
||||
return { embeddings: results.data.map((e) => e.embedding as number[]) }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if Ollama is currently running an embedding model with non-zero VRAM
|
||||
* (i.e., GPU-offloaded). Returns false if the model is running CPU-only OR if it's
|
||||
* not currently loaded OR if /api/ps is unreachable.
|
||||
*
|
||||
* Used by EmbedFileJob to pace continuation batches when the embedding model is
|
||||
* CPU-bound — sustained 100% CPU on a multi-batch ZIM ingestion can starve other
|
||||
* services (sshd, etc.) hard enough to require a power-cycle. AMD ROCm installs
|
||||
* hit this today because Ollama's ROCm build doesn't accelerate nomic-bert; on
|
||||
* NVIDIA, nomic-embed-text runs at 100% GPU and pacing is unnecessary.
|
||||
*
|
||||
* Only the Ollama-native endpoint is supported — backends that expose
|
||||
* `/v1/embeddings` (LM Studio, llama.cpp) don't surface placement info.
|
||||
*/
|
||||
public async isEmbeddingGpuAccelerated(): Promise<boolean> {
|
||||
await this._ensureDependencies()
|
||||
if (!this.baseUrl) return false
|
||||
|
||||
try {
|
||||
const response = await axios.get(`${this.baseUrl}/api/ps`, { timeout: 5000 })
|
||||
const models: Array<{ name?: string; size_vram?: number }> = response.data?.models ?? []
|
||||
// Match any loaded model whose name signals it's an embedding model.
|
||||
// nomic-embed-text, mxbai-embed-large, snowflake-arctic-embed, etc. all follow this convention.
|
||||
return models.some(
|
||||
(m) => m.name?.toLowerCase().includes('embed') && (m.size_vram ?? 0) > 0
|
||||
)
|
||||
} catch (err: any) {
|
||||
// /api/ps unreachable (Ollama down, non-native backend, etc.) — fail closed: assume CPU,
|
||||
// which means we'll pace. Better to over-pace than risk box-killing CPU saturation.
|
||||
logger.warn(
|
||||
`[OllamaService] Could not check embedding placement via /api/ps: ${err?.message ?? err}`
|
||||
)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enforces the "at most one chat model resident in VRAM" invariant by firing
|
||||
* `keep_alive: 0` against every currently-loaded model except (a) the
|
||||
* embedding model (always exempt) and (b) `targetModel` (the one we want
|
||||
* loaded next — leaving it alone preserves a hot model when the target is
|
||||
* already loaded).
|
||||
*
|
||||
* Best-effort: queries `/api/ps` and POSTs unload hints in parallel. Network
|
||||
* or Ollama errors are swallowed and logged — neither chat nor page-load
|
||||
* should fail just because the unload housekeeping didn't go through.
|
||||
*
|
||||
* Returns the list of model names that were sent the unload hint, so the
|
||||
* caller (and tests) can confirm what actually happened.
|
||||
*
|
||||
* Pass `targetModel: null` to unload every chat model (used for the future
|
||||
* "free up VRAM" path; not exposed yet but the helper supports it).
|
||||
*
|
||||
* Note that `keep_alive: 0` is a post-completion hint, not a force-kill —
|
||||
* Ollama defers eviction until the runner is idle, so in-flight inference
|
||||
* on the same model is never interrupted. See the design doc for the race
|
||||
* analysis behind this.
|
||||
*/
|
||||
public async unloadAllChatModelsExcept(targetModel: string | null): Promise<string[]> {
|
||||
await this._ensureDependencies()
|
||||
if (!this.baseUrl) return []
|
||||
|
||||
let loadedModels: string[] = []
|
||||
try {
|
||||
const response = await axios.get(`${this.baseUrl}/api/ps`, { timeout: 5000 })
|
||||
loadedModels = (response.data?.models ?? [])
|
||||
.map((m: { name?: string }) => m.name)
|
||||
.filter((name: unknown): name is string => typeof name === 'string')
|
||||
} catch (err: any) {
|
||||
logger.warn(
|
||||
`[OllamaService] unloadAllChatModelsExcept: /api/ps unreachable, skipping unload sweep: ${err?.message ?? err}`
|
||||
)
|
||||
return []
|
||||
}
|
||||
|
||||
const toUnload = loadedModels.filter(
|
||||
(name) => name !== EMBEDDING_MODEL_NAME && name !== targetModel
|
||||
)
|
||||
|
||||
await Promise.all(
|
||||
toUnload.map(async (modelName) => {
|
||||
try {
|
||||
await axios.post(
|
||||
`${this.baseUrl}/api/generate`,
|
||||
{ model: modelName, prompt: '', keep_alive: 0 },
|
||||
{ timeout: 10000 }
|
||||
)
|
||||
} catch (err: any) {
|
||||
logger.warn(
|
||||
`[OllamaService] Failed to send unload hint for ${modelName}: ${err?.message ?? err}`
|
||||
)
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
if (toUnload.length > 0) {
|
||||
logger.info(
|
||||
`[OllamaService] Sent unload hint for ${toUnload.length} chat model(s): ${toUnload.join(', ')}`
|
||||
)
|
||||
}
|
||||
return toUnload
|
||||
}
|
||||
|
||||
public async getModels(includeEmbeddings = false): Promise<NomadInstalledModel[]> {
|
||||
await this._ensureDependencies()
|
||||
if (!this.baseUrl) {
|
||||
|
|
|
|||
|
|
@ -1,25 +1,9 @@
|
|||
import { Queue } from 'bullmq'
|
||||
import queueConfig from '#config/queue'
|
||||
|
||||
// Process-wide singleton. Each `Queue` opens two ioredis connections (one for
|
||||
// commands, one blocking). Instantiating a fresh QueueService per dispatch /
|
||||
// status lookup leaks both, and under sustained job churn (e.g. multi-batch ZIM
|
||||
// ingestion enqueueing a continuation every few seconds) it saturates Redis's
|
||||
// maxclients within hours.
|
||||
export class QueueService {
|
||||
private queues: Map<string, Queue> = new Map()
|
||||
|
||||
private static _instance: QueueService | null = null
|
||||
|
||||
private constructor() {}
|
||||
|
||||
static getInstance(): QueueService {
|
||||
if (!QueueService._instance) {
|
||||
QueueService._instance = new QueueService()
|
||||
}
|
||||
return QueueService._instance
|
||||
}
|
||||
|
||||
getQueue(name: string): Queue {
|
||||
if (!this.queues.has(name)) {
|
||||
const queue = new Queue(name, {
|
||||
|
|
@ -34,6 +18,5 @@ export class QueueService {
|
|||
for (const queue of this.queues.values()) {
|
||||
await queue.close()
|
||||
}
|
||||
this.queues.clear()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,27 +16,10 @@ import { removeStopwords } from 'stopword'
|
|||
import { randomUUID } from 'node:crypto'
|
||||
import { join, resolve, sep } from 'node:path'
|
||||
import KVStore from '#models/kv_store'
|
||||
import KbIngestState from '#models/kb_ingest_state'
|
||||
import { decideScanAction, type IngestPolicy } from '../utils/kb_ingest_decision.js'
|
||||
import KbRatioRegistry from '#models/kb_ratio_registry'
|
||||
import { decideWarnings } from '../utils/kb_warning_decision.js'
|
||||
import type { FileWarning, FileWarningsResult, StoredFileInfo } from '../../types/rag.js'
|
||||
import type { KbIngestStateValue } from '../../types/kb_ingest_state.js'
|
||||
import { ZIMExtractionService } from './zim_extraction_service.js'
|
||||
import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
|
||||
import { EMBEDDING_MODEL_NAME } from '../../constants/ollama.js'
|
||||
import { ProcessAndEmbedFileResponse, ProcessZIMFileResponse, RAGResult, RerankedRAGResult } from '../../types/rag.js'
|
||||
|
||||
export type EmbedSingleFileFailureCode =
|
||||
| 'not_found'
|
||||
| 'inflight'
|
||||
| 'delete_failed'
|
||||
| 'dispatch_failed'
|
||||
|
||||
export type EmbedSingleFileResult =
|
||||
| { success: true; message: string }
|
||||
| { success: false; code: EmbedSingleFileFailureCode; message: string }
|
||||
|
||||
@inject()
|
||||
export class RagService {
|
||||
private qdrant: QdrantClient | null = null
|
||||
|
|
@ -45,6 +28,7 @@ export class RagService {
|
|||
private resolvedEmbeddingModel: string | null = null
|
||||
public static UPLOADS_STORAGE_PATH = 'storage/kb_uploads'
|
||||
public static CONTENT_COLLECTION_NAME = 'nomad_knowledge_base'
|
||||
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
|
||||
public static EMBEDDING_DIMENSION = 768 // Nomic Embed Text v1.5 dimension is 768
|
||||
public static MODEL_CONTEXT_LENGTH = 2048 // nomic-embed-text has 2K token context
|
||||
public static MAX_SAFE_TOKENS = 1600 // Leave buffer for prefix and tokenization variance
|
||||
|
|
@ -286,25 +270,25 @@ export class RagService {
|
|||
if (!this.embeddingModelVerified) {
|
||||
const allModels = await this.ollamaService.getModels(true)
|
||||
const embeddingModel =
|
||||
allModels.find((model) => model.name === EMBEDDING_MODEL_NAME) ??
|
||||
allModels.find((model) => model.name === RagService.EMBEDDING_MODEL) ??
|
||||
allModels.find((model) => model.name.toLowerCase().includes('nomic-embed-text'))
|
||||
|
||||
if (!embeddingModel) {
|
||||
try {
|
||||
const downloadResult = await this.ollamaService.downloadModel(EMBEDDING_MODEL_NAME)
|
||||
const downloadResult = await this.ollamaService.downloadModel(RagService.EMBEDDING_MODEL)
|
||||
if (!downloadResult.success) {
|
||||
throw new Error(downloadResult.message || 'Unknown error during model download')
|
||||
}
|
||||
} catch (modelError) {
|
||||
logger.error(
|
||||
`[RAG] Embedding model ${EMBEDDING_MODEL_NAME} not found locally and failed to download:`,
|
||||
`[RAG] Embedding model ${RagService.EMBEDDING_MODEL} not found locally and failed to download:`,
|
||||
modelError
|
||||
)
|
||||
this.embeddingModelVerified = false
|
||||
return null
|
||||
}
|
||||
}
|
||||
this.resolvedEmbeddingModel = embeddingModel?.name ?? EMBEDDING_MODEL_NAME
|
||||
this.resolvedEmbeddingModel = embeddingModel?.name ?? RagService.EMBEDDING_MODEL
|
||||
this.embeddingModelVerified = true
|
||||
}
|
||||
|
||||
|
|
@ -361,7 +345,7 @@ export class RagService {
|
|||
|
||||
logger.debug(`[RAG] Embedding batch ${batchIdx + 1}/${totalBatches} (${batch.length} chunks)`)
|
||||
|
||||
const response = await this.ollamaService.embed(this.resolvedEmbeddingModel ?? EMBEDDING_MODEL_NAME, batch)
|
||||
const response = await this.ollamaService.embed(this.resolvedEmbeddingModel ?? RagService.EMBEDDING_MODEL, batch)
|
||||
|
||||
embeddings.push(...response.embeddings)
|
||||
|
||||
|
|
@ -516,13 +500,13 @@ export class RagService {
|
|||
`[RAG] Extracting ZIM content (batch: offset=${startOffset}, size=${ZIM_BATCH_SIZE})`
|
||||
)
|
||||
|
||||
const { chunks: zimChunks, totalArticles } = await zimExtractionService.extractZIMContent(
|
||||
filepath,
|
||||
{ startOffset, batchSize: ZIM_BATCH_SIZE }
|
||||
)
|
||||
const zimChunks = await zimExtractionService.extractZIMContent(filepath, {
|
||||
startOffset,
|
||||
batchSize: ZIM_BATCH_SIZE,
|
||||
})
|
||||
|
||||
logger.info(
|
||||
`[RAG] Extracted ${zimChunks.length} chunks from ZIM file with enhanced metadata (file totalArticles=${totalArticles})`
|
||||
`[RAG] Extracted ${zimChunks.length} chunks from ZIM file with enhanced metadata`
|
||||
)
|
||||
|
||||
// Process each chunk individually with its metadata
|
||||
|
|
@ -598,7 +582,6 @@ export class RagService {
|
|||
chunks: totalChunks,
|
||||
hasMoreBatches,
|
||||
articlesProcessed: articlesInBatch,
|
||||
totalArticles,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -818,12 +801,12 @@ export class RagService {
|
|||
if (!this.embeddingModelVerified) {
|
||||
const allModels = await this.ollamaService.getModels(true)
|
||||
const embeddingModel =
|
||||
allModels.find((model) => model.name === EMBEDDING_MODEL_NAME) ??
|
||||
allModels.find((model) => model.name === RagService.EMBEDDING_MODEL) ??
|
||||
allModels.find((model) => model.name.toLowerCase().includes('nomic-embed-text'))
|
||||
|
||||
if (!embeddingModel) {
|
||||
logger.warn(
|
||||
`[RAG] ${EMBEDDING_MODEL_NAME} not found. Cannot perform similarity search.`
|
||||
`[RAG] ${RagService.EMBEDDING_MODEL} not found. Cannot perform similarity search.`
|
||||
)
|
||||
this.embeddingModelVerified = false
|
||||
return []
|
||||
|
|
@ -855,7 +838,7 @@ export class RagService {
|
|||
return []
|
||||
}
|
||||
|
||||
const response = await this.ollamaService.embed(this.resolvedEmbeddingModel ?? EMBEDDING_MODEL_NAME, [prefixedQuery])
|
||||
const response = await this.ollamaService.embed(this.resolvedEmbeddingModel ?? RagService.EMBEDDING_MODEL, [prefixedQuery])
|
||||
|
||||
// Perform semantic search with a higher limit to enable reranking
|
||||
const searchLimit = limit * 3 // Get more results for reranking
|
||||
|
|
@ -1062,7 +1045,7 @@ export class RagService {
|
|||
}
|
||||
}
|
||||
|
||||
public async getStoredFiles(): Promise<StoredFileInfo[]> {
|
||||
public async getStoredFiles(): Promise<string[]> {
|
||||
try {
|
||||
await this._ensureCollection(
|
||||
RagService.CONTENT_COLLECTION_NAME,
|
||||
|
|
@ -1093,159 +1076,13 @@ export class RagService {
|
|||
offset = scrollResult.next_page_offset || null
|
||||
} while (offset !== null)
|
||||
|
||||
// Union the Qdrant-derived list with the disk-backed file paths the
|
||||
// state machine has tracked. Without this, files known to the scanner
|
||||
// but with zero embedded chunks (video-only ZIMs, failed-before-first-
|
||||
// chunk ingestions, browse_only opt-outs) never get a row in Stored
|
||||
// Files — which means warnings keyed off those files (#895 zero_chunks
|
||||
// in particular) have no row to attach to. The state machine is the
|
||||
// authoritative "what's on disk?" view; Qdrant is "what made it into
|
||||
// the vector store?". Both are needed to render the KB UI honestly.
|
||||
const stateByPath = new Map<string, { state: KbIngestStateValue; chunks_embedded: number }>()
|
||||
try {
|
||||
const stateRows = await KbIngestState.query().select('file_path', 'state', 'chunks_embedded')
|
||||
for (const row of stateRows) {
|
||||
sources.add(row.file_path)
|
||||
stateByPath.set(row.file_path, {
|
||||
state: row.state,
|
||||
chunks_embedded: row.chunks_embedded,
|
||||
})
|
||||
}
|
||||
} catch (error) {
|
||||
// Non-fatal: if the state machine query fails for any reason we'd
|
||||
// rather return the Qdrant-derived list than 500 the whole panel.
|
||||
logger.warn(
|
||||
{ err: error },
|
||||
'[RagService.getStoredFiles] state-machine union skipped; returning Qdrant-only list'
|
||||
)
|
||||
}
|
||||
|
||||
return Array.from(sources).map((source) => {
|
||||
const row = stateByPath.get(source)
|
||||
return {
|
||||
source,
|
||||
state: row?.state ?? null,
|
||||
chunksEmbedded: row?.chunks_embedded ?? 0,
|
||||
}
|
||||
})
|
||||
return Array.from(sources)
|
||||
} catch (error) {
|
||||
logger.error('Error retrieving stored files:', error)
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute whether the first-chat JIT prompt should fire and surface the file
|
||||
* count the banner uses in its copy ("Index your N existing files?"). The
|
||||
* banner appears when the user hasn't yet picked a global ingest policy
|
||||
* (`rag.defaultIngestPolicy` unset) and the scanner has actually seen at
|
||||
* least one embeddable file — i.e., the prompt is actionable, not theoretical
|
||||
* on a freshly-installed empty NOMAD.
|
||||
*
|
||||
* Once the user picks a policy (Always or Manual) via the banner buttons or
|
||||
* the KB modal toggle, `shouldPrompt` flips to false for good.
|
||||
*/
|
||||
public async getPolicyPromptState(): Promise<{
  shouldPrompt: boolean
  hasContent: boolean
  totalFiles: number
}> {
  // Read the stored global ingest policy first, then count the ingest-state rows.
  const storedPolicy = await KVStore.getValue('rag.defaultIngestPolicy')
  const aggregate = await KbIngestState.query().count('* as total').first()

  // The aggregate value is read from `$extras.total`; a missing row or column counts as 0.
  const rawTotal = (aggregate as any)?.$extras?.total
  const totalFiles = Number(rawTotal ?? 0)

  const hasContent = totalFiles > 0
  const policyUnset = storedPolicy === null

  // Prompt only when no policy has been stored AND at least one state row exists.
  return { shouldPrompt: policyUnset && hasContent, hasContent, totalFiles }
}
|
||||
|
||||
/**
|
||||
* Compute conditional warnings (RFC #883 §6) for every source the scanner
|
||||
* sees on disk. Returns `{ ok, warnings }` — `ok: false` distinguishes a
|
||||
* computation failure (Qdrant unreachable, DB outage, FS error) from the
|
||||
* healthy-but-empty case, which is critical because the whole point of this
|
||||
* surface is to expose silent failures; reporting "everything healthy" when
|
||||
* we couldn't actually check would reintroduce the bug we set out to fix.
|
||||
*
|
||||
* Per-source chunk counts come from a single Qdrant scroll over the
|
||||
* collection's points; expected-chunk estimates come from the ratio
|
||||
* registry. Files in the scanner's directories that have no qdrant points
|
||||
* at all show up with `chunksInQdrant: 0` so Warning A can fire.
|
||||
*/
|
||||
public async computeFileWarnings(): Promise<FileWarningsResult> {
  // Three passes: (1) tally Qdrant points per source, (2) walk the storage
  // directories for files and their sizes, (3) run decideWarnings per source.
  // Any thrown error is logged and reported as `{ ok: false, warnings: {} }`.
  try {
    await this._ensureCollection(
      RagService.CONTENT_COLLECTION_NAME,
      RagService.EMBEDDING_DIMENSION
    )

    // Pass 1: scroll the collection page by page, fetching only the `source` payload field.
    const pointTally = new Map<string, number>()
    const PAGE_SIZE = 100
    let cursor: string | number | null | Record<string, unknown> = null
    do {
      const page = await this.qdrant!.scroll(RagService.CONTENT_COLLECTION_NAME, {
        limit: PAGE_SIZE,
        offset: cursor,
        with_payload: ['source'],
        with_vector: false,
      })
      for (const { payload } of page.points) {
        const src = payload?.source
        // Points without a non-empty string `source` are ignored.
        if (!src || typeof src !== 'string') continue
        pointTally.set(src, (pointTally.get(src) ?? 0) + 1)
      }
      cursor = page.next_page_offset || null
    } while (cursor !== null)

    // Pass 2: list both storage roots so files with zero points are still considered.
    const scanRoots = [
      join(process.cwd(), RagService.UPLOADS_STORAGE_PATH),
      join(process.cwd(), ZIM_STORAGE_PATH),
    ]
    const knownSources = new Set<string>(pointTally.keys())
    const bytesByPath = new Map<string, number>()

    for (const root of scanRoots) {
      try {
        const listing = await listDirectoryContentsRecursive(root)
        for (const item of listing) {
          if (item.type !== 'file') continue
          knownSources.add(item.key)
          const info = await getFileStatsIfExists(item.key)
          if (info) bytesByPath.set(item.key, Number(info.size))
        }
      } catch (error: any) {
        // A missing directory is tolerated; anything else aborts the computation.
        if (error?.code !== 'ENOENT') throw error
      }
    }

    // Pass 3: only sources that produce at least one warning appear in the result.
    const warningsBySource: Record<string, FileWarning[]> = {}
    for (const source of knownSources) {
      const fileSizeBytes = bytesByPath.get(source) ?? 0
      const chunksInQdrant = pointTally.get(source) ?? 0
      const baseName = source.split(/[/\\]/).pop() ?? source
      // No estimate is requested when the size is unknown or zero.
      const expectedChunks =
        fileSizeBytes > 0 ? await KbRatioRegistry.estimateChunks(baseName, fileSizeBytes) : null

      const found = decideWarnings({ fileSizeBytes, chunksInQdrant, expectedChunks })
      if (found.length > 0) warningsBySource[source] = found
    }

    return { ok: true, warnings: warningsBySource }
  } catch (error) {
    logger.error('[RAG] Error computing file warnings:', error)
    return { ok: false, warnings: {} }
  }
}
|
||||
|
||||
/**
|
||||
* Delete all Qdrant points associated with a given source path and remove
|
||||
* the corresponding file from disk if it lives under the uploads directory.
|
||||
|
|
@ -1280,11 +1117,6 @@ export class RagService {
|
|||
logger.warn(`[RAG] File was removed from knowledge base but doesn't live in Nomad's uploads directory, so it can't be safely removed. Skipping deletion of physical file...`)
|
||||
}
|
||||
|
||||
// Drop the ingest state row last so the file disappears entirely. Without
|
||||
// this, the next scanAndSyncStorage would see `indexed + no chunks` for a
|
||||
// path that no longer exists in storage and try to re-embed nothing.
|
||||
await KbIngestState.remove(source)
|
||||
|
||||
return { success: true, message: 'File removed from knowledge base.' }
|
||||
} catch (error) {
|
||||
logger.error('[RAG] Error deleting file from knowledge base:', error)
|
||||
|
|
@ -1349,182 +1181,12 @@ export class RagService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk kb_uploads and zim storage directories, returning the full path of
|
||||
* every embeddable file. Non-embeddable types (e.g. kiwix-library.xml) are
|
||||
* filtered out so they aren't dispatched only to fail with "Unsupported file
|
||||
* type" and retry on every sync.
|
||||
*/
|
||||
private async _discoverKbFiles(): Promise<string[]> {
  // Lists every file under the uploads and ZIM storage directories and returns
  // the keys of those whose determineFileType() result is not 'unknown'.
  // A missing directory (ENOENT) is skipped; any other listing error is rethrown.

  // Each root is [label used in log lines, absolute directory path].
  const roots = [
    [RagService.UPLOADS_STORAGE_PATH, join(process.cwd(), RagService.UPLOADS_STORAGE_PATH)],
    [ZIM_STORAGE_PATH, join(process.cwd(), ZIM_STORAGE_PATH)],
  ] as const

  const filesInStorage: string[] = []

  for (const [label, dirPath] of roots) {
    try {
      const contents = await listDirectoryContentsRecursive(dirPath)
      let fileCount = 0
      for (const entry of contents) {
        if (entry.type !== 'file') continue
        filesInStorage.push(entry.key)
        fileCount++
      }
      // Log the number of file entries actually collected, not the raw listing
      // length, so the "files" count is not inflated by non-file entries.
      logger.debug(`[RAG] Found ${fileCount} files in ${label}`)
    } catch (error: any) {
      // Optional chaining keeps a nullish thrown value from raising a TypeError here.
      if (error?.code !== 'ENOENT') throw error
      logger.debug(`[RAG] ${label} directory does not exist, skipping`)
    }
  }

  return filesInStorage.filter((f) => determineFileType(f) !== 'unknown')
}
|
||||
|
||||
/**
|
||||
* Dispatch one EmbedFileJob per file path. Returns honest counts: `queuedCount`
|
||||
* is jobs newly enqueued, `dedupedCount` is jobs that hit BullMQ's per-file
|
||||
* jobId dedupe (an existing :completed/:waiting/etc. entry was returned
|
||||
* instead of a new enqueue), and `failedPaths` lists files whose dispatch
|
||||
* threw. Pass `force: true` for bulk callers that need to bypass dedupe
|
||||
* entirely. Per-file errors are logged but don't abort the batch — callers
|
||||
* must inspect `failedPaths` to surface partial failure to the operator.
|
||||
*/
|
||||
private async _dispatchEmbedJobsFor(
  filePaths: string[],
  options?: { force?: boolean }
): Promise<{ queuedCount: number; dedupedCount: number; failedPaths: string[] }> {
  // Dispatches one EmbedFileJob per path, sequentially, and reports how many
  // dispatches were created, how many were not, and which paths threw.
  const { EmbedFileJob } = await import('#jobs/embed_file_job')

  const tally = { queuedCount: 0, dedupedCount: 0, failedPaths: [] as string[] }

  for (const filePath of filePaths) {
    try {
      const stats = await getFileStatsIfExists(filePath)
      const payload = {
        filePath,
        // Last path segment (either separator style); falls back to the full path.
        fileName: filePath.split(/[/\\]/).pop() || filePath,
        fileSize: stats?.size,
      }
      const outcome = await EmbedFileJob.dispatch(payload, { force: options?.force })
      if (outcome.created) tally.queuedCount += 1
      else tally.dedupedCount += 1
    } catch (fileError) {
      // A failure for one file is recorded and logged; the batch keeps going.
      tally.failedPaths.push(filePath)
      logger.error(`[RAG] Error dispatching job for file ${filePath}:`, fileError)
    }
  }

  return tally
}
|
||||
|
||||
/**
|
||||
* Dispatch an embed job for a single stored file. Wraps `_dispatchEmbedJobsFor`
|
||||
* with the safety checks needed for a user-triggered per-row action:
|
||||
* 1. The source must be known to the scanner OR have a state row — prevents
|
||||
* arbitrary path dispatch from the public API.
|
||||
* 2. We refuse if any inflight job (waiting/active/delayed/paused) already
|
||||
* targets this filePath. Otherwise a double-click or a rapid retry could
|
||||
* enqueue duplicate jobs, producing duplicate chunks.
|
||||
* 3. When `force` is true (Re-embed of an already-indexed file), we
|
||||
* pre-delete the prior Qdrant points so the new run doesn't stack on
|
||||
* top of the old ones. For force=false (Index of a never-embedded file),
|
||||
* there's nothing to clear.
|
||||
*/
|
||||
public async embedSingleFile(
  source: string,
  force: boolean = false
): Promise<EmbedSingleFileResult> {
  // Gate 1: the path must have an ingest-state row or be found by the storage scan.
  const trackedRow = await KbIngestState.query().where('file_path', source).first()
  const isKnown = trackedRow ? true : (await this._discoverKbFiles()).includes(source)
  if (!isKnown) {
    return {
      success: false,
      code: 'not_found',
      message: 'File is not a tracked knowledge-base source.',
    }
  }

  // Gate 2: refuse when a waiting/active/delayed/paused job already targets this path.
  const { EmbedFileJob } = await import('#jobs/embed_file_job')
  const { QueueService } = await import('#services/queue_service')
  const pendingJobs = await QueueService.getInstance()
    .getQueue(EmbedFileJob.queue)
    .getJobs(['waiting', 'active', 'delayed', 'paused'])
  const alreadyQueued = pendingJobs.some((job) => job.data?.filePath === source)
  if (alreadyQueued) {
    return {
      success: false,
      code: 'inflight',
      message: 'A job for this file is already in progress. Wait for it to finish before re-queuing.',
    }
  }

  // Gate 3 (force only): clear existing points first; a failed delete aborts the re-embed.
  if (force) {
    try {
      await this._deletePointsBySource(source)
    } catch (err) {
      logger.error(`[RAG] Failed to delete prior points for ${source}; aborting re-embed:`, err)
      return {
        success: false,
        code: 'delete_failed',
        message: 'Failed to clear prior embeddings before re-embed.',
      }
    }
  }

  const { failedPaths } = await this._dispatchEmbedJobsFor([source], { force })
  if (failedPaths.length > 0) {
    return {
      success: false,
      code: 'dispatch_failed',
      message: 'Failed to dispatch embed job for this file.',
    }
  }

  const message = force ? 'Re-embed queued for this file.' : 'Indexing queued for this file.'
  return { success: true, message }
}
|
||||
|
||||
/**
|
||||
* Delete all Qdrant points whose `source` payload matches the given path.
|
||||
* Unlike deleteFileBySource(), this does NOT touch the file on disk — used
|
||||
* by reembedAll() and the forced path of embedSingleFile(), where the file must remain so it can be re-ingested.
|
||||
*/
|
||||
private async _deletePointsBySource(source: string): Promise<void> {
  const collection = RagService.CONTENT_COLLECTION_NAME

  // Same _ensureCollection call the other Qdrant operations in this class make
  // before touching the collection.
  await this._ensureCollection(collection, RagService.EMBEDDING_DIMENSION)

  // Delete every point whose `source` payload equals the given path.
  await this.qdrant!.delete(collection, {
    filter: {
      must: [{ key: 'source', match: { value: source } }],
    },
  })
}
|
||||
|
||||
/**
|
||||
* Returns true if the file-embeddings queue has any in-flight work
|
||||
* (waiting, active, delayed, or paused). Bulk re-embed actions use this
|
||||
* to refuse mid-flight to avoid racing with deletes/dispatches already
|
||||
* in progress.
|
||||
*/
|
||||
private async _hasInflightEmbedJobs(): Promise<boolean> {
  const { EmbedFileJob } = await import('#jobs/embed_file_job')
  const { QueueService } = await import('#services/queue_service')

  const embedQueue = QueueService.getInstance().getQueue(EmbedFileJob.queue)
  const { waiting, active, delayed, paused } = await embedQueue.getJobCounts(
    'waiting',
    'active',
    'delayed',
    'paused'
  )

  // A missing count is treated as 0; any positive total means work is in flight.
  const inflightTotal = [waiting, active, delayed, paused].reduce(
    (sum, count) => sum + (count || 0),
    0
  )
  return inflightTotal > 0
}
|
||||
|
||||
/**
|
||||
* Scans the knowledge base storage directories and syncs with Qdrant.
|
||||
* Identifies files that exist in storage but haven't been embedded yet,
|
||||
* and dispatches EmbedFileJob for each missing file.
|
||||
*
|
||||
* @returns Object containing success status, message, and counts of scanned/queued files
|
||||
*/
|
||||
public async scanAndSyncStorage(): Promise<{
|
||||
success: boolean
|
||||
|
|
@ -1535,111 +1197,91 @@ export class RagService {
|
|||
try {
|
||||
logger.info('[RAG] Starting knowledge base sync scan')
|
||||
|
||||
const KB_UPLOADS_PATH = join(process.cwd(), RagService.UPLOADS_STORAGE_PATH)
|
||||
const ZIM_PATH = join(process.cwd(), ZIM_STORAGE_PATH)
|
||||
|
||||
const filesInStorage: string[] = []
|
||||
|
||||
// Force resync of Nomad docs
|
||||
await this.discoverNomadDocs(true).catch((error) => {
|
||||
logger.error('[RAG] Error during Nomad docs discovery in sync process:', error)
|
||||
})
|
||||
|
||||
const filesInStorage = await this._discoverKbFiles()
|
||||
logger.info(`[RAG] Found ${filesInStorage.length} embeddable files in storage`)
|
||||
// Scan kb_uploads directory
|
||||
try {
|
||||
const kbContents = await listDirectoryContentsRecursive(KB_UPLOADS_PATH)
|
||||
kbContents.forEach((entry) => {
|
||||
if (entry.type === 'file') {
|
||||
filesInStorage.push(entry.key)
|
||||
}
|
||||
})
|
||||
logger.debug(`[RAG] Found ${kbContents.length} files in ${RagService.UPLOADS_STORAGE_PATH}`)
|
||||
} catch (error) {
|
||||
if (error.code === 'ENOENT') {
|
||||
logger.debug(`[RAG] ${RagService.UPLOADS_STORAGE_PATH} directory does not exist, skipping`)
|
||||
} else {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
// Scan zim directory
|
||||
try {
|
||||
const zimContents = await listDirectoryContentsRecursive(ZIM_PATH)
|
||||
zimContents.forEach((entry) => {
|
||||
if (entry.type === 'file') {
|
||||
filesInStorage.push(entry.key)
|
||||
}
|
||||
})
|
||||
logger.debug(`[RAG] Found ${zimContents.length} files in ${ZIM_STORAGE_PATH}`)
|
||||
} catch (error) {
|
||||
if (error.code === 'ENOENT') {
|
||||
logger.debug(`[RAG] ${ZIM_STORAGE_PATH} directory does not exist, skipping`)
|
||||
} else {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`[RAG] Found ${filesInStorage.length} total files in storage directories`)
|
||||
|
||||
// Get all stored sources from Qdrant
|
||||
await this._ensureCollection(
|
||||
RagService.CONTENT_COLLECTION_NAME,
|
||||
RagService.EMBEDDING_DIMENSION
|
||||
)
|
||||
|
||||
// Collect every unique `source` already in Qdrant so we can skip files
|
||||
// that have already been embedded.
|
||||
const sourcesInQdrant = new Set<string>()
|
||||
let offset: string | number | null | Record<string, unknown> = null
|
||||
const batchSize = 100
|
||||
|
||||
// Scroll through all points to get sources
|
||||
do {
|
||||
const scrollResult = await this.qdrant!.scroll(RagService.CONTENT_COLLECTION_NAME, {
|
||||
limit: 100,
|
||||
offset,
|
||||
with_payload: ['source'],
|
||||
limit: batchSize,
|
||||
offset: offset,
|
||||
with_payload: ['source'], // Only fetch source field for efficiency
|
||||
with_vector: false,
|
||||
})
|
||||
|
||||
scrollResult.points.forEach((point) => {
|
||||
const source = point.payload?.source
|
||||
if (source && typeof source === 'string') sourcesInQdrant.add(source)
|
||||
if (source && typeof source === 'string') {
|
||||
sourcesInQdrant.add(source)
|
||||
}
|
||||
})
|
||||
|
||||
offset = scrollResult.next_page_offset || null
|
||||
} while (offset !== null)
|
||||
|
||||
logger.info(`[RAG] Found ${sourcesInQdrant.size} unique sources in Qdrant`)
|
||||
|
||||
// Load all known per-file ingest states. The state row is authoritative
|
||||
// over the "any chunks in Qdrant" heuristic — it captures user choices
|
||||
// (browse_only) and terminal outcomes (failed, stalled) that aren't visible
|
||||
// from Qdrant alone. See RFC #883 for the full state machine.
|
||||
const stateRows = await KbIngestState.all()
|
||||
const stateByPath = new Map(stateRows.map((row) => [row.file_path, row]))
|
||||
|
||||
// Non-embeddable files (e.g. kiwix-library.xml in /storage/zim) would otherwise
|
||||
// be dispatched to EmbedFileJob, fail with "Unsupported file type", and retry
|
||||
// on every sync — filter them out before state decisions.
|
||||
const embeddableFiles = filesInStorage.filter(
|
||||
(filePath) => determineFileType(filePath) !== 'unknown'
|
||||
// Find files that are in storage, not already in Qdrant, and have an embeddable type.
|
||||
// Non-embeddable files (e.g. kiwix-library.xml in /storage/zim) would otherwise be
|
||||
// dispatched to EmbedFileJob, fail with "Unsupported file type", and retry on every sync.
|
||||
const filesToEmbed = filesInStorage.filter(
|
||||
(filePath) => !sourcesInQdrant.has(filePath) && determineFileType(filePath) !== 'unknown'
|
||||
)
|
||||
|
||||
// Read the global ingest policy. Unset is treated as 'Always' so legacy
|
||||
// installs keep their current behavior until the user explicitly opts
|
||||
// into Manual mode from the KB panel.
|
||||
const policyRaw = await KVStore.getValue('rag.defaultIngestPolicy')
|
||||
const policy: IngestPolicy = policyRaw === 'Manual' ? 'Manual' : 'Always'
|
||||
|
||||
const filesToEmbed: string[] = []
|
||||
let backfilled = 0
|
||||
let createdRows = 0
|
||||
let createdPending = 0
|
||||
let skipped = 0
|
||||
|
||||
for (const filePath of embeddableFiles) {
|
||||
const stateRow = stateByPath.get(filePath) ?? null
|
||||
const action = decideScanAction(stateRow, sourcesInQdrant.has(filePath), policy)
|
||||
|
||||
switch (action.kind) {
|
||||
case 'skip':
|
||||
skipped++
|
||||
break
|
||||
case 'backfill_indexed':
|
||||
// Pre-RFC install (or a fresh admin pointed at an existing Qdrant volume):
|
||||
// chunks already exist with no state row, so trust Qdrant and record
|
||||
// `indexed` without re-embedding. chunks_embedded is left 0 because
|
||||
// we don't count points-per-source during the scroll above.
|
||||
await KbIngestState.create({
|
||||
file_path: filePath,
|
||||
state: 'indexed',
|
||||
chunks_embedded: 0,
|
||||
})
|
||||
backfilled++
|
||||
break
|
||||
case 'create_pending':
|
||||
// Manual mode: record that we've seen the file but don't dispatch.
|
||||
// The KB panel surfaces a per-card "Index" affordance for these.
|
||||
await KbIngestState.create({
|
||||
file_path: filePath,
|
||||
state: 'pending_decision',
|
||||
chunks_embedded: 0,
|
||||
})
|
||||
createdPending++
|
||||
break
|
||||
case 'dispatch':
|
||||
if (action.createStateRow) {
|
||||
await KbIngestState.create({
|
||||
file_path: filePath,
|
||||
state: 'pending_decision',
|
||||
chunks_embedded: 0,
|
||||
})
|
||||
createdRows++
|
||||
}
|
||||
filesToEmbed.push(filePath)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`[RAG] Scan results (policy=${policy}): ${filesToEmbed.length} to embed, ${backfilled} backfilled, ${createdRows} new pending, ${createdPending} waiting on user, ${skipped} skipped`
|
||||
)
|
||||
logger.info(`[RAG] Found ${filesToEmbed.length} files that need embedding`)
|
||||
|
||||
if (filesToEmbed.length === 0) {
|
||||
return {
|
||||
|
|
@ -1650,193 +1292,41 @@ export class RagService {
|
|||
}
|
||||
}
|
||||
|
||||
const { queuedCount, dedupedCount } = await this._dispatchEmbedJobsFor(filesToEmbed)
|
||||
const dedupeNote = dedupedCount > 0 ? ` (${dedupedCount} already queued)` : ''
|
||||
// Import EmbedFileJob dynamically to avoid circular dependencies
|
||||
const { EmbedFileJob } = await import('#jobs/embed_file_job')
|
||||
|
||||
// Dispatch jobs for files that need embedding
|
||||
let queuedCount = 0
|
||||
for (const filePath of filesToEmbed) {
|
||||
try {
|
||||
const fileName = filePath.split(/[/\\]/).pop() || filePath
|
||||
const stats = await getFileStatsIfExists(filePath)
|
||||
|
||||
logger.info(`[RAG] Dispatching embed job for: ${fileName}`)
|
||||
await EmbedFileJob.dispatch({
|
||||
filePath: filePath,
|
||||
fileName: fileName,
|
||||
fileSize: stats?.size,
|
||||
})
|
||||
queuedCount++
|
||||
logger.debug(`[RAG] Successfully dispatched job for ${fileName}`)
|
||||
} catch (fileError) {
|
||||
logger.error(`[RAG] Error dispatching job for file ${filePath}:`, fileError)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
message: `Scanned ${filesInStorage.length} files, queued ${queuedCount} for embedding${dedupeNote}`,
|
||||
message: `Scanned ${filesInStorage.length} files, queued ${queuedCount} for embedding`,
|
||||
filesScanned: filesInStorage.length,
|
||||
filesQueued: queuedCount,
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('[RAG] Error scanning and syncing knowledge base:', error)
|
||||
return { success: false, message: 'Error scanning and syncing knowledge base' }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-embed every file on disk (per-file replace). For each discovered file:
|
||||
* delete its existing Qdrant points by `source` match, then dispatch a fresh
|
||||
* EmbedFileJob. Files are NOT removed from disk. Any orphan points (points
|
||||
* whose source file no longer exists) are intentionally preserved — use
|
||||
* resetAndRebuild() if a clean slate is required.
|
||||
*
|
||||
* Refuses to run if the embeddings queue already has in-flight work.
|
||||
*/
|
||||
public async reembedAll(): Promise<{
|
||||
success: boolean
|
||||
message: string
|
||||
filesScanned?: number
|
||||
filesQueued?: number
|
||||
failedPaths?: string[]
|
||||
}> {
|
||||
try {
|
||||
if (await this._hasInflightEmbedJobs()) {
|
||||
return {
|
||||
success: false,
|
||||
message: 'Embed jobs are already in progress. Wait for the queue to drain (or clean up failed jobs) before triggering a bulk re-embed.',
|
||||
}
|
||||
}
|
||||
|
||||
logger.info('[RAG] Starting full re-embed (per-file replace)')
|
||||
|
||||
await this.discoverNomadDocs(true).catch((error) => {
|
||||
logger.error('[RAG] Error re-running Nomad docs discovery during re-embed:', error)
|
||||
})
|
||||
|
||||
const filesInStorage = await this._discoverKbFiles()
|
||||
|
||||
await this._ensureCollection(
|
||||
RagService.CONTENT_COLLECTION_NAME,
|
||||
RagService.EMBEDDING_DIMENSION
|
||||
)
|
||||
|
||||
// Per-file: delete-then-dispatch. We tried dispatch-then-delete but that
|
||||
// opens a race where a fast worker can write new points before our
|
||||
// delete-by-source runs, wiping both. Instead we delete first, then
|
||||
// dispatch — and if dispatch fails, we surface the failed paths in the
|
||||
// response so the operator knows which files dropped out (rather than
|
||||
// silently leaving them unindexed). A subsequent sync rescan picks them
|
||||
// back up. Note: a delete-failure aborts the per-file pair (we don't
|
||||
// dispatch a job whose old points are still present, since they'd live
|
||||
// alongside the new vectors forever).
|
||||
const { EmbedFileJob } = await import('#jobs/embed_file_job')
|
||||
let queuedCount = 0
|
||||
const failedPaths: string[] = []
|
||||
for (const filePath of filesInStorage) {
|
||||
try {
|
||||
await this._deletePointsBySource(filePath)
|
||||
} catch (err) {
|
||||
logger.error(`[RAG] Failed to delete prior points for ${filePath}; skipping dispatch:`, err)
|
||||
failedPaths.push(filePath)
|
||||
continue
|
||||
}
|
||||
try {
|
||||
const fileName = filePath.split(/[/\\]/).pop() || filePath
|
||||
const stats = await getFileStatsIfExists(filePath)
|
||||
const result = await EmbedFileJob.dispatch(
|
||||
{ filePath, fileName, fileSize: stats?.size },
|
||||
{ force: true }
|
||||
)
|
||||
if (result.created) queuedCount++
|
||||
} catch (fileError) {
|
||||
// Old points already deleted but the new job never made it onto the
|
||||
// queue. Logged + surfaced so an operator can rerun a sync.
|
||||
logger.error(`[RAG] Re-embed dispatch failed for ${filePath} after delete; file is now unindexed until next sync:`, fileError)
|
||||
failedPaths.push(filePath)
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`[RAG] Re-embed dispatched ${queuedCount}/${filesInStorage.length} files` +
|
||||
(failedPaths.length > 0 ? ` (${failedPaths.length} failed)` : '')
|
||||
)
|
||||
|
||||
const failureSuffix =
|
||||
failedPaths.length > 0
|
||||
? ` ${failedPaths.length} file${failedPaths.length === 1 ? '' : 's'} failed to dispatch and are temporarily unindexed — run a sync rescan to recover.`
|
||||
: ''
|
||||
|
||||
return {
|
||||
success: failedPaths.length === 0,
|
||||
message:
|
||||
`Re-embedding ${queuedCount} file${queuedCount === 1 ? '' : 's'}. Existing points were replaced.` +
|
||||
failureSuffix,
|
||||
filesScanned: filesInStorage.length,
|
||||
filesQueued: queuedCount,
|
||||
...(failedPaths.length > 0 ? { failedPaths } : {}),
|
||||
success: false,
|
||||
message: 'Error scanning and syncing knowledge base',
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('[RAG] Error during re-embed:', error)
|
||||
return { success: false, message: 'Error during re-embed' }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructive rebuild. Drops the entire Qdrant collection (wiping every
|
||||
* point including orphans), recreates it with the correct dimension, clears
|
||||
* the Nomad-docs discovery flag, then dispatches an EmbedFileJob for every
|
||||
* file currently on disk.
|
||||
*
|
||||
* Refuses to run if the embeddings queue already has in-flight work.
|
||||
*/
|
||||
public async resetAndRebuild(): Promise<{
  success: boolean
  message: string
  filesScanned?: number
  filesQueued?: number
  failedPaths?: string[]
}> {
  try {
    // Refuse while the embed queue still has in-flight work.
    const queueBusy = await this._hasInflightEmbedJobs()
    if (queueBusy) {
      return {
        success: false,
        message: 'Embed jobs are already in progress. Wait for the queue to drain (or clean up failed jobs) before triggering a reset.',
      }
    }

    logger.info('[RAG] Starting destructive reset & rebuild')

    const collection = RagService.CONTENT_COLLECTION_NAME
    await this._initializeQdrantClient()

    // Drop the collection; a failure here is logged as a warning and not treated as fatal.
    try {
      await this.qdrant!.deleteCollection(collection)
      logger.info(`[RAG] Dropped collection ${collection}`)
    } catch (err) {
      logger.warn(`[RAG] deleteCollection failed (may not exist): ${(err as Error).message}`)
    }

    await this._ensureCollection(collection, RagService.EMBEDDING_DIMENSION)

    // Clear the docs-embedded flag, then re-run docs discovery; its errors are only logged.
    await KVStore.setValue('rag.docsEmbedded', false)
    await this.discoverNomadDocs(true).catch((error) => {
      logger.error('[RAG] Error re-running Nomad docs discovery after reset:', error)
    })

    // Re-dispatch every discovered file with force enabled.
    const filesInStorage = await this._discoverKbFiles()
    const dispatch = await this._dispatchEmbedJobsFor(filesInStorage, { force: true })
    const { queuedCount, failedPaths } = dispatch
    const failedCount = failedPaths.length

    const failedNote = failedCount > 0 ? ` (${failedCount} failed)` : ''
    logger.info(
      `[RAG] Reset complete — dispatched ${queuedCount}/${filesInStorage.length} files${failedNote}`
    )

    // Dispatch failures are appended to the message so the caller can see them.
    let failureSuffix = ''
    if (failedCount > 0) {
      failureSuffix = ` ${failedCount} file${failedCount === 1 ? '' : 's'} failed to dispatch and are temporarily unindexed — run a sync rescan to recover.`
    }

    const summary = {
      success: failedCount === 0,
      message: `Collection wiped. Queued ${queuedCount} file${queuedCount === 1 ? '' : 's'} for a full rebuild.${failureSuffix}`,
      filesScanned: filesInStorage.length,
      filesQueued: queuedCount,
    }
    // `failedPaths` is only included when at least one dispatch failed.
    return failedCount > 0 ? { ...summary, failedPaths } : summary
  } catch (error) {
    logger.error('[RAG] Error during reset & rebuild:', error)
    return { success: false, message: 'Error during reset & rebuild' }
  }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,40 +95,12 @@ export class SystemService {
|
|||
if (!ollamaContainer) return null
|
||||
|
||||
const container = this.dockerService.docker.getContainer(ollamaContainer.Id)
|
||||
|
||||
// Read logs only from the first 5 minutes after container start. The
|
||||
// "inference compute" line is written once during Ollama's GPU discovery
|
||||
// phase, within seconds of startup. Using tail:N here is fragile: under
|
||||
// active embedding workloads we've seen >1000 lines/min, which pushes the
|
||||
// line past any reasonable tail in minutes. Pinning to the startup window
|
||||
// is bounded (~5 min of logs regardless of container uptime) and never
|
||||
// ages out.
|
||||
//
|
||||
// Fall back to the previous tail:500 strategy if StartedAt is missing or
|
||||
// unparseable — we can't construct a since/until window without it, but
|
||||
// tail:500 is still useful when the container just started and the line
|
||||
// is still recent.
|
||||
const inspect = await container.inspect()
|
||||
const startedAtRaw = inspect?.State?.StartedAt
|
||||
const startedAtMs = startedAtRaw ? new Date(startedAtRaw).getTime() : NaN
|
||||
const hasValidStartedAt = Number.isFinite(startedAtMs) && startedAtMs > 0
|
||||
|
||||
const logsOpts: { stdout: true; stderr: true; follow: false; since?: number; until?: number; tail?: number } = {
|
||||
const buf = (await container.logs({
|
||||
stdout: true,
|
||||
stderr: true,
|
||||
tail: 500,
|
||||
follow: false,
|
||||
}
|
||||
if (hasValidStartedAt) {
|
||||
const startedAtSec = Math.floor(startedAtMs / 1000)
|
||||
logsOpts.since = startedAtSec
|
||||
logsOpts.until = startedAtSec + 300 // 5-minute window
|
||||
} else {
|
||||
logger.warn(
|
||||
`[SystemService] nomad_ollama State.StartedAt missing or invalid (${startedAtRaw ?? 'undefined'}); falling back to tail:500 for inference-compute probe`
|
||||
)
|
||||
logsOpts.tail = 500
|
||||
}
|
||||
const buf = (await container.logs(logsOpts)) as unknown as Buffer
|
||||
})) as unknown as Buffer
|
||||
const logs = buf.toString('utf8')
|
||||
|
||||
const lines = logs.split('\n').filter((l) => l.includes('msg="inference compute"'))
|
||||
|
|
@ -427,42 +399,9 @@ export class SystemService {
|
|||
os.kernel = dockerInfo.KernelVersion
|
||||
}
|
||||
|
||||
// si.graphics() in the admin container uses lspci (pciutils ships in
|
||||
// the image for AMD detection). lspci has no real VRAM info for
|
||||
// discrete GPUs, so systeminformation parses the first PCI memory
|
||||
// Region (BAR0, typically 1-32 MiB) as `vram`. nvidia-smi / ROCm
|
||||
// tooling enrichment also can't run since neither is in the admin
|
||||
// image. No real dGPU has under 256 MiB, so any discrete-GPU controller
|
||||
// below that threshold needs the probes below to give us real data.
|
||||
// Applies to both NVIDIA and AMD; Intel iGPUs are exempt because their
|
||||
// shared-system-memory VRAM reading via lspci can legitimately be small.
|
||||
const DGPU_BOGUS_VRAM_THRESHOLD_MIB = 256
|
||||
const isDiscreteGpuVendor = (vendor: string) =>
|
||||
/nvidia|advanced micro devices|amd|ati/i.test(vendor)
|
||||
const isBogusDgpuVram = (c: { vendor?: string; vram?: number | null }) =>
|
||||
isDiscreteGpuVendor(c.vendor || '') &&
|
||||
typeof c.vram === 'number' &&
|
||||
c.vram < DGPU_BOGUS_VRAM_THRESHOLD_MIB
|
||||
|
||||
// Clear the bogus value up front. If a probe replaces the entry below
|
||||
// we get the real VRAM; if no probe succeeds (Ollama not installed,
|
||||
// passthrough_failed) the UI falls back to "N/A" instead of showing
|
||||
// "1 MB" / "32 MB". The lspci model/vendor strings stay since they're
|
||||
// still useful for identifying the card.
|
||||
const hasLspciBogusDgpuVram = (graphics.controllers || []).some(isBogusDgpuVram)
|
||||
if (hasLspciBogusDgpuVram) {
|
||||
for (const c of graphics.controllers) {
|
||||
if (isBogusDgpuVram(c)) c.vram = null
|
||||
}
|
||||
}
|
||||
|
||||
// Run the probes when controllers are empty (common inside Docker) or
|
||||
// when lspci gave us bogus discrete-GPU BAR0 values that need replacing.
|
||||
if (
|
||||
!graphics.controllers ||
|
||||
graphics.controllers.length === 0 ||
|
||||
hasLspciBogusDgpuVram
|
||||
) {
|
||||
// If si.graphics() returned no controllers (common inside Docker),
|
||||
// fall back to runtime + Ollama log probe to figure out what's accessible.
|
||||
if (!graphics.controllers || graphics.controllers.length === 0) {
|
||||
const runtimes = dockerInfo.Runtimes || {}
|
||||
gpuHealth.hasNvidiaRuntime = 'nvidia' in runtimes
|
||||
|
||||
|
|
|
|||
|
|
@ -40,10 +40,7 @@ export class ZIMExtractionService {
|
|||
* @param filePath - Path to the ZIM file
|
||||
* @param opts - Options including maxArticles, strategy, onProgress, startOffset, and batchSize
|
||||
*/
|
||||
async extractZIMContent(
|
||||
filePath: string,
|
||||
opts: ExtractZIMContentOptions = {}
|
||||
): Promise<{ chunks: ZIMContentChunk[]; totalArticles: number }> {
|
||||
async extractZIMContent(filePath: string, opts: ExtractZIMContentOptions = {}): Promise<ZIMContentChunk[]> {
|
||||
try {
|
||||
logger.info(`[ZIMExtractionService]: Processing ZIM file at path: ${filePath}`)
|
||||
|
||||
|
|
@ -164,7 +161,7 @@ export class ZIMExtractionService {
|
|||
textPreview: c.text.substring(0, 100)
|
||||
})))
|
||||
logger.debug("Total structured sections extracted:", toReturn.length)
|
||||
return { chunks: toReturn, totalArticles: archive.articleCount }
|
||||
return toReturn
|
||||
} catch (error) {
|
||||
logger.error('Error processing ZIM file:', error)
|
||||
throw error
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import axios from 'axios'
|
|||
import * as cheerio from 'cheerio'
|
||||
import { XMLParser } from 'fast-xml-parser'
|
||||
import { isRawListRemoteZimFilesResponse, isRawRemoteZimFileEntry } from '../../util/zim.js'
|
||||
import { findReplacedWikipediaFiles } from '../utils/zim_filename.js'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
import { DockerService } from './docker_service.js'
|
||||
import { inject } from '@adonisjs/core'
|
||||
|
|
@ -315,7 +314,7 @@ export class ZimService {
|
|||
if (restart) {
|
||||
// Check if there are any remaining ZIM download jobs before restarting
|
||||
const { QueueService } = await import('./queue_service.js')
|
||||
const queueService = QueueService.getInstance()
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue('downloads')
|
||||
|
||||
// Get all active and waiting jobs
|
||||
|
|
@ -628,21 +627,18 @@ export class ZimService {
|
|||
|
||||
logger.info(`[ZimService] Wikipedia download completed successfully: ${filename}`)
|
||||
|
||||
// Delete prior versions of THIS specific Wikipedia variant only.
|
||||
// Earlier logic deleted anything starting with `wikipedia_en_`, which silently
|
||||
// wiped distinct corpora the user had installed independently (issue #884).
|
||||
// Delete old Wikipedia files (keep only the newly installed one)
|
||||
const existingFiles = await this.list()
|
||||
const wikipediaFiles = findReplacedWikipediaFiles(
|
||||
filename,
|
||||
existingFiles.files.map((f) => f.name)
|
||||
const wikipediaFiles = existingFiles.files.filter((f) =>
|
||||
f.name.startsWith('wikipedia_en_') && f.name !== filename
|
||||
)
|
||||
|
||||
for (const oldFile of wikipediaFiles) {
|
||||
try {
|
||||
await this.delete(oldFile)
|
||||
logger.info(`[ZimService] Deleted old Wikipedia file: ${oldFile}`)
|
||||
await this.delete(oldFile.name)
|
||||
logger.info(`[ZimService] Deleted old Wikipedia file: ${oldFile.name}`)
|
||||
} catch (error) {
|
||||
logger.warn(`[ZimService] Could not delete old Wikipedia file: ${oldFile}`, error)
|
||||
logger.warn(`[ZimService] Could not delete old Wikipedia file: ${oldFile.name}`, error)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -47,14 +47,7 @@ export async function doResumableDownload({
|
|||
timeout,
|
||||
})
|
||||
|
||||
// Some upstream hosts (notably download.kiwix.org for .zim files) don't set a
|
||||
// Content-Type header at all. Per RFC 7231 §3.1.1.5, "if no Content-Type is
|
||||
// provided" the recipient may treat it as application/octet-stream — which is
|
||||
// already in every binary-content allowlist we use (ZIM, PMTILES, base assets).
|
||||
// Without this default, the validator below throws `MIME type is not allowed`
|
||||
// and breaks all downloads from kiwix's primary host (#848).
|
||||
const contentType =
|
||||
headResponse.headers['content-type'] || 'application/octet-stream'
|
||||
const contentType = headResponse.headers['content-type'] || ''
|
||||
const totalBytes = parseInt(headResponse.headers['content-length'] || '0')
|
||||
const supportsRangeRequests = headResponse.headers['accept-ranges'] === 'bytes'
|
||||
|
||||
|
|
|
|||
|
|
@ -1,70 +0,0 @@
|
|||
import type { KbIngestStateValue } from '../../types/kb_ingest_state.js'
|
||||
|
||||
/**
 * Outcome of `decideScanAction`: what scanAndSyncStorage should do with one
 * file, given its state row (if any), whether Qdrant already holds chunks for
 * it, and the global ingest policy.
 *
 * - `skip` — the file is settled (already indexed, deliberately not indexed,
 *   or waiting on manual recovery); nothing is auto-dispatched.
 * - `dispatch` — the file must be (re-)embedded via an EmbedFileJob.
 *   `createStateRow` says whether a state row has to be created first, i.e.
 *   this is the first time the scanner has seen the file.
 * - `backfill_indexed` — Qdrant has chunks but there is no state row yet
 *   (pre-RFC install, or a new admin instance pointed at an existing Qdrant
 *   volume). Record the file as `indexed` without re-embedding.
 * - `create_pending` — Manual mode: note that the file exists but do not
 *   dispatch. The frontend offers a per-card "Index" affordance.
 */
export type ScanAction =
  | { kind: 'skip' }
  | { kind: 'dispatch'; createStateRow: boolean }
  | { kind: 'backfill_indexed' }
  | { kind: 'create_pending' }

/** The slice of a kb_ingest_state row that the scan decision depends on. */
export interface KbIngestStateRow {
  state: KbIngestStateValue
}

/**
 * Global auto-index policy, stored in KV under `rag.defaultIngestPolicy`.
 * An unset value is treated as `Always`, so existing installs behave as
 * before until the user opts into Manual mode from the KB panel.
 */
export type IngestPolicy = 'Always' | 'Manual'

/**
 * Choose the scan action for a single embeddable file.
 *
 * Supersedes the old binary `!sourcesInQdrant.has(filePath)` test, which could
 * not distinguish a fully indexed file from a stalled mid-batch ingestion and
 * could not respect a "browse only" choice. The state row is authoritative;
 * chunk presence in Qdrant is corroborating evidence.
 *
 * @param stateRow - Existing state row for the file, or `null` if none.
 * @param hasChunksInQdrant - Whether Qdrant already has chunks for the file.
 * @param policy - Global ingest policy; defaults to `'Always'`.
 * @returns The action the scanner should take.
 */
export function decideScanAction(
  stateRow: KbIngestStateRow | null,
  hasChunksInQdrant: boolean,
  policy: IngestPolicy = 'Always'
): ScanAction {
  const autoIndex = policy === 'Always'

  // First sighting: no state row has been recorded for this file yet.
  if (!stateRow) {
    if (hasChunksInQdrant) {
      return { kind: 'backfill_indexed' }
    }
    if (autoIndex) {
      return { kind: 'dispatch', createStateRow: true }
    }
    return { kind: 'create_pending' }
  }

  switch (stateRow.state) {
    case 'indexed': {
      // Marked indexed but Qdrant has nothing: re-embed to repair.
      if (hasChunksInQdrant) return { kind: 'skip' }
      return { kind: 'dispatch', createStateRow: false }
    }
    case 'pending_decision': {
      // Manual mode: wait for the user to opt in via the per-card Index.
      // Always mode: treat as the equivalent of auto-index and dispatch.
      if (autoIndex) return { kind: 'dispatch', createStateRow: false }
      return { kind: 'skip' }
    }
    case 'browse_only':
    case 'failed':
    case 'stalled':
      return { kind: 'skip' }
  }
}
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
/**
 * Visual status of an in-flight (or stuck) embedding job; selects the colored
 * status pill in the KB Processing Queue. See RFC #883 §5.
 *
 * - `waiting` — queued; no batch has started yet
 * - `healthy` — most recent activity at most 2 minutes ago
 * - `slow`    — most recent activity more than 2 and at most 5 minutes ago
 *               (CPU-paced multi-batch ingestion lands here; not necessarily
 *               a problem)
 * - `stalled` — most recent activity more than 5 minutes ago (likely a real
 *               problem)
 * - `failed`  — the job recorded a failed status
 */
export type JobHealthStatus = 'waiting' | 'healthy' | 'slow' | 'stalled' | 'failed'

export interface JobHealthInput {
  /** BullMQ job.data.status — set by EmbedFileJob.handle on transitions. */
  status: string
  /** 0-100. 0 means no work has been observed on this job-row yet. */
  progress: number
  /** ms epoch of the last completed batch. Multi-batch ZIMs refresh this on
   * every continuation; single-batch jobs leave it unset until completion. */
  lastBatchAt?: number
  /** ms epoch of the first batch start. Serves as the fallback "last
   * activity" signal for jobs that have not completed a first batch. */
  startedAt?: number
  /** Current ms epoch. Injected so the function is testable. */
  now: number
}

// Staleness strictly above these bounds moves a job to the next-worse band.
const SLOW_THRESHOLD_MS = 2 * 60 * 1000
const STALLED_THRESHOLD_MS = 5 * 60 * 1000

/**
 * Classify a job's health from its status, progress and activity timestamps.
 *
 * @param input - Snapshot of the job plus the current time.
 * @returns The status band used to render the job's pill.
 */
export function computeJobHealth(input: JobHealthInput): JobHealthStatus {
  const { status, progress, lastBatchAt, startedAt, now } = input

  if (status === 'failed') {
    return 'failed'
  }

  // Nothing recorded at all (no progress, no timestamps): still queued.
  const neverActive = lastBatchAt === undefined && startedAt === undefined
  if (neverActive && progress === 0) {
    return 'waiting'
  }

  // Prefer the last batch time, then the start time; with neither, the job
  // counts as active "now" (zero staleness).
  const idleMs = now - (lastBatchAt ?? startedAt ?? now)

  if (idleMs > STALLED_THRESHOLD_MS) {
    return 'stalled'
  }
  return idleMs > SLOW_THRESHOLD_MS ? 'slow' : 'healthy'
}
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
/** One ratio-registry entry: a filename prefix and its chunk density. */
export interface RatioRow {
  pattern: string
  chunks_per_mb: number
}

/**
 * Approximate on-disk bytes that a single embedded chunk occupies in Qdrant.
 *
 * Rough breakdown for our pipeline:
 *   - vector: 768 dims × float32 = 3,072 B
 *   - chunk text payload: ~3,000 B (target 1,500 tokens × 2 chars/token)
 *   - source/metadata payload + Qdrant indexes: ~2,000 B
 *
 * Used to show pre-ingest disk-cost estimates. The true figure depends on
 * collection params and is slated to be replaced by self-calibration
 * (RFC #883 Phase 4) once real measurements exist.
 */
export const BYTES_PER_CHUNK_ON_DISK = 8_000

/** A file to be costed: its name (for prefix matching) and size on disk. */
export interface BatchEstimateInput {
  filename: string
  sizeBytes: number
}

/** Aggregate result of `estimateBatch`. */
export interface BatchEstimate {
  totalChunks: number
  totalBytes: number
  hasUnknown: boolean
}

/**
 * Sum the embedding-disk-cost estimate over a batch of files (curated tier
 * add, multi-upload, sync preview, ...).
 *
 * `hasUnknown` is true when at least one file matched no registry row. Totals
 * cover matched files only, so callers should label the figure as "estimate
 * excludes unknown files" when that flag is set.
 *
 * @param files - Files to estimate.
 * @param rows - Ratio registry rows to match filenames against.
 * @returns Total chunk count, total bytes, and the unknown-file flag.
 */
export function estimateBatch(
  files: BatchEstimateInput[],
  rows: RatioRow[]
): BatchEstimate {
  const perFileChunks = files.map((file) =>
    estimateChunkCount(file.filename, file.sizeBytes, rows)
  )
  // Unmatched files (null) contribute nothing to the total.
  const totalChunks = perFileChunks.reduce<number>(
    (sum, chunks) => (chunks === null ? sum : sum + chunks),
    0
  )
  return {
    totalChunks,
    totalBytes: totalChunks * BYTES_PER_CHUNK_ON_DISK,
    hasUnknown: perFileChunks.some((chunks) => chunks === null),
  }
}

/**
 * Look up the chunks_per_mb figure for a filename via longest-prefix match.
 *
 * Patterns are filename prefixes (`devdocs_`, `wikipedia_en_simple_`, ...).
 * The longest matching prefix wins, so a specific entry
 * (`wikipedia_en_simple_`) beats a broader one (`wikipedia_en_`). An
 * empty-string pattern acts as a catch-all because every filename starts with
 * it. On equal-length matches the earliest row is kept.
 *
 * @returns The matched ratio, or `null` when nothing matches and there is no
 *   empty-string fallback — the caller decides how to present "unknown".
 */
export function findChunksPerMb(filename: string, rows: RatioRow[]): number | null {
  let winner: RatioRow | undefined
  for (const candidate of rows) {
    const isPrefix = filename.startsWith(candidate.pattern)
    const isLonger =
      winner === undefined || candidate.pattern.length > winner.pattern.length
    if (isPrefix && isLonger) {
      winner = candidate
    }
  }
  return winner === undefined ? null : winner.chunks_per_mb
}

/**
 * Estimate how many embedding chunks a ZIM-style file will yield from its
 * size on disk in bytes.
 *
 * @returns The rounded chunk estimate, or `null` when the registry has no
 *   match. Converting the count into a disk footprint (chunks × bytes per
 *   chunk) or a time estimate (chunks ÷ chunks per minute) is the caller's job.
 */
export function estimateChunkCount(
  filename: string,
  fileSizeBytes: number,
  rows: RatioRow[]
): number | null {
  const chunksPerMb = findChunksPerMb(filename, rows)
  if (chunksPerMb === null) {
    return null
  }
  const sizeInMb = fileSizeBytes / (1024 * 1024)
  return Math.round(chunksPerMb * sizeInMb)
}
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
/**
|
||||
* Conditional warnings surfaced on Stored Files rows in the KB panel.
|
||||
* See RFC #883 §6 — these warnings appear ONLY when their triggering condition
|
||||
* is met, never on healthy files, to keep the panel silent in the common case.
|
||||
*
|
||||
* - `zero_chunks` — a non-trivial file produced 0 embedding chunks. Common
|
||||
* cause: video-only or image-only ZIMs that the pipeline
|
||||
* completes "successfully" with no extractable text.
|
||||
* AI Assistant cannot reference this content.
|
||||
* - `partial_stall` — the file has embedded chunks but well below the count
|
||||
* expected from the ratio registry. Likely a mid-batch
|
||||
* stall (which the binary "any chunks ⇒ embedded" check
|
||||
* used to mask). Surfaces a Retry affordance.
|
||||
*/
|
||||
import type { FileWarning } from '../../types/rag.js'
|
||||
|
||||
export type { FileWarning }
|
||||
|
||||
/** Size floor for the zero-chunk warning. A 5 KB upload that yields 0 chunks
 * is far more likely a legitimate edge case (placeholder file) than the
 * gigabyte-scale video-ZIM problem this warning exists for, so files at or
 * below this size are never flagged. */
export const ZERO_CHUNKS_MIN_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB

/** Share of the expected chunk count below which a file counts as partially
 * stalled. 0.5 (50%) is the threshold given in RFC #883 §6 Warning B. */
export const PARTIAL_STALL_RATIO_THRESHOLD = 0.5

export interface WarningInputs {
  /** Size of the source file on disk, in bytes. */
  fileSizeBytes: number
  /** Number of distinct chunks Qdrant holds for this source. */
  chunksInQdrant: number
  /** Best estimate, from the ratio registry, of the chunks this file should
   * produce. `null` when no pattern matches and no fallback is configured;
   * Warning B is then suppressed (better silent than wrong). */
  expectedChunks: number | null
}

/**
 * Work out which conditional warnings apply to a stored file.
 *
 * @param inputs - File size, chunks present in Qdrant, and expected chunks.
 * @returns Zero, one or two warnings; an empty array for a healthy file.
 */
export function decideWarnings(inputs: WarningInputs): FileWarning[] {
  const { fileSizeBytes, chunksInQdrant, expectedChunks } = inputs
  const found: FileWarning[] = []

  // Warning A: a large file that produced nothing. Almost always a
  // video-only or image-only ZIM the AI Assistant cannot reference.
  const isLargeButEmpty =
    chunksInQdrant === 0 && fileSizeBytes > ZERO_CHUNKS_MIN_SIZE_BYTES
  if (isLargeButEmpty) {
    found.push({ kind: 'zero_chunks', fileSizeBytes })
  }

  // Warning B: some chunks exist but far fewer than expected. Skipped when
  // there is no expectation (registry miss), since the comparison would be
  // meaningless and under-warning beats misleading.
  const hasExpectation = expectedChunks !== null && expectedChunks > 0
  if (
    hasExpectation &&
    chunksInQdrant > 0 &&
    chunksInQdrant < expectedChunks * PARTIAL_STALL_RATIO_THRESHOLD
  ) {
    found.push({
      kind: 'partial_stall',
      chunksEmbedded: chunksInQdrant,
      chunksExpected: expectedChunks,
    })
  }

  return found
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
/**
 * Remove the trailing `_YYYY-MM(-DD).zim` release-date suffix from a
 * Kiwix-style ZIM filename. Different releases of one variant then share a
 * stem (e.g. `wikipedia_en_all_nopic`), while distinct corpora keep distinct
 * stems (`wikipedia_en_simple_all_nopic`, `wikipedia_en_medicine_nopic`, ...).
 *
 * @param name - ZIM filename.
 * @returns The filename without the date suffix, or the input unchanged when
 *   it does not end in such a suffix.
 */
export function zimFilenameStem(name: string): string {
  // `_2024-05.zim` or `_2024-05-17.zim`; the extension matches in any case.
  const releaseSuffix = /_\d{4}-\d{2}(?:-\d{2})?\.zim$/i
  return name.replace(releaseSuffix, '')
}

/**
 * From the existing filenames, select only the prior-version replacements of
 * `currentFilename`: same Wikipedia variant stem, different release. The
 * post-download cleanup uses this so it does not delete unrelated Wikipedia
 * corpora the user installed independently (issue #884).
 *
 * @param currentFilename - The file that was just installed.
 * @param existingNames - Filenames currently present.
 * @returns The subset of `existingNames` that the new file supersedes.
 */
export function findReplacedWikipediaFiles(
  currentFilename: string,
  existingNames: string[]
): string[] {
  const targetStem = zimFilenameStem(currentFilename)

  const isSuperseded = (candidate: string): boolean => {
    if (!candidate.startsWith('wikipedia_en_')) return false
    // Never report the freshly installed file itself.
    if (candidate === currentFilename) return false
    return zimFilenameStem(candidate) === targetStem
  }

  return existingNames.filter((candidate) => isSuperseded(candidate))
}
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
import vine from '@vinejs/vine'
|
||||
import ipaddr from 'ipaddr.js'
|
||||
|
||||
/**
|
||||
* Checks whether a URL points to a loopback or link-local address.
|
||||
|
|
@ -16,18 +15,15 @@ export function assertNotPrivateUrl(urlString: string): void {
|
|||
const parsed = new URL(urlString)
|
||||
const hostname = parsed.hostname.toLowerCase()
|
||||
|
||||
// `URL.hostname` strips the surrounding brackets from IPv6 literals
|
||||
// (e.g. `http://[::1]/` → hostname `::1`), so IPv6 patterns must match
|
||||
// the unbracketed form.
|
||||
const blockedPatterns = [
|
||||
/^localhost$/,
|
||||
/^127\.\d+\.\d+\.\d+$/,
|
||||
/^0\.0\.0\.0$/,
|
||||
/^169\.254\.\d+\.\d+$/, // Link-local / cloud metadata
|
||||
/^::1$/, // IPv6 loopback
|
||||
/^fe80:/i, // IPv6 link-local
|
||||
/^::ffff:/i, // IPv4-mapped IPv6 (e.g. ::ffff:7f00:1 = 127.0.0.1)
|
||||
/^::$/, // IPv6 all-zeros (equivalent to 0.0.0.0)
|
||||
/^\[::1\]$/,
|
||||
/^\[?fe80:/i, // IPv6 link-local
|
||||
/^\[::ffff:/i, // IPv4-mapped IPv6 (e.g. [::ffff:7f00:1] = 127.0.0.1)
|
||||
/^\[::\]$/, // IPv6 all-zeros (equivalent to 0.0.0.0)
|
||||
]
|
||||
|
||||
if (blockedPatterns.some((re) => re.test(hostname))) {
|
||||
|
|
@ -35,63 +31,6 @@ export function assertNotPrivateUrl(urlString: string): void {
|
|||
}
|
||||
}
|
||||
|
||||
// Canonical cloud instance-metadata addresses. AWS, GCP, Azure, DigitalOcean,
// Oracle Cloud, and Alibaba all expose IMDS at 169.254.169.254 over IPv4;
// AWS additionally exposes it at fd00:ec2::254 over IPv6.
// Entries are compared after `ipaddr.toNormalizedString()`, which expands
// IPv6 to its fully-zero-padded form
// (e.g. `fd00:ec2::254` → `fd00:ec2:0:0:0:0:0:254`).
const BLOCKED_METADATA_IPV4 = new Set(['169.254.169.254'])
const BLOCKED_METADATA_IPV6 = new Set([
  ipaddr.parse('fd00:ec2::254').toNormalizedString(),
])

/**
 * Narrower SSRF guard for "remote service" URLs the user points NOMAD at
 * (e.g. an OpenAI-compatible endpoint such as LM Studio, llama.cpp, vLLM, or
 * a sibling Ollama container). Unlike `assertNotPrivateUrl`, it deliberately
 * ALLOWS loopback, link-local-ish, and RFC1918 hosts, because the legitimate
 * target is often on the same host or LAN (host.docker.internal, the docker
 * bridge gateway, or a LAN IP).
 *
 * It rejects only:
 *   - non-HTTP schemes (file:, gopher:, etc.)
 *   - the cloud instance-metadata addresses listed in the blocklists above
 *     (169.254.169.254 and fd00:ec2::254), including IPv4-mapped IPv6
 *     spellings of the IPv4 address, to avoid leaking IAM creds on a
 *     misconfigured cloud VM
 *
 * @param urlString - The URL to validate.
 * @throws Error when the scheme is not http/https or the host is a blocked
 *   metadata address. `new URL` also throws on an unparseable URL.
 */
export function assertNotCloudMetadataUrl(urlString: string): void {
  const { protocol, hostname: rawHostname } = new URL(urlString)

  const isHttpScheme = protocol === 'http:' || protocol === 'https:'
  if (!isHttpScheme) {
    throw new Error(`URL must use http or https scheme: ${protocol}`)
  }

  // Node's WHATWG URL parser keeps the brackets on IPv6 literals
  // (`http://[::1]/` → hostname `[::1]`), so strip them before parsing.
  const host = rawHostname.toLowerCase().replace(/^\[|\]$/g, '')

  // Not an IP literal means it is a DNS name; allow it. (DNS rebinding is out
  // of scope here — it would require resolving and re-checking at fetch time.)
  if (!ipaddr.isValid(host)) {
    return
  }

  const literal = ipaddr.parse(host)

  // Unwrap IPv4-mapped IPv6 (e.g. ::ffff:169.254.169.254, ::ffff:a9fe:a9fe,
  // and the fully-expanded 0:0:0:0:0:ffff:a9fe:a9fe) so the IPv4 blocklist
  // sees the embedded address.
  const addr =
    literal.kind() === 'ipv6' && (literal as ipaddr.IPv6).isIPv4MappedAddress()
      ? (literal as ipaddr.IPv6).toIPv4Address()
      : literal

  const canonical = addr.toNormalizedString()
  const denylist = addr.kind() === 'ipv4' ? BLOCKED_METADATA_IPV4 : BLOCKED_METADATA_IPV6

  if (denylist.has(canonical)) {
    throw new Error(`URL must not point to the cloud instance metadata endpoint: ${canonical}`)
  }
}
|
||||
|
||||
export const remoteDownloadValidator = vine.compile(
|
||||
vine.object({
|
||||
url: vine
|
||||
|
|
|
|||
|
|
@ -14,12 +14,6 @@ export const chatSchema = vine.compile(
|
|||
})
|
||||
)
|
||||
|
||||
/**
 * Validator for the unload-chat-models payload.
 * `targetModel` is a trimmed, non-empty string that may also be `null` or
 * omitted entirely.
 */
export const unloadChatModelsSchema = vine.compile(
  vine.object({
    targetModel: vine
      .string()
      .trim()
      .minLength(1)
      .nullable()
      .optional(),
  })
)
|
||||
|
||||
export const getAvailableModelsSchema = vine.compile(
|
||||
vine.object({
|
||||
sort: vine.enum(['pulls', 'name'] as const).optional(),
|
||||
|
|
|
|||
|
|
@ -11,24 +11,3 @@ export const deleteFileSchema = vine.compile(
|
|||
source: vine.string(),
|
||||
})
|
||||
)
|
||||
|
||||
/**
 * Validator for a single-file embed request.
 * `source` must be a non-empty string; `force` is an optional boolean.
 */
export const embedFileSchema = vine.compile(
  vine.object({
    source: vine
      .string()
      .minLength(1),
    force: vine
      .boolean()
      .optional(),
  })
)
|
||||
|
||||
/**
 * Validator for a batch disk-cost estimate request.
 * `files` must contain between 1 and 500 entries, each with a `filename` of
 * 1-255 characters and a non-negative `sizeBytes`.
 */
export const estimateBatchSchema = vine.compile(
  vine.object({
    files: vine
      .array(
        vine.object({
          filename: vine
            .string()
            .minLength(1)
            .maxLength(255),
          sizeBytes: vine
            .number()
            .min(0),
        })
      )
      .minLength(1)
      .maxLength(500),
  })
)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
import { KVStoreKey } from "../types/kv_store.js";
|
||||
|
||||
export const SETTINGS_KEYS: KVStoreKey[] = ['chat.suggestionsEnabled', 'chat.lastModel', 'ui.hasVisitedEasySetup', 'ui.theme', 'system.earlyAccess', 'ai.assistantCustomName', 'ai.remoteOllamaUrl', 'ai.ollamaFlashAttention', 'rag.defaultIngestPolicy'];
|
||||
export const SETTINGS_KEYS: KVStoreKey[] = ['chat.suggestionsEnabled', 'chat.lastModel', 'ui.hasVisitedEasySetup', 'ui.theme', 'system.earlyAccess', 'ai.assistantCustomName', 'ai.remoteOllamaUrl', 'ai.ollamaFlashAttention'];
|
||||
|
|
@ -64,8 +64,6 @@ export const FALLBACK_RECOMMENDED_OLLAMA_MODELS: NomadOllamaModel[] = [
|
|||
|
||||
export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // default to qwen2.5 for query rewriting with good balance of text task performance and resource usage
|
||||
|
||||
export const EMBEDDING_MODEL_NAME = 'nomic-embed-text:v1.5'
|
||||
|
||||
/**
|
||||
* Adaptive RAG context limits based on model size.
|
||||
* Smaller models get overwhelmed with too much context, so we cap it.
|
||||
|
|
|
|||
|
|
@ -1,26 +0,0 @@
|
|||
import { BaseSchema } from '@adonisjs/lucid/schema'
|
||||
|
||||
/**
 * Migration for the `kb_ingest_state` table: one row per file path with its
 * ingest state, embedded-chunk count, and last error.
 */
export default class extends BaseSchema {
  protected tableName = 'kb_ingest_state'

  /** Create the table. */
  async up() {
    // Allowed values for the `state` column; new rows start as
    // 'pending_decision'.
    const ingestStates = ['pending_decision', 'indexed', 'browse_only', 'failed', 'stalled']

    this.schema.createTable(this.tableName, (table) => {
      table.increments('id').primary()

      // utf8mb4 caps an indexed varchar at 768 chars (3072 byte InnoDB key
      // limit); 512 leaves headroom and is plenty for any NOMAD-managed path.
      table.string('file_path', 512).notNullable().unique()

      table.enum('state', ingestStates).notNullable().defaultTo('pending_decision')
      table.integer('chunks_embedded').notNullable().defaultTo(0)
      table.text('last_error').nullable()

      table.timestamp('created_at').notNullable()
      table.timestamp('updated_at').notNullable()
    })
  }

  /** Drop the table. */
  async down() {
    this.schema.dropTable(this.tableName)
  }
}
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
import { BaseSchema } from '@adonisjs/lucid/schema'
|
||||
import { DateTime } from 'luxon'
|
||||
|
||||
/**
 * Heuristic seed data for the ratio registry, written as
 * `[pattern, chunks_per_mb, notes]` tuples and expanded into row objects.
 */
const SEED_ROWS: Array<{ pattern: string; chunks_per_mb: number; notes: string }> = (
  [
    // Dense technical reference — every paragraph carries content
    ['devdocs_', 1100, 'Heuristic seed: dense API references'],
    // Encyclopedia prose — Simple English & general Wikipedia variants
    ['wikipedia_en_simple_', 270, 'Heuristic seed: Simple English Wikipedia'],
    ['wikipedia_en_', 270, 'Heuristic seed: general Wikipedia variants'],
    // Sparse text, image-heavy
    ['ifixit_', 50, 'Heuristic seed: image-heavy repair guides'],
    // Q&A pages — moderate density, mostly short answers
    ['cooking.stackexchange.com_', 200, 'Heuristic seed: Stack Exchange Q&A'],
    // Video-only ZIMs produce zero text chunks. Listing them explicitly keeps
    // the cost estimator from spinning up "indexing in progress" UI for
    // content that has no embeddable text whatsoever.
    ['lrnselfreliance_', 0, 'Heuristic seed: video-only ZIM'],
    ['ted_', 0, 'Heuristic seed: video-only ZIM'],
    ['freedom-of-religion_', 0, 'Heuristic seed: video-only ZIM'],
    // Empty-pattern fallback — every filename startsWith('') is true. The
    // lookup picks the longest matching pattern, so this only fires for ZIMs
    // that match none of the above (medium prose density).
    ['', 100, 'Heuristic fallback'],
  ] as const
).map(([pattern, chunks_per_mb, notes]) => ({ pattern, chunks_per_mb, notes }))
|
||||
|
||||
/**
 * Migration for the `kb_ratio_registry` table. Besides creating the table it
 * queues an insert of the heuristic `SEED_ROWS`, stamped with the current UTC
 * time.
 */
export default class extends BaseSchema {
  protected tableName = 'kb_ratio_registry'

  /** Creates the table, then defers a bulk insert of the seed rows. */
  async up() {
    this.schema.createTable(this.tableName, (t) => {
      t.increments('id').primary()
      t.string('pattern', 255).notNullable().unique()
      t.integer('chunks_per_mb').unsigned().notNullable()

      // 0 = heuristic seed, >0 = number of observed ZIMs that have updated this
      // entry. Phase 4 self-calibration increments this on each successful
      // ingestion.
      t.integer('sample_count').notNullable().defaultTo(0)

      t.text('notes').nullable()
      t.timestamp('created_at').notNullable()
      t.timestamp('updated_at').notNullable()
    })

    // One shared timestamp (SQL format, no offset) for every seeded row.
    const stamp = DateTime.utc().toSQL({ includeOffset: false }) as string
    const seededRows = SEED_ROWS.map((seed) => {
      return { ...seed, created_at: stamp, updated_at: stamp }
    })

    this.defer(async (db) => {
      await db.table(this.tableName).multiInsert(seededRows)
    })
  }

  /** Reverts the migration by dropping the table. */
  async down() {
    this.schema.dropTable(this.tableName)
  }
}
|
||||
|
|
@ -114,18 +114,6 @@ The Maps feature requires downloaded map data. If you see a blank area:
|
|||
3. Wait for downloads to complete
|
||||
4. Return to Maps and refresh
|
||||
|
||||
### ERROR: Failed to load the XML library file '/data/kiwix-library.xml'
|
||||
|
||||
This usually means the Information Library service started before its Kiwix library index was fully initialized.
|
||||
|
||||
Try this recovery flow:
|
||||
1. Go to **[Apps](/settings/apps)**
|
||||
2. Stop **Information Library (Kiwix)**
|
||||
3. Wait 10-15 seconds, then start it again
|
||||
4. If the error persists, run **Force Reinstall** for Information Library from the same page
|
||||
|
||||
After restart/reinstall completes, refresh the Information Library page.
|
||||
|
||||
### AI responses are slow
|
||||
|
||||
Local AI requires significant computing power. To improve speed:
|
||||
|
|
|
|||
|
|
@ -1,92 +1,32 @@
|
|||
# Release Notes
|
||||
|
||||
## Version 1.31.1 - April 21, 2026
|
||||
## Unreleased
|
||||
|
||||
### Features
|
||||
- feat(content): custom ZIM library sources with pre-seeded mirrors (#593). Thanks @chriscrosstalk!
|
||||
- feat(content-manager): add sortable file size column (#698). Thanks @chriscrosstalk!
|
||||
- feat(ai-chat): allow cancelling in-progress model downloads (#701). Thanks @chriscrosstalk!
|
||||
- feat(content-updates): show size, surface downloads in Active Downloads (#773). Thanks @chriscrosstalk!
|
||||
- feat(maps): regional map downloads via go-pmtiles extract (#780). Thanks @bgauger!
|
||||
- feat(maps): show map coordinates on mouse move (#786). Thanks @kennethbrewer3!
|
||||
- feat(AI): re-enable AMD GPU acceleration for Ollama via ROCm + HSA override (#804). Thanks @chriscrosstalk!
|
||||
- feat(GPU): auto-remediate nomad_ollama passthrough loss on admin boot (#878). Thanks @chriscrosstalk!
|
||||
- feat(KB): per-file ingest state machine (Phase 1 of RFC #883) (#888). Thanks @chriscrosstalk!
|
||||
- feat(KB): ratio registry for disk + time estimates (Phase 1B of RFC #883) (#891). Thanks @chriscrosstalk!
|
||||
- feat(KB): group admin docs into single row in Stored Files (§9) (#892). Thanks @chriscrosstalk!
|
||||
- feat(KB): status pill + last-activity on Processing Queue (§5/§10) (#893). Thanks @chriscrosstalk!
|
||||
- feat(KB): Always/Manual ingest policy toggle (§1/§4) (#894). Thanks @chriscrosstalk!
|
||||
- feat(KB): conditional warnings A + B on Stored Files (§6) (#895). Thanks @chriscrosstalk!
|
||||
- feat(KB): surface embedding-disk estimate in curated tier-change modal (§1) (#897). Thanks @chriscrosstalk!
|
||||
- feat(KB): first-chat JIT prompt for ingest policy (Phase 3 task 12) (#899). Thanks @chriscrosstalk!
|
||||
- feat(KB): wizard AI policy step (Phase 3 task 13) (#900). Thanks @chriscrosstalk!
|
||||
- feat(KB): guardrail modal at 50GB / 10%-free thresholds (§7) (#901). Thanks @chriscrosstalk!
|
||||
- feat(easy-setup): split AI into its own conditional step (#908). Thanks @chriscrosstalk!
|
||||
- feat(KB): per-file ingest action + state indicator on Stored Files (§5) (#909). Thanks @chriscrosstalk!
|
||||
- feat(chat): confirm-on-switch + one-chat-model-at-a-time enforcement (#916). Thanks @chriscrosstalk!
|
||||
- **AI Assistant**: Added improved support for AMD GPU acceleration for Ollama via ROCm + HSA override. Thanks @chriscrosstalk for the contribution!
|
||||
- **Content Explorer**: Added support for custom ZIM library sources and pre-seeded ZIM library mirrors in addition to the default Kiwix library. Thanks @chriscrosstalk for the contribution!
|
||||
- **Content Manager**: Content update sizes and downloads are now properly displayed in Active Downloads with progress bars and friendly names. Thanks @chriscrosstalk for the contribution!
|
||||
- **Maps**: Map regions can now be extracted and downloaded locally from PMTiles to avoid the need for a full global map download for users who only want specific regions. Thanks @bgauger for the contribution!
|
||||
|
||||
### Bug Fixes
|
||||
- fix(downloads): stage downloads to .tmp to prevent Kiwix loading partial files (#448). Thanks @artbird309!
|
||||
- fix(security): close remaining security audit items 3 & 4 (CWE-918, CWE-209) (#552). Thanks @LuisMIguelFurlanettoSousa!
|
||||
- fix(ai-chat): add null check to model name (#645). Thanks @hestela!
|
||||
- fix(ai-chat): qwen2.5 loading on every chat message (#649). Thanks @hestela!
|
||||
- fix(disk-collector): fix storage reporting for NFS mounts (#686). Thanks @bgauger!
|
||||
- fix(rag): add start button in kb modal and ensure restart policy exists (#700). Thanks @hestela!
|
||||
- fix(admin): only hide global map banner after download (#702). Thanks @Gujiassh!
|
||||
- fix(maps): wire delete confirmation to API (#732). Thanks @cuyua9!
|
||||
- fix: prevent ZIM corrupt file crash and deduplicate Ollama download logs (#741). Thanks @jakeaturner!
|
||||
- fix(ai): stop local nomad_ollama when remote Ollama is configured (#744). Thanks @chriscrosstalk!
|
||||
- fix(rag): repair ZIM embedding pipeline (sync filter, batch gate, DOM walk) (#745). Thanks @chriscrosstalk!
|
||||
- fix(zim): accumulate across Kiwix pages to prevent empty Content Explorer (#746). Thanks @chriscrosstalk!
|
||||
- fix(qdrant): disable anonymous telemetry by default (#747). Thanks @chriscrosstalk!
|
||||
- fix(disk-display): gate NAS Storage label on network filesystem type (#749). Thanks @bgauger!
|
||||
- fix(docker): write /app/version.json from VERSION build-arg (#754). Thanks @chriscrosstalk!
|
||||
- fix(rag): pass num_ctx and truncate to Ollama embed call (#763). Thanks @chriscrosstalk!
|
||||
- fix(api): accept notes, marker_type, and position on markers endpoints (#770). Thanks @jrsphoto!
|
||||
- fix(install): warn loudly on non-x86_64 architectures before pulling images (#797). Thanks @chriscrosstalk!
|
||||
- fix(stream): skip compression for Server-Sent Events (#798). Thanks @chriscrosstalk!
|
||||
- fix(maps): Country Picker UX polish + auto-refresh stored files (#817). Thanks @chriscrosstalk!
|
||||
- fix(System): self-heal stale updateAvailable flag after sidecar-driven update (#825). Thanks @jakeaturner!
|
||||
- fix(settings/update): four UI/UX fixes for the System Update page (#827). Thanks @chriscrosstalk!
|
||||
- fix(Maps): send filename instead of full path to delete endpoint (#829). Thanks @bgauger!
|
||||
- fix(Maps): render notes in marker popup when populated (#830). Thanks @chriscrosstalk!
|
||||
- fix(AI): vendor-aware AMD HSA override + benchmark discrete-GPU detection (#832). Thanks @chriscrosstalk!
|
||||
- fix(System): correct NVIDIA VRAM in Graphics card (#850). Thanks @bgauger!
|
||||
- fix(Downloads): treat missing Content-Type as octet-stream (#859). Thanks @bgauger!
|
||||
- fix(AI): preserve semver tag in DB on AMD Ollama updates (#868). Thanks @chriscrosstalk!
|
||||
- fix(AI): rewrite RAG query on first chat follow-up (#869). Thanks @chriscrosstalk!
|
||||
- fix(RAG): unbreak multi-batch ZIM ingestion (jobId dedupe) (#872). Thanks @chriscrosstalk!
|
||||
- fix(RAG): pace continuation batches when embedding is CPU-only (#873). Thanks @chriscrosstalk!
|
||||
- fix(queue): singleton QueueService to stop ioredis connection leak (#877). Thanks @chriscrosstalk!
|
||||
- fix(System): correct AMD VRAM in Graphics card + harden log probe (#879). Thanks @chriscrosstalk!
|
||||
- fix(RAG): report ZIM ingestion progress in overall-file frame (#880). Thanks @chriscrosstalk!
|
||||
- fix(KB): add re-embed and reset & rebuild options to fix broken embeddings (#886). Thanks @jakeaturner!
|
||||
- fix(ZIM): preserve co-existing Wikipedia corpora on cleanup (#887). Thanks @chriscrosstalk!
|
||||
- fix(RAG): anchor continuation-batch initial progress to overall-file frame (#889). Thanks @chriscrosstalk!
|
||||
- fix(AI): pre-cap embed input + log fallback reason (#890). Thanks @chriscrosstalk!
|
||||
- fix(KB): remove redundant Refresh button from Processing Queue (#896). Thanks @chriscrosstalk!
|
||||
- fix(KB): union Stored Files list with state-machine file paths (#898). Thanks @chriscrosstalk!
|
||||
- fix(KB): blank-screen on panel open + tooltips on bulk-action buttons (#907). Thanks @chriscrosstalk!
|
||||
- fix(KB): TierSelectionModal hook order + register IconLibrary (#917). Thanks @chriscrosstalk!
|
||||
- fix(content): show selected tier on cards while downloads are in flight (#918). Thanks @chriscrosstalk!
|
||||
- fix(KB): respect Manual ingest policy on post-download dispatch (#919). Thanks @chriscrosstalk!
|
||||
- fix(AI): improve remote Ollama url validation to prevent SSRF vuln (#920). Thanks @jakeaturner!
|
||||
- fix(models): correct inverted belongsTo keys on ChatMessage.session (#921). Thanks @jakeaturner!
|
||||
- **API**: Compression is now skipped for Server-Sent Events (SSE) responses to prevent issues with streaming endpoints. Thanks @chriscrosstalk for the fix!
|
||||
- **Maps**: Fixed logic issues with the global map banner display. Thanks @Gujiassh for the fix!
|
||||
- **Maps**: The selected map file is now properly deleted after confirming the action in the UI. Thanks @cuyua9 for the fix!
|
||||
- **System**: Fixed an issue where a pending update could still be indicated in the UI even after the system was updated successfully. Thanks @jakeaturner for the fix!
|
||||
|
||||
### Improvements
|
||||
- docs: add Community Add-Ons page with field manuals + W3Schools packs (#753). Thanks @chriscrosstalk!
|
||||
- docs: add map marker API reference (#783). Thanks @kennethbrewer3!
|
||||
- docs: require linked issue for non-trivial PRs (#799). Thanks @chriscrosstalk!
|
||||
- docs(map): updated notes on the map pin api (#803). Thanks @kennethbrewer3!
|
||||
- docs: link to new WSL2 install guide from README and FAQ (#811). Thanks @chriscrosstalk!
|
||||
- build(deps): bump picomatch in /admin (#544). Thanks @dependabot[bot]!
|
||||
- build(deps): bump lodash from 4.17.23 to 4.18.1 in /admin (#643). Thanks @dependabot[bot]!
|
||||
- build(deps-dev): bump vite from 6.4.1 to 6.4.2 in /admin (#677). Thanks @dependabot[bot]!
|
||||
- build(deps): bump axios from 1.13.5 to 1.15.0 in /admin (#708). Thanks @dependabot[bot]!
|
||||
- build(deps): bump @adonisjs/http-server from 7.8.0 to 7.8.1 in /admin (#724). Thanks @dependabot[bot]!
|
||||
- build(deps): bump follow-redirects from 1.15.11 to 1.16.0 in /admin (#729). Thanks @dependabot[bot]!
|
||||
- build(deps): bump protocol-buffers-schema from 3.6.0 to 3.6.1 in /admin (#736). Thanks @dependabot[bot]!
|
||||
- build(deps): bump protobufjs from 7.5.4 to 7.5.5 in /admin (#737). Thanks @dependabot[bot]!
|
||||
- **Build**: The Command Center image now uses the VERSION build arg to write `/app/version.json` with the current version for improved version tracking and debugging, even in RC environments. Thanks @chriscrosstalk for the contribution!
|
||||
- **Content Manager**: Added a sortable file size column to the ZIM files table in the Content Manager for easier management of storage space. Thanks @chriscrosstalk for the contribution!
|
||||
- **Dependencies**: All package.json dependencies have been pinned to specific versions to ensure stability and reduce the risk of unexpected breaking changes/supply-chain compromises from upstream packages. Thanks @jakeaturner for the contribution!
|
||||
- **Dependencies**: Updated various dependencies to close security vulnerabilities and improve stability
|
||||
- **Docs**: Updated CONTRIBUTING.md to require an issue to be opened before submitting a PR for non-trivial changes to ensure proper discussion and review of proposed changes. Thanks @chriscrosstalk for the contribution!
|
||||
- **Docs**: Added the map markers endpoints to the API reference documentation. Thanks @kennethbrewer3 for the contribution!
|
||||
- **Docs**: Added a link to the new WSL2 install guide in the README and FAQ. Thanks @chriscrosstalk for the contribution!
|
||||
- **Install**: The install script now warns loudly if the user is attempting to install on a non-x86_64/amd64 platform to prevent unsupported installations and potential issues. Thanks @chriscrosstalk for the contribution!
|
||||
- **Maps**: The maps API endpoints now properly accept and validate notes, marker_type, and position data for map markers and persist them in the database for retrieval in the UI. Thanks @jrsphoto for the contribution!
|
||||
- **Maps**: The current coordinates of the mouse pointer can now be displayed in the map viewer for easier navigation and exploration. Thanks @kennethbrewer3 for the contribution!
|
||||
- **RAG**: NOMAD now properly passes `num_ctx` and truncation to the Ollama embedding endpoint to ensure that the context window of the model is best utilized for embeddings. Thanks @chriscrosstalk for the contribution!
|
||||
- **RAG**: Added a manual start button for Qdrant and a self-healing mechanism for Qdrant's restart-policy to ensure that the vector database is running properly for embedding and retrieval tasks. Thanks @hestela for the contribution!
|
||||
|
||||
## Version 1.31.1 - April 21, 2026
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,6 @@
|
|||
import { useEffect, useState } from 'react'
|
||||
import useEmbedJobs from '~/hooks/useEmbedJobs'
|
||||
import HorizontalBarChart from './HorizontalBarChart'
|
||||
import StyledSectionHeader from './StyledSectionHeader'
|
||||
import {
|
||||
JOB_HEALTH_DISPLAY,
|
||||
computeJobHealth,
|
||||
formatTimeAgo,
|
||||
} from '~/lib/kb_job_health_display'
|
||||
|
||||
interface ActiveEmbedJobsProps {
|
||||
withHeader?: boolean
|
||||
|
|
@ -15,70 +9,31 @@ interface ActiveEmbedJobsProps {
|
|||
const ActiveEmbedJobs = ({ withHeader = false }: ActiveEmbedJobsProps) => {
|
||||
const { data: jobs } = useEmbedJobs()
|
||||
|
||||
// Re-render every 5s to keep per-job "last activity Xs ago" timestamps fresh.
|
||||
const [tick, setTick] = useState(() => Date.now())
|
||||
useEffect(() => {
|
||||
const id = setInterval(() => setTick(Date.now()), 5000)
|
||||
return () => clearInterval(id)
|
||||
}, [])
|
||||
|
||||
return (
|
||||
<>
|
||||
{withHeader && (
|
||||
<StyledSectionHeader title="Processing Queue" className="mt-12 mb-4" />
|
||||
)}
|
||||
|
||||
<div className="space-y-4">
|
||||
{jobs && jobs.length > 0 ? (
|
||||
jobs.map((job) => {
|
||||
const health = computeJobHealth({
|
||||
status: job.status,
|
||||
progress: job.progress,
|
||||
lastBatchAt: job.lastBatchAt,
|
||||
startedAt: job.startedAt,
|
||||
now: tick,
|
||||
})
|
||||
const display = JOB_HEALTH_DISPLAY[health]
|
||||
const lastActivityMs = job.lastBatchAt ?? job.startedAt
|
||||
return (
|
||||
<div
|
||||
key={job.jobId}
|
||||
className="bg-desert-white rounded-lg p-4 border border-desert-stone-light shadow-sm hover:shadow-lg transition-shadow"
|
||||
>
|
||||
<div className="flex items-center gap-3 mb-2">
|
||||
<span
|
||||
className={`inline-block w-2.5 h-2.5 rounded-full ${display.dot}`}
|
||||
aria-label={display.ariaLabel}
|
||||
title={display.ariaLabel}
|
||||
/>
|
||||
<span className="text-sm font-medium text-text-primary">
|
||||
{display.label}
|
||||
</span>
|
||||
{lastActivityMs !== undefined && (
|
||||
<span className="text-xs text-text-muted">
|
||||
· last activity {formatTimeAgo(lastActivityMs, tick)}
|
||||
</span>
|
||||
)}
|
||||
{typeof job.chunks === 'number' && job.chunks > 0 && (
|
||||
<span className="text-xs text-text-muted">
|
||||
· {job.chunks.toLocaleString()} chunks
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<HorizontalBarChart
|
||||
items={[
|
||||
{
|
||||
label: job.fileName,
|
||||
value: job.progress,
|
||||
total: '100%',
|
||||
used: `${job.progress}%`,
|
||||
type: job.status,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
})
|
||||
jobs.map((job) => (
|
||||
<div
|
||||
key={job.jobId}
|
||||
className="bg-desert-white rounded-lg p-4 border border-desert-stone-light shadow-sm hover:shadow-lg transition-shadow"
|
||||
>
|
||||
<HorizontalBarChart
|
||||
items={[
|
||||
{
|
||||
label: job.fileName,
|
||||
value: job.progress,
|
||||
total: '100%',
|
||||
used: `${job.progress}%`,
|
||||
type: job.status,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</div>
|
||||
))
|
||||
) : (
|
||||
<p className="text-text-muted">No files are currently being processed</p>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import { formatBytes } from '~/lib/util'
|
|||
import DynamicIcon, { DynamicIconName } from './DynamicIcon'
|
||||
import type { CategoryWithStatus, SpecTier } from '../../types/collections'
|
||||
import classNames from 'classnames'
|
||||
import { IconChevronRight, IconCircleCheck, IconLoader2 } from '@tabler/icons-react'
|
||||
import { IconChevronRight, IconCircleCheck } from '@tabler/icons-react'
|
||||
|
||||
export interface CategoryCardProps {
|
||||
category: CategoryWithStatus
|
||||
|
|
@ -29,34 +29,14 @@ const CategoryCard: React.FC<CategoryCardProps> = ({ category, selectedTier, onC
|
|||
const minSize = getTierTotalSize(category.tiers[0], category.tiers)
|
||||
const maxSize = getTierTotalSize(category.tiers[category.tiers.length - 1], category.tiers)
|
||||
|
||||
// Priority order for the prominent corner badge + lime border:
|
||||
// 1. selectedTier — in-session wizard pick (highest priority, reflects
|
||||
// what the user is editing right now)
|
||||
// 2. downloadingTierSlug — backend-derived from in-flight downloads, so
|
||||
// the card shows the user's intent immediately after Submit, before
|
||||
// any single file has finished downloading
|
||||
// 3. installedTierSlug — fully on disk
|
||||
const downloadingTier = !selectedTier && category.downloadingTierSlug
|
||||
? category.tiers.find((t) => t.slug === category.downloadingTierSlug)
|
||||
: null
|
||||
const installedTier = !selectedTier && !downloadingTier && category.installedTierSlug
|
||||
? category.tiers.find((t) => t.slug === category.installedTierSlug)
|
||||
: null
|
||||
const badgeTier = selectedTier || downloadingTier || installedTier
|
||||
const badgeStatus: 'selected' | 'downloading' | 'installed' | null = selectedTier
|
||||
? 'selected'
|
||||
: downloadingTier
|
||||
? 'downloading'
|
||||
: installedTier
|
||||
? 'installed'
|
||||
: null
|
||||
const highlightedTierSlug = badgeTier?.slug
|
||||
// Determine which tier to highlight: selectedTier (wizard) > installedTierSlug (persisted)
|
||||
const highlightedTierSlug = selectedTier?.slug || category.installedTierSlug
|
||||
|
||||
return (
|
||||
<div
|
||||
className={classNames(
|
||||
'flex flex-col bg-desert-green rounded-lg p-6 text-white border shadow-sm hover:shadow-lg transition-shadow cursor-pointer h-80',
|
||||
badgeTier ? 'border-lime-400 border-2' : 'border-desert-green'
|
||||
selectedTier ? 'border-lime-400 border-2' : 'border-desert-green'
|
||||
)}
|
||||
onClick={() => onClick?.(category)}
|
||||
>
|
||||
|
|
@ -66,17 +46,10 @@ const CategoryCard: React.FC<CategoryCardProps> = ({ category, selectedTier, onC
|
|||
<DynamicIcon icon={category.icon as DynamicIconName} className="w-6 h-6 mr-2" />
|
||||
<h3 className="text-lg font-semibold">{category.name}</h3>
|
||||
</div>
|
||||
{badgeTier ? (
|
||||
{selectedTier ? (
|
||||
<div className="flex items-center">
|
||||
{badgeStatus === 'downloading' ? (
|
||||
<IconLoader2 className="w-5 h-5 text-lime-400 animate-spin" />
|
||||
) : (
|
||||
<IconCircleCheck className="w-5 h-5 text-lime-400" />
|
||||
)}
|
||||
<span className="text-lime-400 text-sm ml-1">
|
||||
{badgeTier.name}
|
||||
{badgeStatus === 'downloading' && ' (downloading)'}
|
||||
</span>
|
||||
<IconCircleCheck className="w-5 h-5 text-lime-400" />
|
||||
<span className="text-lime-400 text-sm ml-1">{selectedTier.name}</span>
|
||||
</div>
|
||||
) : (
|
||||
<IconChevronRight className="w-5 h-5 text-white opacity-70" />
|
||||
|
|
|
|||
|
|
@ -1,109 +0,0 @@
|
|||
import { Fragment } from 'react'
|
||||
import { Dialog, Transition } from '@headlessui/react'
|
||||
import { IconAlertTriangle, IconX } from '@tabler/icons-react'
|
||||
import { formatBytes } from '~/lib/util'
|
||||
import StyledButton from './StyledButton'
|
||||
import type { GuardrailVerdict } from '~/lib/kb_guardrail'
|
||||
|
||||
/**
 * Confirmation dialog shown when a bulk indexing action trips the disk-usage
 * thresholds defined in `lib/kb_guardrail.ts`.
 *
 * This component is presentation-only: the caller (e.g. TierSelectionModal)
 * evaluates the guardrail BEFORE submitting and decides whether to open the
 * dialog. The user's answer is reported back through `onConfirm` / `onCancel`.
 */
interface KbGuardrailModalProps {
  isOpen: boolean
  verdict: GuardrailVerdict
  onConfirm: () => void
  onCancel: () => void
}

export default function KbGuardrailModal(props: KbGuardrailModalProps) {
  const { isOpen, verdict, onConfirm, onCancel } = props

  // Headline number: every triggered reason carries the same estimateBytes, so
  // the first one is enough. Falls back to 0 for the (impossible by
  // construction) case of an open dialog with an empty verdict.
  const [firstReason] = verdict.reasons
  const approxBytes = firstReason?.estimateBytes ?? 0
  const freeDiskReason = verdict.reasons.find((reason) => reason.kind === 'over_free_disk')

  // Enter/leave timing shared by the backdrop and the panel transitions.
  const timing = { enter: 'ease-out duration-200', leave: 'ease-in duration-150' }

  // Extra paragraph rendered only when the free-disk threshold was tripped.
  const freeDiskNotice = freeDiskReason ? (
    <p className="text-text-secondary text-sm">
      That's more than 10% of your remaining free disk space ({formatBytes(freeDiskReason.freeBytes, 1)} free). Embedding can take several hours and is hard to interrupt cleanly once started.
    </p>
  ) : null

  return (
    <Transition appear show={isOpen} as={Fragment}>
      <Dialog as="div" className="relative z-[60]" onClose={onCancel}>
        {/* Dimmed backdrop */}
        <Transition.Child
          as={Fragment}
          {...timing}
          enterFrom="opacity-0"
          enterTo="opacity-100"
          leaveFrom="opacity-100"
          leaveTo="opacity-0"
        >
          <div className="fixed inset-0 bg-black/50" />
        </Transition.Child>

        <div className="fixed inset-0 overflow-y-auto">
          <div className="flex min-h-full items-center justify-center p-4">
            <Transition.Child
              as={Fragment}
              {...timing}
              enterFrom="opacity-0 scale-95"
              enterTo="opacity-100 scale-100"
              leaveFrom="opacity-100 scale-100"
              leaveTo="opacity-0 scale-95"
            >
              <Dialog.Panel className="w-full max-w-lg transform overflow-hidden rounded-lg bg-surface-primary shadow-xl transition-all">
                {/* Header: warning icon, title and close button */}
                <div className="bg-amber-50 dark:bg-amber-950/30 px-6 py-4 border-b border-amber-200 dark:border-amber-800 flex items-start justify-between gap-3">
                  <div className="flex items-start gap-3">
                    <IconAlertTriangle className="h-6 w-6 text-amber-600 dark:text-amber-300 flex-shrink-0 mt-0.5" />
                    <Dialog.Title className="text-lg font-semibold text-text-primary">
                      Confirm large AI indexing operation
                    </Dialog.Title>
                  </div>
                  <button
                    onClick={onCancel}
                    className="text-text-muted hover:text-text-primary transition-colors flex-shrink-0"
                    aria-label="Cancel"
                  >
                    <IconX size={20} />
                  </button>
                </div>

                {/* Body: estimate, optional free-disk notice, Manual-mode hint */}
                <div className="px-6 py-5 space-y-3">
                  <p className="text-text-primary text-sm">
                    Indexing this batch for the AI Assistant will use approximately{' '}
                    <strong>{formatBytes(approxBytes, 1)}</strong> of disk space for embeddings, on top of the raw downloads.
                  </p>

                  {freeDiskNotice}

                  <p className="text-text-secondary text-sm">
                    If you'd rather review per-item before indexing, cancel here and switch your Auto-index setting to <strong>Manual</strong> from the Knowledge Base panel.
                  </p>
                </div>

                {/* Footer: cancel / proceed actions */}
                <div className="bg-surface-secondary px-6 py-4 flex justify-end gap-3">
                  <StyledButton variant="outline" size="md" onClick={onCancel}>
                    Cancel
                  </StyledButton>
                  <StyledButton variant="primary" size="md" onClick={onConfirm}>
                    Proceed anyway
                  </StyledButton>
                </div>
              </Dialog.Panel>
            </Transition.Child>
          </div>
        </div>
      </Dialog>
    </Transition>
  )
}
|
||||
|
|
@ -1,28 +1,12 @@
|
|||
import { Fragment, useState, useEffect, useMemo } from 'react'
|
||||
import { Fragment, useState, useEffect } from 'react'
|
||||
import { Dialog, Transition } from '@headlessui/react'
|
||||
import { IconX, IconCheck, IconInfoCircle } from '@tabler/icons-react'
|
||||
import { useQuery } from '@tanstack/react-query'
|
||||
import type { CategoryWithStatus, SpecTier, SpecResource } from '../../types/collections'
|
||||
import { resolveTierResources } from '~/lib/collections'
|
||||
import { formatBytes } from '~/lib/util'
|
||||
import api from '~/lib/api'
|
||||
import classNames from 'classnames'
|
||||
import DynamicIcon, { DynamicIconName } from './DynamicIcon'
|
||||
import StyledButton from './StyledButton'
|
||||
import KbGuardrailModal from './KbGuardrailModal'
|
||||
import { evaluateGuardrail, type GuardrailVerdict } from '~/lib/kb_guardrail'
|
||||
import { useSystemInfo } from '~/hooks/useSystemInfo'
|
||||
import { getPrimaryDiskInfo } from '~/hooks/useDiskDisplayData'
|
||||
|
||||
/**
 * Derives the filename used for the embed-estimate registry lookup.
 *
 * Only the text after the last `/` of the resource URL is kept, so prefix
 * patterns such as `wikipedia_en_simple_` keep matching upstream filenames
 * regardless of mirror domain. When that tail is empty (e.g. the URL ends in
 * `/`), the resource id is returned instead.
 */
function resourceFilename(resource: SpecResource): string {
  const { url, id } = resource
  // lastIndexOf yields -1 when there is no slash, so slice(0) keeps the whole URL.
  const basename = url.slice(url.lastIndexOf('/') + 1)
  return basename === '' ? id : basename
}
|
||||
|
||||
interface TierSelectionModalProps {
|
||||
isOpen: boolean
|
||||
|
|
@ -49,70 +33,13 @@ const TierSelectionModal: React.FC<TierSelectionModalProps> = ({
|
|||
}
|
||||
}, [isOpen, category, selectedTierSlug])
|
||||
|
||||
// Get all resources for a tier (including inherited resources). Defined as a
|
||||
// hook-safe closure (always callable, returns [] when no category) so the
|
||||
// memo below can depend on `category` without breaking hook order.
|
||||
if (!category) return null
|
||||
|
||||
// Get all resources for a tier (including inherited resources)
|
||||
const getAllResourcesForTier = (tier: SpecTier): SpecResource[] => {
|
||||
if (!category) return []
|
||||
return resolveTierResources(tier, category.tiers)
|
||||
}
|
||||
|
||||
// Pre-compute the selected tier's resources outside the JSX so hooks below
|
||||
// don't re-run on every render. Empty array when no selection.
|
||||
const selectedTierResources = useMemo<SpecResource[]>(() => {
|
||||
if (!category || !localSelectedSlug) return []
|
||||
const tier = category.tiers.find((t) => t.slug === localSelectedSlug)
|
||||
return tier ? resolveTierResources(tier, category.tiers) : []
|
||||
}, [category, localSelectedSlug])
|
||||
|
||||
const embedEstimateRequest = useMemo(
|
||||
() =>
|
||||
selectedTierResources.map((r) => ({
|
||||
filename: resourceFilename(r),
|
||||
sizeBytes: Math.round(r.size_mb * 1024 * 1024),
|
||||
})),
|
||||
[selectedTierResources]
|
||||
)
|
||||
|
||||
const { data: embedEstimate, isLoading: isEstimating } = useQuery({
|
||||
queryKey: ['embedEstimateBatch', embedEstimateRequest],
|
||||
queryFn: () => api.estimateEmbeddingBatch(embedEstimateRequest),
|
||||
enabled: embedEstimateRequest.length > 0,
|
||||
staleTime: 5 * 60_000,
|
||||
})
|
||||
|
||||
const { data: ingestPolicySetting } = useQuery({
|
||||
queryKey: ['ingestPolicy'],
|
||||
queryFn: () => api.getSetting('rag.defaultIngestPolicy'),
|
||||
})
|
||||
|
||||
// System info for the disk-free side of the guardrail. Shared queryKey with
|
||||
// the home / easy-setup pages so we don't refetch when the user already has
|
||||
// a fresh copy in cache from a sibling component.
|
||||
const { data: systemInfo } = useSystemInfo({ enabled: true })
|
||||
|
||||
// Open state for the guardrail modal — separate from the tier modal so the
|
||||
// user sees the warning as an overlay without losing their tier selection
|
||||
// underneath. Cancel returns to the tier modal as-is; Proceed closes both
|
||||
// and runs the original onSelectTier path.
|
||||
const [guardrailVerdict, setGuardrailVerdict] = useState<GuardrailVerdict | null>(null)
|
||||
|
||||
// Compute disk-free bytes from system info; 0 means "unknown", which the
|
||||
// guardrail helper treats as "skip the relative-disk check".
|
||||
// Must be declared before the `!category` early return so the hook count
|
||||
// stays constant across renders (category transitions null → non-null when
|
||||
// the user opens the modal).
|
||||
const freeBytes = useMemo<number>(() => {
|
||||
const primary = getPrimaryDiskInfo(systemInfo?.disk, systemInfo?.fsSize)
|
||||
if (!primary) return 0
|
||||
return Math.max(0, primary.totalSize - primary.totalUsed)
|
||||
}, [systemInfo])
|
||||
|
||||
const ingestPolicy: 'Always' | 'Manual' =
|
||||
ingestPolicySetting?.value === 'Manual' ? 'Manual' : 'Always'
|
||||
|
||||
if (!category) return null
|
||||
|
||||
const getTierTotalSize = (tier: SpecTier): number => {
|
||||
return getAllResourcesForTier(tier).reduce((acc, r) => acc + r.size_mb * 1024 * 1024, 0)
|
||||
}
|
||||
|
|
@ -126,43 +53,17 @@ const TierSelectionModal: React.FC<TierSelectionModalProps> = ({
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the original onSelectTier-then-onClose flow. Pulled out of
|
||||
* handleSubmit so the guardrail modal's confirm path can call it after
|
||||
* the user has consented to the large operation.
|
||||
*/
|
||||
const finalizeSubmit = () => {
|
||||
if (!localSelectedSlug || !category) return
|
||||
const selectedTier = category.tiers.find((t) => t.slug === localSelectedSlug)
|
||||
const handleSubmit = () => {
|
||||
if (!localSelectedSlug) return
|
||||
|
||||
const selectedTier = category.tiers.find(t => t.slug === localSelectedSlug)
|
||||
if (selectedTier) {
|
||||
onSelectTier(category, selectedTier)
|
||||
}
|
||||
onClose()
|
||||
}
|
||||
|
||||
const handleSubmit = () => {
|
||||
if (!localSelectedSlug || !category) return
|
||||
|
||||
// Guardrail only runs when we have an estimate AND the global policy
|
||||
// would auto-index this batch. Under Manual the user has already opted
|
||||
// out of automatic ingestion, so the bulk-disk warning would be a false
|
||||
// alarm — the files would just queue as pending_decision.
|
||||
if (ingestPolicy === 'Always' && embedEstimate) {
|
||||
const verdict = evaluateGuardrail({
|
||||
estimateBytes: embedEstimate.totalBytes,
|
||||
freeBytes,
|
||||
})
|
||||
if (verdict.trips) {
|
||||
setGuardrailVerdict(verdict)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
finalizeSubmit()
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<Transition appear show={isOpen} as={Fragment}>
|
||||
<Dialog as="div" className="relative z-50" onClose={onClose}>
|
||||
<Transition.Child
|
||||
|
|
@ -302,41 +203,8 @@ const TierSelectionModal: React.FC<TierSelectionModalProps> = ({
|
|||
})}
|
||||
</div>
|
||||
|
||||
{/* Embedding-cost preview — visible whenever a tier is
|
||||
selected. The estimate uses #891's ratio registry to
|
||||
project how much extra disk space the AI Assistant will
|
||||
need for these files on top of the raw downloads. */}
|
||||
{localSelectedSlug && embedEstimate && embedEstimate.totalBytes > 0 && (
|
||||
<div className="mt-4 bg-surface-secondary border border-border-subtle rounded p-3 text-sm">
|
||||
<div className="flex items-start gap-2">
|
||||
<DynamicIcon icon="IconBrain" className="w-5 h-5 text-desert-green flex-shrink-0 mt-0.5" />
|
||||
<div className="flex-1">
|
||||
<p className="text-text-primary">
|
||||
<span className="font-medium">+~{formatBytes(embedEstimate.totalBytes, 1)}</span>
|
||||
{' '}of additional storage if these are indexed for the AI Assistant
|
||||
{embedEstimate.hasUnknown && (
|
||||
<span className="text-text-muted"> (estimate excludes some files we have no prior data for)</span>
|
||||
)}
|
||||
.
|
||||
</p>
|
||||
<p className="text-text-muted text-xs mt-1">
|
||||
{ingestPolicy === 'Always' ? (
|
||||
<>
|
||||
Your <strong>Auto-index</strong> setting is <strong>Always</strong>, so these files will be indexed automatically once downloaded. You can change this in the Knowledge Base settings.
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
Your <strong>Auto-index</strong> setting is <strong>Manual</strong>, so these files will sit unindexed until you opt in from the Knowledge Base settings.
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Info note */}
|
||||
<div className="mt-4 flex items-start gap-2 text-sm text-text-muted bg-blue-50 p-3 rounded">
|
||||
<div className="mt-6 flex items-start gap-2 text-sm text-text-muted bg-blue-50 p-3 rounded">
|
||||
<IconInfoCircle size={18} className="text-blue-500 flex-shrink-0 mt-0.5" />
|
||||
<p>
|
||||
You can change your selection at any time. Click Submit to confirm your choice.
|
||||
|
|
@ -350,7 +218,7 @@ const TierSelectionModal: React.FC<TierSelectionModalProps> = ({
|
|||
variant='primary'
|
||||
size='lg'
|
||||
onClick={handleSubmit}
|
||||
disabled={!localSelectedSlug || (embedEstimateRequest.length > 0 && isEstimating)}
|
||||
disabled={!localSelectedSlug}
|
||||
>
|
||||
Submit
|
||||
</StyledButton>
|
||||
|
|
@ -361,18 +229,6 @@ const TierSelectionModal: React.FC<TierSelectionModalProps> = ({
|
|||
</div>
|
||||
</Dialog>
|
||||
</Transition>
|
||||
{guardrailVerdict && (
|
||||
<KbGuardrailModal
|
||||
isOpen={true}
|
||||
verdict={guardrailVerdict}
|
||||
onConfirm={() => {
|
||||
setGuardrailVerdict(null)
|
||||
finalizeSubmit()
|
||||
}}
|
||||
onCancel={() => setGuardrailVerdict(null)}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,126 +0,0 @@
|
|||
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
|
||||
import { usePage } from '@inertiajs/react'
|
||||
import { IconBrain } from '@tabler/icons-react'
|
||||
import api from '~/lib/api'
|
||||
import StyledButton from '~/components/StyledButton'
|
||||
import { useNotifications } from '~/context/NotificationContext'
|
||||
|
||||
/**
|
||||
* First-chat onboarding banner (RFC #883 Phase 3 task 12).
|
||||
*
|
||||
* Renders above the chat header when the scanner has seen at least one
|
||||
* embeddable file AND the user has not yet picked a global ingest policy
|
||||
* (`rag.defaultIngestPolicy` unset). Two buttons let the user decide once,
|
||||
* after which the prompt never returns:
|
||||
*
|
||||
* - "Index existing content" → sets policy=Always and dispatches a sync so
|
||||
* anything already on disk + in `pending_decision` gets queued for embed.
|
||||
* - "Maybe later" → sets policy=Manual. New content waits in
|
||||
* `pending_decision` until the user opts in from the KB modal.
|
||||
*
|
||||
* The "dismiss without deciding" X is intentionally NOT here. Dismissing
|
||||
* without setting policy would make the banner reappear on every visit until
|
||||
* a choice is recorded — annoying. The two action buttons each set policy,
|
||||
* and the user can change their mind any time via the Always/Manual radio in
|
||||
* the KB modal.
|
||||
*/
|
||||
export default function KbPolicyPromptBanner() {
|
||||
const queryClient = useQueryClient()
|
||||
const { addNotification } = useNotifications()
|
||||
// Inertia injects `aiAssistantName` as a shared page prop on chat-mounted
|
||||
// pages so the banner pulls the user-set name when surfaced. Default to
|
||||
// "AI Assistant" when accessed outside that context (no-op for chat pages,
|
||||
// but keeps the component safe for future reuse elsewhere).
|
||||
const aiAssistantName =
|
||||
usePage<{ aiAssistantName?: string }>().props?.aiAssistantName || 'AI Assistant'
|
||||
|
||||
const { data: promptState } = useQuery({
|
||||
queryKey: ['kbPolicyPromptState'],
|
||||
queryFn: () => api.getKbPolicyPromptState(),
|
||||
staleTime: Infinity,
|
||||
})
|
||||
|
||||
const indexNowMutation = useMutation({
|
||||
mutationFn: async () => {
|
||||
await api.updateSetting('rag.defaultIngestPolicy', 'Always')
|
||||
await api.syncRAGStorage()
|
||||
},
|
||||
onSuccess: () => {
|
||||
addNotification({
|
||||
type: 'success',
|
||||
message: `${aiAssistantName} will index your existing content. You can track progress in the Knowledge Base panel.`,
|
||||
})
|
||||
queryClient.invalidateQueries({ queryKey: ['kbPolicyPromptState'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['ingestPolicy'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['embed-jobs'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['storedFiles'] })
|
||||
},
|
||||
onError: (error: any) => {
|
||||
addNotification({
|
||||
type: 'error',
|
||||
message: error?.message || 'Could not start indexing. Try again from the Knowledge Base panel.',
|
||||
})
|
||||
},
|
||||
})
|
||||
|
||||
const maybeLaterMutation = useMutation({
|
||||
mutationFn: () => api.updateSetting('rag.defaultIngestPolicy', 'Manual'),
|
||||
onSuccess: () => {
|
||||
addNotification({
|
||||
type: 'success',
|
||||
message: 'Your content stays unindexed for now. You can opt in any time from the Knowledge Base panel.',
|
||||
})
|
||||
queryClient.invalidateQueries({ queryKey: ['kbPolicyPromptState'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['ingestPolicy'] })
|
||||
},
|
||||
onError: (error: any) => {
|
||||
addNotification({
|
||||
type: 'error',
|
||||
message: error?.message || 'Could not save your choice. Try again.',
|
||||
})
|
||||
},
|
||||
})
|
||||
|
||||
if (!promptState?.shouldPrompt) return null
|
||||
|
||||
const fileCount = promptState.totalFiles
|
||||
const isBusy = indexNowMutation.isPending || maybeLaterMutation.isPending
|
||||
|
||||
return (
|
||||
<div className="px-6 py-3 bg-blue-50 dark:bg-blue-950/30 border-b border-blue-200 dark:border-blue-800 flex-shrink-0">
|
||||
<div className="flex items-center gap-3">
|
||||
<IconBrain className="h-6 w-6 text-blue-600 dark:text-blue-300 flex-shrink-0" />
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="text-sm text-text-primary">
|
||||
<strong>
|
||||
{fileCount === 1
|
||||
? `Index your existing file for ${aiAssistantName}?`
|
||||
: `Index your ${fileCount.toLocaleString()} existing files for ${aiAssistantName}?`}
|
||||
</strong>
|
||||
{' '}When indexed, {aiAssistantName} can reference them while answering your questions.
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex gap-2 flex-shrink-0">
|
||||
<StyledButton
|
||||
onClick={() => indexNowMutation.mutate()}
|
||||
variant="primary"
|
||||
size="sm"
|
||||
disabled={isBusy}
|
||||
loading={indexNowMutation.isPending}
|
||||
>
|
||||
Index existing content
|
||||
</StyledButton>
|
||||
<StyledButton
|
||||
onClick={() => maybeLaterMutation.mutate()}
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
disabled={isBusy}
|
||||
loading={maybeLaterMutation.isPending}
|
||||
>
|
||||
Maybe later
|
||||
</StyledButton>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
|
@ -2,16 +2,10 @@ import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
|
|||
import { useEffect, useRef, useState } from 'react'
|
||||
import FileUploader from '~/components/file-uploader'
|
||||
import StyledButton from '~/components/StyledButton'
|
||||
import type { DynamicIconName } from '~/lib/icons'
|
||||
import StyledSectionHeader from '~/components/StyledSectionHeader'
|
||||
import StyledTable from '~/components/StyledTable'
|
||||
import { useNotifications } from '~/context/NotificationContext'
|
||||
import api from '~/lib/api'
|
||||
import {
|
||||
groupAndSortKbFiles,
|
||||
type KbFileGroup,
|
||||
} from '~/lib/kb_file_grouping'
|
||||
import type { KbIngestStateValue } from '../../../types/kb_ingest_state'
|
||||
import { IconX } from '@tabler/icons-react'
|
||||
import { useModals } from '~/context/ModalContext'
|
||||
import StyledModal from '../StyledModal'
|
||||
|
|
@ -23,74 +17,9 @@ interface KnowledgeBaseModalProps {
|
|||
onClose: () => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Compact label for the per-row ingestion state. Files that exist in Qdrant
|
||||
* with no `kb_ingest_state` row (`state === null`) are legacy/pre-RFC-883
|
||||
* installs whose chunks are real, so we display them as "Indexed" rather than
|
||||
* surfacing the absent-row detail. Admin-docs group has no pill (the "Managed
|
||||
* by NOMAD" message in the action column carries the same signal).
|
||||
*/
|
||||
function renderStatePill(record: KbFileGroup): React.ReactNode {
|
||||
if (record.bucket === 'admin_docs') return null
|
||||
const effective: KbIngestStateValue = record.state ?? 'indexed'
|
||||
|
||||
const base = 'inline-flex items-center text-xs font-medium rounded px-2 py-0.5 border'
|
||||
switch (effective) {
|
||||
case 'indexed':
|
||||
return (
|
||||
<span className={`${base} text-green-700 bg-green-50 border-green-200 dark:text-green-300 dark:bg-green-950/40 dark:border-green-800`}>
|
||||
Indexed
|
||||
</span>
|
||||
)
|
||||
case 'pending_decision':
|
||||
case 'browse_only':
|
||||
return (
|
||||
<span className={`${base} text-text-secondary bg-surface-secondary border-border-subtle`}>
|
||||
Not Indexed
|
||||
</span>
|
||||
)
|
||||
case 'failed':
|
||||
return (
|
||||
<span className={`${base} text-red-700 bg-red-50 border-red-200 dark:text-red-300 dark:bg-red-950/40 dark:border-red-800`}>
|
||||
Failed
|
||||
</span>
|
||||
)
|
||||
case 'stalled':
|
||||
return (
|
||||
<span className={`${base} text-amber-700 bg-amber-50 border-amber-200 dark:text-amber-300 dark:bg-amber-950/40 dark:border-amber-800`}>
|
||||
Stalled
|
||||
</span>
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
type RowAction =
|
||||
| { kind: 'index'; label: string; force: boolean; variant: 'primary'; icon: DynamicIconName }
|
||||
| { kind: 'reembed'; label: string; force: true; variant: 'secondary'; icon: DynamicIconName }
|
||||
|
||||
/**
|
||||
* Pick the single adaptive per-row action button. Returns null when no action
|
||||
* makes sense for the current state (e.g. healthy indexed file with no
|
||||
* warnings — bulk Re-embed All covers that case). `hasWarnings` lets us
|
||||
* surface a Re-embed affordance specifically when a file *looks* indexed but
|
||||
* has zero chunks or a stalled-mid-ingestion warning attached.
|
||||
*/
|
||||
function pickRowAction(record: KbFileGroup, hasWarnings: boolean): RowAction | null {
|
||||
if (record.bucket === 'admin_docs') return null
|
||||
const effective: KbIngestStateValue = record.state ?? 'indexed'
|
||||
switch (effective) {
|
||||
case 'indexed':
|
||||
return hasWarnings
|
||||
? { kind: 'reembed', label: 'Re-embed', force: true, variant: 'secondary', icon: 'IconRefreshAlert' }
|
||||
: null
|
||||
case 'pending_decision':
|
||||
return { kind: 'index', label: 'Index', force: false, variant: 'primary', icon: 'IconDownload' }
|
||||
case 'browse_only':
|
||||
return { kind: 'index', label: 'Index', force: true, variant: 'primary', icon: 'IconDownload' }
|
||||
case 'failed':
|
||||
case 'stalled':
|
||||
return { kind: 'index', label: 'Retry', force: true, variant: 'primary', icon: 'IconRefresh' }
|
||||
}
|
||||
function sourceToDisplayName(source: string): string {
|
||||
const parts = source.split(/[/\\]/)
|
||||
return parts[parts.length - 1]
|
||||
}
|
||||
|
||||
export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", onClose }: KnowledgeBaseModalProps) {
|
||||
|
|
@ -98,9 +27,6 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
const [files, setFiles] = useState<File[]>([])
|
||||
const [isUploading, setIsUploading] = useState(false)
|
||||
const [confirmDeleteSource, setConfirmDeleteSource] = useState<string | null>(null)
|
||||
const [confirmReembed, setConfirmReembed] = useState<{ source: string; displayName: string } | null>(null)
|
||||
const [bulkMode, setBulkMode] = useState<null | 'reembed' | 'reset'>(null)
|
||||
const [resetTyped, setResetTyped] = useState('')
|
||||
const fileUploaderRef = useRef<React.ComponentRef<typeof FileUploader>>(null)
|
||||
const { openModal, closeModal } = useModals()
|
||||
const queryClient = useQueryClient()
|
||||
|
|
@ -124,49 +50,6 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
select: (data) => data || [],
|
||||
})
|
||||
|
||||
// Per-file conditional warnings (RFC #883 §6). `ok: false` means the
|
||||
// computation itself failed (Qdrant/DB/FS) — distinct from `ok: true` with
|
||||
// an empty map, which means everything is healthy. We surface the failure
|
||||
// explicitly so a silent backend failure doesn't masquerade as health.
|
||||
const { data: warningsResult } = useQuery({
|
||||
queryKey: ['kbFileWarnings'],
|
||||
queryFn: () => api.getKbFileWarnings(),
|
||||
refetchInterval: 30_000,
|
||||
})
|
||||
const fileWarnings = warningsResult?.warnings ?? {}
|
||||
const warningsUnavailable = warningsResult !== undefined && warningsResult.ok === false
|
||||
|
||||
// Global auto-index policy. KVStore returns `null` for an unset key, which
|
||||
// we treat as 'Always' for backward compatibility with installs that predate
|
||||
// this UI. The user can opt into Manual mode from the toggle below.
|
||||
const { data: ingestPolicySetting } = useQuery({
|
||||
queryKey: ['ingestPolicy'],
|
||||
queryFn: () => api.getSetting('rag.defaultIngestPolicy'),
|
||||
})
|
||||
const ingestPolicy: 'Always' | 'Manual' =
|
||||
ingestPolicySetting?.value === 'Manual' ? 'Manual' : 'Always'
|
||||
|
||||
const updateIngestPolicyMutation = useMutation({
|
||||
mutationFn: (policy: 'Always' | 'Manual') =>
|
||||
api.updateSetting('rag.defaultIngestPolicy', policy),
|
||||
onSuccess: (_data, policy) => {
|
||||
queryClient.invalidateQueries({ queryKey: ['ingestPolicy'] })
|
||||
addNotification({
|
||||
type: 'success',
|
||||
message:
|
||||
policy === 'Always'
|
||||
? 'New content will be auto-indexed for AI.'
|
||||
: 'New content will wait for you to opt in.',
|
||||
})
|
||||
},
|
||||
onError: (error: any) => {
|
||||
addNotification({
|
||||
type: 'error',
|
||||
message: error?.message || 'Failed to update indexing policy.',
|
||||
})
|
||||
},
|
||||
})
|
||||
|
||||
const uploadMutation = useMutation({
|
||||
mutationFn: (file: File) => api.uploadDocument(file),
|
||||
})
|
||||
|
|
@ -184,25 +67,6 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
},
|
||||
})
|
||||
|
||||
const embedMutation = useMutation({
|
||||
mutationFn: ({ source, force }: { source: string; force: boolean }) =>
|
||||
api.embedSingleRAGFile(source, force),
|
||||
onSuccess: (data) => {
|
||||
addNotification({
|
||||
type: 'success',
|
||||
message: data?.message || 'File queued for embedding.',
|
||||
})
|
||||
setConfirmReembed(null)
|
||||
queryClient.invalidateQueries({ queryKey: ['storedFiles'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['embed-jobs'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['kbFileWarnings'] })
|
||||
},
|
||||
onError: (error: any) => {
|
||||
addNotification({ type: 'error', message: error?.message || 'Failed to queue file.' })
|
||||
setConfirmReembed(null)
|
||||
},
|
||||
})
|
||||
|
||||
const cleanupFailedMutation = useMutation({
|
||||
mutationFn: () => api.cleanupFailedEmbedJobs(),
|
||||
onSuccess: (data) => {
|
||||
|
|
@ -241,44 +105,6 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
},
|
||||
})
|
||||
|
||||
const reembedMutation = useMutation({
|
||||
mutationFn: () => api.reembedAllRAG(),
|
||||
onSuccess: (data) => {
|
||||
addNotification({
|
||||
type: data?.success ? 'success' : 'error',
|
||||
message: data?.message || 'Re-embed completed.',
|
||||
})
|
||||
queryClient.invalidateQueries({ queryKey: ['storedFiles'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['embed-jobs'] })
|
||||
setBulkMode(null)
|
||||
setResetTyped('')
|
||||
},
|
||||
onError: () => {
|
||||
addNotification({ type: 'error', message: 'Failed to re-embed knowledge base.' })
|
||||
setBulkMode(null)
|
||||
},
|
||||
})
|
||||
|
||||
const resetMutation = useMutation({
|
||||
mutationFn: () => api.resetAndRebuildRAG(),
|
||||
onSuccess: (data) => {
|
||||
addNotification({
|
||||
type: data?.success ? 'success' : 'error',
|
||||
message: data?.message || 'Reset complete.',
|
||||
})
|
||||
queryClient.invalidateQueries({ queryKey: ['storedFiles'] })
|
||||
queryClient.invalidateQueries({ queryKey: ['embed-jobs'] })
|
||||
setBulkMode(null)
|
||||
setResetTyped('')
|
||||
},
|
||||
onError: () => {
|
||||
addNotification({ type: 'error', message: 'Failed to reset knowledge base.' })
|
||||
setBulkMode(null)
|
||||
},
|
||||
})
|
||||
|
||||
const bulkBusy = reembedMutation.isPending || resetMutation.isPending
|
||||
|
||||
const handleUpload = async () => {
|
||||
if (files.length === 0) return
|
||||
setIsUploading(true)
|
||||
|
|
@ -442,48 +268,6 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="my-8 p-4 rounded-lg border border-border-subtle bg-surface-secondary">
|
||||
<div className="flex flex-wrap items-center justify-between gap-3">
|
||||
<div className="flex-1 min-w-[14rem]">
|
||||
<p className="text-sm font-medium text-text-primary">
|
||||
Auto-index new content for AI?
|
||||
</p>
|
||||
<p className="text-xs text-text-muted mt-1">
|
||||
Indexed content typically uses 5–10× the original file size on disk.
|
||||
Changes apply to new content added after this setting changes.
|
||||
</p>
|
||||
</div>
|
||||
<div
|
||||
role="radiogroup"
|
||||
aria-label="Ingest policy"
|
||||
className="inline-flex rounded-md overflow-hidden border border-border-subtle"
|
||||
>
|
||||
{(['Always', 'Manual'] as const).map((option) => {
|
||||
const isActive = ingestPolicy === option
|
||||
return (
|
||||
<button
|
||||
key={option}
|
||||
type="button"
|
||||
role="radio"
|
||||
aria-checked={isActive}
|
||||
onClick={() =>
|
||||
!isActive && updateIngestPolicyMutation.mutate(option)
|
||||
}
|
||||
disabled={updateIngestPolicyMutation.isPending}
|
||||
className={`px-4 py-2 text-sm font-medium transition-colors ${
|
||||
isActive
|
||||
? 'bg-desert-green text-white'
|
||||
: 'bg-surface-primary text-text-secondary hover:bg-surface-tertiary'
|
||||
} ${updateIngestPolicyMutation.isPending ? 'opacity-50 cursor-not-allowed' : ''}`}
|
||||
>
|
||||
{option}
|
||||
</button>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="my-8">
|
||||
<div className="flex items-center justify-between mb-4">
|
||||
<StyledSectionHeader title="Processing Queue" className="!mb-0" />
|
||||
|
|
@ -502,54 +286,20 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
</div>
|
||||
|
||||
<div className="my-12">
|
||||
<div className='flex items-center justify-between mb-6 gap-2 flex-wrap'>
|
||||
<div className='flex items-center justify-between mb-6'>
|
||||
<StyledSectionHeader title="Stored Knowledge Base Files" className='!mb-0' />
|
||||
<div className="flex items-center gap-2 flex-wrap">
|
||||
<StyledButton
|
||||
variant="danger"
|
||||
size="md"
|
||||
icon='IconAlertTriangle'
|
||||
onClick={() => { setResetTyped(''); setBulkMode('reset') }}
|
||||
disabled={isUploading || qdrantOffline || bulkBusy}
|
||||
loading={resetMutation.isPending}
|
||||
title="Drop the entire embeddings collection and re-embed everything from scratch. Permanently removes vectors for files no longer on disk. Destructive: requires typing RESET to confirm."
|
||||
>
|
||||
Reset & Rebuild
|
||||
</StyledButton>
|
||||
<StyledButton
|
||||
variant="secondary"
|
||||
size="md"
|
||||
icon='IconRefreshAlert'
|
||||
onClick={() => setBulkMode('reembed')}
|
||||
disabled={isUploading || qdrantOffline || bulkBusy || storedFiles.length === 0}
|
||||
loading={reembedMutation.isPending}
|
||||
title="Re-embed every file on disk, replacing existing vectors file-by-file. Vectors for files no longer on disk are preserved. Use this if the chunker or embedding model has changed."
|
||||
>
|
||||
Re-embed All
|
||||
</StyledButton>
|
||||
<StyledButton
|
||||
variant="secondary"
|
||||
size="md"
|
||||
icon='IconRefresh'
|
||||
onClick={handleConfirmSync}
|
||||
disabled={syncMutation.isPending || isUploading || qdrantOffline || bulkBusy}
|
||||
loading={syncMutation.isPending || isUploading}
|
||||
title="Scan storage for new files and queue any that haven't been embedded yet. Safe to run anytime; won't touch already-embedded content."
|
||||
>
|
||||
Sync Storage
|
||||
</StyledButton>
|
||||
|
||||
</div>
|
||||
<StyledButton
|
||||
variant="secondary"
|
||||
size="md"
|
||||
icon='IconRefresh'
|
||||
onClick={handleConfirmSync}
|
||||
disabled={syncMutation.isPending || isUploading || qdrantOffline}
|
||||
loading={syncMutation.isPending || isUploading}
|
||||
>
|
||||
Sync Storage
|
||||
</StyledButton>
|
||||
</div>
|
||||
{warningsUnavailable && (
|
||||
<div className="mb-4 inline-flex items-center gap-2 text-xs text-amber-700 dark:text-amber-300 bg-amber-50 dark:bg-amber-950/40 border border-amber-200 dark:border-amber-800 rounded px-3 py-2">
|
||||
<span aria-hidden="true">⚠</span>
|
||||
<span>
|
||||
File warnings unavailable — couldn't read storage state. Retrying…
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
<StyledTable<KbFileGroup>
|
||||
<StyledTable<{ source: string }>
|
||||
className="font-semibold"
|
||||
rowLines={true}
|
||||
columns={[
|
||||
|
|
@ -557,60 +307,13 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
accessor: 'source',
|
||||
title: 'File Name',
|
||||
render(record) {
|
||||
const warnings = fileWarnings[record.source] ?? []
|
||||
const pill = renderStatePill(record)
|
||||
return (
|
||||
<div className="flex flex-col gap-1.5">
|
||||
<span className="text-text-primary">
|
||||
{record.displayName}
|
||||
</span>
|
||||
{(pill || warnings.length > 0) && (
|
||||
<div className="flex flex-wrap items-center gap-1.5">
|
||||
{pill}
|
||||
{warnings.map((w, i) => (
|
||||
<span
|
||||
key={i}
|
||||
className="inline-flex items-center gap-1.5 self-start text-xs text-amber-700 dark:text-amber-300 bg-amber-50 dark:bg-amber-950/40 border border-amber-200 dark:border-amber-800 rounded px-2 py-0.5"
|
||||
>
|
||||
<span aria-hidden="true">⚠</span>
|
||||
{w.kind === 'zero_chunks' && (
|
||||
<span>
|
||||
Embedded 0 chunks — this file has no text content.
|
||||
AI Assistant cannot reference it.
|
||||
</span>
|
||||
)}
|
||||
{w.kind === 'partial_stall' && (
|
||||
<span>
|
||||
Only {w.chunksEmbedded.toLocaleString()} of est.{' '}
|
||||
{w.chunksExpected.toLocaleString()} chunks embedded —
|
||||
ingestion may have stalled.
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
return <span className="text-text-primary">{sourceToDisplayName(record.source)}</span>
|
||||
},
|
||||
},
|
||||
{
|
||||
accessor: 'source',
|
||||
title: '',
|
||||
render(record) {
|
||||
// Admin docs are auto-discovered and managed by NOMAD itself —
|
||||
// deleting one would just be re-embedded on the next sync, so
|
||||
// we surface them as informational only and hide Delete.
|
||||
if (record.bucket === 'admin_docs') {
|
||||
return (
|
||||
<div className="flex justify-end">
|
||||
<span className="text-sm text-text-muted italic">
|
||||
Managed by NOMAD
|
||||
</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const isConfirming = confirmDeleteSource === record.source
|
||||
const isDeleting = deleteMutation.isPending && confirmDeleteSource === record.source
|
||||
if (isConfirming) {
|
||||
|
|
@ -636,38 +339,14 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const warnings = fileWarnings[record.source] ?? []
|
||||
const action = pickRowAction(record, warnings.length > 0)
|
||||
const actionPendingForThisRow =
|
||||
embedMutation.isPending && embedMutation.variables?.source === record.source
|
||||
|
||||
return (
|
||||
<div className="flex justify-end items-center gap-2">
|
||||
{action && (
|
||||
<StyledButton
|
||||
variant={action.variant}
|
||||
size="sm"
|
||||
icon={action.icon}
|
||||
onClick={() => {
|
||||
if (action.kind === 'reembed') {
|
||||
setConfirmReembed({ source: record.source, displayName: record.displayName })
|
||||
} else {
|
||||
embedMutation.mutate({ source: record.source, force: action.force })
|
||||
}
|
||||
}}
|
||||
disabled={qdrantOffline || deleteMutation.isPending || embedMutation.isPending}
|
||||
loading={actionPendingForThisRow}
|
||||
>
|
||||
{action.label}
|
||||
</StyledButton>
|
||||
)}
|
||||
<div className="flex justify-end">
|
||||
<StyledButton
|
||||
variant="danger"
|
||||
size="sm"
|
||||
icon="IconTrash"
|
||||
onClick={() => setConfirmDeleteSource(record.source)}
|
||||
disabled={deleteMutation.isPending || embedMutation.isPending}
|
||||
disabled={deleteMutation.isPending}
|
||||
loading={deleteMutation.isPending && confirmDeleteSource === record.source}
|
||||
>Delete</StyledButton>
|
||||
</div>
|
||||
|
|
@ -675,138 +354,12 @@ export default function KnowledgeBaseModal({ aiAssistantName = "AI Assistant", o
|
|||
},
|
||||
},
|
||||
]}
|
||||
data={groupAndSortKbFiles(storedFiles)}
|
||||
data={storedFiles.map((source) => ({ source }))}
|
||||
loading={isLoadingFiles}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{bulkMode === 'reembed' && (
|
||||
<StyledModal
|
||||
title='Re-embed All Documents?'
|
||||
open={true}
|
||||
confirmText={reembedMutation.isPending ? 'Re-embedding…' : 'Re-embed All'}
|
||||
cancelText='Cancel'
|
||||
confirmVariant='primary'
|
||||
confirmLoading={reembedMutation.isPending}
|
||||
onConfirm={() => reembedMutation.mutate()}
|
||||
onCancel={() => setBulkMode(null)}
|
||||
>
|
||||
<div className='text-text-primary text-sm space-y-3 text-left'>
|
||||
<p>
|
||||
This will re-process every document currently in your knowledge base — about
|
||||
<strong> {storedFiles.length} file{storedFiles.length === 1 ? '' : 's'}</strong>.
|
||||
For each file, NOMAD will delete the existing embeddings from Qdrant and queue a fresh
|
||||
embedding job using the current chunking and embedding model.
|
||||
</p>
|
||||
<div className='rounded border border-border-subtle bg-surface-secondary p-3'>
|
||||
<p className='font-semibold mb-1'>What this is for</p>
|
||||
<p className='text-text-secondary'>
|
||||
Use this when the embedding model or chunking logic has changed, or when you suspect
|
||||
stored vectors are stale. Files on disk are <em>not</em> deleted, and any orphan
|
||||
points whose source file is no longer present will be preserved untouched (see
|
||||
<em> Reset & Rebuild </em>if you want a fully clean slate).
|
||||
</p>
|
||||
</div>
|
||||
<div className='rounded border border-amber-300 bg-amber-50 dark:bg-amber-950 dark:border-amber-800 p-3 text-amber-900 dark:text-amber-200'>
|
||||
<p className='font-semibold mb-1'>Heads up</p>
|
||||
<ul className='list-disc pl-5 space-y-1'>
|
||||
<li>Embedding {storedFiles.length} file{storedFiles.length === 1 ? '' : 's'} may take a long time, especially for large PDFs or ZIM archives.</li>
|
||||
<li>On systems without GPU acceleration, expect sustained high CPU usage for the duration.</li>
|
||||
<li>Knowledge Base search results may be incomplete until every file finishes re-embedding.</li>
|
||||
<li>If embed jobs are already in progress, this action will be refused — wait for the queue to drain first.</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</StyledModal>
|
||||
)}
|
||||
|
||||
{bulkMode === 'reset' && (
|
||||
<StyledModal
|
||||
title='Reset & Rebuild Knowledge Base?'
|
||||
open={true}
|
||||
confirmText={resetMutation.isPending ? 'Resetting…' : 'Wipe & Rebuild'}
|
||||
cancelText='Cancel'
|
||||
confirmVariant='danger'
|
||||
confirmLoading={resetMutation.isPending}
|
||||
onConfirm={() => {
|
||||
if (resetTyped === 'RESET') resetMutation.mutate()
|
||||
}}
|
||||
onCancel={() => { setBulkMode(null); setResetTyped('') }}
|
||||
>
|
||||
<div className='text-text-primary text-sm space-y-3 text-left'>
|
||||
<p>
|
||||
This will <strong>permanently delete every point</strong> in the
|
||||
<code> nomad_knowledge_base </code>Qdrant collection and rebuild from the
|
||||
<strong> {storedFiles.length} file{storedFiles.length === 1 ? '' : 's'}</strong> currently
|
||||
on disk. The collection is dropped, recreated, and every file is re-queued for embedding.
|
||||
</p>
|
||||
<div className='rounded border border-border-subtle bg-surface-secondary p-3'>
|
||||
<p className='font-semibold mb-1'>How this differs from Re-embed All</p>
|
||||
<ul className='list-disc pl-5 space-y-1 text-text-secondary'>
|
||||
<li><strong>Re-embed All</strong> replaces vectors file-by-file. Any orphan points (vectors whose source file was deleted from disk at some point) are preserved.</li>
|
||||
<li><strong>Reset & Rebuild</strong> drops the entire collection. Orphan points are <strong>gone forever</strong>. Only files currently on disk will exist in Qdrant afterwards.</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div className='rounded border border-red-300 bg-red-50 dark:bg-red-950 dark:border-red-800 p-3 text-red-900 dark:text-red-200'>
|
||||
<p className='font-semibold mb-1'>This action is destructive and cannot be undone</p>
|
||||
<ul className='list-disc pl-5 space-y-1'>
|
||||
<li>Knowledge Base search will be empty until embedding finishes (potentially hours on CPU-only systems).</li>
|
||||
<li>For a few seconds during the reset, the Qdrant collection does not exist — any chat-with-RAG queries in that window may return a "collection not found" error. Avoid using chat until the rebuild has begun.</li>
|
||||
<li>If embed jobs are already in progress, this action will be refused — wait for the queue to drain first.</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div>
|
||||
<label className='block text-sm font-semibold mb-1'>
|
||||
Type <code>RESET</code> to confirm:
|
||||
</label>
|
||||
<input
|
||||
type='text'
|
||||
value={resetTyped}
|
||||
onChange={(e) => setResetTyped(e.target.value)}
|
||||
placeholder='RESET'
|
||||
autoFocus
|
||||
className='w-full rounded border border-border-subtle bg-surface-primary px-3 py-2 text-text-primary focus:outline-none focus:ring-2 focus:ring-red-500'
|
||||
/>
|
||||
{resetTyped.length > 0 && resetTyped !== 'RESET' && (
|
||||
<p className='text-xs text-red-600 mt-1'>Type RESET exactly (uppercase, no spaces) to enable the confirm button.</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</StyledModal>
|
||||
)}
|
||||
|
||||
{confirmReembed && (
|
||||
<StyledModal
|
||||
title='Re-embed this file?'
|
||||
open={true}
|
||||
confirmText={embedMutation.isPending ? 'Queuing…' : 'Re-embed'}
|
||||
cancelText='Cancel'
|
||||
confirmVariant='primary'
|
||||
confirmLoading={embedMutation.isPending}
|
||||
onConfirm={() =>
|
||||
embedMutation.mutate({ source: confirmReembed.source, force: true })
|
||||
}
|
||||
onCancel={() => setConfirmReembed(null)}
|
||||
>
|
||||
<div className='text-text-primary text-sm space-y-3 text-left'>
|
||||
<p>
|
||||
This will delete the existing embeddings for{' '}
|
||||
<strong>{confirmReembed.displayName}</strong> and queue
|
||||
a fresh embedding job. The file on disk is not touched.
|
||||
</p>
|
||||
<div className='rounded border border-amber-300 bg-amber-50 dark:bg-amber-950 dark:border-amber-800 p-3 text-amber-900 dark:text-amber-200'>
|
||||
<p className='font-semibold mb-1'>Heads up</p>
|
||||
<ul className='list-disc pl-5 space-y-1'>
|
||||
<li>For large ZIM archives this can take a long time, especially on CPU-only systems.</li>
|
||||
<li>Search results that referenced this file will be incomplete until the new embedding finishes.</li>
|
||||
<li>If a job for this file is already running, the re-embed will be refused — wait for it to finish first.</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</StyledModal>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import { useState, useCallback, useEffect, useRef, useMemo } from 'react'
|
|||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
|
||||
import ChatSidebar from './ChatSidebar'
|
||||
import ChatInterface from './ChatInterface'
|
||||
import KbPolicyPromptBanner from './KbPolicyPromptBanner'
|
||||
import StyledModal from '../StyledModal'
|
||||
import api from '~/lib/api'
|
||||
import { formatBytes } from '~/lib/util'
|
||||
|
|
@ -33,8 +32,6 @@ export default function Chat({
|
|||
const [activeSessionId, setActiveSessionId] = useState<string | null>(null)
|
||||
const [messages, setMessages] = useState<ChatMessage[]>([])
|
||||
const [selectedModel, setSelectedModel] = useState<string>('')
|
||||
const [pendingModelSwitch, setPendingModelSwitch] = useState<string | null>(null)
|
||||
const pageLoadNormalizedRef = useRef(false)
|
||||
const [isStreamingResponse, setIsStreamingResponse] = useState(false)
|
||||
const streamAbortRef = useRef<AbortController | null>(null)
|
||||
|
||||
|
|
@ -153,62 +150,6 @@ export default function Chat({
|
|||
}
|
||||
}, [selectedModel])
|
||||
|
||||
// Page-load normalization: enforce the "one chat model at a time" invariant
|
||||
// when the chat page first mounts. Anything stacked from a prior session
|
||||
// gets `keep_alive: 0` so it can be evicted; the embedding model is exempt
|
||||
// server-side. We wait for `selectedModel` to be populated by the
|
||||
// first-installed / lastModel effect so the request has a target to preserve.
|
||||
useEffect(() => {
|
||||
if (!enabled) return
|
||||
if (!selectedModel) return
|
||||
if (pageLoadNormalizedRef.current) return
|
||||
pageLoadNormalizedRef.current = true
|
||||
api.unloadChatModels(selectedModel).catch((err) => {
|
||||
console.warn('Failed to normalize loaded models on chat-page mount:', err)
|
||||
})
|
||||
}, [enabled, selectedModel])
|
||||
|
||||
const handleUserSelectedModel = useCallback(
|
||||
(newModel: string) => {
|
||||
if (newModel === selectedModel) return
|
||||
// No active chat session yet → no conversation to lose, no popup needed.
|
||||
// Just update the dropdown silently; the next "New Chat" will use it.
|
||||
if (!activeSessionId) {
|
||||
setSelectedModel(newModel)
|
||||
return
|
||||
}
|
||||
// Active session: defer the actual model swap until the user confirms.
|
||||
// Setting `pendingModelSwitch` drives the dropdown's effective value
|
||||
// *and* opens the confirm modal — clearing it on cancel reverts the
|
||||
// visible selection without us having to touch `selectedModel`.
|
||||
setPendingModelSwitch(newModel)
|
||||
},
|
||||
[selectedModel, activeSessionId]
|
||||
)
|
||||
|
||||
const handleConfirmModelSwitch = useCallback(async () => {
|
||||
const newModel = pendingModelSwitch
|
||||
if (!newModel) return
|
||||
// Best-effort unload of the previously-active chat model. Fire-and-forget:
|
||||
// Ollama queues the eviction until the runner is idle, so an in-flight
|
||||
// request on the old model finishes cleanly. We don't await this before
|
||||
// clearing the session — UI responsiveness wins over housekeeping.
|
||||
api.unloadChatModels(newModel).catch((err) => {
|
||||
console.warn('Failed to unload previous chat model:', err)
|
||||
})
|
||||
setSelectedModel(newModel)
|
||||
setPendingModelSwitch(null)
|
||||
// Clear the active session and messages — the next user message will
|
||||
// lazily create a new session via the existing handleSendMessage path,
|
||||
// which already calls api.createChatSession with `selectedModel`.
|
||||
setActiveSessionId(null)
|
||||
setMessages([])
|
||||
}, [pendingModelSwitch])
|
||||
|
||||
const handleCancelModelSwitch = useCallback(() => {
|
||||
setPendingModelSwitch(null)
|
||||
}, [])
|
||||
|
||||
const handleNewChat = useCallback(() => {
|
||||
// Just clear the active session and messages - don't create a session yet
|
||||
setActiveSessionId(null)
|
||||
|
|
@ -260,19 +201,8 @@ export default function Chat({
|
|||
if (sessionData?.model) {
|
||||
setSelectedModel(sessionData.model)
|
||||
}
|
||||
|
||||
// Enforce the one-chat-model-at-a-time invariant: ask the backend to
|
||||
// unload anything that isn't the target session's model. Fire-and-forget;
|
||||
// this is housekeeping. Note we pass the *session's* model here rather
|
||||
// than reading `selectedModel`, because setSelectedModel above is async
|
||||
// and the effect-driven page-load normalize wouldn't catch a sidebar
|
||||
// click after the first render.
|
||||
const targetModel = sessionData?.model ?? selectedModel ?? null
|
||||
api.unloadChatModels(targetModel).catch((err) => {
|
||||
console.warn('Failed to unload non-target chat models on session switch:', err)
|
||||
})
|
||||
},
|
||||
[installedModels, queryClient, selectedModel]
|
||||
[installedModels, queryClient]
|
||||
)
|
||||
|
||||
const handleSendMessage = useCallback(
|
||||
|
|
@ -421,23 +351,6 @@ export default function Chat({
|
|||
)
|
||||
|
||||
return (
|
||||
<>
|
||||
{pendingModelSwitch && (
|
||||
<StyledModal
|
||||
title={`Switch to ${pendingModelSwitch}?`}
|
||||
onConfirm={handleConfirmModelSwitch}
|
||||
onCancel={handleCancelModelSwitch}
|
||||
open={true}
|
||||
confirmText="Switch & New Chat"
|
||||
cancelText="Cancel"
|
||||
confirmVariant="primary"
|
||||
>
|
||||
<p className="text-text-primary">
|
||||
Switching to <strong>{pendingModelSwitch}</strong> will start a new chat. Your current
|
||||
conversation stays available in the sidebar.
|
||||
</p>
|
||||
</StyledModal>
|
||||
)}
|
||||
<div
|
||||
className={classNames(
|
||||
'flex border border-border-subtle overflow-hidden shadow-sm w-full',
|
||||
|
|
@ -453,7 +366,6 @@ export default function Chat({
|
|||
isInModal={isInModal}
|
||||
/>
|
||||
<div className="flex-1 flex flex-col min-h-0">
|
||||
<KbPolicyPromptBanner />
|
||||
<div className="px-6 py-3 border-b border-border-subtle bg-surface-secondary flex items-center justify-between h-[75px] flex-shrink-0">
|
||||
<h2 className="text-lg font-semibold text-text-primary">
|
||||
{activeSession?.title || 'New Chat'}
|
||||
|
|
@ -482,8 +394,8 @@ export default function Chat({
|
|||
) : (
|
||||
<select
|
||||
id="model-select"
|
||||
value={pendingModelSwitch ?? selectedModel}
|
||||
onChange={(e) => handleUserSelectedModel(e.target.value)}
|
||||
value={selectedModel}
|
||||
onChange={(e) => setSelectedModel(e.target.value)}
|
||||
className="px-3 py-1.5 border border-border-default rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-desert-green focus:border-transparent bg-surface-primary"
|
||||
>
|
||||
{installedModels.map((model) => (
|
||||
|
|
@ -519,6 +431,5 @@ export default function Chat({
|
|||
/>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -243,11 +243,6 @@ export default function MapComponent({
|
|||
closeOnClick={false}
|
||||
>
|
||||
<div className="text-sm font-medium">{selectedMarker.name}</div>
|
||||
{selectedMarker.notes && selectedMarker.notes.trim() && (
|
||||
<div className="mt-1 text-xs text-desert-stone-dark whitespace-pre-wrap break-words max-w-[240px]">
|
||||
{selectedMarker.notes}
|
||||
</div>
|
||||
)}
|
||||
</Popup>
|
||||
)}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ export interface MapMarker {
|
|||
longitude: number
|
||||
latitude: number
|
||||
color: PinColorId
|
||||
notes: string | null
|
||||
createdAt: string
|
||||
}
|
||||
|
||||
|
|
@ -37,7 +36,6 @@ export function useMapMarkers() {
|
|||
longitude: m.longitude,
|
||||
latitude: m.latitude,
|
||||
color: m.color as PinColorId,
|
||||
notes: m.notes ?? null,
|
||||
createdAt: m.created_at,
|
||||
}))
|
||||
)
|
||||
|
|
@ -56,7 +54,6 @@ export function useMapMarkers() {
|
|||
longitude: result.longitude,
|
||||
latitude: result.latitude,
|
||||
color: result.color as PinColorId,
|
||||
notes: result.notes ?? null,
|
||||
createdAt: result.created_at,
|
||||
}
|
||||
setMarkers((prev) => [...prev, marker])
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import { FileEntry } from '../../types/files'
|
|||
import { CheckLatestVersionResult, SystemInformationResponse, SystemUpdateStatus } from '../../types/system'
|
||||
import { DownloadJobWithProgress, WikipediaState } from '../../types/downloads'
|
||||
import type { Country, CountryCode, CountryGroup, MapExtractPreflight } from '../../types/maps'
|
||||
import { EmbedJobWithProgress, FileWarningsResult, StoredFileInfo } from '../../types/rag'
|
||||
import { EmbedJobWithProgress } from '../../types/rag'
|
||||
import type { CategoryWithStatus, CollectionWithStatus, ContentUpdateCheckResult, ResourceUpdateInfo } from '../../types/collections'
|
||||
import { catchInternal } from './util'
|
||||
import { NomadChatResponse, NomadInstalledModel, NomadOllamaModel, OllamaChatRequest } from '../../types/ollama'
|
||||
|
|
@ -272,24 +272,6 @@ class API {
|
|||
})()
|
||||
}
|
||||
|
||||
/**
 * POST `{ targetModel }` to `/ollama/unload-chat-models` and return the
 * response body (`{ unloaded: string[] }`).
 *
 * Per the endpoint's contract as described by the original author, the
 * backend sends Ollama `keep_alive: 0` to every loaded chat model other than
 * `targetModel`; the embedding model is always exempt server-side. Callers in
 * the chat UI treat this as housekeeping and do not wait on it before
 * creating a new session.
 *
 * @param targetModel Model to keep loaded, or `null` to unload every chat model.
 * @returns Whatever `catchInternal` yields for the wrapped request
 *   (NOTE(review): error behaviour lives in `./util` and is not visible here).
 */
async unloadChatModels(targetModel: string | null) {
  const request = async () => {
    const { data } = await this.client.post<{ unloaded: string[] }>(
      '/ollama/unload-chat-models',
      { targetModel }
    )
    return data
  }
  return catchInternal(request)()
}
|
||||
|
||||
async getAvailableModels(params: { query?: string; recommendedOnly?: boolean; limit?: number; force?: boolean }) {
|
||||
return catchInternal(async () => {
|
||||
const response = await this.client.get<{
|
||||
|
|
@ -488,25 +470,11 @@ class API {
|
|||
|
||||
async getStoredRAGFiles() {
|
||||
return catchInternal(async () => {
|
||||
const response = await this.client.get<{ files: StoredFileInfo[] }>('/rag/files')
|
||||
const response = await this.client.get<{ files: string[] }>('/rag/files')
|
||||
return response.data.files
|
||||
})()
|
||||
}
|
||||
|
||||
/**
 * POST `{ source, force }` to `/rag/files/embed` and return the response
 * body (`{ message: string }`).
 *
 * @param source Source identifier of the file to embed, sent as-is.
 * @param force Forwarded to the backend unchanged; defaults to `false`.
 * @returns Whatever `catchInternal` yields for the wrapped request
 *   (NOTE(review): error behaviour lives in `./util` and is not visible here).
 */
async embedSingleRAGFile(source: string, force: boolean = false) {
  const request = async () => {
    const { data } = await this.client.post<{ message: string }>('/rag/files/embed', { source, force })
    return data
  }
  return catchInternal(request)()
}
|
||||
|
||||
/**
 * GET `/rag/file-warnings` and return the response body, typed as
 * `FileWarningsResult`.
 *
 * @returns Whatever `catchInternal` yields for the wrapped request
 *   (NOTE(review): error behaviour lives in `./util` and is not visible here).
 */
async getKbFileWarnings() {
  const request = async () => {
    const { data } = await this.client.get<FileWarningsResult>('/rag/file-warnings')
    return data
  }
  return catchInternal(request)()
}
|
||||
|
||||
async deleteRAGFile(source: string) {
|
||||
return catchInternal(async () => {
|
||||
const response = await this.client.delete<{ message: string }>('/rag/files', { data: { source } })
|
||||
|
|
@ -663,7 +631,7 @@ class API {
|
|||
async listMapMarkers() {
|
||||
return catchInternal(async () => {
|
||||
const response = await this.client.get<
|
||||
Array<{ id: number; name: string; longitude: number; latitude: number; color: string; notes: string | null; created_at: string }>
|
||||
Array<{ id: number; name: string; longitude: number; latitude: number; color: string; created_at: string }>
|
||||
>('/maps/markers')
|
||||
return response.data
|
||||
})()
|
||||
|
|
@ -672,7 +640,7 @@ class API {
|
|||
async createMapMarker(data: { name: string; longitude: number; latitude: number; color?: string }) {
|
||||
return catchInternal(async () => {
|
||||
const response = await this.client.post<
|
||||
{ id: number; name: string; longitude: number; latitude: number; color: string; notes: string | null; created_at: string }
|
||||
{ id: number; name: string; longitude: number; latitude: number; color: string; created_at: string }
|
||||
>('/maps/markers', data)
|
||||
return response.data
|
||||
})()
|
||||
|
|
@ -843,52 +811,6 @@ class API {
|
|||
})()
|
||||
}
|
||||
|
||||
/**
 * POST to `/rag/re-embed-all` (no request body) and return the response
 * body: `success`, `message`, and the optional `filesScanned` /
 * `filesQueued` counters.
 *
 * @returns Whatever `catchInternal` yields for the wrapped request
 *   (NOTE(review): error behaviour lives in `./util` and is not visible here).
 */
async reembedAllRAG() {
  const request = async () => {
    const { data } = await this.client.post<{
      success: boolean
      message: string
      filesScanned?: number
      filesQueued?: number
    }>('/rag/re-embed-all')
    return data
  }
  return catchInternal(request)()
}
|
||||
|
||||
/**
 * POST to `/rag/reset-and-rebuild` (no request body) and return the response
 * body: `success`, `message`, and the optional `filesScanned` /
 * `filesQueued` counters.
 *
 * @returns Whatever `catchInternal` yields for the wrapped request
 *   (NOTE(review): error behaviour lives in `./util` and is not visible here).
 */
async resetAndRebuildRAG() {
  const request = async () => {
    const { data } = await this.client.post<{
      success: boolean
      message: string
      filesScanned?: number
      filesQueued?: number
    }>('/rag/reset-and-rebuild')
    return data
  }
  return catchInternal(request)()
}
|
||||
|
||||
/**
 * POST `{ files }` to `/rag/estimate-batch` and return the response body:
 * `totalChunks`, `totalBytes` and `hasUnknown`.
 *
 * @param files Filename / size pairs forwarded to the backend unchanged.
 * @returns Whatever `catchInternal` yields for the wrapped request
 *   (NOTE(review): error behaviour lives in `./util` and is not visible here).
 */
async estimateEmbeddingBatch(files: { filename: string; sizeBytes: number }[]) {
  const request = async () => {
    const { data } = await this.client.post<{
      totalChunks: number
      totalBytes: number
      hasUnknown: boolean
    }>('/rag/estimate-batch', { files })
    return data
  }
  return catchInternal(request)()
}
|
||||
|
||||
/**
 * GET `/rag/policy-prompt-state` and return the response body:
 * `shouldPrompt`, `hasContent` and `totalFiles`.
 *
 * @returns Whatever `catchInternal` yields for the wrapped request
 *   (NOTE(review): error behaviour lives in `./util` and is not visible here).
 */
async getKbPolicyPromptState() {
  const request = async () => {
    const { data } = await this.client.get<{
      shouldPrompt: boolean
      hasContent: boolean
      totalFiles: number
    }>('/rag/policy-prompt-state')
    return data
  }
  return catchInternal(request)()
}
|
||||
|
||||
// Wikipedia selector methods
|
||||
|
||||
async getWikipediaState(): Promise<WikipediaState | undefined> {
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ import {
|
|||
IconInfoCircle,
|
||||
IconBug,
|
||||
IconCopy,
|
||||
IconLibrary,
|
||||
IconServer,
|
||||
IconMenu2,
|
||||
IconArrowLeft,
|
||||
|
|
@ -76,7 +75,6 @@ export const icons = {
|
|||
IconDownload,
|
||||
IconHome,
|
||||
IconInfoCircle,
|
||||
IconLibrary,
|
||||
IconLogs,
|
||||
IconMap,
|
||||
IconMenu2,
|
||||
|
|
|
|||
|
|
@ -1,113 +0,0 @@
|
|||
import type { KbIngestStateValue } from '../../types/kb_ingest_state.js'
|
||||
import type { StoredFileInfo } from '../../types/rag.js'
|
||||
|
||||
/**
 * Categories the Stored Files UI sorts knowledge-base files into.
 *
 * `/api/rag/files` returns a flat list of `{source, state, chunksEmbedded}`
 * rows. The UI presents them by the categories users care about: ZIM
 * archives, uploaded documents, and one rolled-up entry for Project NOMAD's
 * bundled docs (instead of the 12+ individual markdown files they consist of).
 *
 * A file's bucket is decided by path prefix alone. Prefixes use `/`, so the
 * absolute paths emitted by the server match no matter which Linux mount the
 * admin container uses.
 */
export type KbFileBucket = 'zim' | 'upload' | 'admin_docs' | 'other'

// An entry ending in `/` is a directory prefix; any other entry must equal
// the source path exactly.
const ADMIN_DOCS_PREFIXES = ['/app/docs/', '/app/README.md']
const ZIM_PREFIX = '/app/storage/zim/'
const UPLOADS_PREFIX = '/app/storage/kb_uploads/'

/**
 * Resolve the bucket for a single source path.
 *
 * Admin docs are checked first, then ZIMs, then uploads; anything that
 * matches none of the known locations is `'other'`.
 *
 * @param source Source path as reported by the backend.
 * @returns The bucket the path belongs to.
 */
export function classifyKbFile(source: string): KbFileBucket {
  for (const entry of ADMIN_DOCS_PREFIXES) {
    const isMatch = entry.endsWith('/') ? source.startsWith(entry) : source === entry
    if (isMatch) {
      return 'admin_docs'
    }
  }

  if (source.startsWith(ZIM_PREFIX)) {
    return 'zim'
  }

  return source.startsWith(UPLOADS_PREFIX) ? 'upload' : 'other'
}
|
||||
|
||||
/**
 * Derive a short display name from a source path.
 *
 * The path is split on both `/` and `\`; the final segment is returned. When
 * that segment is empty (empty input, or a path ending in a separator) the
 * original `source` string is returned unchanged.
 *
 * @param source Source path to shorten.
 * @returns The last path segment, or `source` itself when that is empty.
 */
export function sourceToDisplayName(source: string): string {
  const segments = source.split(/[/\\]/)
  const lastSegment = segments.pop()
  return lastSegment ? lastSegment : source
}
|
||||
|
||||
/** One row of the Stored Files table. */
export interface KbFileGroup {
  bucket: KbFileBucket
  /**
   * Stable React key for the row. A real source path for individual files; a
   * synthetic marker for the collapsed admin-docs row, whose actual file
   * paths are listed in `members`.
   */
  source: string
  displayName: string
  /** How many underlying files the row stands for (1 unless collapsed). */
  count: number
  /** Member source paths; filled only for collapsed rows, otherwise empty. */
  members: string[]
  /**
   * Ingestion state of the file. `null` for the collapsed admin_docs row and
   * for sources present in Qdrant that have no state row yet.
   */
  state: KbIngestStateValue | null
  /**
   * Chunks currently embedded for the source. 0 for files without a state
   * row or with no chunks, and always 0 for the collapsed admin_docs row.
   */
  chunksEmbedded: number
}

// Order in which buckets appear in the table.
const BUCKET_SORT_ORDER: KbFileBucket[] = ['zim', 'upload', 'admin_docs', 'other']

/**
 * Turn stored-file rows into the rows shown in the Stored Files panel.
 *
 * - Admin docs (`/app/docs/*`, README) are folded into one
 *   "Project NOMAD documentation · N files" row.
 * - ZIMs, uploads and everything else keep one row per file, ordered by
 *   bucket first and then alphabetically by display name.
 *
 * @param files Rows as returned by the backend; the array itself is not modified.
 * @returns Table rows in display order; empty buckets contribute nothing.
 */
export function groupAndSortKbFiles(files: StoredFileInfo[]): KbFileGroup[] {
  const byBucket: Record<KbFileBucket, StoredFileInfo[]> = {
    zim: [],
    upload: [],
    admin_docs: [],
    other: [],
  }
  files.forEach((file) => {
    byBucket[classifyKbFile(file.source)].push(file)
  })

  const byDisplayName = (left: StoredFileInfo, right: StoredFileInfo): number =>
    sourceToDisplayName(left.source).localeCompare(sourceToDisplayName(right.source))

  return BUCKET_SORT_ORDER.flatMap((bucket): KbFileGroup[] => {
    const bucketFiles = byBucket[bucket]
    if (bucketFiles.length === 0) {
      return []
    }

    if (bucket === 'admin_docs') {
      // Bundled documentation is shown as a single summary row.
      return [
        {
          bucket,
          source: '__admin_docs_group__',
          displayName: `Project NOMAD documentation · ${bucketFiles.length} file${bucketFiles.length === 1 ? '' : 's'}`,
          count: bucketFiles.length,
          members: bucketFiles.map((entry) => entry.source),
          state: null,
          chunksEmbedded: 0,
        },
      ]
    }

    // `bucketFiles` is local to this call, so sorting it in place is safe.
    return bucketFiles.sort(byDisplayName).map((file) => ({
      bucket,
      source: file.source,
      displayName: sourceToDisplayName(file.source),
      count: 1,
      members: [],
      state: file.state,
      chunksEmbedded: file.chunksEmbedded,
    }))
  })
}
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
/**
|
||||
* Auto-index guardrail thresholds and pure decision logic (RFC #883 §7).
|
||||
*
|
||||
* The guardrail fires when a user is about to commit to a bulk indexing
|
||||
* action (curated tier change, large multi-file upload, etc.) that would
|
||||
* use a substantial amount of disk for embedding storage. It's a one-time
|
||||
* confirmation step at scary thresholds — it doesn't fire for ordinary
|
||||
* everyday operations. After the user confirms once for a given batch
|
||||
* the action proceeds as it would have without the guardrail.
|
||||
*
|
||||
* Thresholds are intentionally conservative to avoid surprise consumption
|
||||
* of a user's storage. Tweak both constants if the field experience
|
||||
* suggests we're nagging users too aggressively.
|
||||
*/
|
||||
|
||||
/** Absolute ceiling: an estimate of at least this many bytes trips the guardrail. */
export const GUARDRAIL_ABSOLUTE_BYTES = 50 * 1024 * 1024 * 1024 // 50 GB

/** Relative ceiling: an estimate of at least this fraction (10%) of free disk trips it too. */
export const GUARDRAIL_FREE_DISK_RATIO = 0.1

/** Why the guardrail tripped, together with the numbers behind the decision. */
export type GuardrailReason =
  | {
      kind: 'over_absolute'
      estimateBytes: number
      thresholdBytes: number
    }
  | {
      kind: 'over_free_disk'
      estimateBytes: number
      freeBytes: number
      thresholdBytes: number
    }

/** Outcome of a guardrail evaluation; `trips` is true when `reasons` is non-empty. */
export type GuardrailVerdict = {
  trips: boolean
  reasons: GuardrailReason[]
}

/**
 * Decide whether a bulk indexing action must go through the guardrail modal.
 *
 * The caller supplies the precomputed embedding-storage estimate (from
 * `KbRatioRegistry.estimateBatch` in #891 / #897) and the free-disk figure
 * from system info. Two independent checks run, and each one that fails adds
 * a reason: the absolute check first, then the relative-to-free-disk check.
 *
 * @param input.estimateBytes Estimated bytes the action would consume.
 * @param input.freeBytes Free disk bytes; pass `0` to skip the relative
 *   check when free space is unknown.
 * @returns The verdict, with reasons in the order absolute, then relative.
 */
export function evaluateGuardrail(input: {
  estimateBytes: number
  freeBytes: number
}): GuardrailVerdict {
  const { estimateBytes, freeBytes } = input
  const reasons: GuardrailReason[] = []

  const exceedsAbsolute = estimateBytes >= GUARDRAIL_ABSOLUTE_BYTES
  if (exceedsAbsolute) {
    reasons.push({
      kind: 'over_absolute',
      estimateBytes,
      thresholdBytes: GUARDRAIL_ABSOLUTE_BYTES,
    })
  }

  // The relative check only applies when a positive free-disk figure is known.
  const freeDiskThreshold = freeBytes * GUARDRAIL_FREE_DISK_RATIO
  if (freeBytes > 0 && estimateBytes >= freeDiskThreshold) {
    reasons.push({
      kind: 'over_free_disk',
      estimateBytes,
      freeBytes,
      thresholdBytes: freeDiskThreshold,
    })
  }

  return { trips: reasons.length > 0, reasons }
}
|
||||
|
|
@ -1,63 +0,0 @@
|
|||
import { computeJobHealth, type JobHealthStatus } from '../../app/utils/kb_job_health.js'
|
||||
|
||||
export { computeJobHealth, type JobHealthStatus } from '../../app/utils/kb_job_health.js'
|
||||
|
||||
/** Presentation details for one health status: dot color classes, short label, and aria label. */
type JobHealthDisplayEntry = { dot: string; label: string; ariaLabel: string }

/**
 * How each job health status is rendered: the CSS classes for the status
 * dot, the short label shown next to it, and the accessible description.
 * Defined in a single table so that the backend's health decision
 * (`computeJobHealth`) and the frontend's rendering cannot drift apart.
 */
export const JOB_HEALTH_DISPLAY: Record<JobHealthStatus, JobHealthDisplayEntry> = {
  waiting: {
    dot: 'bg-gray-400 dark:bg-gray-500',
    label: 'Waiting',
    ariaLabel: 'Job is queued and waiting to start',
  },
  healthy: {
    dot: 'bg-green-500',
    label: 'Active',
    ariaLabel: 'Job is embedding at a normal rate',
  },
  slow: {
    dot: 'bg-yellow-500',
    label: 'Slow',
    ariaLabel: 'Job has not made progress for at least 2 minutes',
  },
  stalled: {
    dot: 'bg-red-500',
    label: 'Stalled',
    ariaLabel: 'Job has not made progress for at least 5 minutes',
  },
  failed: {
    dot: 'bg-red-700',
    label: 'Failed',
    ariaLabel: 'Job failed',
  },
}
|
||||
|
||||
/**
 * Render the gap between `timestampMs` and `now` as a short relative label
 * for the KB Processing Queue's "Last activity" line.
 *
 * Whole elapsed seconds are used, and a timestamp in the future is treated
 * as zero elapsed time. The result is `just now` under 5 seconds, `Xs ago`
 * under a minute, `Xm ago` under an hour, and `Xh ago` beyond that (hours
 * are not capped or converted to days).
 *
 * @param timestampMs Moment being described, in epoch milliseconds.
 * @param now Reference "current" time, in epoch milliseconds.
 * @returns The relative label.
 */
export function formatTimeAgo(timestampMs: number, now: number): string {
  const elapsedSeconds = Math.max(0, Math.floor((now - timestampMs) / 1000))

  if (elapsedSeconds < 5) {
    return 'just now'
  }
  if (elapsedSeconds < 60) {
    return `${elapsedSeconds}s ago`
  }

  const elapsedMinutes = Math.floor(elapsedSeconds / 60)
  return elapsedMinutes < 60
    ? `${elapsedMinutes}m ago`
    : `${Math.floor(elapsedMinutes / 60)}h ago`
}
|
||||
|
||||
/**
 * Resolve a job's health status using the current wall-clock time.
 *
 * Forwards `input` to `computeJobHealth` with `now` set to `Date.now()`, so
 * frontend callers do not have to supply the timestamp themselves.
 *
 * @param input Every `computeJobHealth` argument except `now`.
 * @returns The status `computeJobHealth` reports for the current moment.
 */
export function computeJobHealthNow(
  input: Omit<Parameters<typeof computeJobHealth>[0], 'now'>
): JobHealthStatus {
  const now = Date.now()
  return computeJobHealth({ ...input, now })
}
|
||||
|
|
@ -106,7 +106,7 @@ const ADDITIONAL_TOOLS: Capability[] = [
|
|||
},
|
||||
]
|
||||
|
||||
type WizardStep = 1 | 2 | 3 | 4 | 5
|
||||
type WizardStep = 1 | 2 | 3 | 4
|
||||
|
||||
const CURATED_MAP_COLLECTIONS_KEY = 'curated-map-collections'
|
||||
const CURATED_CATEGORIES_KEY = 'curated-categories'
|
||||
|
|
@ -122,13 +122,6 @@ export default function EasySetupWizard(props: {
|
|||
const [selectedServices, setSelectedServices] = useState<string[]>([])
|
||||
const [selectedMapCollections, setSelectedMapCollections] = useState<string[]>([])
|
||||
const [selectedAiModels, setSelectedAiModels] = useState<string[]>([])
|
||||
// Auto-index policy for the AI Assistant Knowledge Base. Defaults to
|
||||
// 'Always' so a new user who keeps the default behavior gets the "just
|
||||
// works" experience — downloads become searchable automatically. Persisted
|
||||
// to KVStore['rag.defaultIngestPolicy'] on wizard submit (same key #894's
|
||||
// KB modal toggle reads/writes) so the JIT prompt at first chat sees a
|
||||
// decided policy and doesn't ask again.
|
||||
const [ingestPolicy, setIngestPolicy] = useState<'Always' | 'Manual'>('Always')
|
||||
const [isProcessing, setIsProcessing] = useState(false)
|
||||
const [showAdditionalTools, setShowAdditionalTools] = useState(false)
|
||||
const [remoteOllamaEnabled, setRemoteOllamaEnabled] = useState(
|
||||
|
|
@ -199,19 +192,6 @@ export default function EasySetupWizard(props: {
|
|||
// Services that are already installed
|
||||
const installedServices = props.system.services.filter((service) => service.installed)
|
||||
|
||||
// Canonical "is AI part of this user's setup?" predicate (RFC #883 / issue #905).
|
||||
// Single source consumed by step-indicator render, navigation skip logic, the
|
||||
// review summary, and handleFinish. The AI step renders if and only if this
|
||||
// is true; if false, the wizard collapses to 4 steps and the AI step is
|
||||
// skipped on both forward and back nav.
|
||||
const isAiInSetup = useMemo(
|
||||
() =>
|
||||
selectedServices.includes(SERVICE_NAMES.OLLAMA) ||
|
||||
installedServices.some((s) => s.service_name === SERVICE_NAMES.OLLAMA) ||
|
||||
remoteOllamaEnabled,
|
||||
[selectedServices, installedServices, remoteOllamaEnabled]
|
||||
)
|
||||
|
||||
const toggleMapCollection = (slug: string) => {
|
||||
setSelectedMapCollections((prev) =>
|
||||
prev.includes(slug) ? prev.filter((s) => s !== slug) : [...prev, slug]
|
||||
|
|
@ -326,29 +306,24 @@ export default function EasySetupWizard(props: {
|
|||
// Get primary disk/filesystem info for storage projection
|
||||
const storageInfo = getPrimaryDiskInfo(systemInfo?.disk, systemInfo?.fsSize)
|
||||
|
||||
// Final step number (4 when AI is off, 5 when AI is on). Centralizing this
|
||||
// here so canProceedToNextStep / handleNext / handleBack / the bottom-bar
|
||||
// Next-vs-Finish switch all read the same value.
|
||||
const finalStep: WizardStep = isAiInSetup ? 5 : 4
|
||||
|
||||
const canProceedToNextStep = () => {
|
||||
if (!isOnline) return false // Must be online to proceed
|
||||
// Every step before the review is skippable; the review step shows Finish, not Next.
|
||||
return currentStep < finalStep
|
||||
if (currentStep === 1) return true // Can skip app installation
|
||||
if (currentStep === 2) return true // Can skip map downloads
|
||||
if (currentStep === 3) return true // Can skip ZIM downloads
|
||||
return false
|
||||
}
|
||||
|
||||
const handleNext = () => {
|
||||
if (currentStep >= finalStep) return
|
||||
// Skip the AI step (4) on forward nav when isAiInSetup is false.
|
||||
const next = currentStep === 3 && !isAiInSetup ? 5 : currentStep + 1
|
||||
setCurrentStep(next as WizardStep)
|
||||
if (currentStep < 4) {
|
||||
setCurrentStep((prev) => (prev + 1) as WizardStep)
|
||||
}
|
||||
}
|
||||
|
||||
const handleBack = () => {
|
||||
if (currentStep <= 1) return
|
||||
// Skip the AI step (4) on back nav when isAiInSetup is false.
|
||||
const prev = currentStep === 5 && !isAiInSetup ? 3 : currentStep - 1
|
||||
setCurrentStep(prev as WizardStep)
|
||||
if (currentStep > 1) {
|
||||
setCurrentStep((prev) => (prev - 1) as WizardStep)
|
||||
}
|
||||
}
|
||||
|
||||
const handleFinish = async () => {
|
||||
|
|
@ -363,21 +338,6 @@ export default function EasySetupWizard(props: {
|
|||
setIsProcessing(true)
|
||||
|
||||
try {
|
||||
// Persist the auto-index policy choice before kicking off downloads so
|
||||
// any content that finishes during this same wizard run sees the right
|
||||
// policy. Skipped when AI is not in the user's setup; the KV stays null
|
||||
// and the first-chat JIT prompt (#899) handles the decision later if/when
|
||||
// the user enables AI. Uses the canonical isAiInSetup predicate so step
|
||||
// 3 / step 4 / step 5 / handleFinish never disagree (issue #905).
|
||||
if (isAiInSetup) {
|
||||
try {
|
||||
await api.updateSetting('rag.defaultIngestPolicy', ingestPolicy)
|
||||
} catch (err) {
|
||||
// Non-fatal: the user can still set the policy from the KB modal.
|
||||
console.warn('Could not persist ingest policy from wizard:', err)
|
||||
}
|
||||
}
|
||||
|
||||
// If using remote Ollama, configure it first before other installs
|
||||
if (remoteOllamaEnabled && remoteOllamaUrl) {
|
||||
const remoteResult = await api.configureRemoteOllama(remoteOllamaUrl)
|
||||
|
|
@ -470,25 +430,12 @@ export default function EasySetupWizard(props: {
|
|||
}, [])
|
||||
|
||||
const renderStepIndicator = () => {
|
||||
// `step` is the stable WizardStep value (1=Apps, 2=Maps, 3=Content,
|
||||
// 4=AI, 5=Review). `displayNumber` is the sequential position shown in
|
||||
// the dot (always 1..N) so users see "1 2 3 4" when AI is off and
|
||||
// "1 2 3 4 5" when AI is on, with no gap.
|
||||
const baseSteps: Array<{ step: WizardStep; label: string }> = isAiInSetup
|
||||
? [
|
||||
{ step: 1, label: 'Apps' },
|
||||
{ step: 2, label: 'Maps' },
|
||||
{ step: 3, label: 'Content' },
|
||||
{ step: 4, label: 'AI' },
|
||||
{ step: 5, label: 'Review' },
|
||||
]
|
||||
: [
|
||||
{ step: 1, label: 'Apps' },
|
||||
{ step: 2, label: 'Maps' },
|
||||
{ step: 3, label: 'Content' },
|
||||
{ step: 5, label: 'Review' },
|
||||
]
|
||||
const steps = baseSteps.map((s, idx) => ({ ...s, displayNumber: idx + 1 }))
|
||||
const steps = [
|
||||
{ number: 1, label: 'Apps' },
|
||||
{ number: 2, label: 'Maps' },
|
||||
{ number: 3, label: 'Content' },
|
||||
{ number: 4, label: 'Review' },
|
||||
]
|
||||
|
||||
return (
|
||||
<nav aria-label="Progress" className="px-6 pt-6">
|
||||
|
|
@ -497,8 +444,8 @@ export default function EasySetupWizard(props: {
|
|||
className="divide-y divide-border-default rounded-md md:flex md:divide-y-0 md:justify-between border border-desert-green"
|
||||
>
|
||||
{steps.map((step, stepIdx) => (
|
||||
<li key={step.step} className="relative md:flex-1 md:flex md:justify-center">
|
||||
{currentStep > step.step ? (
|
||||
<li key={step.number} className="relative md:flex-1 md:flex md:justify-center">
|
||||
{currentStep > step.number ? (
|
||||
<div className="group flex w-full items-center md:justify-center">
|
||||
<span className="flex items-center px-6 py-2 text-sm font-medium">
|
||||
<span className="flex size-10 shrink-0 items-center justify-center rounded-full bg-desert-green">
|
||||
|
|
@ -507,13 +454,13 @@ export default function EasySetupWizard(props: {
|
|||
<span className="ml-4 text-lg font-medium text-text-primary">{step.label}</span>
|
||||
</span>
|
||||
</div>
|
||||
) : currentStep === step.step ? (
|
||||
) : currentStep === step.number ? (
|
||||
<div
|
||||
aria-current="step"
|
||||
className="flex items-center px-6 py-2 text-sm font-medium md:justify-center"
|
||||
>
|
||||
<span className="flex size-10 shrink-0 items-center justify-center rounded-full bg-desert-green border-2 border-desert-green">
|
||||
<span className="text-white">{step.displayNumber}</span>
|
||||
<span className="text-white">{step.number}</span>
|
||||
</span>
|
||||
<span className="ml-4 text-lg font-medium text-desert-green">{step.label}</span>
|
||||
</div>
|
||||
|
|
@ -521,7 +468,7 @@ export default function EasySetupWizard(props: {
|
|||
<div className="group flex items-center md:justify-center">
|
||||
<span className="flex items-center px-6 py-2 text-sm font-medium">
|
||||
<span className="flex size-10 shrink-0 items-center justify-center rounded-full border-2 border-border-default">
|
||||
<span className="text-text-muted">{step.displayNumber}</span>
|
||||
<span className="text-text-muted">{step.number}</span>
|
||||
</span>
|
||||
<span className="ml-4 text-lg font-medium text-text-muted">{step.label}</span>
|
||||
</span>
|
||||
|
|
@ -539,7 +486,7 @@ export default function EasySetupWizard(props: {
|
|||
fill="none"
|
||||
viewBox="0 0 22 80"
|
||||
preserveAspectRatio="none"
|
||||
className={`size-full ${currentStep > step.step ? 'text-desert-green' : 'text-text-muted'}`}
|
||||
className={`size-full ${currentStep > step.number ? 'text-desert-green' : 'text-text-muted'}`}
|
||||
>
|
||||
<path
|
||||
d="M0 -2L20 40L0 82"
|
||||
|
|
@ -583,29 +530,6 @@ export default function EasySetupWizard(props: {
|
|||
if (isCapabilityInstalled(capability)) return
|
||||
|
||||
const isSelected = isCapabilitySelected(capability)
|
||||
|
||||
// Toggling AI off needs to clear dependent state that lives in the AI
|
||||
// step (model picks, ingest policy, remote Ollama config). If the user
|
||||
// has any of that filled in, confirm before discarding so a stray click
|
||||
// doesn't quietly wipe their setup.
|
||||
if (capability.id === 'ai' && isSelected) {
|
||||
const hasAiSelections =
|
||||
selectedAiModels.length > 0 ||
|
||||
ingestPolicy !== 'Always' ||
|
||||
remoteOllamaEnabled
|
||||
if (hasAiSelections) {
|
||||
const confirmed = window.confirm(
|
||||
"Turning off AI will discard your AI model picks, indexing policy, and remote Ollama configuration. Continue?"
|
||||
)
|
||||
if (!confirmed) return
|
||||
}
|
||||
setSelectedAiModels([])
|
||||
setIngestPolicy('Always')
|
||||
setRemoteOllamaEnabled(false)
|
||||
setRemoteOllamaUrl('')
|
||||
setRemoteOllamaUrlError(null)
|
||||
}
|
||||
|
||||
if (isSelected) {
|
||||
// Deselect all services in this capability
|
||||
setSelectedServices((prev) => prev.filter((s) => !capability.services.includes(s)))
|
||||
|
|
@ -887,11 +811,10 @@ export default function EasySetupWizard(props: {
|
|||
)
|
||||
|
||||
const renderStep3 = () => {
|
||||
// Issue #905: AI moved to its own conditional Step 4. Step 3 is now
|
||||
// content-only (Wikipedia + curated tiers), gated on the Information
|
||||
// capability (Kiwix).
|
||||
const isInformationSelected =
|
||||
selectedServices.includes(SERVICE_NAMES.KIWIX) ||
|
||||
// Check if AI or Information capabilities are selected OR already installed
|
||||
const isAiSelected = selectedServices.includes(SERVICE_NAMES.OLLAMA) ||
|
||||
installedServices.some((s) => s.service_name === SERVICE_NAMES.OLLAMA)
|
||||
const isInformationSelected = selectedServices.includes(SERVICE_NAMES.KIWIX) ||
|
||||
installedServices.some((s) => s.service_name === SERVICE_NAMES.KIWIX)
|
||||
|
||||
return (
|
||||
|
|
@ -899,31 +822,132 @@ export default function EasySetupWizard(props: {
|
|||
<div className="text-center mb-6">
|
||||
<h2 className="text-3xl font-bold text-text-primary mb-2">Choose Content</h2>
|
||||
<p className="text-text-secondary">
|
||||
{isInformationSelected
|
||||
? 'Select content categories for offline knowledge.'
|
||||
: 'Configure content for your selected capabilities.'}
|
||||
{isAiSelected && isInformationSelected
|
||||
? 'Select AI models and content categories for offline use.'
|
||||
: isAiSelected
|
||||
? 'Select AI models to download for offline use.'
|
||||
: isInformationSelected
|
||||
? 'Select content categories for offline knowledge.'
|
||||
: 'Configure content for your selected capabilities.'}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Wikipedia Selection - Only show if Information capability is selected */}
|
||||
{isInformationSelected && (
|
||||
{/* AI Model Selection - Only show if AI capability is selected */}
|
||||
{isAiSelected && (
|
||||
<div className="mb-8">
|
||||
{isLoadingWikipedia ? (
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="w-10 h-10 rounded-full bg-surface-primary border border-border-subtle flex items-center justify-center shadow-sm">
|
||||
<IconCpu className="w-6 h-6 text-text-primary" />
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="text-xl font-semibold text-text-primary">AI Models</h3>
|
||||
<p className="text-sm text-text-muted">Select models to download for offline AI</p>
|
||||
</div>
|
||||
</div>
|
||||
{remoteOllamaEnabled && remoteOllamaUrl ? (
|
||||
<Alert
|
||||
title="Remote Ollama selected"
|
||||
message="Models are managed on the remote machine. You can add models from Settings > AI Assistant after setup, note this is only supported when using Ollama, not LM Studio and other OpenAI API software."
|
||||
type="info"
|
||||
variant="bordered"
|
||||
/>
|
||||
) : isLoadingRecommendedModels ? (
|
||||
<div className="flex justify-center py-12">
|
||||
<LoadingSpinner />
|
||||
</div>
|
||||
) : wikipediaState && wikipediaState.options.length > 0 ? (
|
||||
<WikipediaSelector
|
||||
options={wikipediaState.options}
|
||||
currentSelection={wikipediaState.currentSelection}
|
||||
selectedOptionId={selectedWikipedia}
|
||||
onSelect={(optionId) => isOnline && setSelectedWikipedia(optionId)}
|
||||
disabled={!isOnline}
|
||||
/>
|
||||
) : null}
|
||||
) : recommendedModels && recommendedModels.length > 0 ? (
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||
{recommendedModels.map((model) => (
|
||||
<div
|
||||
key={model.name}
|
||||
onClick={() => isOnline && toggleAiModel(model.name)}
|
||||
className={classNames(
|
||||
'p-4 rounded-lg border-2 transition-all cursor-pointer',
|
||||
selectedAiModels.includes(model.name)
|
||||
? 'border-desert-green bg-desert-green shadow-md'
|
||||
: 'border-desert-stone-light bg-surface-primary hover:border-desert-green hover:shadow-sm',
|
||||
!isOnline && 'opacity-50 cursor-not-allowed'
|
||||
)}
|
||||
>
|
||||
<div className="flex items-start justify-between">
|
||||
<div className="flex-1">
|
||||
<h4
|
||||
className={classNames(
|
||||
'text-lg font-semibold mb-1',
|
||||
selectedAiModels.includes(model.name) ? 'text-white' : 'text-text-primary'
|
||||
)}
|
||||
>
|
||||
{model.name}
|
||||
</h4>
|
||||
<p
|
||||
className={classNames(
|
||||
'text-sm mb-2',
|
||||
selectedAiModels.includes(model.name) ? 'text-white' : 'text-text-secondary'
|
||||
)}
|
||||
>
|
||||
{model.description}
|
||||
</p>
|
||||
{model.tags?.[0]?.size && (
|
||||
<div
|
||||
className={classNames(
|
||||
'text-xs',
|
||||
selectedAiModels.includes(model.name)
|
||||
? 'text-green-100'
|
||||
: 'text-text-muted'
|
||||
)}
|
||||
>
|
||||
Size: {model.tags[0].size}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div
|
||||
className={classNames(
|
||||
'ml-4 w-6 h-6 rounded-full border-2 flex items-center justify-center transition-all flex-shrink-0',
|
||||
selectedAiModels.includes(model.name)
|
||||
? 'border-white bg-white'
|
||||
: 'border-desert-stone'
|
||||
)}
|
||||
>
|
||||
{selectedAiModels.includes(model.name) && (
|
||||
<IconCheck size={16} className="text-desert-green" />
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-center py-8 bg-surface-secondary rounded-lg">
|
||||
<p className="text-text-secondary">No recommended AI models available at this time.</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Wikipedia Selection - Only show if Information capability is selected */}
|
||||
{isInformationSelected && (
|
||||
<>
|
||||
{/* Divider between AI Models and Wikipedia */}
|
||||
{isAiSelected && <hr className="my-8 border-border-subtle" />}
|
||||
|
||||
<div className="mb-8">
|
||||
{isLoadingWikipedia ? (
|
||||
<div className="flex justify-center py-12">
|
||||
<LoadingSpinner />
|
||||
</div>
|
||||
) : wikipediaState && wikipediaState.options.length > 0 ? (
|
||||
<WikipediaSelector
|
||||
options={wikipediaState.options}
|
||||
currentSelection={wikipediaState.currentSelection}
|
||||
selectedOptionId={selectedWikipedia}
|
||||
onSelect={(optionId) => isOnline && setSelectedWikipedia(optionId)}
|
||||
disabled={!isOnline}
|
||||
/>
|
||||
) : null}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Curated Categories with Tiers - Only show if Information capability is selected */}
|
||||
{isInformationSelected && (
|
||||
<>
|
||||
|
|
@ -975,8 +999,8 @@ export default function EasySetupWizard(props: {
|
|||
</>
|
||||
)}
|
||||
|
||||
{/* Show message if no content-bearing capabilities are selected */}
|
||||
{!isInformationSelected && (
|
||||
{/* Show message if no capabilities requiring content are selected */}
|
||||
{!isAiSelected && !isInformationSelected && (
|
||||
<div className="text-center py-12">
|
||||
<p className="text-text-secondary text-lg">
|
||||
No content-based capabilities selected. You can skip this step or go back to select
|
||||
|
|
@ -989,149 +1013,6 @@ export default function EasySetupWizard(props: {
|
|||
}
|
||||
|
||||
const renderStep4 = () => {
|
||||
// AI step (issue #905). Only rendered when isAiInSetup is true; otherwise
|
||||
// the wizard's step array drops it and forward/back nav jumps Content → Review.
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div className="text-center mb-6">
|
||||
<h2 className="text-3xl font-bold text-text-primary mb-2">Configure {aiAssistantName}</h2>
|
||||
<p className="text-text-secondary">
|
||||
Choose models to download and set how {aiAssistantName} handles new content.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="w-10 h-10 rounded-full bg-surface-primary border border-border-subtle flex items-center justify-center shadow-sm">
|
||||
<IconCpu className="w-6 h-6 text-text-primary" />
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="text-xl font-semibold text-text-primary">AI Models</h3>
|
||||
<p className="text-sm text-text-muted">Select models to download for offline AI</p>
|
||||
</div>
|
||||
</div>
|
||||
{remoteOllamaEnabled && remoteOllamaUrl ? (
|
||||
<Alert
|
||||
title="Remote Ollama selected"
|
||||
message="Models are managed on the remote machine. You can add models from Settings > AI Assistant after setup, note this is only supported when using Ollama, not LM Studio and other OpenAI API software."
|
||||
type="info"
|
||||
variant="bordered"
|
||||
/>
|
||||
) : isLoadingRecommendedModels ? (
|
||||
<div className="flex justify-center py-12">
|
||||
<LoadingSpinner />
|
||||
</div>
|
||||
) : recommendedModels && recommendedModels.length > 0 ? (
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||
{recommendedModels.map((model) => (
|
||||
<div
|
||||
key={model.name}
|
||||
onClick={() => isOnline && toggleAiModel(model.name)}
|
||||
className={classNames(
|
||||
'p-4 rounded-lg border-2 transition-all cursor-pointer',
|
||||
selectedAiModels.includes(model.name)
|
||||
? 'border-desert-green bg-desert-green shadow-md'
|
||||
: 'border-desert-stone-light bg-surface-primary hover:border-desert-green hover:shadow-sm',
|
||||
!isOnline && 'opacity-50 cursor-not-allowed'
|
||||
)}
|
||||
>
|
||||
<div className="flex items-start justify-between">
|
||||
<div className="flex-1">
|
||||
<h4
|
||||
className={classNames(
|
||||
'text-lg font-semibold mb-1',
|
||||
selectedAiModels.includes(model.name) ? 'text-white' : 'text-text-primary'
|
||||
)}
|
||||
>
|
||||
{model.name}
|
||||
</h4>
|
||||
<p
|
||||
className={classNames(
|
||||
'text-sm mb-2',
|
||||
selectedAiModels.includes(model.name) ? 'text-white' : 'text-text-secondary'
|
||||
)}
|
||||
>
|
||||
{model.description}
|
||||
</p>
|
||||
{model.tags?.[0]?.size && (
|
||||
<div
|
||||
className={classNames(
|
||||
'text-xs',
|
||||
selectedAiModels.includes(model.name)
|
||||
? 'text-green-100'
|
||||
: 'text-text-muted'
|
||||
)}
|
||||
>
|
||||
Size: {model.tags[0].size}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div
|
||||
className={classNames(
|
||||
'ml-4 w-6 h-6 rounded-full border-2 flex items-center justify-center transition-all flex-shrink-0',
|
||||
selectedAiModels.includes(model.name)
|
||||
? 'border-white bg-white'
|
||||
: 'border-desert-stone'
|
||||
)}
|
||||
>
|
||||
{selectedAiModels.includes(model.name) && (
|
||||
<IconCheck size={16} className="text-desert-green" />
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-center py-8 bg-surface-secondary rounded-lg">
|
||||
<p className="text-text-secondary">No recommended AI models available at this time.</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Auto-index policy — choose now so the JIT prompt at first chat
|
||||
doesn't ask again (RFC #883 Phase 3 task 13). Persisted to
|
||||
rag.defaultIngestPolicy on wizard submit. */}
|
||||
<div className="mt-8 pt-6 border-t border-border-subtle">
|
||||
<h4 className="text-lg font-semibold text-text-primary mb-1">
|
||||
Auto-index new content for {aiAssistantName}?
|
||||
</h4>
|
||||
<p className="text-sm text-text-muted mb-4">
|
||||
When you add new ZIMs, documents, or curated content, should {aiAssistantName} index them automatically so it can search them while answering your questions?
|
||||
</p>
|
||||
<div className="inline-flex rounded-md border border-border-default overflow-hidden" role="group">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setIngestPolicy('Always')}
|
||||
className={classNames(
|
||||
'px-5 py-2 text-sm font-medium transition-colors',
|
||||
ingestPolicy === 'Always'
|
||||
? 'bg-desert-green text-white'
|
||||
: 'bg-surface-primary text-text-secondary hover:bg-surface-secondary'
|
||||
)}
|
||||
>
|
||||
Yes, always
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setIngestPolicy('Manual')}
|
||||
className={classNames(
|
||||
'px-5 py-2 text-sm font-medium transition-colors border-l border-border-default',
|
||||
ingestPolicy === 'Manual'
|
||||
? 'bg-desert-green text-white'
|
||||
: 'bg-surface-primary text-text-secondary hover:bg-surface-secondary'
|
||||
)}
|
||||
>
|
||||
Ask me first
|
||||
</button>
|
||||
</div>
|
||||
<p className="text-xs text-text-muted mt-3">
|
||||
You can change this any time from the Knowledge Base panel inside AI Chat.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const renderStep5 = () => {
|
||||
const hasSelections =
|
||||
selectedServices.length > 0 ||
|
||||
selectedMapCollections.length > 0 ||
|
||||
|
|
@ -1276,25 +1157,6 @@ export default function EasySetupWizard(props: {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{isAiInSetup && (
|
||||
<div className="bg-surface-primary rounded-lg border-2 border-desert-stone-light p-6">
|
||||
<h3 className="text-xl font-semibold text-text-primary mb-2">
|
||||
Auto-index Setting
|
||||
</h3>
|
||||
<p className="text-text-secondary text-sm">
|
||||
{ingestPolicy === 'Always' ? (
|
||||
<>
|
||||
New content will be <strong>indexed automatically</strong> as it arrives so {aiAssistantName} can search it.
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
New content will <strong>wait for you to opt in</strong> from the Knowledge Base panel before {aiAssistantName} indexes it.
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<Alert
|
||||
title="Ready to Start"
|
||||
message="Click 'Complete Setup' to begin installing apps and downloading content. This may take some time depending on your internet connection and the size of the downloads."
|
||||
|
|
@ -1335,8 +1197,7 @@ export default function EasySetupWizard(props: {
|
|||
{currentStep === 1 && renderStep1()}
|
||||
{currentStep === 2 && renderStep2()}
|
||||
{currentStep === 3 && renderStep3()}
|
||||
{currentStep === 4 && isAiInSetup && renderStep4()}
|
||||
{currentStep === 5 && renderStep5()}
|
||||
{currentStep === 4 && renderStep4()}
|
||||
|
||||
<div className="flex justify-between mt-8 pt-4 border-t border-desert-stone-light">
|
||||
<div className="flex space-x-4 items-center">
|
||||
|
|
@ -1374,7 +1235,7 @@ export default function EasySetupWizard(props: {
|
|||
Cancel & Go to Home
|
||||
</StyledButton>
|
||||
|
||||
{currentStep < finalStep ? (
|
||||
{currentStep < 4 ? (
|
||||
<StyledButton
|
||||
onClick={handleNext}
|
||||
disabled={!canProceedToNextStep() || isProcessing}
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ export default function MapsManager(props: {
|
|||
|
||||
try {
|
||||
setDeletingFileKey(file.key)
|
||||
await api.deleteMapRegionFile(file.name)
|
||||
await api.deleteMapRegionFile(file.key)
|
||||
addNotification({
|
||||
type: 'success',
|
||||
message: `${file.name} has been deleted.`,
|
||||
|
|
|
|||
188
admin/package-lock.json
generated
188
admin/package-lock.json
generated
|
|
@ -9,95 +9,94 @@
|
|||
"version": "0.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@adonisjs/auth": "9.6.0",
|
||||
"@adonisjs/core": "6.19.3",
|
||||
"@adonisjs/cors": "2.2.1",
|
||||
"@adonisjs/inertia": "3.1.1",
|
||||
"@adonisjs/lucid": "21.8.2",
|
||||
"@adonisjs/session": "7.7.1",
|
||||
"@adonisjs/shield": "8.2.0",
|
||||
"@adonisjs/static": "1.1.1",
|
||||
"@adonisjs/transmit": "2.0.2",
|
||||
"@adonisjs/transmit-client": "1.1.0",
|
||||
"@adonisjs/vite": "4.0.0",
|
||||
"@chonkiejs/core": "0.0.7",
|
||||
"@headlessui/react": "2.2.9",
|
||||
"@inertiajs/react": "2.3.13",
|
||||
"@markdoc/markdoc": "0.5.4",
|
||||
"@openzim/libzim": "4.0.0",
|
||||
"@protomaps/basemaps": "5.7.0",
|
||||
"@qdrant/js-client-rest": "1.16.2",
|
||||
"@tabler/icons-react": "3.36.1",
|
||||
"@tailwindcss/vite": "4.1.18",
|
||||
"@tanstack/react-query": "5.90.20",
|
||||
"@tanstack/react-query-devtools": "5.91.3",
|
||||
"@tanstack/react-virtual": "3.13.18",
|
||||
"@uppy/core": "5.2.0",
|
||||
"@uppy/dashboard": "5.1.0",
|
||||
"@uppy/react": "5.1.1",
|
||||
"@vinejs/vine": "3.0.1",
|
||||
"@vitejs/plugin-react": "4.7.0",
|
||||
"autoprefixer": "10.4.24",
|
||||
"axios": "1.15.0",
|
||||
"better-sqlite3": "12.6.2",
|
||||
"bullmq": "5.67.2",
|
||||
"cheerio": "1.2.0",
|
||||
"compression": "1.8.1",
|
||||
"dockerode": "4.0.9",
|
||||
"edge.js": "6.4.0",
|
||||
"fast-xml-parser": "5.5.9",
|
||||
"fuse.js": "7.1.0",
|
||||
"ipaddr.js": "^2.4.0",
|
||||
"jszip": "3.10.1",
|
||||
"luxon": "3.7.2",
|
||||
"maplibre-gl": "4.7.1",
|
||||
"mysql2": "3.16.2",
|
||||
"ollama": "0.6.3",
|
||||
"openai": "6.27.0",
|
||||
"pdf-parse": "2.4.5",
|
||||
"pdf2pic": "3.2.0",
|
||||
"pino-pretty": "13.1.3",
|
||||
"pmtiles": "4.4.0",
|
||||
"postcss": "8.5.6",
|
||||
"react": "19.2.4",
|
||||
"react-adonis-transmit": "1.0.1",
|
||||
"react-dom": "19.2.4",
|
||||
"react-map-gl": "8.1.0",
|
||||
"react-markdown": "10.1.0",
|
||||
"reflect-metadata": "0.2.2",
|
||||
"remark-gfm": "4.0.1",
|
||||
"sharp": "0.34.5",
|
||||
"stopword": "3.1.5",
|
||||
"systeminformation": "5.31.0",
|
||||
"tailwindcss": "4.2.2",
|
||||
"tar": "7.5.11",
|
||||
"tesseract.js": "7.0.0",
|
||||
"url-join": "5.0.0",
|
||||
"yaml": "2.8.3"
|
||||
"@adonisjs/auth": "^9.4.0",
|
||||
"@adonisjs/core": "^6.18.0",
|
||||
"@adonisjs/cors": "^2.2.1",
|
||||
"@adonisjs/inertia": "^3.1.1",
|
||||
"@adonisjs/lucid": "^21.8.2",
|
||||
"@adonisjs/session": "^7.5.1",
|
||||
"@adonisjs/shield": "^8.2.0",
|
||||
"@adonisjs/static": "^1.1.1",
|
||||
"@adonisjs/transmit": "^2.0.2",
|
||||
"@adonisjs/transmit-client": "^1.0.0",
|
||||
"@adonisjs/vite": "^4.0.0",
|
||||
"@chonkiejs/core": "^0.0.7",
|
||||
"@headlessui/react": "^2.2.4",
|
||||
"@inertiajs/react": "^2.0.13",
|
||||
"@markdoc/markdoc": "^0.5.2",
|
||||
"@openzim/libzim": "^4.0.0",
|
||||
"@protomaps/basemaps": "^5.7.0",
|
||||
"@qdrant/js-client-rest": "^1.16.2",
|
||||
"@tabler/icons-react": "^3.34.0",
|
||||
"@tailwindcss/vite": "^4.1.10",
|
||||
"@tanstack/react-query": "^5.81.5",
|
||||
"@tanstack/react-query-devtools": "^5.83.0",
|
||||
"@tanstack/react-virtual": "^3.13.12",
|
||||
"@uppy/core": "^5.2.0",
|
||||
"@uppy/dashboard": "^5.1.0",
|
||||
"@uppy/react": "^5.1.1",
|
||||
"@vinejs/vine": "^3.0.1",
|
||||
"@vitejs/plugin-react": "^4.6.0",
|
||||
"autoprefixer": "^10.4.21",
|
||||
"axios": "^1.15.0",
|
||||
"better-sqlite3": "^12.1.1",
|
||||
"bullmq": "^5.65.1",
|
||||
"cheerio": "^1.2.0",
|
||||
"compression": "^1.8.1",
|
||||
"dockerode": "^4.0.7",
|
||||
"edge.js": "^6.2.1",
|
||||
"fast-xml-parser": "^5.5.7",
|
||||
"fuse.js": "^7.1.0",
|
||||
"jszip": "^3.10.1",
|
||||
"luxon": "^3.6.1",
|
||||
"maplibre-gl": "^4.7.1",
|
||||
"mysql2": "^3.14.1",
|
||||
"ollama": "^0.6.3",
|
||||
"openai": "^6.27.0",
|
||||
"pdf-parse": "^2.4.5",
|
||||
"pdf2pic": "^3.2.0",
|
||||
"pino-pretty": "^13.0.0",
|
||||
"pmtiles": "^4.4.0",
|
||||
"postcss": "^8.5.6",
|
||||
"react": "^19.1.0",
|
||||
"react-adonis-transmit": "^1.0.1",
|
||||
"react-dom": "^19.1.0",
|
||||
"react-map-gl": "^8.1.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"sharp": "^0.34.5",
|
||||
"stopword": "^3.1.5",
|
||||
"systeminformation": "^5.31.0",
|
||||
"tailwindcss": "^4.2.1",
|
||||
"tar": "^7.5.11",
|
||||
"tesseract.js": "^7.0.0",
|
||||
"url-join": "^5.0.0",
|
||||
"yaml": "^2.8.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@adonisjs/assembler": "7.8.2",
|
||||
"@adonisjs/eslint-config": "2.1.2",
|
||||
"@adonisjs/prettier-config": "1.4.5",
|
||||
"@adonisjs/tsconfig": "1.4.1",
|
||||
"@japa/assert": "4.2.0",
|
||||
"@japa/plugin-adonisjs": "4.0.0",
|
||||
"@japa/runner": "4.5.0",
|
||||
"@adonisjs/assembler": "^7.8.2",
|
||||
"@adonisjs/eslint-config": "^2.0.0",
|
||||
"@adonisjs/prettier-config": "^1.4.4",
|
||||
"@adonisjs/tsconfig": "^1.4.0",
|
||||
"@japa/assert": "^4.0.1",
|
||||
"@japa/plugin-adonisjs": "^4.0.0",
|
||||
"@japa/runner": "^4.2.0",
|
||||
"@swc/core": "1.11.24",
|
||||
"@tanstack/eslint-plugin-query": "5.91.4",
|
||||
"@types/compression": "1.8.1",
|
||||
"@types/dockerode": "4.0.1",
|
||||
"@types/luxon": "3.7.1",
|
||||
"@types/node": "22.19.7",
|
||||
"@types/react": "19.2.10",
|
||||
"@types/react-dom": "19.2.3",
|
||||
"@types/stopword": "2.0.3",
|
||||
"eslint": "9.39.2",
|
||||
"hot-hook": "0.4.0",
|
||||
"prettier": "3.8.1",
|
||||
"ts-node-maintained": "10.9.6",
|
||||
"typescript": "5.8.3",
|
||||
"vite": "6.4.2"
|
||||
"@tanstack/eslint-plugin-query": "^5.81.2",
|
||||
"@types/compression": "^1.8.1",
|
||||
"@types/dockerode": "^4.0.1",
|
||||
"@types/luxon": "^3.6.2",
|
||||
"@types/node": "^22.15.18",
|
||||
"@types/react": "^19.1.8",
|
||||
"@types/react-dom": "^19.1.6",
|
||||
"@types/stopword": "^2.0.3",
|
||||
"eslint": "^9.26.0",
|
||||
"hot-hook": "^0.4.0",
|
||||
"prettier": "^3.5.3",
|
||||
"ts-node-maintained": "^10.9.5",
|
||||
"typescript": "~5.8.3",
|
||||
"vite": "^6.4.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@adobe/css-tools": {
|
||||
|
|
@ -10100,12 +10099,12 @@
|
|||
}
|
||||
},
|
||||
"node_modules/ipaddr.js": {
|
||||
"version": "2.4.0",
|
||||
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.4.0.tgz",
|
||||
"integrity": "sha512-9VGk3HGanVE6JoZXHiCpnGy5X0jYDnN4EA4lntFPj+1vIWlFhIylq2CrrCOJH9EAhc5CYhq18F2Av2tgoAPsYQ==",
|
||||
"version": "1.9.1",
|
||||
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
|
||||
"integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/is-alphabetical": {
|
||||
|
|
@ -13791,15 +13790,6 @@
|
|||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-addr/node_modules/ipaddr.js": {
|
||||
"version": "1.9.1",
|
||||
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
|
||||
"integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-2.1.0.tgz",
|
||||
|
|
|
|||
|
|
@ -100,7 +100,6 @@
|
|||
"edge.js": "6.4.0",
|
||||
"fast-xml-parser": "5.5.9",
|
||||
"fuse.js": "7.1.0",
|
||||
"ipaddr.js": "2.4.0",
|
||||
"jszip": "3.10.1",
|
||||
"luxon": "3.7.2",
|
||||
"maplibre-gl": "4.7.1",
|
||||
|
|
|
|||
|
|
@ -1,122 +0,0 @@
|
|||
import logger from '@adonisjs/core/services/logger'
|
||||
import type { ApplicationService } from '@adonisjs/core/types'
|
||||
|
||||
/**
|
||||
* Auto-remediates NVIDIA GPU passthrough loss after admin / host restart.
|
||||
*
|
||||
* After an update or container recreate, nomad_ollama's HostConfig.DeviceRequests
|
||||
* still lists the nvidia driver, but the NVIDIA Container Toolkit binding inside
|
||||
* the container is torn. `nvidia-smi` inside the container returns
|
||||
* "Failed to initialize NVML: Unknown Error" and Ollama silently falls back to
|
||||
* CPU inference. PR #208 added detection + a one-click "Fix: Reinstall AI Assistant"
|
||||
* banner. This provider does that click automatically on admin boot when the
|
||||
* condition is detected.
|
||||
*
|
||||
* Guards:
|
||||
* - NVIDIA-only. AMD passthrough_failed has a different fix path (HSA override
|
||||
* handling in PR #804) and is left to the user.
|
||||
* - One-shot per admin boot. The provider runs once on startup; if the recreate
|
||||
* itself fails the banner remains as a fallback.
|
||||
* - Opt-out via KV `ai.autoFixGpuPassthrough = false`.
|
||||
* - Skipped entirely when no NVIDIA runtime is registered with Docker.
|
||||
*/
|
||||
export default class GpuPassthroughRemediationProvider {
  constructor(protected app: ApplicationService) {}

  /**
   * Schedules a single NVIDIA passthrough probe for the Ollama container and,
   * when the probe looks broken, force-reinstalls that service.
   *
   * The work is skipped (with an info log) when any of these hold:
   *  - the application environment is not 'web',
   *  - KV `ai.autoFixGpuPassthrough` stringifies to 'false',
   *  - Docker reports no 'nvidia' runtime,
   *  - no running container is named after SERVICE_NAMES.OLLAMA.
   *
   * Every failure inside the scheduled task is logged and swallowed; nothing
   * here rejects back into the caller of boot().
   */
  async boot() {
    if (this.app.getEnvironment() !== 'web') return

    // Deferred via setImmediate so boot() returns without awaiting the Docker
    // round-trips below.
    setImmediate(async () => {
      try {
        const KVStore = (await import('#models/kv_store')).default
        const { DockerService } = await import('#services/docker_service')
        const { SERVICE_NAMES } = await import('../constants/service_names.js')
        const Docker = (await import('dockerode')).default

        // Opt-out switch: only an explicit 'false' disables the auto-fix.
        const enabledRaw = await KVStore.getValue('ai.autoFixGpuPassthrough')
        if (String(enabledRaw) === 'false') {
          logger.info(
            '[GpuPassthroughRemediationProvider] Auto-fix disabled via KV — skipping.'
          )
          return
        }

        const docker = new Docker({ socketPath: '/var/run/docker.sock' })
        const dockerInfo = await docker.info()
        const runtimes = dockerInfo.Runtimes || {}
        const hasNvidiaRuntime = 'nvidia' in runtimes

        if (!hasNvidiaRuntime) {
          logger.info(
            '[GpuPassthroughRemediationProvider] No NVIDIA runtime registered — skipping.'
          )
          return
        }

        // `all: false` restricts the listing to running containers.
        const containers = await docker.listContainers({ all: false })
        const ollama = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`))

        if (!ollama) {
          logger.info(
            '[GpuPassthroughRemediationProvider] nomad_ollama not running — skipping.'
          )
          return
        }

        // Probe: exec nvidia-smi inside the Ollama container. NVML init failure
        // is the signature of a broken passthrough that DeviceRequests can't see.
        const container = docker.getContainer(ollama.Id)
        const exec = await container.exec({
          Cmd: ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
          AttachStdout: true,
          AttachStderr: true,
        })
        const stream = await exec.start({ Tty: true })
        const output = await new Promise<string>((resolve, reject) => {
          let buf = ''
          const timer = setTimeout(() => {
            // Deadline hit: report whatever arrived (or the TIMEOUT marker) and
            // release the exec connection, since nothing reads from it afterwards.
            resolve(buf || 'TIMEOUT')
            stream.destroy()
          }, 8000)
          stream.on('data', (chunk: Buffer) => (buf += chunk.toString('utf8')))
          stream.on('end', () => {
            clearTimeout(timer)
            resolve(buf)
          })
          // Without a listener an 'error' event is thrown as an uncaught
          // exception outside this try/catch. Rejecting routes it to the catch
          // below, so an inconclusive probe is logged and never triggers a
          // reinstall.
          stream.on('error', (streamErr: Error) => {
            clearTimeout(timer)
            reject(streamErr)
          })
        })

        // Broken when the output carries a known failure marker, or contains no
        // letters at all (i.e. no GPU name was printed).
        const passthroughBroken =
          /Failed to initialize NVML|Unknown Error|TIMEOUT/i.test(output) ||
          !/[A-Za-z]/.test(output)

        if (!passthroughBroken) {
          logger.info(
            '[GpuPassthroughRemediationProvider] NVIDIA passthrough healthy — no action needed.'
          )
          return
        }

        logger.warn(
          '[GpuPassthroughRemediationProvider] NVIDIA passthrough broken (nvidia-smi inside nomad_ollama failed). ' +
            'Auto-reinstalling nomad_ollama; volumes and installed models are preserved.'
        )

        const dockerService = new DockerService()
        const result = await dockerService.forceReinstall(SERVICE_NAMES.OLLAMA)

        if (result.success) {
          // Record when the automatic fix last succeeded.
          await KVStore.setValue('gpu.autoRemediatedAt', new Date().toISOString())
          logger.info(
            '[GpuPassthroughRemediationProvider] nomad_ollama force-reinstall completed successfully.'
          )
        } else {
          logger.error(
            `[GpuPassthroughRemediationProvider] Force-reinstall failed: ${result.message}. ` +
              'User can still click the "Fix: Reinstall AI Assistant" banner manually.'
          )
        }
      } catch (err: unknown) {
        // Narrow instead of trusting `any`: Error instances log their message,
        // anything else is stringified.
        const reason = err instanceof Error ? err.message : String(err)
        logger.error(
          `[GpuPassthroughRemediationProvider] Auto-remediation check failed: ${reason}`
        )
      }
    })
  }
}
|
||||
|
|
@ -118,7 +118,6 @@ router
|
|||
router.post('/models', [OllamaController, 'dispatchModelDownload'])
|
||||
router.delete('/models', [OllamaController, 'deleteModel'])
|
||||
router.get('/installed-models', [OllamaController, 'installedModels'])
|
||||
router.post('/unload-chat-models', [OllamaController, 'unloadChatModels'])
|
||||
router.post('/configure-remote', [OllamaController, 'configureRemote'])
|
||||
router.get('/remote-status', [OllamaController, 'remoteStatus'])
|
||||
})
|
||||
|
|
@ -142,18 +141,12 @@ router
|
|||
.group(() => {
|
||||
router.post('/upload', [RagController, 'upload'])
|
||||
router.get('/files', [RagController, 'getStoredFiles'])
|
||||
router.get('/file-warnings', [RagController, 'getFileWarnings'])
|
||||
router.delete('/files', [RagController, 'deleteFile'])
|
||||
router.post('/files/embed', [RagController, 'embedFile'])
|
||||
router.get('/active-jobs', [RagController, 'getActiveJobs'])
|
||||
router.get('/failed-jobs', [RagController, 'getFailedJobs'])
|
||||
router.delete('/failed-jobs', [RagController, 'cleanupFailedJobs'])
|
||||
router.get('/job-status', [RagController, 'getJobStatus'])
|
||||
router.post('/sync', [RagController, 'scanAndSync'])
|
||||
router.post('/re-embed-all', [RagController, 'reembedAll'])
|
||||
router.post('/reset-and-rebuild', [RagController, 'resetAndRebuild'])
|
||||
router.post('/estimate-batch', [RagController, 'estimateBatch'])
|
||||
router.get('/policy-prompt-state', [RagController, 'policyPromptState'])
|
||||
router.get('/health', [RagController, 'health'])
|
||||
})
|
||||
.prefix('/api/rag')
|
||||
|
|
|
|||
|
|
@ -1,57 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import { assertNotCloudMetadataUrl } from '../../app/validators/common.js'
|
||||
|
||||
/** Asserts that `assertNotCloudMetadataUrl(url)` throws an error matching /instance metadata|http or https/. */
const expectBlocked = (url: string) => {
|
||||
assert.throws(() => assertNotCloudMetadataUrl(url), /instance metadata|http or https/)
|
||||
}
|
||||
|
||||
/** Asserts that `assertNotCloudMetadataUrl(url)` completes without throwing. */
const expectAllowed = (url: string) => {
|
||||
assert.doesNotThrow(() => assertNotCloudMetadataUrl(url))
|
||||
}
|
||||
|
||||
test('blocks plain IPv4 IMDS', () => {
|
||||
expectBlocked('http://169.254.169.254/latest/meta-data/')
|
||||
})
|
||||
|
||||
test('blocks IPv6 EC2 IMDS', () => {
|
||||
expectBlocked('http://[fd00:ec2::254]/')
|
||||
})
|
||||
|
||||
test('blocks fully-expanded IPv6 EC2 IMDS', () => {
|
||||
expectBlocked('http://[fd00:ec2:0:0:0:0:0:254]/')
|
||||
})
|
||||
|
||||
test('blocks IPv4-mapped IPv6 form of IMDS (dotted)', () => {
|
||||
expectBlocked('http://[::ffff:169.254.169.254]/')
|
||||
})
|
||||
|
||||
test('blocks IPv4-mapped IPv6 form of IMDS (hex)', () => {
|
||||
expectBlocked('http://[::ffff:a9fe:a9fe]/')
|
||||
})
|
||||
|
||||
test('blocks fully-expanded IPv4-mapped IPv6 form of IMDS', () => {
|
||||
expectBlocked('http://[0:0:0:0:0:ffff:a9fe:a9fe]/')
|
||||
})
|
||||
|
||||
test('blocks non-http(s) schemes', () => {
|
||||
expectBlocked('file:///etc/passwd')
|
||||
expectBlocked('gopher://169.254.169.254/')
|
||||
})
|
||||
|
||||
test('allows LAN / loopback / RFC1918 hosts (intentional for remote-ollama use case)', () => {
|
||||
expectAllowed('http://127.0.0.1:11434/')
|
||||
expectAllowed('http://192.168.1.10:11434/')
|
||||
expectAllowed('http://10.0.0.5:11434/')
|
||||
expectAllowed('http://[::1]:11434/')
|
||||
})
|
||||
|
||||
test('allows DNS hostnames', () => {
|
||||
expectAllowed('http://ollama.lan:11434/')
|
||||
expectAllowed('https://api.example.com/v1')
|
||||
})
|
||||
|
||||
test('allows other link-local IPv4 addresses (not the metadata IP)', () => {
|
||||
expectAllowed('http://169.254.1.1/')
|
||||
})
|
||||
|
|
@ -1,108 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import {
|
||||
classifyKbFile,
|
||||
groupAndSortKbFiles,
|
||||
sourceToDisplayName,
|
||||
} from '../../inertia/lib/kb_file_grouping.js'
|
||||
import type { StoredFileInfo } from '../../types/rag.js'
|
||||
|
||||
/**
 * Builds the minimal `StoredFileInfo` rows that `groupAndSortKbFiles` takes
 * from a list of source paths.
 *
 * Every row gets `state: null` and `chunksEmbedded: 0`: those fields are
 * irrelevant to grouping/sorting behavior, which is all these tests exercise.
 * Per-file state-pill rendering is covered by the modal's component tests.
 */
const asInfos = (sources: string[]): StoredFileInfo[] => {
  const infos: StoredFileInfo[] = []
  for (const source of sources) {
    infos.push({ source, state: null, chunksEmbedded: 0 })
  }
  return infos
}
|
||||
|
||||
// ---------- classifyKbFile ----------

test('classifyKbFile distinguishes ZIM, upload, admin_docs, and other', () => {
  assert.equal(classifyKbFile('/app/storage/zim/devdocs_en_python_2026-02.zim'), 'zim')
  assert.equal(
    classifyKbFile('/app/storage/kb_uploads/federalist.txt-8cc4ec95aa8f.txt'),
    'upload'
  )
  assert.equal(classifyKbFile('/app/docs/release-notes.md'), 'admin_docs')
  assert.equal(classifyKbFile('/app/README.md'), 'admin_docs')
  assert.equal(classifyKbFile('/unexpected/path/file.txt'), 'other')
})

test('classifyKbFile does not match /app/READMEs that are not the bundled one', () => {
  assert.equal(classifyKbFile('/app/README.md.bak'), 'other')
})

// ---------- sourceToDisplayName ----------

test('sourceToDisplayName returns the basename', () => {
  assert.equal(
    sourceToDisplayName('/app/storage/zim/devdocs_en_python_2026-02.zim'),
    'devdocs_en_python_2026-02.zim'
  )
  assert.equal(sourceToDisplayName('/app/docs/release-notes.md'), 'release-notes.md')
})

// ---------- groupAndSortKbFiles ----------

test('groupAndSortKbFiles collapses all admin docs into a single row', () => {
  const groups = groupAndSortKbFiles(asInfos([
    '/app/docs/release-notes.md',
    '/app/docs/getting-started.md',
    '/app/docs/maps.md',
    '/app/README.md',
  ]))

  assert.equal(groups.length, 1)
  assert.equal(groups[0].bucket, 'admin_docs')
  assert.equal(groups[0].count, 4)
  assert.equal(groups[0].displayName, 'Project NOMAD documentation · 4 files')
  // Sort a copy: `members` belongs to the result under test, and
  // Array.prototype.sort reorders its receiver in place.
  assert.deepEqual([...groups[0].members].sort(), [
    '/app/README.md',
    '/app/docs/getting-started.md',
    '/app/docs/maps.md',
    '/app/docs/release-notes.md',
  ])
})

test('groupAndSortKbFiles orders buckets ZIM → upload → admin_docs → other', () => {
  // Input is deliberately out of bucket order.
  const groups = groupAndSortKbFiles(asInfos([
    '/app/docs/release-notes.md',
    '/unexpected/foo.txt',
    '/app/storage/kb_uploads/upload.pdf',
    '/app/storage/zim/devdocs.zim',
  ]))

  assert.deepEqual(
    groups.map((g) => g.bucket),
    ['zim', 'upload', 'admin_docs', 'other']
  )
})

test('groupAndSortKbFiles alphabetizes within a bucket', () => {
  const groups = groupAndSortKbFiles(asInfos([
    '/app/storage/zim/wikipedia.zim',
    '/app/storage/zim/devdocs.zim',
    '/app/storage/zim/ifixit.zim',
  ]))

  assert.deepEqual(
    groups.map((g) => g.displayName),
    ['devdocs.zim', 'ifixit.zim', 'wikipedia.zim']
  )
})

test('groupAndSortKbFiles uses singular noun when only one admin doc exists', () => {
  const groups = groupAndSortKbFiles(asInfos(['/app/docs/release-notes.md']))
  assert.equal(groups[0].displayName, 'Project NOMAD documentation · 1 file')
})

test('groupAndSortKbFiles handles empty input', () => {
  assert.deepEqual(groupAndSortKbFiles([]), [])
})

test('groupAndSortKbFiles preserves a stable synthetic key for the admin docs group', () => {
  const groups = groupAndSortKbFiles(asInfos([
    '/app/docs/release-notes.md',
    '/app/docs/maps.md',
  ]))
  // The admin-docs row uses a synthetic source key (not a real path) so it
  // can be used as a React key without colliding with any real file row.
  assert.equal(groups[0].source, '__admin_docs_group__')
})
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import {
|
||||
GUARDRAIL_ABSOLUTE_BYTES,
|
||||
GUARDRAIL_FREE_DISK_RATIO,
|
||||
evaluateGuardrail,
|
||||
} from '../../inertia/lib/kb_guardrail.js'
|
||||
|
||||
/** Bytes in one gibibyte; keeps the scenarios below readable. */
const GB = 1024 ** 3

test('small batch does not trip the guardrail', () => {
  // 1 GB estimate against 500 GB free.
  const { trips, reasons } = evaluateGuardrail({ estimateBytes: 1 * GB, freeBytes: 500 * GB })
  assert.equal(trips, false)
  assert.deepEqual(reasons, [])
})

test('batch at exactly the absolute threshold trips', () => {
  const { trips, reasons } = evaluateGuardrail({
    estimateBytes: GUARDRAIL_ABSOLUTE_BYTES,
    freeBytes: 1000 * GB,
  })
  assert.equal(trips, true)
  assert.equal(reasons.length, 1)
  assert.equal(reasons[0].kind, 'over_absolute')
})

test('batch over the absolute threshold trips with over_absolute reason', () => {
  const { trips, reasons } = evaluateGuardrail({ estimateBytes: 60 * GB, freeBytes: 1000 * GB })
  const kinds = reasons.map((reason) => reason.kind)
  assert.ok(kinds.includes('over_absolute'), 'should include over_absolute reason')
  assert.equal(trips, true)
})

test('batch over 10% of free disk trips with over_free_disk reason', () => {
  // 5 GB estimate against 40 GB free disk -> 5 > 4 (10% of 40)
  const { trips, reasons } = evaluateGuardrail({ estimateBytes: 5 * GB, freeBytes: 40 * GB })
  const kinds = reasons.map((reason) => reason.kind)
  assert.ok(kinds.includes('over_free_disk'), 'should include over_free_disk reason')
  assert.equal(trips, true)
})

test('batch can trip BOTH thresholds simultaneously', () => {
  // 100 GB estimate, 200 GB free
  //   - over absolute (100 > 50)
  //   - over 10% of free (100 > 20)
  const { trips, reasons } = evaluateGuardrail({ estimateBytes: 100 * GB, freeBytes: 200 * GB })
  const kinds = reasons.map((reason) => reason.kind)
  assert.equal(trips, true)
  assert.equal(reasons.length, 2)
  assert.ok(kinds.includes('over_absolute'))
  assert.ok(kinds.includes('over_free_disk'))
})

test('freeBytes = 0 skips the relative-disk check', () => {
  // 100 MB estimate, no free-disk signal: only the absolute check runs,
  // and 100 MB is well below the 50 GB absolute threshold
  const hundredMegabytes = 100 * 1024 * 1024
  const { trips } = evaluateGuardrail({ estimateBytes: hundredMegabytes, freeBytes: 0 })
  assert.equal(trips, false)
})

test('freeBytes = 0 still trips the absolute check at 50 GB', () => {
  const { trips, reasons } = evaluateGuardrail({ estimateBytes: 100 * GB, freeBytes: 0 })
  assert.equal(trips, true)
  assert.equal(reasons.length, 1)
  assert.equal(reasons[0].kind, 'over_absolute')
})

test('relative-disk threshold computed from GUARDRAIL_FREE_DISK_RATIO constant', () => {
  // Estimate exactly equal to 10% of free -> trips (>=)
  const free = 100 * GB
  const exactlyTenPercent = free * GUARDRAIL_FREE_DISK_RATIO
  const { reasons } = evaluateGuardrail({ estimateBytes: exactlyTenPercent, freeBytes: free })
  const kinds = reasons.map((reason) => reason.kind)
  assert.ok(kinds.includes('over_free_disk'), 'should trip at exactly the threshold')
})

test('batch just under both thresholds does not trip', () => {
  // 4 GB estimate vs 50 GB free -> 10% of 50 = 5 GB, so 4 < 5
  // Also well below 50 GB absolute
  const { trips } = evaluateGuardrail({ estimateBytes: 4 * GB, freeBytes: 50 * GB })
  assert.equal(trips, false)
})
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import { decideScanAction } from '../../app/utils/kb_ingest_decision.js'
|
||||
|
||||
/**
 * One scan-decision scenario. `decide` wraps the exact `decideScanAction`
 * call so each call site is type-checked on its own; `expected` is the
 * action object the call must deep-equal.
 */
type ScanCase = { title: string; decide: () => unknown; expected: unknown }

const scanCases: readonly ScanCase[] = [
  // ---------- Always-policy cases (default behavior; preserves pre-policy install) ----------
  {
    title: 'Always: no state row, no chunks → dispatch and create row (new file)',
    decide: () => decideScanAction(null, false, 'Always'),
    expected: { kind: 'dispatch', createStateRow: true },
  },
  {
    title:
      'Always: no state row, chunks present → backfill_indexed (pre-RFC install, existing Qdrant volume)',
    decide: () => decideScanAction(null, true, 'Always'),
    expected: { kind: 'backfill_indexed' },
  },
  {
    title: 'Always: indexed + chunks present → skip',
    decide: () => decideScanAction({ state: 'indexed' }, true, 'Always'),
    expected: { kind: 'skip' },
  },
  {
    title:
      'Always: indexed + chunks missing → re-dispatch (Qdrant collection rebuilt or chunks deleted)',
    decide: () => decideScanAction({ state: 'indexed' }, false, 'Always'),
    expected: { kind: 'dispatch', createStateRow: false },
  },
  {
    title: 'Always: pending_decision → dispatch',
    decide: () => decideScanAction({ state: 'pending_decision' }, false, 'Always'),
    expected: { kind: 'dispatch', createStateRow: false },
  },
  {
    title: 'Always: browse_only → skip (user opted out of indexing)',
    decide: () => decideScanAction({ state: 'browse_only' }, false, 'Always'),
    expected: { kind: 'skip' },
  },
  {
    title: 'Always: failed → skip (manual retry needed, do not auto-redispatch)',
    decide: () => decideScanAction({ state: 'failed' }, false, 'Always'),
    expected: { kind: 'skip' },
  },
  {
    title: 'Always: stalled → skip (manual retry needed)',
    decide: () => decideScanAction({ state: 'stalled' }, false, 'Always'),
    expected: { kind: 'skip' },
  },

  // ---------- Manual-policy cases ----------
  {
    title: 'Manual: no state row, no chunks → create_pending (do not auto-dispatch new content)',
    decide: () => decideScanAction(null, false, 'Manual'),
    expected: { kind: 'create_pending' },
  },
  {
    title:
      'Manual: no state row, chunks present → backfill_indexed (same as Always — Qdrant is authoritative)',
    decide: () => decideScanAction(null, true, 'Manual'),
    expected: { kind: 'backfill_indexed' },
  },
  {
    title: 'Manual: pending_decision → skip (waiting for user to opt in via Index button)',
    decide: () => decideScanAction({ state: 'pending_decision' }, false, 'Manual'),
    expected: { kind: 'skip' },
  },
  {
    // Policy switch from Always→Manual must not break in-flight or partially-deleted indexes
    // for files the user previously chose to index.
    title:
      'Manual: indexed + chunks missing → re-dispatch (user has already opted in for this file)',
    decide: () => decideScanAction({ state: 'indexed' }, false, 'Manual'),
    expected: { kind: 'dispatch', createStateRow: false },
  },
  {
    title: 'Manual: browse_only → skip (same as Always)',
    decide: () => decideScanAction({ state: 'browse_only' }, false, 'Manual'),
    expected: { kind: 'skip' },
  },

  // ---------- Policy default ----------
  {
    title: 'omitted policy defaults to Always (unset KV preserves legacy behavior)',
    decide: () => decideScanAction(null, false),
    expected: { kind: 'dispatch', createStateRow: true },
  },
]

// One `test()` per scenario, registered in table order.
for (const { title, decide, expected } of scanCases) {
  test(title, () => {
    assert.deepEqual(decide(), expected)
  })
}
|
||||
|
|
@ -1,100 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import { computeJobHealth } from '../../app/utils/kb_job_health.js'
|
||||
|
||||
/** Milliseconds in one minute. */
const MIN = 60 * 1000
/** Arbitrary fixed epoch (ms) passed as `now`, so the tests are deterministic. */
const NOW = 1_700_000_000_000

/**
 * Health of a `batch_completed` job at 50% progress whose last batch
 * finished `elapsedMs` milliseconds before `NOW`.
 */
const healthAfterLastBatch = (elapsedMs: number) =>
  computeJobHealth({
    status: 'batch_completed',
    progress: 50,
    lastBatchAt: NOW - elapsedMs,
    now: NOW,
  })

test('failed status takes precedence over any timing', () => {
  const health = computeJobHealth({ status: 'failed', progress: 42, lastBatchAt: NOW, now: NOW })
  assert.equal(health, 'failed')
})

test('no progress + no activity timestamps → waiting', () => {
  const health = computeJobHealth({ status: 'waiting', progress: 0, now: NOW })
  assert.equal(health, 'waiting')
})

test('progress > 0 but no lastBatchAt yet → healthy (first batch just started)', () => {
  const health = computeJobHealth({ status: 'processing', progress: 5, startedAt: NOW, now: NOW })
  assert.equal(health, 'healthy')
})

test('lastBatchAt 30s ago → healthy', () => {
  assert.equal(healthAfterLastBatch(30 * 1000), 'healthy')
})

test('lastBatchAt 90s ago → still healthy (under 2 min threshold)', () => {
  assert.equal(healthAfterLastBatch(90 * 1000), 'healthy')
})

test('lastBatchAt 3 min ago → slow (CPU-paced ingestion lives here)', () => {
  assert.equal(healthAfterLastBatch(3 * MIN), 'slow')
})

test('lastBatchAt 4:30 ago → still slow (under 5 min stalled threshold)', () => {
  assert.equal(healthAfterLastBatch(4.5 * MIN), 'slow')
})

test('lastBatchAt 5:01 ago → stalled', () => {
  assert.equal(healthAfterLastBatch(5 * MIN + 1000), 'stalled')
})

test('lastBatchAt missing but startedAt 10 min ago → stalled (first-batch-never-finished case)', () => {
  const health = computeJobHealth({
    status: 'processing',
    progress: 5,
    startedAt: NOW - 10 * MIN,
    now: NOW,
  })
  assert.equal(health, 'stalled')
})
|
||||
|
|
@ -1,111 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import {
|
||||
BYTES_PER_CHUNK_ON_DISK,
|
||||
estimateBatch,
|
||||
estimateChunkCount,
|
||||
findChunksPerMb,
|
||||
} from '../../app/utils/kb_ratio_lookup.js'
|
||||
|
||||
/**
 * Registry fixture: filename-prefix patterns and their chunks-per-MB ratio.
 * Includes a zero-ratio row and the empty-string catch-all row.
 */
const SEEDED_ROWS = [
  { pattern: 'devdocs_', chunks_per_mb: 1100 },
  { pattern: 'wikipedia_en_simple_', chunks_per_mb: 270 },
  { pattern: 'wikipedia_en_', chunks_per_mb: 250 },
  { pattern: 'ifixit_', chunks_per_mb: 50 },
  { pattern: 'lrnselfreliance_', chunks_per_mb: 0 },
  { pattern: '', chunks_per_mb: 100 },
]

/** Fresh copy of the fixture without the empty-string catch-all row. */
const seededRowsWithoutFallback = () => SEEDED_ROWS.filter((row) => row.pattern !== '')

// Filenames shared by several tests.
const DEVDOCS_ZIM = 'devdocs_en_python_2026-02.zim'
const WIKIPEDIA_SIMPLE_ZIM = 'wikipedia_en_simple_all_nopic_2026-02.zim'
const VIDEO_ONLY_ZIM = 'lrnselfreliance_en_all_2025-12.zim'
const UNKNOWN_ZIM = 'something_unknown_2026-02.zim'

test('exact prefix match', () => {
  assert.equal(findChunksPerMb(DEVDOCS_ZIM, SEEDED_ROWS), 1100)
})

test('longest-prefix wins over broader sibling', () => {
  // wikipedia_en_simple_* should pick 270, not the 250 from wikipedia_en_
  assert.equal(findChunksPerMb(WIKIPEDIA_SIMPLE_ZIM, SEEDED_ROWS), 270)
})

test('broader prefix used when no specific match', () => {
  // wikipedia_en_medicine_* is not seeded; falls through to wikipedia_en_ at 250
  assert.equal(findChunksPerMb('wikipedia_en_medicine_nopic_2026-04.zim', SEEDED_ROWS), 250)
})

test('empty-string fallback catches unmatched filenames', () => {
  assert.equal(findChunksPerMb(UNKNOWN_ZIM, SEEDED_ROWS), 100)
})

test('returns null when no row matches and no fallback is registered', () => {
  const rowsWithoutFallback = seededRowsWithoutFallback()
  assert.equal(findChunksPerMb(UNKNOWN_ZIM, rowsWithoutFallback), null)
})

test('zero-ratio entry returns 0, not null (video-only ZIMs)', () => {
  assert.equal(findChunksPerMb(VIDEO_ONLY_ZIM, SEEDED_ROWS), 0)
})

test('estimateChunkCount scales by file size in MB', () => {
  // 100 MB * 1100 chunks/MB ≈ 110,000 chunks for devdocs
  const bytes = 100 * 1024 * 1024
  assert.equal(estimateChunkCount(DEVDOCS_ZIM, bytes, SEEDED_ROWS), 110000)
})

test('estimateChunkCount returns 0 for video-only ZIM regardless of size', () => {
  const bytes = 5 * 1024 * 1024 * 1024 // 5 GB
  assert.equal(estimateChunkCount(VIDEO_ONLY_ZIM, bytes, SEEDED_ROWS), 0)
})

test('estimateChunkCount returns null when no match and no fallback', () => {
  const rowsWithoutFallback = seededRowsWithoutFallback()
  const estimate = estimateChunkCount(UNKNOWN_ZIM, 50 * 1024 * 1024, rowsWithoutFallback)
  assert.equal(estimate, null)
})

test('estimateBatch sums chunks and bytes for matched files', () => {
  const files = [
    // 100 MB devdocs -> 110,000 chunks
    { filename: DEVDOCS_ZIM, sizeBytes: 100 * 1024 * 1024 },
    // 500 MB wikipedia_en_simple -> 135,000 chunks
    { filename: WIKIPEDIA_SIMPLE_ZIM, sizeBytes: 500 * 1024 * 1024 },
  ]
  const expectedChunks = 110_000 + 135_000
  const { totalChunks, totalBytes, hasUnknown } = estimateBatch(files, SEEDED_ROWS)
  assert.equal(totalChunks, expectedChunks)
  assert.equal(totalBytes, expectedChunks * BYTES_PER_CHUNK_ON_DISK)
  assert.equal(hasUnknown, false)
})

test('estimateBatch sets hasUnknown when a file has no registry match', () => {
  // Drop the empty-string fallback so the unknown file truly has no match
  const rows = seededRowsWithoutFallback()
  const files = [
    { filename: DEVDOCS_ZIM, sizeBytes: 100 * 1024 * 1024 },
    { filename: UNKNOWN_ZIM, sizeBytes: 50 * 1024 * 1024 },
  ]
  const { totalChunks, hasUnknown } = estimateBatch(files, rows)
  assert.equal(totalChunks, 110_000) // only the matched file
  assert.equal(hasUnknown, true)
})

test('estimateBatch handles video-only ZIMs (0 chunks/MB) without flagging hasUnknown', () => {
  // A 5 GB video ZIM matches the registry with 0 chunks/MB; that is a
  // *known* value, not an unknown — totals should be 0 and hasUnknown false.
  const files = [{ filename: VIDEO_ONLY_ZIM, sizeBytes: 5 * 1024 * 1024 * 1024 }]
  const { totalChunks, totalBytes, hasUnknown } = estimateBatch(files, SEEDED_ROWS)
  assert.equal(totalChunks, 0)
  assert.equal(totalBytes, 0)
  assert.equal(hasUnknown, false)
})

test('estimateBatch on empty input returns zeros', () => {
  const { totalChunks, totalBytes, hasUnknown } = estimateBatch([], SEEDED_ROWS)
  assert.equal(totalChunks, 0)
  assert.equal(totalBytes, 0)
  assert.equal(hasUnknown, false)
})
|
||||
|
|
@ -1,125 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import { decideWarnings } from '../../app/utils/kb_warning_decision.js'
|
||||
|
||||
/** Bytes in one mebibyte; file sizes below are written as multiples of it. */
const MB = 1024 * 1024

// ---------- No-warning cases ----------

test('healthy file: chunks present and on-target → no warnings', () => {
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 100 * MB,
      chunksInQdrant: 11_000,
      expectedChunks: 11_000,
    }),
    []
  )
})

test('healthy file: chunks slightly above expectation → no warnings', () => {
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 100 * MB,
      chunksInQdrant: 12_000,
      expectedChunks: 11_000,
    }),
    []
  )
})

// ---------- Warning A: zero_chunks ----------

test('Warning A: large file with 0 chunks (video-only ZIM)', () => {
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 5 * 1024 * MB,
      chunksInQdrant: 0,
      expectedChunks: 0,
    }),
    [{ kind: 'zero_chunks', fileSizeBytes: 5 * 1024 * MB }]
  )
})

test('Warning A: small empty file is silently ignored (under 100 MB threshold)', () => {
  // A user uploads a 5 KB placeholder.txt that produces nothing → not worth a banner
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 5 * 1024, // 5 KB
      chunksInQdrant: 0,
      expectedChunks: null,
    }),
    []
  )
})

// ---------- Warning B: partial_stall ----------

test('Warning B: partial stall — chunks well below expectation', () => {
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 1000 * MB,
      chunksInQdrant: 266,
      expectedChunks: 600_000,
    }),
    [{ kind: 'partial_stall', chunksEmbedded: 266, chunksExpected: 600_000 }]
  )
})

test('Warning B: chunks just under 50% of expected → triggers', () => {
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 100 * MB,
      chunksInQdrant: 4_999,
      expectedChunks: 10_000,
    }),
    [{ kind: 'partial_stall', chunksEmbedded: 4_999, chunksExpected: 10_000 }]
  )
})

test('Warning B: chunks at exactly 50% of expected → does NOT trigger', () => {
  // Strict less-than threshold leaves room for the boundary
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 100 * MB,
      chunksInQdrant: 5_000,
      expectedChunks: 10_000,
    }),
    []
  )
})

test('Warning B suppressed when expectedChunks is null (registry miss)', () => {
  // Better to be silent than show a meaningless "266 of unknown" comparison
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 100 * MB,
      chunksInQdrant: 266,
      expectedChunks: null,
    }),
    []
  )
})

test('Warning B suppressed when expectedChunks is 0 (video-only registry entry)', () => {
  // A `lrnselfreliance_` row in the registry says "expect 0 chunks". A real
  // file matching it correctly producing 0 chunks must not trigger Warning B.
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 500 * MB,
      chunksInQdrant: 0,
      expectedChunks: 0,
    }),
    // Note: Warning A triggers here because file > 100 MB and chunks = 0
    [{ kind: 'zero_chunks', fileSizeBytes: 500 * MB }]
  )
})

// ---------- Interaction between A and B ----------

// The title states what is asserted: only Warning A is returned. The previous
// wording ("Both warnings can fire on the same file in principle") said the
// opposite of the expectation below.
test('Zero chunks with a non-zero expectation → Warning A only (Warning B requires chunksInQdrant > 0)', () => {
  // Edge case: huge file, 0 chunks, but ratio registry expected 100k.
  // Warning A fires (large + zero), Warning B suppressed (chunksInQdrant must be > 0).
  // This documents the chunksInQdrant > 0 guard on Warning B.
  assert.deepEqual(
    decideWarnings({
      fileSizeBytes: 1000 * MB,
      chunksInQdrant: 0,
      expectedChunks: 100_000,
    }),
    [{ kind: 'zero_chunks', fileSizeBytes: 1000 * MB }]
  )
})
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
import * as assert from 'node:assert/strict'
|
||||
import { test } from 'node:test'
|
||||
|
||||
import { findReplacedWikipediaFiles, zimFilenameStem } from '../../app/utils/zim_filename.js'
|
||||
|
||||
// ---------- zimFilenameStem ----------

test('zimFilenameStem strips YYYY-MM date suffix', () => {
  const stem = zimFilenameStem('wikipedia_en_all_nopic_2026-02.zim')
  assert.equal(stem, 'wikipedia_en_all_nopic')
})

test('zimFilenameStem strips YYYY-MM-DD date suffix', () => {
  const stem = zimFilenameStem('wikipedia_en_all_nopic_2026-02-15.zim')
  assert.equal(stem, 'wikipedia_en_all_nopic')
})

test('zimFilenameStem returns input unchanged when no date suffix present', () => {
  const undated = 'wikipedia_en_my_custom_extract.zim'
  assert.equal(zimFilenameStem(undated), 'wikipedia_en_my_custom_extract.zim')
})

// ---------- findReplacedWikipediaFiles ----------
// In each test `incoming` is the first argument and `onDisk` the second; the
// incoming file itself always appears in the `onDisk` listing.

test('findReplacedWikipediaFiles cleans up older version of same variant', () => {
  const incoming = 'wikipedia_en_all_nopic_2026-04.zim'
  const onDisk = ['wikipedia_en_all_nopic_2026-02.zim', 'wikipedia_en_all_nopic_2026-04.zim']
  const replaced = findReplacedWikipediaFiles(incoming, onDisk)
  assert.deepEqual(replaced, ['wikipedia_en_all_nopic_2026-02.zim'])
})

test('findReplacedWikipediaFiles preserves co-existing distinct corpora — the #884 regression case', () => {
  const incoming = 'wikipedia_en_medicine_nopic_2026-04.zim'
  const onDisk = [
    'wikipedia_en_simple_all_nopic_2026-02.zim',
    'wikipedia_en_medicine_nopic_2026-04.zim',
  ]
  assert.deepEqual(findReplacedWikipediaFiles(incoming, onDisk), [])
})

test('findReplacedWikipediaFiles preserves all unrelated variants when a new variant lands', () => {
  const incoming = 'wikipedia_en_all_nopic_2026-04.zim'
  const onDisk = [
    'wikipedia_en_simple_all_nopic_2026-02.zim',
    'wikipedia_en_medicine_nopic_2026-04.zim',
    'wikipedia_en_wikivoyage_2026-02.zim',
    'wikipedia_en_climate_change_2025-08.zim',
    'wikipedia_en_all_nopic_2026-04.zim',
  ]
  assert.deepEqual(findReplacedWikipediaFiles(incoming, onDisk), [])
})

test('findReplacedWikipediaFiles ignores files without wikipedia_en_ prefix', () => {
  const incoming = 'wikipedia_en_all_nopic_2026-04.zim'
  const onDisk = [
    'wiktionary_en_all_2026-02.zim',
    'gutenberg_en_all_2026-01.zim',
    'wikipedia_en_all_nopic_2026-04.zim',
  ]
  assert.deepEqual(findReplacedWikipediaFiles(incoming, onDisk), [])
})

test('findReplacedWikipediaFiles preserves manually-named files without a date suffix', () => {
  const incoming = 'wikipedia_en_all_nopic_2026-04.zim'
  const onDisk = ['wikipedia_en_my_custom_extract.zim', 'wikipedia_en_all_nopic_2026-04.zim']
  assert.deepEqual(findReplacedWikipediaFiles(incoming, onDisk), [])
})
|
||||
|
|
@ -64,11 +64,6 @@ export type ResourceStatus = 'installed' | 'not_installed' | 'update_available'
|
|||
|
||||
/** A `SpecCategory` extended with optional tier-status slugs. */
export type CategoryWithStatus = SpecCategory & {
  // NOTE(review): presumably the slug of the tier that is fully installed — confirm against the producer.
  installedTierSlug?: string
  /**
   * Highest tier whose every resource is either installed OR has an in-flight
   * download. Set only when it differs from `installedTierSlug`, i.e. the
   * user picked something larger and downloads are still running. Lets the UI
   * show the user's actual intent during the (often long) download window.
   */
  downloadingTierSlug?: string
}
|
||||
|
||||
export type CollectionWithStatus = SpecCollection & {
|
||||
|
|
|
|||
|
|
@ -1,9 +0,0 @@
|
|||
/**
 * The knowledge-base ingest state values, as a readonly tuple of string
 * literals (`as const`) so a literal union can be derived from it.
 */
export const KB_INGEST_STATES = [
  'pending_decision',
  'indexed',
  'browse_only',
  'failed',
  'stalled',
] as const

/** Union of the string literals listed in `KB_INGEST_STATES`. */
export type KbIngestStateValue = (typeof KB_INGEST_STATES)[number]
|
||||
|
|
@ -3,7 +3,6 @@ export const KV_STORE_SCHEMA = {
|
|||
'chat.suggestionsEnabled': 'boolean',
|
||||
'chat.lastModel': 'string',
|
||||
'rag.docsEmbedded': 'boolean',
|
||||
'rag.defaultIngestPolicy': 'string',
|
||||
'system.updateAvailable': 'boolean',
|
||||
'system.latestVersion': 'string',
|
||||
'system.earlyAccess': 'boolean',
|
||||
|
|
@ -14,9 +13,6 @@ export const KV_STORE_SCHEMA = {
|
|||
'ai.remoteOllamaUrl': 'string',
|
||||
'ai.ollamaFlashAttention': 'boolean',
|
||||
'ai.amdGpuAcceleration': 'boolean',
|
||||
'ai.amdHsaOverride': 'string',
|
||||
'ai.autoFixGpuPassthrough': 'boolean',
|
||||
'gpu.autoRemediatedAt': 'string',
|
||||
} as const
|
||||
|
||||
/** Maps a schema tag to its value type: the tag `'boolean'` yields `boolean`, any other tag yields `string`. */
type KVTagToType<T extends string> = T extends 'boolean' ? boolean : string
|
||||
|
|
|
|||
|
|
@ -5,12 +5,6 @@ export type EmbedJobWithProgress = {
|
|||
progress: number
|
||||
status: string
|
||||
error?: string
|
||||
/** ms epoch of last completed batch; multi-batch ZIMs update this each batch. */
|
||||
lastBatchAt?: number
|
||||
/** ms epoch of first batch start; used as a fallback when lastBatchAt unset. */
|
||||
startedAt?: number
|
||||
/** Total chunks embedded across this job's batches so far. */
|
||||
chunks?: number
|
||||
}
|
||||
|
||||
export type ProcessAndEmbedFileResponse = {
|
||||
|
|
@ -40,32 +34,4 @@ export type RAGResult = {
|
|||
|
||||
/**
 * A `RAGResult` with its `keywords` property removed and a numeric
 * `finalScore` added.
 */
// NOTE(review): `finalScore` is presumably the post-rerank score — confirm with the reranker.
export type RerankedRAGResult = Omit<RAGResult, 'keywords'> & {
  finalScore: number
}
|
||||
|
||||
/** Warning variant tagged `zero_chunks`; carries the file's size in bytes. */
type ZeroChunksWarning = { kind: 'zero_chunks'; fileSizeBytes: number }

/** Warning variant tagged `partial_stall`; carries embedded and expected chunk counts. */
type PartialStallWarning = {
  kind: 'partial_stall'
  chunksEmbedded: number
  chunksExpected: number
}

/** A per-file warning, discriminated by its `kind` tag. */
export type FileWarning = ZeroChunksWarning | PartialStallWarning
|
||||
|
||||
/** Ingest state literal union, referenced inline from `./kb_ingest_state.js`. */
type StoredFileState = import('./kb_ingest_state.js').KbIngestStateValue

/**
 * Row returned by `GET /api/rag/files`.
 *
 * - `state` is null for sources that exist in Qdrant but have no
 *   `kb_ingest_state` row (pre-RFC-883 installs where the scanner hasn't yet
 *   backfilled).
 * - `chunksEmbedded` mirrors the state-machine field; 0 for state-row-less or
 *   zero-chunk files.
 */
export type StoredFileInfo = {
  source: string
  state: StoredFileState | null
  chunksEmbedded: number
}
|
||||
|
||||
/**
 * Result of computing per-file warnings.
 *
 * `ok: false` means the computation itself failed (Qdrant unreachable, DB
 * outage, FS read error). That is distinct from `ok: true` with an empty
 * `warnings` map, which means every scanned file is healthy. On `!ok` the
 * frontend should surface a neutral "warnings unavailable" indicator rather
 * than implying everything is fine.
 */
export type FileWarningsResult = {
  ok: boolean
  // NOTE(review): keys are presumably file source paths — confirm against the producer.
  warnings: Record<string, FileWarning[]>
}
|
||||
|
|
@ -520,40 +520,10 @@ verify_gpu_setup() {
|
|||
# Check for AMD GPU — restrict to display controller classes to avoid false positives
|
||||
# from AMD CPU host bridges, PCI bridges, and chipset devices.
|
||||
local has_amd_gpu='false'
|
||||
local amd_gfx_version=''
|
||||
if command -v lspci &> /dev/null; then
|
||||
if lspci 2>/dev/null | grep -iE "VGA|3D controller|Display" | grep -iE "amd|radeon" &> /dev/null; then
|
||||
has_amd_gpu='true'
|
||||
echo -e "${GREEN}✓${RESET} AMD GPU detected — ROCm acceleration will be configured automatically when AI Assistant is installed.\\n"
|
||||
|
||||
# Map AMD codename → gfx version so the admin can pick the right HSA_OVERRIDE_GFX_VERSION.
|
||||
# gfx1030/1100/1101/1102 are on AMD's official ROCm allowlist and need NO override —
|
||||
# forcing one (e.g. 11.0.0) breaks GPU discovery on these. Other variants do need it.
|
||||
local amd_devices
|
||||
amd_devices=$(lspci -vmm 2>/dev/null | awk -F'\t' '/^Class:.*(VGA|3D|Display)/{c=1} c && /^Device:/{print $2; c=0}')
|
||||
if echo "${amd_devices}" | grep -iq 'Navi 21'; then
|
||||
amd_gfx_version='gfx1030'
|
||||
elif echo "${amd_devices}" | grep -iq 'Navi 22'; then
|
||||
amd_gfx_version='gfx1031'
|
||||
elif echo "${amd_devices}" | grep -iq 'Navi 23'; then
|
||||
amd_gfx_version='gfx1032'
|
||||
elif echo "${amd_devices}" | grep -iq 'Navi 24'; then
|
||||
amd_gfx_version='gfx1034'
|
||||
elif echo "${amd_devices}" | grep -iq 'Rembrandt'; then
|
||||
amd_gfx_version='gfx1035'
|
||||
elif echo "${amd_devices}" | grep -iEq 'Phoenix1?|Phoenix2'; then
|
||||
amd_gfx_version='gfx1103'
|
||||
elif echo "${amd_devices}" | grep -iEq 'Strix Halo'; then
|
||||
amd_gfx_version='gfx1151'
|
||||
elif echo "${amd_devices}" | grep -iEq 'Strix( Point)?'; then
|
||||
amd_gfx_version='gfx1150'
|
||||
elif echo "${amd_devices}" | grep -iq 'Navi 31'; then
|
||||
amd_gfx_version='gfx1100'
|
||||
elif echo "${amd_devices}" | grep -iq 'Navi 32'; then
|
||||
amd_gfx_version='gfx1101'
|
||||
elif echo "${amd_devices}" | grep -iq 'Navi 33'; then
|
||||
amd_gfx_version='gfx1102'
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
@ -569,16 +539,6 @@ verify_gpu_setup() {
|
|||
sudo rm -f "${gpu_marker_path}" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Companion marker used by the admin to pick the right HSA_OVERRIDE_GFX_VERSION for
|
||||
# the detected card. Absence of this file means "unknown gfx" — the admin falls back
|
||||
# to its built-in default. Always rewrite (or remove) on install to keep state fresh.
|
||||
local amd_gfx_marker_path="${NOMAD_DIR}/storage/.nomad-amd-gfx"
|
||||
if [[ -n "${amd_gfx_version}" ]]; then
|
||||
echo "${amd_gfx_version}" | sudo tee "${amd_gfx_marker_path}" > /dev/null 2>&1 || true
|
||||
else
|
||||
sudo rm -f "${amd_gfx_marker_path}" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo -e "${YELLOW}===========================================${RESET}\\n"
|
||||
|
||||
# Summary
|
||||
|
|
|
|||
4
package-lock.json
generated
4
package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "project-nomad",
|
||||
"version": "1.32.0-rc.6",
|
||||
"version": "1.27.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "project-nomad",
|
||||
"version": "1.32.0-rc.6",
|
||||
"version": "1.27.0",
|
||||
"license": "ISC"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "project-nomad",
|
||||
"version": "1.32.0",
|
||||
"version": "1.32.0-rc.1",
|
||||
"description": "\"",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user