project-nomad/admin/app/controllers/rag_controller.ts
Chris Sherwood ab8281d08b feat(KB): surface embedding-disk estimate in curated tier-change modal (RFC #883 §1)
When a user picks a tier in TierSelectionModal, show how much additional
disk space the AI Assistant will need if the new ZIMs are indexed, plus
a policy-aware footer explaining whether they'll auto-index (Always) or
wait for opt-in (Manual). Estimates consume #891's KbRatioRegistry via a
new POST /api/rag/estimate-batch endpoint.

Backend
- New POST /api/rag/estimate-batch route + RagController.estimateBatch
- VineJS schema accepting array of {filename, sizeBytes}, capped at 500
- KbRatioRegistry.estimateBatch aggregates via the existing prefix-match
  lookup, returns {totalChunks, totalBytes, hasUnknown}
- New BYTES_PER_CHUNK_ON_DISK constant (~8 KB: 3 KB vector + ~3 KB chunk
  text + ~2 KB payload/index overhead). Tunable; will be replaced by
  Phase 4 self-calibration once we have real measurements.
- Controller normalizes incoming filenames via path.basename so callers
  that send full paths or URLs still match registry prefixes correctly.

Frontend
- api.estimateEmbeddingBatch() client method
- TierSelectionModal: when localSelectedSlug is set, resolve the tier's
  resources (incl. inherited tiers), POST to /estimate-batch, and render
  a new info block with the +~X GB figure + ingest-policy copy. Also
  fetches rag.defaultIngestPolicy so the same block surfaces whether
  indexing will fire automatically or wait for the user.
- resourceFilename() helper extracts the basename from the resource URL
  so the registry lookup hits the right prefix regardless of mirror.

Tests
- 4 new cases in tests/unit/kb_ratio_lookup.spec.ts covering the
  estimateBatch aggregator: standard sum, unknown-flagging, video-only
  ZIM (0 chunks but known, hasUnknown stays false), empty input.

Stacks on feat/kb-ratio-registry (#891) — consumes the registry table
seeded by that PR. Once #891 merges to rc, this PR auto-rebases.

Out of scope for this PR (deferred to follow-ups):
- Per-batch opt-in checkbox (RFC §1's '☑ Also index these for AI') needs
  a per-batch policy override path and is a separate PR
- Guardrail modal at 50 GB / 10% free / 6 hr thresholds (RFC §7) is also
  separate; this PR is informational, not gating
- Time-to-embed estimate awaits a chunks-per-second metric per host
2026-05-16 21:33:30 -07:00

141 lines
5.1 KiB
TypeScript

import { RagService } from '#services/rag_service'
import { EmbedFileJob } from '#jobs/embed_file_job'
import KbRatioRegistry from '#models/kb_ratio_registry'
import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http'
import app from '@adonisjs/core/services/app'
import { randomBytes } from 'node:crypto'
import { sanitizeFilename } from '../utils/fs.js'
import { basename } from 'node:path'
import { deleteFileSchema, estimateBatchSchema, getJobStatusSchema } from '#validators/rag'
import logger from '@adonisjs/core/services/logger'
@inject()
export default class RagController {
constructor(private ragService: RagService) { }
public async upload({ request, response }: HttpContext) {
const uploadedFile = request.file('file')
if (!uploadedFile) {
return response.status(400).json({ error: 'No file uploaded' })
}
const randomSuffix = randomBytes(6).toString('hex')
const sanitizedName = sanitizeFilename(uploadedFile.clientName)
const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}`
const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)
await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), {
name: fileName,
})
// Dispatch background job for embedding
const result = await EmbedFileJob.dispatch({
filePath: fullPath,
fileName,
})
return response.status(202).json({
message: result.message,
jobId: result.jobId,
fileName,
filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`,
alreadyProcessing: !result.created,
})
}
public async getActiveJobs({ response }: HttpContext) {
const jobs = await EmbedFileJob.listActiveJobs()
return response.status(200).json(jobs)
}
public async getJobStatus({ request, response }: HttpContext) {
const reqData = await request.validateUsing(getJobStatusSchema)
const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, reqData.filePath)
const status = await EmbedFileJob.getStatus(fullPath)
if (!status.exists) {
return response.status(404).json({ error: 'Job not found for this file' })
}
return response.status(200).json(status)
}
public async getStoredFiles({ response }: HttpContext) {
const files = await this.ragService.getStoredFiles()
return response.status(200).json({ files })
}
public async deleteFile({ request, response }: HttpContext) {
const { source } = await request.validateUsing(deleteFileSchema)
const result = await this.ragService.deleteFileBySource(source)
if (!result.success) {
return response.status(500).json({ error: result.message })
}
return response.status(200).json({ message: result.message })
}
public async getFailedJobs({ response }: HttpContext) {
const jobs = await EmbedFileJob.listFailedJobs()
return response.status(200).json(jobs)
}
public async cleanupFailedJobs({ response }: HttpContext) {
const result = await EmbedFileJob.cleanupFailedJobs()
return response.status(200).json({
message: `Cleaned up ${result.cleaned} failed job${result.cleaned !== 1 ? 's' : ''}${result.filesDeleted > 0 ? `, deleted ${result.filesDeleted} file${result.filesDeleted !== 1 ? 's' : ''}` : ''}.`,
...result,
})
}
public async scanAndSync({ response }: HttpContext) {
try {
const syncResult = await this.ragService.scanAndSyncStorage()
return response.status(200).json(syncResult)
} catch (error) {
logger.error({ err: error }, '[RagController] Error scanning and syncing storage')
return response.status(500).json({ error: 'Error scanning and syncing storage' })
}
}
public async reembedAll({ response }: HttpContext) {
try {
const result = await this.ragService.reembedAll()
return response.status(200).json(result)
} catch (error) {
logger.error({ err: error }, '[RagController] Error during re-embed all')
return response.status(500).json({ error: 'Error during re-embed all' })
}
}
public async resetAndRebuild({ response }: HttpContext) {
try {
const result = await this.ragService.resetAndRebuild()
return response.status(200).json(result)
} catch (error) {
logger.error({ err: error }, '[RagController] Error during reset and rebuild')
return response.status(500).json({ error: 'Error during reset and rebuild' })
}
}
public async health({ response }: HttpContext) {
const result = await this.ragService.checkQdrantHealth()
return response.status(200).json(result)
}
public async estimateBatch({ request, response }: HttpContext) {
const { files } = await request.validateUsing(estimateBatchSchema)
// The registry matches on basename prefixes; if a caller passes a full path
// (e.g. /app/storage/zim/wikipedia_en_simple_…), strip directories first so
// patterns like `wikipedia_en_simple_` still match.
const normalized = files.map((f) => ({
filename: basename(f.filename),
sizeBytes: f.sizeBytes,
}))
const result = await KbRatioRegistry.estimateBatch(normalized)
return response.status(200).json(result)
}
}