project-nomad/admin/app/controllers/rag_controller.ts

import { RagService } from '#services/rag_service'
import { EmbedFileJob } from '#jobs/embed_file_job'
import KbRatioRegistry from '#models/kb_ratio_registry'
import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http'
import app from '@adonisjs/core/services/app'
import { randomBytes } from 'node:crypto'
import { sanitizeFilename } from '../utils/fs.js'
import { basename } from 'node:path'
import { deleteFileSchema, estimateBatchSchema, getJobStatusSchema } from '#validators/rag'
import logger from '@adonisjs/core/services/logger'

@inject()
export default class RagController {
  constructor(private ragService: RagService) { }

  public async upload({ request, response }: HttpContext) {
    const uploadedFile = request.file('file')
    if (!uploadedFile) {
      return response.status(400).json({ error: 'No file uploaded' })
    }

    const randomSuffix = randomBytes(6).toString('hex')
    const sanitizedName = sanitizeFilename(uploadedFile.clientName)

    const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}`
    const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)

    await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), {
      name: fileName,
    })

    // Dispatch background job for embedding
    const result = await EmbedFileJob.dispatch({
      filePath: fullPath,
      fileName,
    })

    return response.status(202).json({
      message: result.message,
      jobId: result.jobId,
      fileName,
      filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`,
      alreadyProcessing: !result.created,
    })
  }

  public async getActiveJobs({ response }: HttpContext) {
    const jobs = await EmbedFileJob.listActiveJobs()
    return response.status(200).json(jobs)
  }

  public async getJobStatus({ request, response }: HttpContext) {
    const reqData = await request.validateUsing(getJobStatusSchema)

    const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, reqData.filePath)
    const status = await EmbedFileJob.getStatus(fullPath)

    if (!status.exists) {
      return response.status(404).json({ error: 'Job not found for this file' })
    }

    return response.status(200).json(status)
  }

  public async getStoredFiles({ response }: HttpContext) {
    const files = await this.ragService.getStoredFiles()
    return response.status(200).json({ files })
  }

  public async getFileWarnings({ response }: HttpContext) {
    const warnings = await this.ragService.computeFileWarnings()
    return response.status(200).json({ warnings })
  }

  public async deleteFile({ request, response }: HttpContext) {
    const { source } = await request.validateUsing(deleteFileSchema)
    const result = await this.ragService.deleteFileBySource(source)
    if (!result.success) {
      return response.status(500).json({ error: result.message })
    }
    return response.status(200).json({ message: result.message })
  }

  public async getFailedJobs({ response }: HttpContext) {
    const jobs = await EmbedFileJob.listFailedJobs()
    return response.status(200).json(jobs)
  }

  public async cleanupFailedJobs({ response }: HttpContext) {
    const result = await EmbedFileJob.cleanupFailedJobs()
    return response.status(200).json({
      message: `Cleaned up ${result.cleaned} failed job${result.cleaned !== 1 ? 's' : ''}${result.filesDeleted > 0 ? `, deleted ${result.filesDeleted} file${result.filesDeleted !== 1 ? 's' : ''}` : ''}.`,
      ...result,
    })
  }

  public async scanAndSync({ response }: HttpContext) {
    try {
      const syncResult = await this.ragService.scanAndSyncStorage()
      return response.status(200).json(syncResult)
    } catch (error) {
      logger.error({ err: error }, '[RagController] Error scanning and syncing storage')
      return response.status(500).json({ error: 'Error scanning and syncing storage' })
    }
  }

  public async reembedAll({ response }: HttpContext) {
    try {
      const result = await this.ragService.reembedAll()
      return response.status(200).json(result)
    } catch (error) {
      logger.error({ err: error }, '[RagController] Error during re-embed all')
      return response.status(500).json({ error: 'Error during re-embed all' })
    }
  }

  public async resetAndRebuild({ response }: HttpContext) {
    try {
      const result = await this.ragService.resetAndRebuild()
      return response.status(200).json(result)
    } catch (error) {
      logger.error({ err: error }, '[RagController] Error during reset and rebuild')
      return response.status(500).json({ error: 'Error during reset and rebuild' })
    }
  }

  public async health({ response }: HttpContext) {
    const result = await this.ragService.checkQdrantHealth()
    return response.status(200).json(result)
  }

  public async estimateBatch({ request, response }: HttpContext) {
    const { files } = await request.validateUsing(estimateBatchSchema)
    // The registry matches on basename prefixes; if a caller passes a full path
    // (e.g. /app/storage/zim/wikipedia_en_simple_…), strip directories first so
    // patterns like `wikipedia_en_simple_` still match.
    const normalized = files.map((f) => ({
      filename: basename(f.filename),
      sizeBytes: f.sizeBytes,
    }))
    const result = await KbRatioRegistry.estimateBatch(normalized)
    return response.status(200).json(result)
  }
}