From 8ed0bdfd8ffbb094d47cdf3667d3e691007bfd00 Mon Sep 17 00:00:00 2001 From: chriscrosstalk <49691103+chriscrosstalk@users.noreply.github.com> Date: Sat, 16 May 2026 22:44:30 -0700 Subject: [PATCH] fix(KB): union Stored Files list with state-machine file paths (#898) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the 'zero_chunks warning has no row to attach to' gap surfaced by the 2026-05-14 integration UAT. Before this fix, RagService.getStoredFiles returned only file paths that appeared in Qdrant's payload.source — so files with 0 embedded chunks (video-only ZIMs, browse_only opt-outs, ingestions that failed before producing any chunks) were silently missing from the KB panel's Stored Files table. The fix unions the Qdrant scroll result with the disk-backed file paths recorded in kb_ingest_state. Effect: - lrnselfreliance_en_all_2025-12.zim (3.97 GB video-only ZIM, 0 chunks) now appears in the table and picks up its zero_chunks warning chip - Files in pending_decision under Manual policy show up so the user can see what's waiting for opt-in - Files in browse_only / failed states have a row for future per-card Retry / Re-index actions (forthcoming, blocked on #886) The state-machine query is wrapped in its own try/catch so a transient DB error degrades to the Qdrant-only list rather than 500-ing the whole panel — same defensive posture as the outer try/catch. Stacks on feat/kb-ingest-state-machine (#888) because the union depends on the kb_ingest_state table that PR introduces. Will rebase to rc once #888 merges. Completes the second half of #895's warning surface; the first half (partial_stall) already worked because those files have at least some chunks in Qdrant. 
--- admin/app/services/rag_service.ts | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/admin/app/services/rag_service.ts b/admin/app/services/rag_service.ts index 7c8f943..a0eb5be 100644 --- a/admin/app/services/rag_service.ts +++ b/admin/app/services/rag_service.ts @@ -1082,6 +1082,28 @@ export class RagService { offset = scrollResult.next_page_offset || null } while (offset !== null) + // Union the Qdrant-derived list with the disk-backed file paths the + // state machine has tracked. Without this, files known to the scanner + // but with zero embedded chunks (video-only ZIMs, failed-before-first- + // chunk ingestions, browse_only opt-outs) never get a row in Stored + // Files — which means warnings keyed off those files (#895 zero_chunks + // in particular) have no row to attach to. The state machine is the + // authoritative "what's on disk?" view; Qdrant is "what made it into + // the vector store?". Both are needed to render the KB UI honestly. + try { + const stateRows = await KbIngestState.query().select('file_path') + for (const row of stateRows) { + sources.add(row.file_path) + } + } catch (error) { + // Non-fatal: if the state machine query fails for any reason we'd + // rather return the Qdrant-derived list than 500 the whole panel. + logger.warn( + { err: error }, + '[RagService.getStoredFiles] state-machine union skipped; returning Qdrant-only list' + ) + } + return Array.from(sources) } catch (error) { logger.error('Error retrieving stored files:', error)