diff --git a/admin/app/services/kiwix_library_service.ts b/admin/app/services/kiwix_library_service.ts index 28e6576..c7aeabd 100644 --- a/admin/app/services/kiwix_library_service.ts +++ b/admin/app/services/kiwix_library_service.ts @@ -2,7 +2,7 @@ import { XMLBuilder, XMLParser } from 'fast-xml-parser' import { readFile, writeFile, rename, readdir } from 'fs/promises' import { join } from 'path' import { Archive } from '@openzim/libzim' -import { KIWIX_LIBRARY_XML_PATH, ZIM_STORAGE_PATH, ensureDirectoryExists } from '../utils/fs.js' +import { KIWIX_LIBRARY_XML_PATH, ZIM_STORAGE_PATH, ensureDirectoryExists, isValidZimFile } from '../utils/fs.js' import logger from '@adonisjs/core/services/logger' import { randomUUID } from 'node:crypto' @@ -54,8 +54,12 @@ export class KiwixLibraryService { * * Returns null on any error so callers can fall back gracefully. */ - private _readZimMetadata(zimFilePath: string): Partial | null { + private async _readZimMetadata(zimFilePath: string): Promise | null> { try { + if (!(await isValidZimFile(zimFilePath))) { + logger.warn(`[KiwixLibraryService] Skipping invalid/corrupted ZIM file: ${zimFilePath}`) + return null + } const archive = new Archive(zimFilePath) const getMeta = (key: string): string | undefined => { @@ -197,17 +201,22 @@ export class KiwixLibraryService { const excludeSet = new Set(opts?.excludeFilenames ?? []) const zimFiles = entries.filter((name) => name.endsWith('.zim') && !excludeSet.has(name)) - const books: KiwixBook[] = zimFiles.map((filename) => { - const meta = this._readZimMetadata(join(dirPath, filename)) + const books: KiwixBook[] = [] + for (const filename of zimFiles) { + const meta = await this._readZimMetadata(join(dirPath, filename)) + if (meta === null) { + logger.warn(`[KiwixLibraryService] Skipping unreadable ZIM file: ${filename}`) + continue + } const containerPath = `${CONTAINER_DATA_PATH}/${filename}` - return { + books.push({ ...meta, // Override fields that must be derived locally, not from ZIM metadata id: meta?.id ?? filename.slice(0, -4), path: containerPath, title: meta?.title ?? this._filenameToTitle(filename), - } - }) + }) + } const xml = this._buildXml(books) await this._atomicWrite(xml) @@ -239,7 +248,12 @@ export class KiwixLibraryService { } const fullPath = join(process.cwd(), ZIM_STORAGE_PATH, zimFilename) - const meta = this._readZimMetadata(fullPath) + const meta = await this._readZimMetadata(fullPath) + + if (meta === null) { + logger.error(`[KiwixLibraryService] Cannot add ${zimFilename}: file is invalid or corrupted.`) + return + } existingBooks.push({ ...meta, diff --git a/admin/app/services/ollama_service.ts b/admin/app/services/ollama_service.ts index dacf131..5e38cb6 100644 --- a/admin/app/services/ollama_service.ts +++ b/admin/app/services/ollama_service.ts @@ -53,6 +53,7 @@ export class OllamaService { private baseUrl: string | null = null private initPromise: Promise | null = null private isOllamaNative: boolean | null = null + private activeDownloads: Map> = new Map() constructor() {} @@ -95,6 +96,26 @@ export class OllamaService { async downloadModel( model: string, progressCallback?: (percent: number) => void + ): Promise<{ success: boolean; message: string; retryable?: boolean }> { + // Deduplicate concurrent downloads of the same model + const existing = this.activeDownloads.get(model) + if (existing) { + logger.info(`[OllamaService] Download already in progress for "${model}", waiting on existing download.`) + return existing + } + + const downloadPromise = this._doDownloadModel(model, progressCallback) + this.activeDownloads.set(model, downloadPromise) + try { + return await downloadPromise + } finally { + this.activeDownloads.delete(model) + } + } + + private async _doDownloadModel( + model: string, + progressCallback?: (percent: number) => void ): Promise<{ success: boolean; message: string; retryable?: boolean }> { await this._ensureDependencies() if (!this.baseUrl) { @@ -130,6 +151,7 @@ export class OllamaService { await new Promise((resolve, reject) => { let buffer = '' + let lastPercent = -1 pullResponse.data.on('data', (chunk: Buffer) => { buffer += chunk.toString() const lines = buffer.split('\n') @@ -140,8 +162,11 @@ export class OllamaService { const parsed = JSON.parse(line) if (parsed.completed && parsed.total) { const percent = parseFloat(((parsed.completed / parsed.total) * 100).toFixed(2)) - this.broadcastDownloadProgress(model, percent) - if (progressCallback) progressCallback(percent) + if (percent !== lastPercent) { + lastPercent = percent + this.broadcastDownloadProgress(model, percent) + if (progressCallback) progressCallback(percent) + } } } catch { // ignore parse errors on partial lines diff --git a/admin/app/services/zim_extraction_service.ts b/admin/app/services/zim_extraction_service.ts index e60042d..b3594b6 100644 --- a/admin/app/services/zim_extraction_service.ts +++ b/admin/app/services/zim_extraction_service.ts @@ -5,6 +5,7 @@ import logger from '@adonisjs/core/services/logger' import { ExtractZIMChunkingStrategy, ExtractZIMContentOptions, ZIMContentChunk, ZIMArchiveMetadata } from '../../types/zim.js' import { randomUUID } from 'node:crypto' import { access } from 'node:fs/promises' +import { isValidZimFile } from '../utils/fs.js' export class ZIMExtractionService { @@ -51,7 +52,13 @@ export class ZIMExtractionService { logger.error(`[ZIMExtractionService]: ZIM file not accessible: ${filePath}`) throw new Error(`ZIM file not found or not accessible: ${filePath}`) } - + + // Validate ZIM magic number before opening with native library. + // A corrupted file causes a native C++ abort that cannot be caught by JS. + if (!(await isValidZimFile(filePath))) { + throw new Error(`ZIM file is invalid or corrupted: ${filePath}`) + } + const archive = new Archive(filePath) // Extract archive-level metadata once diff --git a/admin/app/utils/fs.ts b/admin/app/utils/fs.ts index c3ee398..d6cb5e0 100644 --- a/admin/app/utils/fs.ts +++ b/admin/app/utils/fs.ts @@ -1,4 +1,4 @@ -import { mkdir, readdir, readFile, stat, unlink } from 'fs/promises' +import { mkdir, open, readdir, readFile, stat, unlink } from 'fs/promises' import path, { join } from 'path' import { FileEntry } from '../../types/files.js' import { createReadStream } from 'fs' @@ -99,6 +99,28 @@ export async function getFileStatsIfExists( } } +/** + * Validates that a file has the ZIM magic number (0x44D495A). + * Must be called before passing a file to @openzim/libzim Archive, + * because a corrupted ZIM causes a native C++ abort that cannot be + * caught by JS try/catch. + */ +export async function isValidZimFile(filePath: string): Promise { + let fh + try { + fh = await open(filePath, 'r') + const buf = Buffer.alloc(4) + const { bytesRead } = await fh.read(buf, 0, 4, 0) + if (bytesRead < 4) return false + // ZIM magic number: 72 17 32 04 (little-endian 0x044D4953) + return buf[0] === 0x5a && buf[1] === 0x49 && buf[2] === 0x4d && buf[3] === 0x04 + } catch { + return false + } finally { + await fh?.close() + } +} + export async function deleteFileIfExists(path: string): Promise { try { await unlink(path)