fix: prevent ZIM corrupt file crash and deduplicate Ollama download logs (#741)

Corrupted ZIM files cause a native C++ abort (ZimFileFormatError) that
bypasses JS try/catch and kills the process. Add magic number validation
before passing files to @openzim/libzim so invalid files are skipped
gracefully. Also deduplicate Ollama download progress broadcasts — both
within a single stream (skip unchanged percentages) and across concurrent
callers (share one download promise per model).

Co-authored-by: aegisman <aegis@manicode.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jake Turner 2026-04-17 11:54:04 -07:00
parent 6510f42184
commit c8cb79a3a5
4 changed files with 80 additions and 12 deletions

View File

@ -2,7 +2,7 @@ import { XMLBuilder, XMLParser } from 'fast-xml-parser'
import { readFile, writeFile, rename, readdir } from 'fs/promises'
import { join } from 'path'
import { Archive } from '@openzim/libzim'
import { KIWIX_LIBRARY_XML_PATH, ZIM_STORAGE_PATH, ensureDirectoryExists } from '../utils/fs.js'
import { KIWIX_LIBRARY_XML_PATH, ZIM_STORAGE_PATH, ensureDirectoryExists, isValidZimFile } from '../utils/fs.js'
import logger from '@adonisjs/core/services/logger'
import { randomUUID } from 'node:crypto'
@ -54,8 +54,12 @@ export class KiwixLibraryService {
*
* Returns null on any error so callers can fall back gracefully.
*/
private _readZimMetadata(zimFilePath: string): Partial<KiwixBook> | null {
private async _readZimMetadata(zimFilePath: string): Promise<Partial<KiwixBook> | null> {
try {
if (!(await isValidZimFile(zimFilePath))) {
logger.warn(`[KiwixLibraryService] Skipping invalid/corrupted ZIM file: ${zimFilePath}`)
return null
}
const archive = new Archive(zimFilePath)
const getMeta = (key: string): string | undefined => {
@ -197,17 +201,22 @@ export class KiwixLibraryService {
const excludeSet = new Set(opts?.excludeFilenames ?? [])
const zimFiles = entries.filter((name) => name.endsWith('.zim') && !excludeSet.has(name))
const books: KiwixBook[] = zimFiles.map((filename) => {
const meta = this._readZimMetadata(join(dirPath, filename))
const books: KiwixBook[] = []
for (const filename of zimFiles) {
const meta = await this._readZimMetadata(join(dirPath, filename))
if (meta === null) {
logger.warn(`[KiwixLibraryService] Skipping unreadable ZIM file: ${filename}`)
continue
}
const containerPath = `${CONTAINER_DATA_PATH}/${filename}`
return {
books.push({
...meta,
// Override fields that must be derived locally, not from ZIM metadata
id: meta?.id ?? filename.slice(0, -4),
path: containerPath,
title: meta?.title ?? this._filenameToTitle(filename),
}
})
})
}
const xml = this._buildXml(books)
await this._atomicWrite(xml)
@ -239,7 +248,12 @@ export class KiwixLibraryService {
}
const fullPath = join(process.cwd(), ZIM_STORAGE_PATH, zimFilename)
const meta = this._readZimMetadata(fullPath)
const meta = await this._readZimMetadata(fullPath)
if (meta === null) {
logger.error(`[KiwixLibraryService] Cannot add ${zimFilename}: file is invalid or corrupted.`)
return
}
existingBooks.push({
...meta,

View File

@ -53,6 +53,7 @@ export class OllamaService {
private baseUrl: string | null = null
private initPromise: Promise<void> | null = null
private isOllamaNative: boolean | null = null
private activeDownloads: Map<string, Promise<{ success: boolean; message: string; retryable?: boolean }>> = new Map()
constructor() {}
@ -95,6 +96,26 @@ export class OllamaService {
async downloadModel(
model: string,
progressCallback?: (percent: number) => void
): Promise<{ success: boolean; message: string; retryable?: boolean }> {
// Deduplicate concurrent downloads of the same model
const existing = this.activeDownloads.get(model)
if (existing) {
logger.info(`[OllamaService] Download already in progress for "${model}", waiting on existing download.`)
return existing
}
const downloadPromise = this._doDownloadModel(model, progressCallback)
this.activeDownloads.set(model, downloadPromise)
try {
return await downloadPromise
} finally {
this.activeDownloads.delete(model)
}
}
private async _doDownloadModel(
model: string,
progressCallback?: (percent: number) => void
): Promise<{ success: boolean; message: string; retryable?: boolean }> {
await this._ensureDependencies()
if (!this.baseUrl) {
@ -130,6 +151,7 @@ export class OllamaService {
await new Promise<void>((resolve, reject) => {
let buffer = ''
let lastPercent = -1
pullResponse.data.on('data', (chunk: Buffer) => {
buffer += chunk.toString()
const lines = buffer.split('\n')
@ -140,8 +162,11 @@ export class OllamaService {
const parsed = JSON.parse(line)
if (parsed.completed && parsed.total) {
const percent = parseFloat(((parsed.completed / parsed.total) * 100).toFixed(2))
this.broadcastDownloadProgress(model, percent)
if (progressCallback) progressCallback(percent)
if (percent !== lastPercent) {
lastPercent = percent
this.broadcastDownloadProgress(model, percent)
if (progressCallback) progressCallback(percent)
}
}
} catch {
// ignore parse errors on partial lines

View File

@ -5,6 +5,7 @@ import logger from '@adonisjs/core/services/logger'
import { ExtractZIMChunkingStrategy, ExtractZIMContentOptions, ZIMContentChunk, ZIMArchiveMetadata } from '../../types/zim.js'
import { randomUUID } from 'node:crypto'
import { access } from 'node:fs/promises'
import { isValidZimFile } from '../utils/fs.js'
export class ZIMExtractionService {
@ -51,7 +52,13 @@ export class ZIMExtractionService {
logger.error(`[ZIMExtractionService]: ZIM file not accessible: ${filePath}`)
throw new Error(`ZIM file not found or not accessible: ${filePath}`)
}
// Validate ZIM magic number before opening with native library.
// A corrupted file causes a native C++ abort that cannot be caught by JS.
if (!(await isValidZimFile(filePath))) {
throw new Error(`ZIM file is invalid or corrupted: ${filePath}`)
}
const archive = new Archive(filePath)
// Extract archive-level metadata once

View File

@ -1,4 +1,4 @@
import { mkdir, readdir, readFile, stat, unlink } from 'fs/promises'
import { mkdir, open, readdir, readFile, stat, unlink } from 'fs/promises'
import path, { join } from 'path'
import { FileEntry } from '../../types/files.js'
import { createReadStream } from 'fs'
@ -99,6 +99,28 @@ export async function getFileStatsIfExists(
}
}
/**
* Validates that a file has the ZIM magic number (0x44D495A).
* Must be called before passing a file to @openzim/libzim Archive,
* because a corrupted ZIM causes a native C++ abort that cannot be
* caught by JS try/catch.
*/
export async function isValidZimFile(filePath: string): Promise<boolean> {
let fh
try {
fh = await open(filePath, 'r')
const buf = Buffer.alloc(4)
const { bytesRead } = await fh.read(buf, 0, 4, 0)
if (bytesRead < 4) return false
// ZIM magic number: 72 17 32 04 (little-endian 0x044D4953)
return buf[0] === 0x5a && buf[1] === 0x49 && buf[2] === 0x4d && buf[3] === 0x04
} catch {
return false
} finally {
await fh?.close()
}
}
export async function deleteFileIfExists(path: string): Promise<void> {
try {
await unlink(path)