mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-05-12 16:10:11 +02:00
fix: prevent ZIM corrupt file crash and deduplicate Ollama download logs (#741)
Corrupted ZIM files cause a native C++ abort (ZimFileFormatError) that bypasses JS try/catch and kills the process. Add magic number validation before passing files to @openzim/libzim so invalid files are skipped gracefully. Also deduplicate Ollama download progress broadcasts — both within a single stream (skip unchanged percentages) and across concurrent callers (share one download promise per model). Co-authored-by: aegisman <aegis@manicode.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6510f42184
commit
c8cb79a3a5
|
|
@ -2,7 +2,7 @@ import { XMLBuilder, XMLParser } from 'fast-xml-parser'
|
|||
import { readFile, writeFile, rename, readdir } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
import { Archive } from '@openzim/libzim'
|
||||
import { KIWIX_LIBRARY_XML_PATH, ZIM_STORAGE_PATH, ensureDirectoryExists } from '../utils/fs.js'
|
||||
import { KIWIX_LIBRARY_XML_PATH, ZIM_STORAGE_PATH, ensureDirectoryExists, isValidZimFile } from '../utils/fs.js'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
import { randomUUID } from 'node:crypto'
|
||||
|
||||
|
|
@ -54,8 +54,12 @@ export class KiwixLibraryService {
|
|||
*
|
||||
* Returns null on any error so callers can fall back gracefully.
|
||||
*/
|
||||
private _readZimMetadata(zimFilePath: string): Partial<KiwixBook> | null {
|
||||
private async _readZimMetadata(zimFilePath: string): Promise<Partial<KiwixBook> | null> {
|
||||
try {
|
||||
if (!(await isValidZimFile(zimFilePath))) {
|
||||
logger.warn(`[KiwixLibraryService] Skipping invalid/corrupted ZIM file: ${zimFilePath}`)
|
||||
return null
|
||||
}
|
||||
const archive = new Archive(zimFilePath)
|
||||
|
||||
const getMeta = (key: string): string | undefined => {
|
||||
|
|
@ -197,17 +201,22 @@ export class KiwixLibraryService {
|
|||
const excludeSet = new Set(opts?.excludeFilenames ?? [])
|
||||
const zimFiles = entries.filter((name) => name.endsWith('.zim') && !excludeSet.has(name))
|
||||
|
||||
const books: KiwixBook[] = zimFiles.map((filename) => {
|
||||
const meta = this._readZimMetadata(join(dirPath, filename))
|
||||
const books: KiwixBook[] = []
|
||||
for (const filename of zimFiles) {
|
||||
const meta = await this._readZimMetadata(join(dirPath, filename))
|
||||
if (meta === null) {
|
||||
logger.warn(`[KiwixLibraryService] Skipping unreadable ZIM file: ${filename}`)
|
||||
continue
|
||||
}
|
||||
const containerPath = `${CONTAINER_DATA_PATH}/${filename}`
|
||||
return {
|
||||
books.push({
|
||||
...meta,
|
||||
// Override fields that must be derived locally, not from ZIM metadata
|
||||
id: meta?.id ?? filename.slice(0, -4),
|
||||
path: containerPath,
|
||||
title: meta?.title ?? this._filenameToTitle(filename),
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
const xml = this._buildXml(books)
|
||||
await this._atomicWrite(xml)
|
||||
|
|
@ -239,7 +248,12 @@ export class KiwixLibraryService {
|
|||
}
|
||||
|
||||
const fullPath = join(process.cwd(), ZIM_STORAGE_PATH, zimFilename)
|
||||
const meta = this._readZimMetadata(fullPath)
|
||||
const meta = await this._readZimMetadata(fullPath)
|
||||
|
||||
if (meta === null) {
|
||||
logger.error(`[KiwixLibraryService] Cannot add ${zimFilename}: file is invalid or corrupted.`)
|
||||
return
|
||||
}
|
||||
|
||||
existingBooks.push({
|
||||
...meta,
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ export class OllamaService {
|
|||
private baseUrl: string | null = null
|
||||
private initPromise: Promise<void> | null = null
|
||||
private isOllamaNative: boolean | null = null
|
||||
private activeDownloads: Map<string, Promise<{ success: boolean; message: string; retryable?: boolean }>> = new Map()
|
||||
|
||||
constructor() {}
|
||||
|
||||
|
|
@ -95,6 +96,26 @@ export class OllamaService {
|
|||
async downloadModel(
|
||||
model: string,
|
||||
progressCallback?: (percent: number) => void
|
||||
): Promise<{ success: boolean; message: string; retryable?: boolean }> {
|
||||
// Deduplicate concurrent downloads of the same model
|
||||
const existing = this.activeDownloads.get(model)
|
||||
if (existing) {
|
||||
logger.info(`[OllamaService] Download already in progress for "${model}", waiting on existing download.`)
|
||||
return existing
|
||||
}
|
||||
|
||||
const downloadPromise = this._doDownloadModel(model, progressCallback)
|
||||
this.activeDownloads.set(model, downloadPromise)
|
||||
try {
|
||||
return await downloadPromise
|
||||
} finally {
|
||||
this.activeDownloads.delete(model)
|
||||
}
|
||||
}
|
||||
|
||||
private async _doDownloadModel(
|
||||
model: string,
|
||||
progressCallback?: (percent: number) => void
|
||||
): Promise<{ success: boolean; message: string; retryable?: boolean }> {
|
||||
await this._ensureDependencies()
|
||||
if (!this.baseUrl) {
|
||||
|
|
@ -130,6 +151,7 @@ export class OllamaService {
|
|||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
let buffer = ''
|
||||
let lastPercent = -1
|
||||
pullResponse.data.on('data', (chunk: Buffer) => {
|
||||
buffer += chunk.toString()
|
||||
const lines = buffer.split('\n')
|
||||
|
|
@ -140,8 +162,11 @@ export class OllamaService {
|
|||
const parsed = JSON.parse(line)
|
||||
if (parsed.completed && parsed.total) {
|
||||
const percent = parseFloat(((parsed.completed / parsed.total) * 100).toFixed(2))
|
||||
this.broadcastDownloadProgress(model, percent)
|
||||
if (progressCallback) progressCallback(percent)
|
||||
if (percent !== lastPercent) {
|
||||
lastPercent = percent
|
||||
this.broadcastDownloadProgress(model, percent)
|
||||
if (progressCallback) progressCallback(percent)
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore parse errors on partial lines
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import logger from '@adonisjs/core/services/logger'
|
|||
import { ExtractZIMChunkingStrategy, ExtractZIMContentOptions, ZIMContentChunk, ZIMArchiveMetadata } from '../../types/zim.js'
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import { access } from 'node:fs/promises'
|
||||
import { isValidZimFile } from '../utils/fs.js'
|
||||
|
||||
export class ZIMExtractionService {
|
||||
|
||||
|
|
@ -51,7 +52,13 @@ export class ZIMExtractionService {
|
|||
logger.error(`[ZIMExtractionService]: ZIM file not accessible: ${filePath}`)
|
||||
throw new Error(`ZIM file not found or not accessible: ${filePath}`)
|
||||
}
|
||||
|
||||
|
||||
// Validate ZIM magic number before opening with native library.
|
||||
// A corrupted file causes a native C++ abort that cannot be caught by JS.
|
||||
if (!(await isValidZimFile(filePath))) {
|
||||
throw new Error(`ZIM file is invalid or corrupted: ${filePath}`)
|
||||
}
|
||||
|
||||
const archive = new Archive(filePath)
|
||||
|
||||
// Extract archive-level metadata once
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { mkdir, readdir, readFile, stat, unlink } from 'fs/promises'
|
||||
import { mkdir, open, readdir, readFile, stat, unlink } from 'fs/promises'
|
||||
import path, { join } from 'path'
|
||||
import { FileEntry } from '../../types/files.js'
|
||||
import { createReadStream } from 'fs'
|
||||
|
|
@ -99,6 +99,28 @@ export async function getFileStatsIfExists(
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that a file has the ZIM magic number (0x44D495A).
|
||||
* Must be called before passing a file to @openzim/libzim Archive,
|
||||
* because a corrupted ZIM causes a native C++ abort that cannot be
|
||||
* caught by JS try/catch.
|
||||
*/
|
||||
export async function isValidZimFile(filePath: string): Promise<boolean> {
|
||||
let fh
|
||||
try {
|
||||
fh = await open(filePath, 'r')
|
||||
const buf = Buffer.alloc(4)
|
||||
const { bytesRead } = await fh.read(buf, 0, 4, 0)
|
||||
if (bytesRead < 4) return false
|
||||
// ZIM magic number: 72 17 32 04 (little-endian 0x044D4953)
|
||||
return buf[0] === 0x5a && buf[1] === 0x49 && buf[2] === 0x4d && buf[3] === 0x04
|
||||
} catch {
|
||||
return false
|
||||
} finally {
|
||||
await fh?.close()
|
||||
}
|
||||
}
|
||||
|
||||
export async function deleteFileIfExists(path: string): Promise<void> {
|
||||
try {
|
||||
await unlink(path)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user