project-nomad/admin/app/utils/fs.ts
Jake Turner c8cb79a3a5 fix: prevent ZIM corrupt file crash and deduplicate Ollama download logs (#741)
Corrupted ZIM files cause a native C++ abort (ZimFileFormatError) that
bypasses JS try/catch and kills the process. Add magic number validation
before passing files to @openzim/libzim so invalid files are skipped
gracefully. Also deduplicate Ollama download progress broadcasts — both
within a single stream (skip unchanged percentages) and across concurrent
callers (share one download promise per model).

Co-authored-by: aegisman <aegis@manicode.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 14:26:28 -07:00

201 lines
5.5 KiB
TypeScript

import { mkdir, open, readdir, readFile, stat, unlink } from 'fs/promises'
import path, { join } from 'path'
import { FileEntry } from '../../types/files.js'
import { createReadStream } from 'fs'
import { LSBlockDevice, NomadDiskInfoRaw } from '../../types/system.js'
export const ZIM_STORAGE_PATH = '/storage/zim'
export const KIWIX_LIBRARY_XML_PATH = '/storage/zim/kiwix-library.xml'
export async function listDirectoryContents(path: string): Promise<FileEntry[]> {
const entries = await readdir(path, { withFileTypes: true })
const results: FileEntry[] = []
for (const entry of entries) {
if (entry.isFile()) {
results.push({
type: 'file',
key: join(path, entry.name),
name: entry.name,
})
} else if (entry.isDirectory()) {
results.push({
type: 'directory',
prefix: join(path, entry.name),
name: entry.name,
})
}
}
return results
}
export async function listDirectoryContentsRecursive(path: string): Promise<FileEntry[]> {
let results: FileEntry[] = []
const entries = await readdir(path, { withFileTypes: true })
for (const entry of entries) {
const fullPath = join(path, entry.name)
if (entry.isDirectory()) {
const subdirectoryContents = await listDirectoryContentsRecursive(fullPath)
results = results.concat(subdirectoryContents)
} else {
results.push({
type: 'file',
key: fullPath,
name: entry.name,
})
}
}
return results
}
export async function ensureDirectoryExists(path: string): Promise<void> {
try {
await stat(path)
} catch (error: any) {
if (error.code === 'ENOENT') {
await mkdir(path, { recursive: true })
}
}
}
export async function getFile(path: string, returnType: 'buffer'): Promise<Buffer | null>
export async function getFile(
path: string,
returnType: 'stream'
): Promise<NodeJS.ReadableStream | null>
export async function getFile(path: string, returnType: 'string'): Promise<string | null>
export async function getFile(
path: string,
returnType: 'buffer' | 'string' | 'stream' = 'buffer'
): Promise<Buffer | string | NodeJS.ReadableStream | null> {
try {
if (returnType === 'string') {
return await readFile(path, 'utf-8')
} else if (returnType === 'stream') {
return createReadStream(path)
}
return await readFile(path)
} catch (error: any) {
if (error.code === 'ENOENT') {
return null
}
throw error
}
}
export async function getFileStatsIfExists(
path: string
): Promise<{ size: number; modifiedTime: Date } | null> {
try {
const stats = await stat(path)
return {
size: stats.size,
modifiedTime: stats.mtime,
}
} catch (error: any) {
if (error.code === 'ENOENT') {
return null
}
throw error
}
}
/**
* Validates that a file has the ZIM magic number (0x44D495A).
* Must be called before passing a file to @openzim/libzim Archive,
* because a corrupted ZIM causes a native C++ abort that cannot be
* caught by JS try/catch.
*/
export async function isValidZimFile(filePath: string): Promise<boolean> {
let fh
try {
fh = await open(filePath, 'r')
const buf = Buffer.alloc(4)
const { bytesRead } = await fh.read(buf, 0, 4, 0)
if (bytesRead < 4) return false
// ZIM magic number: 72 17 32 04 (little-endian 0x044D4953)
return buf[0] === 0x5a && buf[1] === 0x49 && buf[2] === 0x4d && buf[3] === 0x04
} catch {
return false
} finally {
await fh?.close()
}
}
export async function deleteFileIfExists(path: string): Promise<void> {
try {
await unlink(path)
} catch (error: any) {
if (error.code !== 'ENOENT') {
throw error
}
}
}
export function getAllFilesystems(
device: LSBlockDevice,
fsSize: NomadDiskInfoRaw['fsSize']
): NomadDiskInfoRaw['fsSize'] {
const filesystems: NomadDiskInfoRaw['fsSize'] = []
const seen = new Set()
function traverse(dev: LSBlockDevice) {
// Try to find matching filesystem
const fs = fsSize.find((f) => matchesDevice(f.fs, dev.name))
if (fs && !seen.has(fs.fs)) {
filesystems.push(fs)
seen.add(fs.fs)
}
// Traverse children recursively
if (dev.children) {
dev.children.forEach((child) => traverse(child))
}
}
traverse(device)
return filesystems
}
export function matchesDevice(fsPath: string, deviceName: string): boolean {
// Remove /dev/ and /dev/mapper/ prefixes
const normalized = fsPath.replace('/dev/mapper/', '').replace('/dev/', '')
// Direct match (covers /dev/sda1 ↔ sda1, /dev/nvme0n1p1 ↔ nvme0n1p1)
if (normalized === deviceName) {
return true
}
// LVM/device-mapper: e.g., /dev/mapper/ubuntu--vg-ubuntu--lv contains "ubuntu--lv"
if (fsPath.startsWith('/dev/mapper/') && fsPath.includes(deviceName)) {
return true
}
return false
}
export function determineFileType(filename: string): 'image' | 'pdf' | 'text' | 'epub' | 'zim' | 'unknown' {
const ext = path.extname(filename).toLowerCase()
if (['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'].includes(ext)) {
return 'image'
} else if (ext === '.pdf') {
return 'pdf'
} else if (['.txt', '.md', '.docx', '.rtf'].includes(ext)) {
return 'text'
} else if (ext === '.epub') {
return 'epub'
} else if (ext === '.zim') {
return 'zim'
} else {
return 'unknown'
}
}
/**
* Sanitize a filename by removing potentially dangerous characters.
* @param filename The original filename
* @returns The sanitized filename
*/
export function sanitizeFilename(filename: string): string {
return filename.replace(/[^a-zA-Z0-9._-]/g, '_')
}