// Mirror of https://github.com/Crosstalk-Solutions/project-nomad.git
// Synced 2026-04-05 00:06:17 +02:00 · 286 lines · 9.5 KiB · TypeScript
import { XMLBuilder, XMLParser } from 'fast-xml-parser'
|
|
import { readFile, writeFile, rename, readdir } from 'fs/promises'
|
|
import { join } from 'path'
|
|
import { Archive } from '@openzim/libzim'
|
|
import { KIWIX_LIBRARY_XML_PATH, ZIM_STORAGE_PATH, ensureDirectoryExists } from '../utils/fs.js'
|
|
import logger from '@adonisjs/core/services/logger'
|
|
import { randomUUID } from 'node:crypto'
|
|
|
|
/**
 * Directory prefix used for every ZIM path written into the library XML
 * (books are recorded as `<CONTAINER_DATA_PATH>/<filename>`).
 */
const CONTAINER_DATA_PATH = '/data'

/** XML prolog prepended to the serialized library document. */
const XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>\n'
|
|
|
|
/**
 * One `<book>` entry of the Kiwix library XML.
 *
 * `id`, `path` and `title` are always written; every other field is emitted
 * as an XML attribute only when it is defined.
 */
interface KiwixBook {
  /** ZIM archive UUID when it can be read, otherwise the filename without `.zim`. */
  id: string
  /** Location of the ZIM file recorded in the XML (`/data/<filename>`). */
  path: string
  /** `Title` metadata of the ZIM, or a title derived from the filename. */
  title: string

  /** `Description` metadata of the ZIM. */
  description?: string
  /** `Language` metadata of the ZIM. */
  language?: string
  /** `Creator` metadata of the ZIM. */
  creator?: string
  /** `Publisher` metadata of the ZIM. */
  publisher?: string
  /** `Name` metadata of the ZIM. */
  name?: string
  /** `Flavour` metadata of the ZIM. */
  flavour?: string
  /** `Tags` metadata of the ZIM. */
  tags?: string

  /** MIME type of the illustration stored in `favicon`. */
  faviconMimeType?: string
  /** Base64-encoded illustration data read from the ZIM. */
  favicon?: string

  /** `Date` metadata of the ZIM. */
  date?: string
  /** Article count reported by the archive. */
  articleCount?: number
  /** Media count reported by the archive. */
  mediaCount?: number
  /** Archive file size divided by 1024, rounded down. */
  size?: number
}
|
|
|
|
/**
 * Maintains the Kiwix library XML file (a `<library>` document with one
 * `<book>` element per ZIM file) stored at `KIWIX_LIBRARY_XML_PATH`.
 *
 * The file can be rebuilt from the ZIM storage directory or updated
 * incrementally when a single ZIM file is added or removed. Every write goes
 * through a temporary file that is then renamed over the target.
 */
export class KiwixLibraryService {
  /** File extension identifying ZIM archives. */
  private static readonly ZIM_EXTENSION = '.zim'

  /** Matches a trailing `YYYY-MM` filename segment. */
  private static readonly DATE_SEGMENT = /^\d{4}-\d{2}$/

  /** Optional `<book>` attributes, in the order they are written to the XML. */
  private static readonly OPTIONAL_BOOK_ATTRIBUTES = [
    'description',
    'language',
    'creator',
    'publisher',
    'name',
    'flavour',
    'tags',
    'faviconMimeType',
    'favicon',
    'date',
    'articleCount',
    'mediaCount',
    'size',
  ] as const

  /** Absolute path of the library XML, resolved against the current working directory. */
  getLibraryFilePath(): string {
    return join(process.cwd(), KIWIX_LIBRARY_XML_PATH)
  }

  /** Path of the library XML under the container data directory. */
  containerLibraryPath(): string {
    return `${CONTAINER_DATA_PATH}/kiwix-library.xml`
  }

  /**
   * Derives a human-readable title from a ZIM filename: strips the `.zim`
   * extension, splits on underscores, drops a trailing `YYYY-MM` segment and
   * capitalises each remaining segment.
   *
   * Empty segments (from doubled, leading or trailing underscores) are skipped
   * so the title never contains consecutive spaces.
   */
  private _filenameToTitle(filename: string): string {
    const stem = filename.endsWith(KiwixLibraryService.ZIM_EXTENSION)
      ? filename.slice(0, -KiwixLibraryService.ZIM_EXTENSION.length)
      : filename
    const segments = stem.split('_').filter((segment) => segment.length > 0)

    const last = segments[segments.length - 1]
    const endsWithDate = last !== undefined && KiwixLibraryService.DATE_SEGMENT.test(last)
    // Keep the date when it is the only segment, otherwise the title would be empty.
    const titleSegments = endsWithDate && segments.length > 1 ? segments.slice(0, -1) : segments

    return titleSegments.map((s) => s.charAt(0).toUpperCase() + s.slice(1)).join(' ')
  }

  /** Normalises a name to a ZIM filename by appending `.zim` when it is missing. */
  private _toZimFilename(filename: string): string {
    return filename.endsWith(KiwixLibraryService.ZIM_EXTENSION)
      ? filename
      : `${filename}${KiwixLibraryService.ZIM_EXTENSION}`
  }

  /** Path recorded in the library XML for the given ZIM filename. */
  private _containerPathFor(zimFilename: string): string {
    return `${CONTAINER_DATA_PATH}/${zimFilename}`
  }

  /** Returns true when `err` is an object carrying the `ENOENT` error code. */
  private _isNotFoundError(err: unknown): boolean {
    return typeof err === 'object' && err !== null && (err as { code?: unknown }).code === 'ENOENT'
  }

  /** Extracts a printable message from an arbitrary thrown value. */
  private _describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err)
  }

  /**
   * Reads all kiwix-manage-compatible metadata from a ZIM file, including the internal UUID,
   * rich text fields, and the base64-encoded favicon. Kiwix-serve uses the UUID for OPDS
   * catalog entries and illustration URLs (/catalog/v2/illustration/{uuid}).
   *
   * Returns null on any error so callers can fall back gracefully; the failure
   * is logged as a warning rather than discarded silently.
   */
  private _readZimMetadata(zimFilePath: string): Partial<KiwixBook> | null {
    try {
      const archive = new Archive(zimFilePath)

      // A missing or unreadable metadata key is treated the same as an empty one.
      const getMeta = (key: string): string | undefined => {
        try {
          return archive.getMetadata(key) || undefined
        } catch {
          return undefined
        }
      }

      let favicon: string | undefined
      let faviconMimeType: string | undefined
      try {
        if (archive.illustrationSizes.size > 0) {
          // Prefer the 48px illustration; otherwise take the first size listed.
          const size = archive.illustrationSizes.has(48)
            ? 48
            : ([...archive.illustrationSizes][0] as number)
          const item = archive.getIllustrationItem(size)
          favicon = item.data.data.toString('base64')
          faviconMimeType = item.mimetype || undefined
        }
      } catch {
        // ZIM has no illustration — that's fine
      }

      const rawFilesize =
        typeof archive.filesize === 'bigint' ? Number(archive.filesize) : archive.filesize
      // Omit the size instead of recording NaN when the archive reports no usable value.
      const size = Number.isFinite(rawFilesize) ? Math.floor(rawFilesize / 1024) : undefined

      return {
        id: archive.uuid || undefined,
        title: getMeta('Title'),
        description: getMeta('Description'),
        language: getMeta('Language'),
        creator: getMeta('Creator'),
        publisher: getMeta('Publisher'),
        name: getMeta('Name'),
        flavour: getMeta('Flavour'),
        tags: getMeta('Tags'),
        date: getMeta('Date'),
        articleCount: archive.articleCount,
        mediaCount: archive.mediaCount,
        size,
        favicon,
        faviconMimeType,
      }
    } catch (err: unknown) {
      logger.warn(
        `[KiwixLibraryService] Could not read ZIM metadata from ${zimFilePath}: ${this._describeError(err)}`
      )
      return null
    }
  }

  /**
   * Builds the library entry for a ZIM file in the storage directory.
   *
   * Metadata read from the archive is used where available; `id` falls back to
   * the filename without extension and `title` to a filename-derived title.
   * `path` is always the container path, never taken from ZIM metadata.
   *
   * @param zimFilename filename ending in `.zim`, relative to the ZIM storage directory
   */
  private _bookFromZimFile(zimFilename: string): KiwixBook {
    const meta = this._readZimMetadata(join(process.cwd(), ZIM_STORAGE_PATH, zimFilename))
    return {
      ...meta,
      id: meta?.id ?? zimFilename.slice(0, -KiwixLibraryService.ZIM_EXTENSION.length),
      path: this._containerPathFor(zimFilename),
      title: meta?.title ?? this._filenameToTitle(zimFilename),
    }
  }

  /**
   * Serialises the given books into the library XML document, including the
   * XML declaration. Optional fields are written only when defined.
   */
  private _buildXml(books: KiwixBook[]): string {
    const builder = new XMLBuilder({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      format: true,
      suppressEmptyNode: false,
    })

    const toBookNode = (book: KiwixBook): Record<string, unknown> => {
      const node: Record<string, unknown> = {
        '@_id': book.id,
        '@_path': book.path,
        '@_title': book.title,
      }
      for (const key of KiwixLibraryService.OPTIONAL_BOOK_ATTRIBUTES) {
        const value = book[key]
        if (value !== undefined) {
          node[`@_${key}`] = value
        }
      }
      return node
    }

    const library: Record<string, unknown> = { '@_version': '20110515' }
    // An empty library is written without any `book` key at all.
    if (books.length > 0) {
      library.book = books.map(toBookNode)
    }

    return XML_DECLARATION + builder.build({ library })
  }

  /**
   * Writes the library XML by first writing a uniquely named temporary file
   * next to the target and then renaming it over the target.
   */
  private async _atomicWrite(content: string): Promise<void> {
    const filePath = this.getLibraryFilePath()
    const tmpPath = `${filePath}.tmp.${randomUUID()}`
    await writeFile(tmpPath, content, 'utf-8')
    await rename(tmpPath, filePath)
  }

  /**
   * Parses library XML into book entries. Entries without an `id` or `path`
   * are discarded; numeric attributes that do not parse to a finite number
   * are dropped rather than carried along as NaN.
   */
  private _parseExistingBooks(xmlContent: string): KiwixBook[] {
    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      // Always yield an array, even when the library holds a single <book>.
      isArray: (name) => name === 'book',
    })

    const parsed: { library?: { book?: Array<Record<string, unknown>> } } | undefined =
      parser.parse(xmlContent)
    const rawBooks = parsed?.library?.book ?? []

    const text = (value: unknown): string | undefined =>
      value === undefined || value === null ? undefined : String(value)
    const count = (value: unknown): number | undefined => {
      if (value === undefined) return undefined
      const parsedNumber = Number(value)
      return Number.isFinite(parsedNumber) ? parsedNumber : undefined
    }

    return rawBooks
      .map(
        (b): KiwixBook => ({
          id: text(b['@_id']) ?? '',
          path: text(b['@_path']) ?? '',
          title: text(b['@_title']) ?? '',
          description: text(b['@_description']),
          language: text(b['@_language']),
          creator: text(b['@_creator']),
          publisher: text(b['@_publisher']),
          name: text(b['@_name']),
          flavour: text(b['@_flavour']),
          tags: text(b['@_tags']),
          faviconMimeType: text(b['@_faviconMimeType']),
          favicon: text(b['@_favicon']),
          date: text(b['@_date']),
          articleCount: count(b['@_articleCount']),
          mediaCount: count(b['@_mediaCount']),
          size: count(b['@_size']),
        })
      )
      .filter((book) => book.id !== '' && book.path !== '')
  }

  /**
   * Reads and parses the current library XML.
   *
   * @returns the parsed books, or null when the library file does not exist
   * @throws any read error other than `ENOENT`
   */
  private async _readExistingBooks(): Promise<KiwixBook[] | null> {
    try {
      const content = await readFile(this.getLibraryFilePath(), 'utf-8')
      return this._parseExistingBooks(content)
    } catch (err: unknown) {
      if (this._isNotFoundError(err)) {
        return null
      }
      throw err
    }
  }

  /**
   * Regenerates the library XML from the `.zim` files currently present in
   * the ZIM storage directory.
   *
   * @param opts.excludeFilenames filenames to leave out of the rebuilt library
   */
  async rebuildFromDisk(opts?: { excludeFilenames?: string[] }): Promise<void> {
    const dirPath = join(process.cwd(), ZIM_STORAGE_PATH)
    await ensureDirectoryExists(dirPath)

    let entries: string[] = []
    try {
      entries = await readdir(dirPath)
    } catch (err: unknown) {
      // Best effort: continue with an empty listing, but leave a trace of why.
      logger.warn(
        `[KiwixLibraryService] Could not list ${dirPath}: ${this._describeError(err)}`
      )
    }

    const excluded = new Set(opts?.excludeFilenames ?? [])
    const books = entries
      .filter((name) => name.endsWith(KiwixLibraryService.ZIM_EXTENSION) && !excluded.has(name))
      .map((name) => this._bookFromZimFile(name))

    await this._atomicWrite(this._buildXml(books))
    logger.info(`[KiwixLibraryService] Rebuilt library XML with ${books.length} book(s).`)
  }

  /**
   * Adds a ZIM file to the library XML.
   *
   * When the library file does not exist yet, the whole library is rebuilt
   * from disk instead. A file whose container path is already listed is
   * skipped.
   *
   * @param filename ZIM filename, with or without the `.zim` extension
   * @throws any error reading the library other than `ENOENT`
   */
  async addBook(filename: string): Promise<void> {
    const zimFilename = this._toZimFilename(filename)
    const containerPath = this._containerPathFor(zimFilename)

    const existingBooks = await this._readExistingBooks()
    if (existingBooks === null) {
      // XML doesn't exist yet — rebuild from disk; the completed download is already there
      await this.rebuildFromDisk()
      return
    }

    if (existingBooks.some((b) => b.path === containerPath)) {
      logger.info(`[KiwixLibraryService] ${zimFilename} already in library, skipping.`)
      return
    }

    const books = [...existingBooks, this._bookFromZimFile(zimFilename)]
    await this._atomicWrite(this._buildXml(books))
    logger.info(`[KiwixLibraryService] Added ${zimFilename} to library XML.`)
  }

  /**
   * Removes a ZIM file from the library XML. Does nothing (beyond logging)
   * when the library file is missing or the file is not listed.
   *
   * @param filename ZIM filename, with or without the `.zim` extension
   * @throws any error reading the library other than `ENOENT`
   */
  async removeBook(filename: string): Promise<void> {
    const zimFilename = this._toZimFilename(filename)
    const containerPath = this._containerPathFor(zimFilename)

    const existingBooks = await this._readExistingBooks()
    if (existingBooks === null) {
      logger.warn(`[KiwixLibraryService] Library XML not found, nothing to remove.`)
      return
    }

    const remaining = existingBooks.filter((b) => b.path !== containerPath)
    if (remaining.length === existingBooks.length) {
      logger.info(`[KiwixLibraryService] ${zimFilename} not found in library, nothing to remove.`)
      return
    }

    await this._atomicWrite(this._buildXml(remaining))
    logger.info(`[KiwixLibraryService] Removed ${zimFilename} from library XML.`)
  }
}
|