diff --git a/admin/app/controllers/rag_controller.ts b/admin/app/controllers/rag_controller.ts index 7af9cc4..0a25336 100644 --- a/admin/app/controllers/rag_controller.ts +++ b/admin/app/controllers/rag_controller.ts @@ -5,7 +5,6 @@ import type { HttpContext } from '@adonisjs/core/http' import app from '@adonisjs/core/services/app' import { randomBytes } from 'node:crypto' import { sanitizeFilename } from '../utils/fs.js' -import { stat } from 'node:fs/promises' import { getJobStatusSchema } from '#validators/rag' @inject() @@ -28,20 +27,10 @@ export default class RagController { name: fileName, }) - // Get file size for tracking - let fileSize: number | undefined = undefined - try { - const stats = await stat(fullPath) - fileSize = stats.size - } catch (error) { - // Not critical if we can't get file size, just swallow the error - } - // Dispatch background job for embedding const result = await EmbedFileJob.dispatch({ filePath: fullPath, fileName, - fileSize, }) return response.status(202).json({ diff --git a/admin/app/jobs/embed_file_job.ts b/admin/app/jobs/embed_file_job.ts index 32b697e..24dd289 100644 --- a/admin/app/jobs/embed_file_job.ts +++ b/admin/app/jobs/embed_file_job.ts @@ -10,6 +10,10 @@ export interface EmbedFileJobParams { filePath: string fileName: string fileSize?: number + // Batch processing for large ZIM files + batchOffset?: number // Current batch offset (for ZIM files) + totalArticles?: number // Total articles in ZIM (for progress tracking) + isFinalBatch?: boolean // Whether this is the last batch (prevents premature deletion) } export class EmbedFileJob { @@ -26,9 +30,11 @@ export class EmbedFileJob { } async handle(job: Job) { - const { filePath, fileName } = job.data as EmbedFileJobParams + const { filePath, fileName, batchOffset, totalArticles } = job.data as EmbedFileJobParams - logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}`) + const isZimBatch = batchOffset !== undefined + const batchInfo = isZimBatch ? ` (batch offset: ${batchOffset})` : '' + logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}${batchInfo}`) const dockerService = new DockerService() const ollamaService = new OllamaService() @@ -55,30 +61,78 @@ export class EmbedFileJob { await job.updateData({ ...job.data, status: 'processing', - startedAt: Date.now(), + startedAt: job.data.startedAt || Date.now(), }) logger.info(`[EmbedFileJob] Processing file: ${filePath}`) // Process and embed the file - const result = await ragService.processAndEmbedFile(filePath) + // Only allow deletion if explicitly marked as final batch + const allowDeletion = job.data.isFinalBatch === true + const result = await ragService.processAndEmbedFile( + filePath, + allowDeletion, + batchOffset + ) if (!result.success) { logger.error(`[EmbedFileJob] Failed to process file ${fileName}: ${result.message}`) throw new Error(result.message) } - // Update progress complete + // For ZIM files with batching, check if more batches are needed + if (result.hasMoreBatches) { + const nextOffset = (batchOffset || 0) + (result.articlesProcessed || 0) + logger.info( + `[EmbedFileJob] Batch complete. Dispatching next batch at offset ${nextOffset}` + ) + + // Dispatch next batch (not final yet) + await EmbedFileJob.dispatch({ + filePath, + fileName, + batchOffset: nextOffset, + totalArticles: totalArticles || result.totalArticles, + isFinalBatch: false, // Explicitly not final + }) + + // Calculate progress based on articles processed + const progress = totalArticles + ? Math.round((nextOffset / totalArticles) * 100) + : 50 + + await job.updateProgress(progress) + await job.updateData({ + ...job.data, + status: 'batch_completed', + lastBatchAt: Date.now(), + chunks: (job.data.chunks || 0) + (result.chunks || 0), + }) + + return { + success: true, + fileName, + filePath, + chunks: result.chunks, + hasMoreBatches: true, + nextOffset, + message: `Batch embedded ${result.chunks} chunks, next batch queued`, + } + } + + // Final batch or non-batched file - mark as complete + const totalChunks = (job.data.chunks || 0) + (result.chunks || 0) await job.updateProgress(100) await job.updateData({ ...job.data, status: 'completed', completedAt: Date.now(), - chunks: result.chunks, + chunks: totalChunks, }) + const batchMsg = isZimBatch ? ` (final batch, total chunks: ${totalChunks})` : '' logger.info( - `[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}` + `[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}${batchMsg}` ) return { diff --git a/admin/app/jobs/run_download_job.ts b/admin/app/jobs/run_download_job.ts index f899c5d..e3df7cb 100644 --- a/admin/app/jobs/run_download_job.ts +++ b/admin/app/jobs/run_download_job.ts @@ -6,6 +6,7 @@ import { createHash } from 'crypto' import { DockerService } from '#services/docker_service' import { ZimService } from '#services/zim_service' import { MapService } from '#services/map_service' +import { EmbedFileJob } from './embed_file_job.js' export class RunDownloadJob { static get queue() { @@ -24,17 +25,6 @@ export class RunDownloadJob { const { url, filepath, timeout, allowedMimeTypes, forceNew, filetype } = job.data as RunDownloadJobParams - // console.log("Simulating delay for job for URL:", url) - // await new Promise((resolve) => setTimeout(resolve, 30000)) // Simulate initial delay - // console.log("Starting download for URL:", url) - - // // simulate progress updates for demonstration - // for (let progress = 0; progress <= 100; progress += 10) { - // await new Promise((resolve) => setTimeout(resolve, 20000)) // Simulate time taken for each progress step - // job.updateProgress(progress) - // console.log(`Job progress for URL ${url}: ${progress}%`) - // } - await doResumableDownload({ url, filepath, @@ -51,6 +41,16 @@ export class RunDownloadJob { const dockerService = new DockerService() const zimService = new ZimService(dockerService) await zimService.downloadRemoteSuccessCallback([url], true) + + // Dispatch an embedding job for the downloaded ZIM file + try { + await EmbedFileJob.dispatch({ + fileName: url.split('/').pop() || '', + filePath: filepath, + }) + } catch (error) { + console.error(`[RunDownloadJob] Error dispatching EmbedFileJob for URL ${url}:`, error) + } } else if (filetype === 'map') { const mapsService = new MapService() await mapsService.downloadRemoteSuccessCallback([url], false) diff --git a/admin/app/services/docs_service.ts b/admin/app/services/docs_service.ts index 6d9fe12..5669c2c 100644 --- a/admin/app/services/docs_service.ts +++ b/admin/app/services/docs_service.ts @@ -3,6 +3,7 @@ import { streamToString } from '../../util/docs.js' import { getFile, getFileStatsIfExists, listDirectoryContentsRecursive } from '../utils/fs.js' import path from 'path' import InternalServerErrorException from '#exceptions/internal_server_error_exception' +import logger from '@adonisjs/core/services/logger' export class DocsService { private docsPath = path.join(process.cwd(), 'docs') @@ -46,13 +47,13 @@ export class DocsService { // Filter out attribute-undefined errors which may be caused by emojis and special characters const criticalErrors = errors.filter((e) => e.error.id !== 'attribute-undefined') if (criticalErrors.length > 0) { - console.error('Markdoc validation errors:', errors.map((e) => JSON.stringify(e.error)).join(', ')) + logger.error('Markdoc validation errors:', errors.map((e) => JSON.stringify(e.error)).join(', ')) throw new Error('Markdoc validation failed') } return Markdoc.transform(ast, config) } catch (error) { - console.log('Error parsing Markdoc content:', error) + logger.error('Error parsing Markdoc content:', error) throw new InternalServerErrorException(`Error parsing content: ${(error as Error).message}`) } } diff --git a/admin/app/services/rag_service.ts b/admin/app/services/rag_service.ts index 87a5870..d63dedc 100644 --- a/admin/app/services/rag_service.ts +++ b/admin/app/services/rag_service.ts @@ -15,6 +15,8 @@ import { randomUUID } from 'node:crypto' import { join } from 'node:path' import KVStore from '#models/kv_store' import { parseBoolean } from '../utils/misc.js' +import { ZIMExtractionService } from './zim_extraction_service.js' +import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js' @inject() export class RagService { @@ -38,6 +40,67 @@ export class RagService { private ollamaService: OllamaService ) { } + private async _initializeQdrantClient() { + if (!this.qdrantInitPromise) { + this.qdrantInitPromise = (async () => { + const qdrantUrl = await this.dockerService.getServiceURL(SERVICE_NAMES.QDRANT) + if (!qdrantUrl) { + throw new Error('Qdrant service is not installed or running.') + } + this.qdrant = new QdrantClient({ url: qdrantUrl }) + })() + } + return this.qdrantInitPromise + } + + private async _ensureDependencies() { + if (!this.qdrant) { + await this._initializeQdrantClient() + } + } + + private async _ensureCollection( + collectionName: string, + dimensions: number = RagService.EMBEDDING_DIMENSION + ) { + try { + await this._ensureDependencies() + const collections = await this.qdrant!.getCollections() + const collectionExists = collections.collections.some((col) => col.name === collectionName) + + if (!collectionExists) { + await this.qdrant!.createCollection(collectionName, { + vectors: { + size: dimensions, + distance: 'Cosine', + }, + }) + } + } catch (error) { + logger.error('Error ensuring Qdrant collection:', error) + throw error + } + } + + /** + * Sanitizes text to ensure it's safe for JSON encoding and Qdrant storage. + * Removes problematic characters that can cause "unexpected end of hex escape" errors: + * - Null bytes (\x00) + * - Invalid Unicode sequences + * - Control characters (except newlines, tabs, and carriage returns) + */ + private sanitizeText(text: string): string { + return text + // Null bytes + .replace(/\x00/g, '') + // Problematic control characters (keep \n, \r, \t) + .replace(/[\x01-\x08\x0B-\x0C\x0E-\x1F\x7F]/g, '') + // Invalid Unicode surrogates + .replace(/[\uD800-\uDFFF]/g, '') + // Trim extra whitespace + .trim() + } + /** * Estimates token count for text. This is a conservative approximation: * - English text: ~1 token per 3 characters @@ -114,48 +177,6 @@ export class RagService { return [...new Set(keywords)] } - private async _initializeQdrantClient() { - if (!this.qdrantInitPromise) { - this.qdrantInitPromise = (async () => { - const qdrantUrl = await this.dockerService.getServiceURL(SERVICE_NAMES.QDRANT) - if (!qdrantUrl) { - throw new Error('Qdrant service is not installed or running.') - } - this.qdrant = new QdrantClient({ url: qdrantUrl }) - })() - } - return this.qdrantInitPromise - } - - private async _ensureDependencies() { - if (!this.qdrant) { - await this._initializeQdrantClient() - } - } - - private async _ensureCollection( - collectionName: string, - dimensions: number = RagService.EMBEDDING_DIMENSION - ) { - try { - await this._ensureDependencies() - const collections = await this.qdrant!.getCollections() - const collectionExists = collections.collections.some((col) => col.name === collectionName) - - if (!collectionExists) { - await this.qdrant!.createCollection(collectionName, { - vectors: { - size: dimensions, - distance: 'Cosine', - }, - }) - } - } catch (error) { - logger.error('Error ensuring Qdrant collection:', error) - throw error - } - } - public async embedAndStoreText( text: string, metadata: Record = {} @@ -237,21 +258,45 @@ export class RagService { const timestamp = Date.now() const points = chunks.map((chunkText, index) => { - // Extract keywords for hybrid search - const keywords = this.extractKeywords(chunkText) - logger.debug(`[RAG] Extracted keywords for chunk ${index}: [${keywords.join(', ')}]`) + // Sanitize text to prevent JSON encoding errors + const sanitizedText = this.sanitizeText(chunkText) + + // Extract keywords from content + const contentKeywords = this.extractKeywords(sanitizedText) + + // For ZIM content, also extract keywords from structural metadata + let structuralKeywords: string[] = [] + if (metadata.full_title) { + structuralKeywords = this.extractKeywords(metadata.full_title as string) + } else if (metadata.article_title) { + structuralKeywords = this.extractKeywords(metadata.article_title as string) + } + + // Combine and dedup keywords + const allKeywords = [...new Set([...structuralKeywords, ...contentKeywords])] + + logger.debug(`[RAG] Extracted keywords for chunk ${index}: [${allKeywords.join(', ')}]`) + if (structuralKeywords.length > 0) { + logger.debug(`[RAG] - Structural: [${structuralKeywords.join(', ')}], Content: [${contentKeywords.join(', ')}]`) + } + + // Sanitize source metadata as well + const sanitizedSource = typeof metadata.source === 'string' + ? this.sanitizeText(metadata.source) + : 'unknown' + return { id: randomUUID(), // qdrant requires either uuid or unsigned int vector: embeddings[index], payload: { ...metadata, - text: chunkText, + text: sanitizedText, chunk_index: index, total_chunks: chunks.length, - keywords: keywords.join(' '), // Store as space-separated string for text search - char_count: chunkText.length, + keywords: allKeywords.join(' '), // store as space-separated string for text search + char_count: sanitizedText.length, created_at: timestamp, - source: metadata.source || 'unknown' + source: sanitizedSource }, } }) @@ -269,12 +314,6 @@ export class RagService { } } - /** - * Preprocess an image to enhance text extraction quality. - * Normalizes, grayscales, sharpens, and resizes the image to a manageable size. - * @param filebuffer Buffer of the image file - * @returns - Processed image buffer - */ private async preprocessImage(filebuffer: Buffer): Promise { return await sharp(filebuffer) .grayscale() @@ -284,12 +323,6 @@ export class RagService { .toBuffer() } - /** - * If the original PDF has little to no extractable text, - * we can use this method to convert each page to an image for OCR processing. - * @param filebuffer - Buffer of the PDF file - * @returns - Array of image buffers, one per page - */ private async convertPDFtoImages(filebuffer: Buffer): Promise { const converted = await fromBuffer(filebuffer, { quality: 50, @@ -301,11 +334,6 @@ export class RagService { return converted.filter((res) => res.buffer).map((res) => res.buffer!) } - /** - * Extract text from a PDF file using pdf-parse. - * @param filebuffer - Buffer of the PDF file - * @returns - Extracted text - */ private async extractPDFText(filebuffer: Buffer): Promise { const parser = new PDFParse({ data: filebuffer }) const data = await parser.getText() @@ -313,20 +341,10 @@ export class RagService { return data.text } - /** - * Extract text from a plain text file. - * @param filebuffer - Buffer of the text file - * @returns - Extracted text - */ private async extractTXTText(filebuffer: Buffer): Promise { return filebuffer.toString('utf-8') } - /** - * Extract text from an image file using Tesseract.js OCR. - * @param filebuffer - Buffer of the image file - * @returns - Extracted text - */ private async extractImageText(filebuffer: Buffer): Promise { const worker = await createWorker('eng') const result = await worker.recognize(filebuffer) @@ -334,71 +352,229 @@ export class RagService { return result.data.text } + private async processImageFile(fileBuffer: Buffer): Promise { + const preprocessedBuffer = await this.preprocessImage(fileBuffer) + return await this.extractImageText(preprocessedBuffer) + } + + /** + * Will process the PDF and attempt to extract text. + * If the extracted text is minimal, it will fallback to OCR on each page. + */ + private async processPDFFile(fileBuffer: Buffer): Promise { + let extractedText = await this.extractPDFText(fileBuffer) + + // Check if there was no extracted text or it was very minimal + if (!extractedText || extractedText.trim().length < 100) { + logger.debug('[RAG] PDF text extraction minimal, attempting OCR on pages') + // Convert PDF pages to images for OCR if text extraction was poor + const imageBuffers = await this.convertPDFtoImages(fileBuffer) + extractedText = '' + + for (const imgBuffer of imageBuffers) { + const preprocessedImg = await this.preprocessImage(imgBuffer) + const pageText = await this.extractImageText(preprocessedImg) + extractedText += pageText + '\n' + } + } + + return extractedText + } + + /** + * Process a ZIM file: extract content with metadata and embed each chunk. + * Returns early with complete result since ZIM processing is self-contained. + * Supports batch processing to prevent lock timeouts on large ZIM files. + */ + private async processZIMFile( + filepath: string, + deleteAfterEmbedding: boolean, + batchOffset?: number + ): Promise<{ + success: boolean + message: string + chunks?: number + hasMoreBatches?: boolean + articlesProcessed?: number + totalArticles?: number + }> { + const zimExtractionService = new ZIMExtractionService() + + // Process in batches to avoid lock timeout + const startOffset = batchOffset || 0 + + logger.info( + `[RAG] Extracting ZIM content (batch: offset=${startOffset}, size=${ZIM_BATCH_SIZE})` + ) + + const zimChunks = await zimExtractionService.extractZIMContent(filepath, { + startOffset, + batchSize: ZIM_BATCH_SIZE, + }) + + logger.info( + `[RAG] Extracted ${zimChunks.length} chunks from ZIM file with enhanced metadata` + ) + + // Process each chunk individually with its metadata + let totalChunks = 0 + for (const zimChunk of zimChunks) { + const result = await this.embedAndStoreText(zimChunk.text, { + source: filepath, + content_type: 'zim_article', + + // Article-level context + article_title: zimChunk.articleTitle, + article_path: zimChunk.articlePath, + + // Section-level context + section_title: zimChunk.sectionTitle, + full_title: zimChunk.fullTitle, + hierarchy: zimChunk.hierarchy, + section_level: zimChunk.sectionLevel, + + // Use the same document ID for all chunks from the same article for grouping in search results + document_id: zimChunk.documentId, + + // Archive metadata + archive_title: zimChunk.archiveMetadata.title, + archive_creator: zimChunk.archiveMetadata.creator, + archive_publisher: zimChunk.archiveMetadata.publisher, + archive_date: zimChunk.archiveMetadata.date, + archive_language: zimChunk.archiveMetadata.language, + archive_description: zimChunk.archiveMetadata.description, + + // Extraction metadata - not overly relevant for search, but could be useful for debugging and future features... + extraction_strategy: zimChunk.strategy, + }) + + if (result) { + totalChunks += result.chunks + } + } + + // Count unique articles processed in this batch + const articlesInBatch = new Set(zimChunks.map((c) => c.documentId)).size + const hasMoreBatches = zimChunks.length === ZIM_BATCH_SIZE + + logger.info( + `[RAG] Successfully embedded ${totalChunks} total chunks from ${articlesInBatch} articles (hasMore: ${hasMoreBatches})` + ) + + // Only delete the file when: + // 1. deleteAfterEmbedding is true (caller wants deletion) + // 2. No more batches remain (this is the final batch) + // This prevents race conditions where early batches complete after later ones + const shouldDelete = deleteAfterEmbedding && !hasMoreBatches + if (shouldDelete) { + logger.info(`[RAG] Final batch complete, deleting ZIM file: ${filepath}`) + await deleteFileIfExists(filepath) + } else if (!hasMoreBatches) { + logger.info(`[RAG] Final batch complete, but file deletion was not requested`) + } + + return { + success: true, + message: hasMoreBatches + ? 'ZIM batch processed successfully. More batches remain.' + : 'ZIM file processed and embedded successfully with enhanced metadata.', + chunks: totalChunks, + hasMoreBatches, + articlesProcessed: articlesInBatch, + } + } + + private async processTextFile(fileBuffer: Buffer): Promise { + return await this.extractTXTText(fileBuffer) + } + + private async embedTextAndCleanup( + extractedText: string, + filepath: string, + deleteAfterEmbedding: boolean = false + ): Promise<{ success: boolean; message: string; chunks?: number }> { + if (!extractedText || extractedText.trim().length === 0) { + return { success: false, message: 'Process completed succesfully, but no text was found to embed.' } + } + + const embedResult = await this.embedAndStoreText(extractedText, { + source: filepath + }) + + if (!embedResult) { + return { success: false, message: 'Failed to embed and store the extracted text.' } + } + + if (deleteAfterEmbedding) { + logger.info(`[RAG] Embedding complete, deleting uploaded file: ${filepath}`) + await deleteFileIfExists(filepath) + } + + return { + success: true, + message: 'File processed and embedded successfully.', + chunks: embedResult.chunks, + } + } + /** * Main pipeline to process and embed an uploaded file into the RAG knowledge base. * This includes text extraction, chunking, embedding, and storing in Qdrant. + * + * Orchestrates file type detection and delegates to specialized processors. + * For ZIM files, supports batch processing via batchOffset parameter. */ public async processAndEmbedFile( - filepath: string, // Should already be the full path to the uploaded file - deleteAfterEmbedding: boolean = false - ): Promise<{ success: boolean; message: string; chunks?: number }> { + filepath: string, + deleteAfterEmbedding: boolean = false, + batchOffset?: number + ): Promise<{ + success: boolean + message: string + chunks?: number + hasMoreBatches?: boolean + articlesProcessed?: number + totalArticles?: number + }> { try { const fileType = determineFileType(filepath) + logger.debug(`[RAG] Processing file: ${filepath} (detected type: ${fileType})`) + if (fileType === 'unknown') { return { success: false, message: 'Unsupported file type.' } } - const origFileBuffer = await getFile(filepath, 'buffer') - if (!origFileBuffer) { + // Read file buffer (not needed for ZIM as it reads directly) + const fileBuffer = fileType !== 'zim' ? await getFile(filepath, 'buffer') : null + if (fileType !== 'zim' && !fileBuffer) { return { success: false, message: 'Failed to read the uploaded file.' } } - let extractedText = '' - - if (fileType === 'image') { - const preprocessedBuffer = await this.preprocessImage(origFileBuffer) - extractedText = await this.extractImageText(preprocessedBuffer) - } else if (fileType === 'pdf') { - extractedText = await this.extractPDFText(origFileBuffer) - // Check if there was no extracted text or it was very minimal - if (!extractedText || extractedText.trim().length < 100) { - // Convert PDF pages to images for OCR - const imageBuffers = await this.convertPDFtoImages(origFileBuffer) - for (const imgBuffer of imageBuffers) { - const preprocessedImg = await this.preprocessImage(imgBuffer) - const pageText = await this.extractImageText(preprocessedImg) - extractedText += pageText + '\n' - } - } - } else { - extractedText = await this.extractTXTText(origFileBuffer) + // Process based on file type + // ZIM files are handled specially since they have their own embedding workflow + if (fileType === 'zim') { + return await this.processZIMFile(filepath, deleteAfterEmbedding, batchOffset) } - if (!extractedText || extractedText.trim().length === 0) { - return { success: false, message: 'No text could be extracted from the file.' } + // Extract text based on file type + let extractedText: string + switch (fileType) { + case 'image': + extractedText = await this.processImageFile(fileBuffer!) + break + case 'pdf': + extractedText = await this.processPDFFile(fileBuffer!) + break + case 'text': + default: + extractedText = await this.processTextFile(fileBuffer!) + break } - const embedResult = await this.embedAndStoreText(extractedText, { - source: filepath - }) - - if (!embedResult) { - return { success: false, message: 'Failed to embed and store the extracted text.' } - } - - if (deleteAfterEmbedding) { - // Cleanup the file from disk - logger.info(`[RAG] Embedding complete, deleting uploaded file: ${filepath}`) - await deleteFileIfExists(filepath) - } - - return { - success: true, - message: 'File processed and embedded successfully.', - chunks: embedResult?.chunks, - } + // Embed extracted text and cleanup + return await this.embedTextAndCleanup(extractedText, filepath, deleteAfterEmbedding) } catch (error) { - logger.error('Error processing and embedding file:', error) + logger.error('[RAG] Error processing and embedding file:', error) return { success: false, message: 'Error processing and embedding file.' } } } @@ -497,6 +673,13 @@ export class RagService { keywords: (result.payload?.keywords as string) || '', chunk_index: (result.payload?.chunk_index as number) || 0, created_at: (result.payload?.created_at as number) || 0, + // Enhanced ZIM metadata (likely be undefined for non-ZIM content) + article_title: result.payload?.article_title as string | undefined, + section_title: result.payload?.section_title as string | undefined, + full_title: result.payload?.full_title as string | undefined, + hierarchy: result.payload?.hierarchy as string | undefined, + document_id: result.payload?.document_id as string | undefined, + content_type: result.payload?.content_type as string | undefined, })) const rerankedResults = this.rerankResults(resultsWithMetadata, keywords, query) @@ -508,7 +691,7 @@ export class RagService { ) }) - // Return top N results + // Return top N results with enhanced metadata return rerankedResults.slice(0, limit).map((result) => ({ text: result.text, score: result.finalScore, @@ -516,6 +699,13 @@ export class RagService { chunk_index: result.chunk_index, created_at: result.created_at, semantic_score: result.score, + // Enhanced ZIM metadata (likely be undefined for non-ZIM content) + article_title: result.article_title, + section_title: result.section_title, + full_title: result.full_title, + hierarchy: result.hierarchy, + document_id: result.document_id, + content_type: result.content_type, }, })) } catch (error) { @@ -544,6 +734,12 @@ export class RagService { keywords: string chunk_index: number created_at: number + article_title?: string + section_title?: string + full_title?: string + hierarchy?: string + document_id?: string + content_type?: string }>, queryKeywords: string[], originalQuery: string @@ -553,6 +749,12 @@ export class RagService { finalScore: number chunk_index: number created_at: number + article_title?: string + section_title?: string + full_title?: string + hierarchy?: string + document_id?: string + content_type?: string }> { return results .map((result) => { @@ -711,11 +913,9 @@ export class RagService { for (const fileInfo of filesToEmbed) { try { logger.info(`[RAG] Dispatching embed job for: ${fileInfo.source}`) - const stats = await getFileStatsIfExists(fileInfo.path) await EmbedFileJob.dispatch({ filePath: fileInfo.path, fileName: fileInfo.source, - fileSize: stats?.size, }) logger.info(`[RAG] Successfully dispatched job for ${fileInfo.source}`) } catch (fileError) { diff --git a/admin/app/services/zim_extraction_service.ts b/admin/app/services/zim_extraction_service.ts new file mode 100644 index 0000000..e60042d --- /dev/null +++ b/admin/app/services/zim_extraction_service.ts @@ -0,0 +1,310 @@ +import { Archive, Entry } from '@openzim/libzim' +import * as cheerio from 'cheerio' +import { HTML_SELECTORS_TO_REMOVE, NON_CONTENT_HEADING_PATTERNS } from '../../constants/zim_extraction.js' +import logger from '@adonisjs/core/services/logger' +import { ExtractZIMChunkingStrategy, ExtractZIMContentOptions, ZIMContentChunk, ZIMArchiveMetadata } from '../../types/zim.js' +import { randomUUID } from 'node:crypto' +import { access } from 'node:fs/promises' + +export class ZIMExtractionService { + + private extractArchiveMetadata(archive: Archive): ZIMArchiveMetadata { + try { + return { + title: archive.getMetadata('Title') || archive.getMetadata('Name') || 'Unknown', + creator: archive.getMetadata('Creator') || 'Unknown', + publisher: archive.getMetadata('Publisher') || 'Unknown', + date: archive.getMetadata('Date') || 'Unknown', + language: archive.getMetadata('Language') || 'Unknown', + description: archive.getMetadata('Description') || '', + } + } catch (error) { + logger.warn('[ZIMExtractionService]: Could not extract all metadata, using defaults', error) + return { + title: 'Unknown', + creator: 'Unknown', + publisher: 'Unknown', + date: 'Unknown', + language: 'Unknown', + description: '', + } + } + } + + /** + * Breaks out a ZIM file's entries into their structured content form + * to facilitate better indexing and retrieval. + * Returns enhanced chunks with full article context and metadata. + * + * @param filePath - Path to the ZIM file + * @param opts - Options including maxArticles, strategy, onProgress, startOffset, and batchSize + */ + async extractZIMContent(filePath: string, opts: ExtractZIMContentOptions = {}): Promise { + try { + logger.info(`[ZIMExtractionService]: Processing ZIM file at path: ${filePath}`) + + // defensive - check if file still exists before opening + // could have been deleted by another process or batch + try { + await access(filePath) + } catch (error) { + logger.error(`[ZIMExtractionService]: ZIM file not accessible: ${filePath}`) + throw new Error(`ZIM file not found or not accessible: ${filePath}`) + } + + const archive = new Archive(filePath) + + // Extract archive-level metadata once + const archiveMetadata = this.extractArchiveMetadata(archive) + logger.info(`[ZIMExtractionService]: Archive metadata - Title: ${archiveMetadata.title}, Language: ${archiveMetadata.language}`) + + let articlesProcessed = 0 + let articlesSkipped = 0 + const processedPaths = new Set() + const toReturn: ZIMContentChunk[] = [] + + // Support batch processing to avoid lock timeouts on large ZIM files + const startOffset = opts.startOffset || 0 + const batchSize = opts.batchSize || (opts.maxArticles || Infinity) + + for (const entry of archive.iterByPath()) { + // Skip articles until we reach the start offset + if (articlesSkipped < startOffset) { + if (this.isArticleEntry(entry) && !processedPaths.has(entry.path)) { + articlesSkipped++ + } + continue + } + + if (articlesProcessed >= batchSize) { + break + } + + if (!this.isArticleEntry(entry)) { + logger.debug(`[ZIMExtractionService]: Skipping non-article entry at path: ${entry.path}`) + continue + } + + if (processedPaths.has(entry.path)) { + logger.debug(`[ZIMExtractionService]: Skipping duplicate entry at path: ${entry.path}`) + continue + } + processedPaths.add(entry.path) + + const item = entry.item + const blob = item.data + const html = this.getCleanedHTMLString(blob.data) + + const strategy = opts.strategy || this.chooseChunkingStrategy(html); + logger.debug(`[ZIMExtractionService]: Chosen chunking strategy for path ${entry.path}: ${strategy}`) + + // Generate a unique document ID. All chunks from same article will share it + const documentId = randomUUID() + const articleTitle = entry.title || entry.path + + let chunks: ZIMContentChunk[] + + if (strategy === 'structured') { + const structured = this.extractStructuredContent(html) + chunks = structured.sections.map(s => ({ + text: s.text, + articleTitle, + articlePath: entry.path, + sectionTitle: s.heading, + fullTitle: `${articleTitle} - ${s.heading}`, + hierarchy: `${articleTitle} > ${s.heading}`, + sectionLevel: s.level, + documentId, + archiveMetadata, + strategy, + })) + } else { + // Simple strategy - entire article as one chunk + const text = this.extractTextFromHTML(html) || '' + chunks = [{ + text, + articleTitle, + articlePath: entry.path, + sectionTitle: articleTitle, // Same as article for simple strategy + fullTitle: articleTitle, + hierarchy: articleTitle, + documentId, + archiveMetadata, + strategy, + }] + } + + logger.debug(`Extracted ${chunks.length} chunks from article at path: ${entry.path} using strategy: ${strategy}`) + + const nonEmptyChunks = chunks.filter(c => c.text.trim().length > 0) + logger.debug(`After filtering empty chunks, ${nonEmptyChunks.length} chunks remain for article at path: ${entry.path}`) + toReturn.push(...nonEmptyChunks) + articlesProcessed++ + + if (opts.onProgress) { + opts.onProgress(articlesProcessed, archive.articleCount) + } + } + + logger.info(`[ZIMExtractionService]: Completed processing ZIM file. Total articles processed: ${articlesProcessed}`) + logger.debug("Final structured content sample:", toReturn.slice(0, 3).map(c => ({ + articleTitle: c.articleTitle, + sectionTitle: c.sectionTitle, + hierarchy: c.hierarchy, + textPreview: c.text.substring(0, 100) + }))) + logger.debug("Total structured sections extracted:", toReturn.length) + return toReturn + } catch (error) { + logger.error('Error processing ZIM file:', error) + throw error + } + } + + private chooseChunkingStrategy(html: string, options = { + forceStrategy: null as ExtractZIMChunkingStrategy | null, + }): ExtractZIMChunkingStrategy { + const { + forceStrategy = null, + } = options; + + if (forceStrategy) return forceStrategy; + + // Use a simple analysis to determin if the HTML has any meaningful structure + // that we can leverage for better chunking. If not, we'll just chunk it as one big piece of text. + return this.hasStructuredHeadings(html) ? 'structured' : 'simple'; + } + + private getCleanedHTMLString(buff: Buffer): string { + const rawString = buff.toString('utf-8'); + const $ = cheerio.load(rawString); + + HTML_SELECTORS_TO_REMOVE.forEach((selector) => { + $(selector).remove() + }); + + return $.html(); + } + + private extractTextFromHTML(html: string): string | null { + try { + const $ = cheerio.load(html) + + // Search body first, then root if body is absent + const text = $('body').length ? $('body').text() : $.root().text() + + return text.replace(/\s+/g, ' ').replace(/\n\s*\n/g, '\n').trim() + } catch (error) { + logger.error('Error extracting text from HTML:', error) + return null + } + } + + private extractStructuredContent(html: string) { + const $ = cheerio.load(html); + + const title = $('h1').first().text().trim() || $('title').text().trim(); + + // Extract sections with their headings and heading levels + const sections: Array<{ heading: string; text: string; level: number }> = []; + let currentSection = { heading: 'Introduction', content: [] as string[], level: 2 }; + + $('body').children().each((_, element) => { + const $el = $(element); + const tagName = element.tagName?.toLowerCase(); + + if (['h2', 'h3', 'h4'].includes(tagName)) { + // Save current section if it has content + if (currentSection.content.length > 0) { + sections.push({ + heading: currentSection.heading, + text: currentSection.content.join(' ').replace(/\s+/g, ' ').trim(), + level: currentSection.level, + }); + } + // Start new section + const level = parseInt(tagName.substring(1)); // Extract number from h2, h3, h4 + currentSection = { + heading: $el.text().replace(/\[edit\]/gi, '').trim(), + content: [], + level, + }; + } else if (['p', 'ul', 'ol', 'dl', 'table'].includes(tagName)) { + const text = $el.text().trim(); + if (text.length > 0) { + currentSection.content.push(text); + } + } + }); + + // Push the last section if it has content + if (currentSection.content.length > 0) { + sections.push({ + heading: currentSection.heading, + text: currentSection.content.join(' ').replace(/\s+/g, ' ').trim(), + level: currentSection.level, + }); + } + + return { + title, + sections, + fullText: sections.map(s => `${s.heading}\n${s.text}`).join('\n\n'), + }; + } + + private hasStructuredHeadings(html: string): boolean { + const $ = cheerio.load(html); + + const headings = $('h2, h3').toArray(); + + // Consider it structured if it has at least 2 headings to break content into meaningful sections + if (headings.length < 2) return false; + + // Check that headings have substantial content between them + let sectionsWithContent = 0; + + for (const heading of headings) { + const $heading = $(heading); + const headingText = $heading.text().trim(); + + // Skip empty or very short headings, likely not meaningful + if (headingText.length < 3) continue; + + // Skip common non-content headings + if (NON_CONTENT_HEADING_PATTERNS.some(pattern => pattern.test(headingText))) { + continue; + } + + // Content until next heading + let contentLength = 0; + let $next = $heading.next(); + + while ($next.length && !$next.is('h1, h2, h3, h4')) { + contentLength += $next.text().trim().length; + $next = $next.next(); + } + + // Consider it a real section if it has at least 100 chars of content + if (contentLength >= 100) { + sectionsWithContent++; + } + } + + // Require at least 2 sections with substantial content + return sectionsWithContent >= 2; + } + + private isArticleEntry(entry: Entry): boolean { + try { + if (entry.isRedirect) return false; + + const item = entry.item; + const mimeType = item.mimetype; + + return mimeType === 'text/html' || mimeType === 'application/xhtml+xml'; + } catch { + return false; + } + } +} \ No newline at end of file diff --git a/admin/app/services/zim_service.ts b/admin/app/services/zim_service.ts index bb787ca..3f67b50 100644 --- a/admin/app/services/zim_service.ts +++ b/admin/app/services/zim_service.ts @@ -43,7 +43,7 @@ interface IZimService { @inject() export class ZimService implements IZimService { - constructor(private dockerService: DockerService) {} + constructor(private dockerService: DockerService) { } async list() { const dirPath = join(process.cwd(), ZIM_STORAGE_PATH) @@ -264,7 +264,7 @@ export class ZimService implements IZimService { } return downloadFilenames.length > 0 ? downloadFilenames : null - } + } async downloadRemoteSuccessCallback(urls: string[], restart = true) { // Check if any URL is a Wikipedia download and handle it @@ -275,28 +275,28 @@ export class ZimService implements IZimService { } if (restart) { - // Check if there are any remaining ZIM download jobs before restarting + // Check if there are any remaining ZIM download jobs before restarting const { QueueService } = await import('./queue_service.js') const queueService = new QueueService() const queue = queueService.getQueue('downloads') - + // Get all active and waiting jobs const [activeJobs, waitingJobs] = await Promise.all([ queue.getActive(), queue.getWaiting(), ]) - + // Filter out completed jobs (progress === 100) to avoid race condition // where this job itself is still in the active queue const activeIncompleteJobs = activeJobs.filter((job) => { const progress = typeof job.progress === 'number' ? job.progress : 0 return progress < 100 }) - + // Check if any remaining incomplete jobs are ZIM downloads const allJobs = [...activeIncompleteJobs, ...waitingJobs] const hasRemainingZimJobs = allJobs.some((job) => job.data.filetype === 'zim') - + if (hasRemainingZimJobs) { logger.info('[ZimService] Skipping container restart - more ZIM downloads pending') } else { @@ -364,7 +364,7 @@ export class ZimService implements IZimService { // Check each tier from highest to lowest (assuming tiers are ordered from low to high) // We check in reverse to find the highest fully-installed tier const reversedTiers = [...category.tiers].reverse() - + for (const tier of reversedTiers) { const allResourcesInstalled = tier.resources.every((resource) => { // Check if resource is marked as downloaded in database @@ -408,7 +408,7 @@ export class ZimService implements IZimService { for (const collection of validated.collections) { const { resources, ...restCollection } = collection; // we'll handle resources separately - + // Upsert the collection itself await CuratedCollection.updateOrCreate( { slug: restCollection.slug }, @@ -489,11 +489,11 @@ export class ZimService implements IZimService { options, currentSelection: selection ? { - optionId: selection.option_id, - status: selection.status, - filename: selection.filename, - url: selection.url, - } + optionId: selection.option_id, + status: selection.status, + filename: selection.filename, + url: selection.url, + } : null, } } diff --git a/admin/app/utils/fs.ts b/admin/app/utils/fs.ts index bc26523..7cc3ba8 100644 --- a/admin/app/utils/fs.ts +++ b/admin/app/utils/fs.ts @@ -152,7 +152,7 @@ export function matchesDevice(fsPath: string, deviceName: string): boolean { return false } -export function determineFileType(filename: string): 'image' | 'pdf' | 'text' | 'unknown' { +export function determineFileType(filename: string): 'image' | 'pdf' | 'text' | 'zim' | 'unknown' { const ext = path.extname(filename).toLowerCase() if (['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'].includes(ext)) { return 'image' @@ -160,6 +160,8 @@ export function determineFileType(filename: string): 'image' | 'pdf' | 'text' | return 'pdf' } else if (['.txt', '.md', '.docx', '.rtf'].includes(ext)) { return 'text' + } else if (ext === '.zim') { + return 'zim' } else { return 'unknown' } diff --git a/admin/constants/zim_extraction.ts b/admin/constants/zim_extraction.ts new file mode 100644 index 0000000..2e3a4db --- /dev/null +++ b/admin/constants/zim_extraction.ts @@ -0,0 +1,48 @@ + +export const HTML_SELECTORS_TO_REMOVE = [ + 'script', + 'style', + 'nav', + 'header', + 'footer', + 'noscript', + 'iframe', + 'svg', + '.navbox', + '.sidebar', + '.infobox', + '.mw-editsection', + '.reference', + '.reflist', + '.toc', + '.noprint', + '.mw-jump-link', + '.mw-headline-anchor', + '[role="navigation"]', + '.navbar', + '.hatnote', + '.ambox', + '.sistersitebox', + '.portal', + '#coordinates', + '.geo-nondefault', + '.authority-control', +] + +// Common heading names that usually don't have meaningful content under them +export const NON_CONTENT_HEADING_PATTERNS = [ + /^see also$/i, + /^references$/i, + /^external links$/i, + /^further reading$/i, + /^notes$/i, + /^bibliography$/i, + /^navigation$/i, +] + +/** + * Batch size for processing ZIM articles to prevent lock timeout errors. + * Processing 50 articles at a time balances throughput with job duration. + * Typical processing time: 2-5 minutes per batch depending on article complexity. + */ +export const ZIM_BATCH_SIZE = 50 \ No newline at end of file diff --git a/admin/package-lock.json b/admin/package-lock.json index dd277ad..acd9a59 100644 --- a/admin/package-lock.json +++ b/admin/package-lock.json @@ -24,6 +24,7 @@ "@headlessui/react": "^2.2.4", "@inertiajs/react": "^2.0.13", "@markdoc/markdoc": "^0.5.2", + "@openzim/libzim": "^4.0.0", "@protomaps/basemaps": "^5.7.0", "@qdrant/js-client-rest": "^1.16.2", "@tabler/icons-react": "^3.34.0", @@ -40,6 +41,7 @@ "axios": "^1.13.1", "better-sqlite3": "^12.1.1", "bullmq": "^5.65.1", + "cheerio": "^1.2.0", "dockerode": "^4.0.7", "edge.js": "^6.2.1", "fast-xml-parser": "^5.2.5", @@ -1261,7 +1263,6 @@ "version": "0.8.1", "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", - "dev": true, "license": "MIT", "dependencies": { "@jridgewell/trace-mapping": "0.3.9" @@ -1274,7 +1275,6 @@ "version": "0.3.9", "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", - "dev": true, "license": "MIT", "dependencies": { "@jridgewell/resolve-uri": "^3.0.3", @@ -2589,6 +2589,102 @@ "integrity": "sha512-eUgLqrMf8nJkZxT24JvVRrQya1vZkQh8BBeYNwGDqa5I0VUi8ACx7uFvAaLxintokpTenkK6DASvo/bvNbBGow==", "license": "MIT" }, + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", + "license": "ISC", + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@isaacs/cliui/node_modules/ansi-regex": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", + "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/ansi-styles": { + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", + "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/emoji-regex": { + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "license": "MIT" + }, + "node_modules/@isaacs/cliui/node_modules/string-width": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "license": "MIT", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@isaacs/cliui/node_modules/strip-ansi": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz", + "integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/wrap-ansi": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, "node_modules/@isaacs/fs-minipass": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", @@ -2741,27 +2837,49 @@ "version": "30.0.1", "resolved": "https://registry.npmjs.org/@jest/diff-sequences/-/diff-sequences-30.0.1.tgz", "integrity": "sha512-n5H8QLDJ47QqbCNn5SuFjCRDrOLEZ0h8vAHCK5RL9Ls7Xa8AQLa/YxAc9UjFqoEDM48muwtBGjtMY5cr0PLDCw==", - "devOptional": true, "license": "MIT", "engines": { "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" } }, + "node_modules/@jest/expect-utils": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-30.2.0.tgz", + "integrity": "sha512-1JnRfhqpD8HGpOmQp180Fo9Zt69zNtC+9lR+kT7NVL05tNXIi+QC8Csz7lfidMoVLPD3FnOtcmp0CEFnxExGEA==", + "license": "MIT", + "dependencies": { + "@jest/get-type": "30.1.0" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, "node_modules/@jest/get-type": { "version": "30.1.0", "resolved": "https://registry.npmjs.org/@jest/get-type/-/get-type-30.1.0.tgz", "integrity": "sha512-eMbZE2hUnx1WV0pmURZY9XoXPkUYjpc55mb0CrhtdWLtzMQPFvu/rZkTLZFTsdaVQa+Tr4eWAteqcUzoawq/uA==", - "devOptional": true, "license": "MIT", "engines": { "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" } }, + "node_modules/@jest/pattern": { + "version": "30.0.1", + "resolved": "https://registry.npmjs.org/@jest/pattern/-/pattern-30.0.1.tgz", + "integrity": "sha512-gWp7NfQW27LaBQz3TITS8L7ZCQ0TLvtmI//4OwlQRx4rnWxcPNIYjxZpDcN4+UlGxgm3jS5QPz8IPTCkb59wZA==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "jest-regex-util": "30.0.1" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, "node_modules/@jest/schemas": { "version": "30.0.5", "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-30.0.5.tgz", "integrity": "sha512-DmdYgtezMkh3cpU8/1uyXakv3tJRcmcXxBOcO0tbaozPwpmh4YMsnWrQm9ZmZMfa5ocbxzbFk6O4bDPEc/iAnA==", - "devOptional": true, "license": "MIT", "dependencies": { "@sinclair/typebox": "^0.34.0" @@ -2770,6 +2888,24 @@ "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" } }, + "node_modules/@jest/types": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-30.2.0.tgz", + "integrity": "sha512-H9xg1/sfVvyfU7o3zMfBEjQ1gcsdeTMgqHoYdN79tuLqfTtuu7WckRA1R5whDwOzxaZAeMKTYWqP+WCAi0CHsg==", + "license": "MIT", + "dependencies": { + "@jest/pattern": "30.0.1", + "@jest/schemas": "30.0.5", + "@types/istanbul-lib-coverage": "^2.0.6", + "@types/istanbul-reports": "^3.0.4", + "@types/node": "*", + "@types/yargs": "^17.0.33", + "chalk": "^4.1.2" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, "node_modules/@jridgewell/gen-mapping": { "version": "0.3.13", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", @@ -3257,6 +3393,92 @@ "node": ">= 8" } }, + "node_modules/@npmcli/agent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@npmcli/agent/-/agent-3.0.0.tgz", + "integrity": "sha512-S79NdEgDQd/NGCay6TCoVzXSj74skRZIKJcpJjC5lOq34SZzyI6MqtiiWoiVWoVrTcGjNeC4ipbh1VIHlpfF5Q==", + "license": "ISC", + "dependencies": { + "agent-base": "^7.1.0", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.1", + "lru-cache": "^10.0.1", + "socks-proxy-agent": "^8.0.3" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/@npmcli/agent/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "license": "ISC" + }, + "node_modules/@npmcli/fs": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@npmcli/fs/-/fs-4.0.0.tgz", + "integrity": "sha512-/xGlezI6xfGO9NwuJlnwz/K14qD1kCSAGtacBHnGzeAIuJGazcp45KP5NuyARXoKb7cwulAGWVsbeSxdG/cb0Q==", + "license": "ISC", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/@openzim/libzim": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@openzim/libzim/-/libzim-4.0.0.tgz", + "integrity": "sha512-lewW7gBRuEW0bD0nccGY/q9kGWR1/D9rcb89L4Njm/ZWy+XrC6XlIrbOjEfZVBI22a3u7uidZ2Vo8ESXnZmx4g==", + "hasInstallScript": true, + "license": "GPL-3.0", + "dependencies": { + "@types/bindings": "^1.5.5", + "@types/jest": "^30.0.0", + "@types/node": "^24.9.2", + "axios": "^1.13.1", + "bindings": "^1.5.0", + "dotenv": "^17.2.3", + "exec-then": "^1.3.1", + "mkdirp": "^3.0.1", + "node-addon-api": "^8.5.0", + "node-gyp": "^11.5.0", + "tqdm": "^2.0.3", + "ts-node": "^10.9.2", + "tsconfig-paths": "^4.2.0" + }, + "engines": { + "node": ">=20 <25" + } + }, + "node_modules/@openzim/libzim/node_modules/@types/node": { + "version": "24.10.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.10.tgz", + "integrity": "sha512-+0/4J266CBGPUq/ELg7QUHhN25WYjE0wYTPSQJn1xeu8DOlIOPxXxrNGiLmfAWl7HMMgWFWXpt9IDjMWrF5Iow==", + "license": "MIT", + "dependencies": { + "undici-types": "~7.16.0" + } + }, + "node_modules/@openzim/libzim/node_modules/dotenv": { + "version": "17.2.3", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.3.tgz", + "integrity": "sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/@openzim/libzim/node_modules/undici-types": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", + "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "license": "MIT" + }, "node_modules/@paralleldrive/cuid2": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/@paralleldrive/cuid2/-/cuid2-2.3.1.tgz", @@ -3281,6 +3503,16 @@ "integrity": "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==", "license": "MIT" }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=14" + } + }, "node_modules/@pkgr/core": { "version": "0.2.9", "resolved": "https://registry.npmjs.org/@pkgr/core/-/core-0.2.9.tgz", @@ -4046,7 +4278,6 @@ "version": "0.34.48", "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.48.tgz", "integrity": "sha512-kKJTNuK3AQOrgjjotVxMrCn1sUJwM76wMszfq1kdU4uYVJjvEWuFQ6HgvLt4Xz3fSmZlTOxJ/Ie13KnIcWQXFA==", - "devOptional": true, "license": "MIT" }, "node_modules/@sindresorhus/is": { @@ -4106,7 +4337,7 @@ "version": "1.11.24", "resolved": "https://registry.npmjs.org/@swc/core/-/core-1.11.24.tgz", "integrity": "sha512-MaQEIpfcEMzx3VWWopbofKJvaraqmL6HbLlw2bFZ7qYqYw3rkhM0cQVEgyzbHtTWwCwPMFZSC2DUbhlZgrMfLg==", - "dev": true, + "devOptional": true, "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { @@ -4315,7 +4546,7 @@ "version": "0.1.3", "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==", - "dev": true, + "devOptional": true, "license": "Apache-2.0" }, "node_modules/@swc/helpers": { @@ -4331,7 +4562,7 @@ "version": "0.1.25", "resolved": "https://registry.npmjs.org/@swc/types/-/types-0.1.25.tgz", "integrity": "sha512-iAoY/qRhNH8a/hBvm3zKj9qQ4oc2+3w1unPJa2XvTK3XjeLXtzcCingVPw/9e5mn1+0yPqxcBGp9Jf0pkfMb1g==", - "dev": true, + "devOptional": true, "license": "Apache-2.0", "dependencies": { "@swc/counter": "^0.1.3" @@ -4823,28 +5054,24 @@ "version": "1.0.12", "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.12.tgz", "integrity": "sha512-UCYBaeFvM11aU2y3YPZ//O5Rhj+xKyzy7mvcIoAjASbigy8mHMryP5cK7dgjlz2hWxh1g5pLw084E0a/wlUSFQ==", - "dev": true, "license": "MIT" }, "node_modules/@tsconfig/node12": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", - "dev": true, "license": "MIT" }, "node_modules/@tsconfig/node14": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", - "dev": true, "license": "MIT" }, "node_modules/@tsconfig/node16": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", - "dev": true, "license": "MIT" }, "node_modules/@tuyau/utils": { @@ -4894,6 +5121,15 @@ "@babel/types": "^7.28.2" } }, + "node_modules/@types/bindings": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@types/bindings/-/bindings-1.5.5.tgz", + "integrity": "sha512-y59PRZBTo2/HuN94qRjyJD+465vGoXMsqz9MMJDbtJL9oT5/B+tAL6c3k10epIinC2/BBkLqKzKC6keukl8wdQ==", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/chai": { "version": "5.2.3", "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.3.tgz", @@ -4989,6 +5225,40 @@ "integrity": "sha512-q67/qwlxblDzEDvzHhVkwc1gzVWxaNxeyHUBF4xElrvjL11O+Ytze+1fGpBHlr/H9myiBUaUXNnNPmBHxxfAcA==", "license": "MIT" }, + "node_modules/@types/istanbul-lib-coverage": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", + "integrity": "sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==", + "license": "MIT" + }, + "node_modules/@types/istanbul-lib-report": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.3.tgz", + "integrity": "sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==", + "license": "MIT", + "dependencies": { + "@types/istanbul-lib-coverage": "*" + } + }, + "node_modules/@types/istanbul-reports": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.4.tgz", + "integrity": "sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==", + "license": "MIT", + "dependencies": { + "@types/istanbul-lib-report": "*" + } + }, + "node_modules/@types/jest": { + "version": "30.0.0", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-30.0.0.tgz", + "integrity": "sha512-XTYugzhuwqWjws0CVz8QpM36+T+Dz5mTEBKhNs/esGLnCIlGdRy+Dq78NRjd7ls7r8BC8ZRMOrKlkO1hU0JOwA==", + "license": "MIT", + "dependencies": { + "expect": "^30.0.0", + "pretty-format": "^30.0.0" + } + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -5155,6 +5425,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/stack-utils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", + "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", + "license": "MIT" + }, "node_modules/@types/stopword": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/@types/stopword/-/stopword-2.0.3.tgz", @@ -5183,6 +5459,21 @@ "integrity": "sha512-T8L6i7wCuyoK8A/ZeLYt1+q0ty3Zb9+qbSSvrIVitzT3YjZqkTZ40IbRsPanlB4h1QB3JVL1SYCdR6ngtFYcuA==", "license": "MIT" }, + "node_modules/@types/yargs": { + "version": "17.0.35", + "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.35.tgz", + "integrity": "sha512-qUHkeCyQFxMXg79wQfTtfndEC+N9ZZg76HJftDJp+qH2tV7Gj4OJi7l+PiWwJ+pWtW8GwSmqsDj/oymhrTWXjg==", + "license": "MIT", + "dependencies": { + "@types/yargs-parser": "*" + } + }, + "node_modules/@types/yargs-parser": { + "version": "21.0.3", + "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", + "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", + "license": "MIT" + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.54.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.54.0.tgz", @@ -5701,6 +5992,15 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, + "node_modules/abbrev": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-3.0.1.tgz", + "integrity": "sha512-AO2ac6pjRB3SJmGJo+v5/aK6Omggp6fsLrs6wN9bd35ulu4cCwaAU9+7ZhXjeqHVkaHThLuzH0nZr0YpCDhygg==", + "license": "ISC", + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, "node_modules/abstract-logging": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/abstract-logging/-/abstract-logging-2.0.1.tgz", @@ -5767,7 +6067,6 @@ "version": "8.3.4", "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", - "dev": true, "license": "MIT", "dependencies": { "acorn": "^8.11.0" @@ -5776,6 +6075,15 @@ "node": ">=0.4.0" } }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/ajv": { "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", @@ -5845,7 +6153,6 @@ "version": "4.1.3", "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", - "dev": true, "license": "MIT" }, "node_modules/argparse": { @@ -6017,7 +6324,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "devOptional": true, "license": "MIT" }, "node_modules/base64-js": { @@ -6116,11 +6422,16 @@ "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", "license": "MIT" }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/brace-expansion": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "devOptional": true, "license": "MIT", "dependencies": { "balanced-match": "^1.0.0" @@ -6269,6 +6580,35 @@ "node": ">=8" } }, + "node_modules/cacache": { + "version": "19.0.1", + "resolved": "https://registry.npmjs.org/cacache/-/cacache-19.0.1.tgz", + "integrity": "sha512-hdsUxulXCi5STId78vRVYEtDAjq99ICAUktLTeTYsLoTE6Z8dS0c8pWNCxwdrk9YfJeobDZc2Y186hD/5ZQgFQ==", + "license": "ISC", + "dependencies": { + "@npmcli/fs": "^4.0.0", + "fs-minipass": "^3.0.0", + "glob": "^10.2.2", + "lru-cache": "^10.0.1", + "minipass": "^7.0.3", + "minipass-collect": "^2.0.1", + "minipass-flush": "^1.0.5", + "minipass-pipeline": "^1.2.4", + "p-map": "^7.0.2", + "ssri": "^12.0.0", + "tar": "^7.4.3", + "unique-filename": "^4.0.0" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/cacache/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "license": "ISC" + }, "node_modules/call-bind-apply-helpers": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", @@ -6376,7 +6716,6 @@ "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "devOptional": true, "license": "MIT", "dependencies": { "ansi-styles": "^4.1.0", @@ -6393,7 +6732,6 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "devOptional": true, "license": "MIT", "dependencies": { "has-flag": "^4.0.0" @@ -6458,6 +6796,57 @@ "node": ">=16" } }, + "node_modules/cheerio": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz", + "integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==", + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.1", + "htmlparser2": "^10.1.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.19.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=20.18.1" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/cheerio/node_modules/undici": { + "version": "7.20.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.20.0.tgz", + "integrity": "sha512-MJZrkjyd7DeC+uPZh+5/YaMDxFiiEEaDgbUSVMXayofAkDWF1088CDo+2RPg7B1BuS1qf1vgNE7xqwPxE0DuSQ==", + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/chevrotain": { "version": "11.1.1", "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.1.1.tgz", @@ -6502,7 +6891,6 @@ "version": "4.4.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-4.4.0.tgz", "integrity": "sha512-77PSwercCZU2Fc4sX94eF8k8Pxte6JAwL4/ICZLFjJLqegs7kCuAsqqj/70NQF6TvDpgFjkubQB2FW2ZZddvQg==", - "dev": true, "funding": [ { "type": "github", @@ -6945,7 +7333,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", - "dev": true, "license": "MIT" }, "node_modules/cron-parser": { @@ -6988,6 +7375,34 @@ "node": ">= 0.8" } }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/csstype": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", @@ -7163,7 +7578,6 @@ "version": "4.0.4", "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.4.tgz", "integrity": "sha512-X07nttJQkwkfKfvTPG/KSnE2OMdcUCao6+eXF3wmnIQRn2aPAHH3VxDbDOdegkd6JbPsXqShpvEOHfAT+nCNwQ==", - "dev": true, "license": "BSD-3-Clause", "engines": { "node": ">=0.3.1" @@ -7221,6 +7635,61 @@ "uuid": "dist/bin/uuid" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dotenv": { "version": "16.6.1", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz", @@ -7253,6 +7722,12 @@ "integrity": "sha512-X7hshQbLyMJ/3RPhyObLARM2sNxxmRALLKx1+NVFFnQ9gKzmCrxm9+uLIAdBcvc8FNLpctqlQ2V6AE92Ol9UDQ==", "license": "ISC" }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", + "license": "MIT" + }, "node_modules/edge-error": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/edge-error/-/edge-error-4.0.2.tgz", @@ -7372,6 +7847,54 @@ "node": ">= 0.8" } }, + "node_modules/encoding": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/encoding/-/encoding-0.1.13.tgz", + "integrity": "sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==", + "license": "MIT", + "optional": true, + "dependencies": { + "iconv-lite": "^0.6.2" + } + }, + "node_modules/encoding-sniffer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", + "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/encoding-sniffer/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/encoding/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "optional": true, + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/end-of-stream": { "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", @@ -7407,6 +7930,27 @@ "node": ">=8.6" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/env-paths": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", + "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/environment": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/environment/-/environment-1.1.0.tgz", @@ -7419,6 +7963,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/err-code": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/err-code/-/err-code-2.0.3.tgz", + "integrity": "sha512-2bmlRpNKBxT/CRmPOlyISQpNj+qSeYvcym/uT0Jx2bMOlKLtSy1ZmLuVxSEKKyor/N5yhvp/ZiG1oE3DEYMSFA==", + "license": "MIT" + }, "node_modules/error-stack-parser-es": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/error-stack-parser-es/-/error-stack-parser-es-1.0.5.tgz", @@ -7895,6 +8445,15 @@ "integrity": "sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw==", "license": "MIT" }, + "node_modules/exec-then": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/exec-then/-/exec-then-1.3.1.tgz", + "integrity": "sha512-8N6aNVniIa4xr57wF6tfv8PTqpCnmejQPYLJxj9dGddvbLfFzkkWzMkSE2osmVq2rO6WhGNt9BqkiowkfiJaHw==", + "license": "MIT", + "dependencies": { + "q": "^1.1.2" + } + }, "node_modules/execa": { "version": "9.6.1", "resolved": "https://registry.npmjs.org/execa/-/execa-9.6.1.tgz", @@ -7937,6 +8496,29 @@ "node": ">=6" } }, + "node_modules/expect": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/expect/-/expect-30.2.0.tgz", + "integrity": "sha512-u/feCi0GPsI+988gU2FLcsHyAHTU0MX1Wg68NhAnN7z/+C5wqG+CY8J53N9ioe8RXgaoz0nBR/TYMf3AycUuPw==", + "license": "MIT", + "dependencies": { + "@jest/expect-utils": "30.2.0", + "@jest/get-type": "30.1.0", + "jest-matcher-utils": "30.2.0", + "jest-message-util": "30.2.0", + "jest-mock": "30.2.0", + "jest-util": "30.2.0" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/exponential-backoff": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.3.tgz", + "integrity": "sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA==", + "license": "Apache-2.0" + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -8307,6 +8889,22 @@ } } }, + "node_modules/foreground-child": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", + "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/form-data": { "version": "4.0.5", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", @@ -8381,6 +8979,18 @@ "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", "license": "MIT" }, + "node_modules/fs-minipass": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-3.0.3.tgz", + "integrity": "sha512-XUBA9XClHbnJWSfBzjkm6RvPsyg3sryZt06BEQoXcF7EK/xpGaQYJgQKDJSUH5SGZ76Y7pFx1QBnXz09rU5Fbw==", + "license": "ISC", + "dependencies": { + "minipass": "^7.0.3" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -8571,6 +9181,27 @@ "integrity": "sha512-latSnyDNt/8zYUB6VIJ6PCh2jBjJX6gnDsoCZ7LyW7GkqrD51EWwa9qCoGixj8YqBtETQK/xY7OmpTF8xz1DdQ==", "license": "MIT" }, + "node_modules/glob": { + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz", + "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "license": "ISC", + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/glob-parent": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", @@ -8725,7 +9356,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "devOptional": true, "license": "MIT", "engines": { "node": ">=8" @@ -8996,6 +9626,43 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/htmlparser2": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", + "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "entities": "^7.0.1" + } + }, + "node_modules/htmlparser2/node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/http-cache-semantics": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz", + "integrity": "sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==", + "license": "BSD-2-Clause" + }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -9016,6 +9683,32 @@ "url": "https://opencollective.com/express" } }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/human-signals": { "version": "8.0.1", "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-8.0.1.tgz", @@ -9108,7 +9801,6 @@ "version": "0.1.4", "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", - "dev": true, "license": "MIT", "engines": { "node": ">=0.8.19" @@ -9203,6 +9895,15 @@ "url": "https://opencollective.com/ioredis" } }, + "node_modules/ip-address": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, "node_modules/ipaddr.js": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", @@ -9418,11 +10119,25 @@ "node": ">=0.10.0" } }, + "node_modules/jackspeak": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", + "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, "node_modules/jest-diff": { "version": "30.2.0", "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-30.2.0.tgz", "integrity": "sha512-dQHFo3Pt4/NLlG5z4PxZ/3yZTZ1C7s9hveiOj+GCN+uT109NC2QgsoVZsVOAvbJ3RgKkvyLGXZV9+piDpWbm6A==", - "devOptional": true, "license": "MIT", "dependencies": { "@jest/diff-sequences": "30.0.1", @@ -9434,6 +10149,90 @@ "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" } }, + "node_modules/jest-matcher-utils": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-30.2.0.tgz", + "integrity": "sha512-dQ94Nq4dbzmUWkQ0ANAWS9tBRfqCrn0bV9AMYdOi/MHW726xn7eQmMeRTpX2ViC00bpNaWXq+7o4lIQ3AX13Hg==", + "license": "MIT", + "dependencies": { + "@jest/get-type": "30.1.0", + "chalk": "^4.1.2", + "jest-diff": "30.2.0", + "pretty-format": "30.2.0" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/jest-message-util": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-30.2.0.tgz", + "integrity": "sha512-y4DKFLZ2y6DxTWD4cDe07RglV88ZiNEdlRfGtqahfbIjfsw1nMCPx49Uev4IA/hWn3sDKyAnSPwoYSsAEdcimw==", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@jest/types": "30.2.0", + "@types/stack-utils": "^2.0.3", + "chalk": "^4.1.2", + "graceful-fs": "^4.2.11", + "micromatch": "^4.0.8", + "pretty-format": "30.2.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.6" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/jest-message-util/node_modules/slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/jest-mock": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-30.2.0.tgz", + "integrity": "sha512-JNNNl2rj4b5ICpmAcq+WbLH83XswjPbjH4T7yvGzfAGCPh1rw+xVNbtk+FnRslvt9lkCcdn9i1oAoKUuFsOxRw==", + "license": "MIT", + "dependencies": { + "@jest/types": "30.2.0", + "@types/node": "*", + "jest-util": "30.2.0" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/jest-regex-util": { + "version": "30.0.1", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-30.0.1.tgz", + "integrity": "sha512-jHEQgBXAgc+Gh4g0p3bCevgRCVRkB4VB70zhoAE48gxeSr1hfUOsM/C2WoJgVL7Eyg//hudYENbm3Ne+/dRVVA==", + "license": "MIT", + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, + "node_modules/jest-util": { + "version": "30.2.0", + "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-30.2.0.tgz", + "integrity": "sha512-QKNsM0o3Xe6ISQU869e+DhG+4CK/48aHYdJZGlFQVTjnbvgpcKyxpzk29fGiO7i/J8VENZ+d2iGnSsvmuHywlA==", + "license": "MIT", + "dependencies": { + "@jest/types": "30.2.0", + "@types/node": "*", + "chalk": "^4.1.2", + "ci-info": "^4.2.0", + "graceful-fs": "^4.2.11", + "picomatch": "^4.0.2" + }, + "engines": { + "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + } + }, "node_modules/jiti": { "version": "2.6.1", "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", @@ -10125,9 +10924,39 @@ "version": "1.3.6", "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", - "dev": true, "license": "ISC" }, + "node_modules/make-fetch-happen": { + "version": "14.0.3", + "resolved": "https://registry.npmjs.org/make-fetch-happen/-/make-fetch-happen-14.0.3.tgz", + "integrity": "sha512-QMjGbFTP0blj97EeidG5hk/QhKQ3T4ICckQGLgz38QF7Vgbk6e6FTARN8KhKxyBbWn8R0HU+bnw8aSoFPD4qtQ==", + "license": "ISC", + "dependencies": { + "@npmcli/agent": "^3.0.0", + "cacache": "^19.0.1", + "http-cache-semantics": "^4.1.1", + "minipass": "^7.0.2", + "minipass-fetch": "^4.0.0", + "minipass-flush": "^1.0.5", + "minipass-pipeline": "^1.2.4", + "negotiator": "^1.0.0", + "proc-log": "^5.0.0", + "promise-retry": "^2.0.1", + "ssri": "^12.0.0" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/make-fetch-happen/node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/maplibre-gl": { "version": "4.7.1", "resolved": "https://registry.npmjs.org/maplibre-gl/-/maplibre-gl-4.7.1.tgz", @@ -11179,7 +12008,6 @@ "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "devOptional": true, "license": "ISC", "dependencies": { "brace-expansion": "^2.0.1" @@ -11209,6 +12037,125 @@ "node": ">=16 || 14 >=14.17" } }, + "node_modules/minipass-collect": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/minipass-collect/-/minipass-collect-2.0.1.tgz", + "integrity": "sha512-D7V8PO9oaz7PWGLbCACuI1qEOsq7UKfLotx/C0Aet43fCUB/wfQ7DYeq2oR/svFJGYDHPr38SHATeaj/ZoKHKw==", + "license": "ISC", + "dependencies": { + "minipass": "^7.0.3" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/minipass-fetch": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/minipass-fetch/-/minipass-fetch-4.0.1.tgz", + "integrity": "sha512-j7U11C5HXigVuutxebFadoYBbd7VSdZWggSe64NVdvWNBqGAiXPL2QVCehjmw7lY1oF9gOllYbORh+hiNgfPgQ==", + "license": "MIT", + "dependencies": { + "minipass": "^7.0.3", + "minipass-sized": "^1.0.3", + "minizlib": "^3.0.1" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + }, + "optionalDependencies": { + "encoding": "^0.1.13" + } + }, + "node_modules/minipass-flush": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/minipass-flush/-/minipass-flush-1.0.5.tgz", + "integrity": "sha512-JmQSYYpPUqX5Jyn1mXaRwOda1uQ8HP5KAT/oDSLCzt1BYRhQU0/hDtsB1ufZfEEzMZ9aAVmsBw8+FWsIXlClWw==", + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/minipass-flush/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/minipass-flush/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC" + }, + "node_modules/minipass-pipeline": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/minipass-pipeline/-/minipass-pipeline-1.2.4.tgz", + "integrity": "sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==", + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/minipass-pipeline/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/minipass-pipeline/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC" + }, + "node_modules/minipass-sized": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/minipass-sized/-/minipass-sized-1.0.3.tgz", + "integrity": "sha512-MbkQQ2CTiBMlA2Dm/5cY+9SWFEN8pzzOXi6rlM5Xxq0Yqbda5ZQy9sU75a673FE9ZK0Zsbr6Y5iP6u9nktfg2g==", + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/minipass-sized/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/minipass-sized/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC" + }, "node_modules/minizlib": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", @@ -11225,7 +12172,6 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz", "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==", - "devOptional": true, "license": "MIT", "bin": { "mkdirp": "dist/cjs/src/bin.js" @@ -11398,6 +12344,15 @@ "integrity": "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==", "license": "MIT" }, + "node_modules/node-addon-api": { + "version": "8.5.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.5.0.tgz", + "integrity": "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A==", + "license": "MIT", + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, "node_modules/node-fetch": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", @@ -11418,6 +12373,30 @@ } } }, + "node_modules/node-gyp": { + "version": "11.5.0", + "resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-11.5.0.tgz", + "integrity": "sha512-ra7Kvlhxn5V9Slyus0ygMa2h+UqExPqUIkfk7Pc8QTLT956JLSy51uWFwHtIYy0vI8cB4BDhc/S03+880My/LQ==", + "license": "MIT", + "dependencies": { + "env-paths": "^2.2.0", + "exponential-backoff": "^3.1.1", + "graceful-fs": "^4.2.6", + "make-fetch-happen": "^14.0.3", + "nopt": "^8.0.0", + "proc-log": "^5.0.0", + "semver": "^7.3.5", + "tar": "^7.4.3", + "tinyglobby": "^0.2.12", + "which": "^5.0.0" + }, + "bin": { + "node-gyp": "bin/node-gyp.js" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, "node_modules/node-gyp-build-optional-packages": { "version": "5.2.2", "resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.2.2.tgz", @@ -11433,12 +12412,51 @@ "node-gyp-build-optional-packages-test": "build-test.js" } }, + "node_modules/node-gyp/node_modules/isexe": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-3.1.1.tgz", + "integrity": "sha512-LpB/54B+/2J5hqQ7imZHfdU31OlgQqx7ZicVlkm9kzg9/w8GKLEcFfJl/t7DCEDueOyBAD6zCCwTO6Fzs0NoEQ==", + "license": "ISC", + "engines": { + "node": ">=16" + } + }, + "node_modules/node-gyp/node_modules/which": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/which/-/which-5.0.0.tgz", + "integrity": "sha512-JEdGzHwwkrbWoGOlIHqQ5gtprKGOenpDHpxE9zVR1bWbOtYRyPPHMe9FaP6x61CmNaTThSkb0DAJte5jD+DmzQ==", + "license": "ISC", + "dependencies": { + "isexe": "^3.1.1" + }, + "bin": { + "node-which": "bin/which.js" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, "node_modules/node-releases": { "version": "2.0.27", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz", "integrity": "sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==", "license": "MIT" }, + "node_modules/nopt": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-8.1.0.tgz", + "integrity": "sha512-ieGu42u/Qsa4TFktmaKEwM6MQH0pOWnaB3htzh0JRtx84+Mebc0cbZYN5bC+6WTZ4+77xrL9Pn5m7CV6VIkV7A==", + "license": "ISC", + "dependencies": { + "abbrev": "^3.0.0" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, "node_modules/normalize-package-data": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-8.0.0.tgz", @@ -11496,6 +12514,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -11673,7 +12703,6 @@ "version": "7.0.4", "resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.4.tgz", "integrity": "sha512-tkAQEw8ysMzmkhgw8k+1U/iPhWNhykKnSk4Rd5zLoPJCuJaGRPo6YposrZgaxHKzDHdDWWZvE/Sk7hsL2X/CpQ==", - "devOptional": true, "license": "MIT", "engines": { "node": ">=18" @@ -11710,6 +12739,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "license": "BlueOak-1.0.0" + }, "node_modules/package-manager-detector": { "version": "0.2.11", "resolved": "https://registry.npmjs.org/package-manager-detector/-/package-manager-detector-0.2.11.tgz", @@ -11815,6 +12850,55 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -11856,6 +12940,28 @@ "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", "license": "MIT" }, + "node_modules/path-scurry": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", + "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/path-scurry/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "license": "ISC" + }, "node_modules/path-type": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/path-type/-/path-type-6.0.0.tgz", @@ -12223,7 +13329,6 @@ "version": "30.2.0", "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.2.0.tgz", "integrity": "sha512-9uBdv/B4EefsuAL+pWqueZyZS2Ba+LxfFeQ9DN14HU4bN8bhaxKdkpjpB6fs9+pSjIBu+FXQHImEg8j/Lw0+vA==", - "devOptional": true, "license": "MIT", "dependencies": { "@jest/schemas": "30.0.5", @@ -12238,7 +13343,6 @@ "version": "5.2.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "devOptional": true, "license": "MIT", "engines": { "node": ">=10" @@ -12278,6 +13382,15 @@ "integrity": "sha512-dKp+C4iXWK4vVYZmYSd0KBH5F/h1HoZRsbJ82AVKRO3PEo8L4lBS/vLwhVtpwwuYcoIsVY+1JYKR268yn480uQ==", "license": "Unlicense" }, + "node_modules/proc-log": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/proc-log/-/proc-log-5.0.0.tgz", + "integrity": "sha512-Azwzvl90HaF0aCz1JrDdXQykFakSSNPaPoiZ9fm5qJIMHioDZEi7OAdRwSm6rSoPtY3Qutnm3L7ogmg3dc+wbQ==", + "license": "ISC", + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, "node_modules/process-warning": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz", @@ -12294,6 +13407,28 @@ ], "license": "MIT" }, + "node_modules/promise-retry": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/promise-retry/-/promise-retry-2.0.1.tgz", + "integrity": "sha512-y+WKFlBR8BGXnsNlIHFGPZmyDf3DFMoLhaflAnyZgV6rG6xu+JwesTo2Q9R6XwYmtmwAFCkAk3e35jEdoeh/3g==", + "license": "MIT", + "dependencies": { + "err-code": "^2.0.2", + "retry": "^0.12.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/promise-retry/node_modules/retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.12.0.tgz", + "integrity": "sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/property-information": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz", @@ -12373,6 +13508,17 @@ "node": ">=6" } }, + "node_modules/q": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/q/-/q-1.5.1.tgz", + "integrity": "sha512-kV/CThkXo6xyFEZUugw/+pIOywXcDbFYgSct5cT3gqlbkBE1SJdwy6UQoZvodiWF/ckQLZyDE/Bu1M6gVu5lVw==", + "deprecated": "You or someone you depend on is using Q, the JavaScript Promise library that gave JavaScript developers strong feelings about promises. They can almost certainly migrate to the native JavaScript promise now. Thank you literally everyone for joining me in this bet against the odds. Be excellent to each other.\n\n(For a CapTP with native promises, see @endo/eventual-send and @endo/captp)", + "license": "MIT", + "engines": { + "node": ">=0.6.0", + "teleport": ">=0.2.0" + } + }, "node_modules/qs": { "version": "6.14.1", "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", @@ -13471,6 +14617,44 @@ "node": ">=8.0.0" } }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "license": "MIT", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.7", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", + "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", + "license": "MIT", + "dependencies": { + "ip-address": "^10.0.1", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.5", + "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", + "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/sonic-boom": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.0.tgz", @@ -13669,6 +14853,39 @@ "nan": "^2.23.0" } }, + "node_modules/ssri": { + "version": "12.0.0", + "resolved": "https://registry.npmjs.org/ssri/-/ssri-12.0.0.tgz", + "integrity": "sha512-S7iGNosepx9RadX82oimUkvr0Ct7IjJbEbs4mJcTxst8um95J3sDYU1RBEOvdu6oL1Wek2ODI5i4MAw+dZ6cAQ==", + "license": "ISC", + "dependencies": { + "minipass": "^7.0.3" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/stack-utils": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.6.tgz", + "integrity": "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==", + "license": "MIT", + "dependencies": { + "escape-string-regexp": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/stack-utils/node_modules/escape-string-regexp": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", + "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/stacktracey": { "version": "2.1.8", "resolved": "https://registry.npmjs.org/stacktracey/-/stacktracey-2.1.8.tgz", @@ -13726,6 +14943,36 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/string-width-cjs/node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/string-width/node_modules/ansi-regex": { "version": "6.2.2", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", @@ -13794,6 +15041,28 @@ "node": ">=8" } }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/strip-final-newline": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-4.0.0.tgz", @@ -14207,6 +15476,12 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/tqdm": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/tqdm/-/tqdm-2.0.3.tgz", + "integrity": "sha512-Ju50G550gspkjd1AiJ/jFBHe2dii9s+KPntEsq0o73BqywqzNWPUM8/FD3zM1rOH7OGLoH7pGSGI90Ct+Yd/5Q==", + "license": "ISC" + }, "node_modules/tr46": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", @@ -14257,6 +15532,49 @@ "code-block-writer": "^13.0.1" } }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, "node_modules/ts-node-maintained": { "version": "10.9.6", "resolved": "https://registry.npmjs.org/ts-node-maintained/-/ts-node-maintained-10.9.6.tgz", @@ -14301,6 +15619,20 @@ } } }, + "node_modules/tsconfig-paths": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-4.2.0.tgz", + "integrity": "sha512-NoZ4roiN7LnbKn9QqE1amc9DJfzvZXxF4xDavcOWt1BPkdx+m+0gJuPM+S0vCe7zTJMYUP0R8pO2XMr+Y8oLIg==", + "license": "MIT", + "dependencies": { + "json5": "^2.2.2", + "minimist": "^1.2.6", + "strip-bom": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -14528,6 +15860,30 @@ "node": ">=0.10.0" } }, + "node_modules/unique-filename": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/unique-filename/-/unique-filename-4.0.0.tgz", + "integrity": "sha512-XSnEewXmQ+veP7xX2dS5Q4yZAvO40cBN2MWkJ7D/6sW4Dg6wYBNwM1Vrnz1FhH5AdeLIlUXRI9e28z1YZi71NQ==", + "license": "ISC", + "dependencies": { + "unique-slug": "^5.0.0" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/unique-slug": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unique-slug/-/unique-slug-5.0.0.tgz", + "integrity": "sha512-9OdaqO5kwqR+1kVgHAhsp5vPNU0hnxRa26rBFNfNgM7M6pNtgzeBn3s/xbyCQL3dcjzOatcef6UUHpB/6MaETg==", + "license": "ISC", + "dependencies": { + "imurmurhash": "^0.1.4" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, "node_modules/unist-util-is": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.1.tgz", @@ -14686,7 +16042,6 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", - "dev": true, "license": "MIT" }, "node_modules/validate-npm-package-license": { @@ -14858,12 +16213,46 @@ "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", "license": "BSD-2-Clause" }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-encoding/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/whatwg-fetch": { "version": "3.6.20", "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.20.tgz", "integrity": "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==", "license": "MIT" }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/whatwg-url": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", @@ -14928,6 +16317,53 @@ "url": "https://github.com/chalk/wrap-ansi?sponsor=1" } }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/wrap-ansi-cjs/node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/wrap-ansi/node_modules/ansi-regex": { "version": "6.2.2", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", @@ -15063,7 +16499,6 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=6" diff --git a/admin/package.json b/admin/package.json index 73a6160..497eb41 100644 --- a/admin/package.json +++ b/admin/package.json @@ -76,6 +76,7 @@ "@headlessui/react": "^2.2.4", "@inertiajs/react": "^2.0.13", "@markdoc/markdoc": "^0.5.2", + "@openzim/libzim": "^4.0.0", "@protomaps/basemaps": "^5.7.0", "@qdrant/js-client-rest": "^1.16.2", "@tabler/icons-react": "^3.34.0", @@ -92,6 +93,7 @@ "axios": "^1.13.1", "better-sqlite3": "^12.1.1", "bullmq": "^5.65.1", + "cheerio": "^1.2.0", "dockerode": "^4.0.7", "edge.js": "^6.2.1", "fast-xml-parser": "^5.2.5", diff --git a/admin/types/zim.ts b/admin/types/zim.ts index 68f7194..cfd040a 100644 --- a/admin/types/zim.ts +++ b/admin/types/zim.ts @@ -64,3 +64,47 @@ export type RemoteZimFileEntry = { author: string file_name: string } + +export type ExtractZIMContentOptions = { + strategy?: ExtractZIMChunkingStrategy + maxArticles?: number + onProgress?: (processedArticles: number, totalArticles: number) => void + // Batch processing options to avoid lock timeouts + startOffset?: number // Article index to start from for resuming + batchSize?: number // Max articles to process in this batch +} + +export type ExtractZIMChunkingStrategy = 'structured' | 'simple' + +export type ZIMArchiveMetadata = { + title: string + creator: string + publisher: string + date: string + language: string + description: string +} + +export type ZIMContentChunk = { + // Content + text: string + + // Article-level context + articleTitle: string + articlePath: string + + // Section-level context for structured chunks + sectionTitle: string + fullTitle: string // Combined "Article Title - Section Title" + hierarchy: string // Breadcrumb trail + sectionLevel?: number // Heading level (2=h2, 3=h3, etc.) + + // Document grouping + documentId: string // Same for all chunks from one article + + // Archive metadata + archiveMetadata: ZIMArchiveMetadata + + // Extraction metadata + strategy: ExtractZIMChunkingStrategy +} \ No newline at end of file