From d21c6b685570533fe3546f196761053d1713d961 Mon Sep 17 00:00:00 2001 From: Fred Reimer Date: Tue, 24 Mar 2026 22:04:45 -0400 Subject: [PATCH] add configurable download mirrors --- admin/.env.example | 6 +- admin/app/jobs/run_download_job.ts | 12 +- .../services/collection_manifest_service.ts | 46 +++++- .../app/services/collection_update_service.ts | 18 ++- admin/app/services/docker_service.ts | 4 +- admin/app/services/map_service.ts | 7 +- admin/app/services/zim_service.ts | 28 ++-- admin/app/utils/download_mirrors.ts | 140 ++++++++++++++++++ admin/start/env.ts | 1 + install/management_compose.yaml | 6 +- 10 files changed, 233 insertions(+), 35 deletions(-) create mode 100644 admin/app/utils/download_mirrors.ts diff --git a/admin/.env.example b/admin/.env.example index 05a03fd..dd66a64 100644 --- a/admin/.env.example +++ b/admin/.env.example @@ -15,4 +15,8 @@ REDIS_PORT=6379 # Storage path for NOMAD content (ZIM files, maps, etc.) # On Windows dev, use an absolute path like: C:/nomad-storage # On Linux production, use: /opt/project-nomad/storage -NOMAD_STORAGE_PATH=/opt/project-nomad/storage \ No newline at end of file +NOMAD_STORAGE_PATH=/opt/project-nomad/storage +# Optional: rewrite download URLs to one or more mirrors using JSON source-prefix => target-prefix mappings. +# Example for Kiwix: +# DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"} +DOWNLOAD_MIRROR_RULES= diff --git a/admin/app/jobs/run_download_job.ts b/admin/app/jobs/run_download_job.ts index c7f672e..189e63c 100644 --- a/admin/app/jobs/run_download_job.ts +++ b/admin/app/jobs/run_download_job.ts @@ -7,6 +7,7 @@ import { DockerService } from '#services/docker_service' import { ZimService } from '#services/zim_service' import { MapService } from '#services/map_service' import { EmbedFileJob } from './embed_file_job.js' +import { rewriteDownloadUrl } from '../utils/download_mirrors.js' export class RunDownloadJob { static get queue() { @@ -18,7 +19,7 @@ export class RunDownloadJob { } static getJobId(url: string): string { - return createHash('sha256').update(url).digest('hex').slice(0, 16) + return createHash('sha256').update(rewriteDownloadUrl(url)).digest('hex').slice(0, 16) } async handle(job: Job) { @@ -124,10 +125,11 @@ export class RunDownloadJob { static async dispatch(params: RunDownloadJobParams) { const queueService = new QueueService() const queue = queueService.getQueue(this.queue) - const jobId = this.getJobId(params.url) + const normalizedParams = { ...params, url: rewriteDownloadUrl(params.url) } + const jobId = this.getJobId(normalizedParams.url) try { - const job = await queue.add(this.key, params, { + const job = await queue.add(this.key, normalizedParams, { jobId, attempts: 3, backoff: { type: 'exponential', delay: 2000 }, @@ -137,7 +139,7 @@ export class RunDownloadJob { return { job, created: true, - message: `Dispatched download job for URL ${params.url}`, + message: `Dispatched download job for URL ${normalizedParams.url}`, } } catch (error) { if (error.message.includes('job already exists')) { @@ -145,7 +147,7 @@ export class RunDownloadJob { return { job: existing, created: false, - message: `Job already exists for URL ${params.url}`, + message: `Job already exists for URL ${normalizedParams.url}`, } } throw error diff --git a/admin/app/services/collection_manifest_service.ts b/admin/app/services/collection_manifest_service.ts index bc69368..d00d752 100644 --- a/admin/app/services/collection_manifest_service.ts +++ b/admin/app/services/collection_manifest_service.ts @@ -12,10 +12,12 @@ import { getFileStatsIfExists, ZIM_STORAGE_PATH, } from '../utils/fs.js' +import { rewriteDownloadUrl } from '../utils/download_mirrors.js' import type { ManifestType, ZimCategoriesSpec, MapsSpec, + WikipediaSpec, CategoryWithStatus, CollectionWithStatus, SpecResource, @@ -77,7 +79,7 @@ export class CollectionManifestService { async getCachedSpec(type: ManifestType): Promise { const manifest = await CollectionManifest.find(type) if (!manifest) return null - return manifest.spec_data as T + return this.applyDownloadMirrors(type, manifest.spec_data) as T } async getSpecWithFallback(type: ManifestType): Promise { @@ -170,6 +172,48 @@ export class CollectionManifestService { return undefined } + private applyDownloadMirrors( + type: ManifestType, + spec: ZimCategoriesSpec | MapsSpec | WikipediaSpec + ): ZimCategoriesSpec | MapsSpec | WikipediaSpec { + if (type === 'zim_categories') { + return { + ...spec, + categories: (spec as ZimCategoriesSpec).categories.map((category) => ({ + ...category, + tiers: category.tiers.map((tier) => ({ + ...tier, + resources: tier.resources.map((resource) => ({ + ...resource, + url: rewriteDownloadUrl(resource.url), + })), + })), + })), + } + } + + if (type === 'maps') { + return { + ...spec, + collections: (spec as MapsSpec).collections.map((collection) => ({ + ...collection, + resources: collection.resources.map((resource) => ({ + ...resource, + url: rewriteDownloadUrl(resource.url), + })), + })), + } + } + + return { + ...spec, + options: (spec as WikipediaSpec).options.map((option) => ({ + ...option, + url: option.url ? rewriteDownloadUrl(option.url) : option.url, + })), + } + } + // ---- Filename parsing ---- static parseZimFilename(filename: string): { resource_id: string; version: string } | null { diff --git a/admin/app/services/collection_update_service.ts b/admin/app/services/collection_update_service.ts index b1e06d1..b2ec251 100644 --- a/admin/app/services/collection_update_service.ts +++ b/admin/app/services/collection_update_service.ts @@ -4,6 +4,7 @@ import axios from 'axios' import InstalledResource from '#models/installed_resource' import { RunDownloadJob } from '../jobs/run_download_job.js' import { ZIM_STORAGE_PATH } from '../utils/fs.js' +import { rewriteDownloadUrl } from '../utils/download_mirrors.js' import { join } from 'path' import type { ResourceUpdateCheckRequest, @@ -49,12 +50,15 @@ export class CollectionUpdateService { timeout: 15000, }) - logger.info( - `[CollectionUpdateService] Update check complete: ${response.data.length} update(s) available` - ) + const updates = response.data.map((update) => ({ + ...update, + download_url: rewriteDownloadUrl(update.download_url), + })) + + logger.info(`[CollectionUpdateService] Update check complete: ${updates.length} update(s) available`) return { - updates: response.data, + updates, checked_at: new Date().toISOString(), } } catch (error) { @@ -82,8 +86,10 @@ export class CollectionUpdateService { async applyUpdate( update: ResourceUpdateInfo ): Promise<{ success: boolean; jobId?: string; error?: string }> { + const downloadUrl = rewriteDownloadUrl(update.download_url) + // Check if a download is already in progress for this URL - const existingJob = await RunDownloadJob.getByUrl(update.download_url) + const existingJob = await RunDownloadJob.getByUrl(downloadUrl) if (existingJob) { const state = await existingJob.getState() if (state === 'active' || state === 'waiting' || state === 'delayed') { @@ -98,7 +104,7 @@ export class CollectionUpdateService { const filepath = this.buildFilepath(update, filename) const result = await RunDownloadJob.dispatch({ - url: update.download_url, + url: downloadUrl, filepath, timeout: 30000, allowedMimeTypes: diff --git a/admin/app/services/docker_service.ts b/admin/app/services/docker_service.ts index 5d94f54..f20e7e1 100644 --- a/admin/app/services/docker_service.ts +++ b/admin/app/services/docker_service.ts @@ -4,6 +4,7 @@ import logger from '@adonisjs/core/services/logger' import { inject } from '@adonisjs/core' import transmit from '@adonisjs/transmit/services/main' import { doResumableDownloadWithRetry } from '../utils/downloads.js' +import { rewriteDownloadUrl } from '../utils/download_mirrors.js' import { join } from 'path' import { ZIM_STORAGE_PATH } from '../utils/fs.js' import { SERVICE_NAMES } from '../../constants/service_names.js' @@ -614,8 +615,9 @@ export class DockerService { * At least one .zim file must be available before we can start the kiwix container. * We'll download the lightweight mini Wikipedia Top 100 zim file for this purpose. **/ - const WIKIPEDIA_ZIM_URL = + const WIKIPEDIA_ZIM_URL = rewriteDownloadUrl( 'https://github.com/Crosstalk-Solutions/project-nomad/raw/refs/heads/main/install/wikipedia_en_100_mini_2025-06.zim' + ) const filename = 'wikipedia_en_100_mini_2025-06.zim' const filepath = join(process.cwd(), ZIM_STORAGE_PATH, filename) logger.info(`[DockerService] Kiwix Serve pre-install: Downloading ZIM file to ${filepath}`) diff --git a/admin/app/services/map_service.ts b/admin/app/services/map_service.ts index beb74b2..a020dba 100644 --- a/admin/app/services/map_service.ts +++ b/admin/app/services/map_service.ts @@ -13,6 +13,7 @@ import { getFile, ensureDirectoryExists, } from '../utils/fs.js' +import { rewriteDownloadUrl } from '../utils/download_mirrors.js' import { join, resolve, sep } from 'path' import urlJoin from 'url-join' import { RunDownloadJob } from '#jobs/run_download_job' @@ -61,9 +62,9 @@ export class MapService implements IMapService { 'https://github.com/Crosstalk-Solutions/project-nomad-maps/raw/refs/heads/master/' ) - const resolvedURL = url ? new URL(url) : defaultTarFileURL + const resolvedURL = rewriteDownloadUrl((url ? new URL(url) : defaultTarFileURL).toString()) await doResumableDownloadWithRetry({ - url: resolvedURL.toString(), + url: resolvedURL, filepath: tempTarPath, timeout: 30000, max_retries: 2, @@ -245,7 +246,7 @@ export class MapService implements IMapService { // Perform a HEAD request to get the content length const { default: axios } = await import('axios') - const response = await axios.head(url) + const response = await axios.head(rewriteDownloadUrl(url)) if (response.status !== 200) { throw new Error(`Failed to fetch file info: ${response.status} ${response.statusText}`) diff --git a/admin/app/services/zim_service.ts b/admin/app/services/zim_service.ts index 3eee1cb..f1beacb 100644 --- a/admin/app/services/zim_service.ts +++ b/admin/app/services/zim_service.ts @@ -16,19 +16,17 @@ import { listDirectoryContents, ZIM_STORAGE_PATH, } from '../utils/fs.js' +import { rewriteDownloadUrl } from '../utils/download_mirrors.js' import { join, resolve, sep } from 'path' import { WikipediaOption, WikipediaState } from '../../types/downloads.js' -import vine from '@vinejs/vine' -import { wikipediaOptionsFileSchema } from '#validators/curated_collections' import WikipediaSelection from '#models/wikipedia_selection' import InstalledResource from '#models/installed_resource' import { RunDownloadJob } from '#jobs/run_download_job' import { SERVICE_NAMES } from '../../constants/service_names.js' import { CollectionManifestService } from './collection_manifest_service.js' -import type { CategoryWithStatus } from '../../types/collections.js' +import type { CategoryWithStatus, WikipediaSpec } from '../../types/collections.js' const ZIM_MIME_TYPES = ['application/x-zim', 'application/x-openzim', 'application/octet-stream'] -const WIKIPEDIA_OPTIONS_URL = 'https://raw.githubusercontent.com/Crosstalk-Solutions/project-nomad/refs/heads/main/collections/wikipedia.json' @inject() export class ZimService { @@ -106,7 +104,9 @@ export class ZimService { } // downloadLink['href'] will end with .meta4, we need to remove that to get the actual download URL - const download_url = downloadLink['href'].substring(0, downloadLink['href'].length - 6) + const download_url = rewriteDownloadUrl( + downloadLink['href'].substring(0, downloadLink['href'].length - 6) + ) const file_name = download_url.split('/').pop() || `${entry.title}.zim` const sizeBytes = parseInt(downloadLink['length'], 10) @@ -361,20 +361,14 @@ export class ZimService { // Wikipedia selector methods async getWikipediaOptions(): Promise { - try { - const response = await axios.get(WIKIPEDIA_OPTIONS_URL) - const data = response.data - - const validated = await vine.validate({ - schema: wikipediaOptionsFileSchema, - data, - }) - - return validated.options - } catch (error) { - logger.error(`[ZimService] Failed to fetch Wikipedia options:`, error) + const manifestService = new CollectionManifestService() + const spec = await manifestService.getSpecWithFallback('wikipedia') + if (!spec) { + logger.error('[ZimService] Failed to fetch Wikipedia options: no spec available') throw new Error('Failed to fetch Wikipedia options') } + + return spec.options } async getWikipediaSelection(): Promise { diff --git a/admin/app/utils/download_mirrors.ts b/admin/app/utils/download_mirrors.ts new file mode 100644 index 0000000..39b20c7 --- /dev/null +++ b/admin/app/utils/download_mirrors.ts @@ -0,0 +1,140 @@ +type DownloadMirrorRule = { + source: string + target: string +} + +let cachedMirrorRulesRaw: string | undefined +let cachedMirrorRules: DownloadMirrorRule[] = [] + +function normalizeMirrorPrefix(urlString: string): string { + const parsed = new URL(urlString) + parsed.search = '' + parsed.hash = '' + return parsed.toString() +} + +function joinMirrorUrl(targetPrefix: string, suffix: string): string { + if (!suffix) return targetPrefix + + if (targetPrefix.endsWith('/') && suffix.startsWith('/')) { + return `${targetPrefix}${suffix.slice(1)}` + } + + if (!targetPrefix.endsWith('/') && !suffix.startsWith('/')) { + return `${targetPrefix}/${suffix}` + } + + return `${targetPrefix}${suffix}` +} + +function normalizeDownloadMirrorRule(source: string, target: string): DownloadMirrorRule { + const normalizedSource = normalizeMirrorPrefix(source) + const normalizedTarget = normalizeMirrorPrefix(target) + + return { + source: normalizedSource, + target: normalizedTarget, + } +} + +export function parseDownloadMirrorRules(raw?: string | null): DownloadMirrorRule[] { + if (!raw?.trim()) { + return [] + } + + let parsed: unknown + try { + parsed = JSON.parse(raw) + } catch (error) { + console.warn( + `[download_mirrors] Ignoring invalid DOWNLOAD_MIRROR_RULES JSON: ${ + error instanceof Error ? error.message : String(error) + }` + ) + return [] + } + + const rules: DownloadMirrorRule[] = [] + + if (Array.isArray(parsed)) { + for (const entry of parsed) { + if ( + typeof entry !== 'object' || + entry === null || + !('source' in entry) || + !('target' in entry) || + typeof entry.source !== 'string' || + typeof entry.target !== 'string' + ) { + console.warn('[download_mirrors] Ignoring malformed mirror rule in DOWNLOAD_MIRROR_RULES array') + continue + } + + try { + rules.push(normalizeDownloadMirrorRule(entry.source, entry.target)) + } catch (error) { + console.warn( + `[download_mirrors] Ignoring invalid mirror rule ${JSON.stringify(entry)}: ${ + error instanceof Error ? error.message : String(error) + }` + ) + } + } + } else if (parsed && typeof parsed === 'object') { + for (const [source, target] of Object.entries(parsed as Record)) { + if (typeof target !== 'string') { + console.warn( + `[download_mirrors] Ignoring mirror rule for ${source}: target must be a string` + ) + continue + } + + try { + rules.push(normalizeDownloadMirrorRule(source, target)) + } catch (error) { + console.warn( + `[download_mirrors] Ignoring invalid mirror rule ${source}: ${ + error instanceof Error ? error.message : String(error) + }` + ) + } + } + } else { + console.warn( + '[download_mirrors] Ignoring DOWNLOAD_MIRROR_RULES because it must be a JSON object or array' + ) + return [] + } + + return rules.sort((a, b) => b.source.length - a.source.length) +} + +export function rewriteDownloadUrlWithRules(url: string, rules: DownloadMirrorRule[]): string { + for (const rule of rules) { + if (!url.startsWith(rule.source)) { + continue + } + + return joinMirrorUrl(rule.target, url.slice(rule.source.length)) + } + + return url +} + +export function getConfiguredDownloadMirrorRules(): DownloadMirrorRule[] { + const raw = process.env.DOWNLOAD_MIRROR_RULES + + if (raw === cachedMirrorRulesRaw) { + return cachedMirrorRules + } + + cachedMirrorRulesRaw = raw + cachedMirrorRules = parseDownloadMirrorRules(raw) + return cachedMirrorRules +} + +export function rewriteDownloadUrl(url: string): string { + return rewriteDownloadUrlWithRules(url, getConfiguredDownloadMirrorRules()) +} + +export type { DownloadMirrorRule } diff --git a/admin/start/env.ts b/admin/start/env.ts index ddf9b5f..15b89f7 100644 --- a/admin/start/env.ts +++ b/admin/start/env.ts @@ -19,6 +19,7 @@ export default await Env.create(new URL('../', import.meta.url), { URL: Env.schema.string(), LOG_LEVEL: Env.schema.string(), INTERNET_STATUS_TEST_URL: Env.schema.string.optional(), + DOWNLOAD_MIRROR_RULES: Env.schema.string.optional(), /* |---------------------------------------------------------- diff --git a/install/management_compose.yaml b/install/management_compose.yaml index 39081bb..27dd6e6 100644 --- a/install/management_compose.yaml +++ b/install/management_compose.yaml @@ -44,6 +44,10 @@ services: - REDIS_HOST=redis # If you change the Redis port, make sure to update this accordingly - REDIS_PORT=6379 + # Optional: rewrite one or more download URL prefixes to mirrors. + # Example: + # - DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"} + - DOWNLOAD_MIRROR_RULES= depends_on: mysql: condition: service_healthy @@ -117,4 +121,4 @@ services: volumes: nomad-update-shared: - driver: local \ No newline at end of file + driver: local