add configurable download mirrors

This commit is contained in:
Fred Reimer 2026-03-24 22:04:45 -04:00
parent efe6af9b24
commit d21c6b6855
10 changed files with 233 additions and 35 deletions

View File

@ -15,4 +15,8 @@ REDIS_PORT=6379
# Storage path for NOMAD content (ZIM files, maps, etc.) # Storage path for NOMAD content (ZIM files, maps, etc.)
# On Windows dev, use an absolute path like: C:/nomad-storage # On Windows dev, use an absolute path like: C:/nomad-storage
# On Linux production, use: /opt/project-nomad/storage # On Linux production, use: /opt/project-nomad/storage
NOMAD_STORAGE_PATH=/opt/project-nomad/storage NOMAD_STORAGE_PATH=/opt/project-nomad/storage
# Optional: rewrite download URLs to one or more mirrors using JSON source-prefix => target-prefix mappings.
# Example for Kiwix:
# DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
DOWNLOAD_MIRROR_RULES=

View File

@ -7,6 +7,7 @@ import { DockerService } from '#services/docker_service'
import { ZimService } from '#services/zim_service' import { ZimService } from '#services/zim_service'
import { MapService } from '#services/map_service' import { MapService } from '#services/map_service'
import { EmbedFileJob } from './embed_file_job.js' import { EmbedFileJob } from './embed_file_job.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
export class RunDownloadJob { export class RunDownloadJob {
static get queue() { static get queue() {
@ -18,7 +19,7 @@ export class RunDownloadJob {
} }
static getJobId(url: string): string { static getJobId(url: string): string {
return createHash('sha256').update(url).digest('hex').slice(0, 16) return createHash('sha256').update(rewriteDownloadUrl(url)).digest('hex').slice(0, 16)
} }
async handle(job: Job) { async handle(job: Job) {
@ -124,10 +125,11 @@ export class RunDownloadJob {
static async dispatch(params: RunDownloadJobParams) { static async dispatch(params: RunDownloadJobParams) {
const queueService = new QueueService() const queueService = new QueueService()
const queue = queueService.getQueue(this.queue) const queue = queueService.getQueue(this.queue)
const jobId = this.getJobId(params.url) const normalizedParams = { ...params, url: rewriteDownloadUrl(params.url) }
const jobId = this.getJobId(normalizedParams.url)
try { try {
const job = await queue.add(this.key, params, { const job = await queue.add(this.key, normalizedParams, {
jobId, jobId,
attempts: 3, attempts: 3,
backoff: { type: 'exponential', delay: 2000 }, backoff: { type: 'exponential', delay: 2000 },
@ -137,7 +139,7 @@ export class RunDownloadJob {
return { return {
job, job,
created: true, created: true,
message: `Dispatched download job for URL ${params.url}`, message: `Dispatched download job for URL ${normalizedParams.url}`,
} }
} catch (error) { } catch (error) {
if (error.message.includes('job already exists')) { if (error.message.includes('job already exists')) {
@ -145,7 +147,7 @@ export class RunDownloadJob {
return { return {
job: existing, job: existing,
created: false, created: false,
message: `Job already exists for URL ${params.url}`, message: `Job already exists for URL ${normalizedParams.url}`,
} }
} }
throw error throw error

View File

@ -12,10 +12,12 @@ import {
getFileStatsIfExists, getFileStatsIfExists,
ZIM_STORAGE_PATH, ZIM_STORAGE_PATH,
} from '../utils/fs.js' } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import type { import type {
ManifestType, ManifestType,
ZimCategoriesSpec, ZimCategoriesSpec,
MapsSpec, MapsSpec,
WikipediaSpec,
CategoryWithStatus, CategoryWithStatus,
CollectionWithStatus, CollectionWithStatus,
SpecResource, SpecResource,
@ -77,7 +79,7 @@ export class CollectionManifestService {
async getCachedSpec<T>(type: ManifestType): Promise<T | null> { async getCachedSpec<T>(type: ManifestType): Promise<T | null> {
const manifest = await CollectionManifest.find(type) const manifest = await CollectionManifest.find(type)
if (!manifest) return null if (!manifest) return null
return manifest.spec_data as T return this.applyDownloadMirrors(type, manifest.spec_data) as T
} }
async getSpecWithFallback<T>(type: ManifestType): Promise<T | null> { async getSpecWithFallback<T>(type: ManifestType): Promise<T | null> {
@ -170,6 +172,48 @@ export class CollectionManifestService {
return undefined return undefined
} }
private applyDownloadMirrors(
type: ManifestType,
spec: ZimCategoriesSpec | MapsSpec | WikipediaSpec
): ZimCategoriesSpec | MapsSpec | WikipediaSpec {
if (type === 'zim_categories') {
return {
...spec,
categories: (spec as ZimCategoriesSpec).categories.map((category) => ({
...category,
tiers: category.tiers.map((tier) => ({
...tier,
resources: tier.resources.map((resource) => ({
...resource,
url: rewriteDownloadUrl(resource.url),
})),
})),
})),
}
}
if (type === 'maps') {
return {
...spec,
collections: (spec as MapsSpec).collections.map((collection) => ({
...collection,
resources: collection.resources.map((resource) => ({
...resource,
url: rewriteDownloadUrl(resource.url),
})),
})),
}
}
return {
...spec,
options: (spec as WikipediaSpec).options.map((option) => ({
...option,
url: option.url ? rewriteDownloadUrl(option.url) : option.url,
})),
}
}
// ---- Filename parsing ---- // ---- Filename parsing ----
static parseZimFilename(filename: string): { resource_id: string; version: string } | null { static parseZimFilename(filename: string): { resource_id: string; version: string } | null {

View File

@ -4,6 +4,7 @@ import axios from 'axios'
import InstalledResource from '#models/installed_resource' import InstalledResource from '#models/installed_resource'
import { RunDownloadJob } from '../jobs/run_download_job.js' import { RunDownloadJob } from '../jobs/run_download_job.js'
import { ZIM_STORAGE_PATH } from '../utils/fs.js' import { ZIM_STORAGE_PATH } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join } from 'path' import { join } from 'path'
import type { import type {
ResourceUpdateCheckRequest, ResourceUpdateCheckRequest,
@ -49,12 +50,15 @@ export class CollectionUpdateService {
timeout: 15000, timeout: 15000,
}) })
logger.info( const updates = response.data.map((update) => ({
`[CollectionUpdateService] Update check complete: ${response.data.length} update(s) available` ...update,
) download_url: rewriteDownloadUrl(update.download_url),
}))
logger.info(`[CollectionUpdateService] Update check complete: ${updates.length} update(s) available`)
return { return {
updates: response.data, updates,
checked_at: new Date().toISOString(), checked_at: new Date().toISOString(),
} }
} catch (error) { } catch (error) {
@ -82,8 +86,10 @@ export class CollectionUpdateService {
async applyUpdate( async applyUpdate(
update: ResourceUpdateInfo update: ResourceUpdateInfo
): Promise<{ success: boolean; jobId?: string; error?: string }> { ): Promise<{ success: boolean; jobId?: string; error?: string }> {
const downloadUrl = rewriteDownloadUrl(update.download_url)
// Check if a download is already in progress for this URL // Check if a download is already in progress for this URL
const existingJob = await RunDownloadJob.getByUrl(update.download_url) const existingJob = await RunDownloadJob.getByUrl(downloadUrl)
if (existingJob) { if (existingJob) {
const state = await existingJob.getState() const state = await existingJob.getState()
if (state === 'active' || state === 'waiting' || state === 'delayed') { if (state === 'active' || state === 'waiting' || state === 'delayed') {
@ -98,7 +104,7 @@ export class CollectionUpdateService {
const filepath = this.buildFilepath(update, filename) const filepath = this.buildFilepath(update, filename)
const result = await RunDownloadJob.dispatch({ const result = await RunDownloadJob.dispatch({
url: update.download_url, url: downloadUrl,
filepath, filepath,
timeout: 30000, timeout: 30000,
allowedMimeTypes: allowedMimeTypes:

View File

@ -4,6 +4,7 @@ import logger from '@adonisjs/core/services/logger'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import transmit from '@adonisjs/transmit/services/main' import transmit from '@adonisjs/transmit/services/main'
import { doResumableDownloadWithRetry } from '../utils/downloads.js' import { doResumableDownloadWithRetry } from '../utils/downloads.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join } from 'path' import { join } from 'path'
import { ZIM_STORAGE_PATH } from '../utils/fs.js' import { ZIM_STORAGE_PATH } from '../utils/fs.js'
import { SERVICE_NAMES } from '../../constants/service_names.js' import { SERVICE_NAMES } from '../../constants/service_names.js'
@ -614,8 +615,9 @@ export class DockerService {
* At least one .zim file must be available before we can start the kiwix container. * At least one .zim file must be available before we can start the kiwix container.
* We'll download the lightweight mini Wikipedia Top 100 zim file for this purpose. * We'll download the lightweight mini Wikipedia Top 100 zim file for this purpose.
**/ **/
const WIKIPEDIA_ZIM_URL = const WIKIPEDIA_ZIM_URL = rewriteDownloadUrl(
'https://github.com/Crosstalk-Solutions/project-nomad/raw/refs/heads/main/install/wikipedia_en_100_mini_2025-06.zim' 'https://github.com/Crosstalk-Solutions/project-nomad/raw/refs/heads/main/install/wikipedia_en_100_mini_2025-06.zim'
)
const filename = 'wikipedia_en_100_mini_2025-06.zim' const filename = 'wikipedia_en_100_mini_2025-06.zim'
const filepath = join(process.cwd(), ZIM_STORAGE_PATH, filename) const filepath = join(process.cwd(), ZIM_STORAGE_PATH, filename)
logger.info(`[DockerService] Kiwix Serve pre-install: Downloading ZIM file to ${filepath}`) logger.info(`[DockerService] Kiwix Serve pre-install: Downloading ZIM file to ${filepath}`)

View File

@ -13,6 +13,7 @@ import {
getFile, getFile,
ensureDirectoryExists, ensureDirectoryExists,
} from '../utils/fs.js' } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join, resolve, sep } from 'path' import { join, resolve, sep } from 'path'
import urlJoin from 'url-join' import urlJoin from 'url-join'
import { RunDownloadJob } from '#jobs/run_download_job' import { RunDownloadJob } from '#jobs/run_download_job'
@ -61,9 +62,9 @@ export class MapService implements IMapService {
'https://github.com/Crosstalk-Solutions/project-nomad-maps/raw/refs/heads/master/' 'https://github.com/Crosstalk-Solutions/project-nomad-maps/raw/refs/heads/master/'
) )
const resolvedURL = url ? new URL(url) : defaultTarFileURL const resolvedURL = rewriteDownloadUrl((url ? new URL(url) : defaultTarFileURL).toString())
await doResumableDownloadWithRetry({ await doResumableDownloadWithRetry({
url: resolvedURL.toString(), url: resolvedURL,
filepath: tempTarPath, filepath: tempTarPath,
timeout: 30000, timeout: 30000,
max_retries: 2, max_retries: 2,
@ -245,7 +246,7 @@ export class MapService implements IMapService {
// Perform a HEAD request to get the content length // Perform a HEAD request to get the content length
const { default: axios } = await import('axios') const { default: axios } = await import('axios')
const response = await axios.head(url) const response = await axios.head(rewriteDownloadUrl(url))
if (response.status !== 200) { if (response.status !== 200) {
throw new Error(`Failed to fetch file info: ${response.status} ${response.statusText}`) throw new Error(`Failed to fetch file info: ${response.status} ${response.statusText}`)

View File

@ -16,19 +16,17 @@ import {
listDirectoryContents, listDirectoryContents,
ZIM_STORAGE_PATH, ZIM_STORAGE_PATH,
} from '../utils/fs.js' } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join, resolve, sep } from 'path' import { join, resolve, sep } from 'path'
import { WikipediaOption, WikipediaState } from '../../types/downloads.js' import { WikipediaOption, WikipediaState } from '../../types/downloads.js'
import vine from '@vinejs/vine'
import { wikipediaOptionsFileSchema } from '#validators/curated_collections'
import WikipediaSelection from '#models/wikipedia_selection' import WikipediaSelection from '#models/wikipedia_selection'
import InstalledResource from '#models/installed_resource' import InstalledResource from '#models/installed_resource'
import { RunDownloadJob } from '#jobs/run_download_job' import { RunDownloadJob } from '#jobs/run_download_job'
import { SERVICE_NAMES } from '../../constants/service_names.js' import { SERVICE_NAMES } from '../../constants/service_names.js'
import { CollectionManifestService } from './collection_manifest_service.js' import { CollectionManifestService } from './collection_manifest_service.js'
import type { CategoryWithStatus } from '../../types/collections.js' import type { CategoryWithStatus, WikipediaSpec } from '../../types/collections.js'
const ZIM_MIME_TYPES = ['application/x-zim', 'application/x-openzim', 'application/octet-stream'] const ZIM_MIME_TYPES = ['application/x-zim', 'application/x-openzim', 'application/octet-stream']
const WIKIPEDIA_OPTIONS_URL = 'https://raw.githubusercontent.com/Crosstalk-Solutions/project-nomad/refs/heads/main/collections/wikipedia.json'
@inject() @inject()
export class ZimService { export class ZimService {
@ -106,7 +104,9 @@ export class ZimService {
} }
// downloadLink['href'] will end with .meta4, we need to remove that to get the actual download URL // downloadLink['href'] will end with .meta4, we need to remove that to get the actual download URL
const download_url = downloadLink['href'].substring(0, downloadLink['href'].length - 6) const download_url = rewriteDownloadUrl(
downloadLink['href'].substring(0, downloadLink['href'].length - 6)
)
const file_name = download_url.split('/').pop() || `${entry.title}.zim` const file_name = download_url.split('/').pop() || `${entry.title}.zim`
const sizeBytes = parseInt(downloadLink['length'], 10) const sizeBytes = parseInt(downloadLink['length'], 10)
@ -361,20 +361,14 @@ export class ZimService {
// Wikipedia selector methods // Wikipedia selector methods
async getWikipediaOptions(): Promise<WikipediaOption[]> { async getWikipediaOptions(): Promise<WikipediaOption[]> {
try { const manifestService = new CollectionManifestService()
const response = await axios.get(WIKIPEDIA_OPTIONS_URL) const spec = await manifestService.getSpecWithFallback<WikipediaSpec>('wikipedia')
const data = response.data if (!spec) {
logger.error('[ZimService] Failed to fetch Wikipedia options: no spec available')
const validated = await vine.validate({
schema: wikipediaOptionsFileSchema,
data,
})
return validated.options
} catch (error) {
logger.error(`[ZimService] Failed to fetch Wikipedia options:`, error)
throw new Error('Failed to fetch Wikipedia options') throw new Error('Failed to fetch Wikipedia options')
} }
return spec.options
} }
async getWikipediaSelection(): Promise<WikipediaSelection | null> { async getWikipediaSelection(): Promise<WikipediaSelection | null> {

View File

@ -0,0 +1,140 @@
/**
 * A single URL rewrite rule: any URL that starts with `source` has that
 * prefix replaced by `target`. Both prefixes are stored normalized
 * (parsed by URL, query/fragment stripped) — see normalizeMirrorPrefix.
 */
type DownloadMirrorRule = {
  source: string
  target: string
}
// Memoized parse of process.env.DOWNLOAD_MIRROR_RULES. The raw string is
// cached alongside the parsed rules so the JSON is only re-parsed when the
// environment value actually changes (see getConfiguredDownloadMirrorRules).
let cachedMirrorRulesRaw: string | undefined
let cachedMirrorRules: DownloadMirrorRule[] = []
/**
 * Canonicalizes a mirror prefix: parses it as an absolute URL (throwing on
 * invalid input, which callers catch) and drops any query string and
 * fragment so that only origin + path remain.
 */
function normalizeMirrorPrefix(urlString: string): string {
  const prefix = new URL(urlString)
  prefix.hash = ''
  prefix.search = ''
  return prefix.toString()
}
/**
 * Concatenates a mirror target prefix with the remainder of a rewritten URL,
 * ensuring exactly one slash separates the two pieces (a duplicated slash is
 * collapsed, a missing one is inserted). An empty suffix returns the prefix
 * unchanged.
 */
function joinMirrorUrl(targetPrefix: string, suffix: string): string {
  if (suffix === '') {
    return targetPrefix
  }
  const prefixEndsWithSlash = targetPrefix.endsWith('/')
  const suffixStartsWithSlash = suffix.startsWith('/')
  if (prefixEndsWithSlash && suffixStartsWithSlash) {
    return targetPrefix + suffix.slice(1)
  }
  if (!prefixEndsWithSlash && !suffixStartsWithSlash) {
    return `${targetPrefix}/${suffix}`
  }
  return targetPrefix + suffix
}
/**
 * Builds a mirror rule from raw source/target strings, canonicalizing both
 * prefixes. Throws (via normalizeMirrorPrefix / URL) when either string is
 * not a valid absolute URL; callers treat that as a malformed rule.
 */
function normalizeDownloadMirrorRule(source: string, target: string): DownloadMirrorRule {
  return {
    source: normalizeMirrorPrefix(source),
    target: normalizeMirrorPrefix(target),
  }
}
/**
 * Parses the DOWNLOAD_MIRROR_RULES environment value into mirror rules.
 *
 * Accepts either a JSON object mapping source prefixes to target prefixes,
 * or a JSON array of { source, target } objects. Invalid JSON, malformed
 * entries, and unparsable prefix URLs are warned about and skipped rather
 * than thrown, so a bad configuration can never break downloads.
 *
 * @param raw - the raw environment value (may be unset or blank).
 * @returns rules sorted longest-source-first so the most specific prefix
 *          wins when several rules match the same URL.
 */
export function parseDownloadMirrorRules(raw?: string | null): DownloadMirrorRule[] {
  if (!raw?.trim()) {
    return []
  }
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch (error) {
    console.warn(
      `[download_mirrors] Ignoring invalid DOWNLOAD_MIRROR_RULES JSON: ${describeMirrorRuleError(error)}`
    )
    return []
  }
  let rules: DownloadMirrorRule[]
  if (Array.isArray(parsed)) {
    rules = parseMirrorRulesFromArray(parsed)
  } else if (parsed && typeof parsed === 'object') {
    rules = parseMirrorRulesFromObject(parsed as Record<string, unknown>)
  } else {
    console.warn(
      '[download_mirrors] Ignoring DOWNLOAD_MIRROR_RULES because it must be a JSON object or array'
    )
    return []
  }
  // Longest source prefix first: the most specific rule wins on rewrite.
  return rules.sort((a, b) => b.source.length - a.source.length)
}
/** Formats an unknown thrown value for warning output. */
function describeMirrorRuleError(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}
/** Parses rules from a JSON array of { source, target } entries, skipping malformed ones. */
function parseMirrorRulesFromArray(entries: unknown[]): DownloadMirrorRule[] {
  const rules: DownloadMirrorRule[] = []
  for (const entry of entries) {
    if (
      typeof entry !== 'object' ||
      entry === null ||
      !('source' in entry) ||
      !('target' in entry) ||
      typeof entry.source !== 'string' ||
      typeof entry.target !== 'string'
    ) {
      console.warn('[download_mirrors] Ignoring malformed mirror rule in DOWNLOAD_MIRROR_RULES array')
      continue
    }
    try {
      rules.push(normalizeDownloadMirrorRule(entry.source, entry.target))
    } catch (error) {
      // normalizeDownloadMirrorRule throws when either prefix is not a valid URL.
      console.warn(
        `[download_mirrors] Ignoring invalid mirror rule ${JSON.stringify(entry)}: ${describeMirrorRuleError(error)}`
      )
    }
  }
  return rules
}
/** Parses rules from a JSON object mapping source prefixes to target prefixes. */
function parseMirrorRulesFromObject(mapping: Record<string, unknown>): DownloadMirrorRule[] {
  const rules: DownloadMirrorRule[] = []
  for (const [source, target] of Object.entries(mapping)) {
    if (typeof target !== 'string') {
      console.warn(
        `[download_mirrors] Ignoring mirror rule for ${source}: target must be a string`
      )
      continue
    }
    try {
      rules.push(normalizeDownloadMirrorRule(source, target))
    } catch (error) {
      console.warn(
        `[download_mirrors] Ignoring invalid mirror rule ${source}: ${describeMirrorRuleError(error)}`
      )
    }
  }
  return rules
}
/**
 * Applies the first matching mirror rule to a URL. Rules are assumed to be
 * ordered longest-source-first (as produced by parseDownloadMirrorRules) so
 * the most specific prefix wins. Returns the URL unchanged when no rule's
 * source prefix matches.
 */
export function rewriteDownloadUrlWithRules(url: string, rules: DownloadMirrorRule[]): string {
  const matched = rules.find((rule) => url.startsWith(rule.source))
  if (matched === undefined) {
    return url
  }
  return joinMirrorUrl(matched.target, url.slice(matched.source.length))
}
/**
 * Returns the mirror rules configured via DOWNLOAD_MIRROR_RULES, memoized
 * per process: the raw env string is compared against the last-seen value
 * and the JSON is only re-parsed when it changes.
 */
export function getConfiguredDownloadMirrorRules(): DownloadMirrorRule[] {
  const currentRaw = process.env.DOWNLOAD_MIRROR_RULES
  if (currentRaw !== cachedMirrorRulesRaw) {
    cachedMirrorRulesRaw = currentRaw
    cachedMirrorRules = parseDownloadMirrorRules(currentRaw)
  }
  return cachedMirrorRules
}
/**
 * Rewrites a download URL through the rules configured in the
 * DOWNLOAD_MIRROR_RULES environment variable. Returns the URL unchanged
 * when no configured mirror prefix matches (or none are configured).
 */
export function rewriteDownloadUrl(url: string): string {
  return rewriteDownloadUrlWithRules(url, getConfiguredDownloadMirrorRules())
}
export type { DownloadMirrorRule }

View File

@ -19,6 +19,7 @@ export default await Env.create(new URL('../', import.meta.url), {
URL: Env.schema.string(), URL: Env.schema.string(),
LOG_LEVEL: Env.schema.string(), LOG_LEVEL: Env.schema.string(),
INTERNET_STATUS_TEST_URL: Env.schema.string.optional(), INTERNET_STATUS_TEST_URL: Env.schema.string.optional(),
DOWNLOAD_MIRROR_RULES: Env.schema.string.optional(),
/* /*
|---------------------------------------------------------- |----------------------------------------------------------

View File

@ -44,6 +44,10 @@ services:
- REDIS_HOST=redis - REDIS_HOST=redis
# If you change the Redis port, make sure to update this accordingly # If you change the Redis port, make sure to update this accordingly
- REDIS_PORT=6379 - REDIS_PORT=6379
# Optional: rewrite one or more download URL prefixes to mirrors.
# Example:
# - DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
- DOWNLOAD_MIRROR_RULES=
depends_on: depends_on:
mysql: mysql:
condition: service_healthy condition: service_healthy
@ -117,4 +121,4 @@ services:
volumes: volumes:
nomad-update-shared: nomad-update-shared:
driver: local driver: local