add configurable download mirrors

This commit is contained in:
Fred Reimer 2026-03-24 22:04:45 -04:00
parent efe6af9b24
commit d21c6b6855
10 changed files with 233 additions and 35 deletions

View File

@ -15,4 +15,8 @@ REDIS_PORT=6379
# Storage path for NOMAD content (ZIM files, maps, etc.)
# On Windows dev, use an absolute path like: C:/nomad-storage
# On Linux production, use: /opt/project-nomad/storage
NOMAD_STORAGE_PATH=/opt/project-nomad/storage
NOMAD_STORAGE_PATH=/opt/project-nomad/storage
# Optional: rewrite download URLs to one or more mirrors using JSON source-prefix => target-prefix mappings.
# Example for Kiwix:
# DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
DOWNLOAD_MIRROR_RULES=

View File

@ -7,6 +7,7 @@ import { DockerService } from '#services/docker_service'
import { ZimService } from '#services/zim_service'
import { MapService } from '#services/map_service'
import { EmbedFileJob } from './embed_file_job.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
export class RunDownloadJob {
static get queue() {
@ -18,7 +19,7 @@ export class RunDownloadJob {
}
static getJobId(url: string): string {
return createHash('sha256').update(url).digest('hex').slice(0, 16)
return createHash('sha256').update(rewriteDownloadUrl(url)).digest('hex').slice(0, 16)
}
async handle(job: Job) {
@ -124,10 +125,11 @@ export class RunDownloadJob {
static async dispatch(params: RunDownloadJobParams) {
const queueService = new QueueService()
const queue = queueService.getQueue(this.queue)
const jobId = this.getJobId(params.url)
const normalizedParams = { ...params, url: rewriteDownloadUrl(params.url) }
const jobId = this.getJobId(normalizedParams.url)
try {
const job = await queue.add(this.key, params, {
const job = await queue.add(this.key, normalizedParams, {
jobId,
attempts: 3,
backoff: { type: 'exponential', delay: 2000 },
@ -137,7 +139,7 @@ export class RunDownloadJob {
return {
job,
created: true,
message: `Dispatched download job for URL ${params.url}`,
message: `Dispatched download job for URL ${normalizedParams.url}`,
}
} catch (error) {
if (error.message.includes('job already exists')) {
@ -145,7 +147,7 @@ export class RunDownloadJob {
return {
job: existing,
created: false,
message: `Job already exists for URL ${params.url}`,
message: `Job already exists for URL ${normalizedParams.url}`,
}
}
throw error

View File

@ -12,10 +12,12 @@ import {
getFileStatsIfExists,
ZIM_STORAGE_PATH,
} from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import type {
ManifestType,
ZimCategoriesSpec,
MapsSpec,
WikipediaSpec,
CategoryWithStatus,
CollectionWithStatus,
SpecResource,
@ -77,7 +79,7 @@ export class CollectionManifestService {
async getCachedSpec<T>(type: ManifestType): Promise<T | null> {
const manifest = await CollectionManifest.find(type)
if (!manifest) return null
return manifest.spec_data as T
return this.applyDownloadMirrors(type, manifest.spec_data) as T
}
async getSpecWithFallback<T>(type: ManifestType): Promise<T | null> {
@ -170,6 +172,48 @@ export class CollectionManifestService {
return undefined
}
private applyDownloadMirrors(
type: ManifestType,
spec: ZimCategoriesSpec | MapsSpec | WikipediaSpec
): ZimCategoriesSpec | MapsSpec | WikipediaSpec {
if (type === 'zim_categories') {
return {
...spec,
categories: (spec as ZimCategoriesSpec).categories.map((category) => ({
...category,
tiers: category.tiers.map((tier) => ({
...tier,
resources: tier.resources.map((resource) => ({
...resource,
url: rewriteDownloadUrl(resource.url),
})),
})),
})),
}
}
if (type === 'maps') {
return {
...spec,
collections: (spec as MapsSpec).collections.map((collection) => ({
...collection,
resources: collection.resources.map((resource) => ({
...resource,
url: rewriteDownloadUrl(resource.url),
})),
})),
}
}
return {
...spec,
options: (spec as WikipediaSpec).options.map((option) => ({
...option,
url: option.url ? rewriteDownloadUrl(option.url) : option.url,
})),
}
}
// ---- Filename parsing ----
static parseZimFilename(filename: string): { resource_id: string; version: string } | null {

View File

@ -4,6 +4,7 @@ import axios from 'axios'
import InstalledResource from '#models/installed_resource'
import { RunDownloadJob } from '../jobs/run_download_job.js'
import { ZIM_STORAGE_PATH } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join } from 'path'
import type {
ResourceUpdateCheckRequest,
@ -49,12 +50,15 @@ export class CollectionUpdateService {
timeout: 15000,
})
logger.info(
`[CollectionUpdateService] Update check complete: ${response.data.length} update(s) available`
)
const updates = response.data.map((update) => ({
...update,
download_url: rewriteDownloadUrl(update.download_url),
}))
logger.info(`[CollectionUpdateService] Update check complete: ${updates.length} update(s) available`)
return {
updates: response.data,
updates,
checked_at: new Date().toISOString(),
}
} catch (error) {
@ -82,8 +86,10 @@ export class CollectionUpdateService {
async applyUpdate(
update: ResourceUpdateInfo
): Promise<{ success: boolean; jobId?: string; error?: string }> {
const downloadUrl = rewriteDownloadUrl(update.download_url)
// Check if a download is already in progress for this URL
const existingJob = await RunDownloadJob.getByUrl(update.download_url)
const existingJob = await RunDownloadJob.getByUrl(downloadUrl)
if (existingJob) {
const state = await existingJob.getState()
if (state === 'active' || state === 'waiting' || state === 'delayed') {
@ -98,7 +104,7 @@ export class CollectionUpdateService {
const filepath = this.buildFilepath(update, filename)
const result = await RunDownloadJob.dispatch({
url: update.download_url,
url: downloadUrl,
filepath,
timeout: 30000,
allowedMimeTypes:

View File

@ -4,6 +4,7 @@ import logger from '@adonisjs/core/services/logger'
import { inject } from '@adonisjs/core'
import transmit from '@adonisjs/transmit/services/main'
import { doResumableDownloadWithRetry } from '../utils/downloads.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join } from 'path'
import { ZIM_STORAGE_PATH } from '../utils/fs.js'
import { SERVICE_NAMES } from '../../constants/service_names.js'
@ -614,8 +615,9 @@ export class DockerService {
* At least one .zim file must be available before we can start the kiwix container.
* We'll download the lightweight mini Wikipedia Top 100 zim file for this purpose.
**/
const WIKIPEDIA_ZIM_URL =
const WIKIPEDIA_ZIM_URL = rewriteDownloadUrl(
'https://github.com/Crosstalk-Solutions/project-nomad/raw/refs/heads/main/install/wikipedia_en_100_mini_2025-06.zim'
)
const filename = 'wikipedia_en_100_mini_2025-06.zim'
const filepath = join(process.cwd(), ZIM_STORAGE_PATH, filename)
logger.info(`[DockerService] Kiwix Serve pre-install: Downloading ZIM file to ${filepath}`)

View File

@ -13,6 +13,7 @@ import {
getFile,
ensureDirectoryExists,
} from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join, resolve, sep } from 'path'
import urlJoin from 'url-join'
import { RunDownloadJob } from '#jobs/run_download_job'
@ -61,9 +62,9 @@ export class MapService implements IMapService {
'https://github.com/Crosstalk-Solutions/project-nomad-maps/raw/refs/heads/master/'
)
const resolvedURL = url ? new URL(url) : defaultTarFileURL
const resolvedURL = rewriteDownloadUrl((url ? new URL(url) : defaultTarFileURL).toString())
await doResumableDownloadWithRetry({
url: resolvedURL.toString(),
url: resolvedURL,
filepath: tempTarPath,
timeout: 30000,
max_retries: 2,
@ -245,7 +246,7 @@ export class MapService implements IMapService {
// Perform a HEAD request to get the content length
const { default: axios } = await import('axios')
const response = await axios.head(url)
const response = await axios.head(rewriteDownloadUrl(url))
if (response.status !== 200) {
throw new Error(`Failed to fetch file info: ${response.status} ${response.statusText}`)

View File

@ -16,19 +16,17 @@ import {
listDirectoryContents,
ZIM_STORAGE_PATH,
} from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join, resolve, sep } from 'path'
import { WikipediaOption, WikipediaState } from '../../types/downloads.js'
import vine from '@vinejs/vine'
import { wikipediaOptionsFileSchema } from '#validators/curated_collections'
import WikipediaSelection from '#models/wikipedia_selection'
import InstalledResource from '#models/installed_resource'
import { RunDownloadJob } from '#jobs/run_download_job'
import { SERVICE_NAMES } from '../../constants/service_names.js'
import { CollectionManifestService } from './collection_manifest_service.js'
import type { CategoryWithStatus } from '../../types/collections.js'
import type { CategoryWithStatus, WikipediaSpec } from '../../types/collections.js'
const ZIM_MIME_TYPES = ['application/x-zim', 'application/x-openzim', 'application/octet-stream']
const WIKIPEDIA_OPTIONS_URL = 'https://raw.githubusercontent.com/Crosstalk-Solutions/project-nomad/refs/heads/main/collections/wikipedia.json'
@inject()
export class ZimService {
@ -106,7 +104,9 @@ export class ZimService {
}
// downloadLink['href'] will end with .meta4, we need to remove that to get the actual download URL
const download_url = downloadLink['href'].substring(0, downloadLink['href'].length - 6)
const download_url = rewriteDownloadUrl(
downloadLink['href'].substring(0, downloadLink['href'].length - 6)
)
const file_name = download_url.split('/').pop() || `${entry.title}.zim`
const sizeBytes = parseInt(downloadLink['length'], 10)
@ -361,20 +361,14 @@ export class ZimService {
// Wikipedia selector methods
async getWikipediaOptions(): Promise<WikipediaOption[]> {
try {
const response = await axios.get(WIKIPEDIA_OPTIONS_URL)
const data = response.data
const validated = await vine.validate({
schema: wikipediaOptionsFileSchema,
data,
})
return validated.options
} catch (error) {
logger.error(`[ZimService] Failed to fetch Wikipedia options:`, error)
const manifestService = new CollectionManifestService()
const spec = await manifestService.getSpecWithFallback<WikipediaSpec>('wikipedia')
if (!spec) {
logger.error('[ZimService] Failed to fetch Wikipedia options: no spec available')
throw new Error('Failed to fetch Wikipedia options')
}
return spec.options
}
async getWikipediaSelection(): Promise<WikipediaSelection | null> {

View File

@ -0,0 +1,140 @@
// One URL-rewrite rule: any download URL that starts with `source` is
// redirected to `target` plus the remaining suffix of the original URL.
type DownloadMirrorRule = {
  source: string
  target: string
}

// Cache of the parsed rules, keyed by the raw env-var string so that a changed
// DOWNLOAD_MIRROR_RULES value triggers a re-parse (see getConfiguredDownloadMirrorRules).
let cachedMirrorRulesRaw: string | undefined
let cachedMirrorRules: DownloadMirrorRule[] = []
/**
 * Canonicalize a mirror prefix via WHATWG URL parsing (validates the string,
 * lowercases the host, etc.) and strip any query/fragment — a prefix used for
 * startsWith matching must not carry either. Throws TypeError on invalid URLs.
 */
function normalizeMirrorPrefix(urlString: string): string {
  const normalized = new URL(urlString)
  normalized.hash = ''
  normalized.search = ''
  return normalized.toString()
}
/**
 * Concatenate a mirror target prefix and a URL suffix, guaranteeing exactly
 * one '/' at the seam regardless of which side (if either) supplies it.
 * An empty suffix returns the prefix untouched.
 */
function joinMirrorUrl(targetPrefix: string, suffix: string): string {
  if (suffix === '') {
    return targetPrefix
  }
  const prefixEndsWithSlash = targetPrefix.endsWith('/')
  const suffixStartsWithSlash = suffix.startsWith('/')
  if (prefixEndsWithSlash && suffixStartsWithSlash) {
    // Both sides provide a slash — drop the duplicate.
    return targetPrefix + suffix.slice(1)
  }
  if (!prefixEndsWithSlash && !suffixStartsWithSlash) {
    // Neither side provides a slash — insert one.
    return `${targetPrefix}/${suffix}`
  }
  return targetPrefix + suffix
}
/**
 * Build a rule with both sides canonicalized by normalizeMirrorPrefix.
 * Propagates the TypeError thrown for an invalid source or target URL;
 * the caller is expected to catch and skip the bad rule.
 */
function normalizeDownloadMirrorRule(source: string, target: string): DownloadMirrorRule {
  return {
    source: normalizeMirrorPrefix(source),
    target: normalizeMirrorPrefix(target),
  }
}
/**
 * Parse the DOWNLOAD_MIRROR_RULES env value into normalized rewrite rules.
 *
 * Accepts either a JSON object ({ sourcePrefix: targetPrefix, ... }) or a JSON
 * array of { source, target } entries. This function never throws: invalid
 * JSON, a non-object/non-array payload, and individually malformed or
 * unparsable rules are logged with console.warn and skipped. The result is
 * sorted longest source first so the most specific prefix wins when matching.
 */
export function parseDownloadMirrorRules(raw?: string | null): DownloadMirrorRule[] {
  // A missing or blank value means "no mirrors configured".
  if (!raw?.trim()) {
    return []
  }

  let decoded: unknown
  try {
    decoded = JSON.parse(raw)
  } catch (error) {
    console.warn(
      `[download_mirrors] Ignoring invalid DOWNLOAD_MIRROR_RULES JSON: ${
        error instanceof Error ? error.message : String(error)
      }`
    )
    return []
  }

  const collected: DownloadMirrorRule[] = []

  if (Array.isArray(decoded)) {
    // Array form: each entry must be an object with string source/target.
    for (const entry of decoded) {
      const hasValidShape =
        typeof entry === 'object' &&
        entry !== null &&
        'source' in entry &&
        'target' in entry &&
        typeof entry.source === 'string' &&
        typeof entry.target === 'string'
      if (!hasValidShape) {
        console.warn('[download_mirrors] Ignoring malformed mirror rule in DOWNLOAD_MIRROR_RULES array')
        continue
      }
      try {
        collected.push(normalizeDownloadMirrorRule(entry.source, entry.target))
      } catch (error) {
        console.warn(
          `[download_mirrors] Ignoring invalid mirror rule ${JSON.stringify(entry)}: ${
            error instanceof Error ? error.message : String(error)
          }`
        )
      }
    }
  } else if (decoded && typeof decoded === 'object') {
    // Object form: keys are source prefixes, values are target prefixes.
    for (const [source, target] of Object.entries(decoded as Record<string, unknown>)) {
      if (typeof target !== 'string') {
        console.warn(
          `[download_mirrors] Ignoring mirror rule for ${source}: target must be a string`
        )
        continue
      }
      try {
        collected.push(normalizeDownloadMirrorRule(source, target))
      } catch (error) {
        console.warn(
          `[download_mirrors] Ignoring invalid mirror rule ${source}: ${
            error instanceof Error ? error.message : String(error)
          }`
        )
      }
    }
  } else {
    console.warn(
      '[download_mirrors] Ignoring DOWNLOAD_MIRROR_RULES because it must be a JSON object or array'
    )
    return []
  }

  // Longest source prefix first → the most specific rule matches first.
  return collected.sort((a, b) => b.source.length - a.source.length)
}
/**
 * Rewrite `url` using the first rule whose source is a prefix of it.
 * Rules are expected to be pre-sorted longest-source-first (see
 * parseDownloadMirrorRules), so the first hit is the most specific one.
 * Returns the URL unchanged when no rule matches.
 */
export function rewriteDownloadUrlWithRules(url: string, rules: DownloadMirrorRule[]): string {
  const matched = rules.find((rule) => url.startsWith(rule.source))
  if (!matched) {
    return url
  }
  return joinMirrorUrl(matched.target, url.slice(matched.source.length))
}
/**
 * Return the mirror rules configured via the DOWNLOAD_MIRROR_RULES env var.
 * The parsed result is memoized against the raw string, so re-parsing only
 * happens if the env value changes (normally never, at runtime).
 */
export function getConfiguredDownloadMirrorRules(): DownloadMirrorRule[] {
  const raw = process.env.DOWNLOAD_MIRROR_RULES
  if (raw !== cachedMirrorRulesRaw) {
    cachedMirrorRulesRaw = raw
    cachedMirrorRules = parseDownloadMirrorRules(raw)
  }
  return cachedMirrorRules
}
/**
 * Rewrite a download URL through the env-configured mirror rules.
 * Returns the URL unchanged when no rule matches (or none are configured).
 */
export function rewriteDownloadUrl(url: string): string {
  const rules = getConfiguredDownloadMirrorRules()
  return rewriteDownloadUrlWithRules(url, rules)
}
// Re-exported so callers can type their own rule lists (e.g. for rewriteDownloadUrlWithRules).
export type { DownloadMirrorRule }

View File

@ -19,6 +19,7 @@ export default await Env.create(new URL('../', import.meta.url), {
URL: Env.schema.string(),
LOG_LEVEL: Env.schema.string(),
INTERNET_STATUS_TEST_URL: Env.schema.string.optional(),
DOWNLOAD_MIRROR_RULES: Env.schema.string.optional(),
/*
|----------------------------------------------------------

View File

@ -44,6 +44,10 @@ services:
- REDIS_HOST=redis
# If you change the Redis port, make sure to update this accordingly
- REDIS_PORT=6379
# Optional: rewrite one or more download URL prefixes to mirrors.
# Example:
# - DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
- DOWNLOAD_MIRROR_RULES=
depends_on:
mysql:
condition: service_healthy
@ -117,4 +121,4 @@ services:
volumes:
nomad-update-shared:
driver: local
driver: local