add configurable download mirrors

This commit is contained in:
Fred Reimer 2026-03-24 22:04:45 -04:00
parent efe6af9b24
commit d21c6b6855
10 changed files with 233 additions and 35 deletions

View File

@ -15,4 +15,8 @@ REDIS_PORT=6379
# Storage path for NOMAD content (ZIM files, maps, etc.) # Storage path for NOMAD content (ZIM files, maps, etc.)
# On Windows dev, use an absolute path like: C:/nomad-storage # On Windows dev, use an absolute path like: C:/nomad-storage
# On Linux production, use: /opt/project-nomad/storage # On Linux production, use: /opt/project-nomad/storage
NOMAD_STORAGE_PATH=/opt/project-nomad/storage NOMAD_STORAGE_PATH=/opt/project-nomad/storage
# Optional: rewrite download URLs to one or more mirrors using JSON source-prefix => target-prefix mappings.
# Example for Kiwix:
# DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
DOWNLOAD_MIRROR_RULES=

View File

@ -7,6 +7,7 @@ import { DockerService } from '#services/docker_service'
import { ZimService } from '#services/zim_service' import { ZimService } from '#services/zim_service'
import { MapService } from '#services/map_service' import { MapService } from '#services/map_service'
import { EmbedFileJob } from './embed_file_job.js' import { EmbedFileJob } from './embed_file_job.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
export class RunDownloadJob { export class RunDownloadJob {
static get queue() { static get queue() {
@ -18,7 +19,7 @@ export class RunDownloadJob {
} }
static getJobId(url: string): string { static getJobId(url: string): string {
return createHash('sha256').update(url).digest('hex').slice(0, 16) return createHash('sha256').update(rewriteDownloadUrl(url)).digest('hex').slice(0, 16)
} }
async handle(job: Job) { async handle(job: Job) {
@ -124,10 +125,11 @@ export class RunDownloadJob {
static async dispatch(params: RunDownloadJobParams) { static async dispatch(params: RunDownloadJobParams) {
const queueService = new QueueService() const queueService = new QueueService()
const queue = queueService.getQueue(this.queue) const queue = queueService.getQueue(this.queue)
const jobId = this.getJobId(params.url) const normalizedParams = { ...params, url: rewriteDownloadUrl(params.url) }
const jobId = this.getJobId(normalizedParams.url)
try { try {
const job = await queue.add(this.key, params, { const job = await queue.add(this.key, normalizedParams, {
jobId, jobId,
attempts: 3, attempts: 3,
backoff: { type: 'exponential', delay: 2000 }, backoff: { type: 'exponential', delay: 2000 },
@ -137,7 +139,7 @@ export class RunDownloadJob {
return { return {
job, job,
created: true, created: true,
message: `Dispatched download job for URL ${params.url}`, message: `Dispatched download job for URL ${normalizedParams.url}`,
} }
} catch (error) { } catch (error) {
if (error.message.includes('job already exists')) { if (error.message.includes('job already exists')) {
@ -145,7 +147,7 @@ export class RunDownloadJob {
return { return {
job: existing, job: existing,
created: false, created: false,
message: `Job already exists for URL ${params.url}`, message: `Job already exists for URL ${normalizedParams.url}`,
} }
} }
throw error throw error

View File

@ -12,10 +12,12 @@ import {
getFileStatsIfExists, getFileStatsIfExists,
ZIM_STORAGE_PATH, ZIM_STORAGE_PATH,
} from '../utils/fs.js' } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import type { import type {
ManifestType, ManifestType,
ZimCategoriesSpec, ZimCategoriesSpec,
MapsSpec, MapsSpec,
WikipediaSpec,
CategoryWithStatus, CategoryWithStatus,
CollectionWithStatus, CollectionWithStatus,
SpecResource, SpecResource,
@ -77,7 +79,7 @@ export class CollectionManifestService {
async getCachedSpec<T>(type: ManifestType): Promise<T | null> { async getCachedSpec<T>(type: ManifestType): Promise<T | null> {
const manifest = await CollectionManifest.find(type) const manifest = await CollectionManifest.find(type)
if (!manifest) return null if (!manifest) return null
return manifest.spec_data as T return this.applyDownloadMirrors(type, manifest.spec_data) as T
} }
async getSpecWithFallback<T>(type: ManifestType): Promise<T | null> { async getSpecWithFallback<T>(type: ManifestType): Promise<T | null> {
@ -170,6 +172,48 @@ export class CollectionManifestService {
return undefined return undefined
} }
private applyDownloadMirrors(
type: ManifestType,
spec: ZimCategoriesSpec | MapsSpec | WikipediaSpec
): ZimCategoriesSpec | MapsSpec | WikipediaSpec {
if (type === 'zim_categories') {
return {
...spec,
categories: (spec as ZimCategoriesSpec).categories.map((category) => ({
...category,
tiers: category.tiers.map((tier) => ({
...tier,
resources: tier.resources.map((resource) => ({
...resource,
url: rewriteDownloadUrl(resource.url),
})),
})),
})),
}
}
if (type === 'maps') {
return {
...spec,
collections: (spec as MapsSpec).collections.map((collection) => ({
...collection,
resources: collection.resources.map((resource) => ({
...resource,
url: rewriteDownloadUrl(resource.url),
})),
})),
}
}
return {
...spec,
options: (spec as WikipediaSpec).options.map((option) => ({
...option,
url: option.url ? rewriteDownloadUrl(option.url) : option.url,
})),
}
}
// ---- Filename parsing ---- // ---- Filename parsing ----
static parseZimFilename(filename: string): { resource_id: string; version: string } | null { static parseZimFilename(filename: string): { resource_id: string; version: string } | null {

View File

@ -4,6 +4,7 @@ import axios from 'axios'
import InstalledResource from '#models/installed_resource' import InstalledResource from '#models/installed_resource'
import { RunDownloadJob } from '../jobs/run_download_job.js' import { RunDownloadJob } from '../jobs/run_download_job.js'
import { ZIM_STORAGE_PATH } from '../utils/fs.js' import { ZIM_STORAGE_PATH } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join } from 'path' import { join } from 'path'
import type { import type {
ResourceUpdateCheckRequest, ResourceUpdateCheckRequest,
@ -49,12 +50,15 @@ export class CollectionUpdateService {
timeout: 15000, timeout: 15000,
}) })
logger.info( const updates = response.data.map((update) => ({
`[CollectionUpdateService] Update check complete: ${response.data.length} update(s) available` ...update,
) download_url: rewriteDownloadUrl(update.download_url),
}))
logger.info(`[CollectionUpdateService] Update check complete: ${updates.length} update(s) available`)
return { return {
updates: response.data, updates,
checked_at: new Date().toISOString(), checked_at: new Date().toISOString(),
} }
} catch (error) { } catch (error) {
@ -82,8 +86,10 @@ export class CollectionUpdateService {
async applyUpdate( async applyUpdate(
update: ResourceUpdateInfo update: ResourceUpdateInfo
): Promise<{ success: boolean; jobId?: string; error?: string }> { ): Promise<{ success: boolean; jobId?: string; error?: string }> {
const downloadUrl = rewriteDownloadUrl(update.download_url)
// Check if a download is already in progress for this URL // Check if a download is already in progress for this URL
const existingJob = await RunDownloadJob.getByUrl(update.download_url) const existingJob = await RunDownloadJob.getByUrl(downloadUrl)
if (existingJob) { if (existingJob) {
const state = await existingJob.getState() const state = await existingJob.getState()
if (state === 'active' || state === 'waiting' || state === 'delayed') { if (state === 'active' || state === 'waiting' || state === 'delayed') {
@ -98,7 +104,7 @@ export class CollectionUpdateService {
const filepath = this.buildFilepath(update, filename) const filepath = this.buildFilepath(update, filename)
const result = await RunDownloadJob.dispatch({ const result = await RunDownloadJob.dispatch({
url: update.download_url, url: downloadUrl,
filepath, filepath,
timeout: 30000, timeout: 30000,
allowedMimeTypes: allowedMimeTypes:

View File

@ -4,6 +4,7 @@ import logger from '@adonisjs/core/services/logger'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import transmit from '@adonisjs/transmit/services/main' import transmit from '@adonisjs/transmit/services/main'
import { doResumableDownloadWithRetry } from '../utils/downloads.js' import { doResumableDownloadWithRetry } from '../utils/downloads.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join } from 'path' import { join } from 'path'
import { ZIM_STORAGE_PATH } from '../utils/fs.js' import { ZIM_STORAGE_PATH } from '../utils/fs.js'
import { SERVICE_NAMES } from '../../constants/service_names.js' import { SERVICE_NAMES } from '../../constants/service_names.js'
@ -614,8 +615,9 @@ export class DockerService {
* At least one .zim file must be available before we can start the kiwix container. * At least one .zim file must be available before we can start the kiwix container.
* We'll download the lightweight mini Wikipedia Top 100 zim file for this purpose. * We'll download the lightweight mini Wikipedia Top 100 zim file for this purpose.
**/ **/
const WIKIPEDIA_ZIM_URL = const WIKIPEDIA_ZIM_URL = rewriteDownloadUrl(
'https://github.com/Crosstalk-Solutions/project-nomad/raw/refs/heads/main/install/wikipedia_en_100_mini_2025-06.zim' 'https://github.com/Crosstalk-Solutions/project-nomad/raw/refs/heads/main/install/wikipedia_en_100_mini_2025-06.zim'
)
const filename = 'wikipedia_en_100_mini_2025-06.zim' const filename = 'wikipedia_en_100_mini_2025-06.zim'
const filepath = join(process.cwd(), ZIM_STORAGE_PATH, filename) const filepath = join(process.cwd(), ZIM_STORAGE_PATH, filename)
logger.info(`[DockerService] Kiwix Serve pre-install: Downloading ZIM file to ${filepath}`) logger.info(`[DockerService] Kiwix Serve pre-install: Downloading ZIM file to ${filepath}`)

View File

@ -13,6 +13,7 @@ import {
getFile, getFile,
ensureDirectoryExists, ensureDirectoryExists,
} from '../utils/fs.js' } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join, resolve, sep } from 'path' import { join, resolve, sep } from 'path'
import urlJoin from 'url-join' import urlJoin from 'url-join'
import { RunDownloadJob } from '#jobs/run_download_job' import { RunDownloadJob } from '#jobs/run_download_job'
@ -61,9 +62,9 @@ export class MapService implements IMapService {
'https://github.com/Crosstalk-Solutions/project-nomad-maps/raw/refs/heads/master/' 'https://github.com/Crosstalk-Solutions/project-nomad-maps/raw/refs/heads/master/'
) )
const resolvedURL = url ? new URL(url) : defaultTarFileURL const resolvedURL = rewriteDownloadUrl((url ? new URL(url) : defaultTarFileURL).toString())
await doResumableDownloadWithRetry({ await doResumableDownloadWithRetry({
url: resolvedURL.toString(), url: resolvedURL,
filepath: tempTarPath, filepath: tempTarPath,
timeout: 30000, timeout: 30000,
max_retries: 2, max_retries: 2,
@ -245,7 +246,7 @@ export class MapService implements IMapService {
// Perform a HEAD request to get the content length // Perform a HEAD request to get the content length
const { default: axios } = await import('axios') const { default: axios } = await import('axios')
const response = await axios.head(url) const response = await axios.head(rewriteDownloadUrl(url))
if (response.status !== 200) { if (response.status !== 200) {
throw new Error(`Failed to fetch file info: ${response.status} ${response.statusText}`) throw new Error(`Failed to fetch file info: ${response.status} ${response.statusText}`)

View File

@ -16,19 +16,17 @@ import {
listDirectoryContents, listDirectoryContents,
ZIM_STORAGE_PATH, ZIM_STORAGE_PATH,
} from '../utils/fs.js' } from '../utils/fs.js'
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
import { join, resolve, sep } from 'path' import { join, resolve, sep } from 'path'
import { WikipediaOption, WikipediaState } from '../../types/downloads.js' import { WikipediaOption, WikipediaState } from '../../types/downloads.js'
import vine from '@vinejs/vine'
import { wikipediaOptionsFileSchema } from '#validators/curated_collections'
import WikipediaSelection from '#models/wikipedia_selection' import WikipediaSelection from '#models/wikipedia_selection'
import InstalledResource from '#models/installed_resource' import InstalledResource from '#models/installed_resource'
import { RunDownloadJob } from '#jobs/run_download_job' import { RunDownloadJob } from '#jobs/run_download_job'
import { SERVICE_NAMES } from '../../constants/service_names.js' import { SERVICE_NAMES } from '../../constants/service_names.js'
import { CollectionManifestService } from './collection_manifest_service.js' import { CollectionManifestService } from './collection_manifest_service.js'
import type { CategoryWithStatus } from '../../types/collections.js' import type { CategoryWithStatus, WikipediaSpec } from '../../types/collections.js'
const ZIM_MIME_TYPES = ['application/x-zim', 'application/x-openzim', 'application/octet-stream'] const ZIM_MIME_TYPES = ['application/x-zim', 'application/x-openzim', 'application/octet-stream']
const WIKIPEDIA_OPTIONS_URL = 'https://raw.githubusercontent.com/Crosstalk-Solutions/project-nomad/refs/heads/main/collections/wikipedia.json'
@inject() @inject()
export class ZimService { export class ZimService {
@ -106,7 +104,9 @@ export class ZimService {
} }
// downloadLink['href'] will end with .meta4, we need to remove that to get the actual download URL // downloadLink['href'] will end with .meta4, we need to remove that to get the actual download URL
const download_url = downloadLink['href'].substring(0, downloadLink['href'].length - 6) const download_url = rewriteDownloadUrl(
downloadLink['href'].substring(0, downloadLink['href'].length - 6)
)
const file_name = download_url.split('/').pop() || `${entry.title}.zim` const file_name = download_url.split('/').pop() || `${entry.title}.zim`
const sizeBytes = parseInt(downloadLink['length'], 10) const sizeBytes = parseInt(downloadLink['length'], 10)
@ -361,20 +361,14 @@ export class ZimService {
// Wikipedia selector methods // Wikipedia selector methods
async getWikipediaOptions(): Promise<WikipediaOption[]> { async getWikipediaOptions(): Promise<WikipediaOption[]> {
try { const manifestService = new CollectionManifestService()
const response = await axios.get(WIKIPEDIA_OPTIONS_URL) const spec = await manifestService.getSpecWithFallback<WikipediaSpec>('wikipedia')
const data = response.data if (!spec) {
logger.error('[ZimService] Failed to fetch Wikipedia options: no spec available')
const validated = await vine.validate({
schema: wikipediaOptionsFileSchema,
data,
})
return validated.options
} catch (error) {
logger.error(`[ZimService] Failed to fetch Wikipedia options:`, error)
throw new Error('Failed to fetch Wikipedia options') throw new Error('Failed to fetch Wikipedia options')
} }
return spec.options
} }
async getWikipediaSelection(): Promise<WikipediaSelection | null> { async getWikipediaSelection(): Promise<WikipediaSelection | null> {

View File

@ -0,0 +1,140 @@
/**
 * A single URL rewrite rule: any URL that starts with `source` has that
 * prefix replaced by `target`. Both prefixes are stored normalized
 * (parsed by URL, query/fragment stripped) — see normalizeMirrorPrefix.
 */
type DownloadMirrorRule = {
  source: string
  target: string
}
// Memoized parse of process.env.DOWNLOAD_MIRROR_RULES. The raw string is
// cached alongside the parsed rules so the JSON is only re-parsed when the
// environment value actually changes (see getConfiguredDownloadMirrorRules).
let cachedMirrorRulesRaw: string | undefined
let cachedMirrorRules: DownloadMirrorRule[] = []
/**
 * Canonicalizes a mirror prefix: parses it as an absolute URL (throwing on
 * invalid input, which callers catch) and drops any query string and
 * fragment so that only origin + path remain.
 */
function normalizeMirrorPrefix(urlString: string): string {
  const prefix = new URL(urlString)
  prefix.hash = ''
  prefix.search = ''
  return prefix.toString()
}
/**
 * Concatenates a mirror target prefix with the remainder of a rewritten URL,
 * ensuring exactly one slash separates the two pieces (a duplicated slash is
 * collapsed, a missing one is inserted). An empty suffix returns the prefix
 * unchanged.
 */
function joinMirrorUrl(targetPrefix: string, suffix: string): string {
  if (suffix === '') {
    return targetPrefix
  }
  const prefixEndsWithSlash = targetPrefix.endsWith('/')
  const suffixStartsWithSlash = suffix.startsWith('/')
  if (prefixEndsWithSlash && suffixStartsWithSlash) {
    return targetPrefix + suffix.slice(1)
  }
  if (!prefixEndsWithSlash && !suffixStartsWithSlash) {
    return `${targetPrefix}/${suffix}`
  }
  return targetPrefix + suffix
}
/**
 * Builds a mirror rule from raw source/target strings, canonicalizing both
 * prefixes. Throws (via normalizeMirrorPrefix / URL) when either string is
 * not a valid absolute URL; callers treat that as a malformed rule.
 */
function normalizeDownloadMirrorRule(source: string, target: string): DownloadMirrorRule {
  return {
    source: normalizeMirrorPrefix(source),
    target: normalizeMirrorPrefix(target),
  }
}
/**
 * Parses the DOWNLOAD_MIRROR_RULES environment value into mirror rules.
 *
 * Accepts either a JSON object mapping source prefixes to target prefixes,
 * or a JSON array of { source, target } objects. Invalid JSON, malformed
 * entries, and unparsable prefix URLs are warned about and skipped rather
 * than thrown, so a bad configuration can never break downloads.
 *
 * @param raw - the raw environment value (may be unset or blank).
 * @returns rules sorted longest-source-first so the most specific prefix
 *          wins when several rules match the same URL.
 */
export function parseDownloadMirrorRules(raw?: string | null): DownloadMirrorRule[] {
  if (!raw?.trim()) {
    return []
  }
  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch (error) {
    console.warn(
      `[download_mirrors] Ignoring invalid DOWNLOAD_MIRROR_RULES JSON: ${describeMirrorRuleError(error)}`
    )
    return []
  }
  let rules: DownloadMirrorRule[]
  if (Array.isArray(parsed)) {
    rules = parseMirrorRulesFromArray(parsed)
  } else if (parsed && typeof parsed === 'object') {
    rules = parseMirrorRulesFromObject(parsed as Record<string, unknown>)
  } else {
    console.warn(
      '[download_mirrors] Ignoring DOWNLOAD_MIRROR_RULES because it must be a JSON object or array'
    )
    return []
  }
  // Longest source prefix first: the most specific rule wins on rewrite.
  return rules.sort((a, b) => b.source.length - a.source.length)
}
/** Formats an unknown thrown value for warning output. */
function describeMirrorRuleError(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}
/** Parses rules from a JSON array of { source, target } entries, skipping malformed ones. */
function parseMirrorRulesFromArray(entries: unknown[]): DownloadMirrorRule[] {
  const rules: DownloadMirrorRule[] = []
  for (const entry of entries) {
    if (
      typeof entry !== 'object' ||
      entry === null ||
      !('source' in entry) ||
      !('target' in entry) ||
      typeof entry.source !== 'string' ||
      typeof entry.target !== 'string'
    ) {
      console.warn('[download_mirrors] Ignoring malformed mirror rule in DOWNLOAD_MIRROR_RULES array')
      continue
    }
    try {
      rules.push(normalizeDownloadMirrorRule(entry.source, entry.target))
    } catch (error) {
      // normalizeDownloadMirrorRule throws when either prefix is not a valid URL.
      console.warn(
        `[download_mirrors] Ignoring invalid mirror rule ${JSON.stringify(entry)}: ${describeMirrorRuleError(error)}`
      )
    }
  }
  return rules
}
/** Parses rules from a JSON object mapping source prefixes to target prefixes. */
function parseMirrorRulesFromObject(mapping: Record<string, unknown>): DownloadMirrorRule[] {
  const rules: DownloadMirrorRule[] = []
  for (const [source, target] of Object.entries(mapping)) {
    if (typeof target !== 'string') {
      console.warn(
        `[download_mirrors] Ignoring mirror rule for ${source}: target must be a string`
      )
      continue
    }
    try {
      rules.push(normalizeDownloadMirrorRule(source, target))
    } catch (error) {
      console.warn(
        `[download_mirrors] Ignoring invalid mirror rule ${source}: ${describeMirrorRuleError(error)}`
      )
    }
  }
  return rules
}
/**
 * Applies the first matching mirror rule to a URL. Rules are assumed to be
 * ordered longest-source-first (as produced by parseDownloadMirrorRules) so
 * the most specific prefix wins. Returns the URL unchanged when no rule's
 * source prefix matches.
 */
export function rewriteDownloadUrlWithRules(url: string, rules: DownloadMirrorRule[]): string {
  const matched = rules.find((rule) => url.startsWith(rule.source))
  if (matched === undefined) {
    return url
  }
  return joinMirrorUrl(matched.target, url.slice(matched.source.length))
}
/**
 * Returns the mirror rules configured via DOWNLOAD_MIRROR_RULES, memoized
 * per process: the raw env string is compared against the last-seen value
 * and the JSON is only re-parsed when it changes.
 */
export function getConfiguredDownloadMirrorRules(): DownloadMirrorRule[] {
  const currentRaw = process.env.DOWNLOAD_MIRROR_RULES
  if (currentRaw !== cachedMirrorRulesRaw) {
    cachedMirrorRulesRaw = currentRaw
    cachedMirrorRules = parseDownloadMirrorRules(currentRaw)
  }
  return cachedMirrorRules
}
/**
 * Rewrites a download URL through the rules configured in the
 * DOWNLOAD_MIRROR_RULES environment variable. Returns the URL unchanged
 * when no configured mirror prefix matches (or none are configured).
 */
export function rewriteDownloadUrl(url: string): string {
  return rewriteDownloadUrlWithRules(url, getConfiguredDownloadMirrorRules())
}
export type { DownloadMirrorRule }

View File

@ -19,6 +19,7 @@ export default await Env.create(new URL('../', import.meta.url), {
URL: Env.schema.string(), URL: Env.schema.string(),
LOG_LEVEL: Env.schema.string(), LOG_LEVEL: Env.schema.string(),
INTERNET_STATUS_TEST_URL: Env.schema.string.optional(), INTERNET_STATUS_TEST_URL: Env.schema.string.optional(),
DOWNLOAD_MIRROR_RULES: Env.schema.string.optional(),
/* /*
|---------------------------------------------------------- |----------------------------------------------------------

View File

@ -44,6 +44,10 @@ services:
- REDIS_HOST=redis - REDIS_HOST=redis
# If you change the Redis port, make sure to update this accordingly # If you change the Redis port, make sure to update this accordingly
- REDIS_PORT=6379 - REDIS_PORT=6379
# Optional: rewrite one or more download URL prefixes to mirrors.
# Example:
# - DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
- DOWNLOAD_MIRROR_RULES=
depends_on: depends_on:
mysql: mysql:
condition: service_healthy condition: service_healthy
@ -117,4 +121,4 @@ services:
volumes: volumes:
nomad-update-shared: nomad-update-shared:
driver: local driver: local