mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-30 21:49:26 +02:00
add configurable download mirrors
This commit is contained in:
parent
efe6af9b24
commit
d21c6b6855
|
|
@ -15,4 +15,8 @@ REDIS_PORT=6379
|
|||
# Storage path for NOMAD content (ZIM files, maps, etc.)
|
||||
# On Windows dev, use an absolute path like: C:/nomad-storage
|
||||
# On Linux production, use: /opt/project-nomad/storage
|
||||
NOMAD_STORAGE_PATH=/opt/project-nomad/storage
|
||||
NOMAD_STORAGE_PATH=/opt/project-nomad/storage
|
||||
# Optional: rewrite download URLs to one or more mirrors using JSON source-prefix => target-prefix mappings.
|
||||
# Example for Kiwix:
|
||||
# DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
|
||||
DOWNLOAD_MIRROR_RULES=
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import { DockerService } from '#services/docker_service'
|
|||
import { ZimService } from '#services/zim_service'
|
||||
import { MapService } from '#services/map_service'
|
||||
import { EmbedFileJob } from './embed_file_job.js'
|
||||
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
|
||||
|
||||
export class RunDownloadJob {
|
||||
static get queue() {
|
||||
|
|
@ -18,7 +19,7 @@ export class RunDownloadJob {
|
|||
}
|
||||
|
||||
static getJobId(url: string): string {
|
||||
return createHash('sha256').update(url).digest('hex').slice(0, 16)
|
||||
return createHash('sha256').update(rewriteDownloadUrl(url)).digest('hex').slice(0, 16)
|
||||
}
|
||||
|
||||
async handle(job: Job) {
|
||||
|
|
@ -124,10 +125,11 @@ export class RunDownloadJob {
|
|||
static async dispatch(params: RunDownloadJobParams) {
|
||||
const queueService = new QueueService()
|
||||
const queue = queueService.getQueue(this.queue)
|
||||
const jobId = this.getJobId(params.url)
|
||||
const normalizedParams = { ...params, url: rewriteDownloadUrl(params.url) }
|
||||
const jobId = this.getJobId(normalizedParams.url)
|
||||
|
||||
try {
|
||||
const job = await queue.add(this.key, params, {
|
||||
const job = await queue.add(this.key, normalizedParams, {
|
||||
jobId,
|
||||
attempts: 3,
|
||||
backoff: { type: 'exponential', delay: 2000 },
|
||||
|
|
@ -137,7 +139,7 @@ export class RunDownloadJob {
|
|||
return {
|
||||
job,
|
||||
created: true,
|
||||
message: `Dispatched download job for URL ${params.url}`,
|
||||
message: `Dispatched download job for URL ${normalizedParams.url}`,
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.message.includes('job already exists')) {
|
||||
|
|
@ -145,7 +147,7 @@ export class RunDownloadJob {
|
|||
return {
|
||||
job: existing,
|
||||
created: false,
|
||||
message: `Job already exists for URL ${params.url}`,
|
||||
message: `Job already exists for URL ${normalizedParams.url}`,
|
||||
}
|
||||
}
|
||||
throw error
|
||||
|
|
|
|||
|
|
@ -12,10 +12,12 @@ import {
|
|||
getFileStatsIfExists,
|
||||
ZIM_STORAGE_PATH,
|
||||
} from '../utils/fs.js'
|
||||
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
|
||||
import type {
|
||||
ManifestType,
|
||||
ZimCategoriesSpec,
|
||||
MapsSpec,
|
||||
WikipediaSpec,
|
||||
CategoryWithStatus,
|
||||
CollectionWithStatus,
|
||||
SpecResource,
|
||||
|
|
@ -77,7 +79,7 @@ export class CollectionManifestService {
|
|||
async getCachedSpec<T>(type: ManifestType): Promise<T | null> {
|
||||
const manifest = await CollectionManifest.find(type)
|
||||
if (!manifest) return null
|
||||
return manifest.spec_data as T
|
||||
return this.applyDownloadMirrors(type, manifest.spec_data) as T
|
||||
}
|
||||
|
||||
async getSpecWithFallback<T>(type: ManifestType): Promise<T | null> {
|
||||
|
|
@ -170,6 +172,48 @@ export class CollectionManifestService {
|
|||
return undefined
|
||||
}
|
||||
|
||||
private applyDownloadMirrors(
|
||||
type: ManifestType,
|
||||
spec: ZimCategoriesSpec | MapsSpec | WikipediaSpec
|
||||
): ZimCategoriesSpec | MapsSpec | WikipediaSpec {
|
||||
if (type === 'zim_categories') {
|
||||
return {
|
||||
...spec,
|
||||
categories: (spec as ZimCategoriesSpec).categories.map((category) => ({
|
||||
...category,
|
||||
tiers: category.tiers.map((tier) => ({
|
||||
...tier,
|
||||
resources: tier.resources.map((resource) => ({
|
||||
...resource,
|
||||
url: rewriteDownloadUrl(resource.url),
|
||||
})),
|
||||
})),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
if (type === 'maps') {
|
||||
return {
|
||||
...spec,
|
||||
collections: (spec as MapsSpec).collections.map((collection) => ({
|
||||
...collection,
|
||||
resources: collection.resources.map((resource) => ({
|
||||
...resource,
|
||||
url: rewriteDownloadUrl(resource.url),
|
||||
})),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...spec,
|
||||
options: (spec as WikipediaSpec).options.map((option) => ({
|
||||
...option,
|
||||
url: option.url ? rewriteDownloadUrl(option.url) : option.url,
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Filename parsing ----
|
||||
|
||||
static parseZimFilename(filename: string): { resource_id: string; version: string } | null {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import axios from 'axios'
|
|||
import InstalledResource from '#models/installed_resource'
|
||||
import { RunDownloadJob } from '../jobs/run_download_job.js'
|
||||
import { ZIM_STORAGE_PATH } from '../utils/fs.js'
|
||||
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
|
||||
import { join } from 'path'
|
||||
import type {
|
||||
ResourceUpdateCheckRequest,
|
||||
|
|
@ -49,12 +50,15 @@ export class CollectionUpdateService {
|
|||
timeout: 15000,
|
||||
})
|
||||
|
||||
logger.info(
|
||||
`[CollectionUpdateService] Update check complete: ${response.data.length} update(s) available`
|
||||
)
|
||||
const updates = response.data.map((update) => ({
|
||||
...update,
|
||||
download_url: rewriteDownloadUrl(update.download_url),
|
||||
}))
|
||||
|
||||
logger.info(`[CollectionUpdateService] Update check complete: ${updates.length} update(s) available`)
|
||||
|
||||
return {
|
||||
updates: response.data,
|
||||
updates,
|
||||
checked_at: new Date().toISOString(),
|
||||
}
|
||||
} catch (error) {
|
||||
|
|
@ -82,8 +86,10 @@ export class CollectionUpdateService {
|
|||
async applyUpdate(
|
||||
update: ResourceUpdateInfo
|
||||
): Promise<{ success: boolean; jobId?: string; error?: string }> {
|
||||
const downloadUrl = rewriteDownloadUrl(update.download_url)
|
||||
|
||||
// Check if a download is already in progress for this URL
|
||||
const existingJob = await RunDownloadJob.getByUrl(update.download_url)
|
||||
const existingJob = await RunDownloadJob.getByUrl(downloadUrl)
|
||||
if (existingJob) {
|
||||
const state = await existingJob.getState()
|
||||
if (state === 'active' || state === 'waiting' || state === 'delayed') {
|
||||
|
|
@ -98,7 +104,7 @@ export class CollectionUpdateService {
|
|||
const filepath = this.buildFilepath(update, filename)
|
||||
|
||||
const result = await RunDownloadJob.dispatch({
|
||||
url: update.download_url,
|
||||
url: downloadUrl,
|
||||
filepath,
|
||||
timeout: 30000,
|
||||
allowedMimeTypes:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import logger from '@adonisjs/core/services/logger'
|
|||
import { inject } from '@adonisjs/core'
|
||||
import transmit from '@adonisjs/transmit/services/main'
|
||||
import { doResumableDownloadWithRetry } from '../utils/downloads.js'
|
||||
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
|
||||
import { join } from 'path'
|
||||
import { ZIM_STORAGE_PATH } from '../utils/fs.js'
|
||||
import { SERVICE_NAMES } from '../../constants/service_names.js'
|
||||
|
|
@ -614,8 +615,9 @@ export class DockerService {
|
|||
* At least one .zim file must be available before we can start the kiwix container.
|
||||
* We'll download the lightweight mini Wikipedia Top 100 zim file for this purpose.
|
||||
**/
|
||||
const WIKIPEDIA_ZIM_URL =
|
||||
const WIKIPEDIA_ZIM_URL = rewriteDownloadUrl(
|
||||
'https://github.com/Crosstalk-Solutions/project-nomad/raw/refs/heads/main/install/wikipedia_en_100_mini_2025-06.zim'
|
||||
)
|
||||
const filename = 'wikipedia_en_100_mini_2025-06.zim'
|
||||
const filepath = join(process.cwd(), ZIM_STORAGE_PATH, filename)
|
||||
logger.info(`[DockerService] Kiwix Serve pre-install: Downloading ZIM file to ${filepath}`)
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import {
|
|||
getFile,
|
||||
ensureDirectoryExists,
|
||||
} from '../utils/fs.js'
|
||||
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
|
||||
import { join, resolve, sep } from 'path'
|
||||
import urlJoin from 'url-join'
|
||||
import { RunDownloadJob } from '#jobs/run_download_job'
|
||||
|
|
@ -61,9 +62,9 @@ export class MapService implements IMapService {
|
|||
'https://github.com/Crosstalk-Solutions/project-nomad-maps/raw/refs/heads/master/'
|
||||
)
|
||||
|
||||
const resolvedURL = url ? new URL(url) : defaultTarFileURL
|
||||
const resolvedURL = rewriteDownloadUrl((url ? new URL(url) : defaultTarFileURL).toString())
|
||||
await doResumableDownloadWithRetry({
|
||||
url: resolvedURL.toString(),
|
||||
url: resolvedURL,
|
||||
filepath: tempTarPath,
|
||||
timeout: 30000,
|
||||
max_retries: 2,
|
||||
|
|
@ -245,7 +246,7 @@ export class MapService implements IMapService {
|
|||
|
||||
// Perform a HEAD request to get the content length
|
||||
const { default: axios } = await import('axios')
|
||||
const response = await axios.head(url)
|
||||
const response = await axios.head(rewriteDownloadUrl(url))
|
||||
|
||||
if (response.status !== 200) {
|
||||
throw new Error(`Failed to fetch file info: ${response.status} ${response.statusText}`)
|
||||
|
|
|
|||
|
|
@ -16,19 +16,17 @@ import {
|
|||
listDirectoryContents,
|
||||
ZIM_STORAGE_PATH,
|
||||
} from '../utils/fs.js'
|
||||
import { rewriteDownloadUrl } from '../utils/download_mirrors.js'
|
||||
import { join, resolve, sep } from 'path'
|
||||
import { WikipediaOption, WikipediaState } from '../../types/downloads.js'
|
||||
import vine from '@vinejs/vine'
|
||||
import { wikipediaOptionsFileSchema } from '#validators/curated_collections'
|
||||
import WikipediaSelection from '#models/wikipedia_selection'
|
||||
import InstalledResource from '#models/installed_resource'
|
||||
import { RunDownloadJob } from '#jobs/run_download_job'
|
||||
import { SERVICE_NAMES } from '../../constants/service_names.js'
|
||||
import { CollectionManifestService } from './collection_manifest_service.js'
|
||||
import type { CategoryWithStatus } from '../../types/collections.js'
|
||||
import type { CategoryWithStatus, WikipediaSpec } from '../../types/collections.js'
|
||||
|
||||
const ZIM_MIME_TYPES = ['application/x-zim', 'application/x-openzim', 'application/octet-stream']
|
||||
const WIKIPEDIA_OPTIONS_URL = 'https://raw.githubusercontent.com/Crosstalk-Solutions/project-nomad/refs/heads/main/collections/wikipedia.json'
|
||||
|
||||
@inject()
|
||||
export class ZimService {
|
||||
|
|
@ -106,7 +104,9 @@ export class ZimService {
|
|||
}
|
||||
|
||||
// downloadLink['href'] will end with .meta4, we need to remove that to get the actual download URL
|
||||
const download_url = downloadLink['href'].substring(0, downloadLink['href'].length - 6)
|
||||
const download_url = rewriteDownloadUrl(
|
||||
downloadLink['href'].substring(0, downloadLink['href'].length - 6)
|
||||
)
|
||||
const file_name = download_url.split('/').pop() || `${entry.title}.zim`
|
||||
const sizeBytes = parseInt(downloadLink['length'], 10)
|
||||
|
||||
|
|
@ -361,20 +361,14 @@ export class ZimService {
|
|||
// Wikipedia selector methods
|
||||
|
||||
async getWikipediaOptions(): Promise<WikipediaOption[]> {
|
||||
try {
|
||||
const response = await axios.get(WIKIPEDIA_OPTIONS_URL)
|
||||
const data = response.data
|
||||
|
||||
const validated = await vine.validate({
|
||||
schema: wikipediaOptionsFileSchema,
|
||||
data,
|
||||
})
|
||||
|
||||
return validated.options
|
||||
} catch (error) {
|
||||
logger.error(`[ZimService] Failed to fetch Wikipedia options:`, error)
|
||||
const manifestService = new CollectionManifestService()
|
||||
const spec = await manifestService.getSpecWithFallback<WikipediaSpec>('wikipedia')
|
||||
if (!spec) {
|
||||
logger.error('[ZimService] Failed to fetch Wikipedia options: no spec available')
|
||||
throw new Error('Failed to fetch Wikipedia options')
|
||||
}
|
||||
|
||||
return spec.options
|
||||
}
|
||||
|
||||
async getWikipediaSelection(): Promise<WikipediaSelection | null> {
|
||||
|
|
|
|||
140
admin/app/utils/download_mirrors.ts
Normal file
140
admin/app/utils/download_mirrors.ts
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
// A single URL-rewrite mapping: a download URL that begins with `source`
// has that prefix replaced by `target` (see rewriteDownloadUrlWithRules).
type DownloadMirrorRule = {
  source: string
  target: string
}
|
||||
|
||||
// Memoization for getConfiguredDownloadMirrorRules: the raw
// DOWNLOAD_MIRROR_RULES string last parsed, and the rules parsed from it.
// Re-parsing happens only when the raw env value changes.
let cachedMirrorRulesRaw: string | undefined
let cachedMirrorRules: DownloadMirrorRule[] = []
|
||||
|
||||
function normalizeMirrorPrefix(urlString: string): string {
|
||||
const parsed = new URL(urlString)
|
||||
parsed.search = ''
|
||||
parsed.hash = ''
|
||||
return parsed.toString()
|
||||
}
|
||||
|
||||
function joinMirrorUrl(targetPrefix: string, suffix: string): string {
|
||||
if (!suffix) return targetPrefix
|
||||
|
||||
if (targetPrefix.endsWith('/') && suffix.startsWith('/')) {
|
||||
return `${targetPrefix}${suffix.slice(1)}`
|
||||
}
|
||||
|
||||
if (!targetPrefix.endsWith('/') && !suffix.startsWith('/')) {
|
||||
return `${targetPrefix}/${suffix}`
|
||||
}
|
||||
|
||||
return `${targetPrefix}${suffix}`
|
||||
}
|
||||
|
||||
function normalizeDownloadMirrorRule(source: string, target: string): DownloadMirrorRule {
|
||||
const normalizedSource = normalizeMirrorPrefix(source)
|
||||
const normalizedTarget = normalizeMirrorPrefix(target)
|
||||
|
||||
return {
|
||||
source: normalizedSource,
|
||||
target: normalizedTarget,
|
||||
}
|
||||
}
|
||||
|
||||
export function parseDownloadMirrorRules(raw?: string | null): DownloadMirrorRule[] {
|
||||
if (!raw?.trim()) {
|
||||
return []
|
||||
}
|
||||
|
||||
let parsed: unknown
|
||||
try {
|
||||
parsed = JSON.parse(raw)
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`[download_mirrors] Ignoring invalid DOWNLOAD_MIRROR_RULES JSON: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}`
|
||||
)
|
||||
return []
|
||||
}
|
||||
|
||||
const rules: DownloadMirrorRule[] = []
|
||||
|
||||
if (Array.isArray(parsed)) {
|
||||
for (const entry of parsed) {
|
||||
if (
|
||||
typeof entry !== 'object' ||
|
||||
entry === null ||
|
||||
!('source' in entry) ||
|
||||
!('target' in entry) ||
|
||||
typeof entry.source !== 'string' ||
|
||||
typeof entry.target !== 'string'
|
||||
) {
|
||||
console.warn('[download_mirrors] Ignoring malformed mirror rule in DOWNLOAD_MIRROR_RULES array')
|
||||
continue
|
||||
}
|
||||
|
||||
try {
|
||||
rules.push(normalizeDownloadMirrorRule(entry.source, entry.target))
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`[download_mirrors] Ignoring invalid mirror rule ${JSON.stringify(entry)}: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}`
|
||||
)
|
||||
}
|
||||
}
|
||||
} else if (parsed && typeof parsed === 'object') {
|
||||
for (const [source, target] of Object.entries(parsed as Record<string, unknown>)) {
|
||||
if (typeof target !== 'string') {
|
||||
console.warn(
|
||||
`[download_mirrors] Ignoring mirror rule for ${source}: target must be a string`
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
try {
|
||||
rules.push(normalizeDownloadMirrorRule(source, target))
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`[download_mirrors] Ignoring invalid mirror rule ${source}: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}`
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.warn(
|
||||
'[download_mirrors] Ignoring DOWNLOAD_MIRROR_RULES because it must be a JSON object or array'
|
||||
)
|
||||
return []
|
||||
}
|
||||
|
||||
return rules.sort((a, b) => b.source.length - a.source.length)
|
||||
}
|
||||
|
||||
export function rewriteDownloadUrlWithRules(url: string, rules: DownloadMirrorRule[]): string {
|
||||
for (const rule of rules) {
|
||||
if (!url.startsWith(rule.source)) {
|
||||
continue
|
||||
}
|
||||
|
||||
return joinMirrorUrl(rule.target, url.slice(rule.source.length))
|
||||
}
|
||||
|
||||
return url
|
||||
}
|
||||
|
||||
export function getConfiguredDownloadMirrorRules(): DownloadMirrorRule[] {
|
||||
const raw = process.env.DOWNLOAD_MIRROR_RULES
|
||||
|
||||
if (raw === cachedMirrorRulesRaw) {
|
||||
return cachedMirrorRules
|
||||
}
|
||||
|
||||
cachedMirrorRulesRaw = raw
|
||||
cachedMirrorRules = parseDownloadMirrorRules(raw)
|
||||
return cachedMirrorRules
|
||||
}
|
||||
|
||||
export function rewriteDownloadUrl(url: string): string {
|
||||
return rewriteDownloadUrlWithRules(url, getConfiguredDownloadMirrorRules())
|
||||
}
|
||||
|
||||
export type { DownloadMirrorRule }
|
||||
|
|
@ -19,6 +19,7 @@ export default await Env.create(new URL('../', import.meta.url), {
|
|||
URL: Env.schema.string(),
|
||||
LOG_LEVEL: Env.schema.string(),
|
||||
INTERNET_STATUS_TEST_URL: Env.schema.string.optional(),
|
||||
DOWNLOAD_MIRROR_RULES: Env.schema.string.optional(),
|
||||
|
||||
/*
|
||||
|----------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -44,6 +44,10 @@ services:
|
|||
- REDIS_HOST=redis
|
||||
# If you change the Redis port, make sure to update this accordingly
|
||||
- REDIS_PORT=6379
|
||||
# Optional: rewrite one or more download URL prefixes to mirrors.
|
||||
# Example:
|
||||
# - DOWNLOAD_MIRROR_RULES={"https://download.kiwix.org/zim/":"https://cdimage.debian.org/mirror/kiwix.org/zim/"}
|
||||
- DOWNLOAD_MIRROR_RULES=
|
||||
depends_on:
|
||||
mysql:
|
||||
condition: service_healthy
|
||||
|
|
@ -117,4 +121,4 @@ services:
|
|||
|
||||
volumes:
|
||||
nomad-update-shared:
|
||||
driver: local
|
||||
driver: local
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user