mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-04-02 14:59:26 +02:00
- Add Kiwix ZIM categories: Deutsch & Österreich (Wikipedia DE, Wiktionary, Wikibooks, Wikivoyage, Medizin, Klexikon, Koch-Wiki, iFixit DE), Militär & Taktik, Kommunikation & Sicherheit, Energie & Off-Grid
- Add map collection entries for Europe and Oberösterreich (user-supplied PMTiles)
- Add NOMAD_COLLECTIONS_BASE_URL and NOMAD_DATA_PATH env options; collection specs load from configurable base URL
- Add install/nomad-data-pdf-urls.txt manifest and download-nomad-data-pdfs.sh for BBK, DGUV, FEMA, WHO, Zivilschutz AT, and related PDFs
- Add install/NOMAD-DATA-DOWNLOADS.md with usage and Austria PMTiles extract notes
- Update release notes (Unreleased)

Made-with: Cursor
54 lines
1.7 KiB
Bash
Executable File
54 lines
1.7 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Download all PDFs and resources from install/nomad-data-pdf-urls.txt into NOMAD-DATA.
#
# Usage: ./install/download-nomad-data-pdfs.sh [TARGET_DIR]
#   TARGET_DIR defaults to $NOMAD_DATA_PATH, or to <repo root>/NOMAD-DATA when
#   that variable is unset or empty. Relative paths are resolved against the
#   directory the script is started from.
#
# Manifest format, one entry per line:
#   <subdir> <url>
# Everything from the first '#' on a line is dropped as a comment (this also
# removes URL fragments). Lines that do not contain both fields are skipped.
#
# Requires: wget or curl
#
# Exit status: non-zero when the manifest or a download tool is missing, or
# when the target directory cannot be created; otherwise 0. Individual
# download failures are reported on stderr but do not abort the run.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
MANIFEST="${SCRIPT_DIR}/nomad-data-pdf-urls.txt"
TARGET="${1:-${NOMAD_DATA_PATH:-$REPO_ROOT/NOMAD-DATA}}"

# Download command prefix; invoked as: "${GET_CMD[@]}" <output-file> <url>
GET_CMD=()
# In-flight temporary download, removed by the EXIT trap if we are interrupted.
PARTIAL_FILE=""

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print $1 with leading and trailing whitespace removed.
trim() {
  local text=$1
  text="${text#"${text%%[![:space:]]*}"}"
  text="${text%"${text##*[![:space:]]}"}"
  printf '%s' "$text"
}

# Remove a half-written download, if any.
cleanup_partial() {
  [[ -z "$PARTIAL_FILE" ]] || rm -f -- "$PARTIAL_FILE"
}

# Fill GET_CMD with the first available downloader (wget preferred).
select_downloader() {
  if command -v wget >/dev/null 2>&1; then
    # NOTE(review): --no-check-certificate disables TLS verification and is
    # only applied on the wget path (curl verifies). It is kept so existing
    # manifest entries keep downloading; confirm whether it is still needed.
    # -N was dropped: wget ignores timestamping whenever -O is given.
    GET_CMD=(wget --no-check-certificate -q --show-progress -O)
  elif command -v curl >/dev/null 2>&1; then
    GET_CMD=(curl -fSL -o)
  else
    die "Need wget or curl."
  fi
}

# Print a short identifier derived from $1, used to build a fallback filename.
url_digest() {
  if command -v sha256sum >/dev/null 2>&1; then
    printf '%s\n' "$1" | sha256sum | cut -c1-12
  else
    # cksum is POSIX, so this works where sha256sum is not installed.
    printf '%s\n' "$1" | cksum | cut -d' ' -f1
  fi
}

# Print the local filename for URL $1: the last path component without the
# query string, with %20 mapped to '_' and %2B mapped to '+'.
filename_for_url() {
  local url=$1 name
  name="$(basename -- "${url%%\?*}")"
  name="$(printf '%s\n' "$name" | sed 's/%20/_/g; s/%2B/+/g')"
  if [[ -z "$name" ]]; then
    name="doc_$(url_digest "$url").pdf"
  fi
  printf '%s' "$name"
}

# Fetch URL $1 into file $2.
# The data is written to "$2.part" first and renamed only on success, so a
# failed transfer never replaces an existing good copy with a truncated one.
# Returns 0 on success, 1 on failure.
download_one() {
  local url=$1 dest=$2
  PARTIAL_FILE="${dest}.part"
  # stdin comes from /dev/null so the downloader cannot consume the manifest
  # that the calling loop is reading.
  if "${GET_CMD[@]}" "$PARTIAL_FILE" "$url" </dev/null \
    && mv -f -- "$PARTIAL_FILE" "$dest"; then
    PARTIAL_FILE=""
    return 0
  fi
  rm -f -- "$PARTIAL_FILE"
  PARTIAL_FILE=""
  return 1
}

main() {
  [[ -f "$MANIFEST" ]] || die "Manifest not found: $MANIFEST"

  mkdir -p -- "$TARGET"
  # Resolve to an absolute path so a relative TARGET_DIR is not applied twice
  # when building the per-entry directories below.
  TARGET="$(cd -- "$TARGET" && pwd)"

  select_downloader
  trap cleanup_partial EXIT

  local ok_count=0 fail_count=0
  local line subdir url dir filename
  # The "|| [[ -n ... ]]" part handles a final line without a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%%#*}"
    line="${line//$'\r'/}"
    line="$(trim "$line")"
    [[ -n "$line" ]] || continue

    subdir="${line%%[[:space:]]*}"
    url="$(trim "${line#*[[:space:]]}")"
    # A line with a single field leaves url equal to subdir: nothing to fetch.
    if [[ -z "$url" || "$url" == "$subdir" ]]; then
      continue
    fi

    dir="$TARGET/$subdir"
    mkdir -p -- "$dir"
    filename="$(filename_for_url "$url")"

    if download_one "$url" "$dir/$filename"; then
      ok_count=$((ok_count + 1))
    else
      fail_count=$((fail_count + 1))
      printf 'Failed to download: %s\n' "$url" >&2
    fi
  done <"$MANIFEST"

  echo "Done. Downloaded/updated $ok_count files under $TARGET"
  if ((fail_count > 0)); then
    printf '%d download(s) failed; see messages above.\n' "$fail_count" >&2
  fi
}

main "$@"