diff --git a/.github/workflows/validate-collection-urls.yml b/.github/workflows/validate-collection-urls.yml
new file mode 100644
index 0000000..70c15fd
--- /dev/null
+++ b/.github/workflows/validate-collection-urls.yml
@@ -0,0 +1,82 @@
+name: Validate Collection URLs
+
+on:
+  push:
+    paths:
+      - 'collections/**.json'
+  pull_request:
+    paths:
+      - 'collections/**.json'
+
+# The job only reads repository contents, so keep the token read-only.
+permissions:
+  contents: read
+
+jobs:
+  validate-urls:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Collect every string-valued "url" field from the collection JSON files
+      # and fail the job unless each one answers with HTTP 200 or 206.
+      - name: Extract and validate URLs
+        # Naming bash explicitly makes GitHub run the script with
+        # `-eo pipefail`, so a jq failure is not masked by `sort`.
+        shell: bash
+        run: |
+          FAILED=0
+          CHECKED=0
+          FAILED_URLS=""
+
+          # The trigger globs match JSON at any depth below collections/,
+          # so scan nested directories as well as the top level.
+          shopt -s globstar nullglob
+          JSON_FILES=(collections/**/*.json)
+          if [ "${#JSON_FILES[@]}" -eq 0 ]; then
+            echo "No JSON files under collections/; nothing to check."
+            exit 0
+          fi
+
+          # Every string-valued "url" key at any nesting level, de-duplicated.
+          URLS=$(jq -r '.. | .url? | select(type == "string")' "${JSON_FILES[@]}" | sort -u)
+
+          while IFS= read -r url; do
+            [ -z "$url" ] && continue
+            CHECKED=$((CHECKED + 1))
+            printf "Checking: %s ... " "$url"
+
+            # Use Range: bytes=0-0 to avoid downloading the full file.
+            # --max-filesize 1 aborts early if the server ignores the Range
+            # header and returns 200 with the full body; curl then exits
+            # non-zero, but -w still prints the status code.
+            # `|| true` keeps errexit from killing the step on that exit
+            # status (or on DNS/timeout errors, which report code 000), so
+            # every URL is checked and the summary below is always printed.
+            # --url stops a value beginning with "-" being read as an option.
+            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
+              --range 0-0 \
+              --max-filesize 1 \
+              --max-time 30 \
+              --location \
+              --url "$url") || true
+
+            if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "206" ]; then
+              echo "OK ($HTTP_CODE)"
+            else
+              echo "FAILED ($HTTP_CODE)"
+              FAILED=$((FAILED + 1))
+              # Real newlines + %s below: URL text is never escape-expanded.
+              FAILED_URLS+="  - $url (HTTP $HTTP_CODE)"$'\n'
+            fi
+          done <<< "$URLS"
+
+          echo ""
+          echo "Checked $CHECKED URLs, $FAILED failed."
+
+          if [ "$FAILED" -gt 0 ]; then
+            echo ""
+            echo "Broken URLs:"
+            printf "%s" "$FAILED_URLS"
+            exit 1
+          fi