#!/usr/bin/env bash
#
# downloader.sh — retrieve and organize Google and Google Cloud IP ranges.
#
# From: https://gist.github.com/jeffmccune/e7d635116f25bc7e12b2a19efbafcdf8
# From: https://gist.github.com/n0531m/f3714f6ad6ef738a3b0a
#
# Improvements carried by this revision:
# - Dependency check: curl, dig, jq, mktemp must exist before proceeding.
# - Downloads: retries with -f so HTTP 4xx/5xx are treated as failures,
#   run in parallel, and each background job's exit status is checked.
# - DNS: fallbacks so a failed lookup degrades gracefully instead of
#   aborting mid-collection; netblocks file always exists.
# - Temp dir cleaned up via trap on every exit path.
# - sort -u -V: dedupe and keep numeric-aware ordering in one pass.
# - Output directory creation and non-empty-output validation.
# - All diagnostics go to stderr.

set -euo pipefail
# set -x  # uncomment for execution tracing

# Fail fast if a required external tool is missing.
for cmd in curl dig jq mktemp; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "Error: $cmd is not installed or not in PATH" >&2
    exit 1
  fi
done

# Work in a throwaway directory; the trap removes it on any exit path.
temp_dir=$(mktemp -d)
trap 'rm -rf -- "$temp_dir"' EXIT

#######################################
# Download a URL to a file with retries.
# -f makes curl fail on HTTP errors (4xx/5xx); without it a 404 body is
# written to disk and treated as success, defeating the retry loop.
# Arguments: $1 - URL, $2 - output file path
# Returns:   0 on success, 1 after exhausting retries
#######################################
download_file() {
  local url=$1
  local output_file=$2
  local retries=3
  local attempt=0
  until curl -fsS "$url" -o "$output_file"; do
    attempt=$((attempt + 1))
    if [[ $attempt -ge $retries ]]; then
      echo "Error: failed to download $url after $retries attempts" >&2
      return 1
    fi
    sleep 2  # back off before retrying
  done
}

# Fetch the three source lists in parallel. Wait on each PID individually:
# a bare `wait` always returns 0 and would silently ignore a failed
# download (its `exit`/`return` only terminates the background subshell).
download_file "https://www.gstatic.com/ipranges/goog.txt" "$temp_dir/goog.txt" &
pid_goog=$!
download_file "https://www.gstatic.com/ipranges/cloud.json" "$temp_dir/cloud.json" &
pid_cloud=$!
# Public GoogleBot ranges, see:
# https://developers.google.com/search/docs/advanced/crawling/verifying-googlebot
download_file "https://developers.google.com/search/apis/ipranges/googlebot.json" "$temp_dir/googlebot.json" &
pid_bot=$!
wait "$pid_goog"
wait "$pid_cloud"
wait "$pid_bot"

#######################################
# Collect netblock CIDRs published via _netblocks*.google.com TXT records.
# Walks _netblocks, _netblocks2, _netblocks3, ... until a lookup returns
# nothing. Appends to $temp_dir/netblocks.txt.
#######################################
fetch_netblocks() {
  local idx=2
  local txt
  # Ensure the file exists even if the very first DNS query fails,
  # so later greps on it do not abort the script.
  : > "$temp_dir/netblocks.txt"
  txt="$(dig TXT _netblocks.google.com +short @8.8.8.8 || true)"
  while [[ -n "$txt" ]]; do
    # TXT data looks like: "v=spf1 ip4:1.2.3.0/24 ip6:2001:db8::/32 ~all"
    # Split on whitespace, keep ip4:/ip6: tokens, strip the prefix label.
    # (tr sets are literal characters, not regex — no '+' quantifier.)
    echo "$txt" | tr '[:space:]' '\n' | grep ':' | cut -d: -f2- >> "$temp_dir/netblocks.txt" || true
    txt="$(dig TXT _netblocks${idx}.google.com +short @8.8.8.8 || true)"
    idx=$((idx + 1))
  done
}

fetch_netblocks

#######################################
# Recursively resolve an SPF record into plain CIDR ranges on stdout.
# ip4:/ip6: entries are emitted; include: entries recurse.
# With pipefail, a failed dig propagates through the pipeline and set -e
# aborts the top-level call; best-effort inside recursion.
# Arguments: $1 - domain to query
#######################################
get_dns_spf() {
  dig @8.8.8.8 +short txt "$1" |
    tr ' ' '\n' |
    while read -r entry; do
      case "$entry" in
        ip4:*|ip6:*) echo "${entry#*:}" ;;
        include:*) get_dns_spf "${entry#*:}" ;;
      esac
    done
}

get_dns_spf "_cloud-netblocks.googleusercontent.com" >> "$temp_dir/netblocks.txt"
get_dns_spf "_spf.google.com" >> "$temp_dir/netblocks.txt"

# Split into IPv4 (no colon) and IPv6 (contains colon).
# The netblocks greps are guarded: grep exits 1 on zero matches, which
# would otherwise kill the script under set -e when one family is absent.
grep -v ':' "$temp_dir/goog.txt" > "$temp_dir/google-ipv4.txt"
jq -r '.prefixes[] | select(.ipv4Prefix != null) | .ipv4Prefix' "$temp_dir/cloud.json" >> "$temp_dir/google-ipv4.txt"
jq -r '.prefixes[] | select(.ipv4Prefix != null) | .ipv4Prefix' "$temp_dir/googlebot.json" >> "$temp_dir/google-ipv4.txt"
grep -v ':' "$temp_dir/netblocks.txt" >> "$temp_dir/google-ipv4.txt" || true

grep ':' "$temp_dir/goog.txt" > "$temp_dir/google-ipv6.txt"
jq -r '.prefixes[] | select(.ipv6Prefix != null) | .ipv6Prefix' "$temp_dir/cloud.json" >> "$temp_dir/google-ipv6.txt"
jq -r '.prefixes[] | select(.ipv6Prefix != null) | .ipv6Prefix' "$temp_dir/googlebot.json" >> "$temp_dir/google-ipv6.txt"
grep ':' "$temp_dir/netblocks.txt" >> "$temp_dir/google-ipv6.txt" || true

# Dedupe and sort; -V keeps numeric-aware ordering for IP-like strings.
output_dir="google"
mkdir -p "$output_dir"
sort -u -V "$temp_dir/google-ipv4.txt" > "$output_dir/ipv4.txt"
sort -u -V "$temp_dir/google-ipv6.txt" > "$output_dir/ipv6.txt"

# Sanity check: both result files must be non-empty.
if [[ ! -s "$output_dir/ipv4.txt" || ! -s "$output_dir/ipv6.txt" ]]; then
  echo "Error: output files are empty or failed to generate." >&2
  exit 1
fi

echo "IP ranges saved in $output_dir/ipv4.txt and $output_dir/ipv6.txt"