# Manually triggered workflow that scans every Markdown file in the repository,
# extracts inline link/image targets of the form [text](target) / ![alt](target),
# and verifies each one:
#   * http(s) targets are requested with curl; any 2xx/3xx status is accepted.
#   * every other target is treated as a path inside the checkout and must exist.
# The job fails (exit 1) when at least one target is broken.
name: Check Links in README

on:
  workflow_dispatch:

# The job only reads the checkout; it never needs to write to the repository.
permissions:
  contents: read

jobs:
  check-links:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Check all links in markdown files
        # A shebang inside `run:` is only a comment, so select the shell explicitly.
        shell: bash
        run: |
          set -euo pipefail

          readonly RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

          # Matches `target "title"` / `target 'title'`; group 1 is the bare target.
          readonly TITLE_RE="^([^[:space:]]+)[[:space:]]+(\"[^\"]*\"|'[^']*')\$"
          # Matches a leading URI scheme such as `mailto:` or `ftp:`.
          readonly SCHEME_RE='^[A-Za-z][A-Za-z0-9+.-]*:'

          #######################################
          # Percent-decode a string (every %HH sequence, including multi-byte
          # UTF-8), instead of a hand-maintained list of known characters.
          # Arguments: $1 - percent-encoded text
          # Outputs:   decoded text on stdout (no trailing newline)
          #######################################
          url_decode() {
            local escaped
            # Escape literal backslashes first so that printf %b interprets only
            # the \xHH sequences generated from %HH below.
            escaped=${1//\\/\\\\}
            escaped=$(sed -E 's/%([0-9A-Fa-f]{2})/\\x\1/g' <<< "$escaped")
            printf '%b' "$escaped"
          }

          # Private scratch directory: avoids predictable /tmp names and keeps
          # the checked-out workspace clean. Removed on every exit path.
          work_dir=$(mktemp -d)
          trap 'rm -rf -- "$work_dir"' EXIT
          md_list="$work_dir/markdown_files.txt"
          links_file="$work_dir/links.txt"

          echo ""
          echo "$RULE"
          echo "🔍 Checking ALL Links in All Markdown Files"
          echo "$RULE"
          echo ""

          # Find all markdown files
          find . -name "*.md" -type f | sort > "$md_list"
          total_files=$(wc -l < "$md_list")

          echo "📊 Found $total_files markdown files to check"
          echo ""

          has_errors=0

          # Process each markdown file
          while IFS= read -r md_file; do
            echo "$RULE"
            echo "📄 Checking: $md_file"
            echo "$RULE"

            file_has_errors=0

            # Extract the target of every inline link. The pattern keys on the
            # `](target)` tail, which is shared by [text](target) and
            # ![alt](target), so images are covered without a second pass (a
            # second pass would list every image twice). Bare URLs outside of
            # `](...)` are not extracted, and a target containing ')' is cut at
            # the first ')'.
            # grep exits 1 when a file has no links; that is not an error here.
            grep -oP '\]\(\K[^)]+(?=\))' "$md_file" > "$links_file" 2>/dev/null || true

            link_count=$(wc -l < "$links_file")

            if [[ "$link_count" -eq 0 ]]; then
              echo " ℹ️ No links found in this file"
              echo ""
              continue
            fi

            echo " 📊 Found $link_count links to check"
            echo ""

            # Check each link
            while IFS= read -r link; do
              if [[ -z "$link" ]]; then
                continue
              fi

              # Drop an optional link title: [text](target "title").
              if [[ "$link" =~ $TITLE_RE ]]; then
                link=${BASH_REMATCH[1]}
              fi

              # Human-readable form, used for reporting.
              decoded_link=$(url_decode "$link")

              if [[ "$decoded_link" =~ ^https?:// ]]; then
                # External URL: request the original (still encoded) link.
                # curl's -w already prints 000 when no response is received, so
                # a failure must not append a second "000" to the captured code.
                http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
                  -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" \
                  "$link" 2>/dev/null) || true
                if [[ -z "$http_code" ]]; then
                  http_code="000"
                fi

                # Accept 2xx and 3xx status codes as valid
                if [[ "$http_code" =~ ^[23][0-9][0-9]$ ]]; then
                  echo " ✅ $decoded_link"
                else
                  echo " ❌ $decoded_link (HTTP $http_code)"
                  file_has_errors=1
                  has_errors=1
                fi
              elif [[ "$link" == \#* ]]; then
                # Anchor inside the same document: there is no file to look up.
                echo " ℹ️ $decoded_link (in-page anchor, skipped)"
              elif [[ "$link" =~ $SCHEME_RE ]]; then
                # mailto:, tel:, ftp:, ... are neither local files nor HTTP URLs.
                echo " ℹ️ $decoded_link (non-HTTP scheme, skipped)"
              else
                # Local target. Strip #fragment and ?query from the *encoded*
                # link first, so that an encoded %23 / %3F in a file name
                # survives, then decode what is left into a filesystem path.
                target=${link%%#*}
                target=${target%%\?*}
                target=$(url_decode "$target")

                if [[ "$target" == /* ]]; then
                  # Root-relative: resolve against the repository root (cwd).
                  file_path=${target#/}
                else
                  # Relative path - resolve from markdown file location
                  file_path="$(dirname "$md_file")/$target"
                fi

                # -e rather than -f: a link may legitimately point at a directory.
                if [[ -e "$file_path" ]]; then
                  echo " ✅ $decoded_link"
                else
                  echo " ❌ $decoded_link (file not found at: $file_path)"
                  file_has_errors=1
                  has_errors=1
                fi
              fi
            done < "$links_file"

            if [[ "$file_has_errors" -eq 0 ]]; then
              echo ""
              echo " ✅ All links valid in this file"
            else
              echo ""
              echo " ❌ Some links are broken in this file"
            fi
            echo ""
          done < "$md_list"

          echo "$RULE"
          if [[ "$has_errors" -eq 0 ]]; then
            echo "✅ FINAL RESULT: All links are valid across all markdown files!"
            echo "$RULE"
            exit 0
          else
            echo "❌ FINAL RESULT: Some links are broken. Please review the output above."
            echo "$RULE"
            exit 1
          fi