name: "Check Links"
description: "Check all links in markdown files for validity"

inputs:
  user_repository:
    description: "Repository path in format owner/repo"
    required: true

runs:
  using: "composite"
  steps:
    - name: Check all links in markdown files
      shell: bash
      env:
        USER_REPO: ${{ inputs.user_repository }}
      run: |
        # Crawl every markdown file in the checkout, extract all link targets,
        # verify each one over HTTP (external URLs directly, local paths via the
        # Gitea raw endpoint), print a categorized report per file, and exit
        # non-zero if any link is broken.
        set -o pipefail

        # Decode %XX percent-escapes to raw bytes for display purposes.
        # Generalizes the previous fixed substitution list (spaces, CJK,
        # brackets, ...) to every percent-encoded sequence, including
        # multi-byte UTF-8 runs such as %E3%83%86.
        urldecode() {
          printf '%b\n' "${1//%/\\x}"
        }

        echo "[Link Checker Job Started]"
        echo ""

        # Find all markdown files
        find . -name "*.md" -type f | sort > all_markdown_files.txt
        total_files=$(wc -l < all_markdown_files.txt)
        echo "Found $total_files markdown files to check"
        echo ""

        # Get repository info from environment variable
        REPO_OWNER="${USER_REPO%/*}"
        REPO_NAME="${USER_REPO#*/}"
        GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "main")

        has_errors=0
        file_index=1

        # File to collect all broken links across every markdown file
        > /tmp/broken_links_$$.txt

        # Process each markdown file
        while IFS= read -r md_file; do
          echo "[$file_index/$total_files] Checking: $md_file"

          file_has_errors=0

          # Extract ALL links from the markdown file.
          # sort -u dedupes: the first pattern also matches the URL part of
          # image links, so without it every image URL was checked twice.
          {
            # Markdown links []()
            grep -oP '\]\(([^\)]+)\)' "$md_file" 2>/dev/null | sed 's/](\(.*\))/\1/' || true
            # Image links ![]()
            grep -oP '!\[[^\]]*\]\(([^\)]+)\)' "$md_file" 2>/dev/null | sed 's/!\[.*\](\(.*\))/\1/' || true
            # src attributes in embedded HTML tags (e.g. <video src="...">)
            # NOTE(review): the previous pattern ']+src="..."' appears to be a
            # mangled '<[^>]+src="..."' (leading '<[^>' stripped); reconstructed
            # accordingly — confirm against the original intent.
            grep -oP '<[^>]+src="([^"]+)"' "$md_file" 2>/dev/null | sed 's/.*src="\([^"]*\)".*/\1/' || true
          } | sort -u > /tmp/links_$$.txt

          link_count=$(wc -l < /tmp/links_$$.txt 2>/dev/null || echo "0")

          if [ "$link_count" -eq 0 ]; then
            echo "  → No links found"
            echo ""
            file_index=$((file_index + 1))
            continue
          fi

          echo "  → Checking $link_count links..."

          # Categorize and check links
          > /tmp/download_$$.txt
          > /tmp/media_$$.txt
          > /tmp/video_$$.txt
          > /tmp/tags_$$.txt

          # Check each link
          while IFS= read -r link; do
            [ -z "$link" ] && continue

            # Pure in-page anchors and mailto: links have nothing to fetch;
            # previously they were misread as local file paths and reported
            # as broken.
            case "$link" in
              \#*|mailto:*) continue ;;
            esac

            # Decode URL-encoded characters for display
            decoded_link=$(urldecode "$link")

            # Determine link category and validation status
            status="✓"

            # Check if it's an external URL
            if [[ "$decoded_link" =~ ^https?:// ]]; then
              # Replace git.sulej.net with internal gitea URL for checking
              check_url="$link"
              if [[ "$link" =~ git\.sulej\.net ]]; then
                check_url="${link//git.sulej.net/gitea:3000}"
                check_url="${check_url//https:/http:}"
              fi

              # Check external URL with curl
              http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
                -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" \
                "$check_url" 2>/dev/null || echo "000")
            else
              # Local file - convert to full Gitea URL (keep URL encoding)
              if [[ "$link" =~ ^/ ]]; then
                # Absolute path from repo root
                check_url="http://gitea:3000/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH${link}"
                display_url="https://git.sulej.net/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH${link}"
              else
                # Relative path from markdown file
                md_dir=$(dirname "$md_file")
                if [[ "$md_dir" == "." ]]; then
                  rel_path="$link"
                else
                  rel_path="${md_dir#./}/$link"
                fi
                check_url="http://gitea:3000/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH/$rel_path"
                display_url="https://git.sulej.net/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH/$rel_path"
              fi

              # Use display_url for output instead of decoded_link
              decoded_link="$display_url"

              # Check URL with curl
              http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$check_url" 2>/dev/null || echo "000")
            fi

            # Accept 2xx and 3xx status codes as valid
            if ! [[ "$http_code" =~ ^[23][0-9][0-9]$ ]]; then
              status="✖ (HTTP $http_code)"
              file_has_errors=1
              has_errors=1
              # Save broken link to persistent file
              echo "$decoded_link" >> /tmp/broken_links_$$.txt
            fi

            # Categorize all links
            if [[ "$decoded_link" =~ /export/.*\.(osk|osz)$ ]] || [[ "$decoded_link" =~ ^https?:// && ! "$decoded_link" =~ /media/ && ! "$decoded_link" =~ /src/tag/ ]]; then
              echo "    $status $decoded_link" >> /tmp/download_$$.txt
            elif [[ "$decoded_link" =~ /media/gameplay/.*\.(mp4|webm)$ ]]; then
              echo "    $status $decoded_link" >> /tmp/video_$$.txt
            elif [[ "$decoded_link" =~ /src/tag/ ]]; then
              echo "    $status $decoded_link" >> /tmp/tags_$$.txt
            elif [[ "$decoded_link" =~ \.(webp|png|jpg|jpeg)$ ]] || [[ "$decoded_link" =~ /media/(panel|icons|thumbnail)/ ]]; then
              echo "    $status $decoded_link" >> /tmp/media_$$.txt
            elif [[ "$decoded_link" =~ \.md$ ]]; then
              echo "    $status $decoded_link" >> /tmp/tags_$$.txt
            else
              echo "    $status $decoded_link" >> /tmp/download_$$.txt
            fi
          done < /tmp/links_$$.txt

          # Display categorized results - special handling for README
          if [[ "$md_file" == "./README.md" ]]; then
            # For README, group by skin name
            > /tmp/skins_$$.txt

            # Extract unique skin names from download links and decode them
            if [ -s /tmp/download_$$.txt ]; then
              grep -oP 'export/[^/]+' /tmp/download_$$.txt | sed 's|export/||' | while read -r encoded_name; do
                # Decode the skin name
                urldecode "$encoded_name"
              done | sort -u > /tmp/skins_$$.txt || true
            fi

            # Show general links first (not skin-specific)
            if [ -s /tmp/download_$$.txt ]; then
              general_downloads=$(grep -v '/export/.*\.osk' /tmp/download_$$.txt | sort -u || true)
              if [ -n "$general_downloads" ]; then
                echo "  general:"
                echo "$general_downloads"
                echo ""
              fi
            fi

            # Show each skin's links together.
            # grep -F throughout: decoded skin names can contain regex
            # metacharacters — '(', '[', '+' are all produced by the decoder —
            # which previously broke these pattern matches.
            while IFS= read -r skin_name; do
              [ -z "$skin_name" ] && continue

              echo "  skin: $skin_name"
              # Download link
              grep -F "/export/$skin_name/" /tmp/download_$$.txt 2>/dev/null | grep '\.osk' | sort -u || true
              # Thumbnail
              grep -F "thumbnail/$skin_name/" /tmp/media_$$.txt 2>/dev/null | sort -u || true
              # Docs
              grep -F "/docs/$skin_name/" /tmp/tags_$$.txt 2>/dev/null | sort -u || true
              echo ""
            done < /tmp/skins_$$.txt

            # Show version tags separately
            if [ -s /tmp/tags_$$.txt ]; then
              version_tags=$(grep '/src/tag/' /tmp/tags_$$.txt | sort -u || true)
              if [ -n "$version_tags" ]; then
                echo "  version tags:"
                echo "$version_tags"
                echo ""
              fi
            fi

            rm -f /tmp/skins_$$.txt
          else
            # For other markdown files, show categorized as before
            if [ -s /tmp/download_$$.txt ]; then
              echo "  download:"
              sort -u /tmp/download_$$.txt
              echo ""
            fi
            if [ -s /tmp/media_$$.txt ]; then
              echo "  media:"
              sort -u /tmp/media_$$.txt
              echo ""
            fi
            if [ -s /tmp/video_$$.txt ]; then
              echo "  video:"
              sort -u /tmp/video_$$.txt
              echo ""
            fi
            if [ -s /tmp/tags_$$.txt ]; then
              echo "  tags:"
              sort -u /tmp/tags_$$.txt
              echo ""
            fi
          fi

          rm -f /tmp/download_$$.txt /tmp/media_$$.txt /tmp/video_$$.txt /tmp/tags_$$.txt
          rm -f /tmp/links_$$.txt

          if [ "$file_has_errors" -eq 0 ]; then
            echo "  ✓ All links valid"
          else
            echo "  ✖ Some links broken"
          fi
          echo ""

          file_index=$((file_index + 1))
        done < all_markdown_files.txt

        echo ""
        if [ "$has_errors" -eq 0 ]; then
          echo "[Link Checker Complete — all links valid in $total_files files]"
          rm -f /tmp/broken_links_$$.txt
          exit 0
        else
          echo "[Link Checker Complete — found broken links in $total_files files]"
          echo ""
          echo "Missing files:"
          if [ -s /tmp/broken_links_$$.txt ]; then
            sort -u /tmp/broken_links_$$.txt
          else
            echo "(no broken links file found)"
          fi
          rm -f /tmp/broken_links_$$.txt
          exit 1
        fi