name: "Check Links"
description: "Check all links in markdown files for validity"

inputs:
  user_repository:
    description: "Repository path in format owner/repo"
    required: true

runs:
  using: "composite"
  steps:
    - name: Check all links in markdown files
      shell: bash
      env:
        USER_REPO: ${{ inputs.user_repository }}
      run: |
        # Crawl every markdown file in the checkout, extract all link targets,
        # verify each one over HTTP (external URLs directly, local paths via the
        # Gitea raw endpoint), print a categorized report per file, and exit
        # non-zero if any link is broken.
        set -o pipefail

        # Decode %XX percent-escapes to raw bytes for display purposes.
        # Generalizes the previous fixed substitution list (spaces, CJK,
        # brackets, ...) to every percent-encoded sequence, including
        # multi-byte UTF-8 runs such as %E3%83%86.
        urldecode() {
          printf '%b\n' "${1//%/\\x}"
        }

        echo "[Link Checker Job Started]"
        echo ""

        # Find all markdown files
        find . -name "*.md" -type f | sort > all_markdown_files.txt
        total_files=$(wc -l < all_markdown_files.txt)
        echo "Found $total_files markdown files to check"
        echo ""

        # Get repository info from environment variable
        REPO_OWNER="${USER_REPO%/*}"
        REPO_NAME="${USER_REPO#*/}"
        GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "main")

        has_errors=0
        file_index=1

        # File to collect all broken links across every markdown file
        > /tmp/broken_links_$$.txt

        # Process each markdown file
        while IFS= read -r md_file; do
          echo "[$file_index/$total_files] Checking: $md_file"

          file_has_errors=0

          # Extract ALL links from the markdown file.
          # sort -u dedupes: the first pattern also matches the URL part of
          # image links, so without it every image URL was checked twice.
          {
            # Markdown links []()
            grep -oP '\]\(([^\)]+)\)' "$md_file" 2>/dev/null | sed 's/](\(.*\))/\1/' || true
            # Image links ![]()
            grep -oP '!\[[^\]]*\]\(([^\)]+)\)' "$md_file" 2>/dev/null | sed 's/!\[.*\](\(.*\))/\1/' || true
            # src attributes in embedded HTML tags (e.g. <video src="...">)
            # NOTE(review): the previous pattern ']+src="..."' appears to be a
            # mangled '<[^>]+src="..."' (leading '<[^>' stripped); reconstructed
            # accordingly — confirm against the original intent.
            grep -oP '<[^>]+src="([^"]+)"' "$md_file" 2>/dev/null | sed 's/.*src="\([^"]*\)".*/\1/' || true
          } | sort -u > /tmp/links_$$.txt

          link_count=$(wc -l < /tmp/links_$$.txt 2>/dev/null || echo "0")

          if [ "$link_count" -eq 0 ]; then
            echo "  → No links found"
            echo ""
            file_index=$((file_index + 1))
            continue
          fi

          echo "  → Checking $link_count links..."

          # Categorize and check links
          > /tmp/download_$$.txt
          > /tmp/media_$$.txt
          > /tmp/video_$$.txt
          > /tmp/tags_$$.txt

          # Check each link
          while IFS= read -r link; do
            [ -z "$link" ] && continue

            # Pure in-page anchors and mailto: links have nothing to fetch;
            # previously they were misread as local file paths and reported
            # as broken.
            case "$link" in
              \#*|mailto:*) continue ;;
            esac

            # Decode URL-encoded characters for display
            decoded_link=$(urldecode "$link")

            # Determine link category and validation status
            status="✓"

            # Check if it's an external URL
            if [[ "$decoded_link" =~ ^https?:// ]]; then
              # Replace git.sulej.net with internal gitea URL for checking
              check_url="$link"
              if [[ "$link" =~ git\.sulej\.net ]]; then
                check_url="${link//git.sulej.net/gitea:3000}"
                check_url="${check_url//https:/http:}"
              fi

              # Check external URL with curl
              http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
                -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" \
                "$check_url" 2>/dev/null || echo "000")
            else
              # Local file - convert to full Gitea URL (keep URL encoding)
              if [[ "$link" =~ ^/ ]]; then
                # Absolute path from repo root
                check_url="http://gitea:3000/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH${link}"
                display_url="https://git.sulej.net/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH${link}"
              else
                # Relative path from markdown file
                md_dir=$(dirname "$md_file")
                if [[ "$md_dir" == "." ]]; then
                  rel_path="$link"
                else
                  rel_path="${md_dir#./}/$link"
                fi
                check_url="http://gitea:3000/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH/$rel_path"
                display_url="https://git.sulej.net/$REPO_OWNER/$REPO_NAME/raw/branch/$GIT_BRANCH/$rel_path"
              fi

              # Use display_url for output instead of decoded_link
              decoded_link="$display_url"

              # Check URL with curl
              http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$check_url" 2>/dev/null || echo "000")
            fi

            # Accept 2xx and 3xx status codes as valid
            if ! [[ "$http_code" =~ ^[23][0-9][0-9]$ ]]; then
              status="✖ (HTTP $http_code)"
              file_has_errors=1
              has_errors=1
              # Save broken link to persistent file
              echo "$decoded_link" >> /tmp/broken_links_$$.txt
            fi

            # Categorize all links
            if [[ "$decoded_link" =~ /export/.*\.(osk|osz)$ ]] || [[ "$decoded_link" =~ ^https?:// && ! "$decoded_link" =~ /media/ && ! "$decoded_link" =~ /src/tag/ ]]; then
              echo "    $status $decoded_link" >> /tmp/download_$$.txt
            elif [[ "$decoded_link" =~ /media/gameplay/.*\.(mp4|webm)$ ]]; then
              echo "    $status $decoded_link" >> /tmp/video_$$.txt
            elif [[ "$decoded_link" =~ /src/tag/ ]]; then
              echo "    $status $decoded_link" >> /tmp/tags_$$.txt
            elif [[ "$decoded_link" =~ \.(webp|png|jpg|jpeg)$ ]] || [[ "$decoded_link" =~ /media/(panel|icons|thumbnail)/ ]]; then
              echo "    $status $decoded_link" >> /tmp/media_$$.txt
            elif [[ "$decoded_link" =~ \.md$ ]]; then
              echo "    $status $decoded_link" >> /tmp/tags_$$.txt
            else
              echo "    $status $decoded_link" >> /tmp/download_$$.txt
            fi
          done < /tmp/links_$$.txt

          # Display categorized results - special handling for README
          if [[ "$md_file" == "./README.md" ]]; then
            # For README, group by skin name
            > /tmp/skins_$$.txt

            # Extract unique skin names from download links and decode them
            if [ -s /tmp/download_$$.txt ]; then
              grep -oP 'export/[^/]+' /tmp/download_$$.txt | sed 's|export/||' | while read -r encoded_name; do
                # Decode the skin name
                urldecode "$encoded_name"
              done | sort -u > /tmp/skins_$$.txt || true
            fi

            # Show general links first (not skin-specific)
            if [ -s /tmp/download_$$.txt ]; then
              general_downloads=$(grep -v '/export/.*\.osk' /tmp/download_$$.txt | sort -u || true)
              if [ -n "$general_downloads" ]; then
                echo "  general:"
                echo "$general_downloads"
                echo ""
              fi
            fi

            # Show each skin's links together.
            # grep -F throughout: decoded skin names can contain regex
            # metacharacters — '(', '[', '+' are all produced by the decoder —
            # which previously broke these pattern matches.
            while IFS= read -r skin_name; do
              [ -z "$skin_name" ] && continue

              echo "  skin: $skin_name"
              # Download link
              grep -F "/export/$skin_name/" /tmp/download_$$.txt 2>/dev/null | grep '\.osk' | sort -u || true
              # Thumbnail
              grep -F "thumbnail/$skin_name/" /tmp/media_$$.txt 2>/dev/null | sort -u || true
              # Docs
              grep -F "/docs/$skin_name/" /tmp/tags_$$.txt 2>/dev/null | sort -u || true
              echo ""
            done < /tmp/skins_$$.txt

            # Show version tags separately
            if [ -s /tmp/tags_$$.txt ]; then
              version_tags=$(grep '/src/tag/' /tmp/tags_$$.txt | sort -u || true)
              if [ -n "$version_tags" ]; then
                echo "  version tags:"
                echo "$version_tags"
                echo ""
              fi
            fi

            rm -f /tmp/skins_$$.txt
          else
            # For other markdown files, show categorized as before
            if [ -s /tmp/download_$$.txt ]; then
              echo "  download:"
              sort -u /tmp/download_$$.txt
              echo ""
            fi
            if [ -s /tmp/media_$$.txt ]; then
              echo "  media:"
              sort -u /tmp/media_$$.txt
              echo ""
            fi
            if [ -s /tmp/video_$$.txt ]; then
              echo "  video:"
              sort -u /tmp/video_$$.txt
              echo ""
            fi
            if [ -s /tmp/tags_$$.txt ]; then
              echo "  tags:"
              sort -u /tmp/tags_$$.txt
              echo ""
            fi
          fi

          rm -f /tmp/download_$$.txt /tmp/media_$$.txt /tmp/video_$$.txt /tmp/tags_$$.txt
          rm -f /tmp/links_$$.txt

          if [ "$file_has_errors" -eq 0 ]; then
            echo "  ✓ All links valid"
          else
            echo "  ✖ Some links broken"
          fi
          echo ""

          file_index=$((file_index + 1))
        done < all_markdown_files.txt

        echo ""
        if [ "$has_errors" -eq 0 ]; then
          echo "[Link Checker Complete — all links valid in $total_files files]"
          rm -f /tmp/broken_links_$$.txt
          exit 0
        else
          echo "[Link Checker Complete — found broken links in $total_files files]"
          echo ""
          echo "Missing files:"
          if [ -s /tmp/broken_links_$$.txt ]; then
            sort -u /tmp/broken_links_$$.txt
          else
            echo "(no broken links file found)"
          fi
          rm -f /tmp/broken_links_$$.txt
          exit 1
        fi