feat: new course kick off: Scraping with Apify and AI #8100
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: builds the docs, smoke-tests the project's Nginx config against
# a locally served build, and lints changed markdown files and app code.
name: Test

on:
  push:
    branches:
      - master
      - 'renovate/**'
  pull_request:

jobs:
  # Builds the site, serves it locally, starts Nginx with the repository's
  # nginx.conf and asserts on response headers / status codes.
  build:
    name: Docs build
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source code
        uses: actions/checkout@v6

      # Step label kept in sync with the version that is actually installed.
      - name: Use Node.js 24
        uses: actions/setup-node@v6
        with:
          node-version: '24'
          cache: 'npm'
          cache-dependency-path: 'package-lock.json'

      - name: Enable corepack
        run: |
          corepack enable

      - name: Install Dependencies
        run: npm ci --force

      - run: npm run build
        env:
          INTERCOM_APP_ID: ${{ secrets.INTERCOM_APP_ID }}
          SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }}

      - name: Install Nginx
        run: |
          sudo apt-get update
          sudo apt-get install -y nginx

      - name: Start Docusaurus server
        run: |
          nohup npx docusaurus serve --port 3000 --no-open &
          # Poll until the server answers instead of relying on a fixed sleep,
          # which is either too short on a slow runner or wasted time on a fast one.
          curl -f --retry 15 --retry-delay 1 --retry-connrefused http://localhost:3000 > /dev/null

      - name: Start Nginx with project config
        run: |
          PWD_PATH="$(pwd)"
          # Minimal top-level config that wraps the repository's nginx.conf and
          # keeps logs / pid file inside the workspace.
          cat > default.conf <<EOF
          worker_processes auto;
          error_log ${PWD_PATH}/logs/error.log;
          pid ${PWD_PATH}/logs/nginx.pid;
          events {}
          http {
              access_log ${PWD_PATH}/logs/access.log;
              include ${PWD_PATH}/nginx.conf;
          }
          EOF
          # NOTE(review): default.conf as generated above only contains an
          # include of nginx.conf, so this substitution appears to match nothing
          # in this file. If the intent is to point the upstream at the local
          # Docusaurus server, it probably has to target nginx.conf - confirm.
          sed -i 's|https://apify.github.io/apify-docs|http://localhost:3000|g' default.conf
          mkdir -p "${PWD_PATH}/logs"
          nginx -c "${PWD_PATH}/default.conf"
          sleep 1

      - name: Run header assertions
        run: |
          set -euo pipefail

          # assert_header URL HEADER EXPECTED [curl args...]
          # Fails the step unless the HEADER line of the response contains EXPECTED.
          function assert_header() {
            url=$1
            header=$2
            expected=$3
            shift 3
            extra_args=("$@")
            # "|| true": a missing header must reach the explicit check below
            # instead of aborting through pipefail.
            actual=$(curl -s -D - -o /dev/null "${extra_args[@]}" "$url" | grep -i "^$header" | tr -d '\r' || true)
            echo "→ $url → $actual"
            echo "$actual" | grep -q "$expected" || { echo "❌ Expected '$expected' in '$header' for $url"; exit 1; }
          }

          # assert_status URL EXPECTED [curl args...]
          # Fails the step unless the HTTP status code equals EXPECTED.
          function assert_status() {
            url=$1
            expected=$2
            shift 2
            extra_args=("$@")
            actual=$(curl -s -o /dev/null -w "%{http_code}" "${extra_args[@]}" "$url")
            echo "→ $url → HTTP $actual"
            [ "$actual" = "$expected" ] || { echo "❌ Expected HTTP $expected but got $actual for $url"; exit 1; }
          }

          # assert_no_redirect URL [curl args...]
          # Fails the step when the response is any HTTP redirect.
          function assert_no_redirect() {
            url=$1
            shift
            extra_args=("$@")
            # Headers go to stdout; the status code is appended as the last line.
            response=$(curl -s -D - -o /dev/null -w "\n%{http_code}" "${extra_args[@]}" "$url" 2>/dev/null)
            status=$(echo "$response" | tail -1)
            location=$(echo "$response" | grep -i "^location:" | tr -d '\r' || true)
            echo "→ $url → HTTP $status ${location:+(${location})}"
            # Cover every redirect status, not only 301/302, so that a
            # 303/307/308 open redirect cannot slip through.
            case "$status" in
              301|302|303|307|308)
                echo "❌ Got redirect for $url: $location"
                exit 1
                ;;
            esac
          }

          echo "🧪 Checking open redirect protection..."
          # Backslash URLs must not produce redirects (the redirect Location
          # would contain \, which browsers normalize to /, creating
          # protocol-relative URLs like //evil.com that redirect externally).
          assert_no_redirect "http://localhost:8080///%5Cevil.com/"
          assert_no_redirect "http://localhost:8080/%5Cevil.com/"
          assert_no_redirect "http://localhost:8080///%5cevil.com/"
          # Literal backslash (simulates CDN pre-decoding %5C before forwarding)
          assert_no_redirect "http://localhost:8080" --request-target '/\evil.com/'
          assert_no_redirect "http://localhost:8080" --request-target '///\evil.com/'
          # Normal trailing-slash redirect must still work
          assert_status "http://localhost:8080/platform/proxy/usage/" "302"

          echo "🧪 Checking Nginx responses... (apify-docs)"
          assert_header "http://localhost:8080/" "Content-Type" "text/html"
          assert_header "http://localhost:8080/" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/platform/proxy/usage" "Content-Type" "text/html"
          assert_header "http://localhost:8080/platform/proxy/usage.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/platform/proxy/usage" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/img/docs-og.png" "Content-Type" "image/png"
          assert_header "http://localhost:8080/img/javascript-40x40.svg" "Content-Type" "image/svg"
          assert_header "http://localhost:8080/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-sdk-js)"
          assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start" "Content-Type" "text/html"
          assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/sdk/js/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/js/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-sdk-python)"
          assert_header "http://localhost:8080/sdk/python/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/sdk/python/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/python/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/sdk/python/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/python/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-client-js)"
          assert_header "http://localhost:8080/api/client/js/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/api/client/js/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/js/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/api/client/js/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/js/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-client-python)"
          assert_header "http://localhost:8080/api/client/python/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/api/client/python/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/python/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/api/client/python/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/python/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-cli)"
          assert_header "http://localhost:8080/cli/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/cli/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/cli/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/cli/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/cli/llms-full.txt" "Content-Type" "text/markdown"

          echo "✅ All Nginx header checks passed."

      # Runs even when an earlier step failed.
      - name: Stop Nginx
        if: always()
        run: nginx -c "$(pwd)/default.conf" -s stop

  # Runs markdownlint on the markdown files changed by the push / pull request.
  lint_content:
    name: Lint markdown content
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source code
        uses: actions/checkout@v6

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v47
        with:
          files: '**/*.{md,mdx}'
          files_ignore: '!sources/api/*.{md,mdx}'
          separator: ','

      - name: Use Node.js 24
        uses: actions/setup-node@v6
        with:
          node-version: '24'
          cache: 'npm'
          cache-dependency-path: 'package-lock.json'

      - name: Enable corepack
        run: |
          corepack enable

      - name: Install Dependencies
        run: npm ci --force

      - name: List and Lint Changed Markdown Files
        env:
          # Passed through the environment rather than interpolated into the
          # script, so file names are never evaluated as shell code.
          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          # Split on the same separator that the changed-files step was given.
          IFS=',' read -ra FILE_ARRAY <<< "$ALL_CHANGED_FILES"
          for file in "${FILE_ARRAY[@]}"; do
            npx markdownlint "$file"
          done

  # Runs the project's code linter.
  lint_code:
    name: Lint app code
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source code
        uses: actions/checkout@v6

      - name: Use Node.js 24
        uses: actions/setup-node@v6
        with:
          node-version: '24'
          cache: 'npm'
          cache-dependency-path: 'package-lock.json'

      - name: Enable corepack
        run: |
          corepack enable

      - name: Install Dependencies
        run: npm ci --force

      - run: npm run lint:code