Skip to content
Snippets Groups Projects
Verified Commit f4887c30 authored by Chris Coley
Browse files

Split the cache purge into chunks to stay under the Cloudflare purge limit

parent 2a528e61
Branches
No related tags found
No related merge requests found
Pipeline #975 passed
Pipeline: Pages Test

#976

    ...@@ -71,8 +71,13 @@ pages: ...@@ -71,8 +71,13 @@ pages:
    rules: rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
    # Purge all of this site's URLs from the Cloudflare cache # Create a list of URLs that need to be purged from the cache after GitLab Pages
    create-purge-json: # deployment. Each real file/directory has multiple URLs because the router for
    # GitLab Pages fudges paths for better user experience. The fudging rules are:
    # - Directories can be accessed with or without a trailing slash
    # - Files can be accessed with or without a trailing slash
    # - Files with the '.html' extension can be accessed with or without .html
    create-purge-list:
    stage: deploy stage: deploy
    rules: rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
    ...@@ -80,31 +85,28 @@ create-purge-json: ...@@ -80,31 +85,28 @@ create-purge-json:
    tags: tags:
    - docker - docker
    script: script:
    # Add the index-less homepage
    - echo "$CI_PAGES_URL" > purge-list.txt
    # Add all the HTML files and HTML symlinks, with and without .html extension
    - |- - |-
    echo -en "{\n \"files\": [" > purge.json for f in $(find public/* \( -type f -or -type l \) -iname '*.html'); do
    # Add the index-less homepage, with and without trailing slash f="${f#public/}"
    echo -en "\n \"$CI_PAGES_URL\"" >> purge.json echo "$CI_PAGES_URL/$f" >> purge-list.txt
    echo -en ",\n \"$CI_PAGES_URL/\"" >> purge.json echo "$CI_PAGES_URL/${f%.html}" >> purge-list.txt
    # Add all the directories, non-HTML files and non-HTML symlinks; with and
    # without trailing slash
    for f in $(find public \( -type d -or -type f -or -type l \) -not -iname '*.html'); do
    echo -en ",\n \"$CI_PAGES_URL/${f#public/}\"" >> purge.json
    echo -en ",\n \"$CI_PAGES_URL/${f#public/}/\"" >> purge.json
    done done
    # Add all the HTML files and HTML symlinks, with and without trailing # Add everything else
    # slash. First with file extension, then without - |-
    for f in $(find public \( -type f -or -type l \) -iname '*.html'); do for f in $(find public/* \( -type d -or -type f -or -type l \) -not -iname '*.html'); do
    echo -en ",\n \"$CI_PAGES_URL/${f#public/}\"" >> purge.json echo "$CI_PAGES_URL/${f#public/}" >> purge-list.txt
    echo -en ",\n \"$CI_PAGES_URL/${f#public/}/\"" >> purge.json
    f="${f%.html}"
    echo -en ",\n \"$CI_PAGES_URL/${f#public/}\"" >> purge.json
    echo -en ",\n \"$CI_PAGES_URL/${f#public/}/\"" >> purge.json
    done done
    echo -e "\n ]\n}" >> purge.json # Remove any duplicate URLs
    - cat purge.json - sort -u -o purge-list.txt purge-list.txt
    # Duplicate each line, adding a trailing slash to the duplicates
    - sed -i 'p;s|$|/|' purge-list.txt
    - cat purge-list.txt
    artifacts: artifacts:
    paths: paths:
    - purge.json - purge-list.txt
    trigger-cache-purge: trigger-cache-purge:
    stage: .post stage: .post
    ... ...
    ......
    # Purge the Cloudflare cache using the request body contained in purge.json # Purge the URLs contained in purge-list.txt from the Cloudflare cache
    # #
    # We delay this job to give the pages:deploy job time to finish. If we don't # We delay this job to give the pages:deploy job time to finish. If we don't
    # delay, then the cache might refill with old pages before the new pages are # delay, then the cache might refill with old pages before the new pages are
    ...@@ -12,14 +12,37 @@ purge-cache: ...@@ -12,14 +12,37 @@ purge-cache:
    start_in: 3 minutes start_in: 3 minutes
    needs: needs:
    - pipeline: $PARENT_PIPELINE_ID - pipeline: $PARENT_PIPELINE_ID
    job: create-purge-json job: create-purge-list
    before_script:
    # Make sure the purge-list.txt file is readable, else exit
    - test -r purge-list.txt || { echo "purge-list.txt not found" ; exit 1 ; }
    - echo "Purge list has $(cat purge-list.txt | wc -l) URLs"
    # Default to chunks of 30 URLs because Cloudflare only allows 30 URLs per
    # purge request on free accounts
    - echo "Chunk size of ${CF_PURGE_CACHE_CHUNK_SIZE:=30}"
    script: script:
    - cat purge.json # Split the purge list into chunks named 'purge-chunk-[aaa,aab,...]'
    - >- - split -l $CF_PURGE_CACHE_CHUNK_SIZE -a 3 purge-list.txt purge-chunk-
    wget -qO- "https://api.cloudflare.com/client/v4/zones/$CF_PURGE_CACHE_ZONE/purge_cache" # Loop over the chunks, creating a purge request for each
    --header "Content-Type: application/json" - |-
    --header "Authorization: Bearer $CF_PURGE_CACHE_TOKEN" for chunk in purge-chunk-* ; do
    # Create the purge request body
    echo -en "{\n \"files\": [" > purge.json
    unset comma # This needs to be unset for the first line in each chunk
    while read path; do
    echo -en "$comma\n \"$path\"" >> purge.json
    comma=','
    done < $chunk
    echo -e "\n ]\n}" >> purge.json
    cat purge.json
    # Make the API request to Cloudflare to purge the URLs from cache
    wget -qO- "https://api.cloudflare.com/client/v4/zones/$CF_PURGE_CACHE_ZONE/purge_cache" \
    --header "Content-Type: application/json" \
    --header "Authorization: Bearer $CF_PURGE_CACHE_TOKEN" \
    --post-file purge.json --post-file purge.json
    # Rate limit ourselves to 1 request per second
    sleep 1
    done
    # vi: set ts=2 sw=2 et ft=yaml: # vi: set ts=2 sw=2 et ft=yaml:
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Please register or sign in to comment