From 341bac6cc21cf563306184c87d3d9f7516873ea9 Mon Sep 17 00:00:00 2001 From: Matt J Cloyd Date: Mon, 22 Apr 2024 09:50:14 -0400 Subject: [PATCH] Determine which pages to scan with pa11y via Jekyll Hook This work attempts to solve the problem of overly long CI runs, in which single PRs were taking 25+ minutes, because each PR scans every page with pa11y-ci. See #3752. This commit adds a Jekyll::Hook which keeps track of which destination files (built HTML files) should be checked with pa11y-ci for each PR. The files to check are based on which files changed in the last commit. The design of the code lets you swap out different "differs", which are the objects responsible for getting the list of files. This design makes the code easier to test, and makes it more adaptable. One problem I'm seeing so far is that we only check what files were changed *in the last commit*. This means that, if you're ever running CI locally, you'd have to commit your changes to have them picked up by the "differ". We should probably change this in a future commit, and have a "differ" for local development and a "differ" for CI. To summarize, the differs should differ. We also have yet to implement the case where a stylesheet or something site-wide changes. --- _plugins/pa11y.rb | 66 +++++++++++++++++++++++++++++++++++ spec/_plugins/pa11y_spec.rb | 68 +++++++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 _plugins/pa11y.rb create mode 100644 spec/_plugins/pa11y_spec.rb diff --git a/_plugins/pa11y.rb b/_plugins/pa11y.rb new file mode 100644 index 000000000..1027dd188 --- /dev/null +++ b/_plugins/pa11y.rb @@ -0,0 +1,66 @@ +=begin + +Determines which files to check with pa11y-ci per pull request. + +The current scope of pa11y testing is as follows: + +- Check any posts or pages whose content has changed. +- Check any posts or pages using changed layouts. +- If stylesheets have changed, check a sample of all page types: + - Three posts of each layout in _config.yml's `defaults` + - The first, last, and middle blog archive pages (/blog/page/:num) + +Previously, checking every page resulted in excessive CI runtimes. + +=end + +# Wraps Jekyll documents with convenience methods. +# +# @param path [String] The relative path of the source file, usually a +# Markdown file (.md) +# @param layout [String] The name of the Jekyll document's layout +# @param differ [Object, #changed_files] The collaborator which lists +# the files that were changed in the last commit. +# @todo Are there cases when the document being checked has no layout? +Document = Struct.new(:path, :layout, :differ) do + # @return [Boolean] Whether to scan this document in pa11y-ci + def to_scan? + source_changed? || layout_changed? + end + + def source_changed? + differ.changed_files.include?(path) + end + + def layout_changed? + differ.changed_files.include?("_layouts/#{layout}.html") + end +end + +# Gets the list of files changed in the last commit +# @note Designed as a collaborator object to make Document easier +# to test +module GitDiffer + # @return [Array] List of relative paths of files that were + # changed in the last commit. + def self.changed_files + @changed_files ||= %x[ git diff-tree --no-commit-id --name-only -r $(git rev-parse HEAD) ] + .to_s + .split("\n") + .map(&:strip) + end +end + +# Outputs posts and pages to scan in file `pa11y_targets`. +# @todo Implement the stylesheet checker (third bullet above) +Jekyll::Hooks.register :documents, :post_render do |doc| + puts doc.relative_path if doc.relative_path.match?(/css/) + document = Document.new( + doc.relative_path, + doc.data["layout"], + GitDiffer + ) + if document.to_scan? + File.open('pa11y_targets', 'a') { |f| f.write doc.destination("") } + end +end diff --git a/spec/_plugins/pa11y_spec.rb b/spec/_plugins/pa11y_spec.rb new file mode 100644 index 000000000..d77499637 --- /dev/null +++ b/spec/_plugins/pa11y_spec.rb @@ -0,0 +1,68 @@ +require_relative '../../_plugins/pa11y' + +RSpec.describe Document do + + let(:blog_path) { "_posts/2024-04-10-working-with-oracle-databases.md" } + let(:blog_layout) { "post" } + let(:blog_layout_path) { "_layouts/post.html" } + + # Stand-in for file diffing (GitDiffer), so tests don't rely on + # git history. + class TestDiffer + attr_reader :changed_files + + def initialize(*paths) + @changed_files = *paths + end + end + + # @example Use the convenience method + # TestDiffer("_layouts/default.html", "_posts/2024-04-10-new-post.md") + def TestDiffer(*paths) + TestDiffer.new(*paths) + end + + describe "#to_scan?" do + context "source files" do + let(:document) { + Document.new(blog_path, blog_layout, test_differ) + } + + context "with a matching changed source file" do + let(:test_differ) { TestDiffer(blog_path) } + it "returns true" do + expect(document.to_scan?).to be true + end + end + + context "without a matching changed source file" do + let(:test_differ) { TestDiffer() } + it "returns false" do + expect(document.to_scan?).to be false + end + end + + context "with a matching changed layout file" do + let(:test_differ) { TestDiffer(blog_layout_path) } + it "returns true" do + expect(document.to_scan?).to be true + end + end + + context "without a matching changed layout file" do + let(:test_differ) { TestDiffer() } + it "returns false" do + expect(document.to_scan?).to be false + end + end + + context "with a matching changed source file and changed layout file" do + let(:test_differ) { TestDiffer(blog_path, blog_layout_path) } + it "returns true" do + expect(document.to_scan?).to be true + end + end + end + end + +end