From c208799af319b10c5d68871f937ea7000b57b08a Mon Sep 17 00:00:00 2001
From: Mike Dalessio
Date: Tue, 2 Jul 2024 16:49:53 -0400
Subject: [PATCH] test: add test coverage describing how pushparser handles empty docs

The behavior is different between Java and C impls in a way that I
don't care enough to fix. Let's document the difference and move on
with our lives.
---
 lib/nokogiri/xml/sax/push_parser.rb |  3 +++
 test/xml/sax/test_push_parser.rb    | 33 ++++++++++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/lib/nokogiri/xml/sax/push_parser.rb b/lib/nokogiri/xml/sax/push_parser.rb
index 2b890a690a..5295b0cf3e 100644
--- a/lib/nokogiri/xml/sax/push_parser.rb
+++ b/lib/nokogiri/xml/sax/push_parser.rb
@@ -52,6 +52,9 @@ def write(chunk, last_chunk = false)
         ###
         # Finish the parsing. This method is only necessary for
         # Nokogiri::XML::SAX::Document#end_document to be called.
+        #
+        # ⚠ Note that empty documents are treated as an error when using the libxml2-based
+        # implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).
         def finish
           write("", true)
         end
diff --git a/test/xml/sax/test_push_parser.rb b/test/xml/sax/test_push_parser.rb
index aa7ce84e8e..a0dea3b99e 100644
--- a/test/xml/sax/test_push_parser.rb
+++ b/test/xml/sax/test_push_parser.rb
@@ -32,9 +32,36 @@
 
   it :test_empty_doc do
     parser.options |= Nokogiri::XML::ParseOptions::RECOVER
-    parser.write("", true)
-    assert_nil parser.document.start_elements
-    assert_nil parser.document.end_elements
+    parser.finish
+
+    assert_nil(parser.document.start_elements)
+    assert_nil(parser.document.end_elements)
+    if Nokogiri.jruby?
+      assert_empty(parser.document.errors)
+    elsif Nokogiri.uses_libxml?(">= 2.12.0") # gnome/libxml2@53050b1d
+      assert_match(/Document is empty/, parser.document.errors.first)
+    end
+    assert(parser.document.end_document_called)
+  end
+
+  it :test_empty_doc_without_recovery do
+    # behavior is different between implementations
+    # https://github.com/sparklemotion/nokogiri/issues/1758
+    if Nokogiri.jruby?
+      parser.finish
+
+      assert_nil(parser.document.start_elements)
+      assert_nil(parser.document.end_elements)
+      assert_empty(parser.document.errors)
+      assert(parser.document.end_document_called)
+    else
+      e = assert_raises(Nokogiri::XML::SyntaxError) do
+        parser.finish
+      end
+      if Nokogiri.uses_libxml?(">= 2.12.0") # gnome/libxml2@53050b1d
+        assert_match(/Document is empty/, e.message)
+      end
+    end
   end
 
   it :test_finish_should_rethrow_last_error do