Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-15-SP1:GA
ruby2.5
rexml-test.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File rexml-test.patch of Package ruby2.5
diff --git a/lib/rexml.rb b/lib/rexml.rb new file mode 100644 index 0000000000..eee246e436 --- /dev/null +++ b/lib/rexml.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +require_relative "rexml/document" diff --git a/lib/rexml/attlistdecl.rb b/lib/rexml/attlistdecl.rb index dc1d2add0b..44a91d66d6 100644 --- a/lib/rexml/attlistdecl.rb +++ b/lib/rexml/attlistdecl.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false #vim:ts=2 sw=2 noexpandtab: -require 'rexml/child' -require 'rexml/source' +require_relative 'child' +require_relative 'source' module REXML # This class needs: diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index ca5984e178..fe48745ccf 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -1,6 +1,6 @@ -# frozen_string_literal: false -require "rexml/namespace" -require 'rexml/text' +# frozen_string_literal: true +require_relative "namespace" +require_relative 'text' module REXML # Defines an Element Attribute; IE, a attribute=value pair, as in: @@ -13,9 +13,6 @@ class Attribute # The element to which this attribute belongs attr_reader :element - # The normalized value of this attribute. That is, the attribute with - # entities intact. 
- attr_writer :normalized PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um @@ -67,15 +64,11 @@ def initialize( first, second=nil, parent=nil ) # e.add_attribute( "nsa:a", "aval" ) # e.add_attribute( "b", "bval" ) # e.attributes.get_attribute( "a" ).prefix # -> "nsa" - # e.attributes.get_attribute( "b" ).prefix # -> "elns" + # e.attributes.get_attribute( "b" ).prefix # -> "" # a = Attribute.new( "x", "y" ) # a.prefix # -> "" def prefix - pf = super - if pf == "" - pf = @element.prefix if @element - end - pf + super end # Returns the namespace URL, if defined, or nil otherwise @@ -86,9 +79,26 @@ def prefix # e.add_attribute("nsx:a", "c") # e.attribute("ns:a").namespace # => "http://url" # e.attribute("nsx:a").namespace # => nil + # + # This method always returns "" for no namespace attribute. Because + # the default namespace doesn't apply to attribute names. + # + # From https://www.w3.org/TR/xml-names/#uniqAttrs + # + # > the default namespace does not apply to attribute names + # + # e = REXML::Element.new("el") + # e.add_namespace("", "http://example.com/") + # e.namespace # => "http://example.com/" + # e.add_attribute("a", "b") + # e.attribute("a").namespace # => "" def namespace arg=nil arg = prefix if arg.nil? 
- @element.namespace arg + if arg == "" + "" + else + @element.namespace(arg) + end end # Returns true if other is an Attribute and has the same name and value, @@ -109,10 +119,13 @@ def hash # b = Attribute.new( "ns:x", "y" ) # b.to_string # -> "ns:x='y'" def to_string + value = to_s if @element and @element.context and @element.context[:attribute_quote] == :quote - %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^ + value = value.gsub('"', '&quot;') if value.include?('"') + %Q^#@expanded_name="#{value}"^ else - "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'" + value = value.gsub("'", '&apos;') if value.include?("'") + "#@expanded_name='#{value}'" end end @@ -128,7 +141,6 @@ def to_s return @normalized if @normalized @normalized = Text::normalize( @unnormalized, doctype ) - @unnormalized = nil @normalized end @@ -136,9 +148,16 @@ def to_s # have been expanded to their values def value return @unnormalized if @unnormalized - @unnormalized = Text::unnormalize( @normalized, doctype ) - @normalized = nil - @unnormalized + + @unnormalized = Text::unnormalize(@normalized, doctype, + entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit) + end + + # The normalized value of this attribute. That is, the attribute with + # entities intact. 
+ def normalized=(new_normalized) + @normalized = new_normalized + @unnormalized = nil end # Returns a copy of this attribute @@ -177,7 +196,7 @@ def node_type end def inspect - rv = "" + rv = +"" write( rv ) rv end diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb index 2238446dc4..997f5a08db 100644 --- a/lib/rexml/cdata.rb +++ b/lib/rexml/cdata.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/text" +require_relative "text" module REXML class CData < Text diff --git a/lib/rexml/child.rb b/lib/rexml/child.rb index d23451e71e..cc6e9a4719 100644 --- a/lib/rexml/child.rb +++ b/lib/rexml/child.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/node" +require_relative "node" module REXML ## diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb index 822fe0d586..52c58b46f6 100644 --- a/lib/rexml/comment.rb +++ b/lib/rexml/comment.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "child" module REXML ## diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index cb9bf57406..f35904845e 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -1,20 +1,25 @@ # frozen_string_literal: false -require "rexml/parent" -require "rexml/parseexception" -require "rexml/namespace" -require 'rexml/entity' -require 'rexml/attlistdecl' -require 'rexml/xmltokens' +require_relative "parent" +require_relative "parseexception" +require_relative "namespace" +require_relative 'entity' +require_relative 'attlistdecl' +require_relative 'xmltokens' module REXML class ReferenceWriter def initialize(id_type, public_id_literal, - system_literal) + system_literal, + context=nil) @id_type = id_type @public_id_literal = public_id_literal @system_literal = system_literal - @default_quote = "\"" + if context and context[:prologue_quote] == :apostrophe + @default_quote = "'" + else + @default_quote = "\"" + end end def write(output) @@ -150,7 +155,8 @@ def write( output, indent=0, transitive=false, ie_hack=false ) if 
@external_id reference_writer = ReferenceWriter.new(@external_id, @long_name, - @uri) + @uri, + context) reference_writer.write(output) end unless @children.empty? @@ -165,7 +171,11 @@ def write( output, indent=0, transitive=false, ie_hack=false ) end def context - @parent.context + if @parent + @parent.context + else + nil + end end def entity( name ) @@ -187,7 +197,7 @@ def public when "SYSTEM" nil when "PUBLIC" - strip_quotes(@long_name) + @long_name end end @@ -197,9 +207,9 @@ def public def system case @external_id when "SYSTEM" - strip_quotes(@long_name) + @long_name when "PUBLIC" - @uri.kind_of?(String) ? strip_quotes(@uri) : nil + @uri.kind_of?(String) ? @uri : nil end end @@ -221,15 +231,6 @@ def notation(name) notation_decl.name == name } end - - private - - # Method contributed by Henrik Martensson - def strip_quotes(quoted_string) - quoted_string =~ /^[\'\"].*[\'\"]$/ ? - quoted_string[1, quoted_string.length-2] : - quoted_string - end end # We don't really handle any of these since we're not a validating @@ -287,8 +288,10 @@ def initialize name, middle, pub, sys end def to_s + context = nil + context = parent.context if parent notation = "<!NOTATION #{@name}" - reference_writer = ReferenceWriter.new(@middle, @public, @system) + reference_writer = ReferenceWriter.new(@middle, @public, @system, context) reference_writer.write(notation) notation << ">" notation diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 806bc499cd..d1747dd42f 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -1,40 +1,98 @@ # frozen_string_literal: false -require "rexml/security" -require "rexml/element" -require "rexml/xmldecl" -require "rexml/source" -require "rexml/comment" -require "rexml/doctype" -require "rexml/instruction" -require "rexml/rexml" -require "rexml/parseexception" -require "rexml/output" -require "rexml/parsers/baseparser" -require "rexml/parsers/streamparser" -require "rexml/parsers/treeparser" +require_relative "security" 
+require_relative "element" +require_relative "xmldecl" +require_relative "source" +require_relative "comment" +require_relative "doctype" +require_relative "instruction" +require_relative "rexml" +require_relative "parseexception" +require_relative "output" +require_relative "parsers/baseparser" +require_relative "parsers/streamparser" +require_relative "parsers/treeparser" module REXML - # Represents a full XML document, including PIs, a doctype, etc. A - # Document has a single child that can be accessed by root(). - # Note that if you want to have an XML declaration written for a document - # you create, you must add one; REXML documents do not write a default - # declaration for you. See |DECLARATION| and |write|. + # Represents an XML document. + # + # A document may have: + # + # - A single child that may be accessed via method #root. + # - An XML declaration. + # - A document type. + # - Processing instructions. + # + # == In a Hurry? + # + # If you're somewhat familiar with XML + # and have a particular task in mind, + # you may want to see the + # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html], + # and in particular, the + # {tasks page for documents}[../doc/rexml/tasks/tocs/document_toc_rdoc.html]. + # class Document < Element - # A convenient default XML declaration. If you want an XML declaration, - # the easiest way to add one is mydoc << Document::DECLARATION - # +DEPRECATED+ - # Use: mydoc << XMLDecl.default + # A convenient default XML declaration. Use: + # + # mydoc << XMLDecl.default + # DECLARATION = XMLDecl.default - # Constructor - # @param source if supplied, must be a Document, String, or IO. - # Documents have their context and Element attributes cloned. - # Strings are expected to be valid XML documents. IOs are expected - # to be sources of valid XML documents. - # @param context if supplied, contains the context of the document; - # this should be a Hash. 
+ # :call-seq: + # new(string = nil, context = {}) -> new_document + # new(io_stream = nil, context = {}) -> new_document + # new(document = nil, context = {}) -> new_document + # + # Returns a new \REXML::Document object. + # + # When no arguments are given, + # returns an empty document: + # + # d = REXML::Document.new + # d.to_s # => "" + # + # When argument +string+ is given, it must be a string + # containing a valid XML document: + # + # xml_string = '<root><foo>Foo</foo><bar>Bar</bar></root>' + # d = REXML::Document.new(xml_string) + # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>" + # + # When argument +io_stream+ is given, it must be an \IO object + # that is opened for reading, and when read must return a valid XML document: + # + # File.write('t.xml', xml_string) + # d = File.open('t.xml', 'r') do |io| + # REXML::Document.new(io) + # end + # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>" + # + # When argument +document+ is given, it must be an existing + # document object, whose context and attributes (but not children) + # are cloned into the new document: + # + # d = REXML::Document.new(xml_string) + # d.children # => [<root> ... 
</>] + # d.context = {raw: :all, compress_whitespace: :all} + # d.add_attributes({'bar' => 0, 'baz' => 1}) + # d1 = REXML::Document.new(d) + # d1.children # => [] + # d1.context # => {:raw=>:all, :compress_whitespace=>:all} + # d1.attributes # => {"bar"=>bar='0', "baz"=>baz='1'} + # + # When argument +context+ is given, it must be a hash + # containing context entries for the document; + # see {Element Context}[../doc/rexml/context_rdoc.html]: + # + # context = {raw: :all, compress_whitespace: :all} + # d = REXML::Document.new(xml_string, context) + # d.context # => {:raw=>:all, :compress_whitespace=>:all} + # def initialize( source = nil, context = {} ) @entity_expansion_count = 0 + @entity_expansion_limit = Security.entity_expansion_limit + @entity_expansion_text_limit = Security.entity_expansion_text_limit super() @context = context return if source.nil? @@ -46,26 +104,71 @@ def initialize( source = nil, context = {} ) end end + # :call-seq: + # node_type -> :document + # + # Returns the symbol +:document+. + # def node_type :document end - # Should be obvious + # :call-seq: + # clone -> new_document + # + # Returns the new document resulting from executing + # <tt>Document.new(self)</tt>. See Document.new. + # def clone Document.new self end - # According to the XML spec, a root node has no expanded name + # :call-seq: + # expanded_name -> empty_string + # + # Returns an empty string. + # def expanded_name '' #d = doc_type #d ? d.name : "UNDEFINED" end - alias :name :expanded_name - # We override this, because XMLDecls and DocTypes must go at the start - # of the document + # :call-seq: + # add(xml_decl) -> self + # add(doc_type) -> self + # add(object) -> self + # + # Adds an object to the document; returns +self+. 
+ # + # When argument +xml_decl+ is given, + # it must be an REXML::XMLDecl object, + # which becomes the XML declaration for the document, + # replacing the previous XML declaration if any: + # + # d = REXML::Document.new + # d.xml_decl.to_s # => "" + # d.add(REXML::XMLDecl.new('2.0')) + # d.xml_decl.to_s # => "<?xml version='2.0'?>" + # + # When argument +doc_type+ is given, + # it must be an REXML::DocType object, + # which becomes the document type for the document, + # replacing the previous document type, if any: + # + # d = REXML::Document.new + # d.doctype.to_s # => "" + # d.add(REXML::DocType.new('foo')) + # d.doctype.to_s # => "<!DOCTYPE foo>" + # + # When argument +object+ (not an REXML::XMLDecl or REXML::DocType object) + # is given it is added as the last child: + # + # d = REXML::Document.new + # d.add(REXML::Element.new('foo')) + # d.to_s # => "<foo/>" + # def add( child ) if child.kind_of? XMLDecl if @children[0].kind_of? XMLDecl @@ -99,49 +202,108 @@ def add( child ) end alias :<< :add + # :call-seq: + # add_element(name_or_element = nil, attributes = nil) -> new_element + # + # Adds an element to the document by calling REXML::Element.add_element: + # + # REXML::Element.add_element(name_or_element, attributes) def add_element(arg=nil, arg2=nil) rv = super raise "attempted adding second root element to document" if @elements.size > 1 rv end - # @return the root Element of the document, or nil if this document - # has no children. + # :call-seq: + # root -> root_element or nil + # + # Returns the root element of the document, if it exists, otherwise +nil+: + # + # d = REXML::Document.new('<root></root>') + # d.root # => <root/> + # d = REXML::Document.new('') + # d.root # => nil + # def root elements[1] #self #@children.find { |item| item.kind_of? Element } end - # @return the DocType child of the document, if one exists, - # and nil otherwise. 
+ # :call-seq: + # doctype -> doc_type or nil + # + # Returns the DocType object for the document, if it exists, otherwise +nil+: + # + # d = REXML::Document.new('<!DOCTYPE document SYSTEM "subjects.dtd">') + # d.doctype.class # => REXML::DocType + # d = REXML::Document.new('') + # d.doctype.class # => nil + # def doctype @children.find { |item| item.kind_of? DocType } end - # @return the XMLDecl of this document; if no XMLDecl has been - # set, the default declaration is returned. + # :call-seq: + # xml_decl -> xml_decl + # + # Returns the XMLDecl object for the document, if it exists, + # otherwise the default XMLDecl object: + # + # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-8"?>') + # d.xml_decl.class # => REXML::XMLDecl + # d.xml_decl.to_s # => "<?xml version='1.0' encoding='UTF-8'?>" + # d = REXML::Document.new('') + # d.xml_decl.class # => REXML::XMLDecl + # d.xml_decl.to_s # => "" + # def xml_decl rv = @children[0] return rv if rv.kind_of? XMLDecl @children.unshift(XMLDecl.default)[0] end - # @return the XMLDecl version of this document as a String. - # If no XMLDecl has been set, returns the default version. + # :call-seq: + # version -> version_string + # + # Returns the XMLDecl version of this document as a string, + # if it has been set, otherwise the default version: + # + # d = REXML::Document.new('<?xml version="2.0" encoding="UTF-8"?>') + # d.version # => "2.0" + # d = REXML::Document.new('') + # d.version # => "1.0" + # def version xml_decl().version end - # @return the XMLDecl encoding of this document as an - # Encoding object. - # If no XMLDecl has been set, returns the default encoding. 
+ # :call-seq: + # encoding -> encoding_string + # + # Returns the XMLDecl encoding of the document, + # if it has been set, otherwise the default encoding: + # + # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-16"?>') + # d.encoding # => "UTF-16" + # d = REXML::Document.new('') + # d.encoding # => "UTF-8" + # def encoding xml_decl().encoding end - # @return the XMLDecl standalone value of this document as a String. - # If no XMLDecl has been set, returns the default setting. + # :call-seq: + # stand_alone? + # + # Returns the XMLDecl standalone value of the document as a string, + # if it has been set, otherwise the default standalone value: + # + # d = REXML::Document.new('<?xml standalone="yes"?>') + # d.stand_alone? # => "yes" + # d = REXML::Document.new('') + # d.stand_alone? # => nil + # def stand_alone? xml_decl().stand_alone? end @@ -226,7 +388,7 @@ def write(*arguments) end formatter = if indent > -1 if transitive - require "rexml/formatters/transitive" + require_relative "formatters/transitive" REXML::Formatters::Transitive.new( indent, ie_hack ) else REXML::Formatters::Pretty.new( indent, ie_hack ) @@ -271,10 +433,12 @@ def Document::entity_expansion_text_limit end attr_reader :entity_expansion_count + attr_writer :entity_expansion_limit + attr_accessor :entity_expansion_text_limit def record_entity_expansion @entity_expansion_count += 1 - if @entity_expansion_count > Security.entity_expansion_limit + if @entity_expansion_count > @entity_expansion_limit raise "number of entity expansions exceeded, processing aborted." 
end end diff --git a/lib/rexml/dtd/attlistdecl.rb b/lib/rexml/dtd/attlistdecl.rb index 32847daadb..1326cb21e4 100644 --- a/lib/rexml/dtd/attlistdecl.rb +++ b/lib/rexml/dtd/attlistdecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class AttlistDecl < Child diff --git a/lib/rexml/dtd/dtd.rb b/lib/rexml/dtd/dtd.rb index 927d5d847b..8b0f2d753a 100644 --- a/lib/rexml/dtd/dtd.rb +++ b/lib/rexml/dtd/dtd.rb @@ -1,10 +1,10 @@ # frozen_string_literal: false -require "rexml/dtd/elementdecl" -require "rexml/dtd/entitydecl" -require "rexml/comment" -require "rexml/dtd/notationdecl" -require "rexml/dtd/attlistdecl" -require "rexml/parent" +require_relative "elementdecl" +require_relative "entitydecl" +require_relative "../comment" +require_relative "notationdecl" +require_relative "attlistdecl" +require_relative "../parent" module REXML module DTD diff --git a/lib/rexml/dtd/elementdecl.rb b/lib/rexml/dtd/elementdecl.rb index 119fd41a8f..20ed023244 100644 --- a/lib/rexml/dtd/elementdecl.rb +++ b/lib/rexml/dtd/elementdecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class ElementDecl < Child diff --git a/lib/rexml/dtd/entitydecl.rb b/lib/rexml/dtd/entitydecl.rb index 45707e2f42..312df655ff 100644 --- a/lib/rexml/dtd/entitydecl.rb +++ b/lib/rexml/dtd/entitydecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class EntityDecl < Child diff --git a/lib/rexml/dtd/notationdecl.rb b/lib/rexml/dtd/notationdecl.rb index cfdf0b9b74..04a9b08aa7 100644 --- a/lib/rexml/dtd/notationdecl.rb +++ b/lib/rexml/dtd/notationdecl.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "../child" module REXML module DTD class NotationDecl < Child diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index ac9b10872c..4e3a60b9a2 
100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -1,23 +1,273 @@ # frozen_string_literal: false -require "rexml/parent" -require "rexml/namespace" -require "rexml/attribute" -require "rexml/cdata" -require "rexml/xpath" -require "rexml/parseexception" +require_relative "parent" +require_relative "namespace" +require_relative "attribute" +require_relative "cdata" +require_relative "xpath" +require_relative "parseexception" module REXML - # An implementation note about namespaces: - # As we parse, when we find namespaces we put them in a hash and assign - # them a unique ID. We then convert the namespace prefix for the node - # to the unique ID. This makes namespace lookup much faster for the - # cost of extra memory use. We save the namespace prefix for the - # context node and convert it back when we write it. - @@namespaces = {} - - # Represents a tagged XML element. Elements are characterized by - # having children, attributes, and names, and can themselves be - # children. + # An \REXML::Element object represents an XML element. + # + # An element: + # + # - Has a name (string). + # - May have a parent (another element). + # - Has zero or more children + # (other elements, text, CDATA, processing instructions, and comments). + # - Has zero or more siblings + # (other elements, text, CDATA, processing instructions, and comments). + # - Has zero or more named attributes. + # + # == In a Hurry? + # + # If you're somewhat familiar with XML + # and have a particular task in mind, + # you may want to see the + # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html], + # and in particular, the + # {tasks page for elements}[../doc/rexml/tasks/tocs/element_toc_rdoc.html]. 
+ # + # === Name + # + # An element has a name, which is initially set when the element is created: + # + # e = REXML::Element.new('foo') + # e.name # => "foo" + # + # The name may be changed: + # + # e.name = 'bar' + # e.name # => "bar" + # + # + # === \Parent + # + # An element may have a parent. + # + # Its parent may be assigned explicitly when the element is created: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar', e0) + # e1.parent # => <foo> ... </> + # + # Note: the representation of an element always shows the element's name. + # If the element has children, the representation indicates that + # by including an ellipsis (<tt>...</tt>). + # + # The parent may be assigned explicitly at any time: + # + # e2 = REXML::Element.new('baz') + # e1.parent = e2 + # e1.parent # => <baz/> + # + # When an element is added as a child, its parent is set automatically: + # + # e1.add_element(e0) + # e0.parent # => <bar> ... </> + # + # For an element that has no parent, method +parent+ returns +nil+. + # + # === Children + # + # An element has zero or more children. + # The children are an ordered collection + # of all objects whose parent is the element itself. + # + # The children may include any combination of elements, text, comments, + # processing instructions, and CDATA. + # (This example keeps things clean by controlling whitespace + # via a +context+ setting.) 
+ # + # xml_string = <<-EOT + # <root> + # <ele_0/> + # text 0 + # <!--comment 0--> + # <?target_0 pi_0?> + # <![CDATA[cdata 0]]> + # <ele_1/> + # text 1 + # <!--comment 1--> + # <?target_0 pi_1?> + # <![CDATA[cdata 1]]> + # </root> + # EOT + # context = {ignore_whitespace_nodes: :all, compress_whitespace: :all} + # d = REXML::Document.new(xml_string, context) + # root = d.root + # root.children.size # => 10 + # root.each {|child| p "#{child.class}: #{child}" } + # + # Output: + # + # "REXML::Element: <ele_0/>" + # "REXML::Text: \n text 0\n " + # "REXML::Comment: comment 0" + # "REXML::Instruction: <?target_0 pi_0?>" + # "REXML::CData: cdata 0" + # "REXML::Element: <ele_1/>" + # "REXML::Text: \n text 1\n " + # "REXML::Comment: comment 1" + # "REXML::Instruction: <?target_0 pi_1?>" + # "REXML::CData: cdata 1" + # + # A child may be added using inherited methods + # Parent#insert_before or Parent#insert_after: + # + # xml_string = '<root><a/><c/><d/></root>' + # d = REXML::Document.new(xml_string) + # root = d.root + # c = d.root[1] # => <c/> + # root.insert_before(c, REXML::Element.new('b')) + # root.to_a # => [<a/>, <b/>, <c/>, <d/>] + # + # A child may be replaced using Parent#replace_child: + # + # root.replace_child(c, REXML::Element.new('x')) + # root.to_a # => [<a/>, <b/>, <x/>, <d/>] + # + # A child may be removed using Parent#delete: + # + # x = root[2] # => <x/> + # root.delete(x) + # root.to_a # => [<a/>, <b/>, <d/>] + # + # === Siblings + # + # An element has zero or more siblings, + # which are the other children of the element's parent. + # + # In the example above, element +ele_1+ is between a CDATA sibling + # and a text sibling: + # + # ele_1 = root[5] # => <ele_1/> + # ele_1.previous_sibling # => "cdata 0" + # ele_1.next_sibling # => "\n text 1\n " + # + # === \Attributes + # + # An element has zero or more named attributes. 
+ # + # A new element has no attributes: + # + # e = REXML::Element.new('foo') + # e.attributes # => {} + # + # Attributes may be added: + # + # e.add_attribute('bar', 'baz') + # e.add_attribute('bat', 'bam') + # e.attributes.size # => 2 + # e['bar'] # => "baz" + # e['bat'] # => "bam" + # + # An existing attribute may be modified: + # + # e.add_attribute('bar', 'bad') + # e.attributes.size # => 2 + # e['bar'] # => "bad" + # + # An existing attribute may be deleted: + # + # e.delete_attribute('bar') + # e.attributes.size # => 1 + # e['bar'] # => nil + # + # == What's Here + # + # To begin with, what's elsewhere? + # + # \Class \REXML::Element inherits from its ancestor classes: + # + # - REXML::Child + # - REXML::Parent + # + # \REXML::Element itself and its ancestors also include modules: + # + # - {Enumerable}[https://docs.ruby-lang.org/en/master/Enumerable.html] + # - REXML::Namespace + # - REXML::Node + # - REXML::XMLTokens + # + # === Methods for Creating an \Element + # + # ::new:: Returns a new empty element. + # #clone:: Returns a clone of another element. + # + # === Methods for Attributes + # + # {[attribute_name]}[#method-i-5B-5D]:: Returns an attribute value. + # #add_attribute:: Adds a new attribute. + # #add_attributes:: Adds multiple new attributes. + # #attribute:: Returns the attribute value for a given name and optional namespace. + # #delete_attribute:: Removes an attribute. + # + # === Methods for Children + # + # {[index]}[#method-i-5B-5D]:: Returns the child at the given offset. + # #add_element:: Adds an element as the last child. + # #delete_element:: Deletes a child element. + # #each_element:: Calls the given block with each child element. + # #each_element_with_attribute:: Calls the given block with each child element + # that meets given criteria, + # which can include the attribute name. + # #each_element_with_text:: Calls the given block with each child element + # that meets given criteria, + # which can include text. 
+ # #get_elements:: Returns an array of element children that match a given xpath. + # + # === Methods for \Text Children + # + # #add_text:: Adds a text node to the element. + # #get_text:: Returns a text node that meets specified criteria. + # #text:: Returns the text string from the first node that meets specified criteria. + # #texts:: Returns an array of the text children of the element. + # #text=:: Adds, removes, or replaces the first text child of the element + # + # === Methods for Other Children + # + # #cdatas:: Returns an array of the cdata children of the element. + # #comments:: Returns an array of the comment children of the element. + # #instructions:: Returns an array of the instruction children of the element. + # + # === Methods for Namespaces + # + # #add_namespace:: Adds a namespace to the element. + # #delete_namespace:: Removes a namespace from the element. + # #namespace:: Returns the string namespace URI for the element. + # #namespaces:: Returns a hash of all defined namespaces in the element. + # #prefixes:: Returns an array of the string prefixes (names) + # of all defined namespaces in the element + # + # === Methods for Querying + # + # #document:: Returns the document, if any, that the element belongs to. + # #root:: Returns the most distant element (not document) ancestor of the element. + # #root_node:: Returns the most distant ancestor of the element. + # #xpath:: Returns the string xpath to the element + # relative to the most distant parent + # #has_attributes?:: Returns whether the element has attributes. + # #has_elements?:: Returns whether the element has elements. + # #has_text?:: Returns whether the element has text. + # #next_element:: Returns the next sibling that is an element. + # #previous_element:: Returns the previous sibling that is an element. + # #raw:: Returns whether raw mode is set for the element. + # #whitespace:: Returns whether whitespace is respected for the element. 
+ # #ignore_whitespace_nodes:: Returns whether whitespace nodes + # are to be ignored for the element. + # #node_type:: Returns symbol <tt>:element</tt>. + # + # === One More Method + # + # #inspect:: Returns a string representation of the element. + # + # === Accessors + # + # #elements:: Returns the REXML::Elements object for the element. + # #attributes:: Returns the REXML::Attributes object for the element. + # #context:: Returns or sets the context hash for the element. + # class Element < Parent include Namespace @@ -30,32 +280,42 @@ class Element < Parent # whitespace handling. attr_accessor :context - # Constructor - # arg:: - # if not supplied, will be set to the default value. - # If a String, the name of this object will be set to the argument. - # If an Element, the object will be shallowly cloned; name, - # attributes, and namespaces will be copied. Children will +not+ be - # copied. - # parent:: - # if supplied, must be a Parent, and will be used as - # the parent of this object. - # context:: - # If supplied, must be a hash containing context items. Context items - # include: - # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of - # strings being the names of the elements to respect - # whitespace for. Defaults to :+all+. - # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of - # strings being the names of the elements to ignore whitespace on. - # Overrides :+respect_whitespace+. - # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array - # of strings being the names of the elements in which to ignore - # whitespace-only nodes. If this is set, Text nodes which contain only - # whitespace will not be added to the document tree. - # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of - # the elements to process in raw mode. In raw mode, special - # characters in text is not converted to or from entities. 
+ # :call-seq: + # Element.new(name = 'UNDEFINED', parent = nil, context = nil) -> new_element + # Element.new(element, parent = nil, context = nil) -> new_element + # + # Returns a new \REXML::Element object. + # + # When no arguments are given, + # returns an element with name <tt>'UNDEFINED'</tt>: + # + # e = REXML::Element.new # => <UNDEFINED/> + # e.class # => REXML::Element + # e.name # => "UNDEFINED" + # + # When only argument +name+ is given, + # returns an element of the given name: + # + # REXML::Element.new('foo') # => <foo/> + # + # When only argument +element+ is given, it must be an \REXML::Element object; + # returns a shallow copy of the given element: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new(e0) # => <foo/> + # + # When argument +parent+ is also given, it must be an REXML::Parent object: + # + # e = REXML::Element.new('foo', REXML::Parent.new) + # e.parent # => #<REXML::Parent @parent=nil, @children=[<foo/>]> + # + # When argument +context+ is also given, it must be a hash + # representing the context for the element; + # see {Element Context}[../doc/rexml/context_rdoc.html]: + # + # e = REXML::Element.new('foo', nil, {raw: :all}) + # e.context # => {:raw=>:all} + # def initialize( arg = UNDEFINED, parent=nil, context=nil ) super(parent) @@ -74,6 +334,27 @@ def initialize( arg = UNDEFINED, parent=nil, context=nil ) end end + # :call-seq: + # inspect -> string + # + # Returns a string representation of the element. + # + # For an element with no attributes and no children, shows the element name: + # + # REXML::Element.new.inspect # => "<UNDEFINED/>" + # + # Shows attributes, if any: + # + # e = REXML::Element.new('foo') + # e.add_attributes({'bar' => 0, 'baz' => 1}) + # e.inspect # => "<foo bar='0' baz='1'/>" + # + # Shows an ellipsis (<tt>...</tt>), if there are child elements: + # + # e.add_element(REXML::Element.new('bar')) + # e.add_element(REXML::Element.new('baz')) + # e.inspect # => "<foo bar='0' baz='1'> ... 
</>" + # def inspect rv = "<#@expanded_name" @@ -89,60 +370,123 @@ def inspect end end - - # Creates a shallow copy of self. - # d = Document.new "<a><b/><b/><c><d/></c></a>" - # new_a = d.root.clone - # puts new_a # => "<a/>" + # :call-seq: + # clone -> new_element + # + # Returns a shallow copy of the element, containing the name and attributes, + # but not the parent or children: + # + # e = REXML::Element.new('foo') + # e.add_attributes({'bar' => 0, 'baz' => 1}) + # e.clone # => <foo bar='0' baz='1'/> + # def clone self.class.new self end - # Evaluates to the root node of the document that this element - # belongs to. If this element doesn't belong to a document, but does - # belong to another Element, the parent's root will be returned, until the - # earliest ancestor is found. - # - # Note that this is not the same as the document element. - # In the following example, <a> is the document element, and the root - # node is the parent node of the document element. You may ask yourself - # why the root node is useful: consider the doctype and XML declaration, - # and any processing instructions before the document element... they - # are children of the root node, or siblings of the document element. - # The only time this isn't true is when an Element is created that is - # not part of any Document. In this case, the ancestor that has no - # parent acts as the root node. - # d = Document.new '<a><b><c/></b></a>' - # a = d[1] ; c = a[1][1] - # d.root_node == d # TRUE - # a.root_node # namely, d - # c.root_node # again, d + # :call-seq: + # root_node -> document or element + # + # Returns the most distant ancestor of +self+. + # + # When the element is part of a document, + # returns the root node of the document. + # Note that the root node is different from the document element; + # in this example +a+ is document element and the root node is its parent: + # + # d = REXML::Document.new('<a><b><c/></b></a>') + # top_element = d.first # => <a> ... 
</> + # child = top_element.first # => <b> ... </> + # d.root_node == d # => true + # top_element.root_node == d # => true + # child.root_node == d # => true + # + # When the element is not part of a document, but does have ancestor elements, + # returns the most distant ancestor element: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar') + # e1.parent = e0 + # e2 = REXML::Element.new('baz') + # e2.parent = e1 + # e2.root_node == e0 # => true + # + # When the element has no ancestor elements, + # returns +self+: + # + # e = REXML::Element.new('foo') + # e.root_node == e # => true + # + # Related: #root, #document. + # def root_node parent.nil? ? self : parent.root_node end + # :call-seq: + # root -> element + # + # Returns the most distant _element_ (not document) ancestor of the element: + # + # d = REXML::Document.new('<a><b><c/></b></a>') + # top_element = d.first + # child = top_element.first + # top_element.root == top_element # => true + # child.root == top_element # => true + # + # For a document, returns the topmost element: + # + # d.root == top_element # => true + # + # Related: #root_node, #document. + # def root - return elements[1] if self.kind_of? Document - return self if parent.kind_of? Document or parent.nil? - return parent.root + target = self + while target + return target.elements[1] if target.kind_of? Document + parent = target.parent + return target if parent.kind_of? Document or parent.nil? + target = parent + end + nil end - # Evaluates to the document to which this element belongs, or nil if this - # element doesn't belong to a document. 
+ # :call-seq: + # document -> document or nil + # + # If the element is part of a document, returns that document: + # + # d = REXML::Document.new('<a><b><c/></b></a>') + # top_element = d.first + # child = top_element.first + # top_element.document == d # => true + # child.document == d # => true + # + # If the element is not part of a document, returns +nil+: + # + # REXML::Element.new.document # => nil + # + # For a document, returns +self+: + # + # d.document == d # => true + # + # Related: #root, #root_node. + # def document rt = root rt.parent if rt end - # Evaluates to +true+ if whitespace is respected for this element. This - # is the case if: - # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value - # 2. The context has :+respect_whitespace+ set to :+all+ or - # an array containing the name of this element, and - # :+compress_whitespace+ isn't set to :+all+ or an array containing the - # name of this element. - # The evaluation is tested against +expanded_name+, and so is namespace - # sensitive. + # :call-seq: + # whitespace + # + # Returns +true+ if whitespace is respected for this element, + # +false+ otherwise. + # + # See {Element Context}[../doc/rexml/context_rdoc.html]. + # + # The evaluation is tested against the element's +expanded_name+, + # and so is namespace-sensitive. def whitespace @whitespace = nil if @context @@ -159,6 +503,13 @@ def whitespace @whitespace end + # :call-seq: + # ignore_whitespace_nodes + # + # Returns +true+ if whitespace nodes are ignored for the element. + # + # See {Element Context}[../doc/rexml/context_rdoc.html]. + # def ignore_whitespace_nodes @ignore_whitespace_nodes = false if @context @@ -170,9 +521,12 @@ def ignore_whitespace_nodes end end - # Evaluates to +true+ if raw mode is set for this element. This - # is the case if the context has :+raw+ set to :+all+ or - # an array containing the name of this element. 
+ # :call-seq: + # raw + # + # Returns +true+ if raw mode is set for the element. + # + # See {Element Context}[../doc/rexml/context_rdoc.html]. # # The evaluation is tested against +expanded_name+, and so is namespace # sensitive. @@ -180,7 +534,7 @@ def raw @raw = (@context and @context[:raw] and (@context[:raw] == :all or @context[:raw].include? expanded_name)) - @raw + @raw end #once :whitespace, :raw, :ignore_whitespace_nodes @@ -189,10 +543,25 @@ def raw # Namespaces # ################################################# - # Evaluates to an +Array+ containing the prefixes (names) of all defined - # namespaces at this context node. - # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") - # doc.elements['//b'].prefixes # -> ['x', 'y'] + # :call-seq: + # prefixes -> array_of_namespace_prefixes + # + # Returns an array of the string prefixes (names) of all defined namespaces + # in the element and its ancestors: + # + # xml_string = <<-EOT + # <root> + # <a xmlns:x='1' xmlns:y='2'> + # <b/> + # <c xmlns:z='3'/> + # </a> + # </root> + # EOT + # d = REXML::Document.new(xml_string, {compress_whitespace: :all}) + # d.elements['//a'].prefixes # => ["x", "y"] + # d.elements['//b'].prefixes # => ["x", "y"] + # d.elements['//c'].prefixes # => ["x", "y", "z"] + # def prefixes prefixes = [] prefixes = parent.prefixes if parent @@ -200,6 +569,25 @@ def prefixes return prefixes end + # :call-seq: + # namespaces -> hash_of_namespaces + # + # Returns a hash of all defined namespaces + # in the element and its ancestors: + # + # xml_string = <<-EOT + # <root> + # <a xmlns:x='1' xmlns:y='2'> + # <b/> + # <c xmlns:z='3'/> + # </a> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # d.elements['//a'].namespaces # => {"x"=>"1", "y"=>"2"} + # d.elements['//b'].namespaces # => {"x"=>"1", "y"=>"2"} + # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"} + # def namespaces namespaces = {} namespaces = parent.namespaces if parent @@ 
-207,19 +595,26 @@ def namespaces return namespaces end - # Evaluates to the URI for a prefix, or the empty string if no such - # namespace is declared for this element. Evaluates recursively for - # ancestors. Returns the default namespace, if there is one. - # prefix:: - # the prefix to search for. If not supplied, returns the default - # namespace if one exists - # Returns:: - # the namespace URI as a String, or nil if no such namespace - # exists. If the namespace is undefined, returns an empty string - # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") - # b = doc.elements['//b'] - # b.namespace # -> '1' - # b.namespace("y") # -> '2' + # :call-seq: + # namespace(prefix = nil) -> string_uri or nil + # + # Returns the string namespace URI for the element, + # possibly deriving from one of its ancestors. + # + # xml_string = <<-EOT + # <root> + # <a xmlns='1' xmlns:y='2'> + # <b/> + # <c xmlns:z='3'/> + # </a> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # b = d.elements['//b'] + # b.namespace # => "1" + # b.namespace('y') # => "2" + # b.namespace('nosuch') # => nil + # def namespace(prefix=nil) if prefix.nil? prefix = prefix() @@ -229,25 +624,34 @@ def namespace(prefix=nil) else prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns' end - ns = attributes[ prefix ] - ns = parent.namespace(prefix) if ns.nil? and parent + ns = nil + target = self + while ns.nil? and target + ns = target.attributes[prefix] + target = target.parent + end ns = '' if ns.nil? and prefix == 'xmlns' return ns end - # Adds a namespace to this element. - # prefix:: - # the prefix string, or the namespace URI if +uri+ is not - # supplied - # uri:: - # the namespace URI. 
May be nil, in which +prefix+ is used as - # the URI - # Evaluates to: this Element - # a = Element.new("a") - # a.add_namespace("xmlns:foo", "bar" ) - # a.add_namespace("foo", "bar") # shorthand for previous line - # a.add_namespace("twiddle") - # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/> + # :call-seq: + # add_namespace(prefix, uri = nil) -> self + # + # Adds a namespace to the element; returns +self+. + # + # With the single argument +prefix+, + # adds a namespace using the given +prefix+ and the namespace URI: + # + # e = REXML::Element.new('foo') + # e.add_namespace('bar') + # e.namespaces # => {"xmlns"=>"bar"} + # + # With both arguments +prefix+ and +uri+ given, + # adds a namespace using both arguments: + # + # e.add_namespace('baz', 'bat') + # e.namespaces # => {"xmlns"=>"bar", "baz"=>"bat"} + # def add_namespace( prefix, uri=nil ) unless uri @attributes["xmlns"] = prefix @@ -258,16 +662,28 @@ def add_namespace( prefix, uri=nil ) self end - # Removes a namespace from this node. This only works if the namespace is - # actually declared in this node. If no argument is passed, deletes the - # default namespace. + # :call-seq: + # delete_namespace(namespace = 'xmlns') -> self + # + # Removes a namespace from the element. 
+ # + # With no argument, removes the default namespace: + # + # d = REXML::Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>" + # d.to_s # => "<a xmlns:foo='bar' xmlns='twiddle'/>" + # d.root.delete_namespace # => <a xmlns:foo='bar'/> + # d.to_s # => "<a xmlns:foo='bar'/>" + # + # With argument +namespace+, removes the specified namespace: + # + # d.root.delete_namespace('foo') + # d.to_s # => "<a/>" + # + # Does nothing if no such namespace is found: + # + # d.root.delete_namespace('nosuch') + # d.to_s # => "<a/>" # - # Evaluates to: this element - # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>" - # doc.root.delete_namespace - # puts doc # -> <a xmlns:foo='bar'/> - # doc.root.delete_namespace 'foo' - # puts doc # -> <a/> def delete_namespace namespace="xmlns" namespace = "xmlns:#{namespace}" unless namespace == 'xmlns' attribute = attributes.get_attribute(namespace) @@ -279,20 +695,40 @@ def delete_namespace namespace="xmlns" # Elements # ################################################# - # Adds a child to this element, optionally setting attributes in - # the element. - # element:: - # optional. If Element, the element is added. - # Otherwise, a new Element is constructed with the argument (see - # Element.initialize). - # attrs:: - # If supplied, must be a Hash containing String name,value - # pairs, which will be used to set the attributes of the new Element. - # Returns:: the Element that was added - # el = doc.add_element 'my-tag' - # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'} - # el = Element.new 'my-tag' - # doc.add_element el + # :call-seq: + # add_element(name, attributes = nil) -> new_element + # add_element(element, attributes = nil) -> element + # + # Adds a child element, optionally setting attributes + # on the added element; returns the added element. 
+ # + # With string argument +name+, creates a new element with that name + # and adds the new element as a child: + # + # e0 = REXML::Element.new('foo') + # e0.add_element('bar') + # e0[0] # => <bar/> + # + # + # With argument +name+ and hash argument +attributes+, + # sets attributes on the new element: + # + # e0.add_element('baz', {'bat' => '0', 'bam' => '1'}) + # e0[1] # => <baz bat='0' bam='1'/> + # + # With element argument +element+, adds that element as a child: + # + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar') + # e0.add_element(e1) + # e0[0] # => <bar/> + # + # With argument +element+ and hash argument +attributes+, + # sets attributes on the added element: + # + # e0.add_element(e1, {'bat' => '0', 'bam' => '1'}) + # e0[1] # => <bar bat='0' bam='1'/> + # def add_element element, attrs=nil raise "First argument must be either an element name, or an Element object" if element.nil? el = @elements.add(element) @@ -302,52 +738,112 @@ def add_element element, attrs=nil el end + # :call-seq: + # delete_element(index) -> removed_element or nil + # delete_element(element) -> removed_element or nil + # delete_element(xpath) -> removed_element or nil + # # Deletes a child element. - # element:: - # Must be an +Element+, +String+, or +Integer+. If Element, - # the element is removed. If String, the element is found (via XPath) - # and removed. <em>This means that any parent can remove any - # descendant.<em> If Integer, the Element indexed by that number will be - # removed. - # Returns:: the element that was removed. - # doc.delete_element "/a/b/c[@id='4']" - # doc.delete_element doc.elements["//k"] - # doc.delete_element 1 + # + # When 1-based integer argument +index+ is given, + # removes and returns the child element at that offset if it exists; + # indexing does not include text nodes; + # returns +nil+ if the element does not exist: + # + # d = REXML::Document.new '<a><b/>text<c/></a>' + # a = d.root # => <a> ... 
</> + # a.delete_element(1) # => <b/> + # a.delete_element(1) # => <c/> + # a.delete_element(1) # => nil + # + # When element argument +element+ is given, + # removes and returns that child element if it exists, + # otherwise returns +nil+: + # + # d = REXML::Document.new '<a><b/>text<c/></a>' + # a = d.root # => <a> ... </> + # c = a[2] # => <c/> + # a.delete_element(c) # => <c/> + # a.delete_element(c) # => nil + # + # When xpath argument +xpath+ is given, + # removes and returns the element at xpath if it exists, + # otherwise returns +nil+: + # + # d = REXML::Document.new '<a><b/>text<c/></a>' + # a = d.root # => <a> ... </> + # a.delete_element('//c') # => <c/> + # a.delete_element('//c') # => nil + # def delete_element element @elements.delete element end - # Evaluates to +true+ if this element has at least one child Element - # doc = Document.new "<a><b/><c>Text</c></a>" - # doc.root.has_elements # -> true - # doc.elements["/a/b"].has_elements # -> false - # doc.elements["/a/c"].has_elements # -> false + # :call-seq: + # has_elements? + # + # Returns +true+ if the element has one or more element children, + # +false+ otherwise: + # + # d = REXML::Document.new '<a><b/>text<c/></a>' + # a = d.root # => <a> ... </> + # a.has_elements? # => true + # b = a[0] # => <b/> + # b.has_elements? # => false + # def has_elements? !@elements.empty? end - # Iterates through the child elements, yielding for each Element that - # has a particular attribute set. - # key:: - # the name of the attribute to search for - # value:: - # the value of the attribute - # max:: - # (optional) causes this method to return after yielding - # for this number of matching children - # name:: - # (optional) if supplied, this is an XPath that filters - # the children to check. 
- # - # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>" - # # Yields b, c, d - # doc.root.each_element_with_attribute( 'id' ) {|e| p e} - # # Yields b, d - # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e} - # # Yields b - # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e} - # # Yields d - # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e} + # :call-seq: + # each_element_with_attribute(attr_name, value = nil, max = 0, xpath = nil) {|e| ... } + # + # Calls the given block with each child element that meets given criteria. + # + # When only string argument +attr_name+ is given, + # calls the block with each child element that has that attribute: + # + # d = REXML::Document.new '<a><b id="1"/><c id="2"/><d id="1"/><e/></a>' + # a = d.root + # a.each_element_with_attribute('id') {|e| p e } + # + # Output: + # + # <b id='1'/> + # <c id='2'/> + # <d id='1'/> + # + # With argument +attr_name+ and string argument +value+ given, + # calls the block with each child element that has that attribute + # with that value: + # + # a.each_element_with_attribute('id', '1') {|e| p e } + # + # Output: + # + # <b id='1'/> + # <d id='1'/> + # + # With arguments +attr_name+, +value+, and integer argument +max+ given, + # calls the block with at most +max+ child elements: + # + # a.each_element_with_attribute('id', '1', 1) {|e| p e } + # + # Output: + # + # <b id='1'/> + # + # With all arguments given, including +xpath+, + # calls the block with only those child elements + # that meet the first three criteria, + # and also match the given +xpath+: + # + # a.each_element_with_attribute('id', '1', 2, '//d') {|e| p e } + # + # Output: + # + # <d id='1'/> + # def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element each_with_something( proc {|child| if value.nil? 
@@ -358,27 +854,53 @@ def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yi }, max, name, &block ) end - # Iterates through the children, yielding for each Element that - # has a particular text set. - # text:: - # the text to search for. If nil, or not supplied, will iterate - # over all +Element+ children that contain at least one +Text+ node. - # max:: - # (optional) causes this method to return after yielding - # for this number of matching children - # name:: - # (optional) if supplied, this is an XPath that filters - # the children to check. - # - # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>' - # # Yields b, c, d - # doc.each_element_with_text {|e|p e} - # # Yields b, c - # doc.each_element_with_text('b'){|e|p e} - # # Yields b - # doc.each_element_with_text('b', 1){|e|p e} - # # Yields d - # doc.each_element_with_text(nil, 0, 'd'){|e|p e} + # :call-seq: + # each_element_with_text(text = nil, max = 0, xpath = nil) {|e| ... } + # + # Calls the given block with each child element that meets given criteria. + # + # With no arguments, calls the block with each child element that has text: + # + # d = REXML::Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>' + # a = d.root + # a.each_element_with_text {|e| p e } + # + # Output: + # + # <b> ... </> + # <c> ... </> + # <d> ... </> + # + # With the single string argument +text+, + # calls the block with each element that has exactly that text: + # + # a.each_element_with_text('b') {|e| p e } + # + # Output: + # + # <b> ... </> + # <c> ... </> + # + # With argument +text+ and integer argument +max+, + # calls the block with at most +max+ elements: + # + # a.each_element_with_text('b', 1) {|e| p e } + # + # Output: + # + # <b> ... 
</> + # + # With all arguments given, including +xpath+, + # calls the block with only those child elements + # that meet the first two criteria, + # and also match the given +xpath+: + # + # a.each_element_with_text('b', 2, '//c') {|e| p e } + # + # Output: + # + # <c> ... </> + # def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element each_with_something( proc {|child| if text.nil? @@ -389,35 +911,71 @@ def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Eleme }, max, name, &block ) end - # Synonym for Element.elements.each + # :call-seq: + # each_element {|e| ... } + # + # Calls the given block with each child element: + # + # d = REXML::Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>' + # a = d.root + # a.each_element {|e| p e } + # + # Output: + # + # <b> ... </> + # <c> ... </> + # <d> ... </> + # <e/> + # def each_element( xpath=nil, &block ) # :yields: Element @elements.each( xpath, &block ) end - # Synonym for Element.to_a - # This is a little slower than calling elements.each directly. - # xpath:: any XPath by which to search for elements in the tree - # Returns:: an array of Elements that match the supplied path + # :call-seq: + # get_elements(xpath) + # + # Returns an array of the elements that match the given +xpath+: + # + # xml_string = <<-EOT + # <root> + # <a level='1'> + # <a level='2'/> + # </a> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # d.root.get_elements('//a') # => [<a level='1'> ... 
</>, <a level='2'/>] + # def get_elements( xpath ) @elements.to_a( xpath ) end - # Returns the next sibling that is an element, or nil if there is - # no Element sibling after this one - # doc = Document.new '<a><b/>text<c/></a>' - # doc.root.elements['b'].next_element #-> <c/> - # doc.root.elements['c'].next_element #-> nil + # :call-seq: + # next_element + # + # Returns the next sibling that is an element if it exists, + # +nil+ otherwise: + # + # d = REXML::Document.new '<a><b/>text<c/></a>' + # d.root.elements['b'].next_element #-> <c/> + # d.root.elements['c'].next_element #-> nil + # def next_element element = next_sibling element = element.next_sibling until element.nil? or element.kind_of? Element return element end - # Returns the previous sibling that is an element, or nil if there is - # no Element sibling prior to this one - # doc = Document.new '<a><b/>text<c/></a>' - # doc.root.elements['c'].previous_element #-> <b/> - # doc.root.elements['b'].previous_element #-> nil + # :call-seq: + # previous_element + # + # Returns the previous sibling that is an element if it exists, + # +nil+ otherwise: + # + # d = REXML::Document.new '<a><b/>text<c/></a>' + # d.root.elements['c'].previous_element #-> <b/> + # d.root.elements['b'].previous_element #-> nil + # def previous_element element = previous_sibling element = element.previous_sibling until element.nil? or element.kind_of? Element @@ -429,36 +987,69 @@ def previous_element # Text # ################################################# - # Evaluates to +true+ if this element has at least one Text child + # :call-seq: + # has_text? -> true or false + # + # Returns +true+ if the element has one or more text nodes, + # +false+ otherwise: + # + # d = REXML::Document.new '<a><b/>text<c/></a>' + # a = d.root + # a.has_text? # => true + # b = a[0] + # b.has_text? # => false + # def has_text? not text().nil? 
end - # A convenience method which returns the String value of the _first_ - # child text element, if one exists, and +nil+ otherwise. + # :call-seq: + # text(xpath = nil) -> text_string or nil + # + # Returns the text string from the first text node child + # in a specified element, if it exists, +nil+ otherwise. + # + # With no argument, returns the text from the first text node in +self+: + # + # d = REXML::Document.new "<p>some text <b>this is bold!</b> more text</p>" + # d.root.text.class # => String + # d.root.text # => "some text " + # + # With argument +xpath+, returns text from the first text node + # in the element that matches +xpath+: + # + # d.root.text(1) # => "this is bold!" # - # <em>Note that an element may have multiple Text elements, perhaps - # separated by other children</em>. Be aware that this method only returns - # the first Text node. + # Note that an element may have multiple text nodes, + # possibly separated by other non-text children, as above. + # Even so, the returned value is the string text from the first such node. # - # This method returns the +value+ of the first text child node, which - # ignores the +raw+ setting, so always returns normalized text. See - # the Text::value documentation. + # Note also that the text node is retrieved by method get_text, + # and so is always normalized text. # - # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" - # # The element 'p' has two text elements, "some text " and " more text". - # doc.root.text #-> "some text " def text( path = nil ) rv = get_text(path) return rv.value unless rv.nil? nil end - # Returns the first child Text node, if any, or +nil+ otherwise. - # This method returns the actual +Text+ node, rather than the String content. - # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" - # # The element 'p' has two text elements, "some text " and " more text". 
- # doc.root.get_text.value #-> "some text " + # :call-seq: + # get_text(xpath = nil) -> text_node or nil + # + # Returns the first text node child in a specified element, if it exists, + # +nil+ otherwise. + # + # With no argument, returns the first text node from +self+: + # + # d = REXML::Document.new "<p>some text <b>this is bold!</b> more text</p>" + # d.root.get_text.class # => REXML::Text + # d.root.get_text # => "some text " + # + # With argument +xpath+, returns the first text node from the element + # that matches +xpath+: + # + # d.root.get_text(1) # => "this is bold!" + # def get_text path = nil rv = nil if path @@ -470,26 +1061,31 @@ def get_text path = nil return rv end - # Sets the first Text child of this object. See text() for a - # discussion about Text children. - # - # If a Text child already exists, the child is replaced by this - # content. This means that Text content can be deleted by calling - # this method with a nil argument. In this case, the next Text - # child becomes the first Text child. In no case is the order of - # any siblings disturbed. - # text:: - # If a String, a new Text child is created and added to - # this Element as the first Text child. If Text, the text is set - # as the first Child element. If nil, then any existing first Text - # child is removed. - # Returns:: this Element. - # doc = Document.new '<a><b/></a>' - # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>' - # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>' - # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>' - # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>' - # doc.root.text = nil #-> '<a><b/><c/></a>' + # :call-seq: + # text = string -> string + # text = nil -> nil + # + # Adds, replaces, or removes the first text node child in the element. 
+ # + # With string argument +string+, + # creates a new \REXML::Text node containing that string, + # honoring the current settings for whitespace and raw, + # then places the node as the first text child in the element; + # returns +string+. + # + # If the element has no text child, the text node is added: + # + # d = REXML::Document.new '<a><b/></a>' + # d.root.text = 'foo' #-> '<a><b/>foo</a>' + # + # If the element has a text child, it is replaced: + # + # d.root.text = 'bar' #-> '<a><b/>bar</a>' + # + # With argument +nil+, removes the first text child: + # + # d.root.text = nil #-> '<a><b/></a>' + # def text=( text ) if text.kind_of? String text = Text.new( text, whitespace(), nil, raw() ) @@ -509,17 +1105,45 @@ def text=( text ) return self end - # A helper method to add a Text child. Actual Text instances can - # be added with regular Parent methods, such as add() and <<() - # text:: - # if a String, a new Text instance is created and added - # to the parent. If Text, the object is added directly. - # Returns:: this Element - # e = Element.new('a') #-> <e/> - # e.add_text 'foo' #-> <e>foo</e> - # e.add_text Text.new(' bar') #-> <e>foo bar</e> - # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e' - # element and <b>2</b> Text node children. + # :call-seq: + # add_text(string) -> nil + # add_text(text_node) -> self + # + # Adds text to the element. + # + # When string argument +string+ is given, returns +nil+. 
+ # + # If the element has no child text node, + # creates a \REXML::Text object using the string, + # honoring the current settings for whitespace and raw, + # then adds that node to the element: + # + # d = REXML::Document.new('<a><b/></a>') + # a = d.root + # a.add_text('foo') + # a.to_a # => [<b/>, "foo"] + # + # If the element has child text nodes, + # appends the string to the _last_ text node: + # + # d = REXML::Document.new('<a>foo<b/>bar</a>') + # a = d.root + # a.add_text('baz') + # a.to_a # => ["foo", <b/>, "barbaz"] + # a.add_text('baz') + # a.to_a # => ["foo", <b/>, "barbazbaz"] + # + # When text node argument +text_node+ is given, + # appends the node as the last text node in the element; + # returns +self+: + # + # d = REXML::Document.new('<a>foo<b/>bar</a>') + # a = d.root + # a.add_text(REXML::Text.new('baz')) + # a.to_a # => ["foo", <b/>, "bar", "baz"] + # a.add_text(REXML::Text.new('baz')) + # a.to_a # => ["foo", <b/>, "bar", "baz", "baz"] + # def add_text( text ) if text.kind_of? String if @children[-1].kind_of? Text @@ -532,10 +1156,39 @@ def add_text( text ) return self end + # :call-seq: + # node_type -> :element + # + # Returns symbol <tt>:element</tt>: + # + # d = REXML::Document.new('<a/>') + # a = d.root # => <a/> + # a.node_type # => :element + # def node_type :element end + # :call-seq: + # xpath -> string_xpath + # + # Returns the string xpath to the element + # relative to the most distant parent: + # + # d = REXML::Document.new('<a><b><c/></b></a>') + # a = d.root # => <a> ... </> + # b = a[0] # => <b> ... 
</> + # c = b[0] # => <c/> + # d.xpath # => "" + # a.xpath # => "/a" + # b.xpath # => "/a/b" + # c.xpath # => "/a/b/c" + # + # If there is no parent, returns the expanded name of the element: + # + # e = REXML::Element.new('foo') + # e.xpath # => "foo" + # def xpath path_elements = [] cur = self @@ -551,19 +1204,45 @@ def xpath # Attributes # ################################################# - # Fetches an attribute value or a child. + # :call-seq: + # [index] -> object + # [attr_name] -> attr_value + # [attr_sym] -> attr_value + # + # With integer argument +index+ given, + # returns the child at offset +index+, or +nil+ if none: + # + # d = REXML::Document.new '<root><a/>text<b/>more<c/></root>' + # root = d.root + # (0..root.size).each do |index| + # node = root[index] + # p "#{index}: #{node} (#{node.class})" + # end + # + # Output: + # + # "0: <a/> (REXML::Element)" + # "1: text (REXML::Text)" + # "2: <b/> (REXML::Element)" + # "3: more (REXML::Text)" + # "4: <c/> (REXML::Element)" + # "5: (NilClass)" # - # If String or Symbol is specified, it's treated as attribute - # name. Attribute value as String or +nil+ is returned. This case - # is shortcut of +attributes[name]+. + # With string argument +attr_name+ given, + # returns the string value for the given attribute name if it exists, + # otherwise +nil+: # - # If Integer is specified, it's treated as the index of - # child. It returns Nth child. 
+ # d = REXML::Document.new('<root attr="value"></root>') + # root = d.root + # root['attr'] # => "value" + # root['nosuch'] # => nil + # + # With symbol argument +attr_sym+ given, + # returns <tt>[attr_sym.to_s]</tt>: + # + # root[:attr] # => "value" + # root[:nosuch] # => nil # - # doc = REXML::Document.new("<a attr='1'><b/><c/></a>") - # doc.root["attr"] # => "1" - # doc.root.attributes["attr"] # => "1" - # doc.root[1] # => <c/> def [](name_or_index) case name_or_index when String @@ -575,17 +1254,42 @@ def [](name_or_index) end end + + # :call-seq: + # attribute(name, namespace = nil) + # + # Returns the string value for the given attribute name. + # + # With only argument +name+ given, + # returns the value of the named attribute if it exists, otherwise +nil+: + # + # xml_string = <<-EOT + # <root xmlns="ns0"> + # <a xmlns="ns1" attr="value"></a> + # <b xmlns="ns2" attr="value"></b> + # <c attr="value"/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # root = d.root + # a = root[1] # => <a xmlns='ns1' attr='value'/> + # a.attribute('attr') # => attr='value' + # a.attribute('nope') # => nil + # + # With arguments +name+ and +namespace+ given, + # returns the value of the named attribute if it exists, otherwise +nil+: + # + # xml_string = "<root xmlns:a='a' a:x='a:x' x='x'/>" + # document = REXML::Document.new(xml_string) + # document.root.attribute("x") # => x='x' + # document.root.attribute("x", "a") # => a:x='a:x' + # def attribute( name, namespace=nil ) - prefix = nil - if namespaces.respond_to? :key - prefix = namespaces.key(namespace) if namespace - else - prefix = namespaces.index(namespace) if namespace - end + prefix = namespaces.key(namespace) if namespace prefix = nil if prefix == 'xmlns' ret_val = - attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) + attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name ) return ret_val unless ret_val.nil? return nil if prefix.nil? 
@@ -598,29 +1302,46 @@ def attribute( name, namespace=nil ) end - # Evaluates to +true+ if this element has any attributes set, false - # otherwise. + # :call-seq: + # has_attributes? -> true or false + # + # Returns +true+ if the element has attributes, +false+ otherwise: + # + # d = REXML::Document.new('<root><a attr="val"/><b/></root>') + # a, b = *d.root + # a.has_attributes? # => true + # b.has_attributes? # => false + # def has_attributes? return !@attributes.empty? end + # :call-seq: + # add_attribute(name, value) -> value + # add_attribute(attribute) -> attribute + # # Adds an attribute to this element, overwriting any existing attribute # by the same name. - # key:: - # can be either an Attribute or a String. If an Attribute, - # the attribute is added to the list of Element attributes. If String, - # the argument is used as the name of the new attribute, and the value - # parameter must be supplied. - # value:: - # Required if +key+ is a String, and ignored if the first argument is - # an Attribute. This is a String, and is used as the value - # of the new Attribute. This should be the unnormalized value of the - # attribute (without entities). 
- # Returns:: the Attribute added - # e = Element.new 'e' - # e.add_attribute( 'a', 'b' ) #-> <e a='b'/> - # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/> - # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/> + # + # With string argument +name+ and object +value+ given, + # adds the attribute created with that name and value: + # + # e = REXML::Element.new + # e.add_attribute('attr', 'value') # => "value" + # e['attr'] # => "value" + # e.add_attribute('attr', 'VALUE') # => "VALUE" + # e['attr'] # => "VALUE" + # + # With only attribute object +attribute+ given, + # adds the given attribute: + # + # a = REXML::Attribute.new('attr', 'value') + # e.add_attribute(a) # => attr='value' + # e['attr'] # => "value" + # a = REXML::Attribute.new('attr', 'VALUE') + # e.add_attribute(a) # => attr='VALUE' + # e['attr'] # => "VALUE" + # def add_attribute( key, value=nil ) if key.kind_of? Attribute @attributes << key @@ -629,10 +1350,29 @@ def add_attribute( key, value=nil ) end end - # Add multiple attributes to this element. - # hash:: is either a hash, or array of arrays - # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} ) - # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] ) + # :call-seq: + # add_attributes(hash) -> hash + # add_attributes(array) + # + # Adds zero or more attributes to the element; + # returns the argument. + # + # If hash argument +hash+ is given, + # each key must be a string; + # adds each attribute created with the key/value pair: + # + # e = REXML::Element.new + # h = {'foo' => 'bar', 'baz' => 'bat'} + # e.add_attributes(h) + # + # If argument +array+ is given, + # each array member must be a 2-element array <tt>[name, value]</tt>; + # each name must be a string: + # + # e = REXML::Element.new + # a = [['foo', 'bar'], ['baz', 'bat']] + # e.add_attributes(a) + # def add_attributes hash if hash.kind_of? 
Hash hash.each_pair {|key, value| @attributes[key] = value } @@ -641,19 +1381,17 @@ def add_attributes hash end end - # Removes an attribute - # key:: - # either an Attribute or a String. In either case, the - # attribute is found by matching the attribute name to the argument, - # and then removed. If no attribute is found, no action is taken. - # Returns:: - # the attribute removed, or nil if this Element did not contain - # a matching attribute - # e = Element.new('E') - # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/> - # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/> - # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/> - # e.delete_attribute( r ) #-> <E/> + # :call-seq: + # delete_attribute(name) -> removed_attribute or nil + # + # Removes a named attribute if it exists; + # returns the removed attribute if found, otherwise +nil+: + # + # e = REXML::Element.new('foo') + # e.add_attribute('bar', 'baz') + # e.delete_attribute('bar') # => <bar/> + # e.delete_attribute('bar') # => nil + # def delete_attribute(key) attr = @attributes.get_attribute(key) attr.remove unless attr.nil? @@ -663,26 +1401,80 @@ def delete_attribute(key) # Other Utilities # ################################################# - # Get an array of all CData children. - # IMMUTABLE + # :call-seq: + # cdatas -> array_of_cdata_children + # + # Returns a frozen array of the REXML::CData children of the element: + # + # xml_string = <<-EOT + # <root> + # <![CDATA[foo]]> + # <![CDATA[bar]]> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # cds = d.root.cdatas # => ["foo", "bar"] + # cds.frozen? # => true + # cds.map {|cd| cd.class } # => [REXML::CData, REXML::CData] + # def cdatas find_all { |child| child.kind_of? CData }.freeze end - # Get an array of all Comment children. 
- # IMMUTABLE + # :call-seq: + # comments -> array_of_comment_children + # + # Returns a frozen array of the REXML::Comment children of the element: + # + # xml_string = <<-EOT + # <root> + # <!--foo--> + # <!--bar--> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # cs = d.root.comments + # cs.frozen? # => true + # cs.map {|c| c.class } # => [REXML::Comment, REXML::Comment] + # cs.map {|c| c.to_s } # => ["foo", "bar"] + # def comments find_all { |child| child.kind_of? Comment }.freeze end - # Get an array of all Instruction children. - # IMMUTABLE + # :call-seq: + # instructions -> array_of_instruction_children + # + # Returns a frozen array of the REXML::Instruction children of the element: + # + # xml_string = <<-EOT + # <root> + # <?target0 foo?> + # <?target1 bar?> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # is = d.root.instructions + # is.frozen? # => true + # is.map {|i| i.class } # => [REXML::Instruction, REXML::Instruction] + # is.map {|i| i.to_s } # => ["<?target0 foo?>", "<?target1 bar?>"] + # def instructions find_all { |child| child.kind_of? Instruction }.freeze end - # Get an array of all Text children. - # IMMUTABLE + # :call-seq: + # texts -> array_of_text_children + # + # Returns a frozen array of the REXML::Text children of the element: + # + # xml_string = '<root><a/>text<b/>more<c/></root>' + # d = REXML::Document.new(xml_string) + # ts = d.root.texts + # ts.frozen? # => true + # ts.map {|t| t.class } # => [REXML::Text, REXML::Text] + # ts.map {|t| t.to_s } # => ["text", "more"] + # def texts find_all { |child| child.kind_of? Text }.freeze end @@ -713,7 +1505,7 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false) Kernel.warn("#{self.class.name}.write is deprecated. 
See REXML::Formatters", uplevel: 1) formatter = if indent > -1 if transitive - require "rexml/formatters/transitive" + require_relative "formatters/transitive" REXML::Formatters::Transitive.new( indent, ie_hack ) else REXML::Formatters::Pretty.new( indent, ie_hack ) @@ -758,35 +1550,129 @@ def each_with_something( test, max=0, name=nil ) # XPath search support. You are expected to only encounter this class as # the <tt>element.elements</tt> object. Therefore, you are # _not_ expected to instantiate this yourself. + # + # xml_string = <<-EOT + # <?xml version="1.0" encoding="UTF-8"?> + # <bookstore> + # <book category="cooking"> + # <title lang="en">Everyday Italian</title> + # <author>Giada De Laurentiis</author> + # <year>2005</year> + # <price>30.00</price> + # </book> + # <book category="children"> + # <title lang="en">Harry Potter</title> + # <author>J K. Rowling</author> + # <year>2005</year> + # <price>29.99</price> + # </book> + # <book category="web"> + # <title lang="en">XQuery Kick Start</title> + # <author>James McGovern</author> + # <author>Per Bothner</author> + # <author>Kurt Cagle</author> + # <author>James Linn</author> + # <author>Vaidyanathan Nagarajan</author> + # <year>2003</year> + # <price>49.99</price> + # </book> + # <book category="web" cover="paperback"> + # <title lang="en">Learning XML</title> + # <author>Erik T. Ray</author> + # <year>2003</year> + # <price>39.95</price> + # </book> + # </bookstore> + # EOT + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements # => #<REXML::Elements @element=<bookstore> ... </>> + # class Elements include Enumerable - # Constructor - # parent:: the parent Element + # :call-seq: + # new(parent) -> new_elements_object + # + # Returns a new \Elements object with the given +parent+. + # Does _not_ assign <tt>parent.elements = self</tt>: + # + # d = REXML::Document.new(xml_string) + # eles = REXML::Elements.new(d.root) + # eles # => #<REXML::Elements @element=<bookstore> ... 
</>> + # eles == d.root.elements # => false + # def initialize parent @element = parent end - # Fetches a child element. Filters only Element children, regardless of - # the XPath match. - # index:: - # the search parameter. This is either an Integer, which - # will be used to find the index'th child Element, or an XPath, - # which will be used to search for the Element. <em>Because - # of the nature of XPath searches, any element in the connected XML - # document can be fetched through any other element.</em> <b>The - # Integer index is 1-based, not 0-based.</b> This means that the first - # child element is at index 1, not 0, and the +n+th element is at index - # +n+, not <tt>n-1</tt>. This is because XPath indexes element children - # starting from 1, not 0, and the indexes should be the same. - # name:: - # optional, and only used in the first argument is an - # Integer. In that case, the index'th child Element that has the - # supplied name will be returned. Note again that the indexes start at 1. - # Returns:: the first matching Element, or nil if no child matched - # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>' - # doc.root.elements[1] #-> <b/> - # doc.root.elements['c'] #-> <c id="1"/> - # doc.root.elements[2,'c'] #-> <c id="2"/> + # :call-seq: + # parent + # + # Returns the parent element cited in creating the \Elements object. + # This element is also the default starting point for searching + # in the \Elements object. + # + # d = REXML::Document.new(xml_string) + # elements = REXML::Elements.new(d.root) + # elements.parent == d.root # => true + # + def parent + @element + end + + # :call-seq: + # elements[index] -> element or nil + # elements[xpath] -> element or nil + # elements[n, name] -> element or nil + # + # Returns the first \Element object selected by the arguments, + # if any found, or +nil+ if none found. 
+ # + # Notes: + # - The +index+ is 1-based, not 0-based, so that: + # - The first element has index <tt>1</tt> + # - The _nth_ element has index +n+. + # - The selection ignores non-\Element nodes. + # + # When the single argument +index+ is given, + # returns the element given by the index, if any; otherwise, +nil+: + # + # d = REXML::Document.new(xml_string) + # eles = d.root.elements + # eles # => #<REXML::Elements @element=<bookstore> ... </>> + # eles[1] # => <book category='cooking'> ... </> + # eles.size # => 4 + # eles[4] # => <book category='web' cover='paperback'> ... </> + # eles[5] # => nil + # + # The node at this index is not an \Element, and so is not returned: + # + # eles = d.root.first.first # => <title lang='en'> ... </> + # eles.to_a # => ["Everyday Italian"] + # eles[1] # => nil + # + # When the single argument +xpath+ is given, + # returns the first element found via that +xpath+, if any; otherwise, +nil+: + # + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>> + # eles['/bookstore'] # => <bookstore> ... </> + # eles['//book'] # => <book category='cooking'> ... </> + # eles['//book [@category="children"]'] # => <book category='children'> ... </> + # eles['/nosuch'] # => nil + # eles['//nosuch'] # => nil + # eles['//book [@category="nosuch"]'] # => nil + # eles['.'] # => <bookstore> ... </> + # eles['..'].class # => REXML::Document + # + # With arguments +n+ and +name+ given, + # returns the _nth_ found element that has the given +name+, + # or +nil+ if there is no such _nth_ element: + # + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>> + # eles[1, 'book'] # => <book category='cooking'> ... </> + # eles[4, 'book'] # => <book category='web' cover='paperback'> ... </> + # eles[5, 'book'] # => nil + # def []( index, name=nil) if index.kind_of? 
Integer raise "index (#{index}) must be >= 1" if index < 1 @@ -806,19 +1692,42 @@ def []( index, name=nil) end end - # Sets an element, replacing any previous matching element. If no - # existing element is found ,the element is added. - # index:: Used to find a matching element to replace. See [](). - # element:: - # The element to replace the existing element with - # the previous element - # Returns:: nil if no previous element was found. + # :call-seq: + # elements[] = index, replacement_element -> replacement_element or nil + # + # Replaces or adds an element. + # + # When <tt>eles[index]</tt> exists, replaces it with +replacement_element+ + # and returns +replacement_element+: + # + # d = REXML::Document.new(xml_string) + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>> + # eles[1] # => <book category='cooking'> ... </> + # eles[1] = REXML::Element.new('foo') + # eles[1] # => <foo/> + # + # Does nothing (or raises an exception) + # if +replacement_element+ is not an \Element: + # eles[2] # => <book category='web' cover='paperback'> ... </> + # eles[2] = REXML::Text.new('bar') + # eles[2] # => <book category='web' cover='paperback'> ... </> + # + # When <tt>eles[index]</tt> does not exist, + # adds +replacement_element+ to the element and returns + # + # d = REXML::Document.new(xml_string) + # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... 
</>> + # eles.size # => 4 + # eles[50] = REXML::Element.new('foo') # => <foo/> + # eles.size # => 5 + # eles[5] # => <foo/> + # + # Does nothing (or raises an exception) + # if +replacement_element+ is not an \Element: + # + # eles[50] = REXML::Text.new('bar') # => "bar" + # eles.size # => 5 # - # doc = Document.new '<a/>' - # doc.root.elements[10] = Element.new('b') #-> <a><b/></a> - # doc.root.elements[1] #-> <b/> - # doc.root.elements[1] = Element.new('c') #-> <a><c/></a> - # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a> def []=( index, element ) previous = self[index] if previous.nil? @@ -829,14 +1738,34 @@ def []=( index, element ) return previous end - # Returns +true+ if there are no +Element+ children, +false+ otherwise + # :call-seq: + # empty? -> true or false + # + # Returns +true+ if there are no \Element children, +false+ otherwise. + # + # d = REXML::Document.new('') + # d.elements.empty? # => true + # d = REXML::Document.new(xml_string) + # d.elements.empty? # => false + # def empty? @element.find{ |child| child.kind_of? Element}.nil? end - # Returns the index of the supplied child (starting at 1), or -1 if - # the element is not a child - # element:: an +Element+ child + # :call-seq: + # index(element) + # + # Returns the 1-based index of the given +element+, if found; + # otherwise, returns -1: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # ele_1, ele_2, ele_3, ele_4 = *elements + # elements.index(ele_4) # => 4 + # elements.delete(ele_3) + # elements.index(ele_4) # => 3 + # elements.index(ele_3) # => -1 + # def index element rv = 0 found = @element.find do |child| @@ -848,17 +1777,47 @@ def index element return -1 end - # Deletes a child Element - # element:: - # Either an Element, which is removed directly; an - # xpath, where the first matching child is removed; or an Integer, - # where the n'th Element is removed. 
- # Returns:: the removed child - # doc = Document.new '<a><b/><c/><c id="1"/></a>' - # b = doc.root.elements[1] - # doc.root.elements.delete b #-> <a><c/><c id="1"/></a> - # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a> - # doc.root.elements.delete 1 #-> <a/> + # :call-seq: + # delete(index) -> removed_element or nil + # delete(element) -> removed_element or nil + # delete(xpath) -> removed_element or nil + # + # Removes an element; returns the removed element, or +nil+ if none removed. + # + # With integer argument +index+ given, + # removes the child element at that offset: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.size # => 4 + # elements[2] # => <book category='children'> ... </> + # elements.delete(2) # => <book category='children'> ... </> + # elements.size # => 3 + # elements[2] # => <book category='web'> ... </> + # elements.delete(50) # => nil + # + # With element argument +element+ given, + # removes that child element: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # ele_1, ele_2, ele_3, ele_4 = *elements + # elements.size # => 4 + # elements[2] # => <book category='children'> ... </> + # elements.delete(ele_2) # => <book category='children'> ... </> + # elements.size # => 3 + # elements[2] # => <book category='web'> ... </> + # elements.delete(ele_2) # => nil + # + # With string argument +xpath+ given, + # removes the first element found via that xpath: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.delete('//book') # => <book category='cooking'> ... </> + # elements.delete('//book [@category="children"]') # => <book category='children'> ... </> + # elements.delete('//nosuch') # => nil + # def delete element if element.kind_of? Element @element.delete element @@ -868,12 +1827,23 @@ def delete element end end - # Removes multiple elements. Filters for Element children, regardless of - # XPath matching. 
- # xpath:: all elements matching this String path are removed. - # Returns:: an Array of Elements that have been removed - # doc = Document.new '<a><c/><c/><c/><c/></a>' - # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>] + # :call-seq: + # delete_all(xpath) + # + # Removes all elements found via the given +xpath+; + # returns the array of removed elements (empty if none were removed). + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.size # => 4 + # deleted_elements = elements.delete_all('//book [@category="web"]') + # deleted_elements.size # => 2 + # elements.size # => 2 + # deleted_elements = elements.delete_all('//book') + # deleted_elements.size # => 2 + # elements.size # => 0 + # elements.delete_all('//book') # => [] + # def delete_all( xpath ) rv = [] XPath::each( @element, xpath) {|element| @@ -886,15 +1856,68 @@ def delete_all( xpath ) return rv end - # Adds an element - # element:: - # if supplied, is either an Element, String, or - # Source (see Element.initialize). If not supplied or nil, a - # new, default Element will be constructed - # Returns:: the added Element - # a = Element.new('a') - # a.elements.add(Element.new('b')) #-> <a><b/></a> - # a.elements.add('c') #-> <a><b/><c/></a> + # :call-seq: + # add -> new_element + # add(name) -> new_element + # add(element) -> element + # + # Adds an element; returns the element added. + # + # With no argument, creates and adds a new element. + # The new element has: + # + # - No name. + # - \Parent from the \Elements object. + # - Context from that parent. + # + # Example: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # parent = elements.parent # => <bookstore> ... </> + # parent.context = {raw: :all} + # elements.size # => 4 + # new_element = elements.add # => </> + # elements.size # => 5 + # new_element.name # => nil + # new_element.parent # => <bookstore> ... 
</> + # new_element.context # => {:raw=>:all} + # + # With string argument +name+, creates and adds a new element. + # The new element has: + # + # - Name +name+. + # - \Parent from the \Elements object. + # - Context from the that parent. + # + # Example: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # parent = elements.parent # => <bookstore> ... </> + # parent.context = {raw: :all} + # elements.size # => 4 + # new_element = elements.add('foo') # => <foo/> + # elements.size # => 5 + # new_element.name # => "foo" + # new_element.parent # => <bookstore> ... </> + # new_element.context # => {:raw=>:all} + # + # With argument +element+, + # creates and adds a clone of the given +element+. + # The new element has name, parent, and context from the given +element+. + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.size # => 4 + # e0 = REXML::Element.new('foo') + # e1 = REXML::Element.new('bar', e0, {raw: :all}) + # element = elements.add(e1) # => <bar/> + # elements.size # => 5 + # element.name # => "bar" + # element.parent # => <bookstore> ... </> + # element.context # => {:raw=>:all} + # def add element=nil if element.nil? Element.new("", self, @element.context) @@ -909,24 +1932,55 @@ def add element=nil alias :<< :add - # Iterates through all of the child Elements, optionally filtering - # them by a given XPath - # xpath:: - # optional. If supplied, this is a String XPath, and is used to - # filter the children, so that only matching children are yielded. 
Note - # that XPaths are automatically filtered for Elements, so that - # non-Element children will not be yielded - # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>' - # doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements - # doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements - # doc.root.elements.each('child::node()') {|e|p e} - # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/> - # XPath.each(doc.root, 'child::node()', &block) - # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/> + # :call-seq: + # each(xpath = nil) {|element| ... } -> self + # + # Iterates over the elements. + # + # With no argument, calls the block with each element: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.each {|element| p element } + # + # Output: + # + # <book category='cooking'> ... </> + # <book category='children'> ... </> + # <book category='web'> ... </> + # <book category='web' cover='paperback'> ... </> + # + # With argument +xpath+, calls the block with each element + # that matches the given +xpath+: + # + # elements.each('//book [@category="web"]') {|element| p element } + # + # Output: + # + # <book category='web'> ... </> + # <book category='web' cover='paperback'> ... </> + # def each( xpath=nil ) XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element } end + # :call-seq: + # collect(xpath = nil) {|element| ... } -> array + # + # Iterates over the elements; returns the array of block return values. 
+ # + # With no argument, iterates over all elements: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.collect {|element| element.size } # => [9, 9, 17, 9] + # + # With argument +xpath+, iterates over elements that match + # the given +xpath+: + # + # xpath = '//book [@category="web"]' + # elements.collect(xpath) {|element| element.size } # => [17, 9] + # def collect( xpath=nil ) collection = [] XPath::each( @element, xpath ) {|e| @@ -935,6 +1989,83 @@ def collect( xpath=nil ) collection end + # :call-seq: + # inject(xpath = nil, initial = nil) -> object + # + # Calls the block with elements; returns the last block return value. + # + # With no argument, iterates over the elements, calling the block + # <tt>elements.size - 1</tt> times. + # + # - The first call passes the first and second elements. + # - The second call passes the first block return value and the third element. + # - The third call passes the second block return value and the fourth element. + # - And so on. + # + # In this example, the block returns the passed element, + # which is then the object argument to the next call: + # + # d = REXML::Document.new(xml_string) + # elements = d.root.elements + # elements.inject do |object, element| + # p [elements.index(object), elements.index(element)] + # element + # end + # + # Output: + # + # [1, 2] + # [2, 3] + # [3, 4] + # + # With the single argument +xpath+, calls the block only with + # elements matching that xpath: + # + # elements.inject('//book [@category="web"]') do |object, element| + # p [elements.index(object), elements.index(element)] + # element + # end + # + # Output: + # + # [3, 4] + # + # With argument +xpath+ given as +nil+ + # and argument +initial+ also given, + # calls the block once for each element. + # + # - The first call passes the +initial+ and the first element. + # - The second call passes the first block return value and the second element. 
+ # - The third call passes the second block return value and the third element. + # - And so on. + # + # In this example, the first object index is <tt>-1</tt>: + # + # elements.inject(nil, 'Initial') do |object, element| + # p [elements.index(object), elements.index(element)] + # element + # end + # + # Output: + # + # [-1, 1] + # [1, 2] + # [2, 3] + # [3, 4] + # + # In this form the passed object can be used as an accumulator: + # + # elements.inject(nil, 0) do |total, element| + # total += element.size + # end # => 44 + # + # With both arguments +xpath+ and +initial+ given, + # calls the block only with elements matching that xpath: + # + # elements.inject('//book [@category="web"]', 0) do |total, element| + # total += element.size + # end # => 26 + # def inject( xpath=nil, initial=nil ) first = true XPath::each( @element, xpath ) {|e| @@ -950,23 +2081,39 @@ def inject( xpath=nil, initial=nil ) initial end - # Returns the number of +Element+ children of the parent object. - # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>' - # doc.root.size #-> 6, 3 element and 3 text nodes - # doc.root.elements.size #-> 3 + # :call-seq: + # size -> integer + # + # Returns the count of \Element children: + # + # d = REXML::Document.new '<a>sean<b/>elliott<b/>russell<b/></a>' + # d.root.elements.size # => 3 # Three elements. + # d.root.size # => 6 # Three elements plus three text nodes. + # def size count = 0 @element.each {|child| count+=1 if child.kind_of? Element } count end - # Returns an Array of Element children. An XPath may be supplied to - # filter the children. Only Element children are returned, even if the - # supplied XPath matches non-Element children. 
- # doc = Document.new '<a>sean<b/>elliott<c/></a>' - # doc.root.elements.to_a #-> [ <b/>, <c/> ] - # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ] - # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ] + # :call-seq: + # to_a(xpath = nil) -> array_of_elements + # + # Returns an array of element children (not including non-element children). + # + # With no argument, returns an array of all element children: + # + # d = REXML::Document.new '<a>sean<b/>elliott<c/></a>' + # elements = d.root.elements + # elements.to_a # => [<b/>, <c/>] # Omits non-element children. + # children = d.root.children + # children # => ["sean", <b/>, "elliott", <c/>] # Includes non-element children. + # + # With argument +xpath+, returns an array of element children + # that match the xpath: + # + # elements.to_a('//c') # => [<c/>] + # def to_a( xpath=nil ) rv = XPath.match( @element, xpath ) return rv.find_all{|e| e.kind_of? Element} if xpath @@ -988,36 +2135,89 @@ def literalize name # A class that defines the set of Attributes of an Element and provides # operations for accessing elements in that set. class Attributes < Hash - # Constructor - # element:: the Element of which this is an Attribute + + # :call-seq: + # new(element) + # + # Creates and returns a new \REXML::Attributes object. + # The element given by argument +element+ is stored, + # but its own attributes are not modified: + # + # ele = REXML::Element.new('foo') + # attrs = REXML::Attributes.new(ele) + # attrs.object_id == ele.attributes.object_id # => false + # + # Other instance methods in class \REXML::Attributes may refer to: + # + # - +element.document+. + # - +element.prefix+. + # - +element.expanded_name+. + # def initialize element @element = element end - # Fetches an attribute value. If you want to get the Attribute itself, - # use get_attribute() - # name:: an XPath attribute name. Namespaces are relevant here. 
- # Returns:: - # the String value of the matching attribute, or +nil+ if no - # matching attribute was found. This is the unnormalized value - # (with entities expanded). + # :call-seq: + # [name] -> attribute_value or nil + # + # Returns the value for the attribute given by +name+, + # if it exists; otherwise +nil+. + # The value returned is the unnormalized attribute value, + # with entities expanded: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes['att'] # => "<" + # ele.attributes['bar:att'] # => "2" + # ele.attributes['nosuch'] # => nil + # + # Related: get_attribute (returns an \Attribute object). # - # doc = Document.new "<a foo:att='1' bar:att='2' att='<'/>" - # doc.root.attributes['att'] #-> '<' - # doc.root.attributes['bar:att'] #-> '2' def [](name) attr = get_attribute(name) return attr.value unless attr.nil? return nil end + # :call-seq: + # to_a -> array_of_attribute_objects + # + # Returns an array of \REXML::Attribute objects representing + # the attributes: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes.to_a # => [foo:att='1', bar:att='2', att='<'] + # attrs.first.class # => REXML::Attribute + # def to_a enum_for(:each_attribute).to_a end - # Returns the number of attributes the owning Element contains. 
- # doc = Document "<a x='1' y='2' foo:x='3'/>" - # doc.root.attributes.length #-> 3 + # :call-seq: + # length + # + # Returns the count of attributes: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes.length # => 3 + # def length c = 0 each_attribute { c+=1 } @@ -1025,14 +2225,30 @@ def length end alias :size :length - # Iterates over the attributes of an Element. Yields actual Attribute - # nodes, not String values. + # :call-seq: + # each_attribute {|attr| ... } + # + # Calls the given block with each \REXML::Attribute object: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes.each_attribute do |attr| + # p [attr.class, attr] + # end + # + # Output: + # + # [REXML::Attribute, foo:att='1'] + # [REXML::Attribute, bar:att='2'] + # [REXML::Attribute, att='<'] # - # doc = Document.new '<a x="1" y="2"/>' - # doc.root.attributes.each_attribute {|attr| - # p attr.expanded_name+" => "+attr.value - # } def each_attribute # :yields: attribute + return to_enum(__method__) unless block_given? each_value do |val| if val.kind_of? Attribute yield val @@ -1042,26 +2258,54 @@ def each_attribute # :yields: attribute end end - # Iterates over each attribute of an Element, yielding the expanded name - # and value as a pair of Strings. + # :call-seq: + # each {|expanded_name, value| ... 
} + # + # Calls the given block with each expanded-name/value pair: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # ele.attributes.each do |expanded_name, value| + # p [expanded_name, value] + # end + # + # Output: + # + # ["foo:att", "1"] + # ["bar:att", "2"] + # ["att", "<"] # - # doc = Document.new '<a x="1" y="2"/>' - # doc.root.attributes.each {|name, value| p name+" => "+value } def each + return to_enum(__method__) unless block_given? each_attribute do |attr| yield [attr.expanded_name, attr.value] end end - # Fetches an attribute - # name:: - # the name by which to search for the attribute. Can be a - # <tt>prefix:name</tt> namespace name. - # Returns:: The first matching attribute, or nil if there was none. This - # value is an Attribute node, not the String value of the attribute. - # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>' - # doc.root.attributes.get_attribute("foo").value #-> "2" - # doc.root.attributes.get_attribute("x:foo").value #-> "1" + # :call-seq: + # get_attribute(name) -> attribute_object or nil + # + # Returns the \REXML::Attribute object for the given +name+: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.get_attribute('foo:att') # => foo:att='1' + # attrs.get_attribute('foo:att').class # => REXML::Attribute + # attrs.get_attribute('bar:att') # => bar:att='2' + # attrs.get_attribute('att') # => att='<' + # attrs.get_attribute('nosuch') # => nil + # def get_attribute( name ) attr = fetch( name, nil ) if attr.nil? 
@@ -1095,18 +2339,29 @@ def get_attribute( name ) return attr end - # Sets an attribute, overwriting any existing attribute value by the - # same name. Namespace is significant. - # name:: the name of the attribute - # value:: - # (optional) If supplied, the value of the attribute. If - # nil, any existing matching attribute is deleted. - # Returns:: - # Owning element - # doc = Document.new "<a x:foo='1' foo='3'/>" - # doc.root.attributes['y:foo'] = '2' - # doc.root.attributes['foo'] = '4' - # doc.root.attributes['x:foo'] = nil + # :call-seq: + # [name] = value -> value + # + # When +value+ is non-+nil+, + # assigns that to the attribute for the given +name+, + # overwriting the previous value if it exists: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs['foo:att'] = '2' # => "2" + # attrs['baz:att'] = '3' # => "3" + # + # When +value+ is +nil+, deletes the attribute if it exists: + # + # attrs['baz:att'] = nil + # attrs.include?('baz:att') # => false + # def []=( name, value ) if value.nil? # Delete the named attribute attr = get_attribute(name) @@ -1129,29 +2384,25 @@ def []=( name, value ) elsif old_attr.kind_of? 
Hash old_attr[value.prefix] = value elsif old_attr.prefix != value.prefix - # Check for conflicting namespaces - raise ParseException.new( - "Namespace conflict in adding attribute \"#{value.name}\": "+ - "Prefix \"#{old_attr.prefix}\" = "+ - "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+ - "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if - value.prefix != "xmlns" and old_attr.prefix != "xmlns" and - @element.namespace( old_attr.prefix ) == - @element.namespace( value.prefix ) - store value.name, { old_attr.prefix => old_attr, - value.prefix => value } + store value.name, {old_attr.prefix => old_attr, + value.prefix => value} else store value.name, value end return @element end - # Returns an array of Strings containing all of the prefixes declared - # by this set of # attributes. The array does not include the default + # :call-seq: + # prefixes -> array_of_prefix_strings + # + # Returns an array of prefix strings in the attributes. + # The array does not include the default # namespace declaration, if one exists. 
- # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+ - # "z='glorp' p:k='gru'/>") - # prefixes = doc.root.attributes.prefixes #-> ['x', 'y'] + # + # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>' + # d = REXML::Document.new(xml_string) + # d.root.attributes.prefixes # => ["x", "y"] + # def prefixes ns = [] each_attribute do |attribute| @@ -1168,6 +2419,15 @@ def prefixes ns end + # :call-seq: + # namespaces + # + # Returns a hash of name/value pairs for the namespaces: + # + # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>' + # d = REXML::Document.new(xml_string) + # d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"} + # def namespaces namespaces = {} each_attribute do |attribute| @@ -1184,16 +2444,34 @@ def namespaces namespaces end - # Removes an attribute - # attribute:: - # either a String, which is the name of the attribute to remove -- - # namespaces are significant here -- or the attribute to remove. - # Returns:: the owning element - # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>" - # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>" - # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>" - # attr = doc.root.attributes.get_attribute('y:foo') - # doc.root.attributes.delete attr #-> <a z:foo='4'/>" + # :call-seq: + # delete(name) -> element + # delete(attribute) -> element + # + # Removes a specified attribute if it exists; + # returns the attributes' element. 
+ # + # When string argument +name+ is given, + # removes the attribute of that name if it exists: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/> + # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/> + # + # When attribute argument +attribute+ is given, + # removes that attribute if it exists: + # + # attr = REXML::Attribute.new('bar:att', '2') + # attrs.delete(attr) # => <ele att='<'/> # => <ele att='<'/> + # attrs.delete(attr) # => <ele att='<'/> # => <ele/> + # def delete( attribute ) name = nil prefix = nil @@ -1221,19 +2499,48 @@ def delete( attribute ) @element end - # Adds an attribute, overriding any existing attribute by the - # same name. Namespaces are significant. - # attribute:: An Attribute + # :call-seq: + # add(attribute) -> attribute + # + # Adds attribute +attribute+, replacing the previous + # attribute of the same name if it exists; + # returns +attribute+: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs # => {"att"=>{"foo"=>foo:att='1', "bar"=>bar:att='2', ""=>att='<'}} + # attrs.add(REXML::Attribute.new('foo:att', '2')) # => foo:att='2' + # attrs.add(REXML::Attribute.new('baz', '3')) # => baz='3' + # attrs.include?('baz') # => true + # def add( attribute ) self[attribute.name] = attribute end alias :<< :add - # Deletes all attributes matching a name. Namespaces are significant. 
- # name:: - # A String; all attributes that match this path will be removed - # Returns:: an Array of the Attributes that were removed + # :call-seq: + # delete_all(name) -> array_of_removed_attributes + # + # Removes all attributes matching the given +name+; + # returns an array of the removed attributes: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.delete_all('att') # => [att='<'] + # def delete_all( name ) rv = [] each_attribute { |attribute| @@ -1243,11 +2550,23 @@ def delete_all( name ) return rv end - # The +get_attribute_ns+ method retrieves a method by its namespace - # and name. Thus it is possible to reliably identify an attribute - # even if an XML processor has changed the prefix. + # :call-seq: + # get_attribute_ns(namespace, name) + # + # Returns the \REXML::Attribute object among the attributes + # that matches the given +namespace+ and +name+: + # + # xml_string = <<-EOT + # <root xmlns:foo="http://foo" xmlns:bar="http://bar"> + # <ele foo:att='1' bar:att='2' att='<'/> + # </root> + # EOT + # d = REXML::Document.new(xml_string) + # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/> + # attrs = ele.attributes + # attrs.get_attribute_ns('http://foo', 'att') # => foo:att='1' + # attrs.get_attribute_ns('http://foo', 'nosuch') # => nil # - # Method contributed by Henrik Martensson def get_attribute_ns(namespace, name) result = nil each_attribute() { |attribute| diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb index 97c7b6b42f..1ba5a7bb7b 100644 --- a/lib/rexml/entity.rb +++ b/lib/rexml/entity.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false -require 'rexml/child' -require 'rexml/source' -require 'rexml/xmltokens' +require_relative 'child' +require_relative 'source' 
+require_relative 'xmltokens' module REXML class Entity < Child @@ -12,6 +12,7 @@ class Entity < Child EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))" NDATADECL = "\\s+NDATA\\s+#{NAME}" PEREFERENCE = "%#{NAME};" + PEREFERENCE_RE = /#{PEREFERENCE}/um ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))} PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})" ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" @@ -19,7 +20,7 @@ class Entity < Child GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um - attr_reader :name, :external, :ref, :ndata, :pubid + attr_reader :name, :external, :ref, :ndata, :pubid, :value # Create a new entity. Simple entities can be constructed by passing a # name, value to the constructor; this creates a generic, plain entity @@ -68,14 +69,14 @@ def Entity::matches? string end # Evaluates to the unnormalized value of this entity; that is, replacing - # all entities -- both %ent; and &ent; entities. This differs from - # +value()+ in that +value+ only replaces %ent; entities. + # &ent; entities. def unnormalized - document.record_entity_expansion unless document.nil? - v = value() - return nil if v.nil? - @unnormalized = Text::unnormalize(v, parent) - @unnormalized + document&.record_entity_expansion + + return nil if @value.nil? + + @unnormalized = Text::unnormalize(@value, parent, + entity_expansion_text_limit: document&.entity_expansion_text_limit) end #once :unnormalized @@ -90,7 +91,7 @@ def normalized # object itself is valid.) # # out:: - # An object implementing <TT><<<TT> to which the entity will be + # An object implementing <TT><<</TT> to which the entity will be # output # indent:: # *DEPRECATED* and ignored @@ -121,36 +122,6 @@ def to_s write rv rv end - - PEREFERENCE_RE = /#{PEREFERENCE}/um - # Returns the value of this entity. 
At the moment, only internal entities - # are processed. If the value contains internal references (IE, - # %blah;), those are replaced with their values. IE, if the doctype - # contains: - # <!ENTITY % foo "bar"> - # <!ENTITY yada "nanoo %foo; nanoo> - # then: - # doctype.entity('yada').value #-> "nanoo bar nanoo" - def value - if @value - matches = @value.scan(PEREFERENCE_RE) - rv = @value.clone - if @parent - sum = 0 - matches.each do |entity_reference| - entity_value = @parent.entity( entity_reference[0] ) - if sum + entity_value.bytesize > Security.entity_expansion_text_limit - raise "entity expansion has grown too large" - else - sum += entity_value.bytesize - end - rv.gsub!( /%#{entity_reference.join};/um, entity_value ) - end - end - return rv - end - nil - end end # This is a set of entity constants -- the ones defined in the XML diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb index c375f1468b..811b2ff3d5 100644 --- a/lib/rexml/formatters/default.rb +++ b/lib/rexml/formatters/default.rb @@ -1,4 +1,5 @@ # frozen_string_literal: false + module REXML module Formatters class Default @@ -101,11 +102,14 @@ def write_cdata( node, output ) end def write_instruction( node, output ) - output << Instruction::START.sub(/\\/u, '') + output << Instruction::START output << node.target - output << ' ' - output << node.content - output << Instruction::STOP.sub(/\\/u, '') + content = node.content + if content + output << ' ' + output << content + end + output << Instruction::STOP end end end diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb index a80274bdad..a838d8357e 100644 --- a/lib/rexml/formatters/pretty.rb +++ b/lib/rexml/formatters/pretty.rb @@ -1,5 +1,5 @@ -# frozen_string_literal: false -require 'rexml/formatters/default' +# frozen_string_literal: true +require_relative 'default' module REXML module Formatters @@ -58,7 +58,7 @@ def write_element(node, output) skip = false if compact if 
node.children.inject(true) {|s,c| s & c.kind_of?(Text)} - string = "" + string = +"" old_level = @level @level = 0 node.children.each { |child| write( child, string ) } @@ -111,7 +111,7 @@ def write_document( node, output ) # itself, then we don't need a carriage return... which makes this # logic more complex. node.children.each { |child| - next if child == node.children[-1] and child.instance_of?(Text) + next if child.instance_of?(Text) unless child == node.children[0] or child.instance_of?(Text) or (child == node.children[1] and !node.children[0].writethis) output << "\n" diff --git a/lib/rexml/formatters/transitive.rb b/lib/rexml/formatters/transitive.rb index 81e67f3274..5ff51e10f3 100644 --- a/lib/rexml/formatters/transitive.rb +++ b/lib/rexml/formatters/transitive.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 'rexml/formatters/pretty' +require_relative 'pretty' module REXML module Formatters diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index cd879fdd28..4c11461601 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -66,11 +66,11 @@ def Functions::count( node_set ) def Functions::id( object ) end - # UNTESTED - def Functions::local_name( node_set=nil ) - get_namespace( node_set ) do |node| + def Functions::local_name(node_set=nil) + get_namespace(node_set) do |node| return node.local_name end + "" end def Functions::namespace_uri( node_set=nil ) @@ -86,10 +86,14 @@ def Functions::name( node_set=nil ) # Helper method. def Functions::get_namespace( node_set = nil ) if node_set == nil - yield @@context[:node] if defined? @@context[:node].namespace + yield @@context[:node] if @@context[:node].respond_to?(:namespace) else if node_set.respond_to? :each - node_set.each { |node| yield node if defined? node.namespace } + result = [] + node_set.each do |node| + result << yield(node) if node.respond_to?(:namespace) + end + result elsif node_set.respond_to? 
:namespace yield node_set end @@ -131,22 +135,38 @@ def Functions::get_namespace( node_set = nil ) # # An object of a type other than the four basic types is converted to a # string in a way that is dependent on that type. - def Functions::string( object=nil ) - #object = @context unless object - if object.instance_of? Array - string( object[0] ) - elsif defined? object.node_type - if object.node_type == :attribute + def Functions::string( object=@@context[:node] ) + if object.respond_to?(:node_type) + case object.node_type + when :attribute object.value - elsif object.node_type == :element || object.node_type == :document + when :element string_value(object) + when :document + string_value(object.root) + when :processing_instruction + object.content else object.to_s end - elsif object.nil? - return "" else - object.to_s + case object + when Array + string(object[0]) + when Float + if object.nan? + "NaN" + else + integer = object.to_i + if object == integer + "%d" % integer + else + object.to_s + end + end + else + object.to_s + end end end @@ -167,9 +187,12 @@ def Functions::string_value( o ) rv end - # UNTESTED def Functions::concat( *objects ) - objects.join + concatenated = "" + objects.each do |object| + concatenated << string(object) + end + concatenated end # Fixed by Mike Stok @@ -239,11 +262,10 @@ def Functions::string_length( string ) string(string).length end - # UNTESTED def Functions::normalize_space( string=nil ) string = string(@@context[:node]) if string.nil? if string.kind_of? Array - string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string} + string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x} else string.to_s.strip.gsub(/\s+/um, ' ') end @@ -292,18 +314,23 @@ def Functions::translate( string, tr1, tr2 ) end end - # UNTESTED - def Functions::boolean( object=nil ) - if object.kind_of? String - if object =~ /\d+/u - return object.to_f != 0 - else - return object.size > 0 - end - elsif object.kind_of? 
Array - object = object.find{|x| x and true} + def Functions::boolean(object=@@context[:node]) + case object + when true, false + object + when Float + return false if object.zero? + return false if object.nan? + true + when Numeric + not object.zero? + when String + not object.empty? + when Array + not object.empty? + else + object ? true : false end - return object ? true : false end # UNTESTED @@ -357,25 +384,23 @@ def Functions::compare_language lang1, lang2 # # an object of a type other than the four basic types is converted to a # number in a way that is dependent on that type - def Functions::number( object=nil ) - object = @@context[:node] unless object + def Functions::number(object=@@context[:node]) case object when true Float(1) when false Float(0) when Array - number(string( object )) + number(string(object)) when Numeric object.to_f else - str = string( object ) - # If XPath ever gets scientific notation... - #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/ - if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/ - str.to_f + str = string(object) + case str.strip + when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+))\s*\z/ + $1.to_f else - (0.0 / 0.0) + Float::NAN end end end @@ -397,7 +422,7 @@ def Functions::round( number ) number = number(number) begin neg = number.negative? - number = number.abs.round(half: :up) + number = number.abs.round neg ? -number : number rescue FloatDomainError number diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb index c4f65eefc1..318741f03b 100644 --- a/lib/rexml/instruction.rb +++ b/lib/rexml/instruction.rb @@ -1,13 +1,14 @@ # frozen_string_literal: false -require "rexml/child" -require "rexml/source" + +require_relative "child" +require_relative "source" module REXML # Represents an XML Instruction; IE, <? ... ?> # TODO: Add parent arg (3rd arg) to constructor class Instruction < Child - START = '<\?' - STOP = '\?>' + START = "<?" 
+ STOP = "?>" # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?> # content is everything else. @@ -17,20 +18,25 @@ class Instruction < Child # @param target can be one of a number of things. If String, then # the target of this instruction is set to this. If an Instruction, # then the Instruction is shallowly cloned (target and content are - # copied). If a Source, then the source is scanned and parsed for - # an Instruction declaration. + # copied). # @param content Must be either a String, or a Parent. Can only # be a Parent if the target argument is a Source. Otherwise, this # String is set as the content of this instruction. def initialize(target, content=nil) - if target.kind_of? String + case target + when String super() @target = target @content = content - elsif target.kind_of? Instruction + when Instruction super(content) @target = target.target @content = target.content + else + message = + "processing instruction target must be String or REXML::Instruction: " + message << "<#{target.inspect}>" + raise ArgumentError, message end @content.strip! 
if @content end @@ -45,11 +51,13 @@ def clone def write writer, indent=-1, transitive=false, ie_hack=false Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1) indent(writer, indent) - writer << START.sub(/\\/u, '') + writer << START writer << @target - writer << ' ' - writer << @content - writer << STOP.sub(/\\/u, '') + if @content + writer << ' ' + writer << @content + end + writer << STOP end # @return true if other is an Instruction, and the content and target diff --git a/lib/rexml/light/node.rb b/lib/rexml/light/node.rb index d58119a3a4..3dab885b0f 100644 --- a/lib/rexml/light/node.rb +++ b/lib/rexml/light/node.rb @@ -1,14 +1,6 @@ # frozen_string_literal: false -require 'rexml/xmltokens' - -# [ :element, parent, name, attributes, children* ] - # a = Node.new - # a << "B" # => <a>B</a> - # a.b # => <a>B<b/></a> - # a.b[1] # => <a>B<b/><b/><a> - # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a> - # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a> - # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a> +require_relative '../xmltokens' + module REXML module Light # Represents a tagged XML element. Elements are characterized by diff --git a/lib/rexml/namespace.rb b/lib/rexml/namespace.rb index 90ba7cc635..2e67252a51 100644 --- a/lib/rexml/namespace.rb +++ b/lib/rexml/namespace.rb @@ -1,5 +1,6 @@ -# frozen_string_literal: false -require 'rexml/xmltokens' +# frozen_string_literal: true + +require_relative 'xmltokens' module REXML # Adds named attributes to an object. 
@@ -9,19 +10,33 @@ module Namespace # The expanded name of the object, valid if name is set attr_accessor :prefix include XMLTokens + NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/ NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u # Sets the name and the expanded name def name=( name ) @expanded_name = name - name =~ NAMESPLIT - if $1 - @prefix = $1 - else + if name.match?(NAME_WITHOUT_NAMESPACE) @prefix = "" @namespace = "" + @name = name + elsif name =~ NAMESPLIT + if $1 + @prefix = $1 + else + @prefix = "" + @namespace = "" + end + @name = $2 + elsif name == "" + @prefix = nil + @namespace = nil + @name = nil + else + message = "name must be \#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: " + message += "<#{name.inspect}>" + raise ArgumentError, message end - @name = $2 end # Compares names optionally WITH namespaces diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb index 52337ade44..c771db70d1 100644 --- a/lib/rexml/node.rb +++ b/lib/rexml/node.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false -require "rexml/parseexception" -require "rexml/formatters/pretty" -require "rexml/formatters/default" +require_relative "parseexception" +require_relative "formatters/pretty" +require_relative "formatters/default" module REXML # Represents a node in the tree. Nodes are never encountered except as @@ -52,10 +52,14 @@ def parent? # Visit all subnodes of +self+ recursively def each_recursive(&block) # :yields: node - self.elements.each {|node| - block.call(node) - node.each_recursive(&block) - } + stack = [] + each { |child| stack.unshift child if child.node_type == :element } + until stack.empty? 
+ child = stack.pop + yield child + n = stack.size + child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element } + end end # Find (and return) first subnode (recursively) for which the block diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb index 96dfea570e..88a5fb378d 100644 --- a/lib/rexml/output.rb +++ b/lib/rexml/output.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 'rexml/encoding' +require_relative 'encoding' module REXML class Output diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb index 3bd0a96255..6a53b37a12 100644 --- a/lib/rexml/parent.rb +++ b/lib/rexml/parent.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/child" +require_relative "child" module REXML # A parent has children, and has methods for accessing them. The Parent diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb index 7b16cd1a41..e57d05fd2e 100644 --- a/lib/rexml/parseexception.rb +++ b/lib/rexml/parseexception.rb @@ -29,6 +29,7 @@ def to_s err << "\nLine: #{line}\n" err << "Position: #{position}\n" err << "Last 80 unconsumed characters:\n" + err.force_encoding("ASCII-8BIT") err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ') end diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index e7ef695912..a567e045d6 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,14 +1,40 @@ -# frozen_string_literal: false - -require "strscan" - -require 'rexml/parseexception' -require 'rexml/undefinednamespaceexception' -require 'rexml/source' +# frozen_string_literal: true +require_relative '../parseexception' +require_relative '../undefinednamespaceexception' +require_relative '../security' +require_relative '../source' require 'set' +require "strscan" module REXML module Parsers + unless [].respond_to?(:tally) + module EnumerableTally + refine Enumerable do + def tally + counts = {} + each do |item| + counts[item] ||= 0 + 
counts[item] += 1 + end + counts + end + end + end + using EnumerableTally + end + + if StringScanner::Version < "3.0.8" + module StringScannerCaptures + refine StringScanner do + def captures + values_at(*(1...size)) + end + end + end + using StringScannerCaptures + end + # = Using the Pull Parser # <em>This API is experimental, and subject to change.</em> # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" ) @@ -61,7 +87,7 @@ class BaseParser XMLDECL_START = /\A<\?xml\s/u; XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u - INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um + INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um TAG_MATCH = /\A<((?>#{QNAME_STR}))/um CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um @@ -98,7 +124,7 @@ class BaseParser ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" - ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um + ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um NOTATIONDECL_START = /\A\s*<!NOTATION/um EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um @@ -114,16 +140,33 @@ class BaseParser "apos" => [/'/, "'", "'", /'/] } - - ###################################################################### - # These are patterns to identify common markup errors, to make the - # error messages more informative. 
- ###################################################################### - MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um + module Private + PEREFERENCE_PATTERN = /#{PEREFERENCE}/um + TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um + CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um + ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um + NAME_PATTERN = /#{NAME}/um + GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" + PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" + ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um + CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/ + CHARACTER_REFERENCES = /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ + DEFAULT_ENTITIES_PATTERNS = {} + default_entities = ['gt', 'lt', 'quot', 'apos', 'amp'] + default_entities.each do |term| + DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/ + end + XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace" + end + private_constant :Private def initialize( source ) self.stream = source @listeners = [] + @prefixes = Set.new + @entity_expansion_count = 0 + @entity_expansion_limit = Security.entity_expansion_limit + @entity_expansion_text_limit = Security.entity_expansion_text_limit end def add_listener( listener ) @@ -131,15 +174,20 @@ def add_listener( listener ) end attr_reader :source + attr_reader :entity_expansion_count + attr_writer :entity_expansion_limit + attr_writer :entity_expansion_text_limit def stream=( source ) @source = SourceFactory.create_from( source ) @closed = nil + @have_root = false @document_status = nil @tags = [] @stack = [] @entities = [] - @nsstack = [] + @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE} + @namespaces_restore_stack = [] end def position @@ -189,6 +237,8 @@ def peek depth=0 # Returns the next event. This is a +PullEvent+ object. 
def pull + @source.drop_parsed_content + pull_event.tap do |event| @listeners.each do |listener| listener.receive event @@ -201,248 +251,280 @@ def pull_event x, @closed = @closed, nil return [ :end_element, x ] end - return [ :end_document ] if empty? + if empty? + if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: unclosed", @source) + end + unless @tags.empty? + path = "/" + @tags.join("/") + raise ParseException.new("Missing end tag for '#{path}'", @source) + end + return [ :end_document ] + end return @stack.shift if @stack.size > 0 #STDERR.puts @source.encoding #STDERR.puts "BUFFER = #{@source.buffer.inspect}" + + @source.ensure_buffer if @document_status == nil - word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um ) - word = word[1] unless word.nil? - #STDERR.puts "WORD = #{word.inspect}" - case word - when COMMENT_START - return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] - when XMLDECL_START - #STDERR.puts "XMLDECL" - results = @source.match( XMLDECL_PATTERN, true )[1] - version = VERSION.match( results ) - version = version[1] unless version.nil? - encoding = ENCODING.match(results) - encoding = encoding[1] unless encoding.nil? - if need_source_encoding_update?(encoding) - @source.encoding = encoding - end - if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding - encoding = "UTF-16" - end - standalone = STANDALONE.match(results) - standalone = standalone[1] unless standalone.nil? 
- return [ :xmldecl, version, encoding, standalone ] - when INSTRUCTION_START - return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] - when DOCTYPE_START - base_error_message = "Malformed DOCTYPE" - @source.match(DOCTYPE_START, true) - @nsstack.unshift(curr_ns=Set.new) - name = parse_name(base_error_message) - if @source.match(/\A\s*\[/um, true) - id = [nil, nil, nil] - @document_status = :in_doctype - elsif @source.match(/\A\s*>/um, true) - id = [nil, nil, nil] - @document_status = :after_doctype - else - id = parse_id(base_error_message, - accept_external_id: true, - accept_public_id: false) - if id[0] == "SYSTEM" - # For backward compatibility - id[1], id[2] = id[2], nil + start_position = @source.position + if @source.match("<?", true) + return process_instruction + elsif @source.match("<!", true) + if @source.match("--", true) + md = @source.match(/(.*?)-->/um, true) + if md.nil? + raise REXML::ParseException.new("Unclosed comment", @source) + end + if /--|-\z/.match?(md[1]) + raise REXML::ParseException.new("Malformed comment", @source) end - if @source.match(/\A\s*\[/um, true) - @document_status = :in_doctype - elsif @source.match(/\A\s*>/um, true) + return [ :comment, md[1] ] + elsif @source.match("DOCTYPE", true) + base_error_message = "Malformed DOCTYPE" + unless @source.match(/\s+/um, true) + if @source.match(">") + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid name" + end + @source.position = start_position + raise REXML::ParseException.new(message, @source) + end + name = parse_name(base_error_message) + if @source.match(/\s*\[/um, true) + id = [nil, nil, nil] + @document_status = :in_doctype + elsif @source.match(/\s*>/um, true) + id = [nil, nil, nil] @document_status = :after_doctype + @source.ensure_buffer else - message = "#{base_error_message}: garbage after external ID" - raise REXML::ParseException.new(message, @source) + id = parse_id(base_error_message, + 
accept_external_id: true, + accept_public_id: false) + if id[0] == "SYSTEM" + # For backward compatibility + id[1], id[2] = id[2], nil + end + if @source.match(/\s*\[/um, true) + @document_status = :in_doctype + elsif @source.match(/\s*>/um, true) + @document_status = :after_doctype + @source.ensure_buffer + else + message = "#{base_error_message}: garbage after external ID" + raise REXML::ParseException.new(message, @source) + end end - end - args = [:start_doctype, name, *id] - if @document_status == :after_doctype - @source.match(/\A\s*/um, true) - @stack << [ :end_doctype ] - end - return args - when /\A\s+/ - else - @document_status = :after_doctype - if @source.encoding == "UTF-8" - @source.buffer.force_encoding(::Encoding::UTF_8) + args = [:start_doctype, name, *id] + if @document_status == :after_doctype + @source.match(/\s*/um, true) + @stack << [ :end_doctype ] + end + return args + else + message = "Invalid XML" + raise REXML::ParseException.new(message, @source) end end end if @document_status == :in_doctype - md = @source.match(/\A\s*(.*?>)/um) - case md[1] - when SYSTEMENTITY - match = @source.match( SYSTEMENTITY, true )[1] - return [ :externalentity, match ] - - when ELEMENTDECL_START - return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] - - when ENTITY_START - match = @source.match( ENTITYDECL, true ).to_a.compact - match[0] = :entitydecl - ref = false - if match[1] == '%' - ref = true - match.delete_at 1 - end - # Now we have to sort out what kind of entity reference this is - if match[2] == 'SYSTEM' - # External reference - match[3] = match[3][1..-2] # PUBID - match.delete_at(4) if match.size > 4 # Chop out NDATA decl - # match is [ :entity, name, SYSTEM, pubid(, ndata)? ] - elsif match[2] == 'PUBLIC' - # External reference - match[3] = match[3][1..-2] # PUBID - match[4] = match[4][1..-2] # HREF - match.delete_at(5) if match.size > 5 # Chop out NDATA decl - # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? 
] - else - match[2] = match[2][1..-2] - match.pop if match.size == 4 - # match is [ :entity, name, value ] - end - match << '%' if ref - return match - when ATTLISTDECL_START - md = @source.match( ATTLISTDECL_PATTERN, true ) - raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? - element = md[1] - contents = md[0] - - pairs = {} - values = md[0].scan( ATTDEF_RE ) - values.each do |attdef| - unless attdef[3] == "#IMPLIED" - attdef.compact! - val = attdef[3] - val = attdef[4] if val == "#FIXED " - pairs[attdef[0]] = val - if attdef[0] =~ /^xmlns:(.*)/ - @nsstack[0] << $1 - end + @source.match(/\s*/um, true) # skip spaces + start_position = @source.position + if @source.match("<!", true) + if @source.match("ELEMENT", true) + md = @source.match(/(.*?)>/um, true) + raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? + return [ :elementdecl, "<!ELEMENT" + md[1] ] + elsif @source.match("ENTITY", true) + match_data = @source.match(Private::ENTITYDECL_PATTERN, true) + unless match_data + raise REXML::ParseException.new("Malformed entity declaration", @source) end - end - return [ :attlistdecl, element, pairs, contents ] - when NOTATIONDECL_START - base_error_message = "Malformed notation declaration" - unless @source.match(/\A\s*<!NOTATION\s+/um, true) - if @source.match(/\A\s*<!NOTATION\s*>/um) - message = "#{base_error_message}: name is missing" + match = [:entitydecl, *match_data.captures.compact] + ref = false + if match[1] == '%' + ref = true + match.delete_at 1 + end + # Now we have to sort out what kind of entity reference this is + if match[2] == 'SYSTEM' + # External reference + match[3] = match[3][1..-2] # PUBID + match.delete_at(4) if match.size > 4 # Chop out NDATA decl + # match is [ :entity, name, SYSTEM, pubid(, ndata)? 
] + elsif match[2] == 'PUBLIC' + # External reference + match[3] = match[3][1..-2] # PUBID + match[4] = match[4][1..-2] # HREF + match.delete_at(5) if match.size > 5 # Chop out NDATA decl + # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] + elsif Private::PEREFERENCE_PATTERN.match?(match[2]) + raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source) else - message = "#{base_error_message}: invalid declaration name" + match[2] = match[2][1..-2] + match.pop if match.size == 4 + # match is [ :entity, name, value ] end - raise REXML::ParseException.new(message, @source) - end - name = parse_name(base_error_message) - id = parse_id(base_error_message, - accept_external_id: true, - accept_public_id: true) - unless @source.match(/\A\s*>/um, true) - message = "#{base_error_message}: garbage before end >" - raise REXML::ParseException.new(message, @source) + match << '%' if ref + return match + elsif @source.match("ATTLIST", true) + md = @source.match(Private::ATTLISTDECL_END, true) + raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? + element = md[1] + contents = md[0] + + pairs = {} + values = md[0].strip.scan( ATTDEF_RE ) + values.each do |attdef| + unless attdef[3] == "#IMPLIED" + attdef.compact! 
+ val = attdef[3] + val = attdef[4] if val == "#FIXED " + pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @namespaces[$1] = val + end + end + end + return [ :attlistdecl, element, pairs, contents ] + elsif @source.match("NOTATION", true) + base_error_message = "Malformed notation declaration" + unless @source.match(/\s+/um, true) + if @source.match(">") + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid name" + end + @source.position = start_position + raise REXML::ParseException.new(message, @source) + end + name = parse_name(base_error_message) + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: true) + unless @source.match(/\s*>/um, true) + message = "#{base_error_message}: garbage before end >" + raise REXML::ParseException.new(message, @source) + end + return [:notationdecl, name, *id] + elsif md = @source.match(/--(.*?)-->/um, true) + case md[1] + when /--/, /-\z/ + raise REXML::ParseException.new("Malformed comment", @source) + end + return [ :comment, md[1] ] if md end - return [:notationdecl, name, *id] - when DOCTYPE_END + elsif match = @source.match(/(%.*?;)\s*/um, true) + return [ :externalentity, match[1] ] + elsif @source.match(/\]\s*>/um, true) @document_status = :after_doctype - @source.match( DOCTYPE_END, true ) return [ :end_doctype ] end + if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source) + end end if @document_status == :after_doctype - @source.match(/\A\s*/um, true) + @source.match(/\s*/um, true) end begin - @source.read if @source.buffer.size<2 - if @source.buffer[0] == ?< - if @source.buffer[1] == ?/ - @nsstack.shift + start_position = @source.position + if @source.match("<", true) + # :text's read_until may remain only "<" in buffer. In the + # case, buffer is empty here. So we need to fill buffer + # here explicitly. 
+ @source.ensure_buffer + if @source.match("/", true) + @namespaces_restore_stack.pop last_tag = @tags.pop - #md = @source.match_to_consume( '>', CLOSE_MATCH) - md = @source.match( CLOSE_MATCH, true ) + md = @source.match(Private::CLOSE_PATTERN, true) if md and !last_tag message = "Unexpected top-level end tag (got '#{md[1]}')" raise REXML::ParseException.new(message, @source) end if md.nil? or last_tag != md[1] message = "Missing end tag for '#{last_tag}'" - message << " (got '#{md[1]}')" if md + message += " (got '#{md[1]}')" if md + @source.position = start_position if md.nil? raise REXML::ParseException.new(message, @source) end return [ :end_element, last_tag ] - elsif @source.buffer[1] == ?! - md = @source.match(/\A(\s*[^>]*>)/um) + elsif @source.match("!", true) + md = @source.match(/([^>]*>)/um) #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md - if md[0][2] == ?- - md = @source.match( COMMENT_PATTERN, true ) + if md[0][0] == ?- + md = @source.match(/--(.*?)-->/um, true) - case md[1] - when /--/, /-\z/ + if md.nil? || /--|-\z/.match?(md[1]) raise REXML::ParseException.new("Malformed comment", @source) end - return [ :comment, md[1] ] if md + return [ :comment, md[1] ] else - md = @source.match( CDATA_PATTERN, true ) + md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true) return [ :cdata, md[1] ] if md end raise REXML::ParseException.new( "Declarations can only occur "+ "in the doctype declaration.", @source) - elsif @source.buffer[1] == ?? 
- md = @source.match( INSTRUCTION_PATTERN, true ) - return [ :processing_instruction, md[1], md[2] ] if md - raise REXML::ParseException.new( "Bad instruction declaration", - @source) + elsif @source.match("?", true) + return process_instruction else # Get the next tag - md = @source.match(TAG_MATCH, true) + md = @source.match(Private::TAG_PATTERN, true) unless md - # Check for missing attribute quotes - raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) + @source.position = start_position raise REXML::ParseException.new("malformed XML: missing tag start", @source) end + tag = md[1] @document_status = :in_element - prefixes = Set.new - prefixes << md[2] if md[2] - @nsstack.unshift(curr_ns=Set.new) - attributes, closed = parse_attributes(prefixes, curr_ns) + @prefixes.clear + @prefixes << md[2] if md[2] + push_namespaces_restore + attributes, closed = parse_attributes(@prefixes) # Verify that all of the prefixes have been defined - for prefix in prefixes - unless @nsstack.find{|k| k.member?(prefix)} + for prefix in @prefixes + unless @namespaces.key?(prefix) raise UndefinedNamespaceException.new(prefix,@source,self) end end if closed - @closed = md[1] - @nsstack.shift + @closed = tag + pop_namespaces_restore else - @tags.push( md[1] ) + if @tags.empty? 
and @have_root + raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source) + end + @tags.push( tag ) end - return [ :start_element, md[1], attributes ] + @have_root = true + return [ :start_element, tag, attributes ] end else - md = @source.match( TEXT_PATTERN, true ) - if md[0].length == 0 - @source.match( /(\s+)/, true ) + text = @source.read_until("<") + if text.chomp!("<") + @source.position -= "<".bytesize end - #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0 - #return [ :text, "" ] if md[0].length == 0 - # unnormalized = Text::unnormalize( md[1], self ) - # return PullEvent.new( :text, md[1], unnormalized ) - return [ :text, md[1] ] + if @tags.empty? + unless /\A\s*\z/.match?(text) + if @have_root + raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) + else + raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source) + end + end + return pull_event if @have_root + end + return [ :text, text ] end rescue REXML::UndefinedNamespaceException raise rescue REXML::ParseException raise - rescue Exception, NameError => error + rescue => error raise REXML::ParseException.new( "Exception parsing", @source, self, (error ? error : $!) ) end @@ -451,13 +533,13 @@ def pull_event private :pull_event def entity( reference, entities ) - value = nil - value = entities[ reference ] if entities - if not value - value = DEFAULT_ENTITIES[ reference ] - value = value[2] if value - end - unnormalize( value, entities ) if value + return unless entities + + value = entities[ reference ] + return if value.nil? 
+ + record_entity_expansion + unnormalize( value, entities ) end # Escapes all possible entities @@ -478,35 +560,83 @@ def normalize( input, entities=nil, entity_filter=nil ) # Unescapes all possible entities def unnormalize( string, entities=nil, filter=nil ) - rv = string.clone - rv.gsub!( /\r\n?/, "\n" ) + if string.include?("\r") + rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" ) + else + rv = string.dup + end matches = rv.scan( REFERENCE_RE ) return rv if matches.size == 0 - rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { + rv.gsub!( Private::CHARACTER_REFERENCES ) { m=$1 m = "0#{m}" if m[0] == ?x [Integer(m)].pack('U*') } matches.collect!{|x|x[0]}.compact! + if filter + matches.reject! do |entity_reference| + filter.include?(entity_reference) + end + end if matches.size > 0 - matches.each do |entity_reference| - unless filter and filter.include?(entity_reference) - entity_value = entity( entity_reference, entities ) - if entity_value - re = /&#{entity_reference};/ - rv.gsub!( re, entity_value ) - else - er = DEFAULT_ENTITIES[entity_reference] - rv.gsub!( er[0], er[2] ) if er + matches.tally.each do |entity_reference, n| + entity_expansion_count_before = @entity_expansion_count + entity_value = entity( entity_reference, entities ) + if entity_value + if n > 1 + entity_expansion_count_delta = + @entity_expansion_count - entity_expansion_count_before + record_entity_expansion(entity_expansion_count_delta * (n - 1)) end + re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ + rv.gsub!( re, entity_value ) + if rv.bytesize > @entity_expansion_text_limit + raise "entity expansion has grown too large" + end + else + er = DEFAULT_ENTITIES[entity_reference] + rv.gsub!( er[0], er[2] ) if er end end - rv.gsub!( /&/, '&' ) + rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' ) end rv end private + def add_namespace(prefix, uri) + @namespaces_restore_stack.last[prefix] = @namespaces[prefix] + if uri.nil? 
+ @namespaces.delete(prefix) + else + @namespaces[prefix] = uri + end + end + + def push_namespaces_restore + namespaces_restore = {} + @namespaces_restore_stack.push(namespaces_restore) + namespaces_restore + end + + def pop_namespaces_restore + namespaces_restore = @namespaces_restore_stack.pop + namespaces_restore.each do |prefix, uri| + if uri.nil? + @namespaces.delete(prefix) + else + @namespaces[prefix] = uri + end + end + end + + def record_entity_expansion(delta=1) + @entity_expansion_count += delta + if @entity_expansion_count > @entity_expansion_limit + raise "number of entity expansions exceeded, processing aborted." + end + end + def need_source_encoding_update?(xml_declaration_encoding) return false if xml_declaration_encoding.nil? return false if /\AUTF-16\z/i =~ xml_declaration_encoding @@ -514,16 +644,16 @@ def need_source_encoding_update?(xml_declaration_encoding) end def parse_name(base_error_message) - md = @source.match(/\A\s*#{NAME}/um, true) + md = @source.match(Private::NAME_PATTERN, true) unless md - if @source.match(/\A\s*\S/um) + if @source.match(/\S/um) message = "#{base_error_message}: invalid name" else message = "#{base_error_message}: name is missing" end raise REXML::ParseException.new(message, @source) end - md[1] + md[0] end def parse_id(base_error_message, @@ -592,88 +722,115 @@ def parse_id_invalid_details(accept_external_id:, end end - def parse_attributes(prefixes, curr_ns) - attributes = {} - closed = false - match_data = @source.match(/^(.*?)(\/)?>/um, true) - if match_data.nil? 
- message = "Start tag isn't ended" - raise REXML::ParseException.new(message, @source) + def process_instruction + name = parse_name("Malformed XML: Invalid processing instruction node") + if @source.match(/\s+/um, true) + match_data = @source.match(/(.*?)\?>/um, true) + unless match_data + raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) + end + content = match_data[1] + else + content = nil + unless @source.match("?>", true) + raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) + end end - - raw_attributes = match_data[1] - closed = !match_data[2].nil? - return attributes, closed if raw_attributes.nil? - return attributes, closed if raw_attributes.empty? - - scanner = StringScanner.new(raw_attributes) - until scanner.eos? - if scanner.scan(/\s+/) - break if scanner.eos? + if name == "xml" + if @document_status + raise ParseException.new("Malformed XML: XML declaration is not at the start", @source) + end + version = VERSION.match(content) + version = version[1] unless version.nil? + encoding = ENCODING.match(content) + encoding = encoding[1] unless encoding.nil? + if need_source_encoding_update?(encoding) + @source.encoding = encoding + end + if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding + encoding = "UTF-16" end + standalone = STANDALONE.match(content) + standalone = standalone[1] unless standalone.nil? 
+ return [ :xmldecl, version, encoding, standalone ] + end + [:processing_instruction, name, content] + end - pos = scanner.pos - loop do - break if scanner.scan(ATTRIBUTE_PATTERN) - unless scanner.scan(QNAME) - message = "Invalid attribute name: <#{scanner.rest}>" - raise REXML::ParseException.new(message, @source) - end - name = scanner[0] - unless scanner.scan(/\s*=\s*/um) + def parse_attributes(prefixes) + attributes = {} + expanded_names = {} + closed = false + while true + if @source.match(">", true) + return attributes, closed + elsif @source.match("/>", true) + closed = true + return attributes, closed + elsif match = @source.match(QNAME, true) + name = match[1] + prefix = match[2] + local_part = match[3] + + unless @source.match(/\s*=\s*/um, true) message = "Missing attribute equal: <#{name}>" raise REXML::ParseException.new(message, @source) end - quote = scanner.scan(/['"]/) - unless quote + unless match = @source.match(/(['"])/, true) message = "Missing attribute value start quote: <#{name}>" raise REXML::ParseException.new(message, @source) end - unless scanner.scan(/.*#{Regexp.escape(quote)}/um) - match_data = @source.match(/^(.*?)(\/)?>/um, true) - if match_data - scanner << "/" if closed - scanner << ">" - scanner << match_data[1] - scanner.pos = pos - closed = !match_data[2].nil? 
- next - end - message = - "Missing attribute value end quote: <#{name}>: <#{quote}>" + quote = match[1] + start_position = @source.position + value = @source.read_until(quote) + unless value.chomp!(quote) + @source.position = start_position + message = "Missing attribute value end quote: <#{name}>: <#{quote}>" raise REXML::ParseException.new(message, @source) end - end - name = scanner[1] - prefix = scanner[2] - local_part = scanner[3] - # quote = scanner[4] - value = scanner[5] - if prefix == "xmlns" - if local_part == "xml" - if value != "http://www.w3.org/XML/1998/namespace" - msg = "The 'xml' prefix must not be bound to any other namespace "+ + @source.match(/\s*/um, true) + if prefix == "xmlns" + if local_part == "xml" + if value != Private::XML_PREFIXED_NAMESPACE + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif local_part == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self ) + raise REXML::ParseException.new( msg, @source, self) end - elsif local_part == "xmlns" - msg = "The 'xmlns' prefix must not be declared "+ - "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self) + add_namespace(local_part, value) + elsif prefix + prefixes << prefix unless prefix == "xml" end - curr_ns << local_part - elsif prefix - prefixes << prefix unless prefix == "xml" - end - if attributes.has_key?(name) - msg = "Duplicate attribute #{name.inspect}" - raise REXML::ParseException.new(msg, @source, self) - end + if attributes[name] + msg = "Duplicate attribute #{name.inspect}" + raise REXML::ParseException.new(msg, @source, self) + end - attributes[name] = value + unless prefix == "xmlns" + uri = @namespaces[prefix] + expanded_name = [uri, local_part] + existing_prefix = 
expanded_names[expanded_name] + if existing_prefix + message = "Namespace conflict in adding attribute " + + "\"#{local_part}\": " + + "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " + + "prefix \"#{prefix}\" = \"#{uri}\"" + raise REXML::ParseException.new(message, @source, self) + end + expanded_names[expanded_name] = prefix + end + + attributes[name] = value + else + message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>" + raise REXML::ParseException.new(message, @source) + end end - return attributes, closed end end end diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb index f0601ae51b..bdc08276a9 100644 --- a/lib/rexml/parsers/lightparser.rb +++ b/lib/rexml/parsers/lightparser.rb @@ -1,7 +1,7 @@ # frozen_string_literal: false -require 'rexml/parsers/streamparser' -require 'rexml/parsers/baseparser' -require 'rexml/light/node' +require_relative 'streamparser' +require_relative 'baseparser' +require_relative '../light/node' module REXML module Parsers diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index 8c49217553..a331eff524 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -1,9 +1,9 @@ # frozen_string_literal: false require 'forwardable' -require 'rexml/parseexception' -require 'rexml/parsers/baseparser' -require 'rexml/xmltokens' +require_relative '../parseexception' +require_relative 'baseparser' +require_relative '../xmltokens' module REXML module Parsers @@ -47,6 +47,18 @@ def add_listener( listener ) @listeners << listener end + def entity_expansion_count + @parser.entity_expansion_count + end + + def entity_expansion_limit=( limit ) + @parser.entity_expansion_limit = limit + end + + def entity_expansion_text_limit=( limit ) + @parser.entity_expansion_text_limit = limit + end + def each while has_next? 
yield self.pull diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 1386f69c83..a51477de21 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -1,8 +1,8 @@ # frozen_string_literal: false -require 'rexml/parsers/baseparser' -require 'rexml/parseexception' -require 'rexml/namespace' -require 'rexml/text' +require_relative 'baseparser' +require_relative '../parseexception' +require_relative '../namespace' +require_relative '../text' module REXML module Parsers @@ -22,6 +22,18 @@ def source @parser.source end + def entity_expansion_count + @parser.entity_expansion_count + end + + def entity_expansion_limit=( limit ) + @parser.entity_expansion_limit = limit + end + + def entity_expansion_text_limit=( limit ) + @parser.entity_expansion_text_limit = limit + end + def add_listener( listener ) @parser.add_listener( listener ) end @@ -157,25 +169,8 @@ def parse end end when :text - #normalized = @parser.normalize( event[1] ) - #handle( :characters, normalized ) - copy = event[1].clone - - esub = proc { |match| - if @entities.has_key?($1) - @entities[$1].gsub(Text::REFERENCE, &esub) - else - match - end - } - - copy.gsub!( Text::REFERENCE, &esub ) - copy.gsub!( Text::NUMERICENTITY ) {|m| - m=$1 - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') - } - handle( :characters, copy ) + unnormalized = @parser.unnormalize( event[1], @entities ) + handle( :characters, unnormalized ) when :entitydecl handle_entitydecl( event ) when :processing_instruction, :comment, :attlistdecl, @@ -264,6 +259,8 @@ def add( pair ) end def get_namespace( prefix ) + return nil if @namespace_stack.empty? + uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) || (@namespace_stack.find { |ns| not ns[nil].nil? }) uris[-1][prefix] unless uris.nil? 
or 0 == uris.size diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index f6a8bfa802..6c64d97893 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require "rexml/parsers/baseparser" +require_relative "baseparser" module REXML module Parsers @@ -7,37 +7,42 @@ class StreamParser def initialize source, listener @listener = listener @parser = BaseParser.new( source ) - @tag_stack = [] + @entities = {} end def add_listener( listener ) @parser.add_listener( listener ) end + def entity_expansion_count + @parser.entity_expansion_count + end + + def entity_expansion_limit=( limit ) + @parser.entity_expansion_limit = limit + end + + def entity_expansion_text_limit=( limit ) + @parser.entity_expansion_text_limit = limit + end + def parse # entity string while true event = @parser.pull case event[0] when :end_document - unless @tag_stack.empty? - tag_path = "/" + @tag_stack.join("/") - raise ParseException.new("Missing end tag for '#{tag_path}'", - @parser.source) - end return when :start_element - @tag_stack << event[1] attrs = event[2].each do |n, v| event[2][n] = @parser.unnormalize( v ) end @listener.tag_start( event[1], attrs ) when :end_element @listener.tag_end( event[1] ) - @tag_stack.pop when :text - normalized = @parser.unnormalize( event[1] ) - @listener.text( normalized ) + unnormalized = @parser.unnormalize( event[1], @entities ) + @listener.text( unnormalized ) when :processing_instruction @listener.instruction( *event[1,2] ) when :start_doctype @@ -48,6 +53,7 @@ def parse when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl @listener.send( event[0].to_s, *event[1..-1] ) when :entitydecl, :notationdecl + @entities[ event[1] ] = event[2] if event.size == 3 @listener.send( event[0].to_s, event[1..-1] ) when :externalentity entity_reference = event[1] diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 
fc0993c72a..4565a406cb 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/validation/validationexception' -require 'rexml/undefinednamespaceexception' +require_relative '../validation/validationexception' +require_relative '../undefinednamespaceexception' module REXML module Parsers @@ -15,8 +15,6 @@ def add_listener( listener ) end def parse - tag_stack = [] - in_doctype = false entities = nil begin while true @@ -24,32 +22,24 @@ def parse #STDERR.puts "TREEPARSER GOT #{event.inspect}" case event[0] when :end_document - unless tag_stack.empty? - raise ParseException.new("No close tag for #{@build_context.xpath}", - @parser.source, @parser) - end return when :start_element - tag_stack.push(event[1]) el = @build_context = @build_context.add_element( event[1] ) event[2].each do |key, value| el.attributes[key]=Attribute.new(key,value,self) end when :end_element - tag_stack.pop @build_context = @build_context.parent when :text - if not in_doctype - if @build_context[-1].instance_of? Text - @build_context[-1] << event[1] - else - @build_context.add( - Text.new(event[1], @build_context.whitespace, nil, true) - ) unless ( - @build_context.ignore_whitespace_nodes and - event[1].strip.size==0 - ) - end + if @build_context[-1].instance_of? 
Text + @build_context[-1] << event[1] + else + @build_context.add( + Text.new(event[1], @build_context.whitespace, nil, true) + ) unless ( + @build_context.ignore_whitespace_nodes and + event[1].strip.size==0 + ) end when :comment c = Comment.new( event[1] ) @@ -60,14 +50,12 @@ def parse when :processing_instruction @build_context.add( Instruction.new( event[1], event[2] ) ) when :end_doctype - in_doctype = false entities.each { |k,v| entities[k] = @build_context.entities[k].value } @build_context = @build_context.parent when :start_doctype doctype = DocType.new( event[1..-1], @build_context ) @build_context = doctype entities = {} - in_doctype = true when :attlistdecl n = AttlistDecl.new( event[1..-1] ) @build_context.add( n ) diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb index 6571d119bd..e0029f43da 100644 --- a/lib/rexml/parsers/ultralightparser.rb +++ b/lib/rexml/parsers/ultralightparser.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/parsers/streamparser' -require 'rexml/parsers/baseparser' +require_relative 'streamparser' +require_relative 'baseparser' module REXML module Parsers diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 32b70bb798..bd3b6856eb 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -1,6 +1,7 @@ # frozen_string_literal: false -require 'rexml/namespace' -require 'rexml/xmltokens' + +require_relative '../namespace' +require_relative '../xmltokens' module REXML module Parsers @@ -22,7 +23,13 @@ def parse path path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces path.gsub!( /\s+([\]\)])/, '\1') parsed = [] - OrExpr(path, parsed) + rest = OrExpr(path, parsed) + if rest + unless rest.strip.empty? 
+ raise ParseException.new("Garbage component exists at the end: " + + "<#{rest}>: <#{path}>") + end + end parsed end @@ -32,108 +39,143 @@ def predicate path parsed end - def abbreviate( path ) - path = path.kind_of?(String) ? parse( path ) : path - string = "" - document = false - while path.size > 0 - op = path.shift + def abbreviate(path_or_parsed) + if path_or_parsed.kind_of?(String) + parsed = parse(path_or_parsed) + else + parsed = path_or_parsed + end + components = [] + component = nil + while parsed.size > 0 + op = parsed.shift case op when :node + component << "node()" when :attribute - string << "/" if string.size > 0 - string << "@" + component = "@" + components << component when :child - string << "/" if string.size > 0 + component = "" + components << component when :descendant_or_self - string << "/" + next_op = parsed[0] + if next_op == :node + parsed.shift + component = "" + components << component + else + component = "descendant-or-self::" + components << component + end when :self - string << "." + next_op = parsed[0] + if next_op == :node + parsed.shift + components << "." + else + component = "self::" + components << component + end when :parent - string << ".." + next_op = parsed[0] + if next_op == :node + parsed.shift + components << ".." 
+ else + component = "parent::" + components << component + end when :any - string << "*" + component << "*" when :text - string << "text()" + component << "text()" when :following, :following_sibling, :ancestor, :ancestor_or_self, :descendant, :namespace, :preceding, :preceding_sibling - string << "/" unless string.size == 0 - string << op.to_s.tr("_", "-") - string << "::" + component = op.to_s.tr("_", "-") << "::" + components << component when :qname - prefix = path.shift - name = path.shift - string << prefix+":" if prefix.size > 0 - string << name + prefix = parsed.shift + name = parsed.shift + component << prefix+":" if prefix.size > 0 + component << name when :predicate - string << '[' - string << predicate_to_string( path.shift ) {|x| abbreviate( x ) } - string << ']' + component << '[' + component << predicate_to_path(parsed.shift) {|x| abbreviate(x)} + component << ']' when :document - document = true + components << "" when :function - string << path.shift - string << "( " - string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )} - string << " )" + component << parsed.shift + component << "( " + component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)} + component << " )" when :literal - string << %Q{ "#{path.shift}" } + component << quote_literal(parsed.shift) else - string << "/" unless string.size == 0 - string << "UNKNOWN(" - string << op.inspect - string << ")" + component << "UNKNOWN(" + component << op.inspect + component << ")" end end - string = "/"+string if document - return string + case components + when [""] + "/" + when ["", ""] + "//" + else + components.join("/") + end end - def expand( path ) - path = path.kind_of?(String) ? 
parse( path ) : path - string = "" + def expand(path_or_parsed) + if path_or_parsed.kind_of?(String) + parsed = parse(path_or_parsed) + else + parsed = path_or_parsed + end + path = "" document = false - while path.size > 0 - op = path.shift + while parsed.size > 0 + op = parsed.shift case op when :node - string << "node()" + path << "node()" when :attribute, :child, :following, :following_sibling, :ancestor, :ancestor_or_self, :descendant, :descendant_or_self, :namespace, :preceding, :preceding_sibling, :self, :parent - string << "/" unless string.size == 0 - string << op.to_s.tr("_", "-") - string << "::" + path << "/" unless path.size == 0 + path << op.to_s.tr("_", "-") + path << "::" when :any - string << "*" + path << "*" when :qname - prefix = path.shift - name = path.shift - string << prefix+":" if prefix.size > 0 - string << name + prefix = parsed.shift + name = parsed.shift + path << prefix+":" if prefix.size > 0 + path << name when :predicate - string << '[' - string << predicate_to_string( path.shift ) { |x| expand(x) } - string << ']' + path << '[' + path << predicate_to_path( parsed.shift ) { |x| expand(x) } + path << ']' when :document document = true else - string << "/" unless string.size == 0 - string << "UNKNOWN(" - string << op.inspect - string << ")" + path << "UNKNOWN(" + path << op.inspect + path << ")" end end - string = "/"+string if document - return string + path = "/"+path if document + path end - def predicate_to_string( path, &block ) - string = "" - case path[0] + def predicate_to_path(parsed, &block) + path = "" + case parsed[0] when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union - op = path.shift + op = parsed.shift case op when :eq op = "=" @@ -150,42 +192,56 @@ def predicate_to_string( path, &block ) when :union op = "|" end - left = predicate_to_string( path.shift, &block ) - right = predicate_to_string( path.shift, &block ) - string << " " - string << left - string << " " - string << 
op.to_s - string << " " - string << right - string << " " + left = predicate_to_path( parsed.shift, &block ) + right = predicate_to_path( parsed.shift, &block ) + path << left + path << " " + path << op.to_s + path << " " + path << right when :function - path.shift - name = path.shift - string << name - string << "( " - string << predicate_to_string( path.shift, &block ) - string << " )" + parsed.shift + name = parsed.shift + path << name + path << "(" + parsed.shift.each_with_index do |argument, i| + path << ", " if i > 0 + path << predicate_to_path(argument, &block) + end + path << ")" when :literal - path.shift - string << " " - string << path.shift.inspect - string << " " + parsed.shift + path << quote_literal(parsed.shift) else - string << " " - string << yield( path ) - string << " " + path << yield( parsed ) end - return string.squeeze(" ") + return path.squeeze(" ") end + # For backward compatibility + alias_method :preciate_to_string, :predicate_to_path private + def quote_literal( literal ) + case literal + when String + # XPath 1.0 does not support escape characters. + # Assumes literal does not contain both single and double quotes. + if literal.include?("'") + "\"#{literal}\"" + else + "'#{literal}'" + end + else + literal.inspect + end + end + #LocationPath # | RelativeLocationPath # | '/' RelativeLocationPath? # | '//' RelativeLocationPath def LocationPath path, parsed - path = path.strip + path = path.lstrip if path[0] == ?/ parsed << :document if path[1] == ?/ @@ -209,7 +265,12 @@ def LocationPath path, parsed # | RelativeLocationPath '//' Step AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/ def RelativeLocationPath path, parsed - while path.size > 0 + loop do + original_path = path + path = path.lstrip + + return original_path if path.empty? + # (axis or @ or <child::>) nodetest predicate > # OR > / Step # (. or ..) 
> @@ -224,43 +285,44 @@ def RelativeLocationPath path, parsed path = path[1..-1] end else + path_before_axis_specifier = path + parsed_not_abberviated = [] if path[0] == ?@ - parsed << :attribute + parsed_not_abberviated << :attribute path = path[1..-1] # Goto Nodetest elsif path =~ AXIS - parsed << $1.tr('-','_').intern + parsed_not_abberviated << $1.tr('-','_').intern path = $' # Goto Nodetest else - parsed << :child + parsed_not_abberviated << :child end - n = [] - path = NodeTest( path, n) - - if path[0] == ?[ - path = Predicate( path, n ) + path_before_node_test = path + path = NodeTest(path, parsed_not_abberviated) + if path == path_before_node_test + return path_before_axis_specifier end + path = Predicate(path, parsed_not_abberviated) - parsed.concat(n) + parsed.concat(parsed_not_abberviated) end - if path.size > 0 - if path[0] == ?/ - if path[1] == ?/ - parsed << :descendant_or_self - parsed << :node - path = path[2..-1] - else - path = path[1..-1] - end - else - return path - end + original_path = path + path = path.lstrip + return original_path if path.empty? 
+ + return original_path if path[0] != ?/ + + if path[1] == ?/ + parsed << :descendant_or_self + parsed << :node + path = path[2..-1] + else + path = path[1..-1] end end - return path end # Returns a 1-1 map of the nodeset @@ -269,15 +331,26 @@ def RelativeLocationPath path, parsed # String, if a name match #NodeTest # | ('*' | NCNAME ':' '*' | QNAME) NameTest - # | NODE_TYPE '(' ')' NodeType + # | '*' ':' NCNAME NameTest since XPath 2.0 + # | NODE_TYPE '(' ')' NodeType # | PI '(' LITERAL ')' PI # | '[' expr ']' Predicate - NCNAMETEST= /^(#{NCNAME_STR}):\*/u + PREFIX_WILDCARD = /^\*:(#{NCNAME_STR})/u + LOCAL_NAME_WILDCARD = /^(#{NCNAME_STR}):\*/u QNAME = Namespace::NAMESPLIT NODE_TYPE = /^(comment|text|node)\(\s*\)/m PI = /^processing-instruction\(/ def NodeTest path, parsed + original_path = path + path = path.lstrip case path + when PREFIX_WILDCARD + prefix = nil + name = $1 + path = $' + parsed << :qname + parsed << prefix + parsed << name when /^\*/ path = $' parsed << :any @@ -288,7 +361,9 @@ def NodeTest path, parsed when PI path = $' literal = nil - if path !~ /^\s*\)/ + if path =~ /^\s*\)/ + path = $' + else path =~ LITERAL literal = $1 path = $' @@ -297,7 +372,7 @@ def NodeTest path, parsed end parsed << :processing_instruction parsed << (literal || '') - when NCNAMETEST + when LOCAL_NAME_WILDCARD prefix = $1 path = $' parsed << :namespace @@ -310,13 +385,17 @@ def NodeTest path, parsed parsed << :qname parsed << prefix parsed << name + else + path = original_path end return path end # Filters the supplied nodeset on the predicate(s) def Predicate path, parsed - return nil unless path[0] == ?[ + original_path = path + path = path.lstrip + return original_path unless path[0] == ?[ predicates = [] while path[0] == ?[ path, expr = get_group(path) @@ -421,13 +500,13 @@ def RelationalExpr path, parsed rest end - #| AdditiveExpr ('+' | S '-') MultiplicativeExpr + #| AdditiveExpr ('+' | '-') MultiplicativeExpr #| MultiplicativeExpr def AdditiveExpr path, parsed n 
= [] rest = MultiplicativeExpr( path, n ) if rest != path - while rest =~ /^\s*(\+| -)\s*/ + while rest =~ /^\s*(\+|-)\s*/ if $1[0] == ?+ n = [ :plus, n, [] ] else @@ -509,13 +588,14 @@ def UnionExpr path, parsed #| LocationPath #| FilterExpr ('/' | '//') RelativeLocationPath def PathExpr path, parsed - path =~ /^\s*/ - path = $' + path = path.lstrip n = [] rest = FilterExpr( path, n ) if rest != path if rest and rest[0] == ?/ - return RelativeLocationPath(rest, n) + rest = RelativeLocationPath(rest, n) + parsed.concat(n) + return rest end end rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/ @@ -527,8 +607,10 @@ def PathExpr path, parsed #| PrimaryExpr def FilterExpr path, parsed n = [] - path = PrimaryExpr( path, n ) - path = Predicate(path, n) if path and path[0] == ?[ + path_before_primary_expr = path + path = PrimaryExpr(path, n) + return path_before_primary_expr if path == path_before_primary_expr + path = Predicate(path, n) parsed.concat(n) path end diff --git a/lib/rexml/quickpath.rb b/lib/rexml/quickpath.rb index 5d6c77ca38..a0466b25d9 100644 --- a/lib/rexml/quickpath.rb +++ b/lib/rexml/quickpath.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/functions' -require 'rexml/xmltokens' +require_relative 'functions' +require_relative 'xmltokens' module REXML class QuickPath diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index af65cf4751..ff887fc080 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -1,8 +1,28 @@ # coding: US-ASCII # frozen_string_literal: false -require 'rexml/encoding' + +require "strscan" + +require_relative 'encoding' module REXML + if StringScanner::Version < "1.0.0" + module StringScannerCheckScanString + refine StringScanner do + def check(pattern) + pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String) + super(pattern) + end + + def scan(pattern) + pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String) + super(pattern) + end + end + end + using StringScannerCheckScanString + 
end + # Generates Source-s. USE THIS CLASS. class SourceFactory # Generates a Source object @@ -30,18 +50,27 @@ def SourceFactory::create_from(arg) # objects and provides consumption of text class Source include Encoding - # The current buffer (what we're going to read next) - attr_reader :buffer # The line number of the last consumed text attr_reader :line attr_reader :encoding + module Private + SCANNER_RESET_SIZE = 100000 + PRE_DEFINED_TERM_PATTERNS = {} + pre_defined_terms = ["'", '"', "<"] + pre_defined_terms.each do |term| + PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/ + end + end + private_constant :Private + # Constructor # @param arg must be a String, and should be a valid XML document # @param encoding if non-null, sets the encoding of the source to this # value, overriding all encoding detection def initialize(arg, encoding=nil) - @orig = @buffer = arg + @orig = arg + @scanner = StringScanner.new(@orig) if encoding self.encoding = encoding else @@ -50,6 +79,20 @@ def initialize(arg, encoding=nil) @line = 0 end + # The current buffer (what we're going to read next) + def buffer + @scanner.rest + end + + def drop_parsed_content + if @scanner.pos > Private::SCANNER_RESET_SIZE + @scanner.string = @scanner.rest + end + end + + def buffer_encoding=(encoding) + @scanner.string.force_encoding(encoding) + end # Inherited from Encoding # Overridden to support optimized en/decoding @@ -58,98 +101,78 @@ def encoding=(enc) encoding_updated end - # Scans the source for a given pattern. Note, that this is not your - # usual scan() method. For one thing, the pattern argument has some - # requirements; for another, the source can be consumed. You can easily - # confuse this method. 
Originally, the patterns were easier - # to construct and this method more robust, because this method - # generated search regexps on the fly; however, this was - # computationally expensive and slowed down the entire REXML package - # considerably, since this is by far the most commonly called method. - # @param pattern must be a Regexp, and must be in the form of - # /^\s*(#{your pattern, with no groups})(.*)/. The first group - # will be returned; the second group is used if the consume flag is - # set. - # @param consume if true, the pattern returned will be consumed, leaving - # everything after it in the Source. - # @return the pattern, if found, or nil if the Source is empty or the - # pattern is not found. - def scan(pattern, cons=false) - return nil if @buffer.nil? - rv = @buffer.scan(pattern) - @buffer = $' if cons and rv.size>0 - rv + def read(term = nil) end - def read + def read_until(term) + pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ + data = @scanner.scan_until(pattern) + unless data + data = @scanner.rest + @scanner.pos = @scanner.string.bytesize + end + data end - def consume( pattern ) - @buffer = $' if pattern.match( @buffer ) + def ensure_buffer end - def match_to( char, pattern ) - return pattern.match(@buffer) + def match(pattern, cons=false) + if cons + @scanner.scan(pattern).nil? ? nil : @scanner + else + @scanner.check(pattern).nil? ? nil : @scanner + end end - def match_to_consume( char, pattern ) - md = pattern.match(@buffer) - @buffer = $' - return md + def position + @scanner.pos end - def match(pattern, cons=false) - md = pattern.match(@buffer) - @buffer = $' if cons and md - return md + def position=(pos) + @scanner.pos = pos end # @return true if the Source is exhausted def empty? - @buffer == "" - end - - def position - @orig.index( @buffer ) + @scanner.eos? 
end # @return the current line in the source def current_line lines = @orig.split - res = lines.grep @buffer[0..30] + res = lines.grep @scanner.rest[0..30] res = res[-1] if res.kind_of? Array lines.index( res ) if res end private + def detect_encoding - buffer_encoding = @buffer.encoding + scanner_encoding = @scanner.rest.encoding detected_encoding = "UTF-8" begin - @buffer.force_encoding("ASCII-8BIT") - if @buffer[0, 2] == "\xfe\xff" - @buffer[0, 2] = "" + @scanner.string.force_encoding("ASCII-8BIT") + if @scanner.scan(/\xfe\xff/n) detected_encoding = "UTF-16BE" - elsif @buffer[0, 2] == "\xff\xfe" - @buffer[0, 2] = "" + elsif @scanner.scan(/\xff\xfe/n) detected_encoding = "UTF-16LE" - elsif @buffer[0, 3] == "\xef\xbb\xbf" - @buffer[0, 3] = "" + elsif @scanner.scan(/\xef\xbb\xbf/n) detected_encoding = "UTF-8" end ensure - @buffer.force_encoding(buffer_encoding) + @scanner.string.force_encoding(scanner_encoding) end self.encoding = detected_encoding end def encoding_updated if @encoding != 'UTF-8' - @buffer = decode(@buffer) + @scanner.string = decode(@scanner.rest) @to_utf = true else @to_utf = false - @buffer.force_encoding ::Encoding::UTF_8 + @scanner.string.force_encoding(::Encoding::UTF_8) end end end @@ -172,7 +195,7 @@ def initialize(arg, block_size=500, encoding=nil) end if !@to_utf and - @buffer.respond_to?(:force_encoding) and + @orig.respond_to?(:force_encoding) and @source.respond_to?(:external_encoding) and @source.external_encoding != ::Encoding::UTF_8 @force_utf8 = true @@ -181,65 +204,72 @@ def initialize(arg, block_size=500, encoding=nil) end end - def scan(pattern, cons=false) - rv = super - # You'll notice that this next section is very similar to the same - # section in match(), but just a liiittle different. This is - # because it is a touch faster to do it this way with scan() - # than the way match() does it; enough faster to warrant duplicating - # some code - if rv.size == 0 - until @buffer =~ pattern or @source.nil? 
- begin - @buffer << readline - rescue Iconv::IllegalSequence - raise - rescue - @source = nil + def read(term = nil, min_bytes = 1) + term = encode(term) if term + begin + str = readline(term) + @scanner << str + read_bytes = str.bytesize + begin + while read_bytes < min_bytes + str = readline(term) + @scanner << str + read_bytes += str.bytesize end + rescue IOError end - rv = super + true + rescue Exception, NameError + @source = nil + false end - rv.taint - rv end - def read - begin - @buffer << readline - rescue Exception, NameError - @source = nil + def read_until(term) + pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ + term = encode(term) + until str = @scanner.scan_until(pattern) + break if @source.nil? + break if @source.eof? + @scanner << readline(term) + end + if str + read if @scanner.eos? and !@source.eof? + str + else + rest = @scanner.rest + @scanner.pos = @scanner.string.bytesize + rest end end - def consume( pattern ) - match( pattern, true ) + def ensure_buffer + read if @scanner.eos? && @source end def match( pattern, cons=false ) - rv = pattern.match(@buffer) - @buffer = $' if cons and rv - while !rv and @source - begin - @buffer << readline - rv = pattern.match(@buffer) - @buffer = $' if cons and rv - rescue - @source = nil + # To avoid performance issue, we need to increase bytes to read per scan + min_bytes = 1 + while true + if cons + md = @scanner.scan(pattern) + else + md = @scanner.check(pattern) end + break if md + return nil if pattern.is_a?(String) + return nil if @source.nil? + return nil unless read(nil, min_bytes) + min_bytes *= 2 end - rv.taint - rv + + md.nil? ? nil : @scanner end def empty? super and ( @source.nil? || @source.eof? 
) end - def position - @er_source.pos rescue 0 - end - # @return the current line in the source def current_line begin @@ -254,6 +284,7 @@ def current_line end rescue end + @er_source.seek(pos) rescue IOError pos = -1 line = -1 @@ -262,8 +293,8 @@ def current_line end private - def readline - str = @source.readline(@line_break) + def readline(term = nil) + str = @source.readline(term || @line_break) if @pending_buffer if str.nil? str = @pending_buffer @@ -289,7 +320,7 @@ def encoding_updated @source.set_encoding(@encoding, @encoding) end @line_break = encode(">") - @pending_buffer, @buffer = @buffer, "" + @pending_buffer, @scanner.string = @scanner.rest, "" @pending_buffer.force_encoding(@encoding) super end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 86269dea1e..997f77d3f5 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -1,10 +1,10 @@ -# frozen_string_literal: false -require 'rexml/security' -require 'rexml/entity' -require 'rexml/doctype' -require 'rexml/child' -require 'rexml/doctype' -require 'rexml/parseexception' +# frozen_string_literal: true +require_relative 'security' +require_relative 'entity' +require_relative 'doctype' +require_relative 'child' +require_relative 'doctype' +require_relative 'parseexception' module REXML # Represents text nodes in an XML document @@ -96,27 +96,28 @@ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, @raw = false @parent = nil + @entity_filter = nil if parent super( parent ) @raw = parent.raw end - @raw = raw unless raw.nil? - @entity_filter = entity_filter - clear_cache - if arg.kind_of? String @string = arg.dup - @string.squeeze!(" \n\t") unless respect_whitespace elsif arg.kind_of? 
Text - @string = arg.to_s + @string = arg.instance_variable_get(:@string).dup @raw = arg.raw - elsif + @entity_filter = arg.instance_variable_get(:@entity_filter) + else raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})" end - @string.gsub!( /\r\n?/, "\n" ) + @string.squeeze!(" \n\t") unless respect_whitespace + @string.gsub!(/\r\n?/, "\n") + @raw = raw unless raw.nil? + @entity_filter = entity_filter if entity_filter + clear_cache Text.check(@string, illegal, doctype) if @raw end @@ -130,13 +131,13 @@ def parent= parent def Text.check string, pattern, doctype # illegal anywhere - if string !~ VALID_XML_CHARS + if !string.match?(VALID_XML_CHARS) if String.method_defined? :encode string.chars.each do |c| case c.ord when *VALID_CHAR else - raise "Illegal character #{c.inspect} in raw string \"#{string}\"" + raise "Illegal character #{c.inspect} in raw string #{string.inspect}" end end else @@ -144,31 +145,51 @@ def Text.check string, pattern, doctype case c.unpack('U') when *VALID_CHAR else - raise "Illegal character #{c.inspect} in raw string \"#{string}\"" + raise "Illegal character #{c.inspect} in raw string #{string.inspect}" end end end end - # context sensitive - string.scan(pattern) do - if $1[-1] != ?; - raise "Illegal character '#{$1}' in raw string \"#{string}\"" - elsif $1[0] == ?& - if $5 and $5[0] == ?# - case ($5[1] == ?x ? 
$5[2..-1].to_i(16) : $5[1..-1].to_i) - when *VALID_CHAR + pos = 0 + while (index = string.index(/<|&/, pos)) + if string[index] == "<" + raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}" + end + + unless (end_index = string.index(/[^\s];/, index + 1)) + raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}" + end + + value = string[(index + 1)..end_index] + if /\s/.match?(value) + raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}" + end + + if value[0] == "#" + character_reference = value[1..-1] + + unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference)) + if character_reference[0] == "x" || character_reference[-1] == "x" + raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}" else - raise "Illegal character '#{$1}' in raw string \"#{string}\"" + raise "Illegal character #{string.inspect} in raw string #{string.inspect}" end - # FIXME: below can't work but this needs API change. - # elsif @parent and $3 and !SUBSTITUTES.include?($1) - # if !doctype or !doctype.entities.has_key?($3) - # raise "Undeclared entity '#{$1}' in raw string \"#{string}\"" - # end end + + case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i) + when *VALID_CHAR + else + raise "Illegal character #{string.inspect} in raw string #{string.inspect}" + end + elsif !(/\A#{Entity::NAME}\z/um.match?(value)) + raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}" end + + pos = end_index + 1 end + + string end def node_type @@ -181,7 +202,7 @@ def empty? 
def clone - return Text.new(self) + return Text.new(self, true) end @@ -226,9 +247,7 @@ def doctype # u.to_s #-> "sean russell" def to_s return @string if @raw - return @normalized if @normalized - - @normalized = Text::normalize( @string, doctype, @entity_filter ) + @normalized ||= Text::normalize( @string, doctype, @entity_filter ) end def inspect @@ -249,8 +268,8 @@ def inspect # u = Text.new( "sean russell", false, nil, true ) # u.value #-> "sean russell" def value - return @unnormalized if @unnormalized - @unnormalized = Text::unnormalize( @string, doctype ) + @unnormalized ||= Text::unnormalize(@string, doctype, + entity_expansion_text_limit: document&.entity_expansion_text_limit) end # Sets the contents of this text node. This expects the text to be @@ -266,16 +285,16 @@ def value=( val ) @raw = false end - def wrap(string, width, addnewline=false) - # Recursively wrap string at width. - return string if string.length <= width - place = string.rindex(' ', width) # Position in string with last ' ' before cutoff - if addnewline then - return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) - else - return string[0,place] + "\n" + wrap(string[place+1..-1], width) - end - end + def wrap(string, width, addnewline=false) + # Recursively wrap string at width. 
+ return string if string.length <= width + place = string.rindex(' ', width) # Position in string with last ' ' before cutoff + if addnewline then + return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width) + else + return string[0,place] + "\n" + wrap(string[place+1..-1], width) + end + end def indent_text(string, level=1, style="\t", indentfirstline=true) return string if level < 0 @@ -373,7 +392,7 @@ def Text::normalize( input, doctype=nil, entity_filter=nil ) copy = input.to_s # Doing it like this rather than in a loop improves the speed #copy = copy.gsub( EREFERENCE, '&' ) - copy = copy.gsub( "&", "&" ) + copy = copy.gsub( "&", "&" ) if copy.include?("&") if doctype # Replace all ampersands that aren't part of an entity doctype.entities.each_value do |entity| @@ -384,18 +403,21 @@ def Text::normalize( input, doctype=nil, entity_filter=nil ) else # Replace all ampersands that aren't part of an entity DocType::DEFAULT_ENTITIES.each_value do |entity| - copy = copy.gsub(entity.value, "&#{entity.name};" ) + if copy.include?(entity.value) + copy = copy.gsub(entity.value, "&#{entity.name};" ) + end end end copy end # Unescapes all possible entities - def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil ) + def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil ) + entity_expansion_text_limit ||= Security.entity_expansion_text_limit sum = 0 string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { s = Text.expand($&, doctype, filter) - if sum + s.bytesize > Security.entity_expansion_text_limit + if sum + s.bytesize > entity_expansion_text_limit raise "entity expansion has grown too large" else sum += s.bytesize diff --git a/lib/rexml/undefinednamespaceexception.rb b/lib/rexml/undefinednamespaceexception.rb index e522ed57ea..492a098183 100644 --- a/lib/rexml/undefinednamespaceexception.rb +++ b/lib/rexml/undefinednamespaceexception.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 
'rexml/parseexception' +require_relative 'parseexception' module REXML class UndefinedNamespaceException < ParseException def initialize( prefix, source, parser ) diff --git a/lib/rexml/validation/relaxng.rb b/lib/rexml/validation/relaxng.rb index fb52438290..f29a2c05e5 100644 --- a/lib/rexml/validation/relaxng.rb +++ b/lib/rexml/validation/relaxng.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require "rexml/validation/validation" -require "rexml/parsers/baseparser" +require_relative "validation" +require_relative "../parsers/baseparser" module REXML module Validation diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb index f0c76f976c..0ad6ada427 100644 --- a/lib/rexml/validation/validation.rb +++ b/lib/rexml/validation/validation.rb @@ -1,5 +1,5 @@ # frozen_string_literal: false -require 'rexml/validation/validationexception' +require_relative 'validationexception' module REXML module Validation diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index a37e9f3ddc..d19407cefd 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -1,17 +1,18 @@ # frozen_string_literal: false -require 'rexml/encoding' -require 'rexml/source' + +require_relative 'encoding' +require_relative 'source' module REXML # NEEDS DOCUMENTATION class XMLDecl < Child include Encoding - DEFAULT_VERSION = "1.0"; - DEFAULT_ENCODING = "UTF-8"; - DEFAULT_STANDALONE = "no"; - START = '<\?xml'; - STOP = '\?>'; + DEFAULT_VERSION = "1.0" + DEFAULT_ENCODING = "UTF-8" + DEFAULT_STANDALONE = "no" + START = "<?xml" + STOP = "?>" attr_accessor :version, :standalone attr_reader :writeencoding, :writethis @@ -25,6 +26,7 @@ def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil) self.encoding = version.encoding @writeencoding = version.writeencoding @standalone = version.standalone + @writethis = version.writethis else super() @version = version @@ -46,9 +48,9 @@ def clone # Ignored def write(writer, indent=-1, transitive=false, ie_hack=false) 
return nil unless @writethis or writer.kind_of? Output - writer << START.sub(/\\/u, '') + writer << START writer << " #{content encoding}" - writer << STOP.sub(/\\/u, '') + writer << STOP end def ==( other ) @@ -102,14 +104,26 @@ def dowrite end def inspect - START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '') + "#{START} ... #{STOP}" end private def content(enc) - rv = "version='#@version'" - rv << " encoding='#{enc}'" if @writeencoding || enc !~ /\Autf-8\z/i - rv << " standalone='#@standalone'" if @standalone + context = nil + context = parent.context if parent + if context and context[:prologue_quote] == :quote + quote = "\"" + else + quote = "'" + end + + rv = "version=#{quote}#{@version}#{quote}" + if @writeencoding or enc !~ /\Autf-8\z/i + rv << " encoding=#{quote}#{enc}#{quote}" + end + if @standalone + rv << " standalone=#{quote}#{@standalone}#{quote}" + end rv end end diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb index f1cb99baea..a0921bd8e1 100644 --- a/lib/rexml/xpath.rb +++ b/lib/rexml/xpath.rb @@ -1,6 +1,6 @@ # frozen_string_literal: false -require 'rexml/functions' -require 'rexml/xpath_parser' +require_relative 'functions' +require_relative 'xpath_parser' module REXML # Wrapper class. Use this class to access the XPath functions. @@ -28,10 +28,10 @@ class XPath # XPath.first( doc, "//b"} ) # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } ) # XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) - def XPath::first element, path=nil, namespaces=nil, variables={} + def XPath::first(element, path=nil, namespaces=nil, variables={}, options={}) raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash) raise "The variables argument, if supplied, must be a hash object." 
unless variables.kind_of?(Hash) - parser = XPathParser.new + parser = XPathParser.new(**options) parser.namespaces = namespaces parser.variables = variables path = "*" unless path @@ -57,10 +57,10 @@ def XPath::first element, path=nil, namespaces=nil, variables={} # XPath.each( node, 'ancestor::x' ) { |el| ... } # XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \ # {|el| ... } - def XPath::each element, path=nil, namespaces=nil, variables={}, &block + def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &block) raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash) raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash) - parser = XPathParser.new + parser = XPathParser.new(**options) parser.namespaces = namespaces parser.variables = variables path = "*" unless path @@ -69,8 +69,8 @@ def XPath::each element, path=nil, namespaces=nil, variables={}, &block end # Returns an array of nodes matching a given XPath. 
- def XPath::match element, path=nil, namespaces=nil, variables={} - parser = XPathParser.new + def XPath::match(element, path=nil, namespaces=nil, variables={}, options={}) + parser = XPathParser.new(**options) parser.namespaces = namespaces parser.variables = variables path = "*" unless path diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 181b2b6e85..5eb1e5a961 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -1,43 +1,51 @@ # frozen_string_literal: false -require 'rexml/namespace' -require 'rexml/xmltokens' -require 'rexml/attribute' -require 'rexml/syncenumerator' -require 'rexml/parsers/xpathparser' - -class Object - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone - clone - end -end -class Symbol - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone ; self ; end -end -class Integer - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone ; self ; end -end -class Float - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object types - def dclone ; self ; end -end -class Array - # provides a unified +clone+ operation, for REXML::XPathParser - # to use across multiple Object+ types - def dclone - klone = self.clone - klone.clear - self.each{|v| klone << v.dclone} - klone + +require "pp" + +require_relative 'namespace' +require_relative 'xmltokens' +require_relative 'attribute' +require_relative 'parsers/xpathparser' + +module REXML + module DClonable + refine Object do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone + clone + end + end + refine Symbol do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone ; self ; end + end + refine Integer do + # 
provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone ; self ; end + end + refine Float do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object types + def dclone ; self ; end + end + refine Array do + # provides a unified +clone+ operation, for REXML::XPathParser + # to use across multiple Object+ types + def dclone + klone = self.clone + klone.clear + self.each{|v| klone << v.dclone} + klone + end + end end end +using REXML::DClonable + module REXML # You don't want to use this class. Really. Use XPath, which is a wrapper # for this class. Believe me. You don't want to poke around in here. @@ -47,10 +55,15 @@ class XPathParser include XMLTokens LITERAL = /^'([^']*)'|^"([^"]*)"/u - def initialize( ) + DEBUG = (ENV["REXML_XPATH_PARSER_DEBUG"] == "true") + + def initialize(strict: false) + @debug = DEBUG @parser = REXML::Parsers::XPathParser.new @namespaces = nil @variables = {} + @nest = 0 + @strict = strict end def namespaces=( namespaces={} ) @@ -75,7 +88,7 @@ def get_first path, nodeset def predicate path, nodeset path_stack = @parser.parse( path ) - expr( path_stack, nodeset ) + match( path_stack, nodeset ) end def []=( variable_name, value ) @@ -123,13 +136,24 @@ def first( path_stack, node ) end - def match( path_stack, nodeset ) - r = expr( path_stack, nodeset ) - r + def match(path_stack, nodeset) + nodeset = nodeset.collect.with_index do |node, i| + position = i + 1 + XPathNode.new(node, position: position) + end + result = expr(path_stack, nodeset) + case result + when Array # nodeset + unnode(result) + else + [result] + end end private - + def strict? 
+ @strict + end # Returns a String namespace for a node, given a prefix # The rules are: @@ -148,343 +172,481 @@ def get_namespace( node, prefix ) # Expr takes a stack of path elements and a set of nodes (either a Parent # or an Array and returns an Array of matching nodes - ALL = [ :attribute, :element, :text, :processing_instruction, :comment ] - ELEMENTS = [ :element ] def expr( path_stack, nodeset, context=nil ) - node_types = ELEMENTS + enter(:expr, path_stack, nodeset) if @debug return nodeset if path_stack.length == 0 || nodeset.length == 0 while path_stack.length > 0 + trace(:while, path_stack, nodeset) if @debug if nodeset.length == 0 path_stack.clear return [] end - case (op = path_stack.shift) + op = path_stack.shift + case op when :document - nodeset = [ nodeset[0].root_node ] - - when :qname - prefix = path_stack.shift - name = path_stack.shift - nodeset.delete_if do |node| - # FIXME: This DOUBLES the time XPath searches take - ns = get_namespace( node, prefix ) - if node.node_type == :element - if node.name == name - end - end - !(node.node_type == :element and - node.name == name and - node.namespace == ns ) - end - node_types = ELEMENTS - - when :any - nodeset.delete_if { |node| !node_types.include?(node.node_type) } - + first_raw_node = nodeset.first.raw_node + nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)] when :self - # This space left intentionally blank - - when :processing_instruction - target = path_stack.shift - nodeset.delete_if do |node| - (node.node_type != :processing_instruction) or - ( target!='' and ( node.target != target ) ) + nodeset = step(path_stack) do + [nodeset] end - - when :text - nodeset.delete_if { |node| node.node_type != :text } - - when :comment - nodeset.delete_if { |node| node.node_type != :comment } - - when :node - # This space left intentionally blank - node_types = ALL - when :child - new_nodeset = [] - nt = nil - nodeset.each do |node| - nt = node.node_type - new_nodeset += node.children if nt 
== :element or nt == :document + nodeset = step(path_stack) do + child(nodeset) end - nodeset = new_nodeset - node_types = ELEMENTS - when :literal + trace(:literal, path_stack, nodeset) if @debug return path_stack.shift - when :attribute - new_nodeset = [] - case path_stack.shift - when :qname - prefix = path_stack.shift - name = path_stack.shift - for element in nodeset - if element.node_type == :element - attrib = element.attribute( name, get_namespace(element, prefix) ) - new_nodeset << attrib if attrib + nodeset = step(path_stack, any_type: :attribute) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.node_type == :element + attributes = raw_node.attributes + next if attributes.empty? + nodesets << attributes.each_attribute.collect.with_index do |attribute, i| + XPathNode.new(attribute, position: i + 1) end end - when :any - for element in nodeset - if element.node_type == :element - new_nodeset += element.attributes.to_a + nodesets + end + when :namespace + pre_defined_namespaces = { + "xml" => "http://www.w3.org/XML/1998/namespace", + } + nodeset = step(path_stack, any_type: :namespace) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + case raw_node.node_type + when :element + if @namespaces + nodesets << pre_defined_namespaces.merge(@namespaces) + else + nodesets << pre_defined_namespaces.merge(raw_node.namespaces) + end + when :attribute + if @namespaces + nodesets << pre_defined_namespaces.merge(@namespaces) + else + nodesets << pre_defined_namespaces.merge(raw_node.element.namespaces) + end end end + nodesets end - nodeset = new_nodeset - when :parent - nodeset = nodeset.collect{|n| n.parent}.compact - #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact) - node_types = ELEMENTS - - when :ancestor - new_nodeset = [] - nodeset.each do |node| - while node.parent - node = node.parent - new_nodeset << node unless new_nodeset.include? 
node + nodeset = step(path_stack) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + if raw_node.node_type == :attribute + parent = raw_node.element + else + parent = raw_node.parent + end + nodesets << [XPathNode.new(parent, position: 1)] if parent end + nodesets end - nodeset = new_nodeset - node_types = ELEMENTS - - when :ancestor_or_self - new_nodeset = [] - nodeset.each do |node| - if node.node_type == :element - new_nodeset << node - while ( node.parent ) - node = node.parent - new_nodeset << node unless new_nodeset.include? node + when :ancestor + nodeset = step(path_stack) do + nodesets = [] + # new_nodes = {} + nodeset.each do |node| + raw_node = node.raw_node + new_nodeset = [] + while raw_node.parent + raw_node = raw_node.parent + # next if new_nodes.key?(node) + new_nodeset << XPathNode.new(raw_node, + position: new_nodeset.size + 1) + # new_nodes[node] = true end + nodesets << new_nodeset unless new_nodeset.empty? end + nodesets end - nodeset = new_nodeset - node_types = ELEMENTS - - when :predicate - new_nodeset = [] - subcontext = { :size => nodeset.size } - pred = path_stack.shift - nodeset.each_with_index { |node, index| - subcontext[ :node ] = node - subcontext[ :index ] = index+1 - pc = pred.dclone - result = expr( pc, [node], subcontext ) - result = result[0] if result.kind_of? Array and result.length == 1 - if result.kind_of? Numeric - new_nodeset << node if result == (index+1) - elsif result.instance_of? 
Array - if result.size > 0 and result.inject(false) {|k,s| s or k} - new_nodeset << node if result.size > 0 + when :ancestor_or_self + nodeset = step(path_stack) do + nodesets = [] + # new_nodes = {} + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.node_type == :element + new_nodeset = [XPathNode.new(raw_node, position: 1)] + # new_nodes[node] = true + while raw_node.parent + raw_node = raw_node.parent + # next if new_nodes.key?(node) + new_nodeset << XPathNode.new(raw_node, + position: new_nodeset.size + 1) + # new_nodes[node] = true end - else - new_nodeset << node if result + nodesets << new_nodeset unless new_nodeset.empty? end - } - nodeset = new_nodeset -=begin - predicate = path_stack.shift - ns = nodeset.clone - result = expr( predicate, ns ) - if result.kind_of? Array - nodeset = result.zip(ns).collect{|m,n| n if m}.compact - else - nodeset = result ? nodeset : [] + nodesets end -=end - when :descendant_or_self - rv = descendant_or_self( path_stack, nodeset ) - path_stack.clear - nodeset = rv - node_types = ELEMENTS - + nodeset = step(path_stack) do + descendant(nodeset, true) + end when :descendant - results = [] - nt = nil - nodeset.each do |node| - nt = node.node_type - results += expr( path_stack.dclone.unshift( :descendant_or_self ), - node.children ) if nt == :element or nt == :document + nodeset = step(path_stack) do + descendant(nodeset, false) end - nodeset = results - node_types = ELEMENTS - when :following_sibling - results = [] - nodeset.each do |node| - next if node.parent.nil? - all_siblings = node.parent.children - current_index = all_siblings.index( node ) - following_siblings = all_siblings[ current_index+1 .. -1 ] - results += expr( path_stack.dclone, following_siblings ) + nodeset = step(path_stack) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.respond_to?(:parent) + next if raw_node.parent.nil? 
+ all_siblings = raw_node.parent.children + current_index = all_siblings.index(raw_node) + following_siblings = all_siblings[(current_index + 1)..-1] + next if following_siblings.empty? + nodesets << following_siblings.collect.with_index do |sibling, i| + XPathNode.new(sibling, position: i + 1) + end + end + nodesets end - nodeset = results - when :preceding_sibling - results = [] - nodeset.each do |node| - next if node.parent.nil? - all_siblings = node.parent.children - current_index = all_siblings.index( node ) - preceding_siblings = all_siblings[ 0, current_index ].reverse - results += preceding_siblings + nodeset = step(path_stack, order: :reverse) do + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + next unless raw_node.respond_to?(:parent) + next if raw_node.parent.nil? + all_siblings = raw_node.parent.children + current_index = all_siblings.index(raw_node) + preceding_siblings = all_siblings[0, current_index].reverse + next if preceding_siblings.empty? + nodesets << preceding_siblings.collect.with_index do |sibling, i| + XPathNode.new(sibling, position: i + 1) + end + end + nodesets end - nodeset = results - node_types = ELEMENTS - when :preceding - new_nodeset = [] - nodeset.each do |node| - new_nodeset += preceding( node ) + nodeset = step(path_stack, order: :reverse) do + unnode(nodeset) do |node| + preceding(node) + end end - nodeset = new_nodeset - node_types = ELEMENTS - when :following - new_nodeset = [] - nodeset.each do |node| - new_nodeset += following( node ) - end - nodeset = new_nodeset - node_types = ELEMENTS - - when :namespace - new_nodeset = [] - prefix = path_stack.shift - nodeset.each do |node| - if (node.node_type == :element or node.node_type == :attribute) - if @namespaces - namespaces = @namespaces - elsif (node.node_type == :element) - namespaces = node.namespaces - else - namespaces = node.element.namesapces - end - if (node.namespace == namespaces[prefix]) - new_nodeset << node - end + nodeset = step(path_stack) 
do + unnode(nodeset) do |node| + following(node) end end - nodeset = new_nodeset - when :variable var_name = path_stack.shift - return @variables[ var_name ] + return [@variables[var_name]] - # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq - # TODO: Special case for :or and :and -- not evaluate the right - # operand if the left alone determines result (i.e. is true for - # :or and false for :and). - when :eq, :neq, :lt, :lteq, :gt, :gteq, :or + when :eq, :neq, :lt, :lteq, :gt, :gteq left = expr( path_stack.shift, nodeset.dup, context ) right = expr( path_stack.shift, nodeset.dup, context ) res = equality_relational_compare( left, op, right ) + trace(op, left, right, res) if @debug return res + when :or + left = expr(path_stack.shift, nodeset.dup, context) + return true if Functions.boolean(left) + right = expr(path_stack.shift, nodeset.dup, context) + return Functions.boolean(right) + when :and - left = expr( path_stack.shift, nodeset.dup, context ) - return [] unless left - if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b} - return [] + left = expr(path_stack.shift, nodeset.dup, context) + return false unless Functions.boolean(left) + right = expr(path_stack.shift, nodeset.dup, context) + return Functions.boolean(right) + + when :div, :mod, :mult, :plus, :minus + left = expr(path_stack.shift, nodeset, context) + right = expr(path_stack.shift, nodeset, context) + left = unnode(left) if left.is_a?(Array) + right = unnode(right) if right.is_a?(Array) + left = Functions::number(left) + right = Functions::number(right) + case op + when :div + return left / right + when :mod + return left % right + when :mult + return left * right + when :plus + return left + right + when :minus + return left - right + else + raise "[BUG] Unexpected operator: <#{op.inspect}>" end - right = expr( path_stack.shift, nodeset.dup, context ) - res = equality_relational_compare( left, op, right ) - return res - - when :div - left = Functions::number(expr(path_stack.shift, 
nodeset, context)).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f - return (left / right) - - when :mod - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left % right) - - when :mult - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left * right) - - when :plus - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left + right) - - when :minus - left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f - return (left - right) - when :union left = expr( path_stack.shift, nodeset, context ) right = expr( path_stack.shift, nodeset, context ) + left = unnode(left) if left.is_a?(Array) + right = unnode(right) if right.is_a?(Array) return (left | right) - when :neg res = expr( path_stack, nodeset, context ) - return -(res.to_f) - + res = unnode(res) if res.is_a?(Array) + return -Functions.number(res) when :not when :function func_name = path_stack.shift.tr('-','_') arguments = path_stack.shift - subcontext = context ? 
nil : { :size => nodeset.size } - - res = [] - cont = context - nodeset.each_with_index { |n, i| - if subcontext - subcontext[:node] = n - subcontext[:index] = i - cont = subcontext + + if nodeset.size != 1 + message = "[BUG] Node set size must be 1 for function call: " + message += "<#{func_name}>: <#{nodeset.inspect}>: " + message += "<#{arguments.inspect}>" + raise message + end + + node = nodeset.first + if context + target_context = context + else + target_context = {:size => nodeset.size} + if node.is_a?(XPathNode) + target_context[:node] = node.raw_node + target_context[:index] = node.position + else + target_context[:node] = node + target_context[:index] = 1 end - arg_clone = arguments.dclone - args = arg_clone.collect { |arg| - expr( arg, [n], cont ) - } - Functions.context = cont - res << Functions.send( func_name, *args ) - } - return res + end + args = arguments.dclone.collect do |arg| + result = expr(arg, nodeset, target_context) + result = unnode(result) if result.is_a?(Array) + result + end + Functions.context = target_context + return Functions.send(func_name, *args) + else + raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>" end end # while return nodeset + ensure + leave(:expr, path_stack, nodeset) if @debug + end + + def step(path_stack, any_type: :element, order: :forward) + nodesets = yield + begin + enter(:step, path_stack, nodesets) if @debug + nodesets = node_test(path_stack, nodesets, any_type: any_type) + while path_stack[0] == :predicate + path_stack.shift # :predicate + predicate_expression = path_stack.shift.dclone + nodesets = evaluate_predicate(predicate_expression, nodesets) + end + if nodesets.size == 1 + ordered_nodeset = nodesets[0] + else + raw_nodes = [] + nodesets.each do |nodeset| + nodeset.each do |node| + if node.respond_to?(:raw_node) + raw_nodes << node.raw_node + else + raw_nodes << node + end + end + end + ordered_nodeset = sort(raw_nodes, order) + end + new_nodeset = [] + ordered_nodeset.each do 
|node| + # TODO: Remove duplicated + new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + end + new_nodeset + ensure + leave(:step, path_stack, new_nodeset) if @debug + end end + def node_test(path_stack, nodesets, any_type: :element) + enter(:node_test, path_stack, nodesets) if @debug + operator = path_stack.shift + case operator + when :qname + prefix = path_stack.shift + name = path_stack.shift + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + case raw_node.node_type + when :element + if prefix.nil? + raw_node.name == name + elsif prefix.empty? + if strict? + raw_node.name == name and raw_node.namespace == "" + else + # FIXME: This DOUBLES the time XPath searches take + ns = get_namespace(raw_node, prefix) + raw_node.name == name and raw_node.namespace == ns + end + else + # FIXME: This DOUBLES the time XPath searches take + ns = get_namespace(raw_node, prefix) + raw_node.name == name and raw_node.namespace == ns + end + when :attribute + if prefix.nil? + raw_node.name == name + elsif prefix.empty? 
+ raw_node.name == name and raw_node.namespace == "" + else + # FIXME: This DOUBLES the time XPath searches take + ns = get_namespace(raw_node.element, prefix) + raw_node.name == name and raw_node.namespace == ns + end + else + false + end + end + end + when :namespace + prefix = path_stack.shift + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + case raw_node.node_type + when :element + namespaces = @namespaces || raw_node.namespaces + raw_node.namespace == namespaces[prefix] + when :attribute + namespaces = @namespaces || raw_node.element.namespaces + raw_node.namespace == namespaces[prefix] + else + false + end + end + end + when :any + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + raw_node.node_type == any_type + end + end + when :comment + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + raw_node.node_type == :comment + end + end + when :text + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + raw_node.node_type == :text + end + end + when :processing_instruction + target = path_stack.shift + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + raw_node = node.raw_node + (raw_node.node_type == :processing_instruction) and + (target.empty? or (raw_node.target == target)) + end + end + when :node + new_nodesets = nodesets.collect do |nodeset| + filter_nodeset(nodeset) do |node| + true + end + end + else + message = "[BUG] Unexpected node test: " + + "<#{operator.inspect}>: <#{path_stack.inspect}>" + raise message + end + new_nodesets + ensure + leave(:node_test, path_stack, new_nodesets) if @debug + end - ########################################################## - # FIXME - # The next two methods are BAD MOJO! - # This is my achilles heel. 
If anybody thinks of a better - # way of doing this, be my guest. This really sucks, but - # it is a wonder it works at all. - # ######################################################## + def filter_nodeset(nodeset) + new_nodeset = [] + nodeset.each do |node| + next unless yield(node) + new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + end + new_nodeset + end - def descendant_or_self( path_stack, nodeset ) - rs = [] - d_o_s( path_stack, nodeset, rs ) - document_order(rs.flatten.compact) - #rs.flatten.compact + def evaluate_predicate(expression, nodesets) + enter(:predicate, expression, nodesets) if @debug + new_nodeset_count = 0 + new_nodesets = nodesets.collect do |nodeset| + new_nodeset = [] + subcontext = { :size => nodeset.size } + nodeset.each_with_index do |node, index| + if node.is_a?(XPathNode) + subcontext[:node] = node.raw_node + subcontext[:index] = node.position + else + subcontext[:node] = node + subcontext[:index] = index + 1 + end + result = expr(expression.dclone, [node], subcontext) + trace(:predicate_evaluate, expression, node, subcontext, result) if @debug + result = result[0] if result.kind_of? Array and result.length == 1 + if result.kind_of? Numeric + if result == node.position + new_nodeset_count += 1 + new_nodeset << XPathNode.new(node, position: new_nodeset_count) + end + elsif result.instance_of? 
Array + if result.size > 0 and result.inject(false) {|k,s| s or k} + if result.size > 0 + new_nodeset_count += 1 + new_nodeset << XPathNode.new(node, position: new_nodeset_count) + end + end + else + if result + new_nodeset_count += 1 + new_nodeset << XPathNode.new(node, position: new_nodeset_count) + end + end + end + new_nodeset + end + new_nodesets + ensure + leave(:predicate, new_nodesets) if @debug end - def d_o_s( p, ns, r ) - nt = nil - ns.each_index do |i| - n = ns[i] - x = expr( p.dclone, [ n ] ) - nt = n.node_type - d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0 - r.concat(x) if x.size > 0 + def trace(*args) + indent = " " * @nest + PP.pp(args, "").each_line do |line| + puts("#{indent}#{line}") end end + def enter(tag, *args) + trace(:enter, tag, *args) + @nest += 1 + end + + def leave(tag, *args) + @nest -= 1 + trace(:leave, tag, *args) + end # Reorders an array of nodes so that they are in document order # It tries to do this efficiently. @@ -494,7 +656,7 @@ def d_o_s( p, ns, r ) # in and out of function calls. If I knew what the index of the nodes was, # I wouldn't have to do this. Maybe add a document IDX for each node? # Problems with mutable documents. Or, rewrite everything. 
- def document_order( array_of_nodes ) + def sort(array_of_nodes, order) new_arry = [] array_of_nodes.each { |node| node_idx = [] @@ -505,42 +667,68 @@ def document_order( array_of_nodes ) end new_arry << [ node_idx.reverse, node ] } - new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] } + ordered = new_arry.sort_by do |index, node| + if order == :forward + index + else + -index + end + end + ordered.collect do |_index, node| + node + end end - - def recurse( nodeset, &block ) - for node in nodeset - yield node - recurse( node, &block ) if node.node_type == :element + def descendant(nodeset, include_self) + nodesets = [] + nodeset.each do |node| + new_nodeset = [] + new_nodes = {} + descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self) + nodesets << new_nodeset unless new_nodeset.empty? end + nodesets end + def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) + if include_self + return if new_nodes.key?(raw_node) + new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1) + new_nodes[raw_node] = true + end + node_type = raw_node.node_type + if node_type == :element or node_type == :document + raw_node.children.each do |child| + descendant_recursive(child, new_nodeset, new_nodes, true) + end + end + end # Builds a nodeset of all of the preceding nodes of the supplied node, # in reverse document order # preceding:: includes every element in the document that precedes this node, # except for ancestors - def preceding( node ) + def preceding(node) ancestors = [] - p = node.parent - while p - ancestors << p - p = p.parent + parent = node.parent + while parent + ancestors << parent + parent = parent.parent end - acc = [] - p = preceding_node_of( node ) - while p - if ancestors.include? 
p - ancestors.delete(p) + precedings = [] + preceding_node = preceding_node_of(node) + while preceding_node + if ancestors.include?(preceding_node) + ancestors.delete(preceding_node) else - acc << p + precedings << XPathNode.new(preceding_node, + position: precedings.size + 1) end - p = preceding_node_of( p ) + preceding_node = preceding_node_of(preceding_node) end - acc + precedings end def preceding_node_of( node ) @@ -558,14 +746,15 @@ def preceding_node_of( node ) psn end - def following( node ) - acc = [] - p = next_sibling_node( node ) - while p - acc << p - p = following_node_of( p ) + def following(node) + followings = [] + following_node = next_sibling_node(node) + while following_node + followings << XPathNode.new(following_node, + position: followings.size + 1) + following_node = following_node_of(following_node) end - acc + followings end def following_node_of( node ) @@ -587,45 +776,68 @@ def next_sibling_node(node) return psn end + def child(nodeset) + nodesets = [] + nodeset.each do |node| + raw_node = node.raw_node + node_type = raw_node.node_type + # trace(:child, node_type, node) + case node_type + when :element + nodesets << raw_node.children.collect.with_index do |child_node, i| + XPathNode.new(child_node, position: i + 1) + end + when :document + new_nodeset = [] + raw_node.children.each do |child| + case child + when XMLDecl, Text + # Ignore + else + new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1) + end + end + nodesets << new_nodeset unless new_nodeset.empty? + end + end + nodesets + end + def norm b case b when true, false return b when 'true', 'false' return Functions::boolean( b ) - when /^\d+(\.\d+)?$/ + when /^\d+(\.\d+)?$/, Numeric return Functions::number( b ) else return Functions::string( b ) end end - def equality_relational_compare( set1, op, set2 ) + def equality_relational_compare(set1, op, set2) + set1 = unnode(set1) if set1.is_a?(Array) + set2 = unnode(set2) if set2.is_a?(Array) + if set1.kind_of? 
Array and set2.kind_of? Array - if set1.size == 1 and set2.size == 1 - set1 = set1[0] - set2 = set2[0] - elsif set1.size == 0 or set2.size == 0 - nd = set1.size==0 ? set2 : set1 - rv = nd.collect { |il| compare( il, op, nil ) } - return rv - else - res = [] - SyncEnumerator.new( set1, set2 ).each { |i1, i2| - i1 = norm( i1 ) - i2 = norm( i2 ) - res << compare( i1, op, i2 ) - } - return res + # If both objects to be compared are node-sets, then the + # comparison will be true if and only if there is a node in the + # first node-set and a node in the second node-set such that the + # result of performing the comparison on the string-values of + # the two nodes is true. + set1.product(set2).any? do |node1, node2| + node_string1 = Functions.string(node1) + node_string2 = Functions.string(node2) + compare(node_string1, op, node_string2) end - end - # If one is nodeset and other is number, compare number to each item - # in nodeset s.t. number op number(string(item)) - # If one is nodeset and other is string, compare string to each item - # in nodeset s.t. string op string(item) - # If one is nodeset and other is boolean, compare boolean to each item - # in nodeset s.t. boolean op boolean(item) - if set1.kind_of? Array or set2.kind_of? Array + elsif set1.kind_of? Array or set2.kind_of? Array + # If one is nodeset and other is number, compare number to each item + # in nodeset s.t. number op number(string(item)) + # If one is nodeset and other is string, compare string to each item + # in nodeset s.t. string op string(item) + # If one is nodeset and other is boolean, compare boolean to each item + # in nodeset s.t. boolean op boolean(item) if set1.kind_of? Array a = set1 b = set2 @@ -636,15 +848,23 @@ def equality_relational_compare( set1, op, set2 ) case b when true, false - return a.collect {|v| compare( Functions::boolean(v), op, b ) } + each_unnode(a).any? 
do |unnoded| + compare(Functions.boolean(unnoded), op, b) + end when Numeric - return a.collect {|v| compare( Functions::number(v), op, b )} - when /^\d+(\.\d+)?$/ - b = Functions::number( b ) - return a.collect {|v| compare( Functions::number(v), op, b )} + each_unnode(a).any? do |unnoded| + compare(Functions.number(unnoded), op, b) + end + when /\A\d+(\.\d+)?\z/ + b = Functions.number(b) + each_unnode(a).any? do |unnoded| + compare(Functions.number(unnoded), op, b) + end else - b = Functions::string( b ) - return a.collect { |v| compare( Functions::string(v), op, b ) } + b = Functions::string(b) + each_unnode(a).any? do |unnoded| + compare(Functions::string(unnoded), op, b) + end end else # If neither is nodeset, @@ -654,32 +874,52 @@ def equality_relational_compare( set1, op, set2 ) # Else, convert to string # Else # Convert both to numbers and compare - s1 = set1.to_s - s2 = set2.to_s - if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false' - set1 = Functions::boolean( set1 ) - set2 = Functions::boolean( set2 ) + compare(set1, op, set2) + end + end + + def value_type(value) + case value + when true, false + :boolean + when Numeric + :number + when String + :string + else + raise "[BUG] Unexpected value type: <#{value.inspect}>" + end + end + + def normalize_compare_values(a, operator, b) + a_type = value_type(a) + b_type = value_type(b) + case operator + when :eq, :neq + if a_type == :boolean or b_type == :boolean + a = Functions.boolean(a) unless a_type == :boolean + b = Functions.boolean(b) unless b_type == :boolean + elsif a_type == :number or b_type == :number + a = Functions.number(a) unless a_type == :number + b = Functions.number(b) unless b_type == :number else - if op == :eq or op == :neq - if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/ - set1 = Functions::number( s1 ) - set2 = Functions::number( s2 ) - else - set1 = Functions::string( set1 ) - set2 = Functions::string( set2 ) - end - else - set1 = Functions::number( set1 ) - set2 = 
Functions::number( set2 ) - end + a = Functions.string(a) unless a_type == :string + b = Functions.string(b) unless b_type == :string end - return compare( set1, op, set2 ) + when :lt, :lteq, :gt, :gteq + a = Functions.number(a) unless a_type == :number + b = Functions.number(b) unless b_type == :number + else + message = "[BUG] Unexpected compare operator: " + + "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>" + raise message end - return false + [a, b] end - def compare a, op, b - case op + def compare(a, operator, b) + a, b = normalize_compare_values(a, operator, b) + case operator when :eq a == b when :neq @@ -692,13 +932,47 @@ def compare a, op, b a > b when :gteq a >= b - when :and - a and b - when :or - a or b else - false + message = "[BUG] Unexpected compare operator: " + + "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>" + raise message + end + end + + def each_unnode(nodeset) + return to_enum(__method__, nodeset) unless block_given? + nodeset.each do |node| + if node.is_a?(XPathNode) + unnoded = node.raw_node + else + unnoded = node + end + yield(unnoded) + end + end + + def unnode(nodeset) + each_unnode(nodeset).collect do |unnoded| + unnoded = yield(unnoded) if block_given? 
+ unnoded + end + end + end + + # @private + class XPathNode + attr_reader :raw_node, :context + def initialize(node, context=nil) + if node.is_a?(XPathNode) + @raw_node = node.raw_node + else + @raw_node = node end + @context = context || {} + end + + def position + @context[:position] end end end diff --git a/test/rexml/data/much_ado.xml b/test/rexml/data/much_ado.xml index f008fadbb0..0040088c9c 100644 --- a/test/rexml/data/much_ado.xml +++ b/test/rexml/data/much_ado.xml @@ -4735,7 +4735,7 @@ CLAUDIO, BENEDICK, HERO, BEATRICE, and Attendants</STAGEDIR> <LINE>But they shall find, awaked in such a kind,</LINE> <LINE>Both strength of limb and policy of mind,</LINE> <LINE>Ability in means and choice of friends,</LINE> -<LINE>To quit me of them throughly.</LINE> +<LINE>To quit me of them thoroughly.</LINE> </SPEECH> <SPEECH> diff --git a/test/rexml/data/ofbiz-issues-full-177.xml b/test/rexml/data/ofbiz-issues-full-177.xml index bfff771d12..e1f7bdfddc 100644 --- a/test/rexml/data/ofbiz-issues-full-177.xml +++ b/test/rexml/data/ofbiz-issues-full-177.xml @@ -152,8 +152,8 @@ <!-- desc : Short description for attachment. --> <!-- ispatch : Whether attachment is a patch file. --> <!-- filename : Filename of attachment. --> - <!-- submitter_id : Issuezilla ID of attachement submitter. --> - <!-- submitting_username : username of attachement submitter. --> + <!-- submitter_id : Issuezilla ID of attachment submitter. --> + <!-- submitting_username : username of attachment submitter. --> <!-- data : Encoded attachment. --> <!-- attachment_iz_url : URL to attachment in iz. 
--> diff --git a/test/rexml/data/t75.xml b/test/rexml/data/t75.xml index 0911fb1b1a..eb3cccee4b 100644 --- a/test/rexml/data/t75.xml +++ b/test/rexml/data/t75.xml @@ -1,4 +1,4 @@ -<?xml version="1.0" encoding="ISO-8859-1"?><?pos="3"?> +<?xml version="1.0" encoding="ISO-8859-1"?> <!-- generated by hnb 1.9.17 (http://hnb.sourceforge.net) --> <!DOCTYPE tree[ diff --git a/test/rexml/data/test/tests.xml b/test/rexml/data/test/tests.xml index cf03b42b0b..fd415679c4 100644 --- a/test/rexml/data/test/tests.xml +++ b/test/rexml/data/test/tests.xml @@ -299,7 +299,7 @@ <valueOf select="name(/.)"></valueOf> <valueOf select="name(/self::node())"></valueOf> - <!-- name of root elemet --> + <!-- name of root element --> <valueOf select="name(node())">web-app</valueOf> <valueOf select="name(/node())">web-app</valueOf> <valueOf select="name(/*)">web-app</valueOf> @@ -318,7 +318,7 @@ <valueOf select="name(parent::node())"></valueOf> <valueOf select="name(parent::*)"></valueOf> - <!-- name of root elemet --> + <!-- name of root element --> <valueOf select="name()">web-app</valueOf> <valueOf select="name(.)">web-app</valueOf> <valueOf select="name(../*)">web-app</valueOf> diff --git a/test/rexml/data/tutorial.xml b/test/rexml/data/tutorial.xml index bf5783d09a..9c4639b948 100644 --- a/test/rexml/data/tutorial.xml +++ b/test/rexml/data/tutorial.xml @@ -286,7 +286,7 @@ el1 << Text.new(" cruel world") strings.</p> <p>I can't emphasize this enough, because people do have problems with - this. REXML can't possibly alway guess correctly how your text is + this. REXML can't possibly always guess correctly how your text is encoded, so it always assumes the text is UTF-8. It also does not warn you when you try to add text which isn't properly encoded, for the same reason. You must make sure that you are adding UTF-8 text. 
diff --git a/test/rexml/formatter/test_default.rb b/test/rexml/formatter/test_default.rb new file mode 100644 index 0000000000..aa403dbed6 --- /dev/null +++ b/test/rexml/formatter/test_default.rb @@ -0,0 +1,17 @@ +module REXMLTests + class DefaultFormatterTest < Test::Unit::TestCase + def format(node) + formatter = REXML::Formatters::Default.new + output = +"" + formatter.write(node, output) + output + end + + class InstructionTest < self + def test_content_nil + instruction = REXML::Instruction.new("target") + assert_equal("<?target?>", format(instruction)) + end + end + end +end diff --git a/test/rexml/functions/test_base.rb b/test/rexml/functions/test_base.rb new file mode 100644 index 0000000000..daa38156f8 --- /dev/null +++ b/test/rexml/functions/test_base.rb @@ -0,0 +1,283 @@ +# frozen_string_literal: false +require "test/unit/testcase" + +require "rexml/document" + +# TODO: Split me +module REXMLTests + class FunctionsTester < Test::Unit::TestCase + include REXML + + def setup + super + REXML::Functions.context = nil + end + + def test_functions + # trivial text() test + # confuse-a-function + source = "<a>more <b id='1'/><b id='2'>dumb</b><b id='3'/><c/> text</a>" + doc = Document.new source + res = "" + XPath::each(doc.root, "text()") {|val| res << val.to_s} + assert_equal "more text", res + + res = XPath::first(doc.root, "b[last()]") + assert_equal '3', res.attributes['id'] + res = XPath::first(doc.root, "b[position()=2]") + assert_equal '2', res.attributes['id'] + res = XPath::first(doc.root, "*[name()='c']") + assert_equal "c", res.name + end + + # Contributed by Mike Stok + def test_starts_with + source = <<-EOF + <foo> + <a href="mailto:a@b.c">a@b.c</a> + <a href="http://www.foo.com">http://www.foo.com</a> + </foo> + EOF + doc = Document.new source + mailtos = doc.elements.to_a("//a[starts-with(@href, 'mailto:')]") + assert_equal 1, mailtos.size + assert_equal "mailto:a@b.c", mailtos[0].attributes['href'] + + ailtos = 
doc.elements.to_a("//a[starts-with(@href, 'ailto:')]") + assert_equal 0, ailtos.size + end + + def test_string_length + doc = Document.new <<-EOF + <AAA> + <Q/> + <SSSS/> + <BB/> + <CCC/> + <DDDDDDDD/> + <EEEE/> + </AAA> + EOF + assert doc, "create doc" + + set = doc.elements.to_a("//*[string-length(name()) = 3]") + assert_equal 2, set.size, "nodes with names length = 3" + + set = doc.elements.to_a("//*[string-length(name()) < 3]") + assert_equal 2, set.size, "nodes with names length < 3" + + set = doc.elements.to_a("//*[string-length(name()) > 3]") + assert_equal 3, set.size, "nodes with names length > 3" + end + + # Test provided by Mike Stok + def test_contains + source = <<-EOF + <foo> + <a href="mailto:a@b.c">a@b.c</a> + <a href="http://www.foo.com">http://www.foo.com</a> + </foo> + EOF + doc = Document.new source + + [['o', 2], ['foo', 1], ['bar', 0]].each { |test| + search, expected = test + set = doc.elements.to_a("//a[contains(@href, '#{search}')]") + assert_equal expected, set.size + } + end + + # Mike Stok and Sean Russell + def test_substring + # examples from http://www.w3.org/TR/xpath#function-substring + doc = Document.new('<test string="12345" />') + + #puts XPath.first(d, 'node()[0 + 1]') + #d = Document.new("<a b='1'/>") + #puts XPath.first(d, 'a[0 mod 0]') + [ [1.5, 2.6, '234'], + [0, 3, '12'], + [0, '0 div 0', ''], + [1, '0 div 0', ''], + ['-42', '1 div 0', '12345'], + ['-1 div 0', '1 div 0', ''] + ].each { |start, length, expected| + set = doc.elements.to_a("//test[substring(@string, #{start}, #{length}) = '#{expected}']") + assert_equal 1, set.size, "#{start}, #{length}, '#{expected}'" + } + end + + def test_substring_angrez + testString = REXML::Functions::substring_after("helloworld","hello") + assert_equal( 'world', testString ) + end + + def test_translate + source = <<-EOF + <doc> + <case name='w3c one' result='BAr' /> <!-- w3c --> + <case name='w3c two' result='AAA' /> <!-- w3c --> + <case name='alchemy' result="gold" /> <!-- mike --> + 
<case name='vbxml one' result='A Space Odyssey' /> + <case name='vbxml two' result='AbCdEf' /> + </doc> + EOF + + doc = Document.new(source) + + [ ['bar', 'abc', 'ABC', 'w3c one'], + ['--aaa--','abc-','ABC', 'w3c two'], + ['lead', 'dear language', 'doll groover', 'alchemy'], + ['A Space Odissei', 'i', 'y', 'vbxml one'], + ['abcdefg', 'aceg', 'ACE', 'vbxml two'], + ].each { |arg1, arg2, arg3, name| + translate = "translate('#{arg1}', '#{arg2}', '#{arg3}')" + set = doc.elements.to_a("//case[@result = #{translate}]") + assert_equal 1, set.size, translate + assert_equal name, set[0].attributes['name'] + } + end + + def test_name + d = REXML::Document.new("<a xmlns:x='foo'><b/><x:b/></a>") + assert_equal 1, d.root.elements.to_a('*[name() = "b"]').size + assert_equal 1, d.elements.to_a('//*[name() = "x:b"]').size + end + + def test_local_name + d = REXML::Document.new("<a xmlns:x='foo'><b/><x:b/></a>") + assert_equal 2, d.root.elements.to_a('*[local_name() = "b"]').size + assert_equal 2, d.elements.to_a('//*[local_name() = "b"]').size + end + + def test_substring2 + doc = Document.new('<test string="12345" />') + assert_equal(1,doc.elements.to_a("//test[substring(@string,2)='2345']").size) + end + + # Submitted by Kouhei + def test_floor_ceiling_round + source = "<a><b id='1'/><b id='2'/><b id='3'/></a>" + doc = REXML::Document.new(source) + + id_1 = doc.elements["/a/b[@id='1']"] + id_2 = doc.elements["/a/b[@id='2']"] + id_3 = doc.elements["/a/b[@id='3']"] + + good = { + "floor" => [[], [id_1], [id_2], [id_3]], + "ceiling" => [[id_1], [id_2], [id_3], []], + "round" => [[id_1], [id_2], [id_3], []] + } + good.each do |key, value| + (0..3).each do |i| + xpath = "//b[number(@id) = #{key}(#{i+0.5})]" + assert_equal(value[i], REXML::XPath.match(doc, xpath)) + end + end + + good["round"] = [[], [id_1], [id_2], [id_3]] + good.each do |key, value| + (0..3).each do |i| + xpath = "//b[number(@id) = #{key}(#{i+0.4})]" + assert_equal(value[i], REXML::XPath.match(doc, xpath)) + end + 
end + end + + # Submitted by Kou + def test_lang + d = Document.new(<<-XML) + <a xml:lang="en"> + <b xml:lang="ja"> + <c xml:lang="fr"/> + <d/> + <e xml:lang="ja-JP"/> + <f xml:lang="en-US"/> + </b> + </a> + XML + + assert_equal(1, d.elements.to_a("//*[lang('fr')]").size) + assert_equal(3, d.elements.to_a("//*[lang('ja')]").size) + assert_equal(2, d.elements.to_a("//*[lang('en')]").size) + assert_equal(1, d.elements.to_a("//*[lang('en-us')]").size) + + d = Document.new(<<-XML) + <root> + <para xml:lang="en"/> + <div xml:lang="en"><para/></div> + <para xml:lang="EN"/> + <para xml:lang="en-us"/> + </root> + XML + + assert_equal(5, d.elements.to_a("//*[lang('en')]").size) + end + + def test_ticket_60 + document = REXML::Document.new("<a><b>A</b><b>1</b></a>") + assert_equal( "A", REXML::XPath.first(document, '//b[.="A"]').text ) + assert_equal( "1", REXML::XPath.first(document, '//b[.="1"]').text ) + end + + def test_normalize_space + source = "<a><!--COMMENT A--><b><!-- COMMENT A --></b></a>" + doc = REXML::Document.new(source) + predicate = "string(.)=normalize_space('\nCOMMENT \n A \n\n ')" + m = REXML::XPath.match(doc, "//comment()[#{predicate}]") + assert_equal( [REXML::Comment.new("COMMENT A")], m ) + end + + def test_normalize_space_strings + source = <<-XML +<a><b>breakfast boosts\t\t + +concentration </b><c> +Coffee beans + aroma + + + +</c><d> Dessert + \t\t after dinner</d></a> + XML + normalized_texts = REXML::XPath.each(REXML::Document.new(source), "normalize-space(//text())").to_a + assert_equal([ + "breakfast boosts concentration", + "Coffee beans aroma", + "Dessert after dinner", + ], + normalized_texts) + end + + def test_string_nil_without_context + doc = REXML::Document.new(<<~XML) + <?xml version="1.0" encoding="UTF-8"?> + <root> + <foo bar="baz"/> + <foo bar=""/> + </root> + XML + + assert_equal([doc.root.elements[2]], + REXML::XPath.match(doc, + "//foo[@bar=$n]", + nil, + {"n" => nil})) + end + + def test_unregistered_method + doc = 
Document.new("<root/>") + assert_nil(XPath::first(doc.root, "to_s()")) + end + + def test_nonexistent_function + doc = Document.new("<root><nonexistent/></root>") + # TODO: Maybe, this is not XPath spec behavior. + # This behavior must be reconsidered. + assert_equal(doc.root.elements[1], + XPath::first(doc.root, "nonexistent()")) + end + end +end diff --git a/test/rexml/functions/test_boolean.rb b/test/rexml/functions/test_boolean.rb new file mode 100644 index 0000000000..b3e2117c10 --- /dev/null +++ b/test/rexml/functions/test_boolean.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: false + +require "test/unit" +require "rexml/document" +require "rexml/functions" + +module REXMLTests + class TestFunctionsBoolean < Test::Unit::TestCase + def setup + REXML::Functions.context = nil + end + + def test_true + assert_equal(true, REXML::Functions.boolean(true)) + end + + def test_false + assert_equal(false, REXML::Functions.boolean(false)) + end + + def test_integer_true + assert_equal(true, REXML::Functions.boolean(1)) + end + + def test_integer_positive_zero + assert_equal(false, REXML::Functions.boolean(0)) + end + + def test_integer_negative_zero + assert_equal(false, REXML::Functions.boolean(-0)) + end + + def test_float_true + assert_equal(true, REXML::Functions.boolean(1.1)) + end + + def test_float_positive_zero + assert_equal(false, REXML::Functions.boolean(-0.0)) + end + + def test_float_negative_zero + assert_equal(false, REXML::Functions.boolean(-0.0)) + end + + def test_float_nan + assert_equal(false, REXML::Functions.boolean(Float::NAN)) + end + + def test_string_true + assert_equal(true, REXML::Functions.boolean("content")) + end + + def test_string_empty + assert_equal(false, REXML::Functions.boolean("")) + end + + def test_node_set_true + root = REXML::Document.new("<root/>").root + assert_equal(true, REXML::Functions.boolean([root])) + end + + def test_node_set_empty + assert_equal(false, REXML::Functions.boolean([])) + end + + def test_nil + 
assert_equal(false, REXML::Functions.boolean(nil)) + end + + def test_context + REXML::Functions.context = {node: true} + assert_equal(true, REXML::Functions.boolean()) + end + end +end diff --git a/test/rexml/functions/test_local_name.rb b/test/rexml/functions/test_local_name.rb new file mode 100644 index 0000000000..97c9e74852 --- /dev/null +++ b/test/rexml/functions/test_local_name.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: false + +require "test/unit" +require "rexml/document" +require "rexml/functions" + +module REXMLTests + class TestFunctionsLocalName < Test::Unit::TestCase + def setup + REXML::Functions.context = nil + end + + def test_one + document = REXML::Document.new(<<-XML) +<root xmlns:x="http://example.com/x/"> + <x:child/> +</root> + XML + node_set = document.root.children + assert_equal("child", REXML::Functions.local_name(node_set)) + end + + def test_multiple + document = REXML::Document.new(<<-XML) +<root xmlns:x="http://example.com/x/"> + <x:child1/> + <x:child2/> +</root> + XML + node_set = document.root.children + assert_equal("child1", REXML::Functions.local_name(node_set)) + end + + def test_nonexistent + assert_equal("", REXML::Functions.local_name([])) + end + + def test_context + document = REXML::Document.new("<root/>") + REXML::Functions.context = {node: document.root} + assert_equal("root", REXML::Functions.local_name()) + end + end +end diff --git a/test/rexml/functions/test_number.rb b/test/rexml/functions/test_number.rb new file mode 100644 index 0000000000..16e635701c --- /dev/null +++ b/test/rexml/functions/test_number.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: false + +require "test/unit" +require "rexml/document" +require "rexml/functions" + +module REXMLTests + class TestFunctionsNumber < Test::Unit::TestCase + def setup + REXML::Functions.context = nil + end + + def test_true + assert_equal(1, REXML::Functions.number(true)) + end + + def test_false + assert_equal(0, REXML::Functions.number(false)) + end + + def 
test_numeric + assert_equal(29, REXML::Functions.number(29)) + end + + def test_string_integer + assert_equal(100, REXML::Functions.number("100")) + end + + def test_string_float + assert_equal(-9.13, REXML::Functions.number("-9.13")) + end + + def test_node_set + root = REXML::Document.new("<root>100</root>").root + assert_equal(100, REXML::Functions.number([root])) + end + end +end diff --git a/test/rexml/helper.rb b/test/rexml/helper.rb new file mode 100644 index 0000000000..3de1327635 --- /dev/null +++ b/test/rexml/helper.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: false + +require "test-unit" + +require "rexml/document" + +module Helper + module Fixture + def fixture_path(*components) + File.join(__dir__, "data", *components) + end + end + + module Global + def suppress_warning + verbose = $VERBOSE + begin + $VERBOSE = nil + yield + ensure + $VERBOSE = verbose + end + end + + def with_default_internal(encoding) + default_internal = Encoding.default_internal + begin + suppress_warning {Encoding.default_internal = encoding} + yield + ensure + suppress_warning {Encoding.default_internal = default_internal} + end + end + end +end diff --git a/test/rexml/parse/test_attribute_list_declaration.rb b/test/rexml/parse/test_attribute_list_declaration.rb new file mode 100644 index 0000000000..4388252846 --- /dev/null +++ b/test/rexml/parse/test_attribute_list_declaration.rb @@ -0,0 +1,30 @@ +require "test/unit" +require "core_assertions" + +require "rexml/document" + +module REXMLTests + class TestParseAttributeListDeclaration < Test::Unit::TestCase + include Test::Unit::CoreAssertions + + def test_linear_performance_space + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE schema SYSTEM \"foo.dtd\" [<!ATTLIST " + + " " * n + + " root v CDATA #FIXED \"test\">]>") + end + end + + def test_linear_performance_tab_and_gt + seq = [10000, 50000, 100000, 150000, 200000] + 
assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE root [<!ATTLIST " + + "\t" * n + + "root value CDATA \"" + + ">" * n + + "\">]>") + end + end + end +end diff --git a/test/rexml/parse/test_cdata.rb b/test/rexml/parse/test_cdata.rb new file mode 100644 index 0000000000..b5f1a3bc47 --- /dev/null +++ b/test/rexml/parse/test_cdata.rb @@ -0,0 +1,17 @@ +require "test/unit" +require "core_assertions" + +require "rexml/document" + +module REXMLTests + class TestParseCData < Test::Unit::TestCase + include Test::Unit::CoreAssertions + + def test_linear_performance_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('<description><![CDATA[ ' + ">" * n + ' ]]></description>') + end + end + end +end diff --git a/test/rexml/parse/test_character_reference.rb b/test/rexml/parse/test_character_reference.rb new file mode 100644 index 0000000000..bf8d21903e --- /dev/null +++ b/test/rexml/parse/test_character_reference.rb @@ -0,0 +1,17 @@ +require "test/unit" +require "core_assertions" + +require "rexml/document" + +module REXMLTests + class TestParseCharacterReference < Test::Unit::TestCase + include Test::Unit::CoreAssertions + + def test_linear_performance_many_preceding_zeros + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('<test testing="&#' + "0" * n + '97;"/>') + end + end + end +end diff --git a/test/rexml/parse/test_comment.rb b/test/rexml/parse/test_comment.rb new file mode 100644 index 0000000000..4475dca753 --- /dev/null +++ b/test/rexml/parse/test_comment.rb @@ -0,0 +1,151 @@ +require "test/unit" +require "core_assertions" + +require "rexml/document" + +module REXMLTests + class TestParseComment < Test::Unit::TestCase + include Test::Unit::CoreAssertions + + def parse(xml) + REXML::Document.new(xml) + end + + class TestInvalid < self + def test_toplevel_unclosed_comment + exception = 
assert_raise(REXML::ParseException) do + parse("<!--") + end + assert_equal(<<~DETAIL, exception.to_s) + Unclosed comment + Line: 1 + Position: 4 + Last 80 unconsumed characters: + DETAIL + end + + def test_toplevel_malformed_comment_inner + exception = assert_raise(REXML::ParseException) do + parse("<!-- -- -->") + end + assert_equal(<<~DETAIL, exception.to_s) + Malformed comment + Line: 1 + Position: 11 + Last 80 unconsumed characters: + DETAIL + end + + def test_toplevel_malformed_comment_end + exception = assert_raise(REXML::ParseException) do + parse("<!-- --->") + end + assert_equal(<<~DETAIL, exception.to_s) + Malformed comment + Line: 1 + Position: 9 + Last 80 unconsumed characters: + DETAIL + end + + def test_doctype_malformed_comment_inner + exception = assert_raise(REXML::ParseException) do + parse("<!DOCTYPE foo [<!-- -- -->") + end + assert_equal(<<~DETAIL, exception.to_s) + Malformed comment + Line: 1 + Position: 26 + Last 80 unconsumed characters: + DETAIL + end + + def test_doctype_malformed_comment_end + exception = assert_raise(REXML::ParseException) do + parse("<!DOCTYPE foo [<!-- --->") + end + assert_equal(<<~DETAIL, exception.to_s) + Malformed comment + Line: 1 + Position: 24 + Last 80 unconsumed characters: + DETAIL + end + + def test_after_doctype_malformed_comment_short + exception = assert_raise(REXML::ParseException) do + parse("<a><!-->") + end + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed comment + Line: 1 + Position: 8 + Last 80 unconsumed characters: + --> + DETAIL + end + + def test_after_doctype_malformed_comment_inner + exception = assert_raise(REXML::ParseException) do + parse("<a><!-- -- -->") + end + assert_equal(<<~DETAIL, exception.to_s) + Malformed comment + Line: 1 + Position: 14 + Last 80 unconsumed characters: + DETAIL + end + + def test_after_doctype_malformed_comment_end + exception = assert_raise(REXML::ParseException) do + parse("<a><!-- --->") + end + assert_equal(<<~DETAIL, exception.to_s) + Malformed 
comment + Line: 1 + Position: 12 + Last 80 unconsumed characters: + DETAIL + end + end + + def test_before_root + parser = REXML::Parsers::BaseParser.new('<!-- ok comment --><a></a>') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal(" ok comment ", events[:comment]) + end + + def test_after_root + parser = REXML::Parsers::BaseParser.new('<a></a><!-- ok comment -->') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal(" ok comment ", events[:comment]) + end + + def test_linear_performance_top_level_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('<!-- ' + ">" * n + ' -->') + end + end + + def test_linear_performance_in_element_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('<xml><!-- ' + '>' * n + ' --></xml>') + end + end + end +end diff --git a/test/rexml/parse/test_document_type_declaration.rb b/test/rexml/parse/test_document_type_declaration.rb index 55713909e7..99c23745a6 100644 --- a/test/rexml/parse/test_document_type_declaration.rb +++ b/test/rexml/parse/test_document_type_declaration.rb @@ -1,9 +1,13 @@ # frozen_string_literal: false require "test/unit" +require "core_assertions" + require "rexml/document" module REXMLTests class TestParseDocumentTypeDeclaration < Test::Unit::TestCase + include Test::Unit::CoreAssertions + private def parse(doctype) REXML::Document.new(<<-XML).doctype @@ -36,6 +40,66 @@ def test_garbage_plus_before_name_at_line_start + r SYSTEM "urn:x-rexml:test" [ ]> <r/> DETAIL end + + def test_no_name + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) +<!DOCTYPE> + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: name is missing +Line: 3 +Position: 17 +Last 80 unconsumed characters: +<!DOCTYPE> <r/> 
+ DETAIL + end + end + + class TestUnclosed < self + def test_no_extra_node + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE foo [") + end + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed DOCTYPE: unclosed + Line: 1 + Position: 15 + Last 80 unconsumed characters: + + DETAIL + end + + def test_start_element + exception = assert_raise(REXML::ParseException) do + REXML::Document.new(<<~DOCTYPE) + <!DOCTYPE foo [ <r> + DOCTYPE + end + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed DOCTYPE: invalid declaration + Line: 1 + Position: 20 + Last 80 unconsumed characters: + <r>#{' '} + DETAIL + end + + def test_text + exception = assert_raise(REXML::ParseException) do + REXML::Document.new(<<~DOCTYPE) + <!DOCTYPE foo [ text + DOCTYPE + end + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed DOCTYPE: invalid declaration + Line: 1 + Position: 21 + Last 80 unconsumed characters: + text#{' '} + DETAIL + end end class TestExternalID < self @@ -225,5 +289,29 @@ def parse(internal_subset) DOCTYPE end end + + def test_linear_performance_percent_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + begin + REXML::Document.new("<!DOCTYPE root [" + "%>" * n + "]><test/>") + rescue + end + end + end + + def test_linear_performance_comment_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE root [<!-- " + ">" * n + " -->]>") + end + end + + def test_linear_performance_external_entity_right_bracket_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE root [%" + "]>" * n + ";]>") + end + end end end diff --git a/test/rexml/parse/test_element.rb b/test/rexml/parse/test_element.rb index e8dce4b997..ab4818dad9 100644 --- a/test/rexml/parse/test_element.rb +++ b/test/rexml/parse/test_element.rb @@ -1,8 +1,12 
@@ require "test/unit" +require "core_assertions" + require "rexml/document" module REXMLTests class TestParseElement < Test::Unit::TestCase + include Test::Unit::CoreAssertions + def parse(xml) REXML::Document.new(xml) end @@ -43,7 +47,20 @@ def test_empty_namespace_attribute_name Line: 1 Position: 13 Last 80 unconsumed characters: +:a=""></x> + DETAIL + end + def test_empty_namespace_attribute_name_with_utf8_character + exception = assert_raise(REXML::ParseException) do + parse("<x :\xE2\x80\x8B>") # U+200B ZERO WIDTH SPACE + end + assert_equal(<<-DETAIL.chomp.force_encoding("ASCII-8BIT"), exception.to_s) +Invalid attribute name: <:\xE2\x80\x8B> +Line: 1 +Position: 8 +Last 80 unconsumed characters: +:\xE2\x80\x8B> DETAIL end @@ -72,6 +89,61 @@ def test_garbage_less_than_slash_before_end_tag_at_line_start </ </x> DETAIL end + + def test_after_root + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('<a></a><b>') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Extra tag at the end of the document (got '<b') + Line: 1 + Position: 10 + Last 80 unconsumed characters: + + DETAIL + end + + def test_after_empty_element_tag_root + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('<a/><b>') + while parser.has_next? 
+ parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Extra tag at the end of the document (got '<b') + Line: 1 + Position: 7 + Last 80 unconsumed characters: + + DETAIL + end + end + + def test_linear_performance_attribute_value_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('<test testing="' + ">" * n + '"></test>') + end + end + + def test_linear_performance_deep_same_name_attributes + seq = [100, 500, 1000, 1500, 2000] + assert_linear_performance(seq, rehearsal: 10) do |n| + xml = <<-XML +<?xml version="1.0"?> +<root xmlns:ns="ns-uri"> +#{"<x ns:name='ns-value' name='value'>\n" * n} +#{"</x>\n" * n} +</root> + XML + REXML::Document.new(xml) + end end end end diff --git a/test/rexml/parse/test_entity_declaration.rb b/test/rexml/parse/test_entity_declaration.rb new file mode 100644 index 0000000000..81d95b5868 --- /dev/null +++ b/test/rexml/parse/test_entity_declaration.rb @@ -0,0 +1,557 @@ +# frozen_string_literal: false +require "test/unit" +require "core_assertions" + +require "rexml/document" + +module REXMLTests + class TestParseEntityDeclaration < Test::Unit::TestCase + include Test::Unit::CoreAssertions + + private + def xml(internal_subset) + <<-XML +<!DOCTYPE r SYSTEM "urn:x-henrikmartensson:test" [ +#{internal_subset} +]> +<r/> + XML + end + + def parse(internal_subset) + REXML::Document.new(xml(internal_subset)).doctype + end + + public + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-GEDecl + class TestGeneralEntityDeclaration < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name + class TestName < self + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY invalid&name \"valid-entity-value\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 61 +Last 80 unconsumed characters: + 
invalid&name "valid-entity-value">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityDef + class TestEntityDefinition < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue + class TestEntityValue < self + def test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name invalid-entity-value>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 59 +Last 80 unconsumed characters: + valid-name invalid-entity-value>]> + DETAIL + end + + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name \"% &\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 44 +Last 80 unconsumed characters: + valid-name "% &">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name \"invalid-entity-value'>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 61 +Last 80 unconsumed characters: + valid-name "invalid-entity-value'>]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID + class TestExternalID < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral + class TestSystemLiteral < self + def test_no_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name SYSTEM invalid-system-literal>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 68 +Last 80 unconsumed characters: + valid-name SYSTEM invalid-system-literal>]> + DETAIL + end + + def test_no_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE 
root [<!ENTITY valid-name PUBLIC \"valid-pubid-literal\" invalid-system-literal>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 90 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal" invalid-system-literal>]> + DETAIL + end + + def test_mixed_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name SYSTEM 'invalid-system-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 70 +Last 80 unconsumed characters: + valid-name SYSTEM 'invalid-system-literal">]> + DETAIL + end + + def test_mixed_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"valid-pubid-literal\" \"invalid-system-literal'>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal" "invalid-system-literal'>]> + DETAIL + end + + def test_no_literal_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name SYSTEM>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 45 +Last 80 unconsumed characters: + valid-name SYSTEM>]> + DETAIL + end + + def test_no_literal_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"valid-pubid-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 67 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar + class 
TestPublicIDLiteral < self + def test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC invalid-pubid-literal \"valid-system-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 90 +Last 80 unconsumed characters: + valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]> + DETAIL + end + + def test_prohibited_pubid_character + exception = assert_raise(REXML::ParseException) do + # U+3042 HIRAGANA LETTER A + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"\u3042\" \"valid-system-literal\">]>") + end + assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8')) +Malformed entity declaration +Line: 1 +Position: 74 +Last 80 unconsumed characters: + valid-name PUBLIC "\u3042" "valid-system-literal">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"invalid-pubid-literal' \"valid-system-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + valid-name PUBLIC "invalid-pubid-literal' "valid-system-literal">]> + DETAIL + end + + def test_no_literal + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 45 +Last 80 unconsumed characters: + valid-name PUBLIC>]> + DETAIL + end + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NDataDecl + class TestNotationDataDeclaration < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameChar + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY 
valid-name PUBLIC \"valid-pubid-literal\" \"valid-system-literal\" NDATA invalid&name>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 109 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal" "valid-system-literal" NDATA invalid&nam + DETAIL + end + end + + def test_entity_value_and_notation_data_declaration + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name \"valid-entity-value\" NDATA valid-ndata-value>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 83 +Last 80 unconsumed characters: + valid-name "valid-entity-value" NDATA valid-ndata-value>]> + DETAIL + end + end + + def test_no_space + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-namePUBLIC\"valid-pubid-literal\"\"valid-system-literal\"NDATAvalid-name>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 102 +Last 80 unconsumed characters: + valid-namePUBLIC"valid-pubid-literal""valid-system-literal"NDATAvalid-name>]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDecl + class TestParsedEntityDeclaration < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name + class TestName < self + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % invalid&name \"valid-entity-value\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 63 +Last 80 unconsumed characters: + % invalid&name "valid-entity-value">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDef + class TestParsedEntityDefinition < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue + class TestEntityValue < self + def 
test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name invalid-entity-value>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 61 +Last 80 unconsumed characters: + % valid-name invalid-entity-value>]> + DETAIL + end + + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name \"% &\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 46 +Last 80 unconsumed characters: + % valid-name "% &">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name 'invalid-entity-value\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 63 +Last 80 unconsumed characters: + % valid-name 'invalid-entity-value">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID + class TestExternalID < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral + class TestSystemLiteral < self + def test_no_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name SYSTEM invalid-system-literal>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 70 +Last 80 unconsumed characters: + % valid-name SYSTEM invalid-system-literal>]> + DETAIL + end + + def test_no_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"valid-pubid-literal\" invalid-system-literal>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + % valid-name PUBLIC 
"valid-pubid-literal" invalid-system-literal>]> + DETAIL + end + + def test_mixed_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name SYSTEM \"invalid-system-literal'>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 72 +Last 80 unconsumed characters: + % valid-name SYSTEM "invalid-system-literal'>]> + DETAIL + end + + def test_mixed_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"valid-pubid-literal\" 'invalid-system-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 94 +Last 80 unconsumed characters: + % valid-name PUBLIC "valid-pubid-literal" 'invalid-system-literal">]> + DETAIL + end + + def test_no_literal_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name SYSTEM>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 47 +Last 80 unconsumed characters: + % valid-name SYSTEM>]> + DETAIL + end + + def test_no_literal_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"valid-pubid-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 69 +Last 80 unconsumed characters: + % valid-name PUBLIC "valid-pubid-literal">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar + class TestPublicIDLiteral < self + def test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC invalid-pubid-literal \"valid-system-literal\">]>") + end + 
assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + % valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]> + DETAIL + end + + def test_prohibited_pubid_character + exception = assert_raise(REXML::ParseException) do + # U+3042 HIRAGANA LETTER A + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"\u3042\" \"valid-system-literal\">]>") + end + assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8')) +Malformed entity declaration +Line: 1 +Position: 76 +Last 80 unconsumed characters: + % valid-name PUBLIC "\u3042" "valid-system-literal">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC 'invalid-pubid-literal\" \"valid-system-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 94 +Last 80 unconsumed characters: + % valid-name PUBLIC 'invalid-pubid-literal" "valid-system-literal">]> + DETAIL + end + + def test_no_literal + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 47 +Last 80 unconsumed characters: + % valid-name PUBLIC>]> + DETAIL + end + end + end + + def test_entity_value_and_notation_data_declaration + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name \"valid-entity-value\" NDATA valid-ndata-value>]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 85 +Last 80 unconsumed characters: + % valid-name "valid-entity-value" NDATA valid-ndata-value>]> + DETAIL + end + end + + def test_no_space + exception = assert_raise(REXML::ParseException) do + 
REXML::Document.new("<!DOCTYPE root [<!ENTITY %valid-nameSYSTEM\"valid-system-literal\">]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 67 +Last 80 unconsumed characters: + %valid-nameSYSTEM"valid-system-literal">]> + DETAIL + end + end + + def test_empty + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!ENTITY> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 5 +Position: 70 +Last 80 unconsumed characters: +> ]> <r/> + DETAIL + end + + def test_linear_performance_entity_value_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version \"" + + ">" * n + + "\">]>") + end + end + + def test_linear_performance_entity_value_gt_right_bracket + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version \"" + + ">]" * n + + "\">]>") + end + end + + def test_linear_performance_system_literal_in_system_gt_right_bracket + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version SYSTEM \"" + + ">]" * n + + "\">]>") + end + end + + def test_linear_performance_system_literal_in_public_gt_right_bracket + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version PUBLIC \"pubid-literal\" \"" + + ">]" * n + + "\">]>") + end + end + end +end diff --git a/test/rexml/parse/test_notation_declaration.rb b/test/rexml/parse/test_notation_declaration.rb index 19a0536d0a..9e81b6a428 100644 --- a/test/rexml/parse/test_notation_declaration.rb +++ 
b/test/rexml/parse/test_notation_declaration.rb @@ -35,7 +35,7 @@ def test_no_name Line: 5 Position: 72 Last 80 unconsumed characters: - <!NOTATION> ]> <r/> +<!NOTATION> ]> <r/> DETAIL end diff --git a/test/rexml/parse/test_processing_instruction.rb b/test/rexml/parse/test_processing_instruction.rb new file mode 100644 index 0000000000..ba381dc4cd --- /dev/null +++ b/test/rexml/parse/test_processing_instruction.rb @@ -0,0 +1,127 @@ +require "test/unit" +require "core_assertions" + +require "rexml/document" + +module REXMLTests + class TestParseProcessingInstruction < Test::Unit::TestCase + include Test::Unit::CoreAssertions + + def parse(xml) + REXML::Document.new(xml) + end + + class TestInvalid < self + def test_no_name + exception = assert_raise(REXML::ParseException) do + parse("<??>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Invalid processing instruction node: invalid name +Line: 1 +Position: 4 +Last 80 unconsumed characters: +?> + DETAIL + end + + def test_unclosed_content + exception = assert_raise(REXML::ParseException) do + parse("<?name content") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Unclosed processing instruction +Line: 1 +Position: 14 +Last 80 unconsumed characters: +content + DETAIL + end + + def test_unclosed_no_content + exception = assert_raise(REXML::ParseException) do + parse("<?name") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Unclosed processing instruction +Line: 1 +Position: 6 +Last 80 unconsumed characters: + + DETAIL + end + + def test_xml_declaration_not_at_document_start + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('<a><?xml version="1.0" ?></a>') + while parser.has_next? 
+ parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: XML declaration is not at the start + Line: 1 + Position: 25 + Last 80 unconsumed characters: + + DETAIL + end + end + + def test_comment + doc = parse(<<-XML) +<?x y +<!--?><?x -->?> +<r/> + XML + assert_equal([["x", "y\n<!--"], + ["x", "-->"]], + [[doc.children[0].target, doc.children[0].content], + [doc.children[1].target, doc.children[1].content]]) + end + + def test_before_root + parser = REXML::Parsers::BaseParser.new('<?abc version="1.0" ?><a></a>') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal("abc", events[:processing_instruction]) + end + + def test_after_root + parser = REXML::Parsers::BaseParser.new('<a></a><?abc version="1.0" ?>') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal("abc", events[:processing_instruction]) + end + + def test_content_question + document = REXML::Document.new("<a><?name con?tent?></a>") + assert_equal("con?tent", document.root.children.first.content) + end + + def test_linear_performance_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<?xml version=\"1.0\" " + ">" * n + " ?>") + end + end + + def test_linear_performance_tab + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("<?name" + "\t" * n + "version=\"1.0\" > ?>") + end + end + end +end diff --git a/test/rexml/parse/test_text.rb b/test/rexml/parse/test_text.rb new file mode 100644 index 0000000000..04f553ae8b --- /dev/null +++ b/test/rexml/parse/test_text.rb @@ -0,0 +1,57 @@ +require "test/unit" +require 'rexml/parsers/baseparser' + +module REXMLTests + class TestParseText < Test::Unit::TestCase + class TestInvalid < self + def test_before_root + exception = assert_raise(REXML::ParseException) 
do + parser = REXML::Parsers::BaseParser.new('b<a></a>') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Content at the start of the document (got 'b') + Line: 1 + Position: 4 + Last 80 unconsumed characters: + <a> + DETAIL + end + + def test_after_root + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('<a></a>c') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Extra content at the end of the document (got 'c') + Line: 1 + Position: 8 + Last 80 unconsumed characters: + + DETAIL + end + end + + def test_whitespace_characters_after_root + parser = REXML::Parsers::BaseParser.new('<a>b</a> ') + + events = [] + while parser.has_next? + event = parser.pull + case event[0] + when :text + events << event[1] + end + end + + assert_equal(["b"], events) + end + end +end diff --git a/test/rexml/parser/test_base_parser.rb b/test/rexml/parser/test_base_parser.rb new file mode 100644 index 0000000000..6f213978c0 --- /dev/null +++ b/test/rexml/parser/test_base_parser.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: false + +require 'rexml/parsers/baseparser' + +module REXMLTests + class BaseParserTester < Test::Unit::TestCase + def test_large_xml + large_text = "a" * 100_000 + xml = <<-XML + <?xml version="1.0"?> + <root> + <child>#{large_text}</child> + <child>#{large_text}</child> + </root> + XML + + parser = REXML::Parsers::BaseParser.new(xml) + while parser.has_next? 
+ parser.pull + end + + assert do + parser.position < xml.bytesize + end + end + + def test_attribute_prefixed_by_xml + xml = <<-XML + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE html> + <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <title>XHTML Document</title> + </head> + <body> + <h1>XHTML Document</h1> + <p xml:lang="ja" lang="ja">For Japanese</p> + </body> + </html> + XML + + parser = REXML::Parsers::BaseParser.new(xml) + 5.times {parser.pull} + + html = parser.pull + assert_equal([:start_element, + "html", + {"xmlns" => "http://www.w3.org/1999/xhtml", + "xml:lang" => "en", + "lang" => "en"}], + html) + + 15.times {parser.pull} + + p = parser.pull + assert_equal([:start_element, + "p", + {"xml:lang" => "ja", "lang" => "ja"}], + p) + end + end +end diff --git a/test/rexml/parser/test_sax2.rb b/test/rexml/parser/test_sax2.rb index 91d135f5f7..c254890744 100644 --- a/test/rexml/parser/test_sax2.rb +++ b/test/rexml/parser/test_sax2.rb @@ -4,200 +4,200 @@ require "rexml/sax2listener" module REXMLTests -class TestSAX2Parser < Test::Unit::TestCase - class TestDocumentTypeDeclaration < self - private - def xml(internal_subset) - <<-XML + class TestSAX2Parser < Test::Unit::TestCase + class TestDocumentTypeDeclaration < self + private + def xml(internal_subset) + <<-XML <!DOCTYPE r SYSTEM "urn:x-henrikmartensson:test" [ #{internal_subset} ]> <r/> - XML - end + XML + end - class TestEntityDeclaration < self - class Listener - include REXML::SAX2Listener - attr_reader :entity_declarations - def initialize - @entity_declarations = [] - end + class TestEntityDeclaration < self + class Listener + include REXML::SAX2Listener + attr_reader :entity_declarations + def initialize + @entity_declarations = [] + end - def entitydecl(declaration) - super - @entity_declarations << declaration + def entitydecl(declaration) + super + @entity_declarations << declaration + end end - end - private - def parse(internal_subset) - listener = 
Listener.new - parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) - parser.listen(listener) - parser.parse - listener.entity_declarations - end + private + def parse(internal_subset) + listener = Listener.new + parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) + parser.listen(listener) + parser.parse + listener.entity_declarations + end - class TestGeneralEntity < self - class TestValue < self - def test_double_quote - assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) + class TestGeneralEntity < self + class TestValue < self + def test_double_quote + assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) <!ENTITY name "value"> - INTERNAL_SUBSET - end + INTERNAL_SUBSET + end - def test_single_quote - assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) + def test_single_quote + assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET)) <!ENTITY name 'value'> - INTERNAL_SUBSET + INTERNAL_SUBSET + end end - end - class TestExternlID < self - class TestSystem < self - def test_with_ndata - declaration = [ - "name", - "SYSTEM", "system-literal", - "NDATA", "ndata-name", - ] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) + class TestExternlID < self + class TestSystem < self + def test_with_ndata + declaration = [ + "name", + "SYSTEM", "system-literal", + "NDATA", "ndata-name", + ] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) <!ENTITY name SYSTEM "system-literal" NDATA ndata-name> + INTERNAL_SUBSET + end + + def test_without_ndata + declaration = [ + "name", + "SYSTEM", "system-literal", + ] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) +<!ENTITY name SYSTEM "system-literal"> + INTERNAL_SUBSET + end + end + + class TestPublic < self + def test_with_ndata + declaration = [ + "name", + "PUBLIC", "public-literal", "system-literal", + "NDATA", "ndata-name", + ] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) +<!ENTITY name PUBLIC "public-literal" "system-literal" NDATA 
ndata-name> + INTERNAL_SUBSET + end + + def test_without_ndata + declaration = [ + "name", + "PUBLIC", "public-literal", "system-literal", + ] + assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) +<!ENTITY name PUBLIC "public-literal" "system-literal"> + INTERNAL_SUBSET + end + end + end + end + + class TestParameterEntity < self + class TestValue < self + def test_double_quote + assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) +<!ENTITY % name "value"> INTERNAL_SUBSET end - def test_without_ndata - declaration = [ - "name", - "SYSTEM", "system-literal", - ] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) -<!ENTITY name SYSTEM "system-literal"> + def test_single_quote + assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) +<!ENTITY % name 'value'> INTERNAL_SUBSET end end - class TestPublic < self - def test_with_ndata + class TestExternlID < self + def test_system declaration = [ + "%", "name", - "PUBLIC", "public-literal", "system-literal", - "NDATA", "ndata-name", + "SYSTEM", "system-literal", ] assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) -<!ENTITY name PUBLIC "public-literal" "system-literal" NDATA ndata-name> + parse(<<-INTERNAL_SUBSET)) +<!ENTITY % name SYSTEM "system-literal"> INTERNAL_SUBSET end - def test_without_ndata + def test_public declaration = [ + "%", "name", "PUBLIC", "public-literal", "system-literal", ] assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) -<!ENTITY name PUBLIC "public-literal" "system-literal"> +<!ENTITY % name PUBLIC "public-literal" "system-literal"> INTERNAL_SUBSET end end end end - class TestParameterEntity < self - class TestValue < self - def test_double_quote - assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) -<!ENTITY % name "value"> - INTERNAL_SUBSET + class TestNotationDeclaration < self + class Listener + include REXML::SAX2Listener + attr_reader :notation_declarations + def initialize + @notation_declarations = [] end - def test_single_quote 
- assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET)) -<!ENTITY % name 'value'> - INTERNAL_SUBSET + def notationdecl(*declaration) + super + @notation_declarations << declaration end end + private + def parse(internal_subset) + listener = Listener.new + parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) + parser.listen(listener) + parser.parse + listener.notation_declarations + end + class TestExternlID < self def test_system - declaration = [ - "%", - "name", - "SYSTEM", "system-literal", - ] + declaration = ["name", "SYSTEM", nil, "system-literal"] assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) -<!ENTITY % name SYSTEM "system-literal"> + parse(<<-INTERNAL_SUBSET)) +<!NOTATION name SYSTEM "system-literal"> INTERNAL_SUBSET end def test_public - declaration = [ - "%", - "name", - "PUBLIC", "public-literal", "system-literal", - ] + declaration = ["name", "PUBLIC", "public-literal", "system-literal"] assert_equal([declaration], parse(<<-INTERNAL_SUBSET)) -<!ENTITY % name PUBLIC "public-literal" "system-literal"> +<!NOTATION name PUBLIC "public-literal" "system-literal"> INTERNAL_SUBSET end end - end - end - class TestNotationDeclaration < self - class Listener - include REXML::SAX2Listener - attr_reader :notation_declarations - def initialize - @notation_declarations = [] - end - - def notationdecl(*declaration) - super - @notation_declarations << declaration - end - end - - private - def parse(internal_subset) - listener = Listener.new - parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset)) - parser.listen(listener) - parser.parse - listener.notation_declarations - end - - class TestExternlID < self - def test_system - declaration = ["name", "SYSTEM", nil, "system-literal"] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) -<!NOTATION name SYSTEM "system-literal"> - INTERNAL_SUBSET - end - - def test_public - declaration = ["name", "PUBLIC", "public-literal", "system-literal"] - assert_equal([declaration], 
parse(<<-INTERNAL_SUBSET)) -<!NOTATION name PUBLIC "public-literal" "system-literal"> - INTERNAL_SUBSET - end - end - - class TestPublicID < self - def test_literal - declaration = ["name", "PUBLIC", "public-literal", nil] - assert_equal([declaration], - parse(<<-INTERNAL_SUBSET)) + class TestPublicID < self + def test_literal + declaration = ["name", "PUBLIC", "public-literal", nil] + assert_equal([declaration], + parse(<<-INTERNAL_SUBSET)) <!NOTATION name PUBLIC "public-literal"> - INTERNAL_SUBSET + INTERNAL_SUBSET + end end end end end end -end diff --git a/test/rexml/parser/test_tree.rb b/test/rexml/parser/test_tree.rb index 8a5d9d1223..315be9c23f 100644 --- a/test/rexml/parser/test_tree.rb +++ b/test/rexml/parser/test_tree.rb @@ -4,40 +4,39 @@ require "rexml/parsers/treeparser" module REXMLTests -class TestTreeParser < Test::Unit::TestCase - class TestInvalid < self - def test_unmatched_close_tag - xml = "<root></not-root>" - exception = assert_raise(REXML::ParseException) do - parse(xml) - end - assert_equal(<<-MESSAGE, exception.to_s) + class TestTreeParser < Test::Unit::TestCase + private def parse(xml) + document = REXML::Document.new + parser = REXML::Parsers::TreeParser.new(xml, document) + parser.parse + end + + class TestInvalid < self + def test_unmatched_close_tag + xml = "<root></not-root>" + exception = assert_raise(REXML::ParseException) do + parse(xml) + end + assert_equal(<<-MESSAGE, exception.to_s) Missing end tag for 'root' (got 'not-root') Line: 1 Position: #{xml.bytesize} Last 80 unconsumed characters: - MESSAGE - end - - def test_no_close_tag - xml = "<root>" - exception = assert_raise(REXML::ParseException) do - parse(xml) + MESSAGE end - assert_equal(<<-MESSAGE, exception.to_s) -No close tag for /root + + def test_no_close_tag + xml = "<root>" + exception = assert_raise(REXML::ParseException) do + parse(xml) + end + assert_equal(<<-MESSAGE, exception.to_s) +Missing end tag for '/root' Line: 1 Position: #{xml.bytesize} Last 80 unconsumed 
characters: - MESSAGE - end - - private - def parse(xml) - document = REXML::Document.new - parser = REXML::Parsers::TreeParser.new(xml, document) - parser.parse + MESSAGE + end end end end -end diff --git a/test/rexml/parser/test_ultra_light.rb b/test/rexml/parser/test_ultra_light.rb index cb6ee5a8ab..d1364d6a99 100644 --- a/test/rexml/parser/test_ultra_light.rb +++ b/test/rexml/parser/test_ultra_light.rb @@ -3,67 +3,66 @@ require "rexml/parsers/ultralightparser" module REXMLTests -class TestUltraLightParser < Test::Unit::TestCase - class TestDocumentTypeDeclaration < self - def test_entity_declaration - assert_equal([ - [ - :start_doctype, - :parent, - "root", - "SYSTEM", - "urn:x-test", - nil, - [:entitydecl, "name", "value"] + class TestUltraLightParser < Test::Unit::TestCase + class TestDocumentTypeDeclaration < self + def test_entity_declaration + assert_equal([ + [ + :start_doctype, + :parent, + "root", + "SYSTEM", + "urn:x-test", + nil, + [:entitydecl, "name", "value"] + ], + [:start_element, :parent, "root", {}], ], - [:start_element, :parent, "root", {}], - [:text, "\n"], - ], - parse(<<-INTERNAL_SUBSET)) + parse(<<-INTERNAL_SUBSET)) <!ENTITY name "value"> - INTERNAL_SUBSET - end + INTERNAL_SUBSET + end - private - def xml(internal_subset) - <<-XML + private + def xml(internal_subset) + <<-XML <!DOCTYPE root SYSTEM "urn:x-test" [ #{internal_subset} ]> <root/> - XML - end + XML + end - def parse(internal_subset) - parser = REXML::Parsers::UltraLightParser.new(xml(internal_subset)) - normalize(parser.parse) - end + def parse(internal_subset) + parser = REXML::Parsers::UltraLightParser.new(xml(internal_subset)) + normalize(parser.parse) + end - def normalize(root) - root.collect do |child| - normalize_child(child) + def normalize(root) + root.collect do |child| + normalize_child(child) + end end - end - def normalize_child(child) - tag = child.first - case tag - when :start_doctype - normalized_parent = :parent - normalized_doctype = child.dup - 
normalized_doctype[1] = normalized_parent - normalized_doctype - when :start_element - tag, parent, name, attributes, *children = child - normalized_parent = :parent - normalized_children = children.collect do |sub_child| - normalize_child(sub_child) + def normalize_child(child) + tag = child.first + case tag + when :start_doctype + normalized_parent = :parent + normalized_doctype = child.dup + normalized_doctype[1] = normalized_parent + normalized_doctype + when :start_element + tag, _parent, name, attributes, *children = child + normalized_parent = :parent + normalized_children = children.collect do |sub_child| + normalize_child(sub_child) + end + [tag, normalized_parent, name, attributes, *normalized_children] + else + child end - [tag, normalized_parent, name, attributes, *normalized_children] - else - child end end end end -end diff --git a/test/rexml/parser/test_xpath.rb b/test/rexml/parser/test_xpath.rb new file mode 100644 index 0000000000..360b9b793a --- /dev/null +++ b/test/rexml/parser/test_xpath.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: false + +require "test/unit" +require "rexml/parsers/xpathparser" + +module REXMLTests + class TestXPathParser < Test::Unit::TestCase + def self.sub_test_case(name, &block) + parent_test_case = self + sub_test_case = Class.new(self) do + singleton_class = class << self; self; end + singleton_class.__send__(:define_method, :name) do + [parent_test_case.name, name].compact.join("::") + end + end + sub_test_case.class_eval(&block) + sub_test_case + end + + sub_test_case("#abbreviate") do + def abbreviate(xpath) + parser = REXML::Parsers::XPathParser.new + parser.abbreviate(xpath) + end + + def test_document + assert_equal("/", + abbreviate("/")) + end + + def test_descendant_or_self_only + assert_equal("//", + abbreviate("/descendant-or-self::node()/")) + end + + def test_descendant_or_self_absolute + assert_equal("//a/b", + abbreviate("/descendant-or-self::node()/a/b")) + end + + def test_descendant_or_self_relative + 
assert_equal("a//b", + abbreviate("a/descendant-or-self::node()/b")) + end + + def test_descendant_or_self_not_node + assert_equal("/descendant-or-self::text()", + abbreviate("/descendant-or-self::text()")) + end + + def test_self_absolute + assert_equal("/a/./b", + abbreviate("/a/self::node()/b")) + end + + def test_self_relative + assert_equal("a/./b", + abbreviate("a/self::node()/b")) + end + + def test_self_not_node + assert_equal("/self::text()", + abbreviate("/self::text()")) + end + + def test_parent_absolute + assert_equal("/a/../b", + abbreviate("/a/parent::node()/b")) + end + + def test_parent_relative + assert_equal("a/../b", + abbreviate("a/parent::node()/b")) + end + + def test_parent_not_node + assert_equal("/a/parent::text()", + abbreviate("/a/parent::text()")) + end + + def test_any_absolute + assert_equal("/*/a", + abbreviate("/*/a")) + end + + def test_any_relative + assert_equal("a/*/b", + abbreviate("a/*/b")) + end + + def test_following_sibling_absolute + assert_equal("/following-sibling::a/b", + abbreviate("/following-sibling::a/b")) + end + + def test_following_sibling_relative + assert_equal("a/following-sibling::b/c", + abbreviate("a/following-sibling::b/c")) + end + + def test_predicate_index + assert_equal("a[5]/b", + abbreviate("a[5]/b")) + end + + def test_attribute_relative + assert_equal("a/@b", + abbreviate("a/attribute::b")) + end + + def test_filter_attribute + assert_equal("a/b[@i = 1]/c", + abbreviate("a/b[attribute::i=1]/c")) + end + + def test_filter_string_single_quote + assert_equal("a/b[@name = \"single ' quote\"]/c", + abbreviate("a/b[attribute::name=\"single ' quote\"]/c")) + end + + def test_filter_string_double_quote + assert_equal("a/b[@name = 'double \" quote']/c", + abbreviate("a/b[attribute::name='double \" quote']/c")) + end + end + end +end diff --git a/test/rexml/run.rb b/test/rexml/run.rb new file mode 100755 index 0000000000..089318b014 --- /dev/null +++ b/test/rexml/run.rb @@ -0,0 +1,13 @@ +#!/usr/bin/env ruby 
+ +$VERBOSE = true + +base_dir = File.dirname(File.expand_path(__dir__)) +lib_dir = File.join(base_dir, "lib") +test_dir = File.join(base_dir, "test") + +$LOAD_PATH.unshift(lib_dir) + +require_relative "helper" + +exit(Test::Unit::AutoRunner.run(true, test_dir)) diff --git a/test/rexml/test_attribute.rb b/test/rexml/test_attribute.rb new file mode 100644 index 0000000000..b66e462d4a --- /dev/null +++ b/test/rexml/test_attribute.rb @@ -0,0 +1,12 @@ +module REXMLTests + class AttributeTest < Test::Unit::TestCase + def test_empty_prefix + error = assert_raise(ArgumentError) do + REXML::Attribute.new(":x") + end + assert_equal("name must be " + + "\#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: <\":x\">", + error.message) + end + end +end diff --git a/test/rexml/test_attributes.rb b/test/rexml/test_attributes.rb index d6f566bdf8..09fde44224 100644 --- a/test/rexml/test_attributes.rb +++ b/test/rexml/test_attributes.rb @@ -1,6 +1,4 @@ # frozen_string_literal: false -require 'test/unit/testcase' -require 'rexml/document' module REXMLTests class AttributesTester < Test::Unit::TestCase @@ -180,18 +178,27 @@ def test_amp_and_lf_attributes attr_test('name','value with LF 
 & ampersand') end - def test_quoting + def test_quote_root d = Document.new(%q{<a x='1' y="2"/>}) assert_equal( %q{<a x='1' y='2'/>}, d.to_s ) d.root.context[:attribute_quote] = :quote assert_equal( %q{<a x="1" y="2"/>}, d.to_s ) + end + def test_quote_sub_element d = Document.new(%q{<a x='1' y="2"><b z='3'/></a>}) assert_equal( %q{<a x='1' y='2'><b z='3'/></a>}, d.to_s ) d.root.context[:attribute_quote] = :quote assert_equal( %q{<a x="1" y="2"><b z="3"/></a>}, d.to_s ) end + def test_quote_to_s_value + doc = Document.new(%q{<root a="'"/>}, {attribute_quote: :quote}) + assert_equal(%q{<root a="'"/>}, doc.to_s) + assert_equal("'", doc.root.attribute("a").value) + assert_equal(%q{<root a="'"/>}, doc.to_s) + end + def test_ticket_127 doc = Document.new doc.add_element 'a', { 'v' => 'x & y' } diff --git a/test/rexml/test_attributes_mixin.rb b/test/rexml/test_attributes_mixin.rb index 3a9f54eefd..2b9108cbc6 100644 --- a/test/rexml/test_attributes_mixin.rb +++ b/test/rexml/test_attributes_mixin.rb @@ -1,6 +1,4 @@ # frozen_string_literal: false -require 'test/unit' -require 'rexml/document' module REXMLTests class TestAttributes < Test::Unit::TestCase diff --git a/test/rexml/test_comment.rb b/test/rexml/test_comment.rb index 0af2f5ca76..f6f4d8099d 100644 --- a/test/rexml/test_comment.rb +++ b/test/rexml/test_comment.rb @@ -1,7 +1,4 @@ # frozen_string_literal: false -require "test/unit/testcase" - -require 'rexml/document' module REXMLTests class CommentTester < Test::Unit::TestCase diff --git a/test/rexml/test_contrib.rb b/test/rexml/test_contrib.rb index 8462b3c23f..c16c72f2a6 100644 --- a/test/rexml/test_contrib.rb +++ b/test/rexml/test_contrib.rb @@ -1,15 +1,14 @@ # coding: binary # frozen_string_literal: false -require_relative "rexml_test_utils" - -require "rexml/document" require "rexml/parseexception" require "rexml/formatters/default" +require_relative "helper" + module REXMLTests class ContribTester < Test::Unit::TestCase - include REXMLTestUtils + include 
Helper::Fixture include REXML XML_STRING_01 = <<DELIMITER @@ -73,6 +72,7 @@ def test_bad_doctype_Tobias > ] > + <svg/> EOF doc = REXML::Document.new source doc.write(out="") @@ -82,7 +82,7 @@ def test_bad_doctype_Tobias # Peter Verhage def test_namespace_Peter - source = <<-EOF + source = <<~EOF <?xml version="1.0"?> <config:myprog-config xmlns:config="http://someurl/program/version"> <!-- main options --> @@ -379,7 +379,7 @@ def test_various_xpath end def test_entities_Holden_Glova - document = <<-EOL + document = <<~EOL <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE rubynet [ <!ENTITY rbconfig.MAJOR "1"> diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb index ee5438d5e5..eed9cca2c0 100644 --- a/test/rexml/test_core.rb +++ b/test/rexml/test_core.rb @@ -1,8 +1,6 @@ -# coding: utf-8 +# -*- coding: utf-8 -*- # frozen_string_literal: false -require_relative "rexml_test_utils" - require "rexml/document" require "rexml/parseexception" require "rexml/output" @@ -11,13 +9,14 @@ require "rexml/undefinednamespaceexception" require_relative "listener" +require_relative "helper" module REXMLTests class Tester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture include REXML def setup - @xsa_source = <<-EOL + @xsa_source = <<~EOL <?xml version="1.0"?> <?xsl stylesheet="blah.xsl"?> <!-- The first line tests the XMLDecl, the second tests PI. 
@@ -91,7 +90,7 @@ def test_attribute # Bryan Murphy <murphybryanp@yahoo.com> text = "this is a {target[@name='test']/@value} test" - source = <<-EOL + source = <<~EOL <?xml version="1.0"?> <doc search="#{text}"/> EOL @@ -116,6 +115,72 @@ def test_attribute name4='test4'/>).join(' '), e.to_s end + def test_attribute_duplicated + # https://www.w3.org/TR/xml-names/#uniqAttrs + message = <<-MESSAGE.chomp +Duplicate attribute "a" +Line: 2 +Position: 24 +Last 80 unconsumed characters: +/> + MESSAGE + assert_raise(REXML::ParseException.new(message)) do + Document.new(<<-XML) +<x> + <bad a="1" a="2"/> +</x> + XML + end + end + + def test_attribute_namespace_conflict + # https://www.w3.org/TR/xml-names/#uniqAttrs + message = <<-MESSAGE.chomp +Namespace conflict in adding attribute "a": Prefix "n1" = "http://www.w3.org" and prefix "n2" = "http://www.w3.org" +Line: 4 +Position: 140 +Last 80 unconsumed characters: +/> + MESSAGE + assert_raise(REXML::ParseException.new(message)) do + Document.new(<<-XML) +<!-- http://www.w3.org is bound to n1 and n2 --> +<x xmlns:n1="http://www.w3.org" + xmlns:n2="http://www.w3.org"> + <bad n1:a="1" n2:a="2"/> +</x> + XML + end + end + + def test_attribute_default_namespace + # https://www.w3.org/TR/xml-names/#uniqAttrs + document = Document.new(<<-XML) +<!-- http://www.w3.org is bound to n1 and is the default --> +<x xmlns:n1="http://www.w3.org" + xmlns="http://www.w3.org" > + <good a="1" b="2" /> + <good a="1" n1:a="2" /> +</x> + XML + attributes = document.root.elements.collect do |element| + element.attributes.each_attribute.collect do |attribute| + [attribute.prefix, attribute.namespace, attribute.name] + end + end + assert_equal([ + [ + ["", "", "a"], + ["", "", "b"], + ], + [ + ["", "", "a"], + ["n1", "http://www.w3.org", "a"], + ], + ], + attributes) + end + def test_cdata test = "The quick brown fox jumped & < & < \" ' @@ -681,7 +746,7 @@ def test_iso_8859_1_output_function koln_iso_8859_1 = "K\xF6ln" koln_utf8 = "K\xc3\xb6ln" source = 
Source.new( koln_iso_8859_1, 'iso-8859-1' ) - results = source.scan(/.*/)[0] + results = source.match(/.*/)[0] koln_utf8.force_encoding('UTF-8') if koln_utf8.respond_to?(:force_encoding) assert_equal koln_utf8, results output << results @@ -779,7 +844,7 @@ def test_deep_clone end def test_whitespace_before_root - a = <<EOL + a = <<EOL.chomp <?xml version='1.0'?> <blo> <wak> @@ -823,7 +888,7 @@ def test_attlist_decl assert_equal 'two', doc.root.elements[1].namespace assert_equal 'foo', doc.root.namespace - doc = Document.new <<-EOL + doc = Document.new <<~EOL <?xml version="1.0"?> <!DOCTYPE schema SYSTEM "XMLSchema.dtd" [ <!ENTITY % p ''> @@ -877,18 +942,18 @@ def test_ticket_51 EOL # The most common case. People not caring about the namespaces much. - assert_equal( "XY", XPath.match( doc, "/test/a/text()" ).join ) - assert_equal( "XY", XPath.match( doc, "/test/x:a/text()" ).join ) + assert_equal( "XY", XPath.match( doc, "/*:test/*:a/text()" ).join ) + assert_equal( "XY", XPath.match( doc, "/*:test/x:a/text()" ).join ) # Surprising? I don't think so, if you believe my definition of the "common case" - assert_equal( "XYZ", XPath.match( doc, "//a/text()" ).join ) + assert_equal( "XYZ", XPath.match( doc, "//*:a/text()" ).join ) # These are the uncommon cases. Namespaces are actually important, so we define our own # mappings, and pass them in. 
assert_equal( "XY", XPath.match( doc, "/f:test/f:a/text()", { "f" => "1" } ).join ) # The namespaces are defined, and override the original mappings - assert_equal( "", XPath.match( doc, "/test/a/text()", { "f" => "1" } ).join ) + assert_equal( "XY", XPath.match( doc, "/*:test/*:a/text()", { "f" => "1" } ).join ) assert_equal( "", XPath.match( doc, "/x:test/x:a/text()", { "f" => "1" } ).join ) - assert_equal( "", XPath.match( doc, "//a/text()", { "f" => "1" } ).join ) + assert_equal( "XYZ", XPath.match( doc, "//*:a/text()", { "f" => "1" } ).join ) end def test_processing_instruction @@ -900,7 +965,7 @@ def test_processing_instruction end def test_oses_with_bad_EOLs - Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n") + Document.new("<?xml version='1.0'?>\n\n\n<a/>\n\n") end # Contributed (with patch to fix bug) by Kouhei @@ -927,7 +992,7 @@ def test_0xD_in_preface end def test_hyphens_in_doctype - doc = REXML::Document.new <<-EOQ + doc = REXML::Document.new <<~EOQ <?xml version="1.0"?> <!DOCTYPE a-b-c> <a-b-c> @@ -1043,7 +1108,7 @@ def test_null_element_name def test_text_raw # From the REXML tutorial # (http://www.germane-software.com/software/rexml/test/data/tutorial.html) - doc = Document.new <<-EOL + doc = Document.new <<~EOL <?xml version="1.0"?> <!DOCTYPE schema SYSTEM "XMLSchema.dtd" [ <!ENTITY % s 'Sean'> @@ -1277,11 +1342,26 @@ def test_ticket_21 exception = assert_raise(ParseException) do Document.new(src) end - assert_equal(<<-DETAIL, exception.to_s) + assert_equal(<<-DETAIL.chomp, exception.to_s) Missing attribute value start quote: <bar> Line: 1 Position: 16 Last 80 unconsumed characters: +value/> + DETAIL + end + + def test_parse_exception_on_missing_attribute_end_quote + src = '<foo bar="value/>' + exception = assert_raise(ParseException) do + Document.new(src) + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Missing attribute value end quote: <bar>: <"> +Line: 1 +Position: 17 +Last 80 unconsumed characters: +value/> DETAIL end @@ 
-1377,7 +1457,7 @@ def test_ticket_91 d.root.add_element( "bah" ) p=REXML::Formatters::Pretty.new(2) p.compact = true # Don't add whitespace to text nodes unless necessary - p.write(d,out="") + p.write(d,out=+"") assert_equal( expected, out ) end @@ -1391,8 +1471,8 @@ def test_ticket_95 def test_ticket_102 doc = REXML::Document.new '<doc xmlns="ns"><item name="foo"/></doc>' - assert_equal( "foo", doc.root.elements["item"].attribute("name","ns").to_s ) - assert_equal( "item", doc.root.elements["item[@name='foo']"].name ) + assert_equal( "foo", doc.root.elements["*:item"].attribute("name","ns").to_s ) + assert_equal( "item", doc.root.elements["*:item[@name='foo']"].name ) end def test_ticket_14 @@ -1421,11 +1501,11 @@ def test_ticket_121 doc = REXML::Document.new( '<doc xmlns="ns" xmlns:phantom="ns"><item name="foo">text</item></doc>' ) - assert_equal 'text', doc.text( "/doc/item[@name='foo']" ) + assert_equal 'text', doc.text( "/*:doc/*:item[@name='foo']" ) assert_equal "name='foo'", - doc.root.elements["item"].attribute("name", "ns").inspect + doc.root.elements["*:item"].attribute("name", "ns").inspect assert_equal "<item name='foo'>text</item>", - doc.root.elements["item[@name='foo']"].to_s + doc.root.elements["*:item[@name='foo']"].to_s end def test_ticket_135 @@ -1453,8 +1533,10 @@ def test_ticket_138 "" => attribute("version", "1.0"), }, } - assert_equal(expected, doc.root.attributes) - assert_equal(expected, REXML::Document.new(doc.root.to_s).root.attributes) + assert_equal(expected, + doc.root.attributes.to_h) + assert_equal(expected, + REXML::Document.new(doc.root.to_s).root.attributes.to_h) end def test_empty_doc diff --git a/test/rexml/test_doctype.rb b/test/rexml/test_doctype.rb index d728cba606..b20d30ae2f 100644 --- a/test/rexml/test_doctype.rb +++ b/test/rexml/test_doctype.rb @@ -1,6 +1,4 @@ # frozen_string_literal: false -require 'test/unit' -require 'rexml/document' module REXMLTests class TestDocTypeAccessor < Test::Unit::TestCase @@ -41,6 +39,12 @@ 
def test_to_s @doc_type_public_system.to_s) end + def test_to_s_apostrophe + @doc_type_public_system.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root PUBLIC '#{@pubid}' '#{@sysid}'>", + @doc_type_public_system.to_s) + end + def test_system assert_equal([ @sysid, @@ -82,6 +86,35 @@ def test_to_s assert_equal("<!DOCTYPE root SYSTEM \"root.dtd\">", doctype.to_s) end + + def test_to_s_apostrophe + doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root.dtd"]) + doc = REXML::Document.new + doc << doctype + doctype.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root SYSTEM 'root.dtd'>", + doctype.to_s) + end + + def test_to_s_single_quote_apostrophe + doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root'.dtd"]) + doc = REXML::Document.new + doc << doctype + # This isn't used. + doctype.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root SYSTEM \"root'.dtd\">", + doctype.to_s) + end + + def test_to_s_double_quote + doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root\".dtd"]) + doc = REXML::Document.new + doc << doctype + # This isn't used. + doctype.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root SYSTEM 'root\".dtd'>", + doctype.to_s) + end end end @@ -92,6 +125,25 @@ def test_to_s assert_equal("<!DOCTYPE root PUBLIC \"pub\" \"root.dtd\">", doctype.to_s) end + + def test_to_s_apostrophe + doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"]) + doc = REXML::Document.new + doc << doctype + doctype.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root PUBLIC 'pub' 'root.dtd'>", + doctype.to_s) + end + + def test_to_s_apostrophe_include_apostrophe + doctype = REXML::DocType.new(["root", "PUBLIC", "pub'", "root.dtd"]) + doc = REXML::Document.new + doc << doctype + # This isn't used. 
+ doctype.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root PUBLIC \"pub'\" 'root.dtd'>", + doctype.to_s) + end end class TestSystemLiteral < self @@ -101,6 +153,25 @@ def test_to_s doctype.to_s) end + def test_to_s_apostrophe + doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"]) + doc = REXML::Document.new + doc << doctype + doctype.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root PUBLIC 'pub' 'root.dtd'>", + doctype.to_s) + end + + def test_to_s_apostrophe_include_apostrophe + doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root'.dtd"]) + doc = REXML::Document.new + doc << doctype + # This isn't used. + doctype.parent.context[:prologue_quote] = :apostrophe + assert_equal("<!DOCTYPE root PUBLIC 'pub' \"root'.dtd\">", + doctype.to_s) + end + def test_to_s_double_quote doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root\".dtd"]) assert_equal("<!DOCTYPE root PUBLIC \"pub\" 'root\".dtd'>", @@ -143,6 +214,62 @@ def test_to_s_system_literal_include_double_quote decl(@id, "system\"literal").to_s) end + def test_to_s_apostrophe + document = REXML::Document.new(<<-XML) + <!DOCTYPE root SYSTEM "urn:x-test:sysid" [ + #{decl(@id, @uri).to_s} + ]> + <root/> + XML + document.context[:prologue_quote] = :apostrophe + notation = document.doctype.notations[0] + assert_equal("<!NOTATION #{@name} PUBLIC '#{@id}' '#{@uri}'>", + notation.to_s) + end + + def test_to_s_apostrophe_pubid_literal_include_apostrophe + document = REXML::Document.new(<<-XML) + <!DOCTYPE root SYSTEM "urn:x-test:sysid" [ + #{decl("#{@id}'", @uri).to_s} + ]> + <root/> + XML + # This isn't used for PubidLiteral because PubidChar includes '. 
+ document.context[:prologue_quote] = :apostrophe + notation = document.doctype.notations[0] + assert_equal("<!NOTATION #{@name} PUBLIC \"#{@id}'\" '#{@uri}'>", + notation.to_s) + end + + def test_to_s_apostrophe_system_literal_include_apostrophe + document = REXML::Document.new(<<-XML) + <!DOCTYPE root SYSTEM "urn:x-test:sysid" [ + #{decl(@id, "system'literal").to_s} + ]> + <root/> + XML + # This isn't used for SystemLiteral because SystemLiteral includes '. + document.context[:prologue_quote] = :apostrophe + notation = document.doctype.notations[0] + assert_equal("<!NOTATION #{@name} PUBLIC '#{@id}' \"system'literal\">", + notation.to_s) + end + + def test_to_s_apostrophe_system_literal_include_double_quote + document = REXML::Document.new(<<-XML) + <!DOCTYPE root SYSTEM "urn:x-test:sysid" [ + #{decl(@id, "system\"literal").to_s} + ]> + <root/> + XML + # This isn't used for SystemLiteral because SystemLiteral includes ". + # But quoted by ' because SystemLiteral includes ". + document.context[:prologue_quote] = :apostrophe + notation = document.doctype.notations[0] + assert_equal("<!NOTATION #{@name} PUBLIC '#{@id}' 'system\"literal'>", + notation.to_s) + end + private def decl(id, uri) REXML::NotationDecl.new(@name, "PUBLIC", id, uri) @@ -170,6 +297,48 @@ def test_to_s_include_double_quote decl("#{@id}\"").to_s) end + def test_to_s_apostrophe + document = REXML::Document.new(<<-XML) + <!DOCTYPE root SYSTEM "urn:x-test:sysid" [ + #{decl(@id).to_s} + ]> + <root/> + XML + document.context[:prologue_quote] = :apostrophe + notation = document.doctype.notations[0] + assert_equal("<!NOTATION #{@name} SYSTEM '#{@id}'>", + notation.to_s) + end + + def test_to_s_apostrophe_include_apostrophe + document = REXML::Document.new(<<-XML) + <!DOCTYPE root SYSTEM "urn:x-test:sysid" [ + #{decl("#{@id}'").to_s} + ]> + <root/> + XML + # This isn't used for SystemLiteral because SystemLiteral includes '. 
+ document.context[:prologue_quote] = :apostrophe + notation = document.doctype.notations[0] + assert_equal("<!NOTATION #{@name} SYSTEM \"#{@id}'\">", + notation.to_s) + end + + def test_to_s_apostrophe_include_double_quote + document = REXML::Document.new(<<-XML) + <!DOCTYPE root SYSTEM "urn:x-test:sysid" [ + #{decl("#{@id}\"").to_s} + ]> + <root/> + XML + # This isn't used for SystemLiteral because SystemLiteral includes ". + # But quoted by ' because SystemLiteral includes ". + document.context[:prologue_quote] = :apostrophe + notation = document.doctype.notations[0] + assert_equal("<!NOTATION #{@name} SYSTEM '#{@id}\"'>", + notation.to_s) + end + private def decl(id) REXML::NotationDecl.new(@name, "SYSTEM", nil, id) diff --git a/test/rexml/test_document.rb b/test/rexml/test_document.rb index c0faae4ae0..cda4354f31 100644 --- a/test/rexml/test_document.rb +++ b/test/rexml/test_document.rb @@ -1,13 +1,10 @@ # -*- coding: utf-8 -*- # frozen_string_literal: false -require "rexml/document" -require "test/unit" - module REXMLTests class TestDocument < Test::Unit::TestCase def test_version_attributes_to_s - doc = REXML::Document.new(<<-eoxml) + doc = REXML::Document.new(<<~eoxml) <?xml version="1.0" encoding="UTF-8" standalone="no"?> <svg id="svg2" xmlns:sodipodi="foo" @@ -34,17 +31,9 @@ def test_new end class EntityExpansionLimitTest < Test::Unit::TestCase - def setup - @default_entity_expansion_limit = REXML::Security.entity_expansion_limit - end - - def teardown - REXML::Security.entity_expansion_limit = @default_entity_expansion_limit - end - class GeneralEntityTest < self def test_have_value - xml = <<EOF + xml = <<XML <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE member [ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> @@ -58,23 +47,23 @@ def test_have_value <member> &a; </member> -EOF +XML doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + assert_raise(RuntimeError.new("entity expansion has grown too large")) do doc.root.children.first.value end - 
REXML::Security.entity_expansion_limit = 100 - assert_equal(100, REXML::Security.entity_expansion_limit) + doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + doc.entity_expansion_limit = 100 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end assert_equal(101, doc.entity_expansion_count) end def test_empty_value - xml = <<EOF + xml = <<XML <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE member [ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> @@ -88,23 +77,23 @@ def test_empty_value <member> &a; </member> -EOF +XML doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end - REXML::Security.entity_expansion_limit = 100 - assert_equal(100, REXML::Security.entity_expansion_limit) + doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + doc.entity_expansion_limit = 100 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end assert_equal(101, doc.entity_expansion_count) end def test_with_default_entity - xml = <<EOF + xml = <<XML <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE member [ <!ENTITY a "a"> @@ -115,68 +104,35 @@ def test_with_default_entity &a2; < </member> -EOF +XML - REXML::Security.entity_expansion_limit = 4 doc = REXML::Document.new(xml) + doc.entity_expansion_limit = 4 assert_equal("\na\na a\n<\n", doc.root.children.first.value) - REXML::Security.entity_expansion_limit = 3 + doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + doc.entity_expansion_limit = 3 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end end - end - class ParameterEntityTest < self - def test_have_value - xml = <<EOF -<!DOCTYPE root [ - <!ENTITY % a 
"BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM."> - <!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;"> - <!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;"> - <!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;"> - <!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;"> - <!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;"> - <!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;"> - <!ENTITY test "test %g;"> -]> -<cd></cd> -EOF - - assert_raise(REXML::ParseException) do - REXML::Document.new(xml) - end - REXML::Security.entity_expansion_limit = 100 - assert_equal(100, REXML::Security.entity_expansion_limit) - assert_raise(REXML::ParseException) do - REXML::Document.new(xml) - end - end - - def test_empty_value - xml = <<EOF -<!DOCTYPE root [ - <!ENTITY % a ""> - <!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;"> - <!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;"> - <!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;"> - <!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;"> - <!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;"> - <!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;"> - <!ENTITY test "test %g;"> + def test_entity_expansion_text_limit + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;"> + <!ENTITY b "&c;&d;&e;"> + <!ENTITY c "xxxxxxxxxx"> + <!ENTITY d "yyyyyyyyyy"> + <!ENTITY e "zzzzzzzzzz"> ]> -<cd></cd> -EOF +<member>&a;</member> + XML - assert_raise(REXML::ParseException) do - REXML::Document.new(xml) - end - REXML::Security.entity_expansion_limit = 100 - assert_equal(100, REXML::Security.entity_expansion_limit) - assert_raise(REXML::ParseException) do - REXML::Document.new(xml) - end + doc = REXML::Document.new(xml) + doc.entity_expansion_text_limit = 90 + assert_equal(90, doc.root.children.first.value.bytesize) end end end @@ -203,9 +159,45 @@ def test_xml_declaration_standalone assert_equal('no', 
doc.stand_alone?, bug2539) end + def test_each_recursive + xml_source = <<~XML + <?xml version="1.0" encoding="UTF-8" standalone="yes"?> + <root name="root"> + <x name="1_1"> + <x name="1_2"> + <x name="1_3" /> + </x> + </x> + <x name="2_1"> + <x name="2_2"> + <x name="2_3" /> + </x> + </x> + <!-- comment --> + <![CDATA[ cdata ]]> + </root> + XML + + expected_names = %w[ + root + 1_1 1_2 1_3 + 2_1 2_2 2_3 + ] + + document = REXML::Document.new(xml_source) + + # Node#each_recursive iterates elements only. + # This does not iterate XML declarations, comments, attributes, CDATA sections, etc. + actual_names = [] + document.each_recursive do |element| + actual_names << element.attributes["name"] + end + assert_equal(expected_names, actual_names) + end + class WriteTest < Test::Unit::TestCase def setup - @document = REXML::Document.new(<<-EOX) + @document = REXML::Document.new(<<-EOX.chomp) <?xml version="1.0" encoding="UTF-8"?> <message>Hello world!</message> EOX @@ -215,7 +207,7 @@ class ArgumentsTest < self def test_output output = "" @document.write(output) - assert_equal(<<-EOX, output) + assert_equal(<<-EOX.chomp, output) <?xml version='1.0' encoding='UTF-8'?> <message>Hello world!</message> EOX @@ -238,7 +230,7 @@ def test_transitive indent = 2 transitive = true @document.write(output, indent, transitive) - assert_equal(<<-EOX, output) + assert_equal(<<-EOX.chomp, output) <?xml version='1.0' encoding='UTF-8'?> <message >Hello world!</message @@ -267,7 +259,7 @@ def test_encoding japanese_text = "こんにちは" @document.root.text = japanese_text @document.write(output, indent, transitive, ie_hack, encoding) - assert_equal(<<-EOX.encode(encoding), output) + assert_equal(<<-EOX.chomp.encode(encoding), output) <?xml version='1.0' encoding='SHIFT_JIS'?> <message>#{japanese_text}</message> EOX @@ -278,7 +270,7 @@ class OptionsTest < self def test_output output = "" @document.write(:output => output) - assert_equal(<<-EOX, output) + assert_equal(<<-EOX.chomp, output) <?xml 
version='1.0' encoding='UTF-8'?> <message>Hello world!</message> EOX @@ -298,7 +290,7 @@ def test_indent def test_transitive output = "" @document.write(:output => output, :indent => 2, :transitive => true) - assert_equal(<<-EOX, output) + assert_equal(<<-EOX.chomp, output) <?xml version='1.0' encoding='UTF-8'?> <message >Hello world!</message @@ -320,7 +312,7 @@ def test_encoding japanese_text = "こんにちは" @document.root.text = japanese_text @document.write(:output => output, :encoding => encoding) - assert_equal(<<-EOX.encode(encoding), output) + assert_equal(<<-EOX.chomp.encode(encoding), output) <?xml version='1.0' encoding='SHIFT_JIS'?> <message>#{japanese_text}</message> EOX @@ -404,7 +396,7 @@ def test_utf_16 actual_xml = "" document.write(actual_xml) - expected_xml = <<-EOX.encode("UTF-16BE") + expected_xml = <<-EOX.chomp.encode("UTF-16BE") \ufeff<?xml version='1.0' encoding='UTF-16'?> <message>Hello world!</message> EOX diff --git a/test/rexml/test_element.rb b/test/rexml/test_element.rb index 82830b44e6..202168955c 100644 --- a/test/rexml/test_element.rb +++ b/test/rexml/test_element.rb @@ -1,8 +1,5 @@ # frozen_string_literal: false -require "test/unit/testcase" -require "rexml/document" - module REXMLTests class ElementTester < Test::Unit::TestCase def test_array_reference_string diff --git a/test/rexml/test_elements.rb b/test/rexml/test_elements.rb index a850e62209..c0f1b22007 100644 --- a/test/rexml/test_elements.rb +++ b/test/rexml/test_elements.rb @@ -1,6 +1,4 @@ # frozen_string_literal: false -require 'test/unit/testcase' -require 'rexml/document' module REXMLTests class ElementsTester < Test::Unit::TestCase @@ -115,5 +113,10 @@ def test_inject } assert_equal 6, r end + + def test_parent + doc = Document.new( "<a><b id='1'/><b id='2'/></a>" ) + assert_equal('a', doc.root.elements.parent.name) + end end end diff --git a/test/rexml/test_encoding.rb b/test/rexml/test_encoding.rb index 919db131f0..586f1f8b68 100644 --- a/test/rexml/test_encoding.rb +++ 
b/test/rexml/test_encoding.rb @@ -1,14 +1,15 @@ -# coding: binary +# coding: utf-8 # frozen_string_literal: false -require_relative "rexml_test_utils" - require 'rexml/source' require 'rexml/document' +require_relative "helper" + module REXMLTests class EncodingTester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture + include Helper::Global include REXML def setup @@ -23,7 +24,7 @@ def test_encoded_in_encoded_out doc = Document.new( @encoded ) doc.write( out="" ) out.force_encoding(::Encoding::ASCII_8BIT) - assert_equal( @encoded, out ) + assert_equal( @encoded.b, out ) end # Given an encoded document, try to change the encoding and write it out @@ -33,10 +34,10 @@ def test_encoded_in_change_out assert_equal("UTF-8", doc.encoding) REXML::Formatters::Default.new.write( doc.root, out="" ) out.force_encoding(::Encoding::ASCII_8BIT) - assert_equal( @not_encoded, out ) + assert_equal( @not_encoded.b, out ) char = XPath.first( doc, "/a/b/text()" ).to_s char.force_encoding(::Encoding::ASCII_8BIT) - assert_equal( "ĉ", char ) + assert_equal( "ĉ".b, char ) end # * Given an encoded document, try to write it to a different encoding @@ -44,7 +45,7 @@ def test_encoded_in_different_out doc = Document.new( @encoded ) REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) ) out.force_encoding(::Encoding::ASCII_8BIT) - assert_equal( @not_encoded, out ) + assert_equal( @not_encoded.b, out ) end # * Given a non-encoded document, change the encoding @@ -54,7 +55,7 @@ def test_in_change_out assert_equal("ISO-8859-3", doc.encoding) doc.write( out="" ) out.force_encoding(::Encoding::ASCII_8BIT) - assert_equal( @encoded, out ) + assert_equal( @encoded.b, out ) end # * Given a non-encoded document, write to a different encoding @@ -62,13 +63,13 @@ def test_in_different_out doc = Document.new( @not_encoded ) doc.write( Output.new( out="", "ISO-8859-3" ) ) out.force_encoding(::Encoding::ASCII_8BIT) - assert_equal( "<?xml 
version='1.0'?>#{@encoded_root}", out ) + assert_equal( "<?xml version='1.0'?>#{@encoded_root}".b, out ) end # * Given an encoded document, accessing text and attribute nodes # should provide UTF-8 text. def test_in_different_access - doc = Document.new <<-EOL + doc = Document.new <<~EOL <?xml version='1.0' encoding='ISO-8859-1'?> <a a="\xFF">\xFF</a> EOL @@ -80,7 +81,7 @@ def test_in_different_access def test_ticket_89 - doc = Document.new <<-EOL + doc = Document.new <<~EOL <?xml version="1.0" encoding="CP-1252" ?> <xml><foo></foo></xml> EOL @@ -97,7 +98,7 @@ def test_parse_utf16 end def test_parse_utf16_with_utf8_default_internal - EnvUtil.with_default_internal("UTF-8") do + with_default_internal("UTF-8") do utf16 = File.open(fixture_path("utf16.xml")) do |f| REXML::Document.new(f) end diff --git a/test/rexml/test_entity.rb b/test/rexml/test_entity.rb index 6dc6637074..89f8389445 100644 --- a/test/rexml/test_entity.rb +++ b/test/rexml/test_entity.rb @@ -1,7 +1,5 @@ # frozen_string_literal: false -require "test/unit/testcase" -require 'rexml/document' require 'rexml/entity' require 'rexml/source' @@ -61,8 +59,7 @@ def test_parse_entity def test_constructor one = [ %q{<!ENTITY % YN '"Yes"'>}, - %q{<!ENTITY % YN2 "Yes">}, - %q{<!ENTITY WhatHeSaid "He said %YN;">}, + %q{<!ENTITY WhatHeSaid 'He said "Yes"'>}, '<!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">', '<!ENTITY open-hatch2 @@ -73,8 +70,7 @@ def test_constructor NDATA gif>' ] source = %q{<!DOCTYPE foo [ <!ENTITY % YN '"Yes"'> - <!ENTITY % YN2 "Yes"> - <!ENTITY WhatHeSaid "He said %YN;"> + <!ENTITY WhatHeSaid 'He said "Yes"'> <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml"> <!ENTITY open-hatch2 @@ -106,6 +102,84 @@ def test_replace_entities assert_equal source, out end + def test_readers_with_reference + entity = REXML::Entity.new([:entitydecl, "a", "B", "%"]) + assert_equal([ + '<!ENTITY % a "B">', + "a", + "B", + "B", + "B", + ], + [ + 
entity.to_s, + entity.name, + entity.value, + entity.normalized, + entity.unnormalized, + ]) + end + + def test_readers_without_reference + entity = REXML::Entity.new([:entitydecl, "a", "&b;"]) + assert_equal([ + '<!ENTITY a "&b;">', + "a", + "&b;", + "&b;", + "&b;", + ], + [ + entity.to_s, + entity.name, + entity.value, + entity.normalized, + entity.unnormalized, + ]) + end + + def test_readers_with_nested_references + doctype = REXML::DocType.new('root') + doctype.add(REXML::Entity.new([:entitydecl, "a", "&b;"])) + doctype.add(REXML::Entity.new([:entitydecl, "b", "X"])) + assert_equal([ + "a", + "&b;", + "&b;", + "X", + "b", + "X", + "X", + "X", + ], + [ + doctype.entities["a"].name, + doctype.entities["a"].value, + doctype.entities["a"].normalized, + doctype.entities["a"].unnormalized, + doctype.entities["b"].name, + doctype.entities["b"].value, + doctype.entities["b"].normalized, + doctype.entities["b"].unnormalized, + ]) + end + + def test_parameter_entity_reference_forbidden_by_internal_subset_in_parser + source = '<!DOCTYPE root [ <!ENTITY % a "B" > <!ENTITY c "%a;" > ]><root/>' + parser = REXML::Parsers::BaseParser.new(source) + exception = assert_raise(REXML::ParseException) do + while parser.has_next? 
+ parser.pull + end + end + assert_equal(<<-DETAIL, exception.to_s) +Parameter entity references forbidden in internal subset: "%a;" +Line: 1 +Position: 54 +Last 80 unconsumed characters: + DETAIL + end + def test_entity_string_limit template = '<!DOCTYPE bomb [ <!ENTITY a "^" > ]> <bomb>$</bomb>' len = 5120 # 5k per entity @@ -124,22 +198,6 @@ def test_entity_string_limit end end - def test_entity_string_limit_for_parameter_entity - template = '<!DOCTYPE bomb [ <!ENTITY % a "^" > <!ENTITY bomb "$" > ]><root/>' - len = 5120 # 5k per entity - template.sub!(/\^/, "B" * len) - - # 10k is OK - entities = '%a;' * 2 # 5k entity * 2 = 10k - REXML::Document.new(template.sub(/\$/, entities)) - - # above 10k explodes - entities = '%a;' * 3 # 5k entity * 2 = 15k - assert_raise(REXML::ParseException) do - REXML::Document.new(template.sub(/\$/, entities)) - end - end - def test_raw source = '<!DOCTYPE foo [ <!ENTITY ent "replace"> @@ -163,7 +221,7 @@ def test_lazy_evaluation def test_entity_replacement source = %q{<!DOCTYPE foo [ <!ENTITY % YN '"Yes"'> - <!ENTITY WhatHeSaid "He said %YN;">]> + <!ENTITY WhatHeSaid 'He said "Yes"'>]> <a>&WhatHeSaid;</a>} d = REXML::Document.new( source ) diff --git a/test/rexml/test_instruction.rb b/test/rexml/test_instruction.rb new file mode 100644 index 0000000000..5451e367b8 --- /dev/null +++ b/test/rexml/test_instruction.rb @@ -0,0 +1,12 @@ +module REXMLTests + class InstructionTest < Test::Unit::TestCase + def test_target_nil + error = assert_raise(ArgumentError) do + REXML::Instruction.new(nil) + end + assert_equal("processing instruction target must be String or " + + "REXML::Instruction: <nil>", + error.message) + end + end +end diff --git a/test/rexml/test_jaxen.rb b/test/rexml/test_jaxen.rb index 9cd7bee8c2..38d2e959c6 100644 --- a/test/rexml/test_jaxen.rb +++ b/test/rexml/test_jaxen.rb @@ -1,5 +1,4 @@ # frozen_string_literal: false -require_relative 'rexml_test_utils' require "rexml/document" require "rexml/xpath" @@ -7,124 +6,127 @@ 
# Harness to test REXML's capabilities against the test suite from Jaxen # ryan.a.cox@gmail.com +require_relative "helper" + module REXMLTests class JaxenTester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture include REXML - def test_axis ; test("axis") ; end - def test_basic ; test("basic") ; end - def test_basicupdate ; test("basicupdate") ; end - def test_contents ; test("contents") ; end - def test_defaultNamespace ; test("defaultNamespace") ; end - def test_fibo ; test("fibo") ; end - def test_id ; test("id") ; end - def test_jaxen24 ; test("jaxen24") ; end - def test_lang ; test("lang") ; end - def test_message ; test("message") ; end - def test_moreover ; test("moreover") ; end - def test_much_ado ; test("much_ado") ; end - def test_namespaces ; test("namespaces") ; end - def test_nitf ; test("nitf") ; end - def test_numbers ; test("numbers") ; end - def test_pi ; test("pi") ; end - def test_pi2 ; test("pi2") ; end - def test_simple ; test("simple") ; end - def test_testNamespaces ; test("testNamespaces") ; end - def test_text ; test("text") ; end - def test_underscore ; test("underscore") ; end - def test_web ; test("web") ; end - def test_web2 ; test("web2") ; end + def test_axis ; process_test_case("axis") ; end + def test_basic ; process_test_case("basic") ; end + def test_basicupdate ; process_test_case("basicupdate") ; end + def test_contents ; process_test_case("contents") ; end + def test_defaultNamespace ; process_test_case("defaultNamespace") ; end + def test_fibo ; process_test_case("fibo") ; end + def test_id ; process_test_case("id") ; end + def test_jaxen24 ; process_test_case("jaxen24") ; end + def test_lang ; process_test_case("lang") ; end + # document() function for XSLT isn't supported + def _test_message ; process_test_case("message") ; end + def test_moreover ; process_test_case("moreover") ; end + def test_much_ado ; process_test_case("much_ado") ; end + def test_namespaces ; process_test_case("namespaces") ; 
end + def test_nitf ; process_test_case("nitf") ; end + # Exception should be considered + def _test_numbers ; process_test_case("numbers") ; end + def test_pi ; process_test_case("pi") ; end + def test_pi2 ; process_test_case("pi2") ; end + def test_simple ; process_test_case("simple") ; end + # TODO: namespace node is needed + def _test_testNamespaces ; process_test_case("testNamespaces") ; end + # document() function for XSLT isn't supported + def _test_text ; process_test_case("text") ; end + def test_underscore ; process_test_case("underscore") ; end + def _test_web ; process_test_case("web") ; end + def test_web2 ; process_test_case("web2") ; end private - def test( fname ) -# Dir.entries( xml_dir ).each { |fname| -# if fname =~ /\.xml$/ - doc = File.open(fixture_path(fname+".xml")) do |file| - Document.new(file) - end - XPath.each( doc, "/tests/document" ) {|e| handleDocument(e)} -# end -# } + def process_test_case(name) + xml_path = "#{name}.xml" + doc = File.open(fixture_path(xml_path)) do |file| + Document.new(file) + end + test_doc = File.open(fixture_path("test/tests.xml")) do |file| + Document.new(file) + end + XPath.each(test_doc, + "/tests/document[@url='xml/#{xml_path}']/context") do |context| + process_context(doc, context) + end end # processes a tests/document/context node - def handleContext( testDoc, ctxElement) - testCtx = XPath.match( testDoc, ctxElement.attributes["select"] )[0] - namespaces = {} - if testCtx.class == Element - testCtx.prefixes.each { |pre| handleNamespace( testCtx, pre, namespaces ) } - end + def process_context(doc, context) + test_context = XPath.match(doc, context.attributes["select"]) + namespaces = context.namespaces + namespaces.delete("var") + namespaces = nil if namespaces.empty? 
variables = {} - XPath.each( ctxElement, "@*[namespace-uri() = 'http://jaxen.org/test-harness/var']") { |attrib| handleVariable(testCtx, variables, attrib) } - XPath.each( ctxElement, "valueOf") { |e| handleValueOf(testCtx, variables, namespaces, e) } - XPath.each( ctxElement, "test[not(@exception) or (@exception != 'true') ]") { |e| handleNominalTest(testCtx,variables, namespaces, e) } - XPath.each( ctxElement, "test[@exception = 'true']") { |e| handleExceptionalTest(testCtx,variables, namespaces, e) } + var_namespace = "http://jaxen.org/test-harness/var" + XPath.each(context, + "@*[namespace-uri() = '#{var_namespace}']") do |attribute| + variables[attribute.name] = attribute.value + end + XPath.each(context, "valueOf") do |value| + process_value_of(test_context, variables, namespaces, value) + end + XPath.each(context, + "test[not(@exception) or (@exception != 'true')]") do |test| + process_nominal_test(test_context, variables, namespaces, test) + end + XPath.each(context, + "test[@exception = 'true']") do |test| + process_exceptional_test(test_context, variables, namespaces, test) + end end # processes a tests/document/context/valueOf or tests/document/context/test/valueOf node - def handleValueOf(ctx,variables, namespaces, valueOfElement) - expected = valueOfElement.text - got = XPath.match( ctx, valueOfElement.attributes["select"], namespaces, variables )[0] - assert_true( (got.nil? && expected.nil?) || !got.nil? ) - case got.class - when Element - assert_equal( got.class, Element ) - when Attribute, Text, Comment, TrueClass, FalseClass - assert_equal( expected, got.to_s ) - when Instruction - assert_equal( expected, got.content ) - when Integer - assert_equal( exected.to_f, got ) - when String - # normalize values for comparison - got = "" if got == nil or got == "" - expected = "" if expected == nil or expected == "" - assert_equal( expected, got ) - else - assert_fail( "Wassup?" 
) - end - end + def process_value_of(context, variables, namespaces, value_of) + expected = value_of.text + xpath = value_of.attributes["select"] + matched = XPath.match(context, xpath, namespaces, variables, strict: true) + message = user_message(context, xpath, matched) + assert_equal(expected || "", + REXML::Functions.string(matched), + message) + end # processes a tests/document/context/test node ( where @exception is false or doesn't exist ) - def handleNominalTest(ctx, variables, namespaces, testElement) - expected = testElement.attributes["count"] - got = XPath.match( ctx, testElement.attributes["select"], namespaces, variables ) + def process_nominal_test(context, variables, namespaces, test) + xpath = test.attributes["select"] + matched = XPath.match(context, xpath, namespaces, variables, strict: true) # might be a test with no count attribute, but nested valueOf elements - assert( expected == got.size.to_s ) if !expected.nil? + expected = test.attributes["count"] + if expected + assert_equal(Integer(expected, 10), + matched.size, + user_message(context, xpath, matched)) + end - XPath.each( testElement, "valueOf") { |e| - handleValueOf(got, variables, namespaces, e) - } + XPath.each(test, "valueOf") do |value_of| + process_value_of(matched, variables, namespaces, value_of) + end end # processes a tests/document/context/test node ( where @exception is true ) - def handleExceptionalTest(ctx, variables, namespaces, testElement) - assert_raise( Exception ) { - XPath.match( ctx, testElement.attributes["select"], namespaces, variables ) - } - end - - # processes a tests/document node - def handleDocument(docElement) - puts "- Processing document: #{docElement.attributes['url']}" - testFile = File.new( docElement.attributes["url"] ) - testDoc = Document.new testFile - XPath.each( docElement, "context") { |e| handleContext(testDoc, e) } - end - - # processes a variable definition in a namespace like <test var:foo="bar"> - def handleVariable( ctx, variables, attrib 
) - puts "--- Found attribute: #{attrib.name}" - variables[attrib.name] = attrib.value + def process_exceptional_test(context, variables, namespaces, test) + xpath = test.attributes["select"] + assert_raise(REXML::ParseException) do + XPath.match(context, xpath, namespaces, variables, strict: true) + end end - # processes a namespace definition like <test xmlns:foo="fiz:bang:bam"> - def handleNamespace( ctx, prefix, namespaces ) - puts "--- Found namespace: #{prefix}" - namespaces[prefix] = ctx.namespaces[prefix] + def user_message(context, xpath, matched) + message = "" + context.each_with_index do |node, i| + message << "Node#{i}:\n" + message << "#{node}\n" + end + message << "XPath: <#{xpath}>\n" + message << "Matched <#{matched}>" + message end - end end diff --git a/test/rexml/test_light.rb b/test/rexml/test_light.rb index 99bd9cac3f..fd39948daa 100644 --- a/test/rexml/test_light.rb +++ b/test/rexml/test_light.rb @@ -1,18 +1,20 @@ # frozen_string_literal: false -require_relative "rexml_test_utils" + require "rexml/light/node" require "rexml/parsers/lightparser" +require_relative "helper" + module REXMLTests class LightTester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture include REXML::Light def test_parse_large - xml_string = fixture_path("documentation.xml") + xml_string = File.read(fixture_path("documentation.xml")) parser = REXML::Parsers::LightParser.new(xml_string) tag, content = parser.parse - assert_equal([:document, :text], [tag, content.first]) + assert_equal([:document, :xmldecl], [tag, content.first]) end # FIXME INCOMPLETE @@ -62,7 +64,7 @@ def test_access_child_elements assert_equal( 'c', a[1].name ) end - def test_itterate_over_children + def test_iterate_over_children foo = make_small_document ctr = 0 foo[0].each { ctr += 1 } diff --git a/test/rexml/test_lightparser.rb b/test/rexml/test_lightparser.rb index 1aeac072dd..507e067b17 100644 --- a/test/rexml/test_lightparser.rb +++ b/test/rexml/test_lightparser.rb @@ 
-1,10 +1,12 @@ # frozen_string_literal: false -require_relative 'rexml_test_utils' + require 'rexml/parsers/lightparser' +require_relative "helper" + module REXMLTests class LightParserTester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture include REXML def test_parsing File.open(fixture_path("documentation.xml")) do |f| diff --git a/test/rexml/test_listener.rb b/test/rexml/test_listener.rb index 322d368be8..ae834a1fb6 100644 --- a/test/rexml/test_listener.rb +++ b/test/rexml/test_listener.rb @@ -1,14 +1,13 @@ # coding: binary # frozen_string_literal: false -require_relative 'rexml_test_utils' - -require 'rexml/document' require 'rexml/streamlistener' +require_relative "helper" + module REXMLTests class BaseTester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture def test_empty return unless defined? @listener # Empty. diff --git a/test/rexml/test_martin_fowler.rb b/test/rexml/test_martin_fowler.rb index da685a80ec..ce27d72576 100644 --- a/test/rexml/test_martin_fowler.rb +++ b/test/rexml/test_martin_fowler.rb @@ -1,9 +1,7 @@ # frozen_string_literal: false -require 'test/unit' -require 'rexml/document' module REXMLTests - class OrderTester < Test::Unit::TestCase + class OrderTesterMF < Test::Unit::TestCase DOC = <<END <paper> <title>Remove this element and figs order differently</title> @@ -18,12 +16,12 @@ class OrderTester < Test::Unit::TestCase </paper> END - def initialize n + def setup @doc = REXML::Document.new(DOC) @figs = REXML::XPath.match(@doc,'//figure') @names = @figs.collect {|f| f.attributes['src']} - super end + def test_fig1 assert_equal 'fig1', @figs[0].attributes['src'] end diff --git a/test/rexml/test_namespace.rb b/test/rexml/test_namespace.rb index 90e1d36945..57a0a3e5ad 100644 --- a/test/rexml/test_namespace.rb +++ b/test/rexml/test_namespace.rb @@ -1,11 +1,10 @@ # frozen_string_literal: false -require_relative "rexml_test_utils" -require "rexml/document" +require_relative "helper" module 
REXMLTests class TestNamespace < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture include REXML def setup diff --git a/test/rexml/test_order.rb b/test/rexml/test_order.rb index 807d9faa96..6dc0204691 100644 --- a/test/rexml/test_order.rb +++ b/test/rexml/test_order.rb @@ -1,14 +1,15 @@ # frozen_string_literal: false -require_relative 'rexml_test_utils' -require 'rexml/document' + begin require 'zlib' rescue LoadError end +require_relative "helper" + module REXMLTests class OrderTester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture TESTDOC = <<END <a> diff --git a/test/rexml/test_preceding_sibling.rb b/test/rexml/test_preceding_sibling.rb index d89a1e1c90..7e661eb00c 100644 --- a/test/rexml/test_preceding_sibling.rb +++ b/test/rexml/test_preceding_sibling.rb @@ -1,7 +1,5 @@ # frozen_string_literal: false # ISSUE 32 -require 'test/unit' -require 'rexml/document' module REXMLTests # daz - for report by Dan Kohn in: diff --git a/test/rexml/test_pullparser.rb b/test/rexml/test_pullparser.rb index 31b5b74bd6..bdf8be17fa 100644 --- a/test/rexml/test_pullparser.rb +++ b/test/rexml/test_pullparser.rb @@ -1,5 +1,4 @@ # frozen_string_literal: false -require "test/unit/testcase" require 'rexml/parsers/pullparser' @@ -63,6 +62,63 @@ def test_entity_replacement end end + def test_character_references + source = '<root><a>A</a><b>B</b></root>' + parser = REXML::Parsers::PullParser.new( source ) + + events = {} + element_name = '' + while parser.has_next? + event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + + assert_equal('A', events['a']) + assert_equal("B", events['b']) + end + + def test_text_entity_references + source = '<root><a><P> <I> <B> Text </B> </I></a></root>' + parser = REXML::Parsers::PullParser.new( source ) + + events = [] + while parser.has_next? 
+ event = parser.pull + case event.event_type + when :text + events << event[1] + end + end + + assert_equal(["<P> <I> <B> Text </B> </I>"], events) + end + + def test_text_content_with_line_breaks + source = "<root><a>A</a><b>B\n</b><c>C\r\n</c></root>" + parser = REXML::Parsers::PullParser.new( source ) + + events = {} + element_name = '' + while parser.has_next? + event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + + assert_equal('A', events['a']) + assert_equal("B\n", events['b']) + assert_equal("C\n", events['c']) + end + def test_peek_unshift source = "<a><b/></a>" REXML::Parsers::PullParser.new(source) @@ -99,5 +155,152 @@ def test_peek end assert_equal( 0, names.length ) end + + class EntityExpansionLimitTest < Test::Unit::TestCase + class GeneralEntityTest < self + def test_have_value + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> + <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> + <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> + <!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"> +]> +<member> +&a; +</member> + XML + + parser = REXML::Parsers::PullParser.new(source) + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + while parser.has_next? + parser.pull + end + end + end + + def test_empty_value + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> + <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> + <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> + <!ENTITY e ""> +]> +<member> +&a; +</member> + XML + + parser = REXML::Parsers::PullParser.new(source) + parser.entity_expansion_limit = 100000 + while parser.has_next? 
+ parser.pull + end + assert_equal(11111, parser.entity_expansion_count) + + parser = REXML::Parsers::PullParser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + while parser.has_next? + parser.pull + end + end + assert do + parser.entity_expansion_count > REXML::Security.entity_expansion_limit + end + end + + def test_with_default_entity + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "a"> + <!ENTITY a2 "&a; &a;"> +]> +<member> +&a; +&a2; +< +</member> + XML + + parser = REXML::Parsers::PullParser.new(source) + parser.entity_expansion_limit = 4 + while parser.has_next? + parser.pull + end + + parser = REXML::Parsers::PullParser.new(source) + parser.entity_expansion_limit = 3 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + while parser.has_next? + parser.pull + end + end + end + + def test_with_only_default_entities + member_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>" + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<member> +#{member_value} +</member> + XML + + parser = REXML::Parsers::PullParser.new(source) + events = {} + element_name = '' + while parser.has_next? 
+ event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + + expected_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>" + assert_equal(expected_value, events['member'].strip) + assert_equal(0, parser.entity_expansion_count) + assert do + events['member'].bytesize > REXML::Security.entity_expansion_text_limit + end + end + + def test_entity_expansion_text_limit + source = <<-XML +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;"> + <!ENTITY b "&c;&d;&e;"> + <!ENTITY c "xxxxxxxxxx"> + <!ENTITY d "yyyyyyyyyy"> + <!ENTITY e "zzzzzzzzzz"> +]> +<member>&a;</member> + XML + + parser = REXML::Parsers::PullParser.new(source) + parser.entity_expansion_text_limit = 90 + events = {} + element_name = '' + while parser.has_next? + event = parser.pull + case event.event_type + when :start_element + element_name = event[0] + when :text + events[element_name] = event[1] + end + end + assert_equal(90, events['member'].size) + end + end + end end end diff --git a/test/rexml/test_rexml_issuezilla.rb b/test/rexml/test_rexml_issuezilla.rb index 1c54c9d53d..fb9f75de9b 100644 --- a/test/rexml/test_rexml_issuezilla.rb +++ b/test/rexml/test_rexml_issuezilla.rb @@ -1,10 +1,10 @@ # frozen_string_literal: false -require_relative 'rexml_test_utils' -require 'rexml/document' + +require_relative "helper" module REXMLTests class TestIssuezillaParsing < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture def test_rexml doc = File.open(fixture_path("ofbiz-issues-full-177.xml")) do |f| REXML::Document.new(f) diff --git a/test/rexml/test_sax.rb b/test/rexml/test_sax.rb index 00539f0d4c..fa1d1cb612 100644 --- a/test/rexml/test_sax.rb +++ b/test/rexml/test_sax.rb @@ -1,12 +1,13 @@ # frozen_string_literal: false -require_relative "rexml_test_utils" + require 'rexml/sax2listener' require 'rexml/parsers/sax2parser' -require 'rexml/document' + +require_relative "helper" module 
REXMLTests class SAX2Tester < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture include REXML def test_characters d = Document.new( "<A>@blah@</A>" ) @@ -32,6 +33,17 @@ def test_entity_replacement assert_equal '--1234--', results[1] end + def test_characters_predefined_entities + source = '<root><a><P> <I> <B> Text </B> </I></a></root>' + + sax = Parsers::SAX2Parser.new( source ) + results = [] + sax.listen(:characters) {|x| results << x } + sax.parse + + assert_equal(["<P> <I> <B> Text </B> </I>"], results) + end + def test_sax2 File.open(fixture_path("documentation.xml")) do |f| parser = Parsers::SAX2Parser.new( f ) @@ -89,6 +101,177 @@ def test_sax2 end end + def test_without_namespace + xml = <<-XML +<root > + <a att1='1' att2='2' att3='<'> + <b /> + </a> +</root> + XML + + parser = REXML::Parsers::SAX2Parser.new(xml) + elements = [] + parser.listen(:start_element) do |uri, localname, qname, attrs| + elements << [uri, localname, qname, attrs] + end + parser.parse + assert_equal([ + [nil, "root", "root", {}], + [nil, "a", "a", {"att1"=>"1", "att2"=>"2", "att3"=>"<"}], + [nil, "b", "b", {}] + ], elements) + end + + def test_with_namespace + xml = <<-XML +<root xmlns="http://example.org/default" + xmlns:foo="http://example.org/foo" + xmlns:bar="http://example.org/bar"> + <a foo:att='1' bar:att='2' att='<'> + <bar:b /> + </a> +</root> + XML + + parser = REXML::Parsers::SAX2Parser.new(xml) + elements = [] + parser.listen(:start_element) do |uri, localname, qname, attrs| + elements << [uri, localname, qname, attrs] + end + parser.parse + assert_equal([ + ["http://example.org/default", "root", "root", {"xmlns"=>"http://example.org/default", "xmlns:bar"=>"http://example.org/bar", "xmlns:foo"=>"http://example.org/foo"}], + ["http://example.org/default", "a", "a", {"att"=>"<", "bar:att"=>"2", "foo:att"=>"1"}], + ["http://example.org/bar", "b", "bar:b", {}] + ], elements) + end + + class EntityExpansionLimitTest < Test::Unit::TestCase + class 
GeneralEntityTest < self + def test_have_value + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> + <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> + <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> + <!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"> +]> +<member> +&a; +</member> + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + sax.parse + end + end + + def test_empty_value + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> + <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> + <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> + <!ENTITY e ""> +]> +<member> +&a; +</member> + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + sax.entity_expansion_limit = 100000 + sax.parse + assert_equal(11111, sax.entity_expansion_count) + + sax = REXML::Parsers::SAX2Parser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + sax.parse + end + assert do + sax.entity_expansion_count > REXML::Security.entity_expansion_limit + end + end + + def test_with_default_entity + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "a"> + <!ENTITY a2 "&a; &a;"> +]> +<member> +&a; +&a2; +< +</member> + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + sax.entity_expansion_limit = 4 + sax.parse + + sax = REXML::Parsers::SAX2Parser.new(source) + sax.entity_expansion_limit = 3 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + sax.parse + end + end + + def test_with_only_default_entities + member_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>" + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<member> +#{member_value} 
+</member> + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + text_value = nil + sax.listen(:characters, ["member"]) do |text| + text_value = text + end + sax.parse + + expected_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>" + assert_equal(expected_value, text_value.strip) + assert_equal(0, sax.entity_expansion_count) + assert do + text_value.bytesize > REXML::Security.entity_expansion_text_limit + end + end + + def test_entity_expansion_text_limit + source = <<-XML +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;"> + <!ENTITY b "&c;&d;&e;"> + <!ENTITY c "xxxxxxxxxx"> + <!ENTITY d "yyyyyyyyyy"> + <!ENTITY e "zzzzzzzzzz"> +]> +<member>&a;</member> + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + sax.entity_expansion_text_limit = 90 + text_size = nil + sax.listen(:characters, ["member"]) do |text| + text_size = text.size + end + sax.parse + assert_equal(90, text_size) + end + end + end + # used by test_simple_doctype_listener # submitted by Jeff Barczewski class SimpleDoctypeListener @@ -110,7 +293,7 @@ def doctype(name, pub_sys, long_name, uri) # test simple non-entity doctype in sax listener # submitted by Jeff Barczewski def test_simple_doctype_listener - xml = <<-END + xml = <<~END <?xml version="1.0"?> <!DOCTYPE greeting PUBLIC "Hello Greeting DTD" "http://foo/hello.dtd"> <greeting>Hello, world!</greeting> @@ -141,8 +324,8 @@ def test_simple_doctype_listener # test doctype with missing name, should throw ParseException # submitted by Jeff Barczewseki - def test_doctype_with_mising_name_throws_exception - xml = <<-END + def test_doctype_with_missing_name_throws_exception + xml = <<~END <?xml version="1.0"?> <!DOCTYPE > <greeting>Hello, world!</greeting> diff --git a/test/rexml/test_stream.rb b/test/rexml/test_stream.rb index d7ceedc70e..7917760ae6 100644 --- a/test/rexml/test_stream.rb +++ b/test/rexml/test_stream.rb @@ -1,6 +1,5 @@ # frozen_string_literal: false -require "test/unit/testcase" -require "rexml/document" + require 
'rexml/streamlistener' require 'stringio' @@ -15,8 +14,8 @@ class StreamTester < Test::Unit::TestCase def test_listener data = %Q{<session1 user="han" password="rootWeiler" />\n<session2 user="han" password="rootWeiler" />} - b = RequestReader.new( data ) - b = RequestReader.new( data ) + RequestReader.new( data ) + RequestReader.new( data ) end def test_ticket_49 @@ -88,8 +87,175 @@ def entity(content) assert_equal(["ISOLat2"], listener.entities) end + + def test_entity_replacement + source = <<-XML +<!DOCTYPE foo [ + <!ENTITY la "1234"> + <!ENTITY lala "--&la;--"> + <!ENTITY lalal "&la;&la;"> +]><a><la>&la;</la><lala>&lala;</lala></a> + XML + + listener = MyListener.new + class << listener + attr_accessor :text_values + def text(text) + @text_values << text + end + end + listener.text_values = [] + REXML::Document.parse_stream(source, listener) + assert_equal(["1234", "--1234--"], listener.text_values) + end + + def test_characters_predefined_entities + source = '<root><a><P> <I> <B> Text </B> </I></a></root>' + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Document.parse_stream(source, listener) + assert_equal("<P> <I> <B> Text </B> </I>", listener.text_value) + end end + class EntityExpansionLimitTest < Test::Unit::TestCase + def test_have_value + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> + <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> + <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> + <!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"> +]> +<member> +&a; +</member> + XML + + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + REXML::Document.parse_stream(source, MyListener.new) + end + end + + def test_empty_value + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a 
"&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> + <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> + <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> + <!ENTITY e ""> +]> +<member> +&a; +</member> + XML + + listener = MyListener.new + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.entity_expansion_limit = 100000 + parser.parse + assert_equal(11111, parser.entity_expansion_count) + + parser = REXML::Parsers::StreamParser.new( source, listener ) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + parser.parse + end + assert do + parser.entity_expansion_count > REXML::Security.entity_expansion_limit + end + end + + def test_with_default_entity + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE member [ + <!ENTITY a "a"> + <!ENTITY a2 "&a; &a;"> +]> +<member> +&a; +&a2; +< +</member> + XML + + listener = MyListener.new + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.entity_expansion_limit = 4 + parser.parse + + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.entity_expansion_limit = 3 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + parser.parse + end + end + + def test_with_only_default_entities + member_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>" + source = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<member> +#{member_value} +</member> + XML + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.parse + + expected_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>" + assert_equal(expected_value, listener.text_value.strip) + assert_equal(0, parser.entity_expansion_count) + assert do + listener.text_value.bytesize > 
REXML::Security.entity_expansion_text_limit + end + end + + def test_entity_expansion_text_limit + source = <<-XML +<!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;"> + <!ENTITY b "&c;&d;&e;"> + <!ENTITY c "xxxxxxxxxx"> + <!ENTITY d "yyyyyyyyyy"> + <!ENTITY e "zzzzzzzzzz"> +]> +<member>&a;</member> + XML + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.entity_expansion_text_limit = 90 + parser.parse + assert_equal(90, listener.text_value.size) + end + end # For test_listener class RequestReader diff --git a/test/rexml/test_text.rb b/test/rexml/test_text.rb index 3f8036eee3..bae216562e 100644 --- a/test/rexml/test_text.rb +++ b/test/rexml/test_text.rb @@ -1,10 +1,55 @@ # frozen_string_literal: false -require "rexml/text" module REXMLTests class TextTester < Test::Unit::TestCase include REXML + def test_new_text_response_whitespace_default + text = Text.new("a b\t\tc", true) + assert_equal("a b\tc", Text.new(text).to_s) + end + + def test_new_text_response_whitespace_true + text = Text.new("a b\t\tc", true) + assert_equal("a b\t\tc", Text.new(text, true).to_s) + end + + def test_new_text_raw_default + text = Text.new("&lt;", false, nil, true) + assert_equal("&lt;", Text.new(text).to_s) + end + + def test_new_text_raw_false + text = Text.new("&lt;", false, nil, true) + assert_equal("&amp;lt;", Text.new(text, false, nil, false).to_s) + end + + def test_new_text_entity_filter_default + document = REXML::Document.new(<<-XML) +<!DOCTYPE root [ + <!ENTITY a "aaa"> + <!ENTITY b "bbb"> +]> +<root/> + XML + text = Text.new("aaa bbb", false, document.root, nil, ["a"]) + assert_equal("aaa &b;", + Text.new(text, false, document.root).to_s) + end + + def test_new_text_entity_filter_custom + document = REXML::Document.new(<<-XML) +<!DOCTYPE root [ + <!ENTITY a "aaa"> + <!ENTITY b "bbb"> +]> +<root/> + XML + 
text = Text.new("aaa bbb", false, document.root, nil, ["a"]) + assert_equal("&a; bbb", + Text.new(text, false, document.root, nil, ["b"]).to_s) + end + def test_shift_operator_chain text = Text.new("original\r\n") text << "append1\r\n" << "append2\r\n" @@ -18,5 +63,11 @@ def test_shift_operator_cache text << "append3\r\n" << "append4\r\n" assert_equal("original\nappend1\nappend2\nappend3\nappend4\n", text.to_s) end + + def test_clone + text = Text.new("&lt; <") + assert_equal(text.to_s, + text.clone.to_s) + end end end diff --git a/test/rexml/test_text_check.rb b/test/rexml/test_text_check.rb new file mode 100644 index 0000000000..11cf65a36f --- /dev/null +++ b/test/rexml/test_text_check.rb @@ -0,0 +1,121 @@ +# frozen_string_literal: false + +module REXMLTests + class TextCheckTester < Test::Unit::TestCase + + def check(string) + REXML::Text.check(string, REXML::Text::NEEDS_A_SECOND_CHECK, nil) + end + + def assert_check(string) + assert_nothing_raised { check(string) } + end + + def assert_check_failed(string, illegal_part) + message = "Illegal character #{illegal_part.inspect} in raw string #{string.inspect}" + assert_raise(RuntimeError.new(message)) do + check(string) + end + end + + class TestValid < self + def test_entity_name_start_char_colon + assert_check("&:;") + end + + def test_entity_name_start_char_under_score + assert_check("&_;") + end + + def test_entity_name_mix + assert_check("&A.b-0123;") + end + + def test_character_reference_decimal + assert_check("¢") + end + + def test_character_reference_hex + assert_check("") + end + + def test_entity_name_non_ascii + # U+3042 HIRAGANA LETTER A + # U+3044 HIRAGANA LETTER I + assert_check("&\u3042\u3044;") + end + + def test_normal_string + assert_check("foo") + end + end + + class TestInvalid < self + def test_lt + assert_check_failed("<;", "<") + end + + def test_lt_mix + assert_check_failed("ab<cd", "<") + end + + def test_reference_empty + assert_check_failed("&;", "&") + end + + def 
test_entity_reference_missing_colon + assert_check_failed("&", "&") + end + + def test_character_reference_decimal_garbage_at_the_end + # U+0030 DIGIT ZERO + assert_check_failed("0x;", "&") + end + + def test_character_reference_decimal_space_at_the_start + # U+0030 DIGIT ZERO + assert_check_failed("&# 48;", "&") + end + + def test_character_reference_decimal_control_character + # U+0008 BACKSPACE + assert_check_failed("", "") + end + + def test_character_reference_format_hex_0x + # U+0041 LATIN CAPITAL LETTER A + assert_check_failed("�x41;", "�x41;") + end + + def test_character_reference_format_hex_00x + # U+0041 LATIN CAPITAL LETTER A + assert_check_failed("�x41;", "�x41;") + end + + def test_character_reference_hex_garbage_at_the_end + # U+0030 DIGIT ZERO + assert_check_failed("Hx;", "&") + end + + def test_character_reference_hex_space_at_the_start + # U+0030 DIGIT ZERO + assert_check_failed("&#x 30;", "&") + end + + def test_character_reference_hex_surrogate_block + # U+0D800 SURROGATE PAIR + assert_check_failed("�", "�") + end + + def test_entity_name_non_ascii_symbol + # U+00BF INVERTED QUESTION MARK + assert_check_failed("&\u00BF;", "&") + end + + def test_entity_name_new_line + # U+0026 AMPERSAND + assert_check_failed("&\namp\nx;", "&") + end + end + end +end diff --git a/test/rexml/test_ticket_80.rb b/test/rexml/test_ticket_80.rb index ab6a57efaf..daebdc5972 100644 --- a/test/rexml/test_ticket_80.rb +++ b/test/rexml/test_ticket_80.rb @@ -7,9 +7,6 @@ # copy: (C) CopyLoose 2006 Bib Development Team <bib-devel>at<uberdev>dot<org> #------------------------------------------------------------------------------ -require 'test/unit' -require 'rexml/document' - module REXMLTests class Ticket80 < Test::Unit::TestCase diff --git a/test/rexml/test_validation_rng.rb b/test/rexml/test_validation_rng.rb index b5b50450e0..4872396b84 100644 --- a/test/rexml/test_validation_rng.rb +++ b/test/rexml/test_validation_rng.rb @@ -1,7 +1,5 @@ # frozen_string_literal: false 
-require "test/unit/testcase" -require "rexml/document" require "rexml/validation/relaxng" module REXMLTests @@ -9,7 +7,7 @@ class RNGValidation < Test::Unit::TestCase include REXML def test_validate - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -26,7 +24,7 @@ def test_validate </element> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A><B><C X="x"><E/><E/></C><D/></B></A>} ) @@ -35,7 +33,7 @@ def test_validate def test_sequence - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -47,7 +45,7 @@ def test_sequence </element> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B><C/><C/><D/></B></A>} ) @@ -58,7 +56,7 @@ def test_sequence def test_choice - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -72,7 +70,7 @@ def test_choice </choice> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B><C/><D/></B></A>} ) @@ -81,7 +79,7 @@ def test_choice end def test_optional - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -92,7 +90,7 @@ def test_optional </optional> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A><B/></A>} ) @@ -102,7 +100,7 @@ def test_optional end def test_zero_or_more - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -113,7 +111,7 @@ def test_zero_or_more </zeroOrMore> </element> </element> - } + XML validator = 
REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A><B/></A>} ) no_error( validator, %q{<A><B><C/></B></A>} ) @@ -121,7 +119,7 @@ def test_zero_or_more error( validator, %q{<A><B><D/></B></A>} ) error( validator, %q{<A></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -135,7 +133,7 @@ def test_zero_or_more </zeroOrMore> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A><B/></A>} ) @@ -145,7 +143,7 @@ def test_zero_or_more end def test_one_or_more - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -156,7 +154,7 @@ def test_one_or_more </oneOrMore> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -167,13 +165,13 @@ def test_one_or_more end def test_attribute - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <attribute name="X"/> <attribute name="Y"/> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -183,7 +181,7 @@ def test_attribute end def test_choice_attributes - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <choice> @@ -191,7 +189,7 @@ def test_choice_attributes <attribute name="Y"/> </choice> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A X="1" Y="1"/>} ) @@ -201,7 +199,7 @@ def test_choice_attributes end def test_choice_attribute_element - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <choice> @@ -209,7 +207,7 @@ def test_choice_attribute_element <element name="B"/> 
</choice> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A X="1"><B/></A>} ) @@ -219,12 +217,12 @@ def test_choice_attribute_element end def test_empty - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <empty/> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -233,12 +231,12 @@ def test_empty end def test_text_val - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <text/> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -247,7 +245,7 @@ def test_text_val end def test_choice_text - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <choice> @@ -255,7 +253,7 @@ def test_choice_text <text/> </choice> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/>Text</A>} ) @@ -265,7 +263,7 @@ def test_choice_text end def test_group - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <choice> @@ -276,7 +274,7 @@ def test_group </group> </choice> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/><C/></A>} ) @@ -284,7 +282,7 @@ def test_group no_error( validator, %q{<A><B/></A>} ) no_error( validator, %q{<A><C/><D/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"/> @@ -293,7 +291,7 @@ def test_group <element name="D"/> </group> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/><C/></A>} ) @@ -304,14 +302,14 @@ def test_group def test_value # Values as text nodes - 
rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> <value>VaLuE</value> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B>X</B></A>} ) @@ -319,7 +317,7 @@ def test_value no_error( validator, %q{<A><B>VaLuE</B></A>} ) # Values as text nodes, via choice - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -329,7 +327,7 @@ def test_value </choice> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -338,14 +336,14 @@ def test_value no_error( validator, %q{<A><B>Option 2</B></A>} ) # Attribute values - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <attribute name="B"> <value>VaLuE</value> </attribute> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A/>} ) @@ -354,7 +352,7 @@ def test_value no_error( validator, %q{<A B="VaLuE"/>} ) # Attribute values via choice - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <attribute name="B"> @@ -364,7 +362,7 @@ def test_value </choice> </attribute> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A B=""/>} ) @@ -374,7 +372,7 @@ def test_value end def test_interleave - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -385,7 +383,7 @@ def test_interleave </interleave> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B><C/></B></A>} ) @@ -398,7 +396,7 @@ def test_interleave end def test_mixed - rng = %q{ + rng = <<-XML 
<?xml version="1.0" encoding="UTF-8"?> <element name="A" xmlns="http://relaxng.org/ns/structure/1.0"> <element name="B"> @@ -407,7 +405,7 @@ def test_mixed </mixed> </element> </element> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A><B>Text<D/></B></A>} ) @@ -415,7 +413,7 @@ def test_mixed end def test_ref_sequence - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -431,7 +429,7 @@ def test_ref_sequence </element> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A><B X=''/><B X=''/></A>} ) @@ -439,7 +437,7 @@ def test_ref_sequence end def test_ref_choice - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -455,7 +453,7 @@ def test_ref_choice <element name="C"/> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><D/></A>} ) @@ -463,7 +461,7 @@ def test_ref_choice no_error( validator, %q{<A><B/></A>} ) no_error( validator, %q{<A><C/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -479,7 +477,7 @@ def test_ref_choice </choice> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><D/></A>} ) @@ -487,7 +485,7 @@ def test_ref_choice no_error( validator, %q{<A><B/></A>} ) no_error( validator, %q{<A><C/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -504,7 +502,7 @@ def test_ref_choice <element name="C"/> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/><C/></A>} ) @@ -515,7 +513,7 @@ def test_ref_choice def test_ref_zero_plus - rng = %q{ + rng = <<-XML <?xml version="1.0" 
encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -532,7 +530,7 @@ def test_ref_zero_plus </element> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -540,7 +538,7 @@ def test_ref_zero_plus no_error( validator, %q{<A><B X=''/></A>} ) no_error( validator, %q{<A><B X=''/><B X=''/><B X=''/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -557,7 +555,7 @@ def test_ref_zero_plus </zeroOrMore> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -568,7 +566,7 @@ def test_ref_zero_plus def test_ref_one_plus - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -585,7 +583,7 @@ def test_ref_one_plus </element> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -593,7 +591,7 @@ def test_ref_one_plus no_error( validator, %q{<A><B X=''/></A>} ) no_error( validator, %q{<A><B X=''/><B X=''/><B X=''/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -610,7 +608,7 @@ def test_ref_one_plus </oneOrMore> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -620,7 +618,7 @@ def test_ref_one_plus end def test_ref_interleave - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -636,7 +634,7 @@ def test_ref_interleave <element name="C"/> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -645,7 +643,7 @@ def test_ref_interleave no_error( validator, %q{<A><B/><C/></A>} ) no_error( 
validator, %q{<A><C/><B/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -661,7 +659,7 @@ def test_ref_interleave </interleave> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -670,7 +668,7 @@ def test_ref_interleave no_error( validator, %q{<A><B/><C/></A>} ) no_error( validator, %q{<A><C/><B/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -689,7 +687,7 @@ def test_ref_interleave <element name="C"/> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A><B/></A>} ) @@ -700,7 +698,7 @@ def test_ref_interleave end def test_ref_recurse - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -717,7 +715,7 @@ def test_ref_recurse </element> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) error( validator, %q{<A></A>} ) @@ -726,7 +724,7 @@ def test_ref_recurse end def test_ref_optional - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -742,7 +740,7 @@ def test_ref_optional </element> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A></A>} ) @@ -750,7 +748,7 @@ def test_ref_optional error( validator, %q{<A><B/><B/></A>} ) error( validator, %q{<A><C/></A>} ) - rng = %q{ + rng = <<-XML <?xml version="1.0" encoding="UTF-8"?> <grammar xmlns="http://relaxng.org/ns/structure/1.0"> <start> @@ -766,7 +764,7 @@ def test_ref_optional </optional> </define> </grammar> - } + XML validator = REXML::Validation::RelaxNG.new( rng ) no_error( validator, %q{<A></A>} ) diff --git a/test/rexml/test_xml_declaration.rb 
b/test/rexml/test_xml_declaration.rb index a4d97c41d0..6a1f4df053 100644 --- a/test/rexml/test_xml_declaration.rb +++ b/test/rexml/test_xml_declaration.rb @@ -1,20 +1,16 @@ -# -*- coding: utf-8 -*- # frozen_string_literal: false # # Created by Henrik Mårtensson on 2007-02-18. # Copyright (c) 2007. All rights reserved. -require "rexml/document" -require "test/unit" - module REXMLTests class TestXmlDeclaration < Test::Unit::TestCase def setup - xml = <<-'END_XML' + xml = <<~XML <?xml encoding= 'UTF-8' standalone='yes'?> <root> </root> - END_XML + XML @doc = REXML::Document.new xml @root = @doc.root @xml_declaration = @doc.children[0] @@ -32,5 +28,18 @@ def test_has_sibling assert_kind_of(REXML::XMLDecl, @root.previous_sibling.previous_sibling) assert_kind_of(REXML::Element, @xml_declaration.next_sibling.next_sibling) end + + def test_write_prologue_quote + @doc.context[:prologue_quote] = :quote + assert_equal("<?xml version=\"1.0\" " + + "encoding=\"UTF-8\" standalone=\"yes\"?>", + @xml_declaration.to_s) + end + + def test_is_writethis_attribute_copied_by_clone + assert_equal(true, @xml_declaration.clone.writethis) + @xml_declaration.nowrite + assert_equal(false, @xml_declaration.clone.writethis) + end end end diff --git a/test/rexml/xpath/test_attribute.rb b/test/rexml/xpath/test_attribute.rb index 9304db4e0d..b778ff81f8 100644 --- a/test/rexml/xpath/test_attribute.rb +++ b/test/rexml/xpath/test_attribute.rb @@ -1,13 +1,11 @@ # frozen_string_literal: false -require 'test/unit' -require 'rexml/document' module REXMLTests class TestXPathAttribute < Test::Unit::TestCase def setup @xml = <<-XML <?xml version="1.0" encoding="UTF-8"?> -<root> +<root xmlns="http://example.com/"> <child name="one">child1</child> <child name="two">child2</child> <child name="three">child3</child> @@ -26,5 +24,13 @@ def test_xpath_each children = REXML::XPath.each(@document, "/root/child[@name='two']") assert_equal(["child2"], children.collect(&:text)) end + + def test_no_namespace + children 
= REXML::XPath.match(@document, + "/root/child[@nothing:name='two']", + "" => "http://example.com/", + "nothing" => "") + assert_equal(["child2"], children.collect(&:text)) + end end end diff --git a/test/rexml/xpath/test_axis_preceding_sibling.rb b/test/rexml/xpath/test_axis_preceding_sibling.rb index 5842c6bc50..9c44ad63cc 100644 --- a/test/rexml/xpath/test_axis_preceding_sibling.rb +++ b/test/rexml/xpath/test_axis_preceding_sibling.rb @@ -1,6 +1,4 @@ # frozen_string_literal: false -require "test/unit/testcase" -require "rexml/document" module REXMLTests class TestXPathAxisPredcedingSibling < Test::Unit::TestCase diff --git a/test/rexml/xpath/test_axis_self.rb b/test/rexml/xpath/test_axis_self.rb new file mode 100644 index 0000000000..4e422f54bf --- /dev/null +++ b/test/rexml/xpath/test_axis_self.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: false + +module REXMLTests + class TestXPathAxisSelf < Test::Unit::TestCase + def test_only + doc = REXML::Document.new("<root><child/></root>") + assert_equal([doc.root], + REXML::XPath.match(doc.root, ".")) + end + + def test_have_predicate + doc = REXML::Document.new("<root><child/></root>") + error = assert_raise(REXML::ParseException) do + REXML::XPath.match(doc.root, ".[child]") + end + assert_equal("Garbage component exists at the end: <[child]>: <.[child]>", + error.message) + end + end +end diff --git a/test/rexml/xpath/test_base.rb b/test/rexml/xpath/test_base.rb index 5079fdd75a..771cc48c58 100644 --- a/test/rexml/xpath/test_base.rb +++ b/test/rexml/xpath/test_base.rb @@ -1,11 +1,10 @@ # frozen_string_literal: false -require_relative "../rexml_test_utils" -require "rexml/document" +require_relative "helper" module REXMLTests class TestXPathBase < Test::Unit::TestCase - include REXMLTestUtils + include Helper::Fixture include REXML SOURCE = <<-EOF <a id='1'> @@ -369,11 +368,15 @@ def test_complex assert_equal 2, c end + def match(xpath) + XPath.match(@@doc, xpath).collect(&:to_s) + end + def test_grouping - t = 
XPath.first( @@doc, "a/d/*[name()='d' and (name()='f' or name()='q')]" ) - assert_nil t - t = XPath.first( @@doc, "a/d/*[(name()='d' and name()='f') or name()='q']" ) - assert_equal 'q', t.name + assert_equal([], + match("a/d/*[name()='d' and (name()='f' or name()='q')]")) + assert_equal(["<q id='19'/>"], + match("a/d/*[(name()='d' and name()='f') or name()='q']")) end def test_preceding @@ -450,6 +453,46 @@ def test_following # puts results #end + def test_nested_predicates + doc = Document.new <<-EOF + <div> + <div> + <test>ab</test> + <test>cd</test> + </div> + <div> + <test>ef</test> + <test>gh</test> + </div> + <div> + <test>hi</test> + </div> + </div> + EOF + + matches = XPath.match(doc, '(/div/div/test[0])').map(&:text) + assert_equal [], matches + matches = XPath.match(doc, '(/div/div/test[1])').map(&:text) + assert_equal ["ab", "ef", "hi"], matches + matches = XPath.match(doc, '(/div/div/test[2])').map(&:text) + assert_equal ["cd", "gh"], matches + matches = XPath.match(doc, '(/div/div/test[3])').map(&:text) + assert_equal [], matches + + matches = XPath.match(doc, '(/div/div/test[1])[1]').map(&:text) + assert_equal ["ab"], matches + matches = XPath.match(doc, '(/div/div/test[1])[2]').map(&:text) + assert_equal ["ef"], matches + matches = XPath.match(doc, '(/div/div/test[1])[3]').map(&:text) + assert_equal ["hi"], matches + matches = XPath.match(doc, '(/div/div/test[2])[1]').map(&:text) + assert_equal ["cd"], matches + matches = XPath.match(doc, '(/div/div/test[2])[2]').map(&:text) + assert_equal ["gh"], matches + matches = XPath.match(doc, '(/div/div/test[2])[3]').map(&:text) + assert_equal [], matches + end + # Contributed by Mike Stok def test_starts_with source = <<-EOF @@ -610,7 +653,7 @@ def test_comparisons source = "<a><b id='1'/><b id='2'/><b id='3'/></a>" doc = REXML::Document.new(source) - # NOTE TO SER: check that number() is required + # NOTE: check that number() is required assert_equal 2, REXML::XPath.match(doc, "//b[number(@id) > 1]").size 
assert_equal 3, REXML::XPath.match(doc, "//b[number(@id) >= 1]").size assert_equal 1, REXML::XPath.match(doc, "//b[number(@id) <= 1]").size @@ -632,29 +675,36 @@ def test_spaces <c id='a'/> </b> <c id='b'/> + <c id='c'/> + <c/> </a>") - assert_equal( 1, REXML::XPath.match(doc, - "//*[local-name()='c' and @id='b']").size ) - assert_equal( 1, REXML::XPath.match(doc, - "//*[ local-name()='c' and @id='b' ]").size ) - assert_equal( 1, REXML::XPath.match(doc, - "//*[ local-name() = 'c' and @id = 'b' ]").size ) - assert_equal( 1, - REXML::XPath.match(doc, '/a/c[@id]').size ) - assert_equal( 1, - REXML::XPath.match(doc, '/a/c[(@id)]').size ) - assert_equal( 1, - REXML::XPath.match(doc, '/a/c[ @id ]').size ) - assert_equal( 1, - REXML::XPath.match(doc, '/a/c[ (@id) ]').size ) - assert_equal( 1, - REXML::XPath.match(doc, '/a/c[( @id )]').size ) - assert_equal( 1, REXML::XPath.match(doc.root, - '/a/c[ ( @id ) ]').size ) - assert_equal( 1, REXML::XPath.match(doc, - '/a/c [ ( @id ) ] ').size ) - assert_equal( 1, REXML::XPath.match(doc, - ' / a / c [ ( @id ) ] ').size ) + match = lambda do |xpath| + REXML::XPath.match(doc, xpath).collect(&:to_s) + end + assert_equal(["<c id='b'/>"], + match.call("//*[local-name()='c' and @id='b']")) + assert_equal(["<c id='b'/>"], + match.call("//*[ local-name()='c' and @id='b' ]")) + assert_equal(["<c id='b'/>"], + match.call("//*[ local-name() = 'c' and @id = 'b' ]")) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/a/c[@id]')) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/a/c[(@id)]')) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/a/c[ @id ]')) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/a/c[ (@id) ]')) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/a/c[( @id )]')) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/a/c[ ( @id ) ]')) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/a/c [ ( @id ) ] ')) + assert_equal(["<c id='b'/>", "<c 
id='c'/>"], + match.call(' / a / c [ ( @id ) ] ')) + assert_equal(["<c id='b'/>", "<c id='c'/>"], + match.call('/ a / child:: c [( @id )] /')) end def test_text_nodes @@ -692,11 +742,22 @@ def test_auto_string_value end def test_ordering - source = "<a><b><c id='1'/><c id='2'/></b><b><d id='1'/><d id='2'/></b></a>" + source = <<-XML +<a> + <b> + <c id='1'/> + <c id='2'/> + </b> + <b> + <d id='3'/> + <d id='4'/> + </b> +</a> + XML d = REXML::Document.new( source ) r = REXML::XPath.match( d, %q{/a/*/*[1]} ) - assert_equal( 1, r.size ) - r.each { |el| assert_equal( '1', el.attribute('id').value ) } + assert_equal(["1", "3"], + r.collect {|element| element.attribute("id").value}) end def test_descendant_or_self_ordering @@ -830,31 +891,44 @@ def test_a_star_star_one </a> EOL d = REXML::Document.new( string ) - c1 = XPath.match( d, '/a/*/*[1]' ) - assert_equal( 1, c1.length ) - assert_equal( 'c1', c1[0].name ) + cs = XPath.match( d, '/a/*/*[1]' ) + assert_equal(["c1", "c2"], cs.collect(&:name)) end def test_sum - d = Document.new("<a>"+ - "<b>1</b><b>2</b><b>3</b>"+ - "<c><d>1</d><d>2</d></c>"+ - "<e att='1'/><e att='2'/>"+ - "</a>") - - for v,p in [[6, "sum(/a/b)"], - [9, "sum(//b | //d)"], - [3, "sum(/a/e/@*)"] ] - assert_equal( v, XPath::match( d, p ).first ) - end + d = Document.new(<<-XML) +<a> + <b>1</b> + <b>2</b> + <b>3</b> + <c> + <d>1</d> + <d>2</d> + </c> + <e att='1'/> + <e att='2'/> +</a> + XML + + assert_equal([6], XPath::match(d, "sum(/a/b)")) + assert_equal([9], XPath::match(d, "sum(//b | //d)")) + assert_equal([3], XPath::match(d, "sum(/a/e/@*)")) end def test_xpath_namespace - d = REXML::Document.new("<tag1 xmlns='ns1'><tag2 xmlns='ns2'/><tada>xa</tada></tag1>") - x = d.root - num = 0 - x.each_element('tada') { num += 1 } - assert_equal(1, num) + d = REXML::Document.new(<<-XML) +<tag1 xmlns='ns1'> + <tag2 xmlns='ns2'/> + <tada>xa</tada> + <tada xmlns=''>xb</tada> +</tag1> + XML + actual = [] + d.root.each_element('tada') do |element| + actual << 
element.to_s + end + assert_equal(["<tada>xa</tada>", "<tada xmlns=''>xb</tada>"], + actual) end def test_ticket_39 @@ -990,7 +1064,7 @@ def test_ticket_59 </a>" d = Document.new(data) res = d.elements.to_a( "//c" ).collect {|e| e.attributes['id'].to_i} - assert_equal( res, res.sort ) + assert_equal((1..12).to_a, res) end def ticket_61_fixture(doc, xpath) diff --git a/test/rexml/xpath/test_compare.rb b/test/rexml/xpath/test_compare.rb new file mode 100644 index 0000000000..11d11e55be --- /dev/null +++ b/test/rexml/xpath/test_compare.rb @@ -0,0 +1,252 @@ +# frozen_string_literal: false + +module REXMLTests + class TestXPathCompare < Test::Unit::TestCase + def match(xml, xpath) + document = REXML::Document.new(xml) + REXML::XPath.match(document, xpath) + end + + class TestEqual < self + class TestNodeSet < self + def test_boolean_true + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child/> + <child/> +</root> + XML + assert_equal([true], + match(xml, "/root/child=true()")) + end + + def test_boolean_false + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> +</root> + XML + assert_equal([false], + match(xml, "/root/child=true()")) + end + + def test_number_true + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>100</child> + <child>200</child> +</root> + XML + assert_equal([true], + match(xml, "/root/child=100")) + end + + def test_number_false + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>100</child> + <child>200</child> +</root> + XML + assert_equal([false], + match(xml, "/root/child=300")) + end + + def test_string_true + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>text</child> + <child>string</child> +</root> + XML + assert_equal([true], + match(xml, "/root/child='string'")) + end + + def test_string_false + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>text</child> + <child>string</child> +</root> + XML + assert_equal([false], + match(xml, 
"/root/child='nonexistent'")) + end + end + + class TestBoolean < self + def test_number_true + xml = "<root/>" + assert_equal([true], + match(xml, "true()=1")) + end + + def test_number_false + xml = "<root/>" + assert_equal([false], + match(xml, "true()=0")) + end + + def test_string_true + xml = "<root/>" + assert_equal([true], + match(xml, "true()='string'")) + end + + def test_string_false + xml = "<root/>" + assert_equal([false], + match(xml, "true()=''")) + end + end + + class TestNumber < self + def test_string_true + xml = "<root/>" + assert_equal([true], + match(xml, "1='1'")) + end + + def test_string_false + xml = "<root/>" + assert_equal([false], + match(xml, "1='2'")) + end + end + end + + class TestGreaterThan < self + class TestNodeSet < self + def test_boolean_truex + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child/> +</root> + XML + assert_equal([true], + match(xml, "/root/child>false()")) + end + + def test_boolean_false + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child/> +</root> + XML + assert_equal([false], + match(xml, "/root/child>true()")) + end + + def test_number_true + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>100</child> + <child>200</child> +</root> + XML + assert_equal([true], + match(xml, "/root/child>199")) + end + + def test_number_false + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>100</child> + <child>200</child> +</root> + XML + assert_equal([false], + match(xml, "/root/child>200")) + end + + def test_string_true + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>100</child> + <child>200</child> +</root> + XML + assert_equal([true], + match(xml, "/root/child>'199'")) + end + + def test_string_false + xml = <<-XML +<?xml version="1.0" encoding="UTF-8"?> +<root> + <child>100</child> + <child>200</child> +</root> + XML + assert_equal([false], + match(xml, "/root/child>'200'")) + end + end + + class TestBoolean < 
self + def test_string_true + xml = "<root/>" + assert_equal([true], + match(xml, "true()>'0'")) + end + + def test_string_false + xml = "<root/>" + assert_equal([false], + match(xml, "true()>'1'")) + end + end + + class TestNumber < self + def test_boolean_true + xml = "<root/>" + assert_equal([true], + match(xml, "true()>0")) + end + + def test_number_false + xml = "<root/>" + assert_equal([false], + match(xml, "true()>1")) + end + + def test_string_true + xml = "<root/>" + assert_equal([true], + match(xml, "1>'0'")) + end + + def test_string_false + xml = "<root/>" + assert_equal([false], + match(xml, "1>'1'")) + end + end + + class TestString < self + def test_string_true + xml = "<root/>" + assert_equal([true], + match(xml, "'1'>'0'")) + end + + def test_string_false + xml = "<root/>" + assert_equal([false], + match(xml, "'1'>'1'")) + end + end + end + end +end diff --git a/test/rexml/xpath/test_node.rb b/test/rexml/xpath/test_node.rb index e0e958e70f..742bfbbab6 100644 --- a/test/rexml/xpath/test_node.rb +++ b/test/rexml/xpath/test_node.rb @@ -1,10 +1,6 @@ # -*- coding: utf-8 -*- # frozen_string_literal: false -require_relative "../rexml_test_utils" - -require "rexml/document" - module REXMLTests class TestXPathNode < Test::Unit::TestCase def matches(xml, xpath) diff --git a/test/rexml/xpath/test_predicate.rb b/test/rexml/xpath/test_predicate.rb index ce1aaa324b..278e3765b6 100644 --- a/test/rexml/xpath/test_predicate.rb +++ b/test/rexml/xpath/test_predicate.rb @@ -1,13 +1,12 @@ # frozen_string_literal: false -require "test/unit/testcase" -require "rexml/document" + require "rexml/xpath" require "rexml/parsers/xpathparser" module REXMLTests class TestXPathPredicate < Test::Unit::TestCase include REXML - SRC=<<-EOL + SRC=<<~EOL <article> <section role="subdivision" id="1"> <para>free flowing text.</para> @@ -29,6 +28,15 @@ def setup end + def test_predicate_only + error = assert_raise(REXML::ParseException) do + do_path("[article]") + end + 
assert_equal("Garbage component exists at the end: " + + "<[article]>: <[article]>", + error.message) + end + def test_predicates_parent path = '//section[../self::section[@role="division"]]' m = do_path( path ) diff --git a/test/rexml/xpath/test_text.rb b/test/rexml/xpath/test_text.rb index 7222388e1b..dccc4c83c0 100644 --- a/test/rexml/xpath/test_text.rb +++ b/test/rexml/xpath/test_text.rb @@ -1,6 +1,5 @@ # frozen_string_literal: false -require 'test/unit' -require 'rexml/document' + require 'rexml/element' require 'rexml/xpath'
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor