Skip to content

Commit

Permalink
Fix ReDoS caused by very large character references using repeated 0s (
Browse files Browse the repository at this point in the history
…#169)

This patch fixes the ReDoS that is caused by a long run of leading 0s in a
character reference (like `&#00000000...`).

This occurs in Ruby 3.1 or earlier.
  • Loading branch information
Watson1978 authored Jul 16, 2024
1 parent b8a5f4c commit 0af55fa
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 14 deletions.
48 changes: 34 additions & 14 deletions lib/rexml/text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -151,25 +151,45 @@ def Text.check string, pattern, doctype
end
end

# context sensitive
string.scan(pattern) do
if $1[-1] != ?;
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
elsif $1[0] == ?&
if $5 and $5[0] == ?#
case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
when *VALID_CHAR
pos = 0
while (index = string.index(/<|&/, pos))
if string[index] == "<"
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
end

unless (end_index = string.index(/[^\s];/, index + 1))
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
end

value = string[(index + 1)..end_index]
if /\s/.match?(value)
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
end

if value[0] == "#"
character_reference = value[1..-1]

unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
if character_reference[0] == "x" || character_reference[-1] == "x"
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
else
raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
end
# FIXME: below can't work but this needs API change.
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
# if !doctype or !doctype.entities.has_key?($3)
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
# end
end

case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
when *VALID_CHAR
else
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
end
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
end

pos = end_index + 1
end

string
end

def node_type
Expand Down
17 changes: 17 additions & 0 deletions test/parse/test_character_reference.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
require "test/unit"
require "core_assertions"

require "rexml/document"

module REXMLTests
  # Performance regression test for parsing numeric character references
  # that are padded with a long run of leading zeros (e.g. "&#000...097;").
  class TestParseCharacterReference < Test::Unit::TestCase
    include Test::Unit::CoreAssertions

    # Parses a document whose attribute value holds a character reference
    # with an increasing number of leading zeros and asserts that the
    # parse time grows linearly with that number.
    def test_gt_linear_performance_many_preceding_zeros
      zero_counts = [10000, 50000, 100000, 150000, 200000]
      assert_linear_performance(zero_counts, rehearsal: 10) do |zero_count|
        leading_zeros = "0" * zero_count
        xml = '<test testing="&#' + leading_zeros + '97;"/>'
        REXML::Document.new(xml)
      end
    end
  end
end

0 comments on commit 0af55fa

Please sign in to comment.