Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP4:GA
python-lxml
0002-CVE-2020-27783.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 0002-CVE-2020-27783.patch of Package python-lxml
From a105ab8dc262ec6735977c25c13f0bdfcdec72a7 Mon Sep 17 00:00:00 2001 From: Stefan Behnel <stefan_ml@behnel.de> Date: Thu, 26 Nov 2020 09:20:52 +0100 Subject: [PATCH] Prevent combinations of <math/svg> and <style> to sneak JavaScript through the HTML cleaner. --- CHANGES.txt | 11 +++++++++++ src/lxml/html/clean.py | 22 ++++++++++++++-------- src/lxml/html/tests/test_clean.py | 10 ++++++++++ src/lxml/html/tests/test_clean.txt | 18 +++++++++++++++--- 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 7afec7e28..e3b771401 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,17 @@ lxml changelog ============== +4.6.2 (2020-11-26) +================== + +Bugs fixed +---------- + +* A vulnerability (CVE-2020-27783) was discovered in the HTML Cleaner by Yaniv Nizry, + which allowed JavaScript to pass through. The cleaner now removes more sneaky + "style" content. + + 4.6.1 (2020-10-18) ================== diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py index 7b51981d7..0fa1544c4 100644 --- a/src/lxml/html/clean.py +++ b/src/lxml/html/clean.py @@ -61,12 +61,15 @@ # This is an IE-specific construct you can have in a stylesheet to # run some Javascript: -_css_javascript_re = re.compile( - r'expression\s*\(.*?\)', re.S|re.I) +_replace_css_javascript = re.compile( + r'expression\s*\(.*?\)', re.S|re.I).sub # Do I have to worry about @\nimport? 
-_css_import_re = re.compile( - r'@\s*import', re.I) +_replace_css_import = re.compile( + r'@\s*import', re.I).sub + +_looks_like_tag_content = re.compile( + r'</?[a-zA-Z]+|\son[a-zA-Z]+\s*=', ).search # All kinds of schemes besides just javascript: that can cause # execution: @@ -304,8 +307,8 @@ def __call__(self, doc): if not self.inline_style: for el in _find_styled_elements(doc): old = el.get('style') - new = _css_javascript_re.sub('', old) - new = _css_import_re.sub('', new) + new = _replace_css_javascript('', old) + new = _replace_css_import('', new) if self._has_sneaky_javascript(new): # Something tricky is going on... del el.attrib['style'] @@ -317,9 +320,9 @@ def __call__(self, doc): el.drop_tree() continue old = el.text or '' - new = _css_javascript_re.sub('', old) + new = _replace_css_javascript('', old) # The imported CSS can do anything; we just can't allow: - new = _css_import_re.sub('', old) + new = _replace_css_import('', new) if self._has_sneaky_javascript(new): # Something tricky is going on... el.text = '/* deleted */' @@ -539,6 +542,9 @@ def _has_sneaky_javascript(self, style): if '</noscript' in style: # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">' return True + if _looks_like_tag_content(style): + # e.g. '<math><style><img src=x onerror=alert(1)></style></math>' + return True return False def clean_html(self, html): diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py index 3c8ee252f..0e669f98d 100644 --- a/src/lxml/html/tests/test_clean.py +++ b/src/lxml/html/tests/test_clean.py @@ -113,6 +113,16 @@ def test_sneaky_noscript_in_style(self): b'<noscript><style>/* deleted */</style></noscript>', lxml.html.tostring(clean_html(s))) + def test_sneaky_js_in_math_style(self): + # This gets parsed as <math> -> <style>"..."</style> + # thus passing any tag/script/whatever content through into the output. 
+ html = '<math><style><img src=x onerror=alert(1)></style></math>' + s = lxml.html.fragment_fromstring(html) + + self.assertEqual( + b'<math><style>/* deleted */</style></math>', + lxml.html.tostring(clean_html(s))) + def test_suite(): suite = unittest.TestSuite() diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt index 275be07c6..18e6c7e61 100644 --- a/src/lxml/html/tests/test_clean.txt +++ b/src/lxml/html/tests/test_clean.txt @@ -104,7 +104,11 @@ >>> print(Cleaner(page_structure=False, comments=False).clean_html(doc)) <html> <head> - <style>/* deleted */</style> + <style> + body {background-image: url()}; + div {background-image: url()}; + div {color: }; + </style> </head> <body> <!-- I am interpreted for EVIL! --> @@ -126,7 +130,11 @@ >>> print(Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc)) <html> <head> - <style>/* deleted */</style> + <style> + body {background-image: url()}; + div {background-image: url()}; + div {color: }; + </style> </head> <body> <a href="">a link</a>
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor