self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
emails =[("Charles <charles.bidule@truc.fr>", "<p>Charles <charles.bidule@truc.fr></p>"),
- ("Dupuis <'tr/-:dupuis><#><$'@truc.baz.fr>", "<p>Dupuis <'tr/-:dupuis><#><$'@truc.baz.fr></p>"),
+ ("Dupuis <'tr/-: ${dupuis><#><$'@truc.baz.fr>", "<p>Dupuis <'tr/-: ${dupuis><#><$'@truc.baz.fr></p>"),
("Technical <service/technical+2@open.com>", "<p>Technical <service/technical+2@open.com></p>"),
("Div nico <div-nico@open.com>", "<p>Div nico <div-nico@open.com></p>")]
for email in emails:
src = ustr(src, errors='replace')
# HTML-escape e-mail address "tags" such as <user@example.com> so they are kept as text instead of being parsed as markup
- part = re.compile(r"(<\s*[^\s]+@[^\s]+\s*>)", re.IGNORECASE | re.DOTALL)
+ part = re.compile(r"(<\s*([^\s]+|'([^']|(?:\\)\\(\\\\)*')+'|\"([^\"]|(?:\\)\\(\\\\)*\")+\")@[^\s]+\s*>)", re.IGNORECASE | re.DOTALL)
src = part.sub(lambda m: cgi.escape(m.group(1)), src)
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)