[FIX] tools: html_sanitize: keep mako tags (<% ... %>), required for example in the...
author Thibault Delavallée <tde@openerp.com>
Thu, 7 Aug 2014 09:21:41 +0000 (11:21 +0200)
committer Thibault Delavallée <tde@openerp.com>
Thu, 7 Aug 2014 14:47:58 +0000 (16:47 +0200)
openerp/addons/base/tests/test_mail.py
openerp/tools/mail.py

index 4ebec83..01193de 100644 (file)
 
 import unittest2
 
-from lxml import etree
-
 from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html, email_split
-
 import test_mail_examples
 
 
@@ -45,6 +42,24 @@ class TestSanitizer(unittest2.TestCase):
             html = html_sanitize(content)
             self.assertEqual(html, expected, 'html_sanitize is broken')
 
+    def test_mako(self):
+        cases = [
+            ('''<p>Some text</p>
+<% set signup_url = object.get_signup_url() %>
+% if signup_url:
+<p>
+    You can access this document and pay online via our Customer Portal:
+</p>''', '''<p>Some text</p>
+<% set signup_url = object.get_signup_url() %>
+% if signup_url:
+<p>
+    You can access this document and pay online via our Customer Portal:
+</p>''')
+        ]
+        for content, expected in cases:
+            html = html_sanitize(content, silent=False)
+            self.assertEqual(html, expected, 'html_sanitize: broken mako management')
+
     def test_evil_malicious_code(self):
         # taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Tests
         cases = [
@@ -363,16 +378,17 @@ class TestHtmlTools(unittest2.TestCase):
         for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
             self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
 
+
 class TestEmailTools(unittest2.TestCase):
     """ Test some of our generic utility functions for emails """
 
     def test_email_split(self):
         cases = [
-            ("John <12345@gmail.com>", ['12345@gmail.com']), # regular form 
-            ("d@x; 1@2", ['d@x', '1@2']), # semi-colon + extra space
-            ("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com','foo@bar']), # comma + single-quoting
-            ('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting
-            ('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting with brackets 
+            ("John <12345@gmail.com>", ['12345@gmail.com']),  # regular form
+            ("d@x; 1@2", ['d@x', '1@2']),  # semi-colon + extra space
+            ("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com', 'foo@bar']),  # comma + single-quoting
+            ('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']),  # double-quoting
+            ('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']),  # double-quoting with brackets
         ]
         for text, expected in cases:
             self.assertEqual(email_split(text), expected, 'email_split is broken')
index 83bd951..f6406eb 100644 (file)
@@ -63,6 +63,9 @@ def html_sanitize(src, silent=True, strict=False):
     # html encode email tags
     part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
     src = part.sub(lambda m: cgi.escape(m.group(1)), src)
+    # html encode mako tags <% ... %> to decode them later and keep them alive, otherwise they are stripped by the cleaner
+    src = src.replace('<%', cgi.escape('<%'))
+    src = src.replace('%>', cgi.escape('%>'))
 
     kwargs = {
         'page_structure': True,
@@ -71,7 +74,7 @@ def html_sanitize(src, silent=True, strict=False):
         'remove_unknown_tags': False,
         'allow_tags': allowed_tags,
         'comments': False,
-        'processing_instructions' : False
+        'processing_instructions': False
     }
     if etree.LXML_VERSION >= (2, 3, 1):
         # kill_tags attribute has been added in version 2.3.1
@@ -104,6 +107,8 @@ def html_sanitize(src, silent=True, strict=False):
         cleaned = cleaned.replace('%20', ' ')
         cleaned = cleaned.replace('%5B', '[')
         cleaned = cleaned.replace('%5D', ']')
+        cleaned = cleaned.replace('&lt;%', '<%')
+        cleaned = cleaned.replace('%&gt;', '%>')
     except etree.ParserError, e:
         if 'empty' in str(e):
             return ""