2 # -*- coding: utf-8 -*-
3 # This test can be run stand-alone with something like:
4 # > PYTHONPATH=. python2 openerp/tests/test_misc.py
5 ##############################################################################
7 # OpenERP, Open Source Business Applications
8 # Copyright (c) 2012-TODAY OpenERP S.A. <http://openerp.com>
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU Affero General Public License as
12 # published by the Free Software Foundation, either version 3 of the
13 # License, or (at your option) any later version.
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Affero General Public License for more details.
20 # You should have received a copy of the GNU Affero General Public License
21 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 ##############################################################################
26 from . import test_mail_examples
27 from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
class TestSanitizer(unittest2.TestCase):
    """ Test the html sanitizer that filters html to remove unwanted attributes """

    def test_basic_sanitizer(self):
        """ html_sanitize on simple text, trailing text and unicode content """
        cases = [
            ("yop", "<p>yop</p>"),  # simple
            ("lala<p>yop</p>xxx", "<div><p>lala</p><p>yop</p>xxx</div>"),  # trailing text
            ("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci",
             u"<p>Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci</p>"),  # unicode
        ]
        for content, expected in cases:
            html = html_sanitize(content)
            self.assertEqual(html, expected, 'html_sanitize is broken')

    def test_evil_malicious_code(self):
        """ html_sanitize must neutralise a set of known XSS attack vectors """
        # taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Tests
        # The encoded vectors are spelled with explicit HTML character
        # references so that each entry really exercises the evasion technique
        # named in its comment instead of duplicating the plain-text vector.
        cases = [
            "<IMG SRC=javascript:alert('XSS')>",  # no quotes and semicolons
            "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",  # UTF-8 Unicode encoding
            "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",  # hex encoding
            "<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">",  # embedded carriage return
            "<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",  # embedded newline
            "<IMG SRC=\"jav\tascript:alert('XSS');\">",  # embedded tab
            "<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",  # embedded encoded tab
            "<IMG SRC=\" &#14;  javascript:alert('XSS');\">",  # spaces and meta-characters
            "<IMG SRC=\"javascript:alert('XSS')\"",  # half-open html
            "<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">",  # malformed tag
            "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>",  # non-alpha-non-digits
            "<SCRIPT/SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>",  # non-alpha-non-digits
            "<<SCRIPT>alert(\"XSS\");//<</SCRIPT>",  # extraneous open brackets
            "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",  # non-closing script tags
            "<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">",  # input image
            "<BODY BACKGROUND=\"javascript:alert('XSS')\">",  # body image
            "<IMG DYNSRC=\"javascript:alert('XSS')\">",  # img dynsrc
            "<IMG LOWSRC=\"javascript:alert('XSS')\">",  # img lowsrc
            "<TABLE BACKGROUND=\"javascript:alert('XSS')\">",  # table
            "<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">",  # td
            "<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">",  # div background
            # backslashes are doubled: a bare \0075 in a non-raw literal is
            # read by Python as the octal escape \007 followed by '5', which
            # destroys the CSS escape sequence this vector relies on
            "<DIV STYLE=\"background-image:\\0075\\0072\\006C\\0028'\\006a\\0061\\0076\\0061\\0073\\0063\\0072\\0069\\0070\\0074\\003a\\0061\\006c\\0065\\0072\\0074\\0028.1027\\0058.1053\\0053\\0027\\0029'\\0029\">",  # div background with unicoded exploit
            "<DIV STYLE=\"background-image: &#1;url(javascript:alert('XSS'))\">",  # div background + extra characters
            "<IMG SRC='vbscript:msgbox(\"XSS\")'>",  # VBscrip in an image
            "<BODY ONLOAD=alert('XSS')>",  # event handler
            "<BR SIZE=\"&{alert('XSS')}\\>",  # & javascript includes
            "<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">",  # style sheet
            "<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">",  # remote style sheet
            "<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>",  # remote style sheet 2
            "<META HTTP-EQUIV=\"Link\" Content=\"<http://ha.ckers.org/xss.css>; REL=stylesheet\">",  # remote style sheet 3
            "<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>",  # remote style sheet 4
            "<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">",  # style attribute using a comment to break up expression
            """<!--[if gte IE 4]>
<SCRIPT>alert('XSS');</SCRIPT>
<![endif]-->""",  # down-level hidden block
        ]
        for content in cases:
            html = html_sanitize(content)
            self.assertNotIn('javascript', html, 'html_sanitize did not remove a malicious javascript')
            # the remote host may only survive as the plain stylesheet URL
            self.assertTrue('ha.ckers.org' not in html or 'http://ha.ckers.org/xss.css' in html, 'html_sanitize did not remove a malicious code in %s (%s)' % (content, html))

    # NOTE(review): the header of this method was not visible in the reviewed
    # source; the name below is assumed -- confirm against the repository.
    def test_html(self):
        """ html_sanitize keeps harmless markup and e-mail addresses """
        sanitized_html = html_sanitize(test_mail_examples.MISC_HTML_SOURCE)
        for tag in ['<div', '<b', '<i', '<u', '<strike', '<li', '<blockquote', '<a href']:
            self.assertIn(tag, sanitized_html, 'html_sanitize stripped too much of original html')
        for attr in ['javascript']:
            self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')

        # (raw input, text expected in the sanitized output)
        # NOTE(review): expected values assume the sanitizer escapes the angle
        # brackets around the address instead of treating them as a tag -- confirm.
        emails = [
            ("Charles <charles.bidule@truc.fr>", "Charles &lt;charles.bidule@truc.fr&gt;"),
            ("Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>", "Dupuis &lt;'tr/-: ${dupuis#$'@truc.baz.fr&gt;"),
            ("Technical <service/technical+2@open.com>", "Technical &lt;service/technical+2@open.com&gt;"),
            ("Div nico <div-nico@open.com>", "Div nico &lt;div-nico@open.com&gt;"),
        ]
        for source, expected in emails:
            self.assertIn(expected, html_sanitize(source), 'html_sanitize stripped emails of original html')

    def test_edi_source(self):
        """ html_sanitize keeps the styling of an EDI-like mail and drops extra closing tags """
        html = html_sanitize(test_mail_examples.EDI_LIKE_HTML_SOURCE)
        self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,
                      'html_sanitize removed valid style attribute')
        self.assertIn('<span style="color: #222; margin-bottom: 5px; display: block; ">', html,
                      'html_sanitize removed valid style attribute')
        self.assertIn('img class="oe_edi_paypal_button" src="https://www.paypal.com/en_US/i/btn/btn_paynowCC_LG.gif"', html,
                      'html_sanitize removed valid img')
        self.assertNotIn('</body></html>', html, 'html_sanitize did not remove extra closing tags')
class TestCleaner(unittest2.TestCase):
    """ Test the email cleaner function that filters the content of incoming emails """

    def _check_clean(self, source, kept, removed,
                     kept_msg='html_email_cleaner wrongly removed not quoted content',
                     removed_msg='html_email_cleaner did not erase signature / quoted content',
                     **clean_options):
        """ Clean ``source`` with ``remove=True`` and check the resulting html.

        :param source: raw email body given to html_email_clean
        :param kept: strings that must still be present after cleaning
        :param removed: strings that must be absent after cleaning
        :param kept_msg: failure message used for the ``kept`` assertions
        :param removed_msg: failure message used for the ``removed`` assertions
        :param clean_options: extra keyword arguments forwarded to html_email_clean
        """
        new_html = html_email_clean(source, remove=True, **clean_options)
        for ext in kept:
            self.assertIn(ext, new_html, kept_msg)
        for ext in removed:
            self.assertNotIn(ext, new_html, removed_msg)

    def test_00_basic_text(self):
        """ html_email_clean test for signatures """
        # each entry: (source, strings that must be kept, strings that must be removed)
        # NOTE(review): the lists flagged "assumed" were not visible in the
        # reviewed source and have been reconstructed -- confirm before merging.
        test_data = [
            (
                """This is Sparta!\n--\nAdministrator\n+9988776655""",
                ['This is Sparta!'],  # assumed
                ['Administrator', '9988776655']
            ), (
                """<p>--\nAdministrator</p>""",
                [],  # assumed
                ['--', 'Administrator']
            ), (
                """<p>This is Sparta!\n---\nAdministrator</p>""",
                ['This is Sparta!'],  # assumed
                ['---', 'Administrator']
            ), (
                """<p>--<br>Administrator</p>""",
                [],  # assumed
                []  # assumed
            ), (
                """<p>This is Sparta!<br/>--<br>Administrator</p>""",
                ['This is Sparta!'],  # assumed
                []  # assumed
            ), (
                """This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair""",
                ['This is Sparta!', 'Certes', 'Clair'],
                ['Ah bon', 'Chouette']
            )
        ]
        for test, in_lst, out_lst in test_data:
            self._check_clean(test, in_lst, out_lst,
                              kept_msg='html_email_cleaner wrongly removed content',
                              removed_msg='html_email_cleaner did not remove unwanted content')

    def test_10_email_text(self):
        """ html_email_clean test for text-based emails """
        self._check_clean(test_mail_examples.TEXT_1,
                          test_mail_examples.TEXT_1_IN, test_mail_examples.TEXT_1_OUT)
        self._check_clean(test_mail_examples.TEXT_2,
                          test_mail_examples.TEXT_2_IN, test_mail_examples.TEXT_2_OUT)

    def test_20_email_html(self):
        """ html_email_clean test for html-based emails """
        self._check_clean(test_mail_examples.HTML_1,
                          test_mail_examples.HTML_1_IN, test_mail_examples.HTML_1_OUT)
        self._check_clean(test_mail_examples.HTML_2,
                          test_mail_examples.HTML_2_IN, test_mail_examples.HTML_2_OUT)

        # --- MAIL ORIGINAL --- -> can't parse this one currently, too much language-dependent
        # new_html = html_email_clean(test_mail_examples.HTML_3, remove=False)
        # for ext in test_mail_examples.HTML_3_IN:
        #     self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
        # for ext in test_mail_examples.HTML_3_OUT:
        #     self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')

    def test_30_email_msoffice(self):
        """ html_email_clean test for the MSOFFICE_* example emails """
        self._check_clean(test_mail_examples.MSOFFICE_1,
                          test_mail_examples.MSOFFICE_1_IN, test_mail_examples.MSOFFICE_1_OUT)
        self._check_clean(test_mail_examples.MSOFFICE_2,
                          test_mail_examples.MSOFFICE_2_IN, test_mail_examples.MSOFFICE_2_OUT)
        self._check_clean(test_mail_examples.MSOFFICE_3,
                          test_mail_examples.MSOFFICE_3_IN, test_mail_examples.MSOFFICE_3_OUT)

    def test_40_email_hotmail(self):
        """ html_email_clean test for the HOTMAIL_1 example email """
        self._check_clean(test_mail_examples.HOTMAIL_1,
                          test_mail_examples.HOTMAIL_1_IN, test_mail_examples.HOTMAIL_1_OUT)

    def test_50_email_gmail(self):
        """ html_email_clean test for the GMAIL_1 example email """
        self._check_clean(test_mail_examples.GMAIL_1,
                          test_mail_examples.GMAIL_1_IN, test_mail_examples.GMAIL_1_OUT)

    def test_60_email_thunderbird(self):
        """ html_email_clean test for the THUNDERBIRD_1 example email """
        self._check_clean(test_mail_examples.THUNDERBIRD_1,
                          test_mail_examples.THUNDERBIRD_1_IN, test_mail_examples.THUNDERBIRD_1_OUT)

    def test_70_read_more(self):
        """ html_email_clean test with shorten=True and a max_length """
        self._check_clean(test_mail_examples.BUG1,
                          test_mail_examples.BUG_1_IN, test_mail_examples.BUG_1_OUT,
                          kept_msg='html_email_cleaner wrongly removed valid content',
                          removed_msg='html_email_cleaner did not removed invalid content',
                          shorten=True, max_length=100)
        self._check_clean(test_mail_examples.BUG2,
                          test_mail_examples.BUG_2_IN, test_mail_examples.BUG_2_OUT,
                          kept_msg='html_email_cleaner wrongly removed valid content',
                          removed_msg='html_email_cleaner did not removed invalid content',
                          shorten=True, max_length=4000)

    def test_90_misc(self):
        """ html_email_clean corner cases: False input, xml / doctype declarations """
        # A False value given as text must come back as False
        new_html = html_email_clean(False)
        self.assertEqual(new_html, False, 'html_email_cleaner did change a False in an other value.')

        # Message with xml and doctype tags don't crash
        new_html = html_email_clean(
            u'<?xml version="1.0" encoding="iso-8859-1"?>\n'
            u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n'
            u' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
            u'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n'
            u' <head>\n'
            u' <title>404 - Not Found</title>\n'
            u' </head>\n'
            u' <body>\n'
            u' <h1>404 - Not Found</h1>\n'
            u' </body>\n'
            u'</html>\n')
        self.assertNotIn('encoding', new_html, 'html_email_cleaner did not remove correctly encoding attributes')
class TestHtmlTools(unittest2.TestCase):
    """ Test some of our generic utility functions about html """

    def test_plaintext2html(self):
        """ plaintext2html builds paragraphs / line breaks and escapes markup """
        # each entry: (plain text, container tag, expected html)
        cases = [
            ("First \nSecond \nThird\n \nParagraph\n\r--\nSignature paragraph", 'div',
             "<div><p>First <br/>Second <br/>Third</p><p>Paragraph</p><p>--<br/>Signature paragraph</p></div>"),
            # markup found in the plain text has to come out escaped, so the
            # expected value carries character references, not a nested <p>
            ("First<p>It should be escaped</p>\nSignature", False,
             "<p>First&lt;p&gt;It should be escaped&lt;/p&gt;<br/>Signature</p>"),
        ]
        for content, container_tag, expected in cases:
            html = plaintext2html(content, container_tag)
            self.assertEqual(html, expected, 'plaintext2html is broken')

    def test_append_to_html(self):
        """ append_content_to_html inserts plain text or html content into an html document """
        # each entry: (html, content, plaintext_flag, preserve_flag, container_tag, expected)
        test_samples = [
            ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, True, False,
             '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<pre>--\nYours truly</pre>\n</html>'),
            ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, False, False,
             '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<p>--<br/>Yours truly</p>\n</html>'),
            ('<html><body>some <b>content</b></body></html>', '<!DOCTYPE...>\n<html><body>\n<p>--</p>\n<p>Yours truly</p>\n</body>\n</html>', False, False, False,
             '<html><body>some <b>content</b>\n\n\n<p>--</p>\n<p>Yours truly</p>\n\n\n</body></html>'),
        ]
        for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
            result = append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag)
            self.assertEqual(result, expected, 'append_content_to_html is broken')
276 if __name__ == '__main__':