openerp/addons/base/tests/test_mail.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # This test can be run stand-alone with something like:
   4 # > PYTHONPATH=. python2 openerp/addons/base/tests/test_mail.py
   5 ##############################################################################
   6 #
   7 #    OpenERP, Open Source Business Applications
   8 #    Copyright (c) 2012-TODAY OpenERP S.A. <http://openerp.com>
   9 #
  10 #    This program is free software: you can redistribute it and/or modify
  11 #    it under the terms of the GNU Affero General Public License as
  12 #    published by the Free Software Foundation, either version 3 of the
  13 #    License, or (at your option) any later version.
  14 #
  15 #    This program is distributed in the hope that it will be useful,
  16 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 #    GNU Affero General Public License for more details.
  19 #
  20 #    You should have received a copy of the GNU Affero General Public License
  21 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  22 #
  23 ##############################################################################
  24
  25 import unittest2
  26
  27 from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html, email_split
  28 import test_mail_examples
  29
  30
  31 class TestSanitizer(unittest2.TestCase):
  32     """ Test the html sanitizer that filters html to remove unwanted attributes """
  33
  34     def test_basic_sanitizer(self):
  35         cases = [
  36             ("yop", "<p>yop</p>"),  # simple
  37             ("lala<p>yop</p>xxx", "<p>lala</p><p>yop</p>xxx"),  # trailing text
  38             ("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci",
  39                 u"<p>Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci</p>"),  # unicode
  40         ]
  41         for content, expected in cases:
  42             html = html_sanitize(content)
  43             self.assertEqual(html, expected, 'html_sanitize is broken')
  44
  45     def test_mako(self):
  46         cases = [
  47             ('''<p>Some text</p>
  48 <% set signup_url = object.get_signup_url() %>
  49 % if signup_url:
  50 <p>
  51     You can access this document and pay online via our Customer Portal:
  52 </p>''', '''<p>Some text</p>
  53 <% set signup_url = object.get_signup_url() %>
  54 % if signup_url:
  55 <p>
  56     You can access this document and pay online via our Customer Portal:
  57 </p>''')
  58         ]
  59         for content, expected in cases:
  60             html = html_sanitize(content, silent=False)
  61             self.assertEqual(html, expected, 'html_sanitize: broken mako management')
  62
  63     def test_evil_malicious_code(self):
  64         # taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Tests
  65         cases = [
  66             ("<IMG SRC=javascript:alert('XSS')>"),  # no quotes and semicolons
  67             ("<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>"),  # UTF-8 Unicode encoding
  68             ("<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>"),  # hex encoding
  69             ("<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">"),  # embedded carriage return
  70             ("<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">"),  # embedded newline
  71             ("<IMG SRC=\"jav   ascript:alert('XSS');\">"),  # embedded tab
  72             ("<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">"),  # embedded encoded tab
  73             ("<IMG SRC=\" &#14;  javascript:alert('XSS');\">"),  # spaces and meta-characters
  74             ("<IMG SRC=\"javascript:alert('XSS')\""),  # half-open html
  75             ("<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">"),  # malformed tag
  76             ("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"),  # non-alpha-non-digits
  77             ("<SCRIPT/SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"),  # non-alpha-non-digits
  78             ("<<SCRIPT>alert(\"XSS\");//<</SCRIPT>"),  # extraneous open brackets
  79             ("<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >"),  # non-closing script tags
  80             ("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">"),  # input image
  81             ("<BODY BACKGROUND=\"javascript:alert('XSS')\">"),  # body image
  82             ("<IMG DYNSRC=\"javascript:alert('XSS')\">"),  # img dynsrc
  83             ("<IMG LOWSRC=\"javascript:alert('XSS')\">"),  # img lowsrc
  84             ("<TABLE BACKGROUND=\"javascript:alert('XSS')\">"),  # table
  85             ("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">"),  # td
  86             ("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">"),  # div background
  87             ("<DIV STYLE=\"background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029\">"),  # div background with unicoded exploit
  88             ("<DIV STYLE=\"background-image: url(&#1;javascript:alert('XSS'))\">"),  # div background + extra characters
  89             ("<IMG SRC='vbscript:msgbox(\"XSS\")'>"),  # VBscrip in an image
  90             ("<BODY ONLOAD=alert('XSS')>"),  # event handler
  91             ("<BR SIZE=\"&{alert('XSS')}\>"),  # & javascript includes
  92             ("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">"),  # style sheet
  93             ("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">"),  # remote style sheet
  94             ("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>"),  # remote style sheet 2
  95             ("<META HTTP-EQUIV=\"Link\" Content=\"<http://ha.ckers.org/xss.css>; REL=stylesheet\">"),  # remote style sheet 3
  96             ("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>"),  # remote style sheet 4
  97             ("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">"),  # style attribute using a comment to break up expression
  98         ]
  99         for content in cases:
 100             html = html_sanitize(content)
 101             self.assertNotIn('javascript', html, 'html_sanitize did not remove a malicious javascript')
 102             self.assertTrue('ha.ckers.org' not in html or 'http://ha.ckers.org/xss.css' in html, 'html_sanitize did not remove a malicious code in %s (%s)' % (content, html))
 103
 104         content = "<!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]-->"  # down-level hidden block
 105         self.assertEquals(html_sanitize(content, silent=False), '')
 106
 107     def test_html(self):
 108         sanitized_html = html_sanitize(test_mail_examples.MISC_HTML_SOURCE)
 109         for tag in ['<div', '<b', '<i', '<u', '<strike', '<li', '<blockquote', '<a href']:
 110             self.assertIn(tag, sanitized_html, 'html_sanitize stripped too much of original html')
 111         for attr in ['javascript']:
 112             self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
 113
 114         emails = [("Charles <charles.bidule@truc.fr>", "Charles &lt;charles.bidule@truc.fr&gt;"),
 115                 ("Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>", "Dupuis &lt;'tr/-: ${dupuis#$'@truc.baz.fr&gt;"),
 116                 ("Technical <service/technical+2@open.com>", "Technical &lt;service/technical+2@open.com&gt;"),
 117                 ("Div nico <div-nico@open.com>", "Div nico &lt;div-nico@open.com&gt;")]
 118         for email in emails:
 119             self.assertIn(email[1], html_sanitize(email[0]), 'html_sanitize stripped emails of original html')
 120
 121     def test_edi_source(self):
 122         html = html_sanitize(test_mail_examples.EDI_LIKE_HTML_SOURCE)
 123         self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,
 124             'html_sanitize removed valid style attribute')
 125         self.assertIn('<span style="color: #222; margin-bottom: 5px; display: block; ">', html,
 126             'html_sanitize removed valid style attribute')
 127         self.assertIn('img class="oe_edi_paypal_button" src="https://www.paypal.com/en_US/i/btn/btn_paynowCC_LG.gif"', html,
 128             'html_sanitize removed valid img')
 129         self.assertNotIn('</body></html>', html, 'html_sanitize did not remove extra closing tags')
 130
 131
 132 class TestCleaner(unittest2.TestCase):
 133     """ Test the email cleaner function that filters the content of incoming emails """
 134
 135     def test_00_basic_text(self):
 136         """ html_email_clean test for signatures """
 137         test_data = [
 138             (
 139                 """This is Sparta!\n--\nAdministrator\n+9988776655""",
 140                 ['This is Sparta!'],
 141                 ['Administrator', '9988776655']
 142             ), (
 143                 """<p>--\nAdministrator</p>""",
 144                 [],
 145                 ['--', 'Administrator']
 146             ), (
 147                 """<p>This is Sparta!\n---\nAdministrator</p>""",
 148                 ['This is Sparta!'],
 149                 ['---', 'Administrator']
 150             ), (
 151                 """<p>--<br>Administrator</p>""",
 152                 [],
 153                 []
 154             ), (
 155                 """<p>This is Sparta!<br/>--<br>Administrator</p>""",
 156                 ['This is Sparta!'],
 157                 []
 158             ), (
 159                 """This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair""",
 160                 ['This is Sparta!', 'Certes', 'Clair'],
 161                 ['Ah bon', 'Chouette']
 162             )
 163         ]
 164         for test, in_lst, out_lst in test_data:
 165             new_html = html_email_clean(test, remove=True)
 166             for text in in_lst:
 167                 self.assertIn(text, new_html, 'html_email_cleaner wrongly removed content')
 168             for text in out_lst:
 169                 self.assertNotIn(text, new_html, 'html_email_cleaner did not remove unwanted content')
 170
 171     def test_05_shorten(self):
 172         # TEST: shorten length
 173         test_str = '''<div>
 174         <span>
 175         </span>
 176         <p>Hello, <span>Raoul</span>
 177     <bold>You</bold> are
 178     pretty</p>
 179 <span>Really</span>
 180 </div>
 181 '''
 182         # shorten at 'H' of Hello -> should shorten after Hello,
 183         html = html_email_clean(test_str, shorten=True, max_length=1, remove=True)
 184         self.assertIn('Hello,', html, 'html_email_cleaner: shorten error or too short')
 185         self.assertNotIn('Raoul', html, 'html_email_cleaner: shorten error or too long')
 186         self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')
 187         # shorten at 'are' -> should shorten after are
 188         html = html_email_clean(test_str, shorten=True, max_length=17, remove=True)
 189         self.assertIn('Hello,', html, 'html_email_cleaner: shorten error or too short')
 190         self.assertIn('Raoul', html, 'html_email_cleaner: shorten error or too short')
 191         self.assertIn('are', html, 'html_email_cleaner: shorten error or too short')
 192         self.assertNotIn('pretty', html, 'html_email_cleaner: shorten error or too long')
 193         self.assertNotIn('Really', html, 'html_email_cleaner: shorten error or too long')
 194         self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')
 195
 196         # TEST: shorten in quote
 197         test_str = '''<div> Blahble
 198             bluih      blouh
 199         <blockquote>This is a quote
 200         <span>And this is quite a long quote, after all.</span>
 201         </blockquote>
 202 </div>'''
 203         # shorten in the quote
 204         html = html_email_clean(test_str, shorten=True, max_length=25, remove=True)
 205         self.assertIn('Blahble', html, 'html_email_cleaner: shorten error or too short')
 206         self.assertIn('bluih', html, 'html_email_cleaner: shorten error or too short')
 207         self.assertIn('blouh', html, 'html_email_cleaner: shorten error or too short')
 208         self.assertNotIn('quote', html, 'html_email_cleaner: shorten error or too long')
 209         self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')
 210         # shorten in second word
 211         html = html_email_clean(test_str, shorten=True, max_length=9, remove=True)
 212         self.assertIn('Blahble', html, 'html_email_cleaner: shorten error or too short')
 213         self.assertIn('bluih', html, 'html_email_cleaner: shorten error or too short')
 214         self.assertNotIn('blouh', html, 'html_email_cleaner: shorten error or too short')
 215         self.assertNotIn('quote', html, 'html_email_cleaner: shorten error or too long')
 216         self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')
 217         # shorten waaay too large
 218         html = html_email_clean(test_str, shorten=True, max_length=900, remove=True)
 219         self.assertIn('Blahble', html, 'html_email_cleaner: shorten error or too short')
 220         self.assertIn('bluih', html, 'html_email_cleaner: shorten error or too short')
 221         self.assertIn('blouh', html, 'html_email_cleaner: shorten error or too short')
 222         self.assertNotIn('quote', html, 'html_email_cleaner: shorten error or too long')
 223
 224     def test_10_email_text(self):
 225         """ html_email_clean test for text-based emails """
 226         new_html = html_email_clean(test_mail_examples.TEXT_1, remove=True)
 227         for ext in test_mail_examples.TEXT_1_IN:
 228             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 229         for ext in test_mail_examples.TEXT_1_OUT:
 230             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 231
 232         new_html = html_email_clean(test_mail_examples.TEXT_2, remove=True)
 233         for ext in test_mail_examples.TEXT_2_IN:
 234             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 235         for ext in test_mail_examples.TEXT_2_OUT:
 236             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 237
 238     def test_20_email_html(self):
 239         new_html = html_email_clean(test_mail_examples.HTML_1, remove=True)
 240         for ext in test_mail_examples.HTML_1_IN:
 241             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 242         for ext in test_mail_examples.HTML_1_OUT:
 243             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 244
 245         new_html = html_email_clean(test_mail_examples.HTML_2, remove=True)
 246         for ext in test_mail_examples.HTML_2_IN:
 247             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 248         for ext in test_mail_examples.HTML_2_OUT:
 249             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 250
 251         # --- MAIL ORIGINAL --- -> can't parse this one currently, too much language-dependent
 252         # new_html = html_email_clean(test_mail_examples.HTML_3, remove=False)
 253         # for ext in test_mail_examples.HTML_3_IN:
 254         #     self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 255         # for ext in test_mail_examples.HTML_3_OUT:
 256         #     self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 257
 258     def test_30_email_msoffice(self):
 259         new_html = html_email_clean(test_mail_examples.MSOFFICE_1, remove=True)
 260         for ext in test_mail_examples.MSOFFICE_1_IN:
 261             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 262         for ext in test_mail_examples.MSOFFICE_1_OUT:
 263             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 264
 265         new_html = html_email_clean(test_mail_examples.MSOFFICE_2, remove=True)
 266         for ext in test_mail_examples.MSOFFICE_2_IN:
 267             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 268         for ext in test_mail_examples.MSOFFICE_2_OUT:
 269             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 270
 271         new_html = html_email_clean(test_mail_examples.MSOFFICE_3, remove=True)
 272         for ext in test_mail_examples.MSOFFICE_3_IN:
 273             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 274         for ext in test_mail_examples.MSOFFICE_3_OUT:
 275             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 276
 277     def test_40_email_hotmail(self):
 278         new_html = html_email_clean(test_mail_examples.HOTMAIL_1, remove=True)
 279         for ext in test_mail_examples.HOTMAIL_1_IN:
 280             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 281         for ext in test_mail_examples.HOTMAIL_1_OUT:
 282             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 283
 284     def test_50_email_gmail(self):
 285         new_html = html_email_clean(test_mail_examples.GMAIL_1, remove=True)
 286         for ext in test_mail_examples.GMAIL_1_IN:
 287             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 288         for ext in test_mail_examples.GMAIL_1_OUT:
 289             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 290
 291     def test_60_email_thunderbird(self):
 292         new_html = html_email_clean(test_mail_examples.THUNDERBIRD_1, remove=True)
 293         for ext in test_mail_examples.THUNDERBIRD_1_IN:
 294             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 295         for ext in test_mail_examples.THUNDERBIRD_1_OUT:
 296             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
 297
 298     def test_70_read_more_and_shorten(self):
 299         expand_options = {
 300             'oe_expand_container_class': 'span_class',
 301             'oe_expand_container_content': 'Herbert Einstein',
 302             'oe_expand_separator_node': 'br_lapin',
 303             'oe_expand_a_class': 'a_class',
 304             'oe_expand_a_content': 'read mee',
 305         }
 306         new_html = html_email_clean(test_mail_examples.OERP_WEBSITE_HTML_1, remove=True, shorten=True, max_length=100, expand_options=expand_options)
 307         for ext in test_mail_examples.OERP_WEBSITE_HTML_1_IN:
 308             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 309         for ext in test_mail_examples.OERP_WEBSITE_HTML_1_OUT:
 310             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase overlimit content')
 311         for ext in ['<span class="span_class">Herbert Einstein<br_lapin></br_lapin><a href="#" class="a_class">read mee</a></span>']:
 312             self.assertIn(ext, new_html, 'html_email_cleaner wrongly take into account specific expand options')
 313
 314         new_html = html_email_clean(test_mail_examples.OERP_WEBSITE_HTML_2, remove=True, shorten=True, max_length=200, expand_options=expand_options, protect_sections=False)
 315         for ext in test_mail_examples.OERP_WEBSITE_HTML_2_IN:
 316             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 317         for ext in test_mail_examples.OERP_WEBSITE_HTML_2_OUT:
 318             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase overlimit content')
 319         for ext in ['<span class="span_class">Herbert Einstein<br_lapin></br_lapin><a href="#" class="a_class">read mee</a></span>']:
 320             self.assertIn(ext, new_html, 'html_email_cleaner wrongly take into account specific expand options')
 321
 322         new_html = html_email_clean(test_mail_examples.OERP_WEBSITE_HTML_2, remove=True, shorten=True, max_length=200, expand_options=expand_options, protect_sections=True)
 323         for ext in test_mail_examples.OERP_WEBSITE_HTML_2_IN:
 324             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
 325         for ext in test_mail_examples.OERP_WEBSITE_HTML_2_OUT:
 326             self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase overlimit content')
 327         for ext in [
 328                 '<span class="span_class">Herbert Einstein<br_lapin></br_lapin><a href="#" class="a_class">read mee</a></span>',
 329                 'tasks using the gantt chart and control deadlines']:
 330             self.assertIn(ext, new_html, 'html_email_cleaner wrongly take into account specific expand options')
 331
 332     def test_70_read_more(self):
 333         new_html = html_email_clean(test_mail_examples.BUG1, remove=True, shorten=True, max_length=100)
 334         for ext in test_mail_examples.BUG_1_IN:
 335             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed valid content')
 336         for ext in test_mail_examples.BUG_1_OUT:
 337             self.assertNotIn(ext, new_html, 'html_email_cleaner did not removed invalid content')
 338
 339         new_html = html_email_clean(test_mail_examples.BUG2, remove=True, shorten=True, max_length=250)
 340         for ext in test_mail_examples.BUG_2_IN:
 341             self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed valid content')
 342         for ext in test_mail_examples.BUG_2_OUT:
 343             self.assertNotIn(ext, new_html, 'html_email_cleaner did not removed invalid content')
 344
 345     def test_90_misc(self):
 346         # False boolean for text must return empty string
 347         new_html = html_email_clean(False)
 348         self.assertEqual(new_html, False, 'html_email_cleaner did change a False in an other value.')
 349
 350         # Message with xml and doctype tags don't crash
 351         new_html = html_email_clean(u'<?xml version="1.0" encoding="iso-8859-1"?>\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n <head>\n  <title>404 - Not Found</title>\n </head>\n <body>\n  <h1>404 - Not Found</h1>\n </body>\n</html>\n')
 352         self.assertNotIn('encoding', new_html, 'html_email_cleaner did not remove correctly encoding attributes')
 353
 354
 355 class TestHtmlTools(unittest2.TestCase):
 356     """ Test some of our generic utility functions about html """
 357
 358     def test_plaintext2html(self):
 359         cases = [
 360             ("First \nSecond \nThird\n \nParagraph\n\r--\nSignature paragraph", 'div',
 361              "<div><p>First <br/>Second <br/>Third</p><p>Paragraph</p><p>--<br/>Signature paragraph</p></div>"),
 362             ("First<p>It should be escaped</p>\nSignature", False,
 363              "<p>First&lt;p&gt;It should be escaped&lt;/p&gt;<br/>Signature</p>")
 364         ]
 365         for content, container_tag, expected in cases:
 366             html = plaintext2html(content, container_tag)
 367             self.assertEqual(html, expected, 'plaintext2html is broken')
 368
 369     def test_append_to_html(self):
 370         test_samples = [
 371             ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, True, False,
 372              '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<pre>--\nYours truly</pre>\n</html>'),
 373             ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, False, False,
 374              '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<p>--<br/>Yours truly</p>\n</html>'),
 375             ('<html><body>some <b>content</b></body></html>', '<!DOCTYPE...>\n<html><body>\n<p>--</p>\n<p>Yours truly</p>\n</body>\n</html>', False, False, False,
 376              '<html><body>some <b>content</b>\n\n\n<p>--</p>\n<p>Yours truly</p>\n\n\n</body></html>'),
 377         ]
 378         for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
 379             self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
 380
 381
 382 class TestEmailTools(unittest2.TestCase):
 383     """ Test some of our generic utility functions for emails """
 384
 385     def test_email_split(self):
 386         cases = [
 387             ("John <12345@gmail.com>", ['12345@gmail.com']),  # regular form
 388             ("d@x; 1@2", ['d@x', '1@2']),  # semi-colon + extra space
 389             ("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com', 'foo@bar']),  # comma + single-quoting
 390             ('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']),  # double-quoting
 391             ('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']),  # double-quoting with brackets
 392         ]
 393         for text, expected in cases:
 394             self.assertEqual(email_split(text), expected, 'email_split is broken')
 395
 396 if __name__ == '__main__':
 397     unittest2.main()