[FIX] tools.email_split: improve email extraction using stdlib + extra tests

author Olivier Dony <odo@openerp.com>

Tue, 14 Jan 2014 15:44:26 +0000 (16:44 +0100)

committer Olivier Dony <odo@openerp.com>

Tue, 14 Jan 2014 15:44:26 +0000 (16:44 +0100)
author Olivier Dony <odo@openerp.com>
Tue, 14 Jan 2014 15:44:26 +0000 (16:44 +0100)
committer Olivier Dony <odo@openerp.com>
Tue, 14 Jan 2014 15:44:26 +0000 (16:44 +0100)
diff --cc openerp/tests/test_mail.py

index ddb8fc4,ddb8fc4..40f50a0
--- 1/openerp/tests/test_mail.py
--- 2/openerp/tests/test_mail.py
+++ b/openerp/tests/test_mail.py
@@@ -23,7 -23,7 +23,7 @@@
   ##############################################################################
   
   import unittest2
--from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
++from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html, email_split
   
   HTML_SOURCE = """
   <font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
@@@ -311,6 -311,6 +311,19 @@@ class TestHtmlTools(unittest2.TestCase)
           for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
               self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
   
++class TestEmailTools(unittest2.TestCase):
++    """ Test some of our generic utility functions for emails """
++
++    def test_email_split(self):
++        cases = [
++            ("John <12345@gmail.com>", ['12345@gmail.com']), # regular form 
++            ("d@x; 1@2", ['d@x', '1@2']), # semi-colon + extra space
++            ("'(ss)' <123@gmail.com>, 'foo' <foo@bar>", ['123@gmail.com','foo@bar']), # comma + single-quoting
++            ('"john@gmail.com"<johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting
++            ('"<jg>" <johnny@gmail.com>', ['johnny@gmail.com']), # double-quoting with brackets 
++        ]
++        for text, expected in cases:
++            self.assertEqual(email_split(text), expected, 'email_split is broken')
   
   if __name__ == '__main__':
       unittest2.main()
diff --cc openerp/tools/mail.py

index b0605bf,b0605bf..eb13f6e
--- 1/openerp/tools/mail.py
--- 2/openerp/tools/mail.py
+++ b/openerp/tools/mail.py
@@@ -30,6 -30,6 +30,7 @@@ import r
   import socket
   import threading
   import time
++from email.utils import getaddresses
   
   from openerp.loglevels import ustr
   
@@@ -358,4 -358,4 +359,9 @@@ def email_split(text)
       """ Return a list of the email addresses found in ``text`` """
       if not text:
           return []
--    return re.findall(r'([^ ,<@]+@[^> ,]+)', text)
++    return [addr[1] for addr in getaddresses([text])
++                # getaddresses() yields an empty address ('') when parsing
++                # fails, and may also yield addresses that contain no '@'.
++                # Both are dropped: '@' is mandatory in RFC 2822's `addr-spec`.
++                if addr[1]
++                if '@' in addr[1]]
author	Olivier Dony <odo@openerp.com>
	Tue, 14 Jan 2014 15:44:26 +0000 (16:44 +0100)
committer	Olivier Dony <odo@openerp.com>
	Tue, 14 Jan 2014 15:44:26 +0000 (16:44 +0100)
		1	2
openerp/tests/test_mail.py	patch \|	diff1 \|	diff2 \|	blob \| history
openerp/tools/mail.py	patch \|	diff1 \|	diff2 \|	blob \| history