[FIX] tools: mail: fixed / improved html_email_clean.
authorThibault Delavallée <tde@openerp.com>
Thu, 17 Oct 2013 11:05:25 +0000 (13:05 +0200)
committerThibault Delavallée <tde@openerp.com>
Thu, 17 Oct 2013 11:05:25 +0000 (13:05 +0200)
The "read more" link could fail to appear when the shorten limit was reached
inside a quote that is removed during processing, because the read more
link node was removed along with the quote. The cleaner now uses the first
parent that is not inside a quote, to be sure the link is displayed.

Added the test-case that helped find this bug.

bzr revid: tde@openerp.com-20131017110525-etn0u5s91zbnvul1

openerp/tests/test_mail.py
openerp/tests/test_mail_examples.py
openerp/tools/mail.py

index fd27ebd..5526576 100755 (executable)
@@ -230,6 +230,12 @@ class TestCleaner(unittest2.TestCase):
         for ext in test_mail_examples.BUG_1_OUT:
             self.assertNotIn(ext, new_html, 'html_email_cleaner did not removed invalid content')
 
+        new_html = html_email_clean(test_mail_examples.BUG2, remove=True, shorten=True, max_length=100)
+        for ext in test_mail_examples.BUG_2_IN:
+            self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed valid content')
+        for ext in test_mail_examples.BUG_2_OUT:
+            self.assertNotIn(ext, new_html, 'html_email_cleaner did not removed invalid content')
+
     def test_90_misc(self):
         # False boolean for text must return empty string
         new_html = html_email_clean(False)
index 9ad058c..5467110 100644 (file)
@@ -693,3 +693,266 @@ BUG_1_OUT = [
     '81.81.37.00',
     'openerp.com',
 ]
+
+
+BUG2 = """
+<div>
+    <br>
+    <div class="moz-forward-container"><br>
+      <br>
+      -------- Original Message --------
+      <table class="moz-email-headers-table" border="0" cellpadding="0" cellspacing="0">
+        <tbody>
+          <tr>
+            <th nowrap="" valign="BASELINE" align="RIGHT">Subject:
+            </th>
+            <td>Fwd: TR: OpenERP S.A. Payment Reminder</td>
+          </tr>
+          <tr>
+            <th nowrap="" valign="BASELINE" align="RIGHT">Date: </th>
+            <td>Wed, 16 Oct 2013 14:11:13 +0200</td>
+          </tr>
+          <tr>
+            <th nowrap="" valign="BASELINE" align="RIGHT">From: </th>
+            <td>Christine Herrmann <a class="moz-txt-link-rfc2396E" href="mailto:che@openerp.com">&lt;che@openerp.com&gt;</a></td>
+          </tr>
+          <tr>
+            <th nowrap="" valign="BASELINE" align="RIGHT">To: </th>
+            <td><a class="moz-txt-link-abbreviated" href="mailto:online@openerp.com">online@openerp.com</a></td>
+          </tr>
+        </tbody>
+      </table>
+      <br>
+      <br>
+      
+      <br>
+      <div class="moz-forward-container"><br>
+        <br>
+        -------- Message original --------
+        <table class="moz-email-headers-table" border="0" cellpadding="0" cellspacing="0">
+          <tbody>
+            <tr>
+              <th nowrap="" valign="BASELINE" align="RIGHT">Sujet:
+              </th>
+              <td>TR: OpenERP S.A. Payment Reminder</td>
+            </tr>
+            <tr>
+              <th nowrap="" valign="BASELINE" align="RIGHT">Date&nbsp;:
+              </th>
+              <td>Wed, 16 Oct 2013 10:34:45 -0000</td>
+            </tr>
+            <tr>
+              <th nowrap="" valign="BASELINE" align="RIGHT">De&nbsp;: </th>
+              <td>Ida Siwatala <a class="moz-txt-link-rfc2396E" href="mailto:infos@inzoservices.com">&lt;infos@inzoservices.com&gt;</a></td>
+            </tr>
+            <tr>
+              <th nowrap="" valign="BASELINE" align="RIGHT">Répondre
+
+                à&nbsp;: </th>
+              <td><a class="moz-txt-link-abbreviated" href="mailto:catchall@openerp.my.openerp.com">catchall@openerp.my.openerp.com</a></td>
+            </tr>
+            <tr>
+              <th nowrap="" valign="BASELINE" align="RIGHT">Pour&nbsp;:
+              </th>
+              <td>Christine Herrmann (che) <a class="moz-txt-link-rfc2396E" href="mailto:che@openerp.com">&lt;che@openerp.com&gt;</a></td>
+            </tr>
+          </tbody>
+        </table>
+        <br>
+        <br>
+        <div>
+          <div class="WordSection1">
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Bonjour,</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Pourriez-vous
+
+                me faire un retour sur ce point.</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Cordialement</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <div>
+              <div style="border:none;border-top:solid #B5C4DF
+                1.0pt;padding:3.0pt 0cm 0cm 0cm">
+                <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">De&nbsp;:</span></b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">
+                    Ida Siwatala [<a class="moz-txt-link-freetext" href="mailto:infos@inzoservices.com">mailto:infos@inzoservices.com</a>]
+                    <br>
+                    <b>Envoyé&nbsp;:</b> vendredi 4 octobre 2013 20:03<br>
+                    <b>À&nbsp;:</b> 'Followers of
+                    INZO-services-8-all-e-Maxime-Lisbonne-77176-Savigny-le-temple-France'<br>
+                    <b>Objet&nbsp;:</b> RE: OpenERP S.A. Payment Reminder</span></p>
+              </div>
+            </div>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Bonsoir,</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Je
+
+                me permets de revenir vers vous par écrit , car j’ai
+                fait 2 appels vers votre service en exposant mon
+                problème, mais je n’ai pas eu de retour.</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Cela
+
+                fait un mois que j’ai fait la souscription de votre
+                produit, mais je me rends compte qu’il est pas adapté à
+                ma situation ( fonctionnalité manquante et surtout je
+                n’ai pas beaucoup de temps à passer à résoudre des
+                bugs). </span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">C’est
+
+                pourquoi , j’ai demandé qu’un accord soit trouvé avec
+                vous pour annuler le contrat (tout en vous payant le
+                mois d’utilisation de septembre).</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Pourriez-vous
+
+                me faire un retour sur ce point.</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Cordialement,</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D">Ida
+
+                Siwatala</span></p>
+            <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#1F497D"></span></p>
+            <p>&nbsp;</p>
+            <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">De&nbsp;:</span></b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">
+                <a href="mailto:che@openerp.com">che@openerp.com</a>
+                [<a href="mailto:che@openerp.com">mailto:che@openerp.com</a>]
+                <br>
+                <b>Envoyé&nbsp;:</b> vendredi 4 octobre 2013 17:41<br>
+                <b>À&nbsp;:</b> <a href="mailto:infos@inzoservices.com">infos@inzoservices.com</a><br>
+                <b>Objet&nbsp;:</b> OpenERP S.A. Payment Reminder</span></p>
+            <p>&nbsp;</p>
+            <div>
+              <p style="background:white"><span style="font-size:9.0pt;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#222222">Dear
+
+                  INZO services,</span></p>
+              <p style="background:white"><span style="font-size:9.0pt;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#222222">Exception
+
+                  made if there was a mistake of ours, it seems that the
+                  following amount stays unpaid. Please, take
+                  appropriate measures in order to carry out this
+                  payment in the next 8 days. </span></p>
+              <p class="MsoNormal" style="background:white"><span style="font-size:9.0pt;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#222222"></span></p>
+              <p>&nbsp;</p>
+              <table class="MsoNormalTable" style="width:100.0%;border:outset 1.5pt" width="100%" border="1" cellpadding="0">
+                <tbody>
+                  <tr>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal">Date de facturation</p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal">Description</p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal">Reference</p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal">Due Date</p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal">Amount (€)</p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal">Lit.</p>
+                    </td>
+                  </tr>
+                  <tr>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal"><b>2013-09-24</b></p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal"><b>2013/1121</b></p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal"><b>Enterprise - Inzo Services
+                          - Juillet 2013</b></p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal"><b>2013-09-24</b></p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt">
+                      <p class="MsoNormal"><b>420.0</b></p>
+                    </td>
+                    <td style="padding:.75pt .75pt .75pt .75pt"><br>
+                    </td>
+                  </tr>
+                  <tr>
+                    <td style="padding:.75pt .75pt .75pt .75pt"><br>
+                    </td>
+                    <td style="border:none;padding:.75pt .75pt .75pt
+                      .75pt"><br>
+                    </td>
+                    <td style="border:none;padding:.75pt .75pt .75pt
+                      .75pt"><br>
+                    </td>
+                    <td style="border:none;padding:.75pt .75pt .75pt
+                      .75pt"><br>
+                    </td>
+                    <td style="border:none;padding:.75pt .75pt .75pt
+                      .75pt"><br>
+                    </td>
+                    <td style="border:none;padding:.75pt .75pt .75pt
+                      .75pt"><br>
+                    </td>
+                  </tr>
+                </tbody>
+              </table>
+              <p class="MsoNormal" style="text-align:center;background:white" align="center"><span style="font-size:9.0pt;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#222222">Amount
+
+                  due : 420.00 € </span></p>
+              <p style="background:white"><span style="font-size:9.0pt;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#222222">Would
+
+                  your payment have been carried out after this mail was
+                  sent, please ignore this message. Do not hesitate to
+                  contact our accounting department. </span></p>
+              <p class="MsoNormal" style="background:white"><span style="font-size:9.0pt;font-family:&quot;Arial&quot;,&quot;sans-serif&quot;;color:#222222"><br>
+                  Best Regards, <br>
+                  Aurore Lesage <br>
+                  OpenERP<br>
+                  Chaussée de Namur, 40 <br>
+                  B-1367 Grand Rosières <br>
+                  Tel: +32.81.81.37.00 - Fax: +32.81.73.35.01 <br>
+                  E-mail : <a href="mailto:ale@openerp.com">ale@openerp.com</a> <br>
+                  Web: <a href="http://www.openerp.com">http://www.openerp.com</a></span></p>
+            </div>
+          </div>
+        </div>
+        --<br>
+        INZO services <small>Sent by <a style="color:inherit" href="http://www.openerp.com">OpenERP
+            S.A.</a> using <a style="color:inherit" href="https://www.openerp.com/">OpenERP</a>.</small>
+        <small>Access your messages and documents <a style="color:inherit" href="https://accounts.openerp.com?db=openerp#action=mail.action_mail_redirect&amp;login=che&amp;message_id=5750830">in
+
+            OpenERP</a></small> <br>
+        <pre class="moz-signature" cols="72">-- 
+Christine Herrmann 
+
+OpenERP 
+Chaussée de Namur, 40 
+B-1367 Grand Rosières 
+Tel: +32.81.81.37.00 - Fax: +32.81.73.35.01 
+
+Web: <a class="moz-txt-link-freetext" href="http://www.openerp.com">http://www.openerp.com</a> </pre>
+        <br>
+      </div>
+      <br>
+      <br>
+    </div>
+    <br>
+  
+</div>"""
+
+# Fragments the test asserts are still present after cleaning BUG2.
+BUG_2_IN = [
+    'read more',
+    '...',
+]
+# Fragments the test asserts are absent after cleaning BUG2.
+# Each entry needs its own trailing comma: adjacent string literals are
+# silently concatenated into one string, which would make the check vacuous.
+BUG_2_OUT = [
+    'Fwd: TR: OpenERP S.A',
+    'fait un mois',
+]
index 879e251..528140e 100644 (file)
@@ -175,6 +175,32 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
             iteration += 1
         new_node = _insert_new_node(node, -1, new_node_tag, text[idx:] + (cur_node.tail or ''), None, {})
 
+    def _truncate_node(node, position, find_first_blank=True):
+        """ Shorten the text of ``node`` in place, then append a 'read more'
+        link and a span holding the text that was cut off.
+
+        :param node: element whose ``text`` is truncated; a missing text is
+            handled as an empty string
+        :param position: index at which the text is cut, capped to the
+            length of the text
+        :param find_first_blank: if True, the kept text is extended up to the
+            next space found after ``position`` (or to the end of the text
+            when there is no space); if False the cut is made exactly at
+            ``position``
+        """
+        full_text = node.text or ''
+        cut_idx = min(position, len(full_text))
+        kept_text, cut_text = full_text[:cut_idx], full_text[cut_idx:]
+        # number of extra characters kept after the cut position
+        extra = 0
+        if find_first_blank:
+            extra = cut_text.find(' ')
+            if extra < 0:
+                extra = len(cut_text)
+        node.text = kept_text + cut_text[:extra]
+        # <span> ... <a href="#">read more</a></span>
+        expand_span = _create_node('span', ' ... ', None, {'class': 'oe_mail_expand'})
+        expand_link = _create_node('a', 'read more', None, {'href': '#', 'class': 'oe_mail_expand'})
+        expand_span.append(expand_link)
+        # the text that was cut off goes in its own span, flagged as over-length
+        remainder_span = _create_node('span', cut_text[extra:])
+        remainder_span.set('in_overlength', '1')
+        # attach both new nodes after the existing children of the node
+        for new_child in (expand_span, remainder_span):
+            node.append(new_child)
+
     if not html or not isinstance(html, basestring):
         return html
     html = ustr(html)
@@ -226,14 +252,16 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
         if 'SkyDrivePlaceholder' in node.get('class', '') or 'SkyDrivePlaceholder' in node.get('id', ''):
             root.set('hotmail', '1')
 
-        # state of the parsing
+        # state of the parsing: flag quotes and tails to remove
         if quote_begin:
             node.set('in_quote', '1')
             node.set('tail_remove', '1')
+        # state of the parsing: flag when being in over-length content
         if overlength:
             node.set('in_overlength', '1')
             node.set('tail_remove', '1')
 
+        # find quote in msoffice / hotmail / blockquote / text quote and signatures
         if root.get('msoffice') and node.tag == 'div' and 'border-top:solid' in node.get('style', ''):
             quote_begin = True
             node.set('in_quote', '1')
@@ -242,35 +270,28 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
             quote_begin = True
             node.set('in_quote', '1')
             node.set('tail_remove', '1')
+        if node.tag == 'blockquote' or node.get('text_quote') or node.get('text_signature'):
+            node.set('in_quote', '1')
 
         # shorten:
         # 1/ truncate the text at the next available space
         # 2/ create a 'read more' node, next to current node
         # 3/ add the truncated text in a new node, next to 'read more' node
         if shorten and not overlength and cur_char_nbr + len(node.text or '') > max_length:
+            # climb to the first ancestor that is not flagged as quoted; the
+            # loop must advance node_to_truncate itself (not test `node`) or it
+            # never terminates, and the parent is compared to None because the
+            # truth value of an lxml element depends on its number of children
+            node_to_truncate = node
+            while node_to_truncate.get('in_quote') and node_to_truncate.getparent() is not None:
+                node_to_truncate = node_to_truncate.getparent()
             overlength = True
-            # truncate text
-            innertext = node.text[0:(max_length - cur_char_nbr)]
-            outertext = node.text[(max_length - cur_char_nbr):]
-            stop_idx = outertext.find(' ')
-            if stop_idx == -1:
-                stop_idx = len(outertext)
-            node.text = innertext + outertext[0:stop_idx]
-            # create <span> ... <a href="#">read more</a></span> node
-            read_more_node = _create_node('span', ' ... ', None, {'class': 'oe_mail_expand'})
-            read_more_link_node = _create_node('a', 'read more', None, {'href': '#', 'class': 'oe_mail_expand'})
-            read_more_node.append(read_more_link_node)
-            # create outertext node
-            new_node = _create_node('span', outertext[stop_idx:])
-            # add newly created nodes in dom
-            node.append(read_more_node)
-            # tag node
-            new_node.set('in_overlength', '1')
-
-            cur_char_nbr += len(node.text or '')
+            node_to_truncate.set('truncate', '1')
+            node_to_truncate.set('truncate_position', str(max_length - cur_char_nbr))
+        cur_char_nbr += len(node.text or '')
 
-        if node.tag == 'blockquote' or node.get('text_quote') or node.get('text_signature'):
-            node.set('in_quote', '1')
+    # Tree modification
+    # ------------------------------------------------------------
+
+    for node in root.iter():
+        if node.get('truncate'):
+            _truncate_node(node, int(node.get('truncate_position', '0')))
 
     # Post processing
     # ------------------------------------------------------------