openerp/tools/mail.py

   1 # -*- coding: utf-8 -*-
   2 ##############################################################################
   3 #
   4 #    OpenERP, Open Source Business Applications
   5 #    Copyright (C) 2012-TODAY OpenERP S.A. (<http://openerp.com>).
   6 #
   7 #    This program is free software: you can redistribute it and/or modify
   8 #    it under the terms of the GNU Affero General Public License as
   9 #    published by the Free Software Foundation, either version 3 of the
  10 #    License, or (at your option) any later version.
  11 #
  12 #    This program is distributed in the hope that it will be useful,
  13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 #    GNU Affero General Public License for more details.
  16 #
  17 #    You should have received a copy of the GNU Affero General Public License
  18 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 #
  20 ##############################################################################
  21
  22 from lxml import etree
  23 import cgi
  24 import logging
  25 import lxml.html
  26 import lxml.html.clean as clean
  27 import random
  28 import re
  29 import socket
  30 import threading
  31 import time
  32 from email.utils import getaddresses
  33
  34 import openerp
  35 from openerp.loglevels import ustr
  36
  37 _logger = logging.getLogger(__name__)
  38
  39
  40 #----------------------------------------------------------
  41 # HTML Sanitizer
  42 #----------------------------------------------------------
  43
  44 tags_to_kill = ["script", "head", "meta", "title", "link", "style", "frame", "iframe", "base", "object", "embed"]
  45 tags_to_remove = ['html', 'body', 'font']
  46
  47 # allow new semantic HTML5 tags
  48 allowed_tags = clean.defs.tags | frozenset('article section header footer hgroup nav aside figure main'.split() + [etree.Comment])
  49 safe_attrs = clean.defs.safe_attrs | frozenset(
  50     ['style',
  51      'data-oe-model', 'data-oe-id', 'data-oe-field', 'data-oe-type', 'data-oe-expression', 'data-oe-translate', 'data-oe-nodeid',
  52      'data-snippet-id', 'data-publish', 'data-id', 'data-res_id', 'data-member_id', 'data-view-id'
  53      ])
  54
  55
  56 def html_sanitize(src, silent=True, strict=False):
  57     if not src:
  58         return src
  59     src = ustr(src, errors='replace')
  60
  61     logger = logging.getLogger(__name__ + '.html_sanitize')
  62
  63     # html encode email tags
  64     part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
  65     src = part.sub(lambda m: cgi.escape(m.group(1)), src)
  66     # html encode mako tags <% ... %> to decode them later and keep them alive, otherwise they are stripped by the cleaner
  67     src = src.replace('<%', cgi.escape('<%'))
  68     src = src.replace('%>', cgi.escape('%>'))
  69
  70     kwargs = {
  71         'page_structure': True,
  72         'style': False,             # do not remove style attributes
  73         'forms': True,              # remove form tags
  74         'remove_unknown_tags': False,
  75         'allow_tags': allowed_tags,
  76         'comments': False,
  77         'processing_instructions': False
  78     }
  79     if etree.LXML_VERSION >= (2, 3, 1):
  80         # kill_tags attribute has been added in version 2.3.1
  81         kwargs.update({
  82             'kill_tags': tags_to_kill,
  83             'remove_tags': tags_to_remove,
  84         })
  85     else:
  86         kwargs['remove_tags'] = tags_to_kill + tags_to_remove
  87
  88     if strict:
  89         if etree.LXML_VERSION >= (3, 1, 0):
  90             # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all attributes in order to keep "style"
  91             kwargs.update({
  92                 'safe_attrs_only': True,
  93                 'safe_attrs': safe_attrs,
  94             })
  95     else:
  96         kwargs['safe_attrs_only'] = False    # keep oe-data attributes + style
  97         kwargs['frames'] = False,            # do not remove frames (embbed video in CMS blogs)
  98
  99     try:
 100         # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
 101         cleaner = clean.Cleaner(**kwargs)
 102         cleaned = cleaner.clean_html(src)
 103         # MAKO compatibility: $, { and } inside quotes are escaped, preventing correct mako execution
 104         cleaned = cleaned.replace('%24', '$')
 105         cleaned = cleaned.replace('%7B', '{')
 106         cleaned = cleaned.replace('%7D', '}')
 107         cleaned = cleaned.replace('%20', ' ')
 108         cleaned = cleaned.replace('%5B', '[')
 109         cleaned = cleaned.replace('%5D', ']')
 110         cleaned = cleaned.replace('&lt;%', '<%')
 111         cleaned = cleaned.replace('%&gt;', '%>')
 112     except etree.ParserError, e:
 113         if 'empty' in str(e):
 114             return ""
 115         if not silent:
 116             raise
 117         logger.warning('ParserError obtained when sanitizing %r', src, exc_info=True)
 118         cleaned = '<p>ParserError when sanitizing</p>'
 119     except Exception:
 120         if not silent:
 121             raise
 122         logger.warning('unknown error obtained when sanitizing %r', src, exc_info=True)
 123         cleaned = '<p>Unknown error when sanitizing</p>'
 124
 125     # this is ugly, but lxml/etree tostring want to put everything in a 'div' that breaks the editor -> remove that
 126     if cleaned.startswith('<div>') and cleaned.endswith('</div>'):
 127         cleaned = cleaned[5:-6]
 128
 129     return cleaned
 130
 131
 132 #----------------------------------------------------------
 133 # HTML Cleaner
 134 #----------------------------------------------------------
 135
 136 def html_email_clean(html, remove=False, shorten=False, max_length=300, expand_options=None,
 137                      protect_sections=False):
 138     """ html_email_clean: clean the html by doing the following steps:
 139
 140      - try to strip email quotes, by removing blockquotes or having some client-
 141        specific heuristics
 142      - try to strip signatures
 143      - shorten the html to a maximum number of characters if requested
 144
 145     Some specific use case:
 146
 147      - MsOffice: ``div.style = border-top:solid;`` delimitates the beginning of
 148        a quote; detecting by finding WordSection1 of MsoNormal
 149      - Hotmail: ``hr.stopSpelling`` delimitates the beginning of a quote; detect
 150        Hotmail by funding ``SkyDrivePlaceholder``
 151
 152     :param string html: sanitized html; tags like html or head should not
 153                         be present in the html string. This method therefore
 154                         takes as input html code coming from a sanitized source,
 155                         like fields.html.
 156     :param boolean remove: remove the html code that is unwanted; otherwise it
 157                            is only flagged and tagged
 158     :param boolean shorten: shorten the html; every excessing content will
 159                             be flagged as to remove
 160     :param int max_length: if shortening, maximum number of characters before
 161                            shortening
 162     :param dict expand_options: options for the read more link when shortening
 163                                 the content.The used keys are the following:
 164
 165                                  - oe_expand_container_tag: class applied to the
 166                                    container of the whole read more link
 167                                  - oe_expand_container_class: class applied to the
 168                                    link container (default: oe_mail_expand)
 169                                  - oe_expand_container_content: content of the
 170                                    container (default: ...)
 171                                  - oe_expand_separator_node: optional separator, like
 172                                    adding ... <br /><br /> <a ...>read more</a> (default: void)
 173                                  - oe_expand_a_href: href of the read more link itself
 174                                    (default: #)
 175                                  - oe_expand_a_class: class applied to the <a> containing
 176                                    the link itself (default: oe_mail_expand)
 177                                  - oe_expand_a_content: content of the <a> (default: read more)
 178
 179                                 The formatted read more link is the following:
 180                                 <cont_tag class="oe_expand_container_class">
 181                                     oe_expand_container_content
 182                                     if expand_options.get('oe_expand_separator_node'):
 183                                         <oe_expand_separator_node/>
 184                                     <a href="oe_expand_a_href" class="oe_expand_a_class">
 185                                         oe_expand_a_content
 186                                     </a>
 187                                 </span>
 188     """
 189     def _replace_matching_regex(regex, source, replace=''):
 190         """ Replace all matching expressions in source by replace """
 191         if not source:
 192             return source
 193         dest = ''
 194         idx = 0
 195         for item in re.finditer(regex, source):
 196             dest += source[idx:item.start()] + replace
 197             idx = item.end()
 198         dest += source[idx:]
 199         return dest
 200
 201     def _create_node(tag, text, tail=None, attrs={}):
 202         new_node = etree.Element(tag)
 203         new_node.text = text
 204         new_node.tail = tail
 205         for key, val in attrs.iteritems():
 206             new_node.set(key, val)
 207         return new_node
 208
 209     def _insert_new_node(node, index, new_node_tag, new_node_text, new_node_tail=None, new_node_attrs={}):
 210         new_node = _create_node(new_node_tag, new_node_text, new_node_tail, new_node_attrs)
 211         node.insert(index, new_node)
 212         return new_node
 213
 214     def _tag_matching_regex_in_text(regex, node, new_node_tag='span', new_node_attrs={}):
 215         text = node.text or ''
 216         if not re.search(regex, text):
 217             return
 218
 219         cur_node = node
 220         node.text = ''
 221         idx, iteration = 0, 0
 222         for item in re.finditer(regex, text):
 223             if iteration == 0:
 224                 cur_node.text = text[idx:item.start()]
 225             else:
 226                 _insert_new_node(node, (iteration - 1) * 2 + 1, new_node_tag, text[idx:item.start()])
 227             new_node = _insert_new_node(node, iteration * 2, new_node_tag, text[item.start():item.end()], None, new_node_attrs)
 228
 229             cur_node = new_node
 230             idx = item.end()
 231             iteration += 1
 232         new_node = _insert_new_node(node, -1, new_node_tag, text[idx:] + (cur_node.tail or ''), None, {})
 233
 234     def _truncate_node(node, position, simplify_whitespaces=True):
 235         """ Truncate a node text at a given position. This algorithm will shorten
 236         at the end of the word whose ending character exceeds position.
 237
 238             :param bool simplify_whitespaces: whether to try to count all successive
 239                                               whitespaces as one character. This
 240                                               option should not be True when trying
 241                                               to keep 'pre' consistency.
 242         """
 243         if node.text is None:
 244             node.text = ''
 245
 246         truncate_idx = -1
 247         if simplify_whitespaces:
 248             cur_char_nbr = 0
 249             word = None
 250             node_words = node.text.strip(' \t\r\n').split()
 251             for word in node_words:
 252                 cur_char_nbr += len(word)
 253                 if cur_char_nbr >= position:
 254                     break
 255             if word:
 256                 truncate_idx = node.text.find(word) + len(word)
 257         else:
 258             truncate_idx = position
 259         if truncate_idx == -1 or truncate_idx > len(node.text):
 260             truncate_idx = len(node.text)
 261
 262         # compose new text bits
 263         innertext = node.text[0:truncate_idx]
 264         outertext = node.text[truncate_idx:]
 265         node.text = innertext
 266
 267         # create <span> ... <a href="#">read more</a></span> node
 268         read_more_node = _create_node(
 269             expand_options.get('oe_expand_container_tag', 'span'),
 270             expand_options.get('oe_expand_container_content', ' ... '),
 271             None,
 272             {'class': expand_options.get('oe_expand_container_class', 'oe_mail_expand')}
 273         )
 274         if expand_options.get('oe_expand_separator_node'):
 275             read_more_separator_node = _create_node(
 276                 expand_options.get('oe_expand_separator_node'),
 277                 '',
 278                 None,
 279                 {}
 280             )
 281             read_more_node.append(read_more_separator_node)
 282         read_more_link_node = _create_node(
 283             'a',
 284             expand_options.get('oe_expand_a_content', 'read more'),
 285             None,
 286             {
 287                 'href': expand_options.get('oe_expand_a_href', '#'),
 288                 'class': expand_options.get('oe_expand_a_class', 'oe_mail_expand'),
 289             }
 290         )
 291         read_more_node.append(read_more_link_node)
 292         # create outertext node
 293         overtext_node = _create_node('span', outertext)
 294         # tag node
 295         overtext_node.set('in_overlength', '1')
 296         # add newly created nodes in dom
 297         node.append(read_more_node)
 298         node.append(overtext_node)
 299
 300     if expand_options is None:
 301         expand_options = {}
 302
 303     if not html or not isinstance(html, basestring):
 304         return html
 305     html = ustr(html)
 306
 307     # Pre processing
 308     # ------------------------------------------------------------
 309     # TDE TODO: --- MAIL ORIGINAL ---: '[\-]{4,}([^\-]*)[\-]{4,}'
 310
 311     # html: remove encoding attribute inside tags
 312     doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
 313     html = doctype.sub(r"", html)
 314
 315     # html: ClEditor seems to love using <div><br /><div> -> replace with <br />
 316     br_div_tags = re.compile(r'(<div>\s*<br\s*\/>\s*<\/div>)', re.IGNORECASE)
 317     html = _replace_matching_regex(br_div_tags, html, '<br />')
 318
 319     # form a tree
 320     root = lxml.html.fromstring(html)
 321     if not len(root) and root.text is None and root.tail is None:
 322         html = '<div>%s</div>' % html
 323         root = lxml.html.fromstring(html)
 324
 325     quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
 326     signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[\s\S]+)')
 327     for node in root.iter():
 328         # remove all tails and replace them by a span element, because managing text and tails can be a pain in the ass
 329         if node.tail:
 330             tail_node = _create_node('span', node.tail)
 331             node.tail = None
 332             node.addnext(tail_node)
 333
 334         # form node and tag text-based quotes and signature
 335         _tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
 336         _tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})
 337
 338     # Processing
 339     # ------------------------------------------------------------
 340
 341     # tree: tag nodes
 342     # signature_begin = False  # try dynamic signature recognition
 343     quote_begin = False
 344     overlength = False
 345     overlength_section_id = None
 346     overlength_section_count = 0
 347     cur_char_nbr = 0
 348     for node in root.iter():
 349         # comments do not need processing
 350         # note: bug in node.get(value, default) for HtmlComments, default never returned
 351         if node.tag == etree.Comment:
 352             continue
 353         # do not take into account multiple spaces that are displayed as max 1 space in html
 354         node_text = ' '.join((node.text and node.text.strip(' \t\r\n') or '').split())
 355
 356         # root: try to tag the client used to write the html
 357         if 'WordSection1' in node.get('class', '') or 'MsoNormal' in node.get('class', ''):
 358             root.set('msoffice', '1')
 359         if 'SkyDrivePlaceholder' in node.get('class', '') or 'SkyDrivePlaceholder' in node.get('id', ''):
 360             root.set('hotmail', '1')
 361
 362         # protect sections by tagging section limits and blocks contained inside sections, using an increasing id to re-find them later
 363         if node.tag == 'section':
 364             overlength_section_count += 1
 365             node.set('section_closure', str(overlength_section_count))
 366         if node.getparent() is not None and (node.getparent().get('section_closure') or node.getparent().get('section_inner')):
 367             node.set('section_inner', str(overlength_section_count))
 368
 369         # state of the parsing: flag quotes and tails to remove
 370         if quote_begin:
 371             node.set('in_quote', '1')
 372             node.set('tail_remove', '1')
 373         # state of the parsing: flag when being in over-length content, depending on section content if defined (only when having protect_sections)
 374         if overlength:
 375             if not overlength_section_id or int(node.get('section_inner', overlength_section_count + 1)) > overlength_section_count:
 376                 node.set('in_overlength', '1')
 377                 node.set('tail_remove', '1')
 378
 379         # find quote in msoffice / hotmail / blockquote / text quote and signatures
 380         if root.get('msoffice') and node.tag == 'div' and 'border-top:solid' in node.get('style', ''):
 381             quote_begin = True
 382             node.set('in_quote', '1')
 383             node.set('tail_remove', '1')
 384         if root.get('hotmail') and node.tag == 'hr' and ('stopSpelling' in node.get('class', '') or 'stopSpelling' in node.get('id', '')):
 385             quote_begin = True
 386             node.set('in_quote', '1')
 387             node.set('tail_remove', '1')
 388         if node.tag == 'blockquote' or node.get('text_quote') or node.get('text_signature'):
 389             # here no quote_begin because we want to be able to remove some quoted
 390             # text without removing all the remaining context
 391             node.set('in_quote', '1')
 392         if node.getparent() is not None and node.getparent().get('in_quote'):
 393             # inside a block of removed text but not in quote_begin (see above)
 394             node.set('in_quote', '1')
 395
 396         # shorten:
 397         # if protect section:
 398         #   1/ find the first parent not being inside a section
 399         #   2/ add the read more link
 400         # else:
 401         #   1/ truncate the text at the next available space
 402         #   2/ create a 'read more' node, next to current node
 403         #   3/ add the truncated text in a new node, next to 'read more' node
 404         node_text = (node.text or '').strip().strip('\n').strip()
 405         if shorten and not overlength and cur_char_nbr + len(node_text) > max_length:
 406             node_to_truncate = node
 407             while node_to_truncate.getparent() is not None:
 408                 if node_to_truncate.get('in_quote'):
 409                     node_to_truncate = node_to_truncate.getparent()
 410                 elif protect_sections and (node_to_truncate.getparent().get('section_inner') or node_to_truncate.getparent().get('section_closure')):
 411                     node_to_truncate = node_to_truncate.getparent()
 412                     overlength_section_id = node_to_truncate.get('section_closure')
 413                 else:
 414                     break
 415
 416             overlength = True
 417             node_to_truncate.set('truncate', '1')
 418             if node_to_truncate == node:
 419                 node_to_truncate.set('truncate_position', str(max_length - cur_char_nbr))
 420             else:
 421                 node_to_truncate.set('truncate_position', str(len(node.text or '')))
 422         cur_char_nbr += len(node_text)
 423
 424     # Tree modification
 425     # ------------------------------------------------------------
 426
 427     for node in root.iter():
 428         if node.get('truncate'):
 429             _truncate_node(node, int(node.get('truncate_position', '0')), node.tag != 'pre')
 430
 431     # Post processing
 432     # ------------------------------------------------------------
 433
 434     to_remove = []
 435     for node in root.iter():
 436         if node.get('in_quote') or node.get('in_overlength'):
 437             # copy the node tail into parent text
 438             if node.tail and not node.get('tail_remove'):
 439                 parent = node.getparent()
 440                 parent.tail = node.tail + (parent.tail or '')
 441             to_remove.append(node)
 442         if node.get('tail_remove'):
 443             node.tail = ''
 444         # clean node
 445         for attribute_name in ['in_quote', 'tail_remove', 'in_overlength', 'msoffice', 'hotmail', 'truncate', 'truncate_position']:
 446             node.attrib.pop(attribute_name, None)
 447     for node in to_remove:
 448         if remove:
 449             node.getparent().remove(node)
 450         else:
 451             if not expand_options.get('oe_expand_a_class', 'oe_mail_expand') in node.get('class', ''):  # trick: read more link should be displayed even if it's in overlength
 452                 node_class = node.get('class', '') + ' oe_mail_cleaned'
 453                 node.set('class', node_class)
 454
 455     # html: \n that were tail of elements have been encapsulated into <span> -> back to \n
 456     html = etree.tostring(root, pretty_print=False)
 457     linebreaks = re.compile(r'<span[^>]*>([\s]*[\r\n]+[\s]*)<\/span>', re.IGNORECASE | re.DOTALL)
 458     html = _replace_matching_regex(linebreaks, html, '\n')
 459
 460     return html
 461
 462
 463 #----------------------------------------------------------
 464 # HTML/Text management
 465 #----------------------------------------------------------
 466
 467 def html2plaintext(html, body_id=None, encoding='utf-8'):
 468     """ From an HTML text, convert the HTML to plain text.
 469     If @param body_id is provided then this is the tag where the
 470     body (not necessarily <body>) starts.
 471     """
 472     ## (c) Fry-IT, www.fry-it.com, 2007
 473     ## <peter@fry-it.com>
 474     ## download here: http://www.peterbe.com/plog/html2plaintext
 475
 476     html = ustr(html)
 477     tree = etree.fromstring(html, parser=etree.HTMLParser())
 478
 479     if body_id is not None:
 480         source = tree.xpath('//*[@id=%s]' % (body_id,))
 481     else:
 482         source = tree.xpath('//body')
 483     if len(source):
 484         tree = source[0]
 485
 486     url_index = []
 487     i = 0
 488     for link in tree.findall('.//a'):
 489         url = link.get('href')
 490         if url:
 491             i += 1
 492             link.tag = 'span'
 493             link.text = '%s [%s]' % (link.text, i)
 494             url_index.append(url)
 495
 496     html = ustr(etree.tostring(tree, encoding=encoding))
 497     # \r char is converted into &#13;, must remove it
 498     html = html.replace('&#13;', '')
 499
 500     html = html.replace('<strong>', '*').replace('</strong>', '*')
 501     html = html.replace('<b>', '*').replace('</b>', '*')
 502     html = html.replace('<h3>', '*').replace('</h3>', '*')
 503     html = html.replace('<h2>', '**').replace('</h2>', '**')
 504     html = html.replace('<h1>', '**').replace('</h1>', '**')
 505     html = html.replace('<em>', '/').replace('</em>', '/')
 506     html = html.replace('<tr>', '\n')
 507     html = html.replace('</p>', '\n')
 508     html = re.sub('<br\s*/?>', '\n', html)
 509     html = re.sub('<.*?>', ' ', html)
 510     html = html.replace(' ' * 2, ' ')
 511     html = html.replace('&gt;', '>')
 512     html = html.replace('&lt;', '<')
 513     html = html.replace('&amp;', '&')
 514
 515     # strip all lines
 516     html = '\n'.join([x.strip() for x in html.splitlines()])
 517     html = html.replace('\n' * 2, '\n')
 518
 519     for i, url in enumerate(url_index):
 520         if i == 0:
 521             html += '\n\n'
 522         html += ustr('[%s] %s\n') % (i + 1, url)
 523
 524     return html
 525
 526 def plaintext2html(text, container_tag=False):
 527     """ Convert plaintext into html. Content of the text is escaped to manage
 528         html entities, using cgi.escape().
 529         - all \n,\r are replaced by <br />
 530         - enclose content into <p>
 531         - 2 or more consecutive <br /> are considered as paragraph breaks
 532
 533         :param string container_tag: container of the html; by default the
 534             content is embedded into a <div>
 535     """
 536     text = cgi.escape(ustr(text))
 537
 538     # 1. replace \n and \r
 539     text = text.replace('\n', '<br/>')
 540     text = text.replace('\r', '<br/>')
 541
 542     # 2-3: form paragraphs
 543     idx = 0
 544     final = '<p>'
 545     br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
 546     for item in re.finditer(br_tags, text):
 547         final += text[idx:item.start()] + '</p><p>'
 548         idx = item.end()
 549     final += text[idx:] + '</p>'
 550
 551     # 4. container
 552     if container_tag:
 553         final = '<%s>%s</%s>' % (container_tag, final, container_tag)
 554     return ustr(final)
 555
 556 def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
 557     """ Append extra content at the end of an HTML snippet, trying
 558         to locate the end of the HTML document (</body>, </html>, or
 559         EOF), and converting the provided content in html unless ``plaintext``
 560         is False.
 561         Content conversion can be done in two ways:
 562         - wrapping it into a pre (preserve=True)
 563         - use plaintext2html (preserve=False, using container_tag to wrap the
 564             whole content)
 565         A side-effect of this method is to coerce all HTML tags to
 566         lowercase in ``html``, and strip enclosing <html> or <body> tags in
 567         content if ``plaintext`` is False.
 568
 569         :param str html: html tagsoup (doesn't have to be XHTML)
 570         :param str content: extra content to append
 571         :param bool plaintext: whether content is plaintext and should
 572             be wrapped in a <pre/> tag.
 573         :param bool preserve: if content is plaintext, wrap it into a <pre>
 574             instead of converting it into html
 575     """
 576     html = ustr(html)
 577     if plaintext and preserve:
 578         content = u'\n<pre>%s</pre>\n' % ustr(content)
 579     elif plaintext:
 580         content = '\n%s\n' % plaintext2html(content, container_tag)
 581     else:
 582         content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
 583         content = u'\n%s\n' % ustr(content)
 584     # Force all tags to lowercase
 585     html = re.sub(r'(</?)\W*(\w+)([ >])',
 586         lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
 587     insert_location = html.find('</body>')
 588     if insert_location == -1:
 589         insert_location = html.find('</html>')
 590     if insert_location == -1:
 591         return '%s%s' % (html, content)
 592     return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
 593
 594 #----------------------------------------------------------
 595 # Emails
 596 #----------------------------------------------------------
 597
 598 # matches any email in a body of text
 599 email_re = re.compile(r"""([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6})""", re.VERBOSE)
 600
 601 # matches a string containing only one email
 602 single_email_re = re.compile(r"""^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}$""", re.VERBOSE)
 603
 604 res_re = re.compile(r"\[([0-9]+)\]", re.UNICODE)
 605 command_re = re.compile("^Set-([a-z]+) *: *(.+)$", re.I + re.UNICODE)
 606
 607 # Updated in 7.0 to match the model name as well
 608 # Typical form of references is <timestamp-openerp-record_id-model_name@domain>
 609 # group(1) = the record ID ; group(2) = the model (if any) ; group(3) = the domain
 610 reference_re = re.compile("<.*-open(?:object|erp)-(\\d+)(?:-([\w.]+))?[^>]*@([^>]*)>", re.UNICODE)
 611
 612
 613 def generate_tracking_message_id(res_id):
 614     """Returns a string that can be used in the Message-ID RFC822 header field
 615
 616        Used to track the replies related to a given object thanks to the "In-Reply-To"
 617        or "References" fields that Mail User Agents will set.
 618     """
 619     try:
 620         rnd = random.SystemRandom().random()
 621     except NotImplementedError:
 622         rnd = random.random()
 623     rndstr = ("%.15f" % rnd)[2:]
 624     return "<%.15f.%s-openerp-%s@%s>" % (time.time(), rndstr, res_id, socket.gethostname())
 625
 626 def email_send(email_from, email_to, subject, body, email_cc=None, email_bcc=None, reply_to=False,
 627                attachments=None, message_id=None, references=None, openobject_id=False, debug=False, subtype='plain', headers=None,
 628                smtp_server=None, smtp_port=None, ssl=False, smtp_user=None, smtp_password=None, cr=None, uid=None):
 629     """Low-level function for sending an email (deprecated).
 630
 631     :deprecate: since OpenERP 6.1, please use ir.mail_server.send_email() instead.
 632     :param email_from: A string used to fill the `From` header, if falsy,
 633                        config['email_from'] is used instead.  Also used for
 634                        the `Reply-To` header if `reply_to` is not provided
 635     :param email_to: a sequence of addresses to send the mail to.
 636     """
 637
 638     # If not cr, get cr from current thread database
 639     local_cr = None
 640     if not cr:
 641         db_name = getattr(threading.currentThread(), 'dbname', None)
 642         if db_name:
 643             local_cr = cr = openerp.registry(db_name).cursor()
 644         else:
 645             raise Exception("No database cursor found, please pass one explicitly")
 646
 647     # Send Email
 648     try:
 649         mail_server_pool = openerp.registry(cr.dbname)['ir.mail_server']
 650         res = False
 651         # Pack Message into MIME Object
 652         email_msg = mail_server_pool.build_email(email_from, email_to, subject, body, email_cc, email_bcc, reply_to,
 653                    attachments, message_id, references, openobject_id, subtype, headers=headers)
 654
 655         res = mail_server_pool.send_email(cr, uid or 1, email_msg, mail_server_id=None,
 656                        smtp_server=smtp_server, smtp_port=smtp_port, smtp_user=smtp_user, smtp_password=smtp_password,
 657                        smtp_encryption=('ssl' if ssl else None), smtp_debug=debug)
 658     except Exception:
 659         _logger.exception("tools.email_send failed to deliver email")
 660         return False
 661     finally:
 662         if local_cr:
 663             cr.close()
 664     return res
 665
 666 def email_split(text):
 667     """ Return a list of the email addresses found in ``text`` """
 668     if not text:
 669         return []
 670     return [addr[1] for addr in getaddresses([text])
 671                 # getaddresses() returns '' when email parsing fails, and
 672                 # sometimes returns emails without at least '@'. The '@'
 673                 # is strictly required in RFC2822's `addr-spec`.
 674                 if addr[1]
 675                 if '@' in addr[1]]