openerp/tools/mail.py

   1 # -*- coding: utf-8 -*-
   2 ##############################################################################
   3 #
   4 #    OpenERP, Open Source Business Applications
   5 #    Copyright (C) 2012-TODAY OpenERP S.A. (<http://openerp.com>).
   6 #
   7 #    This program is free software: you can redistribute it and/or modify
   8 #    it under the terms of the GNU Affero General Public License as
   9 #    published by the Free Software Foundation, either version 3 of the
  10 #    License, or (at your option) any later version.
  11 #
  12 #    This program is distributed in the hope that it will be useful,
  13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 #    GNU Affero General Public License for more details.
  16 #
  17 #    You should have received a copy of the GNU Affero General Public License
  18 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 #
  20 ##############################################################################
  21
  22 from lxml import etree
  23 import cgi
  24 import logging
  25 import lxml.html
  26 import lxml.html.clean as clean
  27 import random
  28 import re
  29 import socket
  30 import threading
  31 import time
  32 from email.utils import getaddresses
  33
  34 import openerp
  35 from openerp.loglevels import ustr
  36
  37 _logger = logging.getLogger(__name__)
  38
  39
  40 #----------------------------------------------------------
  41 # HTML Sanitizer
  42 #----------------------------------------------------------
  43
  44 tags_to_kill = ["script", "head", "meta", "title", "link", "style", "frame", "iframe", "base", "object", "embed"]
  45 tags_to_remove = ['html', 'body', 'font']
  46
  47 # allow new semantic HTML5 tags
  48 allowed_tags = clean.defs.tags | frozenset('article section header footer hgroup nav aside figure main'.split() + [etree.Comment])
  49 safe_attrs = clean.defs.safe_attrs | frozenset(
  50     ['style',
  51      'data-oe-model', 'data-oe-id', 'data-oe-field', 'data-oe-type', 'data-oe-expression', 'data-oe-translate', 'data-oe-nodeid',
  52      'data-snippet-id', 'data-publish', 'data-id', 'data-res_id', 'data-member_id', 'data-view-id'
  53      ])
  54
  55
  56 def html_sanitize(src, silent=True, strict=False):
  57     if not src:
  58         return src
  59     src = ustr(src, errors='replace')
  60
  61     logger = logging.getLogger(__name__ + '.html_sanitize')
  62
  63     # html encode email tags
  64     part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
  65     src = part.sub(lambda m: cgi.escape(m.group(1)), src)
  66
  67     kwargs = {
  68         'page_structure': True,
  69         'style': False,             # do not remove style attributes
  70         'forms': True,              # remove form tags
  71         'remove_unknown_tags': False,
  72         'allow_tags': allowed_tags,
  73         'comments': False,
  74         'processing_instructions' : False
  75     }
  76     if etree.LXML_VERSION >= (2, 3, 1):
  77         # kill_tags attribute has been added in version 2.3.1
  78         kwargs.update({
  79             'kill_tags': tags_to_kill,
  80             'remove_tags': tags_to_remove,
  81         })
  82     else:
  83         kwargs['remove_tags'] = tags_to_kill + tags_to_remove
  84
  85     if strict:
  86         if etree.LXML_VERSION >= (3, 1, 0):
  87             # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all attributes in order to keep "style"
  88             kwargs.update({
  89                 'safe_attrs_only': True,
  90                 'safe_attrs': safe_attrs,
  91             })
  92     else:
  93         kwargs['safe_attrs_only'] = False    # keep oe-data attributes + style
  94         kwargs['frames'] = False,            # do not remove frames (embbed video in CMS blogs)
  95
  96     try:
  97         # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
  98         cleaner = clean.Cleaner(**kwargs)
  99         cleaned = cleaner.clean_html(src)
 100         # MAKO compatibility: $, { and } inside quotes are escaped, preventing correct mako execution
 101         cleaned = cleaned.replace('%24', '$')
 102         cleaned = cleaned.replace('%7B', '{')
 103         cleaned = cleaned.replace('%7D', '}')
 104         cleaned = cleaned.replace('%20', ' ')
 105         cleaned = cleaned.replace('%5B', '[')
 106         cleaned = cleaned.replace('%5D', ']')
 107     except etree.ParserError, e:
 108         if 'empty' in str(e):
 109             return ""
 110         if not silent:
 111             raise
 112         logger.warning('ParserError obtained when sanitizing %r', src, exc_info=True)
 113         cleaned = '<p>ParserError when sanitizing</p>'
 114     except Exception:
 115         if not silent:
 116             raise
 117         logger.warning('unknown error obtained when sanitizing %r', src, exc_info=True)
 118         cleaned = '<p>Unknown error when sanitizing</p>'
 119
 120     # this is ugly, but lxml/etree tostring want to put everything in a 'div' that breaks the editor -> remove that
 121     if cleaned.startswith('<div>') and cleaned.endswith('</div>'):
 122         cleaned = cleaned[5:-6]
 123
 124     return cleaned
 125
 126
 127 #----------------------------------------------------------
 128 # HTML Cleaner
 129 #----------------------------------------------------------
 130
def html_email_clean(html, remove=False, shorten=False, max_length=300, expand_options=None,
                     protect_sections=False):
    """ html_email_clean: clean the html by doing the following steps:

     - try to strip email quotes, by removing blockquotes or having some client-
       specific heuristics
     - try to strip signatures
     - shorten the html to a maximum number of characters if requested

    Some specific use case:

     - MsOffice: ``div.style = border-top:solid;`` delimitates the beginning of
       a quote; detected by finding WordSection1 or MsoNormal
     - Hotmail: ``hr.stopSpelling`` delimitates the beginning of a quote; detect
       Hotmail by finding ``SkyDrivePlaceholder``

    :param string html: sanitized html; tags like html or head should not
                        be present in the html string. This method therefore
                        takes as input html code coming from a sanitized source,
                        like fields.html.
    :param boolean remove: remove the html code that is unwanted; otherwise it
                           is only flagged and tagged (the ``oe_mail_cleaned``
                           class is added to the unwanted nodes)
    :param boolean shorten: shorten the html; every excessing content will
                            be flagged as to remove
    :param int max_length: if shortening, maximum number of characters before
                           shortening
    :param dict expand_options: options for the read more link when shortening
                                the content. The used keys are the following:

                                 - oe_expand_container_tag: tag of the container
                                   of the whole read more link (default: span)
                                 - oe_expand_container_class: class applied to the
                                   link container (default: oe_mail_expand)
                                 - oe_expand_container_content: content of the
                                   container (default: ...)
                                 - oe_expand_separator_node: optional separator, like
                                   adding ... <br /><br /> <a ...>read more</a> (default: void)
                                 - oe_expand_a_href: href of the read more link itself
                                   (default: #)
                                 - oe_expand_a_class: class applied to the <a> containing
                                   the link itself (default: oe_mail_expand)
                                 - oe_expand_a_content: content of the <a> (default: read more)

                                The formatted read more link is the following:
                                <cont_tag class="oe_expand_container_class">
                                    oe_expand_container_content
                                    if expand_options.get('oe_expand_separator_node'):
                                        <oe_expand_separator_node/>
                                    <a href="oe_expand_a_href" class="oe_expand_a_class">
                                        oe_expand_a_content
                                    </a>
                                </cont_tag>
    :param boolean protect_sections: if shortening, do not truncate in the
                                     middle of a ``<section>``: the truncation
                                     point is moved up to the first ancestor
                                     that is not inside a section
    :return: the cleaned html, as serialized by ``etree.tostring``; ``html`` is
             returned unchanged when it is falsy or not a string
    """
    def _replace_matching_regex(regex, source, replace=''):
        """ Replace all matching expressions in source by replace """
        if not source:
            return source
        dest = ''
        idx = 0
        for item in re.finditer(regex, source):
            dest += source[idx:item.start()] + replace
            idx = item.end()
        dest += source[idx:]
        return dest

    def _create_node(tag, text, tail=None, attrs={}):
        """ Build a detached element ``tag`` with the given text, tail and
        attributes. ``attrs`` is only read, never modified. """
        new_node = etree.Element(tag)
        new_node.text = text
        new_node.tail = tail
        for key, val in attrs.iteritems():
            new_node.set(key, val)
        return new_node

    def _insert_new_node(node, index, new_node_tag, new_node_text, new_node_tail=None, new_node_attrs={}):
        """ Create a node (see ``_create_node``), insert it into ``node`` at
        position ``index`` and return it. """
        new_node = _create_node(new_node_tag, new_node_text, new_node_tail, new_node_attrs)
        node.insert(index, new_node)
        return new_node

    def _tag_matching_regex_in_text(regex, node, new_node_tag='span', new_node_attrs={}):
        """ Split ``node.text`` around every match of ``regex``.

        Nothing is done when the text does not match. Otherwise the text
        preceding the first match stays in ``node.text``, each match is moved
        into a new ``new_node_tag`` child carrying ``new_node_attrs``, the text
        between two matches goes into a plain ``new_node_tag`` child, and the
        remaining text goes into a last plain child inserted at index -1.
        """
        text = node.text or ''
        if not re.search(regex, text):
            return

        cur_node = node
        node.text = ''
        idx, iteration = 0, 0
        for item in re.finditer(regex, text):
            if iteration == 0:
                cur_node.text = text[idx:item.start()]
            else:
                _insert_new_node(node, (iteration - 1) * 2 + 1, new_node_tag, text[idx:item.start()])
            new_node = _insert_new_node(node, iteration * 2, new_node_tag, text[item.start():item.end()], None, new_node_attrs)

            cur_node = new_node
            idx = item.end()
            iteration += 1
        # NOTE(review): the value bound to new_node below is never read
        new_node = _insert_new_node(node, -1, new_node_tag, text[idx:] + (cur_node.tail or ''), None, {})

    def _truncate_node(node, position, simplify_whitespaces=True):
        """ Truncate a node text at a given position. This algorithm will shorten
        at the end of the word whose ending character exceeds position.

        The kept text stays in ``node.text``; a 'read more' container (built
        from ``expand_options``) and a ``span`` holding the cut text, flagged
        with ``in_overlength``, are appended to the node.

            :param bool simplify_whitespaces: whether to try to count all successive
                                              whitespaces as one character. This
                                              option should not be True when trying
                                              to keep 'pre' consistency.
        """
        if node.text is None:
            node.text = ''

        truncate_idx = -1
        if simplify_whitespaces:
            # count the characters word by word (whitespaces are not counted)
            # and stop at the word that reaches the requested position
            cur_char_nbr = 0
            word = None
            node_words = node.text.strip(' \t\r\n').split()
            for word in node_words:
                cur_char_nbr += len(word)
                if cur_char_nbr >= position:
                    break
            if word:
                # NOTE(review): find() returns the first occurrence of the
                # word, which may come before the word the loop stopped on
                truncate_idx = node.text.find(word) + len(word)
        else:
            truncate_idx = position
        if truncate_idx == -1 or truncate_idx > len(node.text):
            truncate_idx = len(node.text)

        # compose new text bits
        innertext = node.text[0:truncate_idx]
        outertext = node.text[truncate_idx:]
        node.text = innertext

        # create <span> ... <a href="#">read more</a></span> node
        read_more_node = _create_node(
            expand_options.get('oe_expand_container_tag', 'span'),
            expand_options.get('oe_expand_container_content', ' ... '),
            None,
            {'class': expand_options.get('oe_expand_container_class', 'oe_mail_expand')}
        )
        if expand_options.get('oe_expand_separator_node'):
            read_more_separator_node = _create_node(
                expand_options.get('oe_expand_separator_node'),
                '',
                None,
                {}
            )
            read_more_node.append(read_more_separator_node)
        read_more_link_node = _create_node(
            'a',
            expand_options.get('oe_expand_a_content', 'read more'),
            None,
            {
                'href': expand_options.get('oe_expand_a_href', '#'),
                'class': expand_options.get('oe_expand_a_class', 'oe_mail_expand'),
            }
        )
        read_more_node.append(read_more_link_node)
        # create outertext node
        overtext_node = _create_node('span', outertext)
        # tag node
        overtext_node.set('in_overlength', '1')
        # add newly created nodes in dom
        node.append(read_more_node)
        node.append(overtext_node)

    if expand_options is None:
        expand_options = {}

    # nothing to clean: hand back the input untouched
    if not html or not isinstance(html, basestring):
        return html
    html = ustr(html)

    # Pre processing
    # ------------------------------------------------------------
    # TDE TODO: --- MAIL ORIGINAL ---: '[\-]{4,}([^\-]*)[\-]{4,}'

    # html: remove encoding attribute inside tags
    # NOTE(review): the whole match is replaced by an empty string, i.e. the
    # complete tag holding the encoding attribute is dropped, not only the
    # attribute - confirm this is intended
    doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
    html = doctype.sub(r"", html)

    # html: ClEditor seems to love using <div><br /><div> -> replace with <br />
    br_div_tags = re.compile(r'(<div>\s*<br\s*\/>\s*<\/div>)', re.IGNORECASE)
    html = _replace_matching_regex(br_div_tags, html, '<br />')

    # form a tree
    root = lxml.html.fromstring(html)
    # a root without children, text nor tail: wrap the html into a div and
    # parse it again
    if not len(root) and root.text is None and root.tail is None:
        html = '<div>%s</div>' % html
        root = lxml.html.fromstring(html)

    # text-based quotes: lines beginning with one or more '>'
    quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
    # text-based signature: at least two dashes, a line break, then everything after
    signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[\s\S]+)')
    for node in root.iter():
        # remove all tails and replace them by a span element, because
        # managing both text and tails is cumbersome
        if node.tail:
            tail_node = _create_node('span', node.tail)
            node.tail = None
            node.addnext(tail_node)

        # form node and tag text-based quotes and signature
        _tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
        _tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})

    # Processing
    # ------------------------------------------------------------

    # tree: tag nodes
    # signature_begin = False  # try dynamic signature recognition
    quote_begin = False                 # True once a client-specific quote delimiter has been met
    overlength = False                  # True once the truncation point has been chosen
    overlength_section_id = None        # section_closure id of the protected section, if any
    overlength_section_count = 0        # number of <section> met so far
    cur_char_nbr = 0                    # number of text characters met so far
    for node in root.iter():
        # comments do not need processing
        # note: bug in node.get(value, default) for HtmlComments, default never returned
        if node.tag == etree.Comment:
            continue
        # do not take into account multiple spaces that are displayed as max 1 space in html
        # NOTE(review): this value is overwritten in the 'shorten' part below
        # before being read
        node_text = ' '.join((node.text and node.text.strip(' \t\r\n') or '').split())

        # root: try to tag the client used to write the html
        if 'WordSection1' in node.get('class', '') or 'MsoNormal' in node.get('class', ''):
            root.set('msoffice', '1')
        if 'SkyDrivePlaceholder' in node.get('class', '') or 'SkyDrivePlaceholder' in node.get('id', ''):
            root.set('hotmail', '1')

        # protect sections by tagging section limits and blocks contained inside sections, using an increasing id to re-find them later
        # NOTE(review): section_closure / section_inner (as well as text_quote
        # and text_signature) are not part of the attributes removed in the
        # post processing - verify whether they should be
        if node.tag == 'section':
            overlength_section_count += 1
            node.set('section_closure', str(overlength_section_count))
        if node.getparent() is not None and (node.getparent().get('section_closure') or node.getparent().get('section_inner')):
            node.set('section_inner', str(overlength_section_count))

        # state of the parsing: flag quotes and tails to remove
        if quote_begin:
            node.set('in_quote', '1')
            node.set('tail_remove', '1')
        # state of the parsing: flag when being in over-length content, depending on section content if defined (only when having protect_sections)
        if overlength:
            if not overlength_section_id or int(node.get('section_inner', overlength_section_count + 1)) > overlength_section_count:
                node.set('in_overlength', '1')
                node.set('tail_remove', '1')

        # find quote in msoffice / hotmail / blockquote / text quote and signatures
        if root.get('msoffice') and node.tag == 'div' and 'border-top:solid' in node.get('style', ''):
            quote_begin = True
            node.set('in_quote', '1')
            node.set('tail_remove', '1')
        if root.get('hotmail') and node.tag == 'hr' and ('stopSpelling' in node.get('class', '') or 'stopSpelling' in node.get('id', '')):
            quote_begin = True
            node.set('in_quote', '1')
            node.set('tail_remove', '1')
        if node.tag == 'blockquote' or node.get('text_quote') or node.get('text_signature'):
            node.set('in_quote', '1')

        # shorten:
        # if protect section:
        #   1/ find the first parent not being inside a section
        #   2/ add the read more link
        # else:
        #   1/ truncate the text at the next available space
        #   2/ create a 'read more' node, next to current node
        #   3/ add the truncated text in a new node, next to 'read more' node
        node_text = (node.text or '').strip().strip('\n').strip()
        if shorten and not overlength and cur_char_nbr + len(node_text) > max_length:
            # climb out of quotes (and of sections when protecting them) to
            # find the node that will actually be truncated
            node_to_truncate = node
            while node_to_truncate.getparent() is not None:
                if node_to_truncate.get('in_quote'):
                    node_to_truncate = node_to_truncate.getparent()
                elif protect_sections and (node_to_truncate.getparent().get('section_inner') or node_to_truncate.getparent().get('section_closure')):
                    node_to_truncate = node_to_truncate.getparent()
                    overlength_section_id = node_to_truncate.get('section_closure')
                else:
                    break

            overlength = True
            node_to_truncate.set('truncate', '1')
            if node_to_truncate == node:
                # truncate inside the current node, at the remaining length
                node_to_truncate.set('truncate_position', str(max_length - cur_char_nbr))
            else:
                node_to_truncate.set('truncate_position', str(len(node.text or '')))
        cur_char_nbr += len(node_text)

    # Tree modification
    # ------------------------------------------------------------

    # 'pre' nodes are truncated without simplifying whitespaces
    for node in root.iter():
        if node.get('truncate'):
            _truncate_node(node, int(node.get('truncate_position', '0')), node.tag != 'pre')

    # Post processing
    # ------------------------------------------------------------

    to_remove = []
    for node in root.iter():
        if node.get('in_quote') or node.get('in_overlength'):
            # keep the node tail by prepending it to the tail of the parent
            if node.tail and not node.get('tail_remove'):
                parent = node.getparent()
                parent.tail = node.tail + (parent.tail or '')
            to_remove.append(node)
        if node.get('tail_remove'):
            node.tail = ''
        # clean node: drop the temporary attributes set during the processing
        for attribute_name in ['in_quote', 'tail_remove', 'in_overlength', 'msoffice', 'hotmail', 'truncate', 'truncate_position']:
            node.attrib.pop(attribute_name, None)
    # unwanted nodes are either dropped from the tree, or kept and flagged
    # with the oe_mail_cleaned class
    for node in to_remove:
        if remove:
            node.getparent().remove(node)
        else:
            if not expand_options.get('oe_expand_a_class', 'oe_mail_expand') in node.get('class', ''):  # trick: read more link should be displayed even if it's in overlength
                node_class = node.get('class', '') + ' oe_mail_cleaned'
                node.set('class', node_class)

    # html: \n that were tail of elements have been encapsulated into <span> -> back to \n
    html = etree.tostring(root, pretty_print=False)
    linebreaks = re.compile(r'<span[^>]*>([\s]*[\r\n]+[\s]*)<\/span>', re.IGNORECASE | re.DOTALL)
    html = _replace_matching_regex(linebreaks, html, '\n')

    return html
 451
 452
 453 #----------------------------------------------------------
 454 # HTML/Text management
 455 #----------------------------------------------------------
 456
 457 def html2plaintext(html, body_id=None, encoding='utf-8'):
 458     """ From an HTML text, convert the HTML to plain text.
 459     If @param body_id is provided then this is the tag where the
 460     body (not necessarily <body>) starts.
 461     """
 462     ## (c) Fry-IT, www.fry-it.com, 2007
 463     ## <peter@fry-it.com>
 464     ## download here: http://www.peterbe.com/plog/html2plaintext
 465
 466     html = ustr(html)
 467     tree = etree.fromstring(html, parser=etree.HTMLParser())
 468
 469     if body_id is not None:
 470         source = tree.xpath('//*[@id=%s]' % (body_id,))
 471     else:
 472         source = tree.xpath('//body')
 473     if len(source):
 474         tree = source[0]
 475
 476     url_index = []
 477     i = 0
 478     for link in tree.findall('.//a'):
 479         url = link.get('href')
 480         if url:
 481             i += 1
 482             link.tag = 'span'
 483             link.text = '%s [%s]' % (link.text, i)
 484             url_index.append(url)
 485
 486     html = ustr(etree.tostring(tree, encoding=encoding))
 487     # \r char is converted into &#13;, must remove it
 488     html = html.replace('&#13;', '')
 489
 490     html = html.replace('<strong>', '*').replace('</strong>', '*')
 491     html = html.replace('<b>', '*').replace('</b>', '*')
 492     html = html.replace('<h3>', '*').replace('</h3>', '*')
 493     html = html.replace('<h2>', '**').replace('</h2>', '**')
 494     html = html.replace('<h1>', '**').replace('</h1>', '**')
 495     html = html.replace('<em>', '/').replace('</em>', '/')
 496     html = html.replace('<tr>', '\n')
 497     html = html.replace('</p>', '\n')
 498     html = re.sub('<br\s*/?>', '\n', html)
 499     html = re.sub('<.*?>', ' ', html)
 500     html = html.replace(' ' * 2, ' ')
 501     html = html.replace('&gt;', '>')
 502     html = html.replace('&lt;', '<')
 503     html = html.replace('&amp;', '&')
 504
 505     # strip all lines
 506     html = '\n'.join([x.strip() for x in html.splitlines()])
 507     html = html.replace('\n' * 2, '\n')
 508
 509     for i, url in enumerate(url_index):
 510         if i == 0:
 511             html += '\n\n'
 512         html += ustr('[%s] %s\n') % (i + 1, url)
 513
 514     return html
 515
 516 def plaintext2html(text, container_tag=False):
 517     """ Convert plaintext into html. Content of the text is escaped to manage
 518         html entities, using cgi.escape().
 519         - all \n,\r are replaced by <br />
 520         - enclose content into <p>
 521         - 2 or more consecutive <br /> are considered as paragraph breaks
 522
 523         :param string container_tag: container of the html; by default the
 524             content is embedded into a <div>
 525     """
 526     text = cgi.escape(ustr(text))
 527
 528     # 1. replace \n and \r
 529     text = text.replace('\n', '<br/>')
 530     text = text.replace('\r', '<br/>')
 531
 532     # 2-3: form paragraphs
 533     idx = 0
 534     final = '<p>'
 535     br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
 536     for item in re.finditer(br_tags, text):
 537         final += text[idx:item.start()] + '</p><p>'
 538         idx = item.end()
 539     final += text[idx:] + '</p>'
 540
 541     # 4. container
 542     if container_tag:
 543         final = '<%s>%s</%s>' % (container_tag, final, container_tag)
 544     return ustr(final)
 545
 546 def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
 547     """ Append extra content at the end of an HTML snippet, trying
 548         to locate the end of the HTML document (</body>, </html>, or
 549         EOF), and converting the provided content in html unless ``plaintext``
 550         is False.
 551         Content conversion can be done in two ways:
 552         - wrapping it into a pre (preserve=True)
 553         - use plaintext2html (preserve=False, using container_tag to wrap the
 554             whole content)
 555         A side-effect of this method is to coerce all HTML tags to
 556         lowercase in ``html``, and strip enclosing <html> or <body> tags in
 557         content if ``plaintext`` is False.
 558
 559         :param str html: html tagsoup (doesn't have to be XHTML)
 560         :param str content: extra content to append
 561         :param bool plaintext: whether content is plaintext and should
 562             be wrapped in a <pre/> tag.
 563         :param bool preserve: if content is plaintext, wrap it into a <pre>
 564             instead of converting it into html
 565     """
 566     html = ustr(html)
 567     if plaintext and preserve:
 568         content = u'\n<pre>%s</pre>\n' % ustr(content)
 569     elif plaintext:
 570         content = '\n%s\n' % plaintext2html(content, container_tag)
 571     else:
 572         content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
 573         content = u'\n%s\n' % ustr(content)
 574     # Force all tags to lowercase
 575     html = re.sub(r'(</?)\W*(\w+)([ >])',
 576         lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
 577     insert_location = html.find('</body>')
 578     if insert_location == -1:
 579         insert_location = html.find('</html>')
 580     if insert_location == -1:
 581         return '%s%s' % (html, content)
 582     return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
 583
 584 #----------------------------------------------------------
 585 # Emails
 586 #----------------------------------------------------------
 587
 588 # matches any email in a body of text
 589 email_re = re.compile(r"""([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6})""", re.VERBOSE)
 590
 591 # matches a string containing only one email
 592 single_email_re = re.compile(r"""^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}$""", re.VERBOSE)
 593
 594 res_re = re.compile(r"\[([0-9]+)\]", re.UNICODE)
 595 command_re = re.compile("^Set-([a-z]+) *: *(.+)$", re.I + re.UNICODE)
 596
 597 # Updated in 7.0 to match the model name as well
 598 # Typical form of references is <timestamp-openerp-record_id-model_name@domain>
 599 # group(1) = the record ID ; group(2) = the model (if any) ; group(3) = the domain
 600 reference_re = re.compile("<.*-open(?:object|erp)-(\\d+)(?:-([\w.]+))?[^>]*@([^>]*)>", re.UNICODE)
 601
 602 # Bounce regex
 603 # Typical form of bounce is bounce-128-crm.lead-34@domain
 604 # group(1) = the mail ID; group(2) = the model (if any); group(3) = the record ID
 605 bounce_re = re.compile("[\w]+-(\d+)-?([\w.]+)?-?(\d+)?", re.UNICODE)
 606
 607 def generate_tracking_message_id(res_id):
 608     """Returns a string that can be used in the Message-ID RFC822 header field
 609
 610        Used to track the replies related to a given object thanks to the "In-Reply-To"
 611        or "References" fields that Mail User Agents will set.
 612     """
 613     try:
 614         rnd = random.SystemRandom().random()
 615     except NotImplementedError:
 616         rnd = random.random()
 617     rndstr = ("%.15f" % rnd)[2:]
 618     return "<%.15f.%s-openerp-%s@%s>" % (time.time(), rndstr, res_id, socket.gethostname())
 619
 620 def email_send(email_from, email_to, subject, body, email_cc=None, email_bcc=None, reply_to=False,
 621                attachments=None, message_id=None, references=None, openobject_id=False, debug=False, subtype='plain', headers=None,
 622                smtp_server=None, smtp_port=None, ssl=False, smtp_user=None, smtp_password=None, cr=None, uid=None):
 623     """Low-level function for sending an email (deprecated).
 624
 625     :deprecate: since OpenERP 6.1, please use ir.mail_server.send_email() instead.
 626     :param email_from: A string used to fill the `From` header, if falsy,
 627                        config['email_from'] is used instead.  Also used for
 628                        the `Reply-To` header if `reply_to` is not provided
 629     :param email_to: a sequence of addresses to send the mail to.
 630     """
 631
 632     # If not cr, get cr from current thread database
 633     local_cr = None
 634     if not cr:
 635         db_name = getattr(threading.currentThread(), 'dbname', None)
 636         if db_name:
 637             local_cr = cr = openerp.registry(db_name).cursor()
 638         else:
 639             raise Exception("No database cursor found, please pass one explicitly")
 640
 641     # Send Email
 642     try:
 643         mail_server_pool = openerp.registry(cr.dbname)['ir.mail_server']
 644         res = False
 645         # Pack Message into MIME Object
 646         email_msg = mail_server_pool.build_email(email_from, email_to, subject, body, email_cc, email_bcc, reply_to,
 647                    attachments, message_id, references, openobject_id, subtype, headers=headers)
 648
 649         res = mail_server_pool.send_email(cr, uid or 1, email_msg, mail_server_id=None,
 650                        smtp_server=smtp_server, smtp_port=smtp_port, smtp_user=smtp_user, smtp_password=smtp_password,
 651                        smtp_encryption=('ssl' if ssl else None), smtp_debug=debug)
 652     except Exception:
 653         _logger.exception("tools.email_send failed to deliver email")
 654         return False
 655     finally:
 656         if local_cr:
 657             cr.close()
 658     return res
 659
 660 def email_split(text):
 661     """ Return a list of the email addresses found in ``text`` """
 662     if not text:
 663         return []
 664     return [addr[1] for addr in getaddresses([text])
 665                 # getaddresses() returns '' when email parsing fails, and
 666                 # sometimes returns emails without at least '@'. The '@'
 667                 # is strictly required in RFC2822's `addr-spec`.
 668                 if addr[1]
 669                 if '@' in addr[1]]