openerp/tools/mail.py

   1 # -*- coding: utf-8 -*-
   2 ##############################################################################
   3 #
   4 #    OpenERP, Open Source Business Applications
   5 #    Copyright (C) 2012-TODAY OpenERP S.A. (<http://openerp.com>).
   6 #
   7 #    This program is free software: you can redistribute it and/or modify
   8 #    it under the terms of the GNU Affero General Public License as
   9 #    published by the Free Software Foundation, either version 3 of the
  10 #    License, or (at your option) any later version.
  11 #
  12 #    This program is distributed in the hope that it will be useful,
  13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 #    GNU Affero General Public License for more details.
  16 #
  17 #    You should have received a copy of the GNU Affero General Public License
  18 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 #
  20 ##############################################################################
  21
  22 from lxml import etree
  23 import cgi
  24 import logging
  25 import lxml.html
  26 import lxml.html.clean as clean
  27 import random
  28 import re
  29 import socket
  30 import threading
  31 import time
  32 from email.utils import getaddresses
  33
  34 import openerp
  35 from openerp.loglevels import ustr
  36
  37 _logger = logging.getLogger(__name__)
  38
  39
  40 #----------------------------------------------------------
  41 # HTML Sanitizer
  42 #----------------------------------------------------------
  43
  44 tags_to_kill = ["script", "head", "meta", "title", "link", "style", "frame", "iframe", "base", "object", "embed"]
  45 tags_to_remove = ['html', 'body', 'font']
  46
  47 # allow new semantic HTML5 tags
  48 allowed_tags = clean.defs.tags | frozenset('article section header footer hgroup nav aside figure main'.split() + [etree.Comment])
  49 safe_attrs = clean.defs.safe_attrs | frozenset(
  50     ['style',
  51      'data-oe-model', 'data-oe-id', 'data-oe-field', 'data-oe-type', 'data-oe-expression', 'data-oe-translate', 'data-oe-nodeid',
  52      'data-snippet-id', 'data-publish', 'data-id', 'data-res_id', 'data-member_id', 'data-view-id'
  53      ])
  54
  55
  56 def html_sanitize(src, silent=True, strict=False):
  57     if not src:
  58         return src
  59     src = ustr(src, errors='replace')
  60
  61     logger = logging.getLogger(__name__ + '.html_sanitize')
  62
  63     # html encode email tags
  64     part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
  65     src = part.sub(lambda m: cgi.escape(m.group(1)), src)
  66     # html encode mako tags <% ... %> to decode them later and keep them alive, otherwise they are stripped by the cleaner
  67     src = src.replace('<%', cgi.escape('<%'))
  68     src = src.replace('%>', cgi.escape('%>'))
  69
  70     kwargs = {
  71         'page_structure': True,
  72         'style': False,             # do not remove style attributes
  73         'forms': True,              # remove form tags
  74         'remove_unknown_tags': False,
  75         'allow_tags': allowed_tags,
  76         'comments': False,
  77         'processing_instructions': False
  78     }
  79     if etree.LXML_VERSION >= (2, 3, 1):
  80         # kill_tags attribute has been added in version 2.3.1
  81         kwargs.update({
  82             'kill_tags': tags_to_kill,
  83             'remove_tags': tags_to_remove,
  84         })
  85     else:
  86         kwargs['remove_tags'] = tags_to_kill + tags_to_remove
  87
  88     if strict:
  89         if etree.LXML_VERSION >= (3, 1, 0):
  90             # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all attributes in order to keep "style"
  91             kwargs.update({
  92                 'safe_attrs_only': True,
  93                 'safe_attrs': safe_attrs,
  94             })
  95     else:
  96         kwargs['safe_attrs_only'] = False    # keep oe-data attributes + style
  97         kwargs['frames'] = False,            # do not remove frames (embbed video in CMS blogs)
  98
  99     try:
 100         # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
 101         cleaner = clean.Cleaner(**kwargs)
 102         cleaned = cleaner.clean_html(src)
 103         # MAKO compatibility: $, { and } inside quotes are escaped, preventing correct mako execution
 104         cleaned = cleaned.replace('%24', '$')
 105         cleaned = cleaned.replace('%7B', '{')
 106         cleaned = cleaned.replace('%7D', '}')
 107         cleaned = cleaned.replace('%20', ' ')
 108         cleaned = cleaned.replace('%5B', '[')
 109         cleaned = cleaned.replace('%5D', ']')
 110         cleaned = cleaned.replace('&lt;%', '<%')
 111         cleaned = cleaned.replace('%&gt;', '%>')
 112     except etree.ParserError, e:
 113         if 'empty' in str(e):
 114             return ""
 115         if not silent:
 116             raise
 117         logger.warning('ParserError obtained when sanitizing %r', src, exc_info=True)
 118         cleaned = '<p>ParserError when sanitizing</p>'
 119     except Exception:
 120         if not silent:
 121             raise
 122         logger.warning('unknown error obtained when sanitizing %r', src, exc_info=True)
 123         cleaned = '<p>Unknown error when sanitizing</p>'
 124
 125     # this is ugly, but lxml/etree tostring want to put everything in a 'div' that breaks the editor -> remove that
 126     if cleaned.startswith('<div>') and cleaned.endswith('</div>'):
 127         cleaned = cleaned[5:-6]
 128
 129     return cleaned
 130
 131
 132 #----------------------------------------------------------
 133 # HTML Cleaner
 134 #----------------------------------------------------------
 135
 136 def html_email_clean(html, remove=False, shorten=False, max_length=300, expand_options=None,
 137                      protect_sections=False):
 138     """ html_email_clean: clean the html by doing the following steps:
 139
 140      - try to strip email quotes, by removing blockquotes or having some client-
 141        specific heuristics
 142      - try to strip signatures
 143      - shorten the html to a maximum number of characters if requested
 144
 145     Some specific use case:
 146
 147      - MsOffice: ``div.style = border-top:solid;`` delimitates the beginning of
 148        a quote; detecting by finding WordSection1 of MsoNormal
 149      - Hotmail: ``hr.stopSpelling`` delimitates the beginning of a quote; detect
 150        Hotmail by funding ``SkyDrivePlaceholder``
 151
 152     :param string html: sanitized html; tags like html or head should not
 153                         be present in the html string. This method therefore
 154                         takes as input html code coming from a sanitized source,
 155                         like fields.html.
 156     :param boolean remove: remove the html code that is unwanted; otherwise it
 157                            is only flagged and tagged
 158     :param boolean shorten: shorten the html; every excessing content will
 159                             be flagged as to remove
 160     :param int max_length: if shortening, maximum number of characters before
 161                            shortening
 162     :param dict expand_options: options for the read more link when shortening
 163                                 the content.The used keys are the following:
 164
 165                                  - oe_expand_container_tag: class applied to the
 166                                    container of the whole read more link
 167                                  - oe_expand_container_class: class applied to the
 168                                    link container (default: oe_mail_expand)
 169                                  - oe_expand_container_content: content of the
 170                                    container (default: ...)
 171                                  - oe_expand_separator_node: optional separator, like
 172                                    adding ... <br /><br /> <a ...>read more</a> (default: void)
 173                                  - oe_expand_a_href: href of the read more link itself
 174                                    (default: #)
 175                                  - oe_expand_a_class: class applied to the <a> containing
 176                                    the link itself (default: oe_mail_expand)
 177                                  - oe_expand_a_content: content of the <a> (default: read more)
 178
 179                                 The formatted read more link is the following:
 180                                 <cont_tag class="oe_expand_container_class">
 181                                     oe_expand_container_content
 182                                     if expand_options.get('oe_expand_separator_node'):
 183                                         <oe_expand_separator_node/>
 184                                     <a href="oe_expand_a_href" class="oe_expand_a_class">
 185                                         oe_expand_a_content
 186                                     </a>
 187                                 </span>
 188     """
 189     def _replace_matching_regex(regex, source, replace=''):
 190         """ Replace all matching expressions in source by replace """
 191         if not source:
 192             return source
 193         dest = ''
 194         idx = 0
 195         for item in re.finditer(regex, source):
 196             dest += source[idx:item.start()] + replace
 197             idx = item.end()
 198         dest += source[idx:]
 199         return dest
 200
 201     def _create_node(tag, text, tail=None, attrs={}):
 202         new_node = etree.Element(tag)
 203         new_node.text = text
 204         new_node.tail = tail
 205         for key, val in attrs.iteritems():
 206             new_node.set(key, val)
 207         return new_node
 208
 209     def _insert_new_node(node, index, new_node_tag, new_node_text, new_node_tail=None, new_node_attrs={}):
 210         new_node = _create_node(new_node_tag, new_node_text, new_node_tail, new_node_attrs)
 211         node.insert(index, new_node)
 212         return new_node
 213
 214     def _tag_matching_regex_in_text(regex, node, new_node_tag='span', new_node_attrs={}):
 215         text = node.text or ''
 216         if not re.search(regex, text):
 217             return
 218
 219         cur_node = node
 220         node.text = ''
 221         idx, iteration = 0, 0
 222         for item in re.finditer(regex, text):
 223             if iteration == 0:
 224                 cur_node.text = text[idx:item.start()]
 225             else:
 226                 _insert_new_node(node, (iteration - 1) * 2 + 1, new_node_tag, text[idx:item.start()])
 227             new_node = _insert_new_node(node, iteration * 2, new_node_tag, text[item.start():item.end()], None, new_node_attrs)
 228
 229             cur_node = new_node
 230             idx = item.end()
 231             iteration += 1
 232         new_node = _insert_new_node(node, -1, new_node_tag, text[idx:] + (cur_node.tail or ''), None, {})
 233
 234     def _truncate_node(node, position, simplify_whitespaces=True):
 235         """ Truncate a node text at a given position. This algorithm will shorten
 236         at the end of the word whose ending character exceeds position.
 237
 238             :param bool simplify_whitespaces: whether to try to count all successive
 239                                               whitespaces as one character. This
 240                                               option should not be True when trying
 241                                               to keep 'pre' consistency.
 242         """
 243         if node.text is None:
 244             node.text = ''
 245
 246         truncate_idx = -1
 247         if simplify_whitespaces:
 248             cur_char_nbr = 0
 249             word = None
 250             node_words = node.text.strip(' \t\r\n').split()
 251             for word in node_words:
 252                 cur_char_nbr += len(word)
 253                 if cur_char_nbr >= position:
 254                     break
 255             if word:
 256                 truncate_idx = node.text.find(word) + len(word)
 257         else:
 258             truncate_idx = position
 259         if truncate_idx == -1 or truncate_idx > len(node.text):
 260             truncate_idx = len(node.text)
 261
 262         # compose new text bits
 263         innertext = node.text[0:truncate_idx]
 264         outertext = node.text[truncate_idx:]
 265         node.text = innertext
 266
 267         # create <span> ... <a href="#">read more</a></span> node
 268         read_more_node = _create_node(
 269             expand_options.get('oe_expand_container_tag', 'span'),
 270             expand_options.get('oe_expand_container_content', ' ... '),
 271             None,
 272             {'class': expand_options.get('oe_expand_container_class', 'oe_mail_expand')}
 273         )
 274         if expand_options.get('oe_expand_separator_node'):
 275             read_more_separator_node = _create_node(
 276                 expand_options.get('oe_expand_separator_node'),
 277                 '',
 278                 None,
 279                 {}
 280             )
 281             read_more_node.append(read_more_separator_node)
 282         read_more_link_node = _create_node(
 283             'a',
 284             expand_options.get('oe_expand_a_content', 'read more'),
 285             None,
 286             {
 287                 'href': expand_options.get('oe_expand_a_href', '#'),
 288                 'class': expand_options.get('oe_expand_a_class', 'oe_mail_expand'),
 289             }
 290         )
 291         read_more_node.append(read_more_link_node)
 292         # create outertext node
 293         overtext_node = _create_node('span', outertext)
 294         # tag node
 295         overtext_node.set('in_overlength', '1')
 296         # add newly created nodes in dom
 297         node.append(read_more_node)
 298         node.append(overtext_node)
 299
 300     if expand_options is None:
 301         expand_options = {}
 302
 303     if not html or not isinstance(html, basestring):
 304         return html
 305     html = ustr(html)
 306
 307     # Pre processing
 308     # ------------------------------------------------------------
 309     # TDE TODO: --- MAIL ORIGINAL ---: '[\-]{4,}([^\-]*)[\-]{4,}'
 310
 311     # html: remove encoding attribute inside tags
 312     doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
 313     html = doctype.sub(r"", html)
 314
 315     # html: ClEditor seems to love using <div><br /><div> -> replace with <br />
 316     br_div_tags = re.compile(r'(<div>\s*<br\s*\/>\s*<\/div>)', re.IGNORECASE)
 317     html = _replace_matching_regex(br_div_tags, html, '<br />')
 318
 319     # form a tree
 320     root = lxml.html.fromstring(html)
 321     if not len(root) and root.text is None and root.tail is None:
 322         html = '<div>%s</div>' % html
 323         root = lxml.html.fromstring(html)
 324
 325     quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
 326     signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[\s\S]+)')
 327     for node in root.iter():
 328         # remove all tails and replace them by a span element, because managing text and tails can be a pain in the ass
 329         if node.tail:
 330             tail_node = _create_node('span', node.tail)
 331             node.tail = None
 332             node.addnext(tail_node)
 333
 334         # form node and tag text-based quotes and signature
 335         _tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
 336         _tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})
 337
 338     # Processing
 339     # ------------------------------------------------------------
 340
 341     # tree: tag nodes
 342     # signature_begin = False  # try dynamic signature recognition
 343     quote_begin = False
 344     overlength = False
 345     overlength_section_id = None
 346     overlength_section_count = 0
 347     cur_char_nbr = 0
 348     for node in root.iter():
 349         # comments do not need processing
 350         # note: bug in node.get(value, default) for HtmlComments, default never returned
 351         if node.tag == etree.Comment:
 352             continue
 353         # do not take into account multiple spaces that are displayed as max 1 space in html
 354         node_text = ' '.join((node.text and node.text.strip(' \t\r\n') or '').split())
 355
 356         # root: try to tag the client used to write the html
 357         if 'WordSection1' in node.get('class', '') or 'MsoNormal' in node.get('class', ''):
 358             root.set('msoffice', '1')
 359         if 'SkyDrivePlaceholder' in node.get('class', '') or 'SkyDrivePlaceholder' in node.get('id', ''):
 360             root.set('hotmail', '1')
 361
 362         # protect sections by tagging section limits and blocks contained inside sections, using an increasing id to re-find them later
 363         if node.tag == 'section':
 364             overlength_section_count += 1
 365             node.set('section_closure', str(overlength_section_count))
 366         if node.getparent() is not None and (node.getparent().get('section_closure') or node.getparent().get('section_inner')):
 367             node.set('section_inner', str(overlength_section_count))
 368
 369         # state of the parsing: flag quotes and tails to remove
 370         if quote_begin:
 371             node.set('in_quote', '1')
 372             node.set('tail_remove', '1')
 373         # state of the parsing: flag when being in over-length content, depending on section content if defined (only when having protect_sections)
 374         if overlength:
 375             if not overlength_section_id or int(node.get('section_inner', overlength_section_count + 1)) > overlength_section_count:
 376                 node.set('in_overlength', '1')
 377                 node.set('tail_remove', '1')
 378
 379         # find quote in msoffice / hotmail / blockquote / text quote and signatures
 380         if root.get('msoffice') and node.tag == 'div' and 'border-top:solid' in node.get('style', ''):
 381             quote_begin = True
 382             node.set('in_quote', '1')
 383             node.set('tail_remove', '1')
 384         if root.get('hotmail') and node.tag == 'hr' and ('stopSpelling' in node.get('class', '') or 'stopSpelling' in node.get('id', '')):
 385             quote_begin = True
 386             node.set('in_quote', '1')
 387             node.set('tail_remove', '1')
 388         if node.tag == 'blockquote' or node.get('text_quote') or node.get('text_signature'):
 389             node.set('in_quote', '1')
 390
 391         # shorten:
 392         # if protect section:
 393         #   1/ find the first parent not being inside a section
 394         #   2/ add the read more link
 395         # else:
 396         #   1/ truncate the text at the next available space
 397         #   2/ create a 'read more' node, next to current node
 398         #   3/ add the truncated text in a new node, next to 'read more' node
 399         node_text = (node.text or '').strip().strip('\n').strip()
 400         if shorten and not overlength and cur_char_nbr + len(node_text) > max_length:
 401             node_to_truncate = node
 402             while node_to_truncate.getparent() is not None:
 403                 if node_to_truncate.get('in_quote'):
 404                     node_to_truncate = node_to_truncate.getparent()
 405                 elif protect_sections and (node_to_truncate.getparent().get('section_inner') or node_to_truncate.getparent().get('section_closure')):
 406                     node_to_truncate = node_to_truncate.getparent()
 407                     overlength_section_id = node_to_truncate.get('section_closure')
 408                 else:
 409                     break
 410
 411             overlength = True
 412             node_to_truncate.set('truncate', '1')
 413             if node_to_truncate == node:
 414                 node_to_truncate.set('truncate_position', str(max_length - cur_char_nbr))
 415             else:
 416                 node_to_truncate.set('truncate_position', str(len(node.text or '')))
 417         cur_char_nbr += len(node_text)
 418
 419     # Tree modification
 420     # ------------------------------------------------------------
 421
 422     for node in root.iter():
 423         if node.get('truncate'):
 424             _truncate_node(node, int(node.get('truncate_position', '0')), node.tag != 'pre')
 425
 426     # Post processing
 427     # ------------------------------------------------------------
 428
 429     to_remove = []
 430     for node in root.iter():
 431         if node.get('in_quote') or node.get('in_overlength'):
 432             # copy the node tail into parent text
 433             if node.tail and not node.get('tail_remove'):
 434                 parent = node.getparent()
 435                 parent.tail = node.tail + (parent.tail or '')
 436             to_remove.append(node)
 437         if node.get('tail_remove'):
 438             node.tail = ''
 439         # clean node
 440         for attribute_name in ['in_quote', 'tail_remove', 'in_overlength', 'msoffice', 'hotmail', 'truncate', 'truncate_position']:
 441             node.attrib.pop(attribute_name, None)
 442     for node in to_remove:
 443         if remove:
 444             node.getparent().remove(node)
 445         else:
 446             if not expand_options.get('oe_expand_a_class', 'oe_mail_expand') in node.get('class', ''):  # trick: read more link should be displayed even if it's in overlength
 447                 node_class = node.get('class', '') + ' oe_mail_cleaned'
 448                 node.set('class', node_class)
 449
 450     # html: \n that were tail of elements have been encapsulated into <span> -> back to \n
 451     html = etree.tostring(root, pretty_print=False)
 452     linebreaks = re.compile(r'<span[^>]*>([\s]*[\r\n]+[\s]*)<\/span>', re.IGNORECASE | re.DOTALL)
 453     html = _replace_matching_regex(linebreaks, html, '\n')
 454
 455     return html
 456
 457
 458 #----------------------------------------------------------
 459 # HTML/Text management
 460 #----------------------------------------------------------
 461
 462 def html2plaintext(html, body_id=None, encoding='utf-8'):
 463     """ From an HTML text, convert the HTML to plain text.
 464     If @param body_id is provided then this is the tag where the
 465     body (not necessarily <body>) starts.
 466     """
 467     ## (c) Fry-IT, www.fry-it.com, 2007
 468     ## <peter@fry-it.com>
 469     ## download here: http://www.peterbe.com/plog/html2plaintext
 470
 471     html = ustr(html)
 472     tree = etree.fromstring(html, parser=etree.HTMLParser())
 473
 474     if body_id is not None:
 475         source = tree.xpath('//*[@id=%s]' % (body_id,))
 476     else:
 477         source = tree.xpath('//body')
 478     if len(source):
 479         tree = source[0]
 480
 481     url_index = []
 482     i = 0
 483     for link in tree.findall('.//a'):
 484         url = link.get('href')
 485         if url:
 486             i += 1
 487             link.tag = 'span'
 488             link.text = '%s [%s]' % (link.text, i)
 489             url_index.append(url)
 490
 491     html = ustr(etree.tostring(tree, encoding=encoding))
 492     # \r char is converted into &#13;, must remove it
 493     html = html.replace('&#13;', '')
 494
 495     html = html.replace('<strong>', '*').replace('</strong>', '*')
 496     html = html.replace('<b>', '*').replace('</b>', '*')
 497     html = html.replace('<h3>', '*').replace('</h3>', '*')
 498     html = html.replace('<h2>', '**').replace('</h2>', '**')
 499     html = html.replace('<h1>', '**').replace('</h1>', '**')
 500     html = html.replace('<em>', '/').replace('</em>', '/')
 501     html = html.replace('<tr>', '\n')
 502     html = html.replace('</p>', '\n')
 503     html = re.sub('<br\s*/?>', '\n', html)
 504     html = re.sub('<.*?>', ' ', html)
 505     html = html.replace(' ' * 2, ' ')
 506     html = html.replace('&gt;', '>')
 507     html = html.replace('&lt;', '<')
 508
 509     # strip all lines
 510     html = '\n'.join([x.strip() for x in html.splitlines()])
 511     html = html.replace('\n' * 2, '\n')
 512
 513     for i, url in enumerate(url_index):
 514         if i == 0:
 515             html += '\n\n'
 516         html += ustr('[%s] %s\n') % (i + 1, url)
 517
 518     return html
 519
 520 def plaintext2html(text, container_tag=False):
 521     """ Convert plaintext into html. Content of the text is escaped to manage
 522         html entities, using cgi.escape().
 523         - all \n,\r are replaced by <br />
 524         - enclose content into <p>
 525         - 2 or more consecutive <br /> are considered as paragraph breaks
 526
 527         :param string container_tag: container of the html; by default the
 528             content is embedded into a <div>
 529     """
 530     text = cgi.escape(ustr(text))
 531
 532     # 1. replace \n and \r
 533     text = text.replace('\n', '<br/>')
 534     text = text.replace('\r', '<br/>')
 535
 536     # 2-3: form paragraphs
 537     idx = 0
 538     final = '<p>'
 539     br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
 540     for item in re.finditer(br_tags, text):
 541         final += text[idx:item.start()] + '</p><p>'
 542         idx = item.end()
 543     final += text[idx:] + '</p>'
 544
 545     # 4. container
 546     if container_tag:
 547         final = '<%s>%s</%s>' % (container_tag, final, container_tag)
 548     return ustr(final)
 549
 550 def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
 551     """ Append extra content at the end of an HTML snippet, trying
 552         to locate the end of the HTML document (</body>, </html>, or
 553         EOF), and converting the provided content in html unless ``plaintext``
 554         is False.
 555         Content conversion can be done in two ways:
 556         - wrapping it into a pre (preserve=True)
 557         - use plaintext2html (preserve=False, using container_tag to wrap the
 558             whole content)
 559         A side-effect of this method is to coerce all HTML tags to
 560         lowercase in ``html``, and strip enclosing <html> or <body> tags in
 561         content if ``plaintext`` is False.
 562
 563         :param str html: html tagsoup (doesn't have to be XHTML)
 564         :param str content: extra content to append
 565         :param bool plaintext: whether content is plaintext and should
 566             be wrapped in a <pre/> tag.
 567         :param bool preserve: if content is plaintext, wrap it into a <pre>
 568             instead of converting it into html
 569     """
 570     html = ustr(html)
 571     if plaintext and preserve:
 572         content = u'\n<pre>%s</pre>\n' % ustr(content)
 573     elif plaintext:
 574         content = '\n%s\n' % plaintext2html(content, container_tag)
 575     else:
 576         content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
 577         content = u'\n%s\n' % ustr(content)
 578     # Force all tags to lowercase
 579     html = re.sub(r'(</?)\W*(\w+)([ >])',
 580         lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
 581     insert_location = html.find('</body>')
 582     if insert_location == -1:
 583         insert_location = html.find('</html>')
 584     if insert_location == -1:
 585         return '%s%s' % (html, content)
 586     return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
 587
 588 #----------------------------------------------------------
 589 # Emails
 590 #----------------------------------------------------------
 591
 592 # matches any email in a body of text
 593 email_re = re.compile(r"""([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6})""", re.VERBOSE)
 594
 595 # matches a string containing only one email
 596 single_email_re = re.compile(r"""^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}$""", re.VERBOSE)
 597
 598 res_re = re.compile(r"\[([0-9]+)\]", re.UNICODE)
 599 command_re = re.compile("^Set-([a-z]+) *: *(.+)$", re.I + re.UNICODE)
 600
 601 # Updated in 7.0 to match the model name as well
 602 # Typical form of references is <timestamp-openerp-record_id-model_name@domain>
 603 # group(1) = the record ID ; group(2) = the model (if any) ; group(3) = the domain
 604 reference_re = re.compile("<.*-open(?:object|erp)-(\\d+)(?:-([\w.]+))?.*@(.*)>", re.UNICODE)
 605
 606 # Bounce regex
 607 # Typical form of bounce is bounce-128-crm.lead-34@domain
 608 # group(1) = the mail ID; group(2) = the model (if any); group(3) = the record ID
 609 bounce_re = re.compile("[\w]+-(\d+)-?([\w.]+)?-?(\d+)?", re.UNICODE)
 610
 611 def generate_tracking_message_id(res_id):
 612     """Returns a string that can be used in the Message-ID RFC822 header field
 613
 614        Used to track the replies related to a given object thanks to the "In-Reply-To"
 615        or "References" fields that Mail User Agents will set.
 616     """
 617     try:
 618         rnd = random.SystemRandom().random()
 619     except NotImplementedError:
 620         rnd = random.random()
 621     rndstr = ("%.15f" % rnd)[2:]
 622     return "<%.15f.%s-openerp-%s@%s>" % (time.time(), rndstr, res_id, socket.gethostname())
 623
 624 def email_send(email_from, email_to, subject, body, email_cc=None, email_bcc=None, reply_to=False,
 625                attachments=None, message_id=None, references=None, openobject_id=False, debug=False, subtype='plain', headers=None,
 626                smtp_server=None, smtp_port=None, ssl=False, smtp_user=None, smtp_password=None, cr=None, uid=None):
 627     """Low-level function for sending an email (deprecated).
 628
 629     :deprecate: since OpenERP 6.1, please use ir.mail_server.send_email() instead.
 630     :param email_from: A string used to fill the `From` header, if falsy,
 631                        config['email_from'] is used instead.  Also used for
 632                        the `Reply-To` header if `reply_to` is not provided
 633     :param email_to: a sequence of addresses to send the mail to.
 634     """
 635
 636     # If not cr, get cr from current thread database
 637     local_cr = None
 638     if not cr:
 639         db_name = getattr(threading.currentThread(), 'dbname', None)
 640         if db_name:
 641             local_cr = cr = openerp.registry(db_name).cursor()
 642         else:
 643             raise Exception("No database cursor found, please pass one explicitly")
 644
 645     # Send Email
 646     try:
 647         mail_server_pool = openerp.registry(cr.dbname)['ir.mail_server']
 648         res = False
 649         # Pack Message into MIME Object
 650         email_msg = mail_server_pool.build_email(email_from, email_to, subject, body, email_cc, email_bcc, reply_to,
 651                    attachments, message_id, references, openobject_id, subtype, headers=headers)
 652
 653         res = mail_server_pool.send_email(cr, uid or 1, email_msg, mail_server_id=None,
 654                        smtp_server=smtp_server, smtp_port=smtp_port, smtp_user=smtp_user, smtp_password=smtp_password,
 655                        smtp_encryption=('ssl' if ssl else None), smtp_debug=debug)
 656     except Exception:
 657         _logger.exception("tools.email_send failed to deliver email")
 658         return False
 659     finally:
 660         if local_cr:
 661             cr.close()
 662     return res
 663
 664 def email_split(text):
 665     """ Return a list of the email addresses found in ``text`` """
 666     if not text:
 667         return []
 668     return [addr[1] for addr in getaddresses([text])
 669                 # getaddresses() returns '' when email parsing fails, and
 670                 # sometimes returns emails without at least '@'. The '@'
 671                 # is strictly required in RFC2822's `addr-spec`.
 672                 if addr[1]
 673                 if '@' in addr[1]]