openerp/tools/mail.py

   1 # -*- coding: utf-8 -*-
   2 ##############################################################################
   3 #
   4 #    OpenERP, Open Source Business Applications
   5 #    Copyright (C) 2012-TODAY OpenERP S.A. (<http://openerp.com>).
   6 #
   7 #    This program is free software: you can redistribute it and/or modify
   8 #    it under the terms of the GNU Affero General Public License as
   9 #    published by the Free Software Foundation, either version 3 of the
  10 #    License, or (at your option) any later version.
  11 #
  12 #    This program is distributed in the hope that it will be useful,
  13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 #    GNU Affero General Public License for more details.
  16 #
  17 #    You should have received a copy of the GNU Affero General Public License
  18 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 #
  20 ##############################################################################
  21
  22 from lxml import etree
  23 import cgi
  24 import logging
  25 import lxml.html
  26 import lxml.html.clean as clean
  27 import random
  28 import re
  29 import socket
  30 import threading
  31 import time
  32 from email.utils import getaddresses
  33
  34 import openerp
  35 from openerp.loglevels import ustr
  36
  37 _logger = logging.getLogger(__name__)
  38
  39
  40 #----------------------------------------------------------
  41 # HTML Sanitizer
  42 #----------------------------------------------------------
  43
  44 tags_to_kill = ["script", "head", "meta", "title", "link", "style", "frame", "iframe", "base", "object", "embed"]
  45 tags_to_remove = ['html', 'body', 'font']
  46
  47 # allow new semantic HTML5 tags
  48 allowed_tags = clean.defs.tags | frozenset('article section header footer hgroup nav aside figure main'.split())
  49 safe_attrs = clean.defs.safe_attrs | frozenset(
  50     ['style',
  51      'data-oe-model', 'data-oe-id', 'data-oe-field', 'data-oe-type', 'data-oe-expression', 'data-oe-translate', 'data-oe-nodeid',
  52      'data-snippet-id', 'data-publish', 'data-id', 'data-res_id', 'data-member_id', 'data-view-id'
  53      ])
  54
  55
  56 def html_sanitize(src, silent=True, strict=False):
  57     if not src:
  58         return src
  59     src = ustr(src, errors='replace')
  60
  61     logger = logging.getLogger(__name__ + '.html_sanitize')
  62
  63     # html encode email tags
  64     part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
  65     src = part.sub(lambda m: cgi.escape(m.group(1)), src)
  66
  67     kwargs = {
  68         'page_structure': True,
  69         'style': False,             # do not remove style attributes
  70         'forms': True,              # remove form tags
  71         'remove_unknown_tags': False,
  72         'allow_tags': allowed_tags,
  73     }
  74     if etree.LXML_VERSION >= (2, 3, 1):
  75         # kill_tags attribute has been added in version 2.3.1
  76         kwargs.update({
  77             'kill_tags': tags_to_kill,
  78             'remove_tags': tags_to_remove,
  79         })
  80     else:
  81         kwargs['remove_tags'] = tags_to_kill + tags_to_remove
  82
  83     if strict:
  84         if etree.LXML_VERSION >= (3, 1, 0):
  85             # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all attributes in order to keep "style"
  86             kwargs.update({
  87                 'safe_attrs_only': True,
  88                 'safe_attrs': safe_attrs,
  89             })
  90     else:
  91         kwargs['safe_attrs_only'] = False    # keep oe-data attributes + style
  92         kwargs['frames'] = False,            # do not remove frames (embbed video in CMS blogs)
  93
  94     try:
  95         # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
  96         cleaner = clean.Cleaner(**kwargs)
  97         cleaned = cleaner.clean_html(src)
  98         # MAKO compatibility: $, { and } inside quotes are escaped, preventing correct mako execution
  99         cleaned = cleaned.replace('%24', '$')
 100         cleaned = cleaned.replace('%7B', '{')
 101         cleaned = cleaned.replace('%7D', '}')
 102         cleaned = cleaned.replace('%20', ' ')
 103         cleaned = cleaned.replace('%5B', '[')
 104         cleaned = cleaned.replace('%5D', ']')
 105     except etree.ParserError, e:
 106         if 'empty' in str(e):
 107             return ""
 108         if not silent:
 109             raise
 110         logger.warning('ParserError obtained when sanitizing %r', src, exc_info=True)
 111         cleaned = '<p>ParserError when sanitizing</p>'
 112     except Exception:
 113         if not silent:
 114             raise
 115         logger.warning('unknown error obtained when sanitizing %r', src, exc_info=True)
 116         cleaned = '<p>Unknown error when sanitizing</p>'
 117
 118     # this is ugly, but lxml/etree tostring want to put everything in a 'div' that breaks the editor -> remove that
 119     if cleaned.startswith('<div>') and cleaned.endswith('</div>'):
 120         cleaned = cleaned[5:-6]
 121
 122     return cleaned
 123
 124
 125 #----------------------------------------------------------
 126 # HTML Cleaner
 127 #----------------------------------------------------------
 128
def html_email_clean(html, remove=False, shorten=False, max_length=300, expand_options=None,
                     protect_sections=False):
    """ html_email_clean: clean the html by doing the following steps:

     - try to strip email quotes, by removing blockquotes or having some client-
       specific heuristics
     - try to strip signatures
     - shorten the html to a maximum number of characters if requested

    Some specific use case:

     - MsOffice: ``div.style = border-top:solid;`` delimitates the beginning of
       a quote; detecting by finding WordSection1 of MsoNormal
     - Hotmail: ``hr.stopSpelling`` delimitates the beginning of a quote; detect
       Hotmail by finding ``SkyDrivePlaceholder``

    The work is done in successive passes over the parsed tree: tails and
    text-based quotes / signatures are wrapped into ``span`` nodes, nodes are
    flagged with temporary attributes (``in_quote``, ``in_overlength``,
    ``tail_remove``, ``truncate``, ...), the node flagged as ``truncate`` is
    cut and receives the read more link, and finally flagged nodes are either
    removed or given the ``oe_mail_cleaned`` class.

    :param string html: sanitized html; tags like html or head should not
                        be present in the html string. This method therefore
                        takes as input html code coming from a sanitized source,
                        like fields.html.
    :param boolean remove: remove the html code that is unwanted; otherwise it
                           is only flagged and tagged
    :param boolean shorten: shorten the html; every excessing content will
                            be flagged as to remove
    :param int max_length: if shortening, maximum number of characters before
                           shortening
    :param dict expand_options: options for the read more link when shortening
                                the content.The used keys are the following:

                                 - oe_expand_container_tag: tag of the
                                   container of the whole read more link
                                   (default: span)
                                 - oe_expand_container_class: class applied to the
                                   link container (default: oe_mail_expand)
                                 - oe_expand_container_content: content of the
                                   container (default: ...)
                                 - oe_expand_separator_node: optional separator, like
                                   adding ... <br /><br /> <a ...>read more</a> (default: void)
                                 - oe_expand_a_href: href of the read more link itself
                                   (default: #)
                                 - oe_expand_a_class: class applied to the <a> containing
                                   the link itself (default: oe_mail_expand)
                                 - oe_expand_a_content: content of the <a> (default: read more)

                                The formatted read more link is the following:
                                <cont_tag class="oe_expand_container_class">
                                    oe_expand_container_content
                                    if expand_options.get('oe_expand_separator_node'):
                                        <oe_expand_separator_node/>
                                    <a href="oe_expand_a_href" class="oe_expand_a_class">
                                        oe_expand_a_content
                                    </a>
                                </cont_tag>
    :param boolean protect_sections: when shortening, look for the node to
                                     truncate among the ancestors of the node
                                     where the length is exceeded, going up
                                     while the parent is a ``section`` or is
                                     located inside one
    :return: the cleaned html, serialized with ``etree.tostring``; a falsy or
             non-string ``html`` is returned unchanged
    """
    def _replace_matching_regex(regex, source, replace=''):
        """ Replace all matching expressions in source by replace """
        if not source:
            return source
        dest = ''
        idx = 0
        # copy the text located between two matches, then the replacement
        for item in re.finditer(regex, source):
            dest += source[idx:item.start()] + replace
            idx = item.end()
        dest += source[idx:]
        return dest

    # NOTE: the shared default dict is only iterated, never mutated
    def _create_node(tag, text, tail=None, attrs={}):
        """ Build a detached element with the given tag, text, tail and attributes """
        new_node = etree.Element(tag)
        new_node.text = text
        new_node.tail = tail
        for key, val in attrs.iteritems():
            new_node.set(key, val)
        return new_node

    def _insert_new_node(node, index, new_node_tag, new_node_text, new_node_tail=None, new_node_attrs={}):
        """ Create a node and insert it as child of ``node`` at position ``index`` """
        new_node = _create_node(new_node_tag, new_node_text, new_node_tail, new_node_attrs)
        node.insert(index, new_node)
        return new_node

    def _tag_matching_regex_in_text(regex, node, new_node_tag='span', new_node_attrs={}):
        """ Split the text of ``node`` around every match of ``regex``: each
        match is moved into a new child carrying ``new_node_attrs``, and the
        text located between / after the matches into plain children. Nothing
        is done when the text does not match. """
        text = node.text or ''
        if not re.search(regex, text):
            return

        cur_node = node
        node.text = ''
        idx, iteration = 0, 0
        for item in re.finditer(regex, text):
            if iteration == 0:
                # text located before the first match stays the node text
                cur_node.text = text[idx:item.start()]
            else:
                # text located between two matches goes into an untagged child
                _insert_new_node(node, (iteration - 1) * 2 + 1, new_node_tag, text[idx:item.start()])
            # the match itself goes into a tagged child
            new_node = _insert_new_node(node, iteration * 2, new_node_tag, text[item.start():item.end()], None, new_node_attrs)

            cur_node = new_node
            idx = item.end()
            iteration += 1
        # remaining text after the last match
        # NOTE(review): index -1 follows list.insert semantics (insertion before
        # the last child, not an append) -- confirm this is the intended position
        new_node = _insert_new_node(node, -1, new_node_tag, text[idx:] + (cur_node.tail or ''), None, {})

    def _truncate_node(node, position, simplify_whitespaces=True):
        """ Truncate a node text at a given position. This algorithm will shorten
        at the end of the word whose ending character exceeds position.

        The kept text stays in ``node``; a read more node built from
        ``expand_options`` and a ``span`` flagged ``in_overlength`` holding the
        cut text are appended to ``node``.

            :param bool simplify_whitespaces: whether to try to count all successive
                                              whitespaces as one character. This
                                              option should not be True when trying
                                              to keep 'pre' consistency.
        """
        if node.text is None:
            node.text = ''

        truncate_idx = -1
        if simplify_whitespaces:
            cur_char_nbr = 0
            word = None
            node_words = node.text.strip(' \t\r\n').split()
            # only the length of the words is counted, not the whitespaces
            for word in node_words:
                cur_char_nbr += len(word)
                if cur_char_nbr >= position:
                    break
            if word:
                # NOTE(review): find() returns the first occurrence of the word,
                # which may be located before the counted one if it is repeated
                truncate_idx = node.text.find(word) + len(word)
        else:
            truncate_idx = position
        if truncate_idx == -1 or truncate_idx > len(node.text):
            truncate_idx = len(node.text)

        # compose new text bits
        innertext = node.text[0:truncate_idx]
        outertext = node.text[truncate_idx:]
        node.text = innertext

        # create <span> ... <a href="#">read more</a></span> node
        read_more_node = _create_node(
            expand_options.get('oe_expand_container_tag', 'span'),
            expand_options.get('oe_expand_container_content', ' ... '),
            None,
            {'class': expand_options.get('oe_expand_container_class', 'oe_mail_expand')}
        )
        if expand_options.get('oe_expand_separator_node'):
            read_more_separator_node = _create_node(
                expand_options.get('oe_expand_separator_node'),
                '',
                None,
                {}
            )
            read_more_node.append(read_more_separator_node)
        read_more_link_node = _create_node(
            'a',
            expand_options.get('oe_expand_a_content', 'read more'),
            None,
            {
                'href': expand_options.get('oe_expand_a_href', '#'),
                'class': expand_options.get('oe_expand_a_class', 'oe_mail_expand'),
            }
        )
        read_more_node.append(read_more_link_node)
        # create outertext node
        overtext_node = _create_node('span', outertext)
        # tag node
        overtext_node.set('in_overlength', '1')
        # add newly created nodes in dom
        node.append(read_more_node)
        node.append(overtext_node)

    if expand_options is None:
        expand_options = {}

    if not html or not isinstance(html, basestring):
        return html
    html = ustr(html)

    # Pre processing
    # ------------------------------------------------------------
    # TDE TODO: --- MAIL ORIGINAL ---: '[\-]{4,}([^\-]*)[\-]{4,}'

    # html: remove encoding attribute inside tags
    # NOTE(review): the pattern spans from the opening '<' to the closing '>'
    # and is replaced by an empty string, so the whole tag is dropped, not only
    # its encoding attribute -- confirm this is intended
    doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
    html = doctype.sub(r"", html)

    # html: ClEditor seems to love using <div><br /><div> -> replace with <br />
    br_div_tags = re.compile(r'(<div>\s*<br\s*\/>\s*<\/div>)', re.IGNORECASE)
    html = _replace_matching_regex(br_div_tags, html, '<br />')

    # form a tree
    root = lxml.html.fromstring(html)
    # a root without children, text nor tail: parse again with the content
    # wrapped into a div
    if not len(root) and root.text is None and root.tail is None:
        html = '<div>%s</div>' % html
        root = lxml.html.fromstring(html)

    # text-based quotes: lines beginning with one or more '>'
    quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
    # text-based signature: at least two dashes, a line break, then everything after
    signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[\s\S]+)')
    for node in root.iter():
        # remove all tails and replace them by a span element, because managing
        # both text and tails makes the processing much harder
        if node.tail:
            tail_node = _create_node('span', node.tail)
            node.tail = None
            node.addnext(tail_node)

        # form node and tag text-based quotes and signature
        _tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
        _tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})

    # Processing
    # ------------------------------------------------------------

    # tree: tag nodes
    # signature_begin = False  # try dynamic signature recognition
    quote_begin = False
    overlength = False
    overlength_section_id = None
    overlength_section_count = 0
    cur_char_nbr = 0
    for node in root.iter():
        # do not take into account multiple spaces that are displayed as max 1 space in html
        # NOTE(review): this value is reassigned in the shorten step below
        # before being read, so the whitespace simplification has no effect
        node_text = ' '.join((node.text and node.text.strip(' \t\r\n') or '').split())

        # root: try to tag the client used to write the html
        if 'WordSection1' in node.get('class', '') or 'MsoNormal' in node.get('class', ''):
            root.set('msoffice', '1')
        if 'SkyDrivePlaceholder' in node.get('class', '') or 'SkyDrivePlaceholder' in node.get('id', ''):
            root.set('hotmail', '1')

        # protect sections by tagging section limits and blocks contained inside sections, using an increasing id to re-find them later
        if node.tag == 'section':
            overlength_section_count += 1
            node.set('section_closure', str(overlength_section_count))
        if node.getparent() is not None and (node.getparent().get('section_closure') or node.getparent().get('section_inner')):
            node.set('section_inner', str(overlength_section_count))

        # state of the parsing: flag quotes and tails to remove
        if quote_begin:
            node.set('in_quote', '1')
            node.set('tail_remove', '1')
        # state of the parsing: flag when being in over-length content, depending on section content if defined (only when having protect_sections)
        if overlength:
            if not overlength_section_id or int(node.get('section_inner', overlength_section_count + 1)) > overlength_section_count:
                node.set('in_overlength', '1')
                node.set('tail_remove', '1')

        # find quote in msoffice / hotmail / blockquote / text quote and signatures
        if root.get('msoffice') and node.tag == 'div' and 'border-top:solid' in node.get('style', ''):
            quote_begin = True
            node.set('in_quote', '1')
            node.set('tail_remove', '1')
        if root.get('hotmail') and node.tag == 'hr' and ('stopSpelling' in node.get('class', '') or 'stopSpelling' in node.get('id', '')):
            quote_begin = True
            node.set('in_quote', '1')
            node.set('tail_remove', '1')
        if node.tag == 'blockquote' or node.get('text_quote') or node.get('text_signature'):
            node.set('in_quote', '1')

        # shorten:
        # if protect section:
        #   1/ find the first parent not being inside a section
        #   2/ add the read more link
        # else:
        #   1/ truncate the text at the next available space
        #   2/ create a 'read more' node, next to current node
        #   3/ add the truncated text in a new node, next to 'read more' node
        node_text = (node.text or '').strip().strip('\n').strip()
        if shorten and not overlength and cur_char_nbr + len(node_text) > max_length:
            node_to_truncate = node
            # go up while the candidate is a quote, or (protect_sections) while
            # its parent is a section or inside a section
            while node_to_truncate.getparent() is not None:
                if node_to_truncate.get('in_quote'):
                    node_to_truncate = node_to_truncate.getparent()
                elif protect_sections and (node_to_truncate.getparent().get('section_inner') or node_to_truncate.getparent().get('section_closure')):
                    node_to_truncate = node_to_truncate.getparent()
                    overlength_section_id = node_to_truncate.get('section_closure')
                else:
                    break

            overlength = True
            node_to_truncate.set('truncate', '1')
            if node_to_truncate == node:
                # cut inside the current node, at the remaining number of characters
                node_to_truncate.set('truncate_position', str(max_length - cur_char_nbr))
            else:
                # an ancestor is truncated: use the length of the current node text
                node_to_truncate.set('truncate_position', str(len(node.text or '')))
        cur_char_nbr += len(node_text)

    # Tree modification
    # ------------------------------------------------------------

    for node in root.iter():
        if node.get('truncate'):
            # whitespaces are not simplified inside 'pre' nodes
            _truncate_node(node, int(node.get('truncate_position', '0')), node.tag != 'pre')

    # Post processing
    # ------------------------------------------------------------

    to_remove = []
    for node in root.iter():
        if node.get('in_quote') or node.get('in_overlength'):
            # prepend the node tail to the tail of its parent
            if node.tail and not node.get('tail_remove'):
                parent = node.getparent()
                parent.tail = node.tail + (parent.tail or '')
            to_remove.append(node)
        if node.get('tail_remove'):
            node.tail = ''
        # clean node
        # NOTE(review): section_closure, section_inner, text_quote and
        # text_signature are not part of this list and are therefore kept on
        # the nodes -- confirm whether they should be cleaned as well
        for attribute_name in ['in_quote', 'tail_remove', 'in_overlength', 'msoffice', 'hotmail', 'truncate', 'truncate_position']:
            node.attrib.pop(attribute_name, None)
    for node in to_remove:
        if remove:
            node.getparent().remove(node)
        else:
            if not expand_options.get('oe_expand_a_class', 'oe_mail_expand') in node.get('class', ''):  # trick: read more link should be displayed even if it's in overlength
                node_class = node.get('class', '') + ' oe_mail_cleaned'
                node.set('class', node_class)

    # html: \n that were tail of elements have been encapsulated into <span> -> back to \n
    html = etree.tostring(root, pretty_print=False)
    linebreaks = re.compile(r'<span[^>]*>([\s]*[\r\n]+[\s]*)<\/span>', re.IGNORECASE | re.DOTALL)
    html = _replace_matching_regex(linebreaks, html, '\n')

    return html
 445
 446
 447 #----------------------------------------------------------
 448 # HTML/Text management
 449 #----------------------------------------------------------
 450
 451 def html2plaintext(html, body_id=None, encoding='utf-8'):
 452     """ From an HTML text, convert the HTML to plain text.
 453     If @param body_id is provided then this is the tag where the
 454     body (not necessarily <body>) starts.
 455     """
 456     ## (c) Fry-IT, www.fry-it.com, 2007
 457     ## <peter@fry-it.com>
 458     ## download here: http://www.peterbe.com/plog/html2plaintext
 459
 460     html = ustr(html)
 461     tree = etree.fromstring(html, parser=etree.HTMLParser())
 462
 463     if body_id is not None:
 464         source = tree.xpath('//*[@id=%s]' % (body_id,))
 465     else:
 466         source = tree.xpath('//body')
 467     if len(source):
 468         tree = source[0]
 469
 470     url_index = []
 471     i = 0
 472     for link in tree.findall('.//a'):
 473         url = link.get('href')
 474         if url:
 475             i += 1
 476             link.tag = 'span'
 477             link.text = '%s [%s]' % (link.text, i)
 478             url_index.append(url)
 479
 480     html = ustr(etree.tostring(tree, encoding=encoding))
 481     # \r char is converted into &#13;, must remove it
 482     html = html.replace('&#13;', '')
 483
 484     html = html.replace('<strong>', '*').replace('</strong>', '*')
 485     html = html.replace('<b>', '*').replace('</b>', '*')
 486     html = html.replace('<h3>', '*').replace('</h3>', '*')
 487     html = html.replace('<h2>', '**').replace('</h2>', '**')
 488     html = html.replace('<h1>', '**').replace('</h1>', '**')
 489     html = html.replace('<em>', '/').replace('</em>', '/')
 490     html = html.replace('<tr>', '\n')
 491     html = html.replace('</p>', '\n')
 492     html = re.sub('<br\s*/?>', '\n', html)
 493     html = re.sub('<.*?>', ' ', html)
 494     html = html.replace(' ' * 2, ' ')
 495
 496     # strip all lines
 497     html = '\n'.join([x.strip() for x in html.splitlines()])
 498     html = html.replace('\n' * 2, '\n')
 499
 500     for i, url in enumerate(url_index):
 501         if i == 0:
 502             html += '\n\n'
 503         html += ustr('[%s] %s\n') % (i + 1, url)
 504
 505     return html
 506
 507 def plaintext2html(text, container_tag=False):
 508     """ Convert plaintext into html. Content of the text is escaped to manage
 509         html entities, using cgi.escape().
 510         - all \n,\r are replaced by <br />
 511         - enclose content into <p>
 512         - 2 or more consecutive <br /> are considered as paragraph breaks
 513
 514         :param string container_tag: container of the html; by default the
 515             content is embedded into a <div>
 516     """
 517     text = cgi.escape(ustr(text))
 518
 519     # 1. replace \n and \r
 520     text = text.replace('\n', '<br/>')
 521     text = text.replace('\r', '<br/>')
 522
 523     # 2-3: form paragraphs
 524     idx = 0
 525     final = '<p>'
 526     br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
 527     for item in re.finditer(br_tags, text):
 528         final += text[idx:item.start()] + '</p><p>'
 529         idx = item.end()
 530     final += text[idx:] + '</p>'
 531
 532     # 4. container
 533     if container_tag:
 534         final = '<%s>%s</%s>' % (container_tag, final, container_tag)
 535     return ustr(final)
 536
 537 def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
 538     """ Append extra content at the end of an HTML snippet, trying
 539         to locate the end of the HTML document (</body>, </html>, or
 540         EOF), and converting the provided content in html unless ``plaintext``
 541         is False.
 542         Content conversion can be done in two ways:
 543         - wrapping it into a pre (preserve=True)
 544         - use plaintext2html (preserve=False, using container_tag to wrap the
 545             whole content)
 546         A side-effect of this method is to coerce all HTML tags to
 547         lowercase in ``html``, and strip enclosing <html> or <body> tags in
 548         content if ``plaintext`` is False.
 549
 550         :param str html: html tagsoup (doesn't have to be XHTML)
 551         :param str content: extra content to append
 552         :param bool plaintext: whether content is plaintext and should
 553             be wrapped in a <pre/> tag.
 554         :param bool preserve: if content is plaintext, wrap it into a <pre>
 555             instead of converting it into html
 556     """
 557     html = ustr(html)
 558     if plaintext and preserve:
 559         content = u'\n<pre>%s</pre>\n' % ustr(content)
 560     elif plaintext:
 561         content = '\n%s\n' % plaintext2html(content, container_tag)
 562     else:
 563         content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
 564         content = u'\n%s\n' % ustr(content)
 565     # Force all tags to lowercase
 566     html = re.sub(r'(</?)\W*(\w+)([ >])',
 567         lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
 568     insert_location = html.find('</body>')
 569     if insert_location == -1:
 570         insert_location = html.find('</html>')
 571     if insert_location == -1:
 572         return '%s%s' % (html, content)
 573     return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
 574
 575 #----------------------------------------------------------
 576 # Emails
 577 #----------------------------------------------------------
 578
 579 # matches any email in a body of text
 580 email_re = re.compile(r"""([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6})""", re.VERBOSE)
 581
 582 # matches a string containing only one email
 583 single_email_re = re.compile(r"""^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}$""", re.VERBOSE)
 584
 585 res_re = re.compile(r"\[([0-9]+)\]", re.UNICODE)
 586 command_re = re.compile("^Set-([a-z]+) *: *(.+)$", re.I + re.UNICODE)
 587
 588 # Updated in 7.0 to match the model name as well
 589 # Typical form of references is <timestamp-openerp-record_id-model_name@domain>
 590 # group(1) = the record ID ; group(2) = the model (if any) ; group(3) = the domain
 591 reference_re = re.compile("<.*-open(?:object|erp)-(\\d+)(?:-([\w.]+))?.*@(.*)>", re.UNICODE)
 592
 593 # Bounce regex
 594 # Typical form of bounce is bounce-128-crm.lead-34@domain
 595 # group(1) = the mail ID; group(2) = the model (if any); group(3) = the record ID
 596 bounce_re = re.compile("[\w]+-(\d+)-?([\w.]+)?-?(\d+)?", re.UNICODE)
 597
 598 def generate_tracking_message_id(res_id):
 599     """Returns a string that can be used in the Message-ID RFC822 header field
 600
 601        Used to track the replies related to a given object thanks to the "In-Reply-To"
 602        or "References" fields that Mail User Agents will set.
 603     """
 604     try:
 605         rnd = random.SystemRandom().random()
 606     except NotImplementedError:
 607         rnd = random.random()
 608     rndstr = ("%.15f" % rnd)[2:]
 609     return "<%.15f.%s-openerp-%s@%s>" % (time.time(), rndstr, res_id, socket.gethostname())
 610
 611 def email_send(email_from, email_to, subject, body, email_cc=None, email_bcc=None, reply_to=False,
 612                attachments=None, message_id=None, references=None, openobject_id=False, debug=False, subtype='plain', headers=None,
 613                smtp_server=None, smtp_port=None, ssl=False, smtp_user=None, smtp_password=None, cr=None, uid=None):
 614     """Low-level function for sending an email (deprecated).
 615
 616     :deprecate: since OpenERP 6.1, please use ir.mail_server.send_email() instead.
 617     :param email_from: A string used to fill the `From` header, if falsy,
 618                        config['email_from'] is used instead.  Also used for
 619                        the `Reply-To` header if `reply_to` is not provided
 620     :param email_to: a sequence of addresses to send the mail to.
 621     """
 622
 623     # If not cr, get cr from current thread database
 624     local_cr = None
 625     if not cr:
 626         db_name = getattr(threading.currentThread(), 'dbname', None)
 627         if db_name:
 628             local_cr = cr = openerp.registry(db_name).db.cursor()
 629         else:
 630             raise Exception("No database cursor found, please pass one explicitly")
 631
 632     # Send Email
 633     try:
 634         mail_server_pool = openerp.registry(cr.dbname)['ir.mail_server']
 635         res = False
 636         # Pack Message into MIME Object
 637         email_msg = mail_server_pool.build_email(email_from, email_to, subject, body, email_cc, email_bcc, reply_to,
 638                    attachments, message_id, references, openobject_id, subtype, headers=headers)
 639
 640         res = mail_server_pool.send_email(cr, uid or 1, email_msg, mail_server_id=None,
 641                        smtp_server=smtp_server, smtp_port=smtp_port, smtp_user=smtp_user, smtp_password=smtp_password,
 642                        smtp_encryption=('ssl' if ssl else None), smtp_debug=debug)
 643     except Exception:
 644         _logger.exception("tools.email_send failed to deliver email")
 645         return False
 646     finally:
 647         if local_cr:
 648             cr.close()
 649     return res
 650
 651 def email_split(text):
 652     """ Return a list of the email addresses found in ``text`` """
 653     if not text:
 654         return []
 655     return [addr[1] for addr in getaddresses([text])
 656                 # getaddresses() returns '' when email parsing fails, and
 657                 # sometimes returns emails without at least '@'. The '@'
 658                 # is strictly required in RFC2822's `addr-spec`.
 659                 if addr[1]
 660                 if '@' in addr[1]]