From ed8d1a9f2f26cd6a00c863f80b99450dfdbe7e89 Mon Sep 17 00:00:00 2001 From: Olivier Dony Date: Tue, 6 Sep 2011 19:17:46 +0200 Subject: [PATCH] [IMP] ir.mail_server: improve handling of non-ASCII in headers bzr revid: odo@openerp.com-20110906171746-nnjk6ic0bdlqltcu --- openerp/addons/base/ir/ir_mail_server.py | 110 +++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 26 deletions(-) diff --git a/openerp/addons/base/ir/ir_mail_server.py b/openerp/addons/base/ir/ir_mail_server.py index 9b11470..0701412 100644 --- a/openerp/addons/base/ir/ir_mail_server.py +++ b/openerp/addons/base/ir/ir_mail_server.py @@ -26,6 +26,7 @@ from email.Header import Header from email.Utils import formatdate, make_msgid, COMMASPACE from email import Encoders import logging +import re import smtplib from osv import osv @@ -55,6 +56,21 @@ class WriteToLogger(object): def write(self, s): self.logger.log(self.level, s) + +def try_coerce_ascii(string_utf8): + """Attempts to decode the given utf8-encoded string + as ASCII after coercing it to UTF-8, then return + the confirmed 7-bit ASCII string. + + If the process fails (because the string + contains non-ASCII characters) returns ``None``. + """ + try: + string_utf8.decode('ascii') + except UnicodeDecodeError: + return + return string_utf8 + def encode_header(header_text): """Returns an appropriate representation of the given header value, suitable for direct assignment as a header value in an @@ -65,29 +81,63 @@ def encode_header(header_text): :param header_text: unicode or utf-8 encoded string with header value :rtype: string | email.header.Header :return: if ``header_text`` represents a plain ASCII string, - return a 7-bit string, otherwise returns an email.header.Header + return the same 7-bit string, otherwise returns an email.header.Header that will perform the appropriate RFC2047 encoding of non-ASCII values. """ if not header_text: return "" - - # convert anything to utf-8, suitable for - # testing ASCIIness, as 7-bit chars are + # convert anything to utf-8, suitable for testing ASCIIness, as 7-bit chars are # encoded as ASCII in utf-8 header_text_utf8 = tools.ustr(header_text).encode('utf-8') + header_text_ascii = try_coerce_ascii(header_text_utf8) + # if this header contains non-ASCII characters, + # we'll need to wrap it up in a message.header.Header + # that will take care of RFC2047-encoding it as + # 7-bit string. + return header_text_ascii if header_text_ascii\ + else Header(header_text_utf8, 'utf-8') + +name_with_email_pattern = re.compile(r'("[^<@>]+")\s*<([^ ,<@]+@[^> ,]+)>') +address_pattern = re.compile(r'([^ ,<@]+@[^> ,]+)') + +def extract_rfc2822_addresses(text): + """Returns a list of valid RFC2822 addresses + that can be found in ``source``, ignoring + malformed ones and non-ASCII ones. + """ + if not text: return [] + candidates = address_pattern.findall(tools.ustr(text).encode('utf-8')) + return filter(try_coerce_ascii, candidates) + +def encode_rfc2822_address_header(header_text): + """If ``header_text`` contains non-ASCII characters, + attempts to locate patterns of the form + ``"Name" `` and replace the + ``"Name"`` portion by the RFC2047-encoded + version, preserving the address part untouched. + """ + header_text_utf8 = tools.ustr(header_text).encode('utf-8') + header_text_ascii = try_coerce_ascii(header_text_utf8) + if header_text_ascii: + return header_text_ascii + # non-ASCII characters are present, attempt to + # replace all "Name" patterns with the RFC2047- + # encoded version + def replace(match_obj): + name, email = match_obj.group(1), match_obj.group(2) + name_encoded = str(Header(name, 'utf-8')) + return "%s <%s>" % (name_encoded, email) + header_text_utf8 = name_with_email_pattern.sub(replace, + header_text_utf8) + # try again after encoding + header_text_ascii = try_coerce_ascii(header_text_utf8) + if header_text_ascii: + return header_text_ascii + # fallback to extracting pure addresses only, which could + # still cause a failure downstream if the actual addresses + # contain non-ASCII characters + return COMMASPACE.join(extract_rfc2822_addresses(header_text_utf8)) - # check for non-ascii content in the header value - try: - header_text_utf8.decode('ascii') - # was plain ascii, we can use it verbatim as - # a header - return header_text_utf8 - except UnicodeDecodeError: - # this header contains non-ASCII characters, so - # we need to wrap it up in a message.header.Header - # that will take care of RFC2047-encoding it as - # 7-bit string. - return Header(header_text_utf8, 'utf-8') class ir_mail_server(osv.osv): """Represents an SMTP server, able to send outgoing e-mails, with SSL and TLS capabilities.""" @@ -190,7 +240,7 @@ class ir_mail_server(osv.osv): """Constructs an RFC2822 email.message.Message object based on the keyword arguments passed, and returns it. :param string email_from: sender email address - :param list email_from: list of recipient addresses (to be joined with commas) + :param list email_to: list of recipient addresses (to be joined with commas) :param string subject: email subject (no pre-encoding/quoting necessary) :param string body: email body, according to the ``subtype`` (by default, plaintext). If html subtype is used, the message will be automatically converted @@ -238,17 +288,17 @@ class ir_mail_server(osv.osv): if references: msg['references'] = encode_header(references) msg['Subject'] = encode_header(subject) - msg['From'] = encode_header(email_from) + msg['From'] = encode_rfc2822_address_header(email_from) del msg['Reply-To'] if reply_to: - msg['Reply-To'] = encode_header(reply_to) + msg['Reply-To'] = encode_rfc2822_address_header(reply_to) else: msg['Reply-To'] = msg['From'] - msg['To'] = encode_header(COMMASPACE.join(email_to)) + msg['To'] = encode_rfc2822_address_header(COMMASPACE.join(email_to)) if email_cc: - msg['Cc'] = encode_header(COMMASPACE.join(email_cc)) + msg['Cc'] = encode_rfc2822_address_header(COMMASPACE.join(email_cc)) if email_bcc: - msg['Bcc'] = encode_header(COMMASPACE.join(email_bcc)) + msg['Bcc'] = encode_rfc2822_address_header(COMMASPACE.join(email_bcc)) msg['Date'] = formatdate(localtime=True) # Custom headers may override normal headers or provide additional ones for key, value in headers.iteritems(): @@ -291,7 +341,9 @@ class ir_mail_server(osv.osv): If both mail_server_id and smtp_server are None, look for an 'smtp_server' value in server config, and fails if not found. - :param message: the email.message.Message to send + :param message: the email.message.Message to send. The envelope sender will be extracted from the + ``Return-Path`` or ``From`` headers. The envelope recipients will be + extracted from the combined list of ``To``, ``CC`` and ``BCC`` headers. :param mail_server_id: optional id of ir.mail_server to use for sending. overrides other smtp_* arguments. :param smtp_server: optional hostname of SMTP server to use :param smtp_encryption: one of 'none', 'starttls' or 'ssl' (see ir.mail_server fields for explanation) @@ -303,12 +355,18 @@ class ir_mail_server(osv.osv): :return: the Message-ID of the message that was just sent, if successfully sent, otherwise raises MailDeliveryException and logs root cause. """ - smtp_from = message['From'] - assert smtp_from, "The From header is required in any outbound e-mail" + smtp_from = message['Return-Path'] or message['From'] + assert smtp_from, "The Return-Path or From header is required for any outbound e-mail" + + # The email's "Envelope From" (Return-Path), and all recipient addresses must only contain ASCII characters. + from_rfc2822 = extract_rfc2822_addresses(smtp_from) + assert len(from_rfc2822) == 1, "Malformed 'Return-Path' or 'From' address - it may only contain plain ASCII characters" + smtp_from = from_rfc2822[0] email_to = message['To'] email_cc = message['Cc'] email_bcc = message['Bcc'] - smtp_to_list = filter(None,tools.flatten([email_to, email_cc, email_bcc])) + smtp_to_list = filter(None, tools.flatten(map(extract_rfc2822_addresses,[email_to, email_cc, email_bcc]))) + assert smtp_to_list, "At least one valid recipient address should be specified for outgoing emails (To/Cc/Bcc)" # Get SMTP Server Details from Mail Server mail_server = None -- 1.7.10.4