+ header_text_ascii = try_coerce_ascii(header_text_utf8)
+ # if this header contains non-ASCII characters,
+ # we'll need to wrap it up in a message.header.Header
+ # that will take care of RFC2047-encoding it as
+ # 7-bit string.
+ return header_text_ascii if header_text_ascii\
+ else Header(header_text_utf8, 'utf-8')
+
+# Matches ``"Name" <local@domain>``. Group 1 is the double-quoted display
+# name (the quotes are part of the group); group 2 is the address found
+# between the angle brackets. Optional whitespace may separate the two.
+name_with_email_pattern = re.compile(r'("[^<@>]+")\s*<([^ ,<@]+@[^> ,]+)>')
+# Matches a bare ``local@domain`` token: neither side of the ``@`` may
+# contain spaces or commas. Used by extract_rfc2822_addresses() to pull
+# candidate addresses out of free-form header text.
+address_pattern = re.compile(r'([^ ,<@]+@[^> ,]+)')
+
+def extract_rfc2822_addresses(text):
+    """Return the list of e-mail addresses that can be found in ``text``.
+
+    ``text`` is passed through ``tools.ustr`` and UTF-8 encoded, then
+    scanned with ``address_pattern``. Only candidates for which
+    ``try_coerce_ascii`` gives a true value are kept, so malformed
+    (non-matching) and non-ASCII addresses are ignored.
+
+    An empty or otherwise false ``text`` yields an empty list.
+    """
+    if not text:
+        return []
+    encoded_text = tools.ustr(text).encode('utf-8')
+    # Keep the candidates themselves, not the coerced values: the ASCII
+    # coercion is only used as an accept/reject test here.
+    return [address for address in address_pattern.findall(encoded_text)
+            if try_coerce_ascii(address)]
+
+def encode_rfc2822_address_header(header_text):
+    """Return an ASCII-safe version of an address header value.
+
+    When ``header_text`` is already plain ASCII it is returned as is
+    (in its coerced form). Otherwise every ``"Name" <address@domain>``
+    occurrence has its ``"Name"`` part replaced by the RFC2047-encoded
+    equivalent, leaving the address part untouched. If the result is
+    still not ASCII, only the bare addresses that can be extracted from
+    it are returned, joined with ``COMMASPACE``.
+    """
+    def _encode_display_name(match):
+        # Group 1 is the quoted display name, group 2 the address;
+        # only the former goes through Header() for RFC2047 encoding.
+        quoted_name, address = match.group(1, 2)
+        return "%s <%s>" % (str(Header(quoted_name, 'utf-8')), address)
+
+    utf8_value = tools.ustr(header_text).encode('utf-8')
+    ascii_value = try_coerce_ascii(utf8_value)
+    if not ascii_value:
+        # Non-ASCII characters are present: encode all the "Name"
+        # parts, then check again whether the header is now clean.
+        utf8_value = name_with_email_pattern.sub(_encode_display_name,
+                                                 utf8_value)
+        ascii_value = try_coerce_ascii(utf8_value)
+    if ascii_value:
+        return ascii_value
+    # Last resort: keep the pure addresses only. This can still fail
+    # downstream if the addresses themselves hold non-ASCII characters.
+    return COMMASPACE.join(extract_rfc2822_addresses(utf8_value))