[IMP] mail_gateway: another round of encoding handling improvements. WARNING: this...
authorOlivier Dony <odo@openerp.com>
Fri, 2 Jul 2010 13:44:45 +0000 (15:44 +0200)
committerOlivier Dony <odo@openerp.com>
Fri, 2 Jul 2010 13:44:45 +0000 (15:44 +0200)
bzr revid: odo@openerp.com-20100702134445-eoeu987j6smszk02

addons/mail_gateway/mail_gateway.py
addons/mail_gateway/scripts/openerp_mailgate/openerp_mailgate.py

index 087403b..e893e36 100644 (file)
@@ -29,6 +29,7 @@ import base64
 import re
 from tools.translate import _
 import logging
+import xmlrpclib
 
 _logger = logging.getLogger('mailgate')
 
@@ -173,21 +174,11 @@ class mailgate_tool(osv.osv_memory):
     _name = 'email.server.tools'
     _description = "Email Server Tools"
 
-    def _to_decode(self, s, charsets):
-        if not s:
-            return s
-        for charset in charsets:
-            if charset:
-                try:
-                    return s.decode(charset)
-                except UnicodeError:
-                    pass
-        return s.decode('latin1')
-
     def _decode_header(self, text):
+        """Returns unicode() string conversion of the the given encoded smtp header"""
         if text:
-            text = decode_header(text.replace('\r', '')) 
-        return ''.join(map(lambda x:self._to_decode(x[0], [x[1]]), text or []))
+            text = decode_header(text.replace('\r', ''))
+            return ''.join([tools.ustr(x[0], x[1]) for x in text])
 
     def to_email(self,text):
         return re.findall(r'([^ ,<@]+@[^> ,]+)',text)
@@ -219,7 +210,7 @@ class mailgate_tool(osv.osv_memory):
                         'message_id': msg.get('message-id'), 
                         'references': msg.get('references'), 
                         'res_id': res_id,
-                        'user_id': uid, 
+                        'user_id': uid,
                         'attachment_ids': [(6, 0, attach)]
             }
             msg_pool.create(cr, uid, msg_data, context=context)
@@ -236,9 +227,10 @@ class mailgate_tool(osv.osv_memory):
         for res in model_pool.browse(cr, uid, res_ids, context=context):
             message_followers = model_pool.message_followers(cr, uid, [res.id])[res.id]
             message_followers_emails = self.to_email(','.join(message_followers))
-            message_recipients = self.to_email(','.join([self._decode_header(msg['from']),
+            message_recipients = self.to_email(','.join(filter(None,
+                                                         [self._decode_header(msg['from']),
                                                          self._decode_header(msg['to']),
-                                                         self._decode_header(msg['cc'])]))
+                                                         self._decode_header(msg['cc'])])))
             message_forward = [i for i in message_followers_emails if (i and (i not in message_recipients))]
 
             if message_forward:
@@ -261,13 +253,22 @@ class mailgate_tool(osv.osv_memory):
         @param cr: the current row, from the database cursor,
         @param uid: the current user’s ID for security checks,
         @param model: OpenObject Model
-        @param message: Email details
+        @param message: Email details, passed as a string or an xmlrpclib.Binary
         @param attach: Email attachments
         @param context: A standard dictionary for contextual values"""
-        model_pool = self.pool.get(model)
+
+        # extract message bytes, we are forced to pass the message as binary because
+        # we don't know its encoding until we parse its headers and hence can't
+        # convert it to utf-8 for transport between the mailgate script and here.
+        if isinstance(message, xmlrpclib.Binary):
+            message = str(message.data)
+
         if not context:
             context = {}
+
+        model_pool = self.pool.get(model)
         res_id = False
+
         # Create New Record into particular model
         def create_record(msg):
             att_ids = []
@@ -301,14 +302,16 @@ class mailgate_tool(osv.osv_memory):
 
         # Warning: message_from_string doesn't always work correctly on unicode,
         # we must use utf-8 strings here :-(
-        msg_txt = email.message_from_string(tools.ustr(message).encode('utf-8'))
-        message_id = msg_txt.get('Message-ID', False)
+        if isinstance(message, unicode):
+            message = message.encode('utf-8')
+        msg_txt = email.message_from_string(message)
+        message_id = msg_txt.get('message-id', False)
         msg = {}
 
         if not message_id:
             # Very unusual situation, be we should be fault-tolerant here
             message_id = time.time()
-            msg_txt['Message-ID'] = message_id
+            msg_txt['message-id'] = message_id
             _logger.info('Message without message-id, generating a random one: %s', message_id)
 
         fields = msg_txt.keys()
@@ -327,14 +330,14 @@ class mailgate_tool(osv.osv_memory):
         if 'Delivered-To' in fields:
             msg['to'] = self._decode_header(msg_txt.get('Delivered-To'))
 
-        if 'Cc' in fields:
-            msg['cc'] = self._decode_header(msg_txt.get('Cc'))
+        if 'CC' in fields:
+            msg['cc'] = self._decode_header(msg_txt.get('CC'))
 
-        if 'Reply-To' in fields:
+        if 'Reply-to' in fields:
             msg['reply'] = self._decode_header(msg_txt.get('Reply-To'))
 
         if 'Date' in fields:
-            msg['date'] = msg_txt.get('Date')
+            msg['date'] = self._decode_header(msg_txt.get('Date'))
 
         if 'Content-Transfer-Encoding' in fields:
             msg['encoding'] = msg_txt.get('Content-Transfer-Encoding')
@@ -343,15 +346,15 @@ class mailgate_tool(osv.osv_memory):
             msg['references'] = msg_txt.get('References')
 
         if 'X-Priority' in fields:
-            msg['priority'] = msg_txt.get('X-priority', '3 (Normal)').split(' ')[0]
+            msg['priority'] = msg_txt.get('X-Priority', '3 (Normal)').split(' ')[0]
 
-        if not msg_txt.is_multipart() or 'text/plain' in msg.get('content-type', ''):
+        if not msg_txt.is_multipart() or 'text/plain' in msg.get('Content-Type', ''):
             encoding = msg_txt.get_content_charset()
-            msg['body'] = msg_txt.get_payload(decode=True)
-            if encoding:
-                msg['body'] = tools.ustr(msg['body'])
+            body = msg_txt.get_payload(decode=True)
+            msg['body'] = tools.ustr(body, encoding)
 
         attachments = {}
+        has_plain_text = False
         if msg_txt.is_multipart() or 'multipart/alternative' in msg.get('content-type', ''):
             body = ""
             for part in msg_txt.walk():
@@ -359,26 +362,28 @@ class mailgate_tool(osv.osv_memory):
                     continue
 
                 encoding = part.get_content_charset()
-
+                filename = part.get_filename()
                 if part.get_content_maintype()=='text':
                     content = part.get_payload(decode=True)
-                    filename = part.get_filename()
-                    if filename :
+                    if filename:
                         attachments[filename] = content
-                    else:
-                        if encoding:
-                            content = unicode(content, encoding)
+                    elif not has_plain_text:
+                        # main content parts should have 'text' maintype
+                        # and no filename. we ignore the html part if
+                        # there is already a plaintext part without filename,
+                        # because presumably these are alternatives.
+                        content = tools.ustr(content, encoding)
                         if part.get_content_subtype() == 'html':
                             body = tools.ustr(tools.html2plaintext(content))
                         elif part.get_content_subtype() == 'plain':
                             body = content
-                elif part.get_content_maintype() in ('application', 'image', 'text'):
-                    filename = part.get_filename();
+                            has_plain_text = True
+                elif part.get_content_maintype() in ('application', 'image'):
                     if filename :
                         attachments[filename] = part.get_payload(decode=True)
                     else:
                         res = part.get_payload(decode=True)
-                        body += tools.ustr(res)
+                        body += tools.ustr(res, encoding)
 
             msg['body'] = body
             msg['attachments'] = attachments
@@ -402,20 +407,22 @@ class mailgate_tool(osv.osv_memory):
                         res_id = res_id.group(1)
                 if res_id:
                     res_id = int(res_id)
-                    res_ids.append(res_id)
                     model_pool = self.pool.get(model)
-
-                    vals = {}
-                    if hasattr(model_pool, 'message_update'):
-                        model_pool.message_update(cr, uid, [res_id], vals, msg, context=context)
+                    if model_pool.exists(cr, uid, res_id):
+                        res_ids.append(res_id)
+                        if hasattr(model_pool, 'message_update'):
+                            model_pool.message_update(cr, uid, [res_id], {}, msg, context=context)
+                        else:
+                            raise NotImplementedError('model %s does not support updating records, mailgate API method message_update() is missing'%model)
 
         if not len(res_ids):
             new_res_id, attachment_ids = create_record(msg)
             res_ids = [new_res_id]
+
         # Store messages
         context.update({'model' : model})
         if hasattr(model_pool, '_history'):
-            model_pool._history(cr, uid, res_ids, _('Receive'), history=True,
+            model_pool._history(cr, uid, res_ids, _('receive'), history=True,
                             subject = msg.get('subject'),
                             email = msg.get('to'),
                             details = msg.get('body'),
index 166e1e8..affd03e 100755 (executable)
@@ -51,7 +51,9 @@ class email_parser(object):
 
     def parse(self, message):
         try:
-            res_id = self.rpc('email.server.tools', 'process_email', self.model, message)
+            # pass message as bytes because we don't know its encoding until we parse its headers
+            # and hence can't convert it to utf-8 for transport
+            res_id = self.rpc('email.server.tools', 'process_email', self.model, xmlrpclib.Binary(message))
         except Exception, e:
             logger = logging.getLogger('mail-gateway')
             logger.warning('Failed to process incoming email. Source of the failed mail is available at debug level.', exc_info=True)