[ADD] deduplication of website images being uploaded
author Xavier Morel <xmo@openerp.com>
Mon, 3 Mar 2014 15:38:55 +0000 (16:38 +0100)
committer Xavier Morel <xmo@openerp.com>
Mon, 3 Mar 2014 15:38:55 +0000 (16:38 +0100)
There is a deduplication in ir.attachment, but it's only for FS-stored content
*and* it only deduplicates storage not models (as there are access rights
issues involved).

The goal here is to always return the same attachment when a user uploads the
exact same image multiple times (because it's simpler or whatever).

Initially tried to use a binary field & digest(), but search() blows up
because it tries to utf-8 encode raw binary data. So use char & hexdigest
instead.

_compute_checksum returns None if the provided attachment data does not look
like a website image attachment.

Unhandled: multiple existing matches. A UNIQUE constraint on the checksum
field might be a good idea, just in case.

bzr revid: xmo@openerp.com-20140303153855-5f2l8v0jq2mgb26f

addons/website/models/website.py

index 8f95a4a..4e39f29 100644 (file)
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import hashlib
 import inspect
 import itertools
 import logging
@@ -562,10 +563,37 @@ class ir_attachment(osv.osv):
                     'max_height': 768,
                 })
         return result
+    def _datas_checksum(self, cr, uid, ids, name, arg, context=None):
+        """Function-field getter for ``datas_checksum``.
+
+        Reads, for every id in ``ids``, the fields that
+        ``_compute_checksum`` inspects and returns a dict mapping each
+        attachment id to whatever ``_compute_checksum`` yields for it
+        (a hex digest string, or None).
+
+        ``name`` and ``arg`` are accepted but not used here.
+        """
+        wanted = ['res_model', 'res_id', 'type', 'datas']
+        checksums = {}
+        for record in self.read(cr, uid, ids, wanted, context=context):
+            checksums[record['id']] = self._compute_checksum(record)
+        return checksums
+
+    def _compute_checksum(self, attachment_dict):
+        """Return the SHA-1 hex digest of a website image attachment.
+
+        ``attachment_dict`` is a mapping of attachment field values. A
+        digest is produced only when all of the following hold:
+
+        * ``res_model`` is ``'ir.ui.view'``
+        * ``res_id`` is missing or falsy
+        * ``type`` is ``'binary'`` (also assumed when the key is absent)
+        * ``datas`` is present and non-empty
+
+        In every other case None is returned.
+        """
+        if attachment_dict.get('res_model') != 'ir.ui.view':
+            return None
+        if attachment_dict.get('res_id'):
+            return None
+        if attachment_dict.get('type', 'binary') != 'binary':
+            return None
+
+        payload = attachment_dict.get('datas')
+        if not payload:
+            return None
+        return hashlib.sha1(payload).hexdigest()
+
     _columns = {
+        # Hex digest produced by _compute_checksum (SHA-1, i.e. 40 hex
+        # characters, hence size=40). Declared as a stored char field so
+        # that create() can search() on it; it is None for attachments
+        # that _compute_checksum does not treat as website images.
+        'datas_checksum': fields.function(_datas_checksum, size=40,
+              string="Datas checksum", type='char', store=True, select=True),
         'website_url': fields.function(_website_url_get, string="Attachment URL", type='char')
     }
 
+    def create(self, cr, uid, values, context=None):
+        """Create an attachment, reusing an existing identical website image.
+
+        When ``values`` describes a website image (see
+        ``_compute_checksum``) and an attachment with the same
+        ``datas_checksum`` is already found by ``search()``, the id of that
+        existing attachment is returned and nothing new is created.
+        Otherwise creation is delegated to the parent implementation.
+
+        :param values: field values for the new attachment
+        :returns: id of the reused or newly created attachment
+        """
+        chk = self._compute_checksum(values)
+        if chk:
+            # Only the first match is ever used, so do not load every
+            # duplicate id: limit=1 yields the same first id as an
+            # unbounded search under the same ordering.
+            match = self.search(
+                cr, uid, [('datas_checksum', '=', chk)],
+                limit=1, context=context)
+            if match:
+                return match[0]
+        return super(ir_attachment, self).create(
+            cr, uid, values, context=context)
+
 class res_partner(osv.osv):
     _inherit = "res.partner"