addons/document/document_storage.py

   1 # -*- encoding: utf-8 -*-
   2 ##############################################################################
   3 #
   4 #    OpenERP, Open Source Management Solution
   5 #
   6 #    Copyright (C) P. Christeas, 2009, all rights reserved
   7 #
   8 #    This program is free software: you can redistribute it and/or modify
   9 #    it under the terms of the GNU General Public License as published by
  10 #    the Free Software Foundation, either version 3 of the License, or
  11 #    (at your option) any later version.
  12 #
  13 #    This program is distributed in the hope that it will be useful,
  14 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 #    GNU General Public License for more details.
  17 #
  18 #    You should have received a copy of the GNU General Public License
  19 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20 #
  21 ##############################################################################
  22
  23 from osv import osv, fields
  24 import os
  25 import tools
  26 import base64
  27 from tools.misc import ustr
  28 from tools.translate import _
  29
  30 from osv.orm import except_orm
  31
  32 import random
  33 import string
  34 import netsvc
  35 from content_index import cntIndex
  36
  37 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
  38
  39
  40 """ The algorithm of data storage
  41
  42 We have to consider 3 cases of data /retrieval/:
  43  Given (context,path) we need to access the file (aka. node).
  44  given (directory, context), we need one of its children (for listings, views)
  45  given (ir.attachment, context), we need its data and metadata (node).
  46
  47 For data /storage/ we have the cases:
  48  Have (ir.attachment, context), we modify the file (save, update, rename etc).
  49  Have (directory, context), we create a file.
  50  Have (path, context), we create or modify a file.
  51
  52 Note that in all above cases, we don't explicitly choose the storage media,
  53 but always require a context to be present.
  54
  55 Note that a node will not always have a corresponding ir.attachment. Dynamic
  56 nodes, for one, won't. Their metadata will be computed by the parent storage
  57 media + directory.
  58
  59 The algorithm says that in any of the above cases, our first goal is to locate
  60 the node for any combination of search criteria. It would be wise NOT to
  61 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
  62 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
  63
  64 We also contain all the parenting loop code in one function. This is intentional,
  65 because one day this will be optimized in the db (Pg 8.4).
  66
  67
  68 """
  69
  70 def random_name():
  71     random.seed()
  72     d = [random.choice(string.ascii_letters) for x in xrange(10) ]
  73     name = "".join(d)
  74     return name
  75
  76 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
  77
  78
  79 def create_directory(path):
  80     dir_name = random_name()
  81     path = os.path.join(path, dir_name)
  82     os.makedirs(path)
  83     return dir_name
  84
  85
  86 class document_storage(osv.osv):
  87     """ The primary object for data storage.
  88     Each instance of this object is a storage media, in which our application
  89     can store contents. The object here controls the behaviour of the storage
  90     media.
  91     The referring document.directory-ies will control the placement of data
  92     into the storage.
  93
  94     It is a bad idea to have multiple document.storage objects pointing to
  95     the same tree of filesystem storage.
  96     """
  97     _name = 'document.storage'
  98     _description = 'Storage Media'
  99     _log_create=True
 100
 101     _columns = {
 102         'name': fields.char('Name', size=64, required=True, select=1),
 103         'write_date': fields.datetime('Date Modified', readonly=True),
 104         'write_uid':  fields.many2one('res.users', 'Last Modification User', readonly=True),
 105         'create_date': fields.datetime('Date Created', readonly=True),
 106         'create_uid':  fields.many2one('res.users', 'Creator', readonly=True),
 107         'user_id': fields.many2one('res.users', 'Owner'),
 108         'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
 109         'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
 110         'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
 111             ('realstore', 'External file storage'), ('virtual', 'Virtual storage')], 'Type', required=True),
 112         'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
 113         'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
 114         'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
 115     }
 116
 117     def _get_rootpath(self, cr, uid, context=None):
 118         return os.path.join(DMS_ROOT_PATH, cr.dbname)
 119
 120     _defaults = {
 121         'user_id': lambda self, cr, uid, ctx: uid,
 122         'online': lambda *args: True,
 123         'readonly': lambda *args: False,
 124         # Note: the defaults below should only be used ONCE for the default
 125         # storage media. All other times, we should create different paths at least.
 126         'type': lambda *args: 'filestore',
 127         'path': _get_rootpath,
 128     }
 129     _sql_constraints = [
 130         # SQL note: a path = NULL doesn't have to be unique.
 131         ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
 132         ]
 133
 134     def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
 135         """ retrieve the contents of some file_node having storage_id = id
 136             optionally, fil_obj could point to the browse object of the file
 137             (ir.attachment)
 138         """
 139         if not context:
 140             context = {}
 141         boo = self.browse(cr, uid, id, context)
 142         if fil_obj:
 143             ira = fil_obj
 144         else:
 145             ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
 146         return self.__get_data_3(cr, uid, boo, ira, context)
 147
 148     def __get_data_3(self, cr, uid, boo, ira, context):
 149         if not boo.online:
 150             raise RuntimeError('media offline')
 151         if boo.type == 'filestore':
 152             if not ira.store_fname:
 153                 # On a migrated db, some files may have the wrong storage type
 154                 # try to fix their directory.
 155                 if ira.file_size:
 156                     netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
 157                 return None
 158             fpath = os.path.join(boo.path, ira.store_fname)
 159             return file(fpath, 'rb').read()
 160         elif boo.type == 'db':
 161             # TODO: we need a better api for large files
 162             if ira.db_datas:
 163                 out = base64.decodestring(ira.db_datas)
 164             else:
 165                 out = ''
 166             return out
 167         elif boo.type == 'realstore':
 168             if not ira.store_fname:
 169                 # On a migrated db, some files may have the wrong storage type
 170                 # try to fix their directory.
 171                 if ira.file_size:
 172                     netsvc.Logger().notifyChannel('document',netsvc.LOG_WARNING,"ir.attachment #%d does not have a filename, trying the name." %ira.id)
 173                 sfname = ira.name
 174             fpath = os.path.join(boo.path,ira.store_fname or ira.name)
 175             if os.path.exists(fpath):
 176                 return file(fpath,'rb').read()
 177             elif not ira.store_fname:
 178                 return None
 179             else:
 180                 raise IOError("File not found: %s" % fpath)
 181         else:
 182             raise TypeError("No %s storage" % boo.type)
 183
 184     def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
 185         """ store the data.
 186             This function MUST be used from an ir.attachment. It wouldn't make sense
 187             to store things persistently for other types (dynamic).
 188         """
 189         if not context:
 190             context = {}
 191         boo = self.browse(cr, uid, id, context)
 192         logger = netsvc.Logger()
 193         if fil_obj:
 194             ira = fil_obj
 195         else:
 196             ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
 197
 198         if not boo.online:
 199             raise RuntimeError('media offline')
 200         logger.notifyChannel('document', netsvc.LOG_DEBUG, "Store data for ir.attachment #%d" % ira.id)
 201         store_fname = None
 202         fname = None
 203         if boo.type == 'filestore':
 204             path = boo.path
 205             try:
 206                 flag = None
 207                 # This can be improved
 208                 if os.path.isdir(path):
 209                     for dirs in os.listdir(path):
 210                         if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
 211                             flag = dirs
 212                             break
 213                 flag = flag or create_directory(path)
 214                 filename = random_name()
 215                 fname = os.path.join(path, flag, filename)
 216                 fp = file(fname, 'wb')
 217                 fp.write(data)
 218                 fp.close()
 219                 logger.notifyChannel('document', netsvc.LOG_DEBUG, "Saved data to %s" % fname)
 220                 filesize = len(data) # os.stat(fname).st_size
 221                 store_fname = os.path.join(flag, filename)
 222
 223                 # TODO Here, an old file would be left hanging.
 224
 225             except Exception, e :
 226                 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
 227                 raise except_orm(_('Error!'), str(e))
 228         elif boo.type == 'db':
 229             filesize = len(data)
 230             # will that work for huge data? TODO
 231             out = base64.encodestring(data)
 232             cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
 233                 (out, file_node.file_id))
 234         elif boo.type == 'realstore':
 235             try:
 236                 file_node.fix_ppath(cr, ira)
 237                 npath = file_node.full_path() or []
 238                 # npath may contain empty elements, for root directory etc.
 239                 for i, n in enumerate(npath):
 240                     if n == None:
 241                         del npath[i]
 242                 for n in npath:
 243                     for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
 244                         if ch in n:
 245                             raise ValueError("Invalid char %s in path %s" %(ch, n))
 246                 dpath = [boo.path,]
 247                 dpath += npath[:-1]
 248                 path = os.path.join(*dpath)
 249                 if not os.path.isdir(path):
 250                     os.makedirs(path)
 251                 fname = os.path.join(path, npath[-1])
 252                 fp = file(fname,'wb')
 253                 fp.write(data)
 254                 fp.close()
 255                 logger.notifyChannel('document',netsvc.LOG_DEBUG,"Saved data to %s" % fname)
 256                 filesize = len(data) # os.stat(fname).st_size
 257                 store_fname = os.path.join(*npath)
 258                 # TODO Here, an old file would be left hanging.
 259             except Exception,e :
 260                 import traceback
 261                 traceback.print_exc()
 262                 netsvc.Logger().notifyChannel('document',netsvc.LOG_WARNING,"Couldn't save data: %s" % e)
 263                 raise except_orm(_('Error!'), str(e))
 264         else:
 265             raise TypeError("No %s storage" % boo.type)
 266
 267         # 2nd phase: store the metadata
 268         try:
 269             icont = ''
 270             mime = ira.file_type
 271             if not mime:
 272                 mime = ""
 273             try:
 274                 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
 275                 ira.file_type or None, fname)
 276             except Exception, e:
 277                 logger.notifyChannel('document', netsvc.LOG_DEBUG, 'Cannot index file: %s' % str(e))
 278                 pass
 279
 280             # a hack: /assume/ that the calling write operation will not try
 281             # to write the fname and size, and update them in the db concurrently.
 282             # We cannot use a write() here, because we are already in one.
 283             cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
 284                 (store_fname, filesize, ustr(icont), mime, file_node.file_id))
 285             file_node.content_length = filesize
 286             file_node.content_type = mime
 287             return True
 288         except Exception, e :
 289             netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
 290             # should we really rollback once we have written the actual data?
 291             # at the db case (only), that rollback would be safe
 292             raise except_orm(_('Error at doc write!'), str(e))
 293
 294     def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
 295         """ Before we unlink a file (fil_boo), prepare the list of real
 296         files that have to be removed, too. """
 297
 298         if not storage_bo.online:
 299             raise RuntimeError('media offline')
 300
 301         if storage_bo.type == 'filestore':
 302             fname = fil_bo.store_fname
 303             if not fname:
 304                 return None
 305             path = storage_bo.path
 306             return (storage_bo.id, 'file', os.path.join(path, fname))
 307         elif storage_bo.type == 'db':
 308             return None
 309         elif storage_bo.type == 'realstore':
 310             fname = fil_bo.store_fname
 311             if not fname:
 312                 return None
 313             path = storage_bo.path
 314             return ( storage_bo.id, 'file', os.path.join(path,fname))
 315         else:
 316             raise TypeError("No %s storage" % boo.type)
 317
 318     def do_unlink(self, cr, uid, unres):
 319         for id, ktype, fname in unres:
 320             if ktype == 'file':
 321                 try:
 322                     os.unlink(fname)
 323                 except Exception, e:
 324                     netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Could not remove file %s, please remove manually." % fname)
 325             else:
 326                 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Unknown unlink key %s" % ktype)
 327
 328         return True
 329
 330
 331 document_storage()
 332
 333
 334 #eof