addons/document/document_storage.py

   1 # -*- encoding: utf-8 -*-
   2 ##############################################################################
   3 #
   4 #    OpenERP, Open Source Management Solution
   5 #
   6 #    Copyright (C) P. Christeas, 2009, all rights reserved
   7 #
   8 #    This program is free software: you can redistribute it and/or modify
   9 #    it under the terms of the GNU General Public License as published by
  10 #    the Free Software Foundation, either version 3 of the License, or
  11 #    (at your option) any later version.
  12 #
  13 #    This program is distributed in the hope that it will be useful,
  14 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 #    GNU General Public License for more details.
  17 #
  18 #    You should have received a copy of the GNU General Public License
  19 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20 #
  21 ##############################################################################
  22
  23 from osv import osv, fields
  24 import os
  25 import tools
  26 import base64
  27 from tools.misc import ustr
  28 from tools.translate import _
  29
  30 from osv.orm import except_orm
  31
  32 import random
  33 import string
  34 import netsvc
  35 from content_index import cntIndex
  36
  37 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
  38
  39
  40 """ The algorithm of data storage
  41
We have to consider 3 cases of data /retrieval/:
 Given (context, path), we need to access the file (a.k.a. node).
 Given (directory, context), we need one of its children (for listings, views).
 Given (ir.attachment, context), we need its data and metadata (node).
  46
  47 For data /storage/ we have the cases:
  48  Have (ir.attachment, context), we modify the file (save, update, rename etc).
  49  Have (directory, context), we create a file.
  50  Have (path, context), we create or modify a file.
  51
  52 Note that in all above cases, we don't explicitly choose the storage media,
  53 but always require a context to be present.
  54
Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
media + directory.
  58
  59 The algorithm says that in any of the above cases, our first goal is to locate
  60 the node for any combination of search criteria. It would be wise NOT to
  61 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
  62 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
  63
  64 We also contain all the parenting loop code in one function. This is intentional,
  65 because one day this will be optimized in the db (Pg 8.4).
  66
  67
  68 """
  69
  70 def random_name():
  71     random.seed()
  72     d = [random.choice(string.ascii_letters) for x in xrange(10) ]
  73     name = "".join(d)
  74     return name
  75
  76 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
  77
  78
  79 def create_directory(path):
  80     dir_name = random_name()
  81     path = os.path.join(path, dir_name)
  82     os.makedirs(path)
  83     return dir_name
  84
  85
  86 class document_storage(osv.osv):
  87     """ The primary object for data storage.
  88     Each instance of this object is a storage media, in which our application
  89     can store contents. The object here controls the behaviour of the storage
  90     media.
  91     The referring document.directory-ies will control the placement of data
  92     into the storage.
  93
  94     It is a bad idea to have multiple document.storage objects pointing to
  95     the same tree of filesystem storage.
  96     """
  97     _name = 'document.storage'
  98     _description = 'Document storage media'
  99     _columns = {
 100         'name': fields.char('Name', size=64, required=True, select=1),
 101         'write_date': fields.datetime('Date Modified', readonly=True),
 102         'write_uid':  fields.many2one('res.users', 'Last Modification User', readonly=True),
 103         'create_date': fields.datetime('Date Created', readonly=True),
 104         'create_uid':  fields.many2one('res.users', 'Creator', readonly=True),
 105         'user_id': fields.many2one('res.users', 'Owner'),
 106         'group_ids': fields.many2many('res.groups', 'document_directory_group_rel', 'item_id', 'group_id', 'Groups'),
 107         'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
 108         'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
 109             ('realstore', 'External file storage'), ('virtual', 'Virtual storage')], 'Type', required=True),
 110         'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
 111         'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
 112         'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
 113     }
 114
 115     def _get_rootpath(self, cr, uid, context=None):
 116         return os.path.join(DMS_ROOT_PATH, cr.dbname)
 117
 118     _defaults = {
 119         'user_id': lambda self, cr, uid, ctx: uid,
 120         'online': lambda *args: True,
 121         'readonly': lambda *args: False,
 122         # Note: the defaults below should only be used ONCE for the default
 123         # storage media. All other times, we should create different paths at least.
 124         'type': lambda *args: 'filestore',
 125         'path': _get_rootpath,
 126     }
 127     _sql_constraints = [
 128         # SQL note: a path = NULL doesn't have to be unique.
 129         ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
 130         ]
 131
 132     def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
 133         """ retrieve the contents of some file_node having storage_id = id
 134             optionally, fil_obj could point to the browse object of the file
 135             (ir.attachment)
 136         """
 137         if not context:
 138             context = {}
 139         boo = self.browse(cr, uid, id, context)
 140         if fil_obj:
 141             ira = fil_obj
 142         else:
 143             ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
 144         return self.__get_data_3(cr, uid, boo, ira, context)
 145
 146     def __get_data_3(self, cr, uid, boo, ira, context):
 147         if not boo.online:
 148             raise RuntimeError('media offline')
 149         if boo.type == 'filestore':
 150             if not ira.store_fname:
 151                 # On a migrated db, some files may have the wrong storage type
 152                 # try to fix their directory.
 153                 if ira.file_size:
 154                     netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
 155                 return None
 156             fpath = os.path.join(boo.path, ira.store_fname)
 157             return file(fpath, 'rb').read()
 158         elif boo.type == 'db':
 159             # TODO: we need a better api for large files
 160             if ira.db_datas:
 161                 out = base64.decodestring(ira.db_datas)
 162             else:
 163                 out = ''
 164             return out
 165         elif boo.type == 'realstore':
 166             if not ira.store_fname:
 167                 # On a migrated db, some files may have the wrong storage type
 168                 # try to fix their directory.
 169                 if ira.file_size:
 170                     netsvc.Logger().notifyChannel('document',netsvc.LOG_WARNING,"ir.attachment #%d does not have a filename, trying the name." %ira.id)
 171                 sfname = ira.name
 172             fpath = os.path.join(boo.path,ira.store_fname or ira.name)
 173             if os.path.exists(fpath):
 174                 return file(fpath,'rb').read()
 175             elif not ira.store_fname:
 176                 return None
 177             else:
 178                 raise IOError("File not found: %s" % fpath)
 179         else:
 180             raise TypeError("No %s storage" % boo.type)
 181
 182     def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
 183         """ store the data.
 184             This function MUST be used from an ir.attachment. It wouldn't make sense
 185             to store things persistently for other types (dynamic).
 186         """
 187         if not context:
 188             context = {}
 189         boo = self.browse(cr, uid, id, context)
 190         logger = netsvc.Logger()
 191         if fil_obj:
 192             ira = fil_obj
 193         else:
 194             ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
 195
 196         if not boo.online:
 197             raise RuntimeError('media offline')
 198         logger.notifyChannel('document', netsvc.LOG_DEBUG, "Store data for ir.attachment #%d" % ira.id)
 199         store_fname = None
 200         fname = None
 201         if boo.type == 'filestore':
 202             path = boo.path
 203             try:
 204                 flag = None
 205                 # This can be improved
 206                 if os.path.isdir(path):
 207                     for dirs in os.listdir(path):
 208                         if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
 209                             flag = dirs
 210                             break
 211                 flag = flag or create_directory(path)
 212                 filename = random_name()
 213                 fname = os.path.join(path, flag, filename)
 214                 fp = file(fname, 'wb')
 215                 fp.write(data)
 216                 fp.close()
 217                 logger.notifyChannel('document', netsvc.LOG_DEBUG, "Saved data to %s" % fname)
 218                 filesize = len(data) # os.stat(fname).st_size
 219                 store_fname = os.path.join(flag, filename)
 220
 221                 # TODO Here, an old file would be left hanging.
 222
 223             except Exception, e :
 224                 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
 225                 raise except_orm(_('Error!'), str(e))
 226         elif boo.type == 'db':
 227             filesize = len(data)
 228             # will that work for huge data? TODO
 229             out = base64.encodestring(data)
 230             cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
 231                 (out, file_node.file_id))
 232         elif boo.type == 'realstore':
 233             try:
 234                 file_node.fix_ppath(cr, ira)
 235                 npath = file_node.full_path() or []
 236                 # npath may contain empty elements, for root directory etc.
 237                 for i, n in enumerate(npath):
 238                     if n == None:
 239                         del npath[i]
 240                 for n in npath:
 241                     for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
 242                         if ch in n:
 243                             raise ValueError("Invalid char %s in path %s" %(ch, n))
 244                 dpath = [boo.path,]
 245                 dpath += npath[:-1]
 246                 path = os.path.join(*dpath)
 247                 if not os.path.isdir(path):
 248                     os.makedirs(path)
 249                 fname = os.path.join(path, npath[-1])
 250                 fp = file(fname,'wb')
 251                 fp.write(data)
 252                 fp.close()
 253                 logger.notifyChannel('document',netsvc.LOG_DEBUG,"Saved data to %s" % fname)
 254                 filesize = len(data) # os.stat(fname).st_size
 255                 store_fname = os.path.join(*npath)
 256                 # TODO Here, an old file would be left hanging.
 257             except Exception,e :
 258                 import traceback
 259                 traceback.print_exc()
 260                 netsvc.Logger().notifyChannel('document',netsvc.LOG_WARNING,"Couldn't save data: %s" % e)
 261                 raise except_orm(_('Error!'), str(e))
 262         else:
 263             raise TypeError("No %s storage" % boo.type)
 264
 265         # 2nd phase: store the metadata
 266         try:
 267             icont = ''
 268             mime = ira.file_type
 269             if not mime:
 270                 mime = ""
 271             try:
 272                 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
 273                 ira.file_type or None, fname)
 274             except Exception, e:
 275                 logger.notifyChannel('document', netsvc.LOG_DEBUG, 'Cannot index file: %s' % str(e))
 276                 pass
 277
 278             # a hack: /assume/ that the calling write operation will not try
 279             # to write the fname and size, and update them in the db concurrently.
 280             # We cannot use a write() here, because we are already in one.
 281             cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
 282                 (store_fname, filesize, ustr(icont), mime, file_node.file_id))
 283             file_node.content_length = filesize
 284             file_node.content_type = mime
 285             return True
 286         except Exception, e :
 287             netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
 288             # should we really rollback once we have written the actual data?
 289             # at the db case (only), that rollback would be safe
 290             raise except_orm(_('Error at doc write!'), str(e))
 291
 292     def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
 293         """ Before we unlink a file (fil_boo), prepare the list of real
 294         files that have to be removed, too. """
 295
 296         if not storage_bo.online:
 297             raise RuntimeError('media offline')
 298
 299         if storage_bo.type == 'filestore':
 300             fname = fil_bo.store_fname
 301             if not fname:
 302                 return None
 303             path = storage_bo.path
 304             return (storage_bo.id, 'file', os.path.join(path, fname))
 305         elif storage_bo.type == 'db':
 306             return None
 307         elif storage_bo.type == 'realstore':
 308             fname = fil_bo.store_fname
 309             if not fname:
 310                 return None
 311             path = storage_bo.path
 312             return ( storage_bo.id, 'file', os.path.join(path,fname))
 313         else:
 314             raise TypeError("No %s storage" % boo.type)
 315
 316     def do_unlink(self, cr, uid, unres):
 317         for id, ktype, fname in unres:
 318             if ktype == 'file':
 319                 try:
 320                     os.unlink(fname)
 321                 except Exception, e:
 322                     netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Could not remove file %s, please remove manually." % fname)
 323             else:
 324                 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Unknown unlink key %s" % ktype)
 325
 326         return True
 327
 328
# Instantiate the class once at import time. Presumably this is what
# registers the model with the OSV framework -- confirm against osv.osv.
document_storage()
 330
 331
 332 #eof