1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
30 from StringIO import StringIO
33 from tools.misc import ustr
34 from tools.translate import _
36 from osv.orm import except_orm
42 from content_index import cntIndex
# Root of the on-disk filestore: the 'document_path' config option, falling
# back to <root_path>/filestore when it is not set.
DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
47 """ The algorithm of data storage
49 We have to consider 3 cases of data /retrieval/:
50 Given (context,path) we need to access the file (aka. node).
51 given (directory, context), we need one of its children (for listings, views)
52 given (ir.attachment, context), we need its data and metadata (node).
54 For data /storage/ we have the cases:
55 Have (ir.attachment, context), we modify the file (save, update, rename etc).
56 Have (directory, context), we create a file.
57 Have (path, context), we create or modify a file.
59 Note that in all above cases, we don't explicitly choose the storage media,
60 but always require a context to be present.
62 Note that a node will not always have a corresponding ir.attachment. Dynamic
63 nodes, for once, won't. Their metadata will be computed by the parent storage
66 The algorithm says that in any of the above cases, our first goal is to locate
67 the node for any combination of search criteria. It would be wise NOT to
68 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
69 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
71 We also contain all the parenting loop code in one function. This is intentional,
72 because one day this will be optimized in the db (Pg 8.4).
def random_name():
    """Return a random 10-character name made of ASCII letters.

    Used to generate collision-unlikely names for filestore directories
    and files (see create_directory() and __get_random_fname()).

    @return a str of exactly 10 ASCII letters
    """
    # range() instead of xrange(): equivalent for 10 items and portable.
    d = [random.choice(string.ascii_letters) for x in range(10)]
    return ''.join(d)
# Map of characters that must not appear in stored filenames to replacement
# strings: '/' becomes '__'; every other character maps to str(hash(ch)).
# NOTE(review): str hashes are not stable across interpreter runs under
# Python 3 hash randomization -- these substitutions are only reproducible
# within one process; confirm this is acceptable for persisted names.
INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
def create_directory(path):
    """Create a new randomly-named subdirectory under *path*.

    @param path  parent directory (must already exist)
    @return the name (not the full path) of the subdirectory created --
            callers such as __get_random_fname() join it back onto the
            storage root themselves.
    """
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    # 0o700: filestore contents are private to the server's OS user.
    os.makedirs(path, 0o700)
    return dir_name
class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file

    Inheriting directly from file doesn't work, since file exports
    some read-only attributes (like 'name') that we don't like.
    """

    def __init__(self, parent, path, mode):
        nodes.node_descriptor.__init__(self, parent)
        # The real OS-level file; plain I/O calls are delegated to it below.
        self.__file = open(path, mode)
        if mode.endswith('b'):
            # NOTE(review): the body of this branch (and any following
            # else) is missing from this excerpt -- presumably it
            # normalised the mode string. Confirm against full source.
            self._size = os.stat(path).st_size
        # Expose the usual file API straight from the wrapped file object.
        for attr in ('closed', 'read', 'write', 'seek', 'tell', 'next'):
            setattr(self,attr, getattr(self.__file, attr))

        # TODO: locking in init, close()
        # NOTE(review): a 'def close(self):' header and the try/except
        # scaffolding around the statements below are missing from this
        # excerpt. The remainder re-indexes the file's content and pushes
        # size/mime metadata back into the ir_attachment row, using a
        # fresh cursor for the parent's database.
        fname = self.__file.name
        if self.mode in ('w', 'w+', 'r+'):
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            # NOTE(review): 'filename' is not assigned in the visible
            # code -- the lines deriving it from the parent node are
            # missing here.
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
            mime, icont = cntIndex.doIndex(None, filename=filename,
                    content_type=None, realfname=fname)
            # Indexing is best-effort: failures are only logged.
            logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            fsize = os.stat(fname).st_size
            # NOTE(review): the SQL below is truncated in this excerpt
            # (the 'file_size = %s WHERE id = %s' tail is missing).
            cr.execute("UPDATE ir_attachment " \
                        " SET index_content = %s, file_type = %s, " \
                        (icont_u, mime, fsize, par.file_id))
            par.content_length = fsize
            par.content_type = mime
            # Failing to store indexed content must not lose the file.
            logging.getLogger('document.storage').warning('Cannot save file indexed content:', exc_info=True)
        elif self.mode in ('a', 'a+' ):
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            fsize = os.stat(fname).st_size
            # Append mode: only the stored size needs refreshing.
            # NOTE(review): SQL truncated ('WHERE id = %s' tail missing).
            cr.execute("UPDATE ir_attachment SET file_size = %s " \
                    (fsize, par.file_id))
            par.content_length = fsize
            logging.getLogger('document.storage').warning('Cannot save file appended content:', exc_info=True)
class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data

    Reads and writes go through an in-memory StringIO buffer; the
    ir_attachment row is only updated when the descriptor is closed.
    """

    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            # NOTE(review): branch body missing from this excerpt --
            # presumably it stripped the trailing 'b' from the mode.
        if mode in ('r', 'r+'):
            cr = ira_browse._cr # reuse the cursor of the browse object, just now
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
            data = cr.fetchone()[0]
            self._size = len(data)
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        # NOTE(review): an "elif mode == 'a':" header appears to be
        # missing before the next line, and an 'else:' before the error.
            StringIO.__init__(self, None)
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")

    # NOTE(review): a 'def close(self):' header and try/except lines are
    # missing from this excerpt; the statements below persist the buffer
    # back into the ir_attachment row on close.
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        # uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        if self.mode in ('w', 'w+', 'r+'):
            data = self.getvalue()
            # NOTE(review): 'filename' is not assigned in the visible code.
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
            mime, icont = cntIndex.doIndex(data, filename=filename,
                    content_type=None, realfname=None)
            # Indexing is best-effort: failures are only logged.
            logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            out = psycopg2.Binary(data)
            # NOTE(review): the SQL below is truncated in this excerpt
            # (its 'WHERE id = %s' tail is missing).
            cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = %s, file_size=%s, " \
                    " index_content= %s, file_type=%s " \
                    (out, len(data), icont_u, mime, par.file_id))
        elif self.mode == 'a':
            data = self.getvalue()
            out = psycopg2.Binary(data)
            # Append: concatenate to the stored bytea and bump the size.
            # NOTE(review): SQL truncated ('WHERE id = %s' tail missing).
            cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = COALESCE(db_datas,'') || %s, " \
                    " file_size = COALESCE(file_size, 0) + %s " \
                    (out, len(data), par.file_id))
        # NOTE(review): this line reads like an 'except' handler body
        # whose header is missing from the excerpt.
        logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
class nodefd_db64(StringIO, nodes.node_descriptor):
    """ A descriptor to db data, base64 (the old way)

    It stores the data in base64 encoding at the db. Not optimal, but
    the transparent compression of Postgres will save the day.
    """

    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            # NOTE(review): branch body missing from this excerpt --
            # presumably it stripped the trailing 'b' from the mode.
        if mode in ('r', 'r+'):
            # Decode the base64 payload once, up front.
            data = base64.decodestring(ira_browse.db_datas)
            self._size = len(data)
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        # NOTE(review): an "elif mode == 'a':" header appears to be
        # missing before the next line, and an 'else:' before the error.
            StringIO.__init__(self, None)
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")

    # NOTE(review): a 'def close(self):' header and try/except lines are
    # missing from this excerpt; the statements below persist the buffer
    # (re-encoded to base64) into the ir_attachment row on close.
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        # uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        if self.mode in ('w', 'w+', 'r+'):
            data = self.getvalue()
            # NOTE(review): 'filename' is not assigned in the visible code.
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
            mime, icont = cntIndex.doIndex(data, filename=filename,
                    content_type=None, realfname=None)
            # Indexing is best-effort: failures are only logged.
            logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            # NOTE(review): the SQL below is truncated in this excerpt
            # (its 'WHERE id = %s' tail is missing).
            cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
                    'index_content = %s, file_type = %s ' \
                    (base64.encodestring(data), len(data), icont_u, mime, par.file_id))
        elif self.mode == 'a':
            data = self.getvalue()
            # Yes, we're obviously using the wrong representation for storing our
            # data as base64-in-bytea
            # NOTE(review): SQL truncated ('WHERE id = %s' tail missing).
            cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
                    " file_size = COALESCE(file_size, 0) + %s " \
                    (base64.encodestring(data), len(data), par.file_id))
        # NOTE(review): this line reads like an 'except' handler body
        # whose header is missing from the excerpt.
        logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
class document_storage(osv.osv):
    """ The primary object for data storage.

    Each instance of this object is a storage media, in which our application
    can store contents. The object here controls the behaviour of the storage
    The referring document.directory-ies will control the placement of data

    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    _name = 'document.storage'
    _description = 'Storage Media'
    # Shared logger for all storage operations in this model.
    _doclog = logging.getLogger('document')

    # NOTE(review): a '_columns = {' opener (and its closing brace) are
    # missing from this excerpt around the field definitions below.
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                ('realstore','External file storage'),], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),

    def _get_rootpath(self, cr, uid, context=None):
        # Default filestore path: <DMS_ROOT_PATH>/<database name>.
        return os.path.join(DMS_ROOT_PATH, cr.dbname)

    # NOTE(review): a '_defaults = {' opener (and its closing brace) are
    # missing from this excerpt around the default values below.
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage media. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,

    # NOTE(review): a '_sql_constraints = [' opener (and closing bracket)
    # are missing from this excerpt around the constraint below.
        # SQL note: a path = NULL doesn't have to be unique.
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
    def __get_random_fname(self, path):
        """Pick a storage location ('subdir/random-name') under *path*.

        Reuses an existing subdirectory holding fewer than 4000 entries,
        otherwise creates a fresh random one.
        NOTE(review): the initialisation of 'flag' (presumably None) and
        the loop body selecting a suitable subdirectory are missing from
        this excerpt.
        """
        # This can be improved
        if os.path.isdir(path):
            for dirs in os.listdir(path):
                # Only reuse a subdirectory while it holds < 4000 entries.
                if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
        # No suitable subdirectory found: create a fresh random one.
        flag = flag or create_directory(path)
        filename = random_name()
        return os.path.join(flag, filename)
    def __prepare_realpath(self, cr, file_node, ira, store_path, do_create=True):
        """ Cleanup path for realstore, create dirs if needed

        @param file_node the node
        @param ira ir.attachment browse of the file_node
        @param store_path the path of the parent storage object, list
        @param do_create create the directories, if needed

        @return tuple(path "/var/filestore/real/dir/", npath ['dir','fname.ext'] )
        """
        file_node.fix_ppath(cr, ira)
        npath = file_node.full_path() or []
        # npath may contain empty elements, for root directory etc.
        npath = filter(lambda x: x is not None, npath)
        # self._doclog.debug('Npath: %s', npath)
        # NOTE(review): the loop/condition headers validating each path
        # element (e.g. "for n in npath:" / "if n == '..':" /
        # "if ch in n:") are missing from this excerpt around the two
        # raises below.
        raise ValueError("Invalid '..' element in path")
        for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?',):
            raise ValueError("Invalid char %s in path %s" %(ch, n))
        dpath = [store_path,]
        # NOTE(review): a line extending dpath with the directory part of
        # npath appears to be missing here, as does the os.makedirs()
        # (guarded by do_create) and the final 'return path, npath'.
        path = os.path.join(*dpath)
        if not os.path.isdir(path):
            self._doclog.debug("Create dirs: %s", path)
    def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
        """ retrieve the contents of some file_node having storage_id = id
        optionally, fil_obj could point to the browse object of the file
        """
        boo = self.browse(cr, uid, id, context=context)
        # NOTE(review): an "if not boo.online:" guard appears to be
        # missing before this raise in the excerpt.
        raise IOError(errno.EREMOTE, 'medium offline')
        # NOTE(review): a branch using fil_obj (when supplied) instead of
        # a fresh browse is likely missing here.
        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        return self.__get_data_3(cr, uid, boo, ira, context)
    def get_file(self, cr, uid, id, file_node, mode, context=None):
        """ Return a file-like object for the contents of some node
        """
        boo = self.browse(cr, uid, id, context=context)
        # NOTE(review): an "if not boo.online:" guard appears to be
        # missing before this raise in the excerpt.
        raise IOError(errno.EREMOTE, 'medium offline')

        if boo.readonly and mode not in ('r', 'rb'):
            raise IOError(errno.EPERM, "Readonly medium")

        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if mode in ('r','r+'):
                    # NOTE(review): an inner condition appears to be
                    # missing before this warning in the excerpt.
                    self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                    raise IOError(errno.ENOENT, 'No file can be located')
                # NOTE(review): an 'else:' header (write modes: allocate
                # a fresh random name) is missing before the lines below.
                    store_fname = self.__get_random_fname(boo.path)
                    cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
                                (store_fname, ira.id))
                    fpath = os.path.join(boo.path, store_fname)
            # NOTE(review): an 'else:' header is missing before the next
            # line (the normal case: file already has a stored name).
            fpath = os.path.join(boo.path, ira.store_fname)
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'db':
            # TODO: we need a better api for large files
            return nodefd_db(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'db64':
            return nodefd_db64(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'realstore':
            path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path,
                            do_create = (mode[0] in ('w','a')) )
            fpath = os.path.join(path, npath[-1])
            if (not os.path.exists(fpath)) and mode[0] == 'r':
                raise IOError("File not found: %s" % fpath)
            elif mode[0] in ('w', 'a') and not ira.store_fname:
                # Record the relative path so subsequent reads find it.
                store_fname = os.path.join(*npath)
                cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
                            (store_fname, ira.id))
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')

        # NOTE(review): an 'else:' header is missing before this raise.
        raise TypeError("No %s storage" % boo.type)
    def __get_data_3(self, cr, uid, boo, ira, context):
        # Dispatch on the storage medium type and return the raw bytes of
        # the attachment's content.
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                # NOTE(review): the original presumably returned or raised
                # here; lines are missing from this excerpt.
            fpath = os.path.join(boo.path, ira.store_fname)
            return file(fpath, 'rb').read()
        elif boo.type == 'db64':
            # TODO: we need a better api for large files
            out = base64.decodestring(ira.db_datas)
            # NOTE(review): the 'return out' (and any empty-data handling)
            # appears to be missing from this excerpt.
        elif boo.type == 'db':
            # We do an explicit query, to avoid type transformations.
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
            # NOTE(review): the fetch + return lines are missing here.
        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
            fpath = os.path.join(boo.path,ira.store_fname or ira.name)
            if os.path.exists(fpath):
                return file(fpath,'rb').read()
            elif not ira.store_fname:
                # NOTE(review): branch body missing from this excerpt
                # (likely 'return None').
            raise IOError(errno.ENOENT, "File not found: %s" % fpath)
        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')
        # NOTE(review): an 'else:' header is missing before this raise.
        raise TypeError("No %s storage" % boo.type)
    def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
        """Store *data* for file_node on storage medium *id*.

        This function MUST be used from an ir.attachment. It wouldn't make sense
        to store things persistently for other types (dynamic).
        """
        boo = self.browse(cr, uid, id, context=context)
        # NOTE(review): a branch using fil_obj (when supplied) and the
        # "if not boo.online:" / "if boo.readonly:" guard headers are
        # missing from this excerpt around the lines below.
        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        raise IOError(errno.EREMOTE, 'medium offline')
        raise IOError(errno.EPERM, "Readonly medium")

        self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
        # NOTE(review): initialisations (store_fname/fname) appear to be
        # missing here.
        if boo.type == 'filestore':
            # NOTE(review): 'path = boo.path' and a 'try:' header appear
            # to be missing before the lines below.
            store_fname = self.__get_random_fname(path)
            fname = os.path.join(path, store_fname)
            fp = open(fname, 'wb')
            # NOTE(review): the fp.write(data) / fp.close() lines are
            # missing from this excerpt.
            self._doclog.debug( "Saved data to %s" % fname)
            filesize = len(data) # os.stat(fname).st_size
            # TODO Here, an old file would be left hanging.
            # NOTE(review): an 'except Exception, e:' handler header is
            # missing before the two lines below.
            self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
            raise except_orm(_('Error!'), str(e))
        elif boo.type == 'db':
            # will that work for huge data?
            out = psycopg2.Binary(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                        (out, file_node.file_id))
        elif boo.type == 'db64':
            # will that work for huge data?
            out = base64.encodestring(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                        (out, file_node.file_id))
        elif boo.type == 'realstore':
            # NOTE(review): a 'try:' header appears to be missing here.
            path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path, do_create=True)
            fname = os.path.join(path, npath[-1])
            fp = open(fname,'wb')
            # NOTE(review): fp.write(data) / fp.close() lines missing.
            self._doclog.debug("Saved data to %s", fname)
            filesize = len(data) # os.stat(fname).st_size
            store_fname = os.path.join(*npath)
            # TODO Here, an old file would be left hanging.
            # NOTE(review): an 'except Exception, e:' handler header is
            # missing before the two lines below.
            self._doclog.warning("Couldn't save data:", exc_info=True)
            raise except_orm(_('Error!'), str(e))
        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')
        # NOTE(review): an 'else:' header is missing before this raise.
        raise TypeError("No %s storage" % boo.type)

        # 2nd phase: store the metadata
        # NOTE(review): a 'try:' header and default mime/icont values
        # appear to be missing around the indexing below.
        mime, icont = cntIndex.doIndex(data, ira.datas_fname,
                ira.file_type or None, fname)
        # Indexing is best-effort: failures are only logged.
        self._doclog.debug('Cannot index file:', exc_info=True)
        icont_u = ustr(icont)
        # a hack: /assume/ that the calling write operation will not try
        # to write the fname and size, and update them in the db concurrently.
        # We cannot use a write() here, because we are already in one.
        cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
                (store_fname, filesize, icont_u, mime, file_node.file_id))
        file_node.content_length = filesize
        file_node.content_type = mime
        # NOTE(review): a 'return True' and the 'try:' matching the
        # handler below appear to be missing from this excerpt.
        except Exception, e :
            self._doclog.warning("Couldn't save data:", exc_info=True)
            # should we really rollback once we have written the actual data?
            # at the db case (only), that rollback would be safe
            raise except_orm(_('Error at doc write!'), str(e))
    def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
        """ Before we unlink a file (fil_boo), prepare the list of real
        files that have to be removed, too. """

        if not storage_bo.online:
            raise IOError(errno.EREMOTE, 'medium offline')

        if storage_bo.readonly:
            raise IOError(errno.EPERM, "Readonly medium")

        if storage_bo.type == 'filestore':
            fname = fil_bo.store_fname
            # NOTE(review): an "if not fname: return None" guard appears
            # to be missing here in the excerpt.
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        elif storage_bo.type in ('db', 'db64'):
            # NOTE(review): branch body missing -- nothing on disk for
            # db-backed storage; likely 'return None'.
        elif storage_bo.type == 'realstore':
            fname = fil_bo.store_fname
            # NOTE(review): missing "if not fname:" guard, as above.
            path = storage_bo.path
            return ( storage_bo.id, 'file', os.path.join(path, fname))
        # NOTE(review): an 'else:' header is missing before this raise.
        raise TypeError("No %s storage" % storage_bo.type)
    def do_unlink(self, cr, uid, unres):
        # Remove the real files listed by prepare_unlink(); best-effort,
        # failures are logged rather than raised.
        for id, ktype, fname in unres:
            # NOTE(review): the "if ktype == 'file':" dispatch and the
            # try/os.unlink/except scaffolding appear to be missing
            # around the warning below.
            self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
            # NOTE(review): an 'else:' header is missing before this line
            # (unknown unlink key case).
            self._doclog.warning("Unknown unlink key %s" % ktype)
        # NOTE(review): a final 'return True' appears to be missing.
    def simple_rename(self, cr, uid, file_node, new_name, context=None):
        """ A preparation for a file rename.
        It will not affect the database, but merely check and perhaps
        rename the realstore file.

        @return the dict of values that can safely be be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id

        # NOTE(review): "if not sbro.online:" / "if sbro.readonly:"
        # guard headers are missing before the two raises below.
        raise IOError(errno.EREMOTE, 'medium offline')
        raise IOError(errno.EPERM, "Readonly medium")

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'name': new_name, 'datas_fname': new_name }
        elif sbro.type == 'realstore':
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            path, npath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
            fname = ira.store_fname

            # NOTE(review): an "if not fname:" guard appears to be
            # missing before this warning in the excerpt.
            self._doclog.warning("Trying to rename a non-stored file")
            if fname != os.path.join(*npath):
                self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(npath))

            oldpath = os.path.join(path, npath[-1])
            newpath = os.path.join(path, new_name)
            os.rename(oldpath, newpath)
            # Rebuild the stored relative path with the new leaf name.
            store_path = npath[:-1]
            store_path.append(new_name)
            store_fname = os.path.join(*store_path)
            return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
        # NOTE(review): an 'else:' header is missing before this raise.
        raise TypeError("No %s storage" % sbro.type)
    def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
        """ A preparation for a file move.
        It will not affect the database, but merely check and perhaps
        move the realstore file.

        @param ndir_bro a browse object of document.directory, where this
        file should move to.
        @return the dict of values that can safely be be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id

        # NOTE(review): "if not sbro.online:" / "if sbro.readonly:"
        # guard headers are missing before the two raises below.
        raise IOError(errno.EREMOTE, 'medium offline')
        raise IOError(errno.EPERM, "Readonly medium")

        # NOTE(review): lines resolving 'par' (walking up from ndir_bro
        # to its storage) appear to be missing before this point.
        psto = par.storage_id.id
        if file_node.storage_id != psto:
            self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
            raise NotImplementedError('Cannot move files between storage media')

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'parent_id': ndir_bro.id }
        elif sbro.type == 'realstore':
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            path, opath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
            fname = ira.store_fname

            # NOTE(review): an "if not fname:" guard appears to be
            # missing before this warning in the excerpt.
            self._doclog.warning("Trying to rename a non-stored file")
            if fname != os.path.join(*opath):
                self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(opath))

            oldpath = os.path.join(path, opath[-1])

            # Build the destination directory from the target node's path.
            npath = [sbro.path,] + (ndir_bro.get_full_path() or [])
            npath = filter(lambda x: x is not None, npath)
            newdir = os.path.join(*npath)
            if not os.path.isdir(newdir):
                self._doclog.debug("Must create dir %s", newdir)
                # NOTE(review): an os.makedirs(newdir) call appears to be
                # missing here.
            npath.append(opath[-1])
            newpath = os.path.join(*npath)

            self._doclog.debug("Going to move %s from %s to %s", opath[-1], oldpath, newpath)
            shutil.move(oldpath, newpath)

            # Relative path stored in the db excludes the storage root.
            store_path = npath[1:] + [opath[-1],]
            store_fname = os.path.join(*store_path)

            return { 'store_fname': store_fname }
        # NOTE(review): an 'else:' header is missing before this raise.
        raise TypeError("No %s storage" % sbro.type)
790 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: