1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
30 from StringIO import StringIO
32 from tools.misc import ustr
33 from tools.translate import _
34 from osv.orm import except_orm
39 from content_index import cntIndex
# Module-wide logger shared by all storage classes below.
_logger = logging.getLogger(__name__)
# Root directory of the on-disk document filestore: the 'document_path'
# config option when set, otherwise <root_path>/filestore.
DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
44 """ The algorithm of data storage
46 We have to consider 3 cases of data /retrieval/:
47 Given (context,path) we need to access the file (aka. node).
48 given (directory, context), we need one of its children (for listings, views)
49 given (ir.attachment, context), we need its data and metadata (node).
51 For data /storage/ we have the cases:
52 Have (ir.attachment, context), we modify the file (save, update, rename etc).
53 Have (directory, context), we create a file.
54 Have (path, context), we create or modify a file.
56 Note that in all above cases, we don't explicitly choose the storage media,
57 but always require a context to be present.
59 Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
63 The algorithm says that in any of the above cases, our first goal is to locate
64 the node for any combination of search criteria. It would be wise NOT to
65 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
66 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
68 We also contain all the parenting loop code in one function. This is intentional,
69 because one day this will be optimized in the db (Pg 8.4).
76 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
80 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
def create_directory(path):
    """Build the path of a new randomly-named subdirectory of *path*.

    NOTE(review): interior lines of this function are missing from this chunk
    (the actual directory creation and the return statement are not visible) —
    confirm against the complete file.
    """
    dir_name = random_name()
    path = os.path.join(path, dir_name)
class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file

    Inheriting directly from file doesn't work, since file exports
    some read-only attributes (like 'name') that we don't like.
    """
    # NOTE(review): this chunk is missing interior source lines throughout the
    # class (the original file's line numbering jumps): try/except scaffolding,
    # the `def close(self):` header, and several statements are not visible,
    # so the code below does not parse as shown. Review against the full file.

    def __init__(self, parent, path, mode):
        # `parent` is the owning node; wrap the real OS file at `path`.
        nodes.node_descriptor.__init__(self, parent)
        self.__file = open(path, mode)
        if mode.endswith('b'):
            # binary mode — remainder of this branch is not visible here
            self._size = os.stat(path).st_size
        # Delegate the plain file API straight to the wrapped file object.
        for attr in ('closed', 'read', 'write', 'seek', 'tell', 'next'):
            setattr(self,attr, getattr(self.__file, attr))

        # TODO: locking in init, close()
        # NOTE(review): the statements below appear to be the body of a
        # close() method whose `def` line is not visible in this chunk; they
        # re-index the written file and refresh ir_attachment metadata.
        fname = self.__file.name
        if self.mode in ('w', 'w+', 'r+'):
            # File was (re)written: index content and update metadata on a
            # fresh cursor for this database.
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            # NOTE(review): `filename` is used before any visible assignment —
            # its initialisation is in the missing lines.
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
            mime, icont = cntIndex.doIndex(None, filename=filename,
                    content_type=None, realfname=fname)
            _logger.debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            fsize = os.stat(fname).st_size
            # NOTE(review): parts of this SQL statement (the remaining SET
            # clauses / WHERE clause) are not visible in this chunk.
            cr.execute("UPDATE ir_attachment " \
                    " SET index_content = %s, file_type = %s, " \
                    (icont_u, mime, fsize, par.file_id))
            par.content_length = fsize
            par.content_type = mime
            _logger.warning('Cannot save file indexed content:', exc_info=True)
        elif self.mode in ('a', 'a+' ):
            # Append mode: only the stored file size needs refreshing.
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            fsize = os.stat(fname).st_size
            # NOTE(review): the WHERE clause line of this statement is missing.
            cr.execute("UPDATE ir_attachment SET file_size = %s " \
                    (fsize, par.file_id))
            par.content_length = fsize
            _logger.warning('Cannot save file appended content:', exc_info=True)
class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data
    """
    # NOTE(review): interior source lines are missing from this chunk (mode
    # normalisation, an `elif mode == 'a':` / `else:` pair, the
    # `def close(self):` header and its try/except scaffolding), so the
    # statements below do not parse as shown. Review against the full file.

    def __init__(self, parent, ira_browse, mode):
        # `ira_browse` is the ir.attachment browse record backing this node.
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
        if mode in ('r', 'r+'):
            # Read path: fetch db_datas straight from the attachment row.
            cr = ira_browse._cr # reuse the cursor of the browse object, just now
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
            data = cr.fetchone()[0]
            self._size = len(data)
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
            # NOTE(review): the `elif`/`else` headers around the next three
            # statements are not visible in this chunk.
            StringIO.__init__(self, None)
            _logger.error("Incorrect mode %s is specified.", mode)
            raise IOError(errno.EINVAL, "Invalid file mode.")

        # NOTE(review): the lines below appear to be the body of close(),
        # whose `def` line is missing here; it writes the buffered data back
        # to ir_attachment on a separate cursor.
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        # uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        if self.mode in ('w', 'w+', 'r+'):
            data = self.getvalue()
            # NOTE(review): `filename` is used before any visible assignment —
            # its initialisation is in the missing lines.
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
            mime, icont = cntIndex.doIndex(data, filename=filename,
                    content_type=None, realfname=None)
            _logger.debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            # Store raw bytes through the psycopg2 Binary adapter, together
            # with the refreshed size/index/mime metadata.
            out = psycopg2.Binary(data)
            # NOTE(review): the WHERE-clause line of this SQL is not visible.
            cr.execute("UPDATE ir_attachment " \
                "SET db_datas = %s, file_size=%s, " \
                " index_content= %s, file_type=%s " \
                (out, len(data), icont_u, mime, par.file_id))
        elif self.mode == 'a':
            # Append: concatenate onto the existing bytea and bump file_size.
            data = self.getvalue()
            out = psycopg2.Binary(data)
            # NOTE(review): the WHERE-clause line of this SQL is not visible.
            cr.execute("UPDATE ir_attachment " \
                "SET db_datas = COALESCE(db_datas,'') || %s, " \
                " file_size = COALESCE(file_size, 0) + %s " \
                (out, len(data), par.file_id))
        _logger.exception('Cannot update db file #%d for close.', par.file_id)
class nodefd_db64(StringIO, nodes.node_descriptor):
    """ A descriptor to db data, base64 (the old way)

    It stores the data in base64 encoding at the db. Not optimal, but
    the transparent compression of Postgres will save the day.
    """
    # NOTE(review): interior source lines are missing from this chunk (mode
    # normalisation, an `elif mode == 'a':` / `else:` pair, the
    # `def close(self):` header and its try/except scaffolding), so the
    # statements below do not parse as shown. Review against the full file.

    def __init__(self, parent, ira_browse, mode):
        # `ira_browse` is the ir.attachment browse record backing this node.
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
        if mode in ('r', 'r+'):
            # Read path: data is stored base64-encoded in db_datas.
            data = base64.decodestring(ira_browse.db_datas)
            self._size = len(data)
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
            # NOTE(review): the `elif`/`else` headers around the next three
            # statements are not visible in this chunk.
            StringIO.__init__(self, None)
            _logger.error("Incorrect mode %s is specified.", mode)
            raise IOError(errno.EINVAL, "Invalid file mode.")

        # NOTE(review): the lines below appear to be the body of close(),
        # whose `def` line is missing here; it writes the buffered data back
        # to ir_attachment (base64-encoded) on a separate cursor.
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        # uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        if self.mode in ('w', 'w+', 'r+'):
            data = self.getvalue()
            # NOTE(review): `filename` is used before any visible assignment —
            # its initialisation is in the missing lines.
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
            mime, icont = cntIndex.doIndex(data, filename=filename,
                    content_type=None, realfname=None)
            # NOTE(review): `self.logger` is not defined on this class; the
            # sibling classes use the module-level `_logger` — this looks like
            # a leftover from a logging refactor and would raise AttributeError
            # if this line is reached. Confirm and align with `_logger`.
            self.logger.debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
                    'index_content = %s, file_type = %s ' \
                    (base64.encodestring(data), len(data), icont_u, mime, par.file_id))
        elif self.mode == 'a':
            data = self.getvalue()
            # Yes, we're obviously using the wrong representation for storing our
            # data as base64-in-bytea
            # NOTE(review): the WHERE-clause line of this SQL is not visible.
            cr.execute("UPDATE ir_attachment " \
                "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
                " file_size = COALESCE(file_size, 0) + %s " \
                (base64.encodestring(data), len(data), par.file_id))
        _logger.exception('Cannot update db file #%d for close.', par.file_id)
class document_storage(osv.osv):
    """ The primary object for data storage.
    Each instance of this object is a storage media, in which our application
    can store contents. The object here controls the behaviour of the storage
    The referring document.directory-ies will control the placement of data
    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    # NOTE(review): many interior source lines of this class are missing from
    # this chunk (dict/list openers such as `_columns = {` / `_defaults = {` /
    # `_sql_constraints = [`, try/except scaffolding, `else:` headers, several
    # statements). The code below therefore does not parse as shown and must
    # be reviewed against the complete file; NOTE(review) markers flag the
    # evident gaps.
    _name = 'document.storage'
    _description = 'Storage Media'
    # NOTE(review): the `_columns = {` opener (and closing brace) for the
    # field definitions below are not visible in this chunk.
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                ('realstore','External file storage'),], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),

    def _get_rootpath(self, cr, uid, context=None):
        # Default storage path: one filestore directory per database.
        return os.path.join(DMS_ROOT_PATH, cr.dbname)

    # NOTE(review): the `_defaults = {` opener for the entries below is not
    # visible in this chunk.
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage media. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,

    # NOTE(review): the `_sql_constraints = [` opener is not visible here.
        # SQL note: a path = NULL doesn't have to be unique.
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")

    def __get_random_fname(self, path):
        # Pick a subdirectory of `path` holding fewer than 4000 entries (or
        # create a fresh one) and return a random file path inside it.
        # This can be improved
        if os.path.isdir(path):
            for dirs in os.listdir(path):
                if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
                    # NOTE(review): the loop body (recording `dirs`) and the
                    # initial assignment of `flag` are not visible here.
        flag = flag or create_directory(path)
        filename = random_name()
        return os.path.join(flag, filename)

    def __prepare_realpath(self, cr, file_node, ira, store_path, do_create=True):
        """ Cleanup path for realstore, create dirs if needed

        @param file_node the node
        @param ira ir.attachment browse of the file_node
        @param store_path the path of the parent storage object, list
        @param do_create create the directories, if needed

        @return tuple(path "/var/filestore/real/dir/", npath ['dir','fname.ext'] )
        """
        file_node.fix_ppath(cr, ira)
        npath = file_node.full_path() or []
        # npath may contain empty elements, for root directory etc.
        npath = filter(lambda x: x is not None, npath)
        # self._logger.debug('Npath: %s', npath)
        # NOTE(review): the `for n in npath:` / `if n == '..':` headers that
        # wrap the validation below are not visible in this chunk.
                raise ValueError("Invalid '..' element in path.")
            for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?',):
                    raise ValueError("Invalid char %s in path %s." %(ch, n))
        dpath = [store_path,]
        path = os.path.join(*dpath)
        if not os.path.isdir(path):
            _logger.debug("Create dirs: %s", path)
            # NOTE(review): the directory-creation call and the function's
            # return statement are not visible in this chunk.

    def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
        """ retrieve the contents of some file_node having storage_id = id
        optionally, fil_obj could point to the browse object of the file
        """
        boo = self.browse(cr, uid, id, context=context)
        # NOTE(review): the `if not boo.online:` guard header is not visible.
            raise IOError(errno.EREMOTE, 'Medium offline.')
        # NOTE(review): the `if fil_obj:`/`else:` branch headers are missing.
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        return self.__get_data_3(cr, uid, boo, ira, context)

    def get_file(self, cr, uid, id, file_node, mode, context=None):
        """ Return a file-like object for the contents of some node
        """
        boo = self.browse(cr, uid, id, context=context)
        # NOTE(review): the `if not boo.online:` guard header is not visible.
            raise IOError(errno.EREMOTE, 'Medium offline.')

        if boo.readonly and mode not in ('r', 'rb'):
            raise IOError(errno.EPERM, "Readonly medium.")

        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if mode in ('r','r+'):
                    # Reading a file that was never stored: nothing to serve.
                    _logger.warning( "ir.attachment #%d does not have a filename, but is at filestore. This should get fixed." % ira.id)
                    raise IOError(errno.ENOENT, 'No file can be located.')
                # NOTE(review): the `else:` header for the write path below is
                # not visible; it allocates a fresh random filestore name.
                    store_fname = self.__get_random_fname(boo.path)
                    cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
                            (store_fname, ira.id))
                    fpath = os.path.join(boo.path, store_fname)
            # NOTE(review): the `else:` header before this line is missing.
                fpath = os.path.join(boo.path, ira.store_fname)
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'db':
            # TODO: we need a better api for large files
            return nodefd_db(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'db64':
            return nodefd_db64(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'realstore':
            # Only create missing directories when opening for write/append.
            path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path,
                    do_create = (mode[0] in ('w','a')) )
            fpath = os.path.join(path, npath[-1])
            if (not os.path.exists(fpath)) and mode[0] == 'r':
                raise IOError("File not found: %s." % fpath)
            elif mode[0] in ('w', 'a') and not ira.store_fname:
                # Record the computed relative path on first write/append.
                store_fname = os.path.join(*npath)
                cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
                        (store_fname, ira.id))
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static file(s).')

        # NOTE(review): the final `else:` header is not visible in this chunk.
            raise TypeError("No %s storage." % boo.type)

    def __get_data_3(self, cr, uid, boo, ira, context):
        # Low-level content fetch, dispatching on the storage type of `boo`.
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                _logger.warning( "ir.attachment #%d does not have a filename, but is at filestore. This should get fixed." % ira.id)
                # NOTE(review): surrounding statements of this branch are
                # missing from this chunk.
            fpath = os.path.join(boo.path, ira.store_fname)
            return file(fpath, 'rb').read()
        elif boo.type == 'db64':
            # TODO: we need a better api for large files
            out = base64.decodestring(ira.db_datas)
            # NOTE(review): the return statement of this branch is missing.
        elif boo.type == 'db':
            # We do an explicit query, to avoid type transformations.
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
            # NOTE(review): the fetch/return statements are not visible here.
        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                _logger.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
            # Fall back to the attachment name when no stored name exists.
            fpath = os.path.join(boo.path,ira.store_fname or ira.name)
            if os.path.exists(fpath):
                return file(fpath,'rb').read()
            elif not ira.store_fname:
                # NOTE(review): this branch's body and the `else:` header of
                # the raise below are not visible in this chunk.
                raise IOError(errno.ENOENT, "File not found: %s." % fpath)

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static file(s).')

        # NOTE(review): the final `else:` header is not visible here.
            raise TypeError("No %s storage!" % boo.type)

    def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
        """
        This function MUST be used from an ir.attachment. It wouldn't make sense
        to store things persistently for other types (dynamic).
        """
        boo = self.browse(cr, uid, id, context=context)
        # NOTE(review): the `if fil_obj:`/`else:` headers are not visible.
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        # NOTE(review): the online/readonly guard headers are not visible.
            raise IOError(errno.EREMOTE, 'Medium offline.')
            raise IOError(errno.EPERM, "Readonly medium.")

        _logger.debug( "Store data for ir.attachment #%d." % ira.id)

        if boo.type == 'filestore':
            # NOTE(review): the enclosing `try:` and the assignment of `path`
            # are not visible in this chunk.
                store_fname = self.__get_random_fname(path)
                fname = os.path.join(path, store_fname)
                fp = open(fname, 'wb')
                # NOTE(review): the write/close of `fp` is not visible here.
                _logger.debug( "Saved data to %s." % fname)
                filesize = len(data) # os.stat(fname).st_size
                # TODO Here, an old file would be left hanging.
            # NOTE(review): the `except Exception, e:` header is not visible.
                _logger.warning( "Cannot save data to %s.", path, exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'db':
            # will that work for huge data?
            out = psycopg2.Binary(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'db64':
            # will that work for huge data?
            out = base64.encodestring(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'realstore':
            # NOTE(review): the enclosing `try:` is not visible in this chunk.
                path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path, do_create=True)
                fname = os.path.join(path, npath[-1])
                fp = open(fname,'wb')
                # NOTE(review): the write/close of `fp` is not visible here.
                _logger.debug("Saved data to %s.", fname)
                filesize = len(data) # os.stat(fname).st_size
                store_fname = os.path.join(*npath)
                # TODO Here, an old file would be left hanging.
            # NOTE(review): the `except Exception, e:` header is not visible.
                _logger.warning("Cannot save data.", exc_info=True)
                raise except_orm(_('Error!'), str(e))

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static file(s).')

        # NOTE(review): the final `else:` header is not visible here.
            raise TypeError("No %s storage!" % boo.type)

        # 2nd phase: store the metadata
        # NOTE(review): the `try:` opening this phase (matching the orphan
        # `except` near the end of the method) is not visible in this chunk.
            mime, icont = cntIndex.doIndex(data, ira.datas_fname,
                    ira.file_type or None, fname)
            _logger.debug('Cannot index file.', exc_info=True)
            icont_u = ustr(icont)
            # a hack: /assume/ that the calling write operation will not try
            # to write the fname and size, and update them in the db concurrently.
            # We cannot use a write() here, because we are already in one.
            cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
                    (store_fname, filesize, icont_u, mime, file_node.file_id))
            file_node.content_length = filesize
            file_node.content_type = mime
        except Exception, e :
            # NOTE(review): `self._logger` — the module otherwise uses the
            # module-level `_logger`; likely a leftover from a logging
            # refactor that would raise AttributeError here. Confirm.
            self._logger.warning("Cannot save data.", exc_info=True)
            # should we really rollback once we have written the actual data?
            # at the db case (only), that rollback would be safe
            raise except_orm(_('Error at doc write!'), str(e))

    def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
        """ Before we unlink a file (fil_boo), prepare the list of real
        files that have to be removed, too. """

        if not storage_bo.online:
            raise IOError(errno.EREMOTE, 'Medium offline.')

        if storage_bo.readonly:
            raise IOError(errno.EPERM, "Readonly medium.")

        if storage_bo.type == 'filestore':
            fname = fil_bo.store_fname
            # NOTE(review): a guard on empty `fname` appears to be missing
            # from this chunk.
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        elif storage_bo.type in ('db', 'db64'):
            # NOTE(review): this branch's body is not visible in this chunk.
        elif storage_bo.type == 'realstore':
            fname = fil_bo.store_fname
            # NOTE(review): a guard on empty `fname` appears to be missing.
            path = storage_bo.path
            return ( storage_bo.id, 'file', os.path.join(path, fname))
        # NOTE(review): the final `else:` header is not visible here.
            raise TypeError("No %s storage!" % storage_bo.type)

    def do_unlink(self, cr, uid, unres):
        # Remove the real files collected by prepare_unlink(); each entry is
        # an (storage_id, kind, fname) tuple.
        for id, ktype, fname in unres:
            # NOTE(review): the control flow dispatching on `ktype` and the
            # actual removal call are not visible in this chunk; only the two
            # warning paths below remain.
                _logger.warning("Cannot remove file %s, please remove it manually.", fname, exc_info=True)
                _logger.warning("Unlink unknown key %s." % ktype)

    def simple_rename(self, cr, uid, file_node, new_name, context=None):
        """ A preparation for a file rename.
        It will not affect the database, but merely check and perhaps
        rename the realstore file.

        @return the dict of values that can safely be be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id
        # NOTE(review): the online/readonly guard headers are not visible.
            raise IOError(errno.EREMOTE, 'Medium offline.')
            raise IOError(errno.EPERM, "Readonly medium.")

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'name': new_name, 'datas_fname': new_name }
        elif sbro.type == 'realstore':
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            path, npath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
            fname = ira.store_fname
            # NOTE(review): the `if not fname:` header for the warning below
            # is not visible in this chunk.
                _logger.warning("Trying to rename a non-stored file.")
            if fname != os.path.join(*npath):
                _logger.warning("Inconsistency to realstore: %s != %s." , fname, repr(npath))

            oldpath = os.path.join(path, npath[-1])
            newpath = os.path.join(path, new_name)
            os.rename(oldpath, newpath)
            store_path = npath[:-1]
            store_path.append(new_name)
            store_fname = os.path.join(*store_path)
            return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
        # NOTE(review): the final `else:` header is not visible here.
            raise TypeError("No %s storage!" % sbro.type)

    def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
        """ A preparation for a file move.
        It will not affect the database, but merely check and perhaps
        move the realstore file.

        @param ndir_bro a browse object of document.directory, where this
        @return the dict of values that can safely be be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id
        # NOTE(review): the online/readonly guard headers are not visible.
            raise IOError(errno.EREMOTE, 'Medium offline.')
            raise IOError(errno.EPERM, "Readonly medium.")

        # NOTE(review): the lines computing `par` (apparently the target
        # directory's parent chain) are not visible in this chunk.
        psto = par.storage_id.id
        # Moving between different storage media is not supported.
        if file_node.storage_id != psto:
            _logger.debug('Cannot move file %r from %r to %r.', file_node, file_node.parent, ndir_bro.name)
            raise NotImplementedError('Cannot move file(s) between storage media.')

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'parent_id': ndir_bro.id }
        elif sbro.type == 'realstore':
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            path, opath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
            fname = ira.store_fname
            # NOTE(review): the `if not fname:` header for the warning below
            # is not visible in this chunk.
                _logger.warning("Trying to rename a non-stored file.")
            if fname != os.path.join(*opath):
                _logger.warning("Inconsistency to realstore: %s != %s." , fname, repr(opath))

            oldpath = os.path.join(path, opath[-1])

            npath = [sbro.path,] + (ndir_bro.get_full_path() or [])
            npath = filter(lambda x: x is not None, npath)
            newdir = os.path.join(*npath)
            if not os.path.isdir(newdir):
                _logger.debug("Must create dir %s.", newdir)
                # NOTE(review): the directory-creation call is not visible.
            npath.append(opath[-1])
            newpath = os.path.join(*npath)

            _logger.debug("Going to move %s from %s to %s.", opath[-1], oldpath, newpath)
            shutil.move(oldpath, newpath)

            # NOTE(review): `npath` already ends with opath[-1] (appended
            # above), so `npath[1:] + [opath[-1],]` appears to duplicate the
            # final path component in store_fname — verify against the full
            # file in case intermediate (missing) lines reassign npath.
            store_path = npath[1:] + [opath[-1],]
            store_fname = os.path.join(*store_path)

            return { 'store_fname': store_fname }
        # NOTE(review): the final `else:` header is not visible here.
            raise TypeError("No %s storage." % sbro.type)
786 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: