1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
29 from StringIO import StringIO
32 from tools.misc import ustr
33 from tools.translate import _
35 from osv.orm import except_orm
41 from content_index import cntIndex
43 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
46 """ The algorithm of data storage
48 We have to consider 3 cases of data /retrieval/:
49 Given (context,path) we need to access the file (aka. node).
50 given (directory, context), we need one of its children (for listings, views)
51 given (ir.attachment, context), we need its data and metadata (node).
53 For data /storage/ we have the cases:
54 Have (ir.attachment, context), we modify the file (save, update, rename etc).
55 Have (directory, context), we create a file.
56 Have (path, context), we create or modify a file.
58 Note that in all above cases, we don't explicitly choose the storage media,
59 but always require a context to be present.
Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
65 The algorithm says that in any of the above cases, our first goal is to locate
66 the node for any combination of search criteria. It would be wise NOT to
67 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
68 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
70 We also contain all the parenting loop code in one function. This is intentional,
71 because one day this will be optimized in the db (Pg 8.4).
78 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
82 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
85 def create_directory(path):
86 dir_name = random_name()
87 path = os.path.join(path, dir_name)
91 class nodefd_file(nodes.node_descriptor):
92 """ A descriptor to a real file
94 Inheriting directly from file doesn't work, since file exports
95 some read-only attributes (like 'name') that we don't like.
97 def __init__(self, parent, path, mode):
98 nodes.node_descriptor.__init__(self, parent)
99 self.__file = open(path, mode)
100 if mode.endswith('b'):
103 self._size = os.stat(path).st_size
105 for attr in ('closed', 'read', 'write', 'seek', 'tell', 'next'):
106 setattr(self,attr, getattr(self.__file, attr))
115 # TODO: locking in init, close()
116 fname = self.__file.name
119 if self.mode in ('w', 'w+', 'r+'):
120 par = self._get_parent()
121 cr = pooler.get_db(par.context.dbname).cursor()
125 if isinstance(filename, (tuple, list)):
126 filename = '/'.join(filename)
129 mime, icont = cntIndex.doIndex(None, filename=filename,
130 content_type=None, realfname=fname)
132 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
136 icont_u = ustr(icont)
141 fsize = os.stat(fname).st_size
142 cr.execute("UPDATE ir_attachment " \
143 " SET index_content = %s, file_type = %s, " \
146 (icont_u, mime, fsize, par.file_id))
147 par.content_length = fsize
148 par.content_type = mime
152 logging.getLogger('document.storage').warning('Cannot save file indexed content:', exc_info=True)
154 elif self.mode in ('a', 'a+' ):
156 par = self._get_parent()
157 cr = pooler.get_db(par.context.dbname).cursor()
158 fsize = os.stat(fname).st_size
159 cr.execute("UPDATE ir_attachment SET file_size = %s " \
161 (fsize, par.file_id))
162 par.content_length = fsize
166 logging.getLogger('document.storage').warning('Cannot save file appended content:', exc_info=True)
170 class nodefd_db(StringIO, nodes.node_descriptor):
171 """ A descriptor to db data
173 def __init__(self, parent, ira_browse, mode):
174 nodes.node_descriptor.__init__(self, parent)
176 if mode.endswith('b'):
179 if mode in ('r', 'r+'):
180 cr = ira_browse._cr # reuse the cursor of the browse object, just now
181 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
182 data = cr.fetchone()[0]
183 self._size = len(data)
184 StringIO.__init__(self, data)
185 elif mode in ('w', 'w+'):
186 StringIO.__init__(self, None)
187 # at write, we start at 0 (= overwrite), but have the original
188 # data available, in case of a seek()
190 StringIO.__init__(self, None)
192 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
193 raise IOError(errno.EINVAL, "Invalid file mode")
200 # we now open a *separate* cursor, to update the data.
201 # FIXME: this may be improved, for concurrency handling
202 par = self._get_parent()
203 # uid = par.context.uid
204 cr = pooler.get_db(par.context.dbname).cursor()
206 if self.mode in ('w', 'w+', 'r+'):
207 data = self.getvalue()
211 if isinstance(filename, (tuple, list)):
212 filename = '/'.join(filename)
215 mime, icont = cntIndex.doIndex(data, filename=filename,
216 content_type=None, realfname=None)
218 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
222 icont_u = ustr(icont)
226 out = psycopg2.Binary(data)
227 cr.execute("UPDATE ir_attachment " \
228 "SET db_datas = %s, file_size=%s, " \
229 " index_content= %s, file_type=%s " \
231 (out, len(data), icont_u, mime, par.file_id))
232 elif self.mode == 'a':
233 data = self.getvalue()
234 out = psycopg2.Binary(data)
235 cr.execute("UPDATE ir_attachment " \
236 "SET db_datas = COALESCE(db_datas,'') || %s, " \
237 " file_size = COALESCE(file_size, 0) + %s " \
239 (out, len(data), par.file_id))
242 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
248 class nodefd_db64(StringIO, nodes.node_descriptor):
249 """ A descriptor to db data, base64 (the old way)
251 It stores the data in base64 encoding at the db. Not optimal, but
252 the transparent compression of Postgres will save the day.
254 def __init__(self, parent, ira_browse, mode):
255 nodes.node_descriptor.__init__(self, parent)
257 if mode.endswith('b'):
260 if mode in ('r', 'r+'):
261 data = base64.decodestring(ira_browse.db_datas)
262 self._size = len(data)
263 StringIO.__init__(self, data)
264 elif mode in ('w', 'w+'):
265 StringIO.__init__(self, None)
266 # at write, we start at 0 (= overwrite), but have the original
267 # data available, in case of a seek()
269 StringIO.__init__(self, None)
271 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
272 raise IOError(errno.EINVAL, "Invalid file mode")
279 # we now open a *separate* cursor, to update the data.
280 # FIXME: this may be improved, for concurrency handling
281 par = self._get_parent()
282 # uid = par.context.uid
283 cr = pooler.get_db(par.context.dbname).cursor()
285 if self.mode in ('w', 'w+', 'r+'):
286 data = self.getvalue()
290 if isinstance(filename, (tuple, list)):
291 filename = '/'.join(filename)
294 mime, icont = cntIndex.doIndex(data, filename=filename,
295 content_type=None, realfname=None)
297 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
301 icont_u = ustr(icont)
305 cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
306 'index_content = %s, file_type = %s ' \
308 (base64.encodestring(data), len(data), icont_u, mime, par.file_id))
309 elif self.mode == 'a':
310 data = self.getvalue()
311 # Yes, we're obviously using the wrong representation for storing our
312 # data as base64-in-bytea
313 cr.execute("UPDATE ir_attachment " \
314 "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
315 " file_size = COALESCE(file_size, 0) + %s " \
317 (base64.encodestring(data), len(data), par.file_id))
320 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
326 class document_storage(osv.osv):
327 """ The primary object for data storage.
328 Each instance of this object is a storage media, in which our application
329 can store contents. The object here controls the behaviour of the storage
331 The referring document.directory-ies will control the placement of data
334 It is a bad idea to have multiple document.storage objects pointing to
335 the same tree of filesystem storage.
337 _name = 'document.storage'
338 _description = 'Storage Media'
339 _doclog = logging.getLogger('document')
342 'name': fields.char('Name', size=64, required=True, select=1),
343 'write_date': fields.datetime('Date Modified', readonly=True),
344 'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
345 'create_date': fields.datetime('Date Created', readonly=True),
346 'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
347 'user_id': fields.many2one('res.users', 'Owner'),
348 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
349 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
350 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
351 ('realstore','External file storage'),], 'Type', required=True),
352 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
353 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
354 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
357 def _get_rootpath(self, cr, uid, context=None):
358 return os.path.join(DMS_ROOT_PATH, cr.dbname)
361 'user_id': lambda self, cr, uid, ctx: uid,
362 'online': lambda *args: True,
363 'readonly': lambda *args: False,
364 # Note: the defaults below should only be used ONCE for the default
365 # storage media. All other times, we should create different paths at least.
366 'type': lambda *args: 'filestore',
367 'path': _get_rootpath,
370 # SQL note: a path = NULL doesn't have to be unique.
371 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
374 def __get_random_fname(self, path):
376 # This can be improved
377 if os.path.isdir(path):
378 for dirs in os.listdir(path):
379 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
382 flag = flag or create_directory(path)
383 filename = random_name()
384 return os.path.join(flag, filename)
386 def __prepare_realpath(self, cr, file_node, ira, store_path, do_create=True):
387 """ Cleanup path for realstore, create dirs if needed
389 @param file_node the node
390 @param ira ir.attachment browse of the file_node
391 @param store_path the path of the parent storage object, list
392 @param do_create create the directories, if needed
394 @return tuple(path "/var/filestore/real/dir/", npath ['dir','fname.ext'] )
396 file_node.fix_ppath(cr, ira)
397 npath = file_node.full_path() or []
398 # npath may contain empty elements, for root directory etc.
399 npath = filter(lambda x: x is not None, npath)
402 # self._doclog.debug('Npath: %s', npath)
405 raise ValueError("Invalid '..' element in path")
406 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?',):
408 raise ValueError("Invalid char %s in path %s" %(ch, n))
409 dpath = [store_path,]
411 path = os.path.join(*dpath)
412 if not os.path.isdir(path):
413 self._doclog.debug("Create dirs: %s", path)
417 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
418 """ retrieve the contents of some file_node having storage_id = id
419 optionally, fil_obj could point to the browse object of the file
424 boo = self.browse(cr, uid, id, context)
426 raise IOError(errno.EREMOTE, 'medium offline')
431 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
432 return self.__get_data_3(cr, uid, boo, ira, context)
434 def get_file(self, cr, uid, id, file_node, mode, context=None):
435 """ Return a file-like object for the contents of some node
439 boo = self.browse(cr, uid, id, context)
441 raise IOError(errno.EREMOTE, 'medium offline')
443 if boo.readonly and mode not in ('r', 'rb'):
444 raise IOError(errno.EPERM, "Readonly medium")
446 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
447 if boo.type == 'filestore':
448 if not ira.store_fname:
449 # On a migrated db, some files may have the wrong storage type
450 # try to fix their directory.
451 if mode in ('r','r+'):
453 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
454 raise IOError(errno.ENOENT, 'No file can be located')
456 store_fname = self.__get_random_fname(boo.path)
457 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
458 (store_fname, ira.id))
459 fpath = os.path.join(boo.path, store_fname)
461 fpath = os.path.join(boo.path, ira.store_fname)
462 return nodefd_file(file_node, path=fpath, mode=mode)
464 elif boo.type == 'db':
465 # TODO: we need a better api for large files
466 return nodefd_db(file_node, ira_browse=ira, mode=mode)
468 elif boo.type == 'db64':
469 return nodefd_db64(file_node, ira_browse=ira, mode=mode)
471 elif boo.type == 'realstore':
472 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path,
473 do_create = (mode[1] in ('w','a')) )
474 fpath = os.path.join(path, npath[-1])
475 if (not os.path.exists(fpath)) and mode[1] == 'r':
476 raise IOError("File not found: %s" % fpath)
477 elif mode[1] in ('w', 'a') and not ira.store_fname:
478 store_fname = os.path.join(*npath)
479 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
480 (store_fname, ira.id))
481 return nodefd_file(file_node, path=fpath, mode=mode)
483 elif boo.type == 'virtual':
484 raise ValueError('Virtual storage does not support static files')
487 raise TypeError("No %s storage" % boo.type)
489 def __get_data_3(self, cr, uid, boo, ira, context):
490 if boo.type == 'filestore':
491 if not ira.store_fname:
492 # On a migrated db, some files may have the wrong storage type
493 # try to fix their directory.
495 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
497 fpath = os.path.join(boo.path, ira.store_fname)
498 return file(fpath, 'rb').read()
499 elif boo.type == 'db64':
500 # TODO: we need a better api for large files
502 out = base64.decodestring(ira.db_datas)
506 elif boo.type == 'db':
507 # We do an explicit query, to avoid type transformations.
508 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
514 elif boo.type == 'realstore':
515 if not ira.store_fname:
516 # On a migrated db, some files may have the wrong storage type
517 # try to fix their directory.
519 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
521 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
522 if os.path.exists(fpath):
523 return file(fpath,'rb').read()
524 elif not ira.store_fname:
527 raise IOError(errno.ENOENT, "File not found: %s" % fpath)
529 elif boo.type == 'virtual':
530 raise ValueError('Virtual storage does not support static files')
533 raise TypeError("No %s storage" % boo.type)
535 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
537 This function MUST be used from an ir.attachment. It wouldn't make sense
538 to store things persistently for other types (dynamic).
542 boo = self.browse(cr, uid, id, context)
546 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
549 raise IOError(errno.EREMOTE, 'medium offline')
552 raise IOError(errno.EPERM, "Readonly medium")
554 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
557 if boo.type == 'filestore':
560 store_fname = self.__get_random_fname(path)
561 fname = os.path.join(path, store_fname)
562 fp = file(fname, 'wb')
565 self._doclog.debug( "Saved data to %s" % fname)
566 filesize = len(data) # os.stat(fname).st_size
568 # TODO Here, an old file would be left hanging.
571 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
572 raise except_orm(_('Error!'), str(e))
573 elif boo.type == 'db':
575 # will that work for huge data?
576 out = psycopg2.Binary(data)
577 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
578 (out, file_node.file_id))
579 elif boo.type == 'db64':
581 # will that work for huge data?
582 out = base64.encodestring(data)
583 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
584 (out, file_node.file_id))
585 elif boo.type == 'realstore':
587 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path, do_create=True)
588 fname = os.path.join(path, npath[-1])
589 fp = file(fname,'wb')
592 self._doclog.debug("Saved data to %s", fname)
593 filesize = len(data) # os.stat(fname).st_size
594 store_fname = os.path.join(*npath)
595 # TODO Here, an old file would be left hanging.
597 self._doclog.warning("Couldn't save data:", exc_info=True)
598 raise except_orm(_('Error!'), str(e))
600 elif boo.type == 'virtual':
601 raise ValueError('Virtual storage does not support static files')
604 raise TypeError("No %s storage" % boo.type)
606 # 2nd phase: store the metadata
613 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
614 ira.file_type or None, fname)
616 self._doclog.debug('Cannot index file:', exc_info=True)
620 icont_u = ustr(icont)
624 # a hack: /assume/ that the calling write operation will not try
625 # to write the fname and size, and update them in the db concurrently.
626 # We cannot use a write() here, because we are already in one.
627 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
628 (store_fname, filesize, icont_u, mime, file_node.file_id))
629 file_node.content_length = filesize
630 file_node.content_type = mime
632 except Exception, e :
633 self._doclog.warning("Couldn't save data:", exc_info=True)
634 # should we really rollback once we have written the actual data?
635 # at the db case (only), that rollback would be safe
636 raise except_orm(_('Error at doc write!'), str(e))
638 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
639 """ Before we unlink a file (fil_boo), prepare the list of real
640 files that have to be removed, too. """
642 if not storage_bo.online:
643 raise IOError(errno.EREMOTE, 'medium offline')
645 if storage_bo.readonly:
646 raise IOError(errno.EPERM, "Readonly medium")
648 if storage_bo.type == 'filestore':
649 fname = fil_bo.store_fname
652 path = storage_bo.path
653 return (storage_bo.id, 'file', os.path.join(path, fname))
654 elif storage_bo.type in ('db', 'db64'):
656 elif storage_bo.type == 'realstore':
657 fname = fil_bo.store_fname
660 path = storage_bo.path
661 return ( storage_bo.id, 'file', os.path.join(path, fname))
663 raise TypeError("No %s storage" % storage_bo.type)
665 def do_unlink(self, cr, uid, unres):
666 for id, ktype, fname in unres:
671 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
673 self._doclog.warning("Unknown unlink key %s" % ktype)
677 def simple_rename(self, cr, uid, file_node, new_name, context=None):
678 """ A preparation for a file rename.
679 It will not affect the database, but merely check and perhaps
680 rename the realstore file.
682 @return the dict of values that can safely be be stored in the db.
684 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
685 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
688 raise IOError(errno.EREMOTE, 'medium offline')
691 raise IOError(errno.EPERM, "Readonly medium")
693 if sbro.type in ('filestore', 'db', 'db64'):
694 # nothing to do for a rename, allow to change the db field
695 return { 'name': new_name, 'datas_fname': new_name }
696 elif sbro.type == 'realstore':
697 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
699 path, npath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
700 fname = ira.store_fname
703 self._doclog.warning("Trying to rename a non-stored file")
704 if fname != os.path.join(*npath):
705 self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(npath))
707 oldpath = os.path.join(path, npath[-1])
708 newpath = os.path.join(path, new_name)
709 os.rename(oldpath, newpath)
710 store_path = npath[:-1]
711 store_path.append(new_name)
712 store_fname = os.path.join(*store_path)
713 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
715 raise TypeError("No %s storage" % sbro.type)
717 def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
718 """ A preparation for a file move.
719 It will not affect the database, but merely check and perhaps
720 move the realstore file.
722 @param ndir_bro a browse object of document.directory, where this
724 @return the dict of values that can safely be be stored in the db.
726 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
727 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
730 raise IOError(errno.EREMOTE, 'medium offline')
733 raise IOError(errno.EPERM, "Readonly medium")
739 psto = par.storage_id.id
742 if file_node.storage_id != psto:
743 self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
744 raise NotImplementedError('Cannot move files between storage media')
746 if sbro.type in ('filestore', 'db', 'db64'):
747 # nothing to do for a rename, allow to change the db field
748 return { 'parent_id': ndir_bro.id }
749 elif sbro.type == 'realstore':
750 raise NotImplementedError("Cannot move in realstore, yet") # TODO
751 fname = fil_bo.store_fname
753 return ValueError("Tried to rename a non-stored file")
755 oldpath = os.path.join(path, fname)
757 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
759 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
761 file_node.fix_ppath(cr, ira)
762 npath = file_node.full_path() or []
764 dpath.extend(npath[:-1])
765 dpath.append(new_name)
766 newpath = os.path.join(*dpath)
767 # print "old, new paths:", oldpath, newpath
768 os.rename(oldpath, newpath)
769 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
771 raise TypeError("No %s storage" % sbro.type)