1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
30 from StringIO import StringIO
33 from tools.misc import ustr
34 from tools.translate import _
36 from osv.orm import except_orm
42 from content_index import cntIndex
# Root of the on-disk document filestore: the 'document_path' config option,
# falling back to <root_path>/filestore. A per-database subdirectory is
# appended later (see document_storage._get_rootpath).
DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
47 """ The algorithm of data storage
49 We have to consider 3 cases of data /retrieval/:
50 Given (context,path) we need to access the file (aka. node).
51 given (directory, context), we need one of its children (for listings, views)
52 given (ir.attachment, context), we need its data and metadata (node).
54 For data /storage/ we have the cases:
55 Have (ir.attachment, context), we modify the file (save, update, rename etc).
56 Have (directory, context), we create a file.
57 Have (path, context), we create or modify a file.
59 Note that in all above cases, we don't explicitly choose the storage media,
60 but always require a context to be present.
Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
66 The algorithm says that in any of the above cases, our first goal is to locate
67 the node for any combination of search criteria. It would be wise NOT to
68 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
69 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
71 We also contain all the parenting loop code in one function. This is intentional,
72 because one day this will be optimized in the db (Pg 8.4).
def random_name():
    """Return a random 10-character name made of ASCII letters.

    Used to build collision-unlikely file and directory names inside the
    filestore (see create_directory and document_storage.__get_random_fname).

    NOTE(review): the 'def' and 'return' lines of this function were lost in
    this copy of the file, leaving a dangling statement; restored to match
    its callers.
    """
    random.seed()
    # 'range' behaves identically to the original 'xrange' in this
    # comprehension and also runs on Python 3.
    d = [random.choice(string.ascii_letters) for x in range(10)]
    name = ''.join(d)
    return name
# Characters that are invalid in (Windows-style) file names, mapped to safe
# replacement tokens: '/' becomes '__', every other character becomes the
# string form of its hash.
INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
def create_directory(path):
    """Create a new randomly-named subdirectory under *path*.

    @param path parent directory (must already exist)
    @return the name (not the full path) of the created subdirectory

    NOTE(review): in this copy of the file the mkdir/return lines were
    missing, so the function built a path string but created nothing and
    returned None. Restored so the directory is actually created and its
    name handed back to the caller (__get_random_fname relies on that).
    """
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    os.makedirs(path)
    return dir_name
class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file

        Inheriting directly from file doesn't work, since file exports
        some read-only attributes (like 'name') that we don't like.
    """
    def __init__(self, parent, path, mode):
        nodes.node_descriptor.__init__(self, parent)
        self.__file = open(path, mode)
        if mode.endswith('b'):
            # strip the binary flag; only the logical mode matters below
            mode = mode[:-1]
        self.mode = mode
        self._size = os.stat(path).st_size

        # delegate the plain file-object API straight to the real file
        for attr in ('closed', 'read', 'write', 'seek', 'tell', 'next'):
            setattr(self, attr, getattr(self.__file, attr))

    def size(self):
        return self._size

    def close(self):
        """Close the file and, for writable modes, push size/index metadata
        of the written file back into ir_attachment.

        NOTE(review): the try/except scaffolding, the file close and the
        cursor commit/close were missing lines in this copy; restored so
        the handle is released and the DB cursor is not leaked.
        """
        # TODO: locking in init, close()
        fname = self.__file.name
        self.__file.close()

        if self.mode in ('w', 'w+', 'r+'):
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            icont = ''
            mime = ''
            filename = par.path
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)

            try:
                # best-effort content indexing; failure must not block the close
                mime, icont = cntIndex.doIndex(None, filename=filename,
                        content_type=None, realfname=fname)
            except Exception:
                logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)

            try:
                icont_u = ustr(icont)
            except UnicodeError:
                icont_u = ''

            try:
                fsize = os.stat(fname).st_size
                cr.execute("UPDATE ir_attachment " \
                            " SET index_content = %s, file_type = %s, " \
                            " file_size = %s " \
                            " WHERE id = %s",
                            (icont_u, mime, fsize, par.file_id))
                par.content_length = fsize
                par.content_type = mime
                cr.commit()
                cr.close()
            except Exception:
                logging.getLogger('document.storage').warning('Cannot save file indexed content:', exc_info=True)

        elif self.mode in ('a', 'a+' ):
            try:
                par = self._get_parent()
                cr = pooler.get_db(par.context.dbname).cursor()
                fsize = os.stat(fname).st_size
                cr.execute("UPDATE ir_attachment SET file_size = %s " \
                            " WHERE id = %s",
                            (fsize, par.file_id))
                par.content_length = fsize
                cr.commit()
                cr.close()
            except Exception:
                logging.getLogger('document.storage').warning('Cannot save file appended content:', exc_info=True)
class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data (raw bytea column of ir_attachment).
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        self._size = 0
        if mode.endswith('b'):
            # strip the binary flag; all db data is binary anyway
            mode = mode[:-1]

        if mode in ('r', 'r+'):
            cr = ira_browse._cr # reuse the cursor of the browse object, just now
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
            data = cr.fetchone()[0]
            self._size = len(data)
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode

    def size(self):
        return self._size

    def close(self):
        """Flush the buffered data back into ir_attachment.db_datas.

        NOTE(review): the try/finally structure, commit and cursor close
        were missing lines in this copy; restored so the separate cursor
        is always released.
        """
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        # uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                data = self.getvalue()
                icont = ''
                mime = ''
                filename = par.path
                if isinstance(filename, (tuple, list)):
                    filename = '/'.join(filename)

                try:
                    # best-effort content indexing; failure must not block the close
                    mime, icont = cntIndex.doIndex(data, filename=filename,
                            content_type=None, realfname=None)
                except Exception:
                    logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)

                try:
                    icont_u = ustr(icont)
                except UnicodeError:
                    icont_u = ''

                out = psycopg2.Binary(data)
                cr.execute("UPDATE ir_attachment " \
                            "SET db_datas = %s, file_size=%s, " \
                            " index_content= %s, file_type=%s " \
                            " WHERE id = %s",
                            (out, len(data), icont_u, mime, par.file_id))
            elif self.mode == 'a':
                data = self.getvalue()
                out = psycopg2.Binary(data)
                cr.execute("UPDATE ir_attachment " \
                            "SET db_datas = COALESCE(db_datas,'') || %s, " \
                            " file_size = COALESCE(file_size, 0) + %s " \
                            " WHERE id = %s",
                            (out, len(data), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)
class nodefd_db64(StringIO, nodes.node_descriptor):
    """ A descriptor to db data, base64 (the old way)

        It stores the data in base64 encoding at the db. Not optimal, but
        the transparent compression of Postgres will save the day.
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        self._size = 0
        if mode.endswith('b'):
            # strip the binary flag; all db data is binary anyway
            mode = mode[:-1]

        if mode in ('r', 'r+'):
            data = base64.decodestring(ira_browse.db_datas)
            self._size = len(data)
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode

    def size(self):
        return self._size

    def close(self):
        """Flush the buffered data back into ir_attachment.db_datas as base64.

        NOTE(review): the try/finally structure, commit and cursor close
        were missing lines in this copy; restored so the separate cursor
        is always released.
        """
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        # uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                data = self.getvalue()
                icont = ''
                mime = ''
                filename = par.path
                if isinstance(filename, (tuple, list)):
                    filename = '/'.join(filename)

                try:
                    # best-effort content indexing; failure must not block the close
                    mime, icont = cntIndex.doIndex(data, filename=filename,
                            content_type=None, realfname=None)
                except Exception:
                    logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)

                try:
                    icont_u = ustr(icont)
                except UnicodeError:
                    icont_u = ''

                cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
                        'index_content = %s, file_type = %s ' \
                        'WHERE id = %s',
                        (base64.encodestring(data), len(data), icont_u, mime, par.file_id))
            elif self.mode == 'a':
                data = self.getvalue()
                # Yes, we're obviously using the wrong representation for storing our
                # data as base64-in-bytea
                cr.execute("UPDATE ir_attachment " \
                        "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
                        " file_size = COALESCE(file_size, 0) + %s " \
                        " WHERE id = %s",
                        (base64.encodestring(data), len(data), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)
327 class document_storage(osv.osv):
328 """ The primary object for data storage.
329 Each instance of this object is a storage media, in which our application
330 can store contents. The object here controls the behaviour of the storage
332 The referring document.directory-ies will control the placement of data
335 It is a bad idea to have multiple document.storage objects pointing to
336 the same tree of filesystem storage.
338 _name = 'document.storage'
339 _description = 'Storage Media'
340 _doclog = logging.getLogger('document')
343 'name': fields.char('Name', size=64, required=True, select=1),
344 'write_date': fields.datetime('Date Modified', readonly=True),
345 'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
346 'create_date': fields.datetime('Date Created', readonly=True),
347 'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
348 'user_id': fields.many2one('res.users', 'Owner'),
349 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
350 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
351 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
352 ('realstore','External file storage'),], 'Type', required=True),
353 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
354 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
355 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
358 def _get_rootpath(self, cr, uid, context=None):
359 return os.path.join(DMS_ROOT_PATH, cr.dbname)
362 'user_id': lambda self, cr, uid, ctx: uid,
363 'online': lambda *args: True,
364 'readonly': lambda *args: False,
365 # Note: the defaults below should only be used ONCE for the default
366 # storage media. All other times, we should create different paths at least.
367 'type': lambda *args: 'filestore',
368 'path': _get_rootpath,
371 # SQL note: a path = NULL doesn't have to be unique.
372 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
375 def __get_random_fname(self, path):
377 # This can be improved
378 if os.path.isdir(path):
379 for dirs in os.listdir(path):
380 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
383 flag = flag or create_directory(path)
384 filename = random_name()
385 return os.path.join(flag, filename)
387 def __prepare_realpath(self, cr, file_node, ira, store_path, do_create=True):
388 """ Cleanup path for realstore, create dirs if needed
390 @param file_node the node
391 @param ira ir.attachment browse of the file_node
392 @param store_path the path of the parent storage object, list
393 @param do_create create the directories, if needed
395 @return tuple(path "/var/filestore/real/dir/", npath ['dir','fname.ext'] )
397 file_node.fix_ppath(cr, ira)
398 npath = file_node.full_path() or []
399 # npath may contain empty elements, for root directory etc.
400 npath = filter(lambda x: x is not None, npath)
403 # self._doclog.debug('Npath: %s', npath)
406 raise ValueError("Invalid '..' element in path")
407 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?',):
409 raise ValueError("Invalid char %s in path %s" %(ch, n))
410 dpath = [store_path,]
412 path = os.path.join(*dpath)
413 if not os.path.isdir(path):
414 self._doclog.debug("Create dirs: %s", path)
418 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
419 """ retrieve the contents of some file_node having storage_id = id
420 optionally, fil_obj could point to the browse object of the file
425 boo = self.browse(cr, uid, id, context)
427 raise IOError(errno.EREMOTE, 'medium offline')
432 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
433 return self.__get_data_3(cr, uid, boo, ira, context)
435 def get_file(self, cr, uid, id, file_node, mode, context=None):
436 """ Return a file-like object for the contents of some node
440 boo = self.browse(cr, uid, id, context)
442 raise IOError(errno.EREMOTE, 'medium offline')
444 if boo.readonly and mode not in ('r', 'rb'):
445 raise IOError(errno.EPERM, "Readonly medium")
447 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
448 if boo.type == 'filestore':
449 if not ira.store_fname:
450 # On a migrated db, some files may have the wrong storage type
451 # try to fix their directory.
452 if mode in ('r','r+'):
454 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
455 raise IOError(errno.ENOENT, 'No file can be located')
457 store_fname = self.__get_random_fname(boo.path)
458 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
459 (store_fname, ira.id))
460 fpath = os.path.join(boo.path, store_fname)
462 fpath = os.path.join(boo.path, ira.store_fname)
463 return nodefd_file(file_node, path=fpath, mode=mode)
465 elif boo.type == 'db':
466 # TODO: we need a better api for large files
467 return nodefd_db(file_node, ira_browse=ira, mode=mode)
469 elif boo.type == 'db64':
470 return nodefd_db64(file_node, ira_browse=ira, mode=mode)
472 elif boo.type == 'realstore':
473 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path,
474 do_create = (mode[0] in ('w','a')) )
475 fpath = os.path.join(path, npath[-1])
476 if (not os.path.exists(fpath)) and mode[0] == 'r':
477 raise IOError("File not found: %s" % fpath)
478 elif mode[0] in ('w', 'a') and not ira.store_fname:
479 store_fname = os.path.join(*npath)
480 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
481 (store_fname, ira.id))
482 return nodefd_file(file_node, path=fpath, mode=mode)
484 elif boo.type == 'virtual':
485 raise ValueError('Virtual storage does not support static files')
488 raise TypeError("No %s storage" % boo.type)
490 def __get_data_3(self, cr, uid, boo, ira, context):
491 if boo.type == 'filestore':
492 if not ira.store_fname:
493 # On a migrated db, some files may have the wrong storage type
494 # try to fix their directory.
496 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
498 fpath = os.path.join(boo.path, ira.store_fname)
499 return file(fpath, 'rb').read()
500 elif boo.type == 'db64':
501 # TODO: we need a better api for large files
503 out = base64.decodestring(ira.db_datas)
507 elif boo.type == 'db':
508 # We do an explicit query, to avoid type transformations.
509 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
515 elif boo.type == 'realstore':
516 if not ira.store_fname:
517 # On a migrated db, some files may have the wrong storage type
518 # try to fix their directory.
520 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
522 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
523 if os.path.exists(fpath):
524 return file(fpath,'rb').read()
525 elif not ira.store_fname:
528 raise IOError(errno.ENOENT, "File not found: %s" % fpath)
530 elif boo.type == 'virtual':
531 raise ValueError('Virtual storage does not support static files')
534 raise TypeError("No %s storage" % boo.type)
536 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
538 This function MUST be used from an ir.attachment. It wouldn't make sense
539 to store things persistently for other types (dynamic).
543 boo = self.browse(cr, uid, id, context)
547 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
550 raise IOError(errno.EREMOTE, 'medium offline')
553 raise IOError(errno.EPERM, "Readonly medium")
555 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
558 if boo.type == 'filestore':
561 store_fname = self.__get_random_fname(path)
562 fname = os.path.join(path, store_fname)
563 fp = file(fname, 'wb')
566 self._doclog.debug( "Saved data to %s" % fname)
567 filesize = len(data) # os.stat(fname).st_size
569 # TODO Here, an old file would be left hanging.
572 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
573 raise except_orm(_('Error!'), str(e))
574 elif boo.type == 'db':
576 # will that work for huge data?
577 out = psycopg2.Binary(data)
578 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
579 (out, file_node.file_id))
580 elif boo.type == 'db64':
582 # will that work for huge data?
583 out = base64.encodestring(data)
584 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
585 (out, file_node.file_id))
586 elif boo.type == 'realstore':
588 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path, do_create=True)
589 fname = os.path.join(path, npath[-1])
590 fp = file(fname,'wb')
593 self._doclog.debug("Saved data to %s", fname)
594 filesize = len(data) # os.stat(fname).st_size
595 store_fname = os.path.join(*npath)
596 # TODO Here, an old file would be left hanging.
598 self._doclog.warning("Couldn't save data:", exc_info=True)
599 raise except_orm(_('Error!'), str(e))
601 elif boo.type == 'virtual':
602 raise ValueError('Virtual storage does not support static files')
605 raise TypeError("No %s storage" % boo.type)
607 # 2nd phase: store the metadata
614 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
615 ira.file_type or None, fname)
617 self._doclog.debug('Cannot index file:', exc_info=True)
621 icont_u = ustr(icont)
625 # a hack: /assume/ that the calling write operation will not try
626 # to write the fname and size, and update them in the db concurrently.
627 # We cannot use a write() here, because we are already in one.
628 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
629 (store_fname, filesize, icont_u, mime, file_node.file_id))
630 file_node.content_length = filesize
631 file_node.content_type = mime
633 except Exception, e :
634 self._doclog.warning("Couldn't save data:", exc_info=True)
635 # should we really rollback once we have written the actual data?
636 # at the db case (only), that rollback would be safe
637 raise except_orm(_('Error at doc write!'), str(e))
639 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
640 """ Before we unlink a file (fil_boo), prepare the list of real
641 files that have to be removed, too. """
643 if not storage_bo.online:
644 raise IOError(errno.EREMOTE, 'medium offline')
646 if storage_bo.readonly:
647 raise IOError(errno.EPERM, "Readonly medium")
649 if storage_bo.type == 'filestore':
650 fname = fil_bo.store_fname
653 path = storage_bo.path
654 return (storage_bo.id, 'file', os.path.join(path, fname))
655 elif storage_bo.type in ('db', 'db64'):
657 elif storage_bo.type == 'realstore':
658 fname = fil_bo.store_fname
661 path = storage_bo.path
662 return ( storage_bo.id, 'file', os.path.join(path, fname))
664 raise TypeError("No %s storage" % storage_bo.type)
666 def do_unlink(self, cr, uid, unres):
667 for id, ktype, fname in unres:
672 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
674 self._doclog.warning("Unknown unlink key %s" % ktype)
678 def simple_rename(self, cr, uid, file_node, new_name, context=None):
679 """ A preparation for a file rename.
680 It will not affect the database, but merely check and perhaps
681 rename the realstore file.
683 @return the dict of values that can safely be be stored in the db.
685 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
686 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
689 raise IOError(errno.EREMOTE, 'medium offline')
692 raise IOError(errno.EPERM, "Readonly medium")
694 if sbro.type in ('filestore', 'db', 'db64'):
695 # nothing to do for a rename, allow to change the db field
696 return { 'name': new_name, 'datas_fname': new_name }
697 elif sbro.type == 'realstore':
698 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
700 path, npath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
701 fname = ira.store_fname
704 self._doclog.warning("Trying to rename a non-stored file")
705 if fname != os.path.join(*npath):
706 self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(npath))
708 oldpath = os.path.join(path, npath[-1])
709 newpath = os.path.join(path, new_name)
710 os.rename(oldpath, newpath)
711 store_path = npath[:-1]
712 store_path.append(new_name)
713 store_fname = os.path.join(*store_path)
714 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
716 raise TypeError("No %s storage" % sbro.type)
718 def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
719 """ A preparation for a file move.
720 It will not affect the database, but merely check and perhaps
721 move the realstore file.
723 @param ndir_bro a browse object of document.directory, where this
725 @return the dict of values that can safely be be stored in the db.
727 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
728 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
731 raise IOError(errno.EREMOTE, 'medium offline')
734 raise IOError(errno.EPERM, "Readonly medium")
740 psto = par.storage_id.id
743 if file_node.storage_id != psto:
744 self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
745 raise NotImplementedError('Cannot move files between storage media')
747 if sbro.type in ('filestore', 'db', 'db64'):
748 # nothing to do for a rename, allow to change the db field
749 return { 'parent_id': ndir_bro.id }
750 elif sbro.type == 'realstore':
751 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
753 path, opath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
754 fname = ira.store_fname
757 self._doclog.warning("Trying to rename a non-stored file")
758 if fname != os.path.join(*opath):
759 self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(opath))
761 oldpath = os.path.join(path, opath[-1])
763 npath = [sbro.path,] + (ndir_bro.get_full_path() or [])
764 npath = filter(lambda x: x is not None, npath)
765 newdir = os.path.join(*npath)
766 if not os.path.isdir(newdir):
767 self._doclog.debug("Must create dir %s", newdir)
769 npath.append(opath[-1])
770 newpath = os.path.join(*npath)
772 self._doclog.debug("Going to move %s from %s to %s", opath[-1], oldpath, newpath)
773 shutil.move(oldpath, newpath)
775 store_path = npath[1:] + [opath[-1],]
776 store_fname = os.path.join(*store_path)
778 return { 'store_fname': store_fname }
780 raise TypeError("No %s storage" % sbro.type)