1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
29 from StringIO import StringIO
32 from tools.misc import ustr
33 from tools.translate import _
35 from osv.orm import except_orm
42 from content_index import cntIndex
# Root of the on-disk filestore: the 'document_path' config option when set,
# otherwise <root_path>/filestore. A per-database subdirectory is appended
# later by document_storage._get_rootpath().
DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
47 """ The algorithm of data storage
49 We have to consider 3 cases of data /retrieval/:
50 Given (context,path) we need to access the file (aka. node).
51 given (directory, context), we need one of its children (for listings, views)
52 given (ir.attachment, context), we needs its data and metadata (node).
54 For data /storage/ we have the cases:
55 Have (ir.attachment, context), we modify the file (save, update, rename etc).
56 Have (directory, context), we create a file.
57 Have (path, context), we create or modify a file.
59 Note that in all above cases, we don't explicitly choose the storage media,
60 but always require a context to be present.
62 Note that a node will not always have a corresponding ir.attachment. Dynamic
63 nodes, for once, won't. Their metadata will be computed by the parent storage
66 The algorithm says that in any of the above cases, our first goal is to locate
67 the node for any combination of search criteria. It would be wise NOT to
68 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
69 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
71 We also contain all the parenting loop code in one function. This is intentional,
72 because one day this will be optimized in the db (Pg 8.4).
# NOTE(review): the line below is the body of random_name(); its 'def' line
# and surrounding statements are missing from this excerpt.
d = [random.choice(string.ascii_letters) for x in xrange(10) ]

# Characters illegal in stored file names, mapped to replacement tokens:
# '/' becomes '__', the others become the str() of their hash().
# NOTE(review): str(hash(...)) is only stable within one interpreter build --
# confirm these substituted names are never persisted across upgrades.
INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
def create_directory(path):
    """Create a randomly-named subdirectory under *path*.

    NOTE(review): the statements that actually create the directory on disk
    and return its name appear to be missing from this excerpt.
    """
    dir_name = random_name()
    path = os.path.join(path, dir_name)
class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file

    Inheriting directly from file doesn't work, since file exports
    some read-only attributes (like 'name') that we don't like.
    """
    def __init__(self, parent, path, mode):
        # Wrap a plain OS file at *path*; *parent* is the document node.
        nodes.node_descriptor.__init__(self, parent)
        self.__file = open(path, mode)
        if mode.endswith('b'):
            # NOTE(review): the statements normalizing the binary-mode flag
            # (and presumably recording self.mode) are missing from this excerpt.

        # Delegate the common file-object API straight to the real file.
        for attr in ('closed', 'read', 'write', 'seek', 'tell'):
            setattr(self,attr, getattr(self.__file, attr))

    # NOTE(review): the 'def close(self):' header and several try/except
    # framing lines below are missing from this excerpt. The remaining
    # statements update the ir_attachment row (index content, mime type,
    # file size) after the descriptor is closed.
    # TODO: locking in init, close()
        fname = self.__file.name
        if self.mode in ('w', 'w+', 'r+'):
            # File was (re)written: re-index its content and refresh metadata.
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
                # Best-effort content indexing; failure only logs a debug line
                # (originally inside a try/except that is missing here).
                mime, icont = cntIndex.doIndex(None, filename=filename,
                        content_type=None, realfname=fname)
                logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            fsize = os.stat(fname).st_size
            cr.execute("UPDATE ir_attachment " \
                        " SET index_content = %s, file_type = %s, " \
                        (icont_u, mime, fsize, par.file_id))
            par.content_length = fsize
            par.content_type = mime
            logging.getLogger('document.storage').warning('Cannot save file indexed content:', exc_info=True)
        elif self.mode in ('a', 'a+' ):
            # Append mode: only the stored file size needs refreshing.
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            fsize = os.stat(fname).st_size
            cr.execute("UPDATE ir_attachment SET file_size = %s " \
                    (fsize, par.file_id))
            par.content_length = fsize
            logging.getLogger('document.storage').warning('Cannot save file appended content:', exc_info=True)
class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data

    The payload lives in an in-memory StringIO buffer and is flushed back
    to the ir_attachment row's db_datas column on close().
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            # NOTE(review): the statement stripping the 'b' flag appears to
            # be missing from this excerpt.
        if mode in ('r', 'r+'):
            # Load the current payload straight from the db, bypassing the ORM.
            cr = ira_browse._cr # reuse the cursor of the browse object, just now
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
            data = cr.fetchone()[0]
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
            # NOTE(review): an "elif mode == 'a':" branch header and an
            # 'else:' header appear to be missing around the lines below.
            StringIO.__init__(self, None)
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")

    # NOTE(review): the 'def close(self):' header and the try/except/finally
    # framing lines below are missing from this excerpt.
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        if self.mode in ('w', 'w+', 'r+'):
            # Buffer was (re)written: persist it and refresh index metadata.
            data = self.getvalue()
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
                # Best-effort content indexing; failures only log at debug.
                mime, icont = cntIndex.doIndex(data, filename=filename,
                        content_type=None, realfname=None)
                logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            out = psycopg2.Binary(data)
            cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = %s, file_size=%s, " \
                    " index_content= %s, file_type=%s " \
                (out, len(data), icont_u, mime, par.file_id))
        elif self.mode == 'a':
            # Append: concatenate to the existing payload at the SQL level.
            data = self.getvalue()
            out = psycopg2.Binary(data)
            cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = COALESCE(db_datas,'') || %s, " \
                    " file_size = COALESCE(file_size, 0) + %s " \
                (out, len(data), par.file_id))
        logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
class nodefd_db64(StringIO, nodes.node_descriptor):
    """ A descriptor to db data, base64 (the old way)

    It stores the data in base64 encoding at the db. Not optimal, but
    the transparent compression of Postgres will save the day.
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            # NOTE(review): the statement stripping the 'b' flag appears to
            # be missing from this excerpt.
        if mode in ('r', 'r+'):
            # Decode the stored base64 payload into the in-memory buffer.
            StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
            # NOTE(review): an "elif mode == 'a':" branch header and an
            # 'else:' header appear to be missing around the lines below.
            StringIO.__init__(self, None)
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")

    # NOTE(review): the 'def close(self):' header and the try/except framing
    # lines below are missing from this excerpt.
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        if self.mode in ('w', 'w+', 'r+'):
            data = self.getvalue()
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)
                # Best-effort content indexing; failures only log at debug.
                mime, icont = cntIndex.doIndex(data, filename=filename,
                        content_type=None, realfname=None)
                logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)
            # NOTE(review): 'out' is used below but its assignment is missing
            # from this excerpt (presumably out = data) -- confirm upstream.
            cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
                'index_content = %s, file_type = %s ' \
                (base64.encodestring(out), len(out), icont_u, mime, par.file_id))
        elif self.mode == 'a':
            out = self.getvalue()
            # Yes, we're obviously using the wrong representation for storing our
            # data as base64-in-bytea
            cr.execute("UPDATE ir_attachment " \
                "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
                " file_size = COALESCE(file_size, 0) + %s " \
                (base64.encodestring(out), len(out), par.file_id))
        logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
class document_storage(osv.osv):
    """ The primary object for data storage.
    Each instance of this object is a storage media, in which our application
    can store contents. The object here controls the behaviour of the storage
    media.
    The referring document.directory-ies will control the placement of data
    into the storage media.

    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    _name = 'document.storage'
    _description = 'Storage Media'
    # Shared logger for this model's diagnostics.
    _doclog = logging.getLogger('document')

    # NOTE(review): the '_columns = {' opener and its closing '}' appear to
    # be missing from this excerpt; the entries below are the field dict.
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid':  fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid':  fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        # 'db' keeps content in db_datas (bytea); 'filestore' writes random
        # file names under 'path'; 'realstore' mirrors the directory tree.
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                ('realstore','External file storage'),], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
340 def _get_rootpath(self, cr, uid, context=None):
341 return os.path.join(DMS_ROOT_PATH, cr.dbname)
    # NOTE(review): the '_defaults = {' opener and its closing '}' appear to
    # be missing from this excerpt.
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage media. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,
    # NOTE(review): the '_sql_constraints = [' opener and its closing ']'
    # appear to be missing around the constraint below.
        # SQL note: a path = NULL doesn't have to be unique.
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
    def __get_random_fname(self, path):
        """Pick (or create) a subdirectory of *path* with room (< 4000
        entries) and return a relative 'subdir/random_name' for a new file.

        NOTE(review): the initialisation of 'flag' and the statements
        recording the chosen subdirectory (presumably 'flag = dirs; break')
        are missing from this excerpt.
        """
        # This can be improved
        if os.path.isdir(path):
            # Reuse an existing subdirectory that is not yet "full".
            for dirs in os.listdir(path):
                if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
        # No suitable subdirectory found: create a fresh one.
        flag = flag or create_directory(path)
        filename = random_name()
        return os.path.join(flag, filename)
    def __prepare_realpath(self, cr, file_node, ira, store_path, do_create=True):
        """ Cleanup path for realstore, create dirs if needed

            @param file_node the node
            @param ira ir.attachment browse of the file_node
            @param store_path the path of the parent storage object, list
            @param do_create create the directories, if needed

            @return tuple(path "/var/filestore/real/dir/", npath ['dir','fname.ext'] )
        """
        file_node.fix_ppath(cr, ira)
        npath = file_node.full_path() or []
        # npath may contain empty elements, for root directory etc.
        npath = filter(lambda x: x is not None, npath)

        # self._doclog.debug('Npath: %s', npath)
        # NOTE(review): the "for n in npath:" / "if n == '..':" / "if ch in n:"
        # framing lines of the validation below, the 'dpath += npath[:-1]'
        # statement, the os.makedirs call and the final return are all
        # missing from this excerpt.
                raise ValueError("Invalid '..' element in path")
            for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?',):
                    raise ValueError("Invalid char %s in path %s" %(ch, n))
        dpath = [store_path,]
        path = os.path.join(*dpath)
        if not os.path.isdir(path):
            # Create missing intermediate directories (when do_create is set).
            self._doclog.debug("Create dirs: %s", path)
    def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
        """ retrieve the contents of some file_node having storage_id = id
            optionally, fil_obj could point to the browse object of the file
            (ir.attachment)
        """
        boo = self.browse(cr, uid, id, context)
        # NOTE(review): the "if not boo.online:" guard header and the
        # fil_obj if/else around the two lines below are missing from this
        # excerpt.
            raise IOError(errno.EREMOTE, 'medium offline')
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        return self.__get_data_3(cr, uid, boo, ira, context)
    def get_file(self, cr, uid, id, file_node, mode, context=None):
        """ Return a file-like object for the contents of some node

        Picks the descriptor class (nodefd_file / nodefd_db / nodefd_db64)
        matching the storage medium's type.
        """
        boo = self.browse(cr, uid, id, context)
        # NOTE(review): the "if not boo.online:" guard header is missing here.
            raise IOError(errno.EREMOTE, 'medium offline')

        if boo.readonly and mode not in ('r', 'rb'):
            raise IOError(errno.EPERM, "Readonly medium")

        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if mode in ('r','r+'):
                    # NOTE(review): an "if ira.file_size:" guard around the
                    # warning below appears to be missing.
                    self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                    raise IOError(errno.ENOENT, 'No file can be located')
                # NOTE(review): 'else:' branch header missing here -- the
                # write path allocates a random name and records it.
                    store_fname = self.__get_random_fname(boo.path)
                    cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
                        (store_fname, ira.id))
                    fpath = os.path.join(boo.path, store_fname)
            # NOTE(review): 'else:' branch header missing here.
                fpath = os.path.join(boo.path, ira.store_fname)
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'db':
            # TODO: we need a better api for large files
            return nodefd_db(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'db64':
            return nodefd_db64(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'realstore':
            # NOTE(review): 'mode[1]' below looks wrong -- for single-char
            # modes like 'w' it raises IndexError, and for 'r+'/'w+' it is
            # '+', never 'r'/'w'/'a'. Presumably mode[0] was intended;
            # confirm against upstream before changing.
            path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path,
                            do_create = (mode[1] in ('w','a')) )
            fpath = os.path.join(path, npath[-1])
            if (not os.path.exists(fpath)) and mode[1] == 'r':
                raise IOError("File not found: %s" % fpath)
            elif mode[1] in ('w', 'a') and not ira.store_fname:
                store_fname = os.path.join(*npath)
                cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
                    (store_fname, ira.id))
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')

        # NOTE(review): 'else:' header missing before the final raise.
            raise TypeError("No %s storage" % boo.type)
    def __get_data_3(self, cr, uid, boo, ira, context):
        """Return the raw content of attachment *ira* stored on medium *boo*.

        Dispatches on boo.type; several guard/else/return lines are missing
        from this excerpt (see NOTE comments inline).
        """
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                # NOTE(review): an "if ira.file_size:" guard and a fallback
                # return around the warning below appear to be missing.
                self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
            fpath = os.path.join(boo.path, ira.store_fname)
            return file(fpath, 'rb').read()
        elif boo.type == 'db64':
            # TODO: we need a better api for large files
            # NOTE(review): the "if ira.db_datas:" guard and the
            # 'return out' lines appear to be missing here.
            out = base64.decodestring(ira.db_datas)
        elif boo.type == 'db':
            # We do an explicit query, to avoid type transformations.
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
            # NOTE(review): fetching the result row and returning it is
            # missing from this excerpt.
        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
            fpath = os.path.join(boo.path,ira.store_fname or ira.name)
            if os.path.exists(fpath):
                return file(fpath,'rb').read()
            elif not ira.store_fname:
                # NOTE(review): this branch's body and the final 'else:' are
                # missing; the raise below likely belongs to that else.
                raise IOError(errno.ENOENT, "File not found: %s" % fpath)

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')

        # NOTE(review): 'else:' header missing before the final raise.
            raise TypeError("No %s storage" % boo.type)
    def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
        """ store the data.
            This function MUST be used from an ir.attachment. It wouldn't make sense
            to store things persistently for other types (dynamic).
        """
        boo = self.browse(cr, uid, id, context)
        # NOTE(review): the fil_obj if/else around the browse below, the
        # online/readonly guard headers, and several try/except framing
        # lines in this method are missing from this excerpt.
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            raise IOError(errno.EREMOTE, 'medium offline')
            raise IOError(errno.EPERM, "Readonly medium")

        self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)

        # Phase 1: write the payload to the medium.
        if boo.type == 'filestore':
            # Write to a random file name under the medium's path; the
            # surrounding try/except (logging + except_orm) frame is missing.
                store_fname = self.__get_random_fname(path)
                fname = os.path.join(path, store_fname)
                fp = file(fname, 'wb')
                self._doclog.debug( "Saved data to %s" % fname)
                filesize = len(data) # os.stat(fname).st_size
                # TODO Here, an old file would be left hanging.
                self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'db':
            # will that work for huge data?
            out = psycopg2.Binary(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'db64':
            # will that work for huge data?
            out = base64.encodestring(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'realstore':
            # Mirror the node's directory path on disk; try/except frame
            # missing from this excerpt.
                path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path, do_create=True)
                fname = os.path.join(path, npath[-1])
                fp = file(fname,'wb')
                self._doclog.debug("Saved data to %s", fname)
                filesize = len(data) # os.stat(fname).st_size
                store_fname = os.path.join(*npath)
                # TODO Here, an old file would be left hanging.
                self._doclog.warning("Couldn't save data:", exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')
        # NOTE(review): 'else:' header missing before the raise below.
            raise TypeError("No %s storage" % boo.type)

        # 2nd phase: store the metadata
        # NOTE(review): the try/except framing of the indexing calls below
        # is missing from this excerpt.
            mime, icont = cntIndex.doIndex(data, ira.datas_fname,
                ira.file_type or None, fname)
            self._doclog.debug('Cannot index file:', exc_info=True)
            icont_u = ustr(icont)

            # a hack: /assume/ that the calling write operation will not try
            # to write the fname and size, and update them in the db concurrently.
            # We cannot use a write() here, because we are already in one.
            cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
                (store_fname, filesize, icont_u, mime, file_node.file_id))
            file_node.content_length = filesize
            file_node.content_type = mime
        except Exception, e :
            self._doclog.warning("Couldn't save data:", exc_info=True)
            # should we really rollback once we have written the actual data?
            # at the db case (only), that rollback would be safe
            raise except_orm(_('Error at doc write!'), str(e))
    def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
        """ Before we unlink a file (fil_boo), prepare the list of real
        files that have to be removed, too. """

        if not storage_bo.online:
            raise IOError(errno.EREMOTE, 'medium offline')

        if storage_bo.readonly:
            raise IOError(errno.EPERM, "Readonly medium")

        if storage_bo.type == 'filestore':
            fname = fil_bo.store_fname
            # NOTE(review): an "if not fname: return None" guard appears to
            # be missing from this excerpt.
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        elif storage_bo.type in ('db', 'db64'):
            # NOTE(review): branch body missing -- nothing on disk to remove
            # for db-backed media, presumably 'return None'.
        elif storage_bo.type == 'realstore':
            fname = fil_bo.store_fname
            # NOTE(review): "if not fname: return None" guard missing here.
            path = storage_bo.path
            return ( storage_bo.id, 'file', os.path.join(path, fname))
        # NOTE(review): 'else:' header missing before the raise below.
            raise TypeError("No %s storage" % storage_bo.type)
    def do_unlink(self, cr, uid, unres):
        """Remove the real files listed by prepare_unlink().

        NOTE(review): the 'file' key test, the os.unlink try/except and the
        final return are missing from this excerpt; only the two warning
        paths remain visible.
        """
        for id, ktype, fname in unres:
                self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
                self._doclog.warning("Unknown unlink key %s" % ktype)
    def simple_rename(self, cr, uid, file_node, new_name, context=None):
        """ A preparation for a file rename.
        It will not affect the database, but merely check and perhaps
        rename the realstore file.

        @return the dict of values that can safely be be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id

        # NOTE(review): the "if not sbro.online:" / "if sbro.readonly:"
        # guard headers around the two raises below are missing from this
        # excerpt.
            raise IOError(errno.EREMOTE, 'medium offline')
            raise IOError(errno.EPERM, "Readonly medium")

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'name': new_name, 'datas_fname': new_name }
        elif sbro.type == 'realstore':
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            path, npath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
            fname = ira.store_fname

            # NOTE(review): the "if not fname:" header for the warning below
            # is missing from this excerpt.
                self._doclog.warning("Trying to rename a non-stored file")
            if fname != os.path.join(*npath):
                self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(npath))

            # Rename on disk, then report the new store_fname for the db.
            oldpath = os.path.join(path, npath[-1])
            newpath = os.path.join(path, new_name)
            os.rename(oldpath, newpath)
            store_path = npath[:-1]
            store_path.append(new_name)
            store_fname = os.path.join(*store_path)
            return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
        # NOTE(review): 'else:' header missing before the raise below; also
        # 'boo' is undefined in this method -- it should read sbro.type.
            raise TypeError("No %s storage" % boo.type)
    def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
        """ A preparation for a file move.
        It will not affect the database, but merely check and perhaps
        move the realstore file.

        @param ndir_bro a browse object of document.directory, where this
            file should move to.
        @return the dict of values that can safely be be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id

        # NOTE(review): the online/readonly guard headers around the raises
        # below, and the loop that locates the target directory's storage
        # (walking up parents to set 'psto'), are missing from this excerpt.
            raise IOError(errno.EREMOTE, 'medium offline')
            raise IOError(errno.EPERM, "Readonly medium")
                psto = par.storage_id.id

        # Moving across storage media is not supported.
        if file_node.storage_id != psto:
            self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
            raise NotImplementedError('Cannot move files between storage media')

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'parent_id': ndir_bro.id }
        elif sbro.type == 'realstore':
            raise NotImplementedError("Cannot move in realstore, yet") # TODO
            # NOTE(review): everything below this raise is dead code and
            # references names that do not exist in this scope (fil_bo,
            # storage_bo, new_name, ira, dpath, boo); it looks copied from
            # prepare_unlink()/simple_rename() and should be removed or
            # rewritten when realstore moves are implemented. Note it also
            # *returns* a ValueError instead of raising it.
            fname = fil_bo.store_fname
            # NOTE(review): an "if not fname:" header is missing before the
            # next line.
                return ValueError("Tried to rename a non-stored file")
            path = storage_bo.path
            oldpath = os.path.join(path, fname)

            for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
                # NOTE(review): the "if ch in new_name:" header is missing.
                    raise ValueError("Invalid char %s in name %s" %(ch, new_name))

            file_node.fix_ppath(cr, ira)
            npath = file_node.full_path() or []
            dpath.extend(npath[:-1])
            dpath.append(new_name)
            newpath = os.path.join(*dpath)
            # print "old, new paths:", oldpath, newpath
            os.rename(oldpath, newpath)
            return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
        # NOTE(review): 'else:' header missing before the raise below; 'boo'
        # is undefined here -- it should read sbro.type.
            raise TypeError("No %s storage" % boo.type)