1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
29 from StringIO import StringIO
32 from tools.misc import ustr
33 from tools.translate import _
35 from osv.orm import except_orm
42 from content_index import cntIndex
# Root of the on-disk "filestore" storage; configurable via the 'document_path'
# option, defaulting to <root_path>/filestore. Per-database subdirectories are
# appended later (see document_storage._get_rootpath).
44 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
# NOTE(review): the block below is a module-level design-note string. This
# excerpt is line-sampled (embedded original line numbers jump), so some of its
# lines -- including the closing triple quotes -- are not visible here.
47 """ The algorithm of data storage
49 We have to consider 3 cases of data /retrieval/:
50 Given (context,path) we need to access the file (aka. node).
51 given (directory, context), we need one of its children (for listings, views)
52 given (ir.attachment, context), we needs its data and metadata (node).
54 For data /storage/ we have the cases:
55 Have (ir.attachment, context), we modify the file (save, update, rename etc).
56 Have (directory, context), we create a file.
57 Have (path, context), we create or modify a file.
59 Note that in all above cases, we don't explicitly choose the storage media,
60 but always require a context to be present.
62 Note that a node will not always have a corresponding ir.attachment. Dynamic
63 nodes, for once, won't. Their metadata will be computed by the parent storage
66 The algorithm says that in any of the above cases, our first goal is to locate
67 the node for any combination of search criteria. It would be wise NOT to
68 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
69 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
71 We also contain all the parenting loop code in one function. This is intentional,
72 because one day this will be optimized in the db (Pg 8.4).
# NOTE(review): fragment of random_name() -- its 'def' line and the joining
# 'return' are on lines not visible in this excerpt. Presumably it returns
# ''.join(d), a 10-char random ASCII-letter name; confirm against the full file.
79     d = [random.choice(string.ascii_letters) for x in xrange(10) ]
# Characters that are invalid in stored file names, mapped to their replacement
# tokens: '/' becomes '__', every other character a str(hash(...)) string.
83 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
# Create a randomly-named subdirectory under `path`.
# NOTE(review): this excerpt is missing interior lines (original numbering
# jumps 88 -> 92); the actual directory creation (presumably os.makedirs) and
# the return of `dir_name` occur on the missing lines -- confirm against the
# full file before relying on this.
86 def create_directory(path):
87 dir_name = random_name()
88 path = os.path.join(path, dir_name)
# Descriptor over a real on-disk file in the filestore. It delegates the basic
# file operations to the wrapped file object and, when a written file is
# closed, pushes size / mime / index-content metadata back into the
# ir_attachment row identified by the parent node's file_id.
# NOTE(review): this excerpt is line-sampled (embedded original line numbers
# jump), so several statements -- the `def close(self)` header, the try/except
# framing around the indexing calls, the assignment of `filename`, and the
# WHERE clauses / cr.commit()/cr.close() of the UPDATEs -- are not visible.
# Documented as-is; do not assume the code below is complete.
92 class nodefd_file(nodes.node_descriptor):
93 """ A descriptor to a real file
95 Inheriting directly from file doesn't work, since file exports
96 some read-only attributes (like 'name') that we don't like.
98 def __init__(self, parent, path, mode):
99 nodes.node_descriptor.__init__(self, parent)
# Name-mangled attribute keeps the raw file object private to this class.
100 self.__file = open(path, mode)
101 if mode.endswith('b'):
# Re-export the wrapped file's methods/attributes directly on the descriptor.
105 for attr in ('closed', 'read', 'write', 'seek', 'tell'):
106 setattr(self,attr, getattr(self.__file, attr))
109 # TODO: locking in init, close()
110 fname = self.__file.name
# Write modes: after closing, re-index the file content and store mime type,
# index text and on-disk size on the attachment row.
113 if self.mode in ('w', 'w+', 'r+'):
114 par = self._get_parent()
# A fresh cursor on the node's database; presumably committed and closed on
# lines not visible in this excerpt -- confirm.
115 cr = pooler.get_db(par.context.dbname).cursor()
119 if isinstance(filename, (tuple, list)):
120 filename = '/'.join(filename)
# Index from the real file on disk (data=None, realfname=fname).
123 mime, icont = cntIndex.doIndex(None, filename=filename,
124 content_type=None, realfname=fname)
# Indexing is best-effort: failures are only logged at debug level.
126 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
130 icont_u = ustr(icont)
135 fsize = os.stat(fname).st_size
136 cr.execute("UPDATE ir_attachment " \
137 " SET index_content = %s, file_type = %s, " \
140 (icont_u, mime, fsize, par.file_id))
# Mirror the new metadata on the in-memory node as well.
141 par.content_length = fsize
142 par.content_type = mime
146 logging.getLogger('document.storage').warning('Cannot save file indexed content:', exc_info=True)
# Append modes: only the file size needs refreshing, no re-indexing.
148 elif self.mode in ('a', 'a+' ):
150 par = self._get_parent()
151 cr = pooler.get_db(par.context.dbname).cursor()
152 fsize = os.stat(fname).st_size
153 cr.execute("UPDATE ir_attachment SET file_size = %s " \
155 (fsize, par.file_id))
156 par.content_length = fsize
160 logging.getLogger('document.storage').warning('Cannot save file appended content:', exc_info=True)
# Descriptor over attachment data stored directly in the db_datas bytea column.
# Reads load the blob into an in-memory StringIO buffer; on close of a written
# buffer the data is written back (overwrite or append) together with the
# recomputed index metadata.
# NOTE(review): line-sampled excerpt -- the `def close(self)` header, try/except
# framing, the `filename` assignment, the WHERE clauses and commit/close of the
# cursor are on lines not visible here. Documented as-is.
164 class nodefd_db(StringIO, nodes.node_descriptor):
165 """ A descriptor to db data
167 def __init__(self, parent, ira_browse, mode):
168 nodes.node_descriptor.__init__(self, parent)
169 if mode.endswith('b'):
# Read modes: fetch the raw bytea via an explicit query (avoids the ORM's
# type transformation) and preload the StringIO buffer with it.
172 if mode in ('r', 'r+'):
173 cr = ira_browse._cr # reuse the cursor of the browse object, just now
174 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
175 data = cr.fetchone()[0]
176 StringIO.__init__(self, data)
177 elif mode in ('w', 'w+'):
178 StringIO.__init__(self, None)
179 # at write, we start at 0 (= overwrite), but have the original
180 # data available, in case of a seek()
182 StringIO.__init__(self, None)
184 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
185 raise IOError(errno.EINVAL, "Invalid file mode")
189 # we now open a *separate* cursor, to update the data.
190 # FIXME: this may be improved, for concurrency handling
191 par = self._get_parent()
192 uid = par.context.uid
193 cr = pooler.get_db(par.context.dbname).cursor()
# Overwrite modes: store the whole buffer plus freshly indexed metadata.
195 if self.mode in ('w', 'w+', 'r+'):
196 data = self.getvalue()
200 if isinstance(filename, (tuple, list)):
201 filename = '/'.join(filename)
# Index directly from the in-memory data (no real file on disk).
204 mime, icont = cntIndex.doIndex(data, filename=filename,
205 content_type=None, realfname=None)
207 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
211 icont_u = ustr(icont)
# psycopg2.Binary wraps the raw bytes for a parameterized bytea write.
215 out = psycopg2.Binary(data)
216 cr.execute("UPDATE ir_attachment " \
217 "SET db_datas = %s, file_size=%s, " \
218 " index_content= %s, file_type=%s " \
220 (out, len(data), icont_u, mime, par.file_id))
# Append mode: concatenate in SQL, tolerating NULL previous values.
221 elif self.mode == 'a':
222 data = self.getvalue()
223 out = psycopg2.Binary(data)
224 cr.execute("UPDATE ir_attachment " \
225 "SET db_datas = COALESCE(db_datas,'') || %s, " \
226 " file_size = COALESCE(file_size, 0) + %s " \
228 (out, len(data), par.file_id))
231 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
# Descriptor over attachment data stored base64-encoded in db_datas (the
# legacy representation). Same contract as nodefd_db, with base64
# encode/decode at the db boundary.
# NOTE(review): line-sampled excerpt -- the `def close(self)` header, try/except
# framing, the `filename`/`out` assignments in the overwrite branch, WHERE
# clauses and commit/close are on lines not visible here. In particular, `out`
# is used at original line 291 but its visible assignment is missing.
237 class nodefd_db64(StringIO, nodes.node_descriptor):
238 """ A descriptor to db data, base64 (the old way)
240 It stores the data in base64 encoding at the db. Not optimal, but
241 the transparent compression of Postgres will save the day.
243 def __init__(self, parent, ira_browse, mode):
244 nodes.node_descriptor.__init__(self, parent)
245 if mode.endswith('b'):
# Read modes: decode the stored base64 straight from the browse record.
248 if mode in ('r', 'r+'):
249 StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
250 elif mode in ('w', 'w+'):
251 StringIO.__init__(self, None)
252 # at write, we start at 0 (= overwrite), but have the original
253 # data available, in case of a seek()
255 StringIO.__init__(self, None)
257 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
258 raise IOError(errno.EINVAL, "Invalid file mode")
262 # we now open a *separate* cursor, to update the data.
263 # FIXME: this may be improved, for concurrency handling
264 par = self._get_parent()
265 uid = par.context.uid
266 cr = pooler.get_db(par.context.dbname).cursor()
# Overwrite modes: re-index, then store the buffer base64-encoded.
268 if self.mode in ('w', 'w+', 'r+'):
269 data = self.getvalue()
273 if isinstance(filename, (tuple, list)):
274 filename = '/'.join(filename)
277 mime, icont = cntIndex.doIndex(data, filename=filename,
278 content_type=None, realfname=None)
280 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
284 icont_u = ustr(icont)
288 cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
289 'index_content = %s, file_type = %s ' \
291 (base64.encodestring(out), len(out), icont_u, mime, par.file_id))
# Append mode: decode existing base64 in SQL, concatenate, re-encode.
292 elif self.mode == 'a':
293 out = self.getvalue()
294 # Yes, we're obviously using the wrong representation for storing our
295 # data as base64-in-bytea
296 cr.execute("UPDATE ir_attachment " \
297 "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
298 " file_size = COALESCE(file_size, 0) + %s " \
300 (base64.encodestring(out), len(out), par.file_id))
303 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
# OpenERP model describing one storage medium (db / db64 / filestore /
# realstore / virtual) and dispatching every data operation (read, open,
# write, unlink, rename, move) to the medium-specific implementation.
# NOTE(review): this excerpt is line-sampled (embedded original line numbers
# jump), so structural lines are missing throughout: the `_columns = {`,
# `_defaults = {` and `_sql_constraints = [` openers, many try/except and
# if/else lines, cr.commit()/cr.close() calls, etc. Comments below document
# only what the visible lines establish.
309 class document_storage(osv.osv):
310 """ The primary object for data storage.
311 Each instance of this object is a storage media, in which our application
312 can store contents. The object here controls the behaviour of the storage
314 The referring document.directory-ies will control the placement of data
317 It is a bad idea to have multiple document.storage objects pointing to
318 the same tree of filesystem storage.
320 _name = 'document.storage'
321 _description = 'Storage Media'
# Shared logger for the whole model.
322 _doclog = logging.getLogger('document')
# Field definitions (the enclosing `_columns = {` opener is on a missing line).
325 'name': fields.char('Name', size=64, required=True, select=1),
326 'write_date': fields.datetime('Date Modified', readonly=True),
327 'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
328 'create_date': fields.datetime('Date Created', readonly=True),
329 'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
330 'user_id': fields.many2one('res.users', 'Owner'),
331 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
332 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
333 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
334 ('realstore','External file storage'),], 'Type', required=True),
335 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
336 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
337 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
# Default path: one filestore subdirectory per database.
340 def _get_rootpath(self, cr, uid, context=None):
341 return os.path.join(DMS_ROOT_PATH, cr.dbname)
# Default values (the `_defaults = {` opener is on a missing line).
344 'user_id': lambda self, cr, uid, ctx: uid,
345 'online': lambda *args: True,
346 'readonly': lambda *args: False,
347 # Note: the defaults below should only be used ONCE for the default
348 # storage media. All other times, we should create different paths at least.
349 'type': lambda *args: 'filestore',
350 'path': _get_rootpath,
# SQL-level uniqueness of (type, path) pairs.
353 # SQL note: a path = NULL doesn't have to be unique.
354 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
# Pick a random relative file name inside `path`, reusing an existing
# subdirectory while it holds fewer than 4000 entries, otherwise creating a
# new one. NOTE(review): `flag` is read before any visible assignment -- its
# initialization is on a missing line.
357 def __get_random_fname(self, path):
359 # This can be improved
360 if os.path.isdir(path):
361 for dirs in os.listdir(path):
362 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
365 flag = flag or create_directory(path)
366 filename = random_name()
367 return os.path.join(flag, filename)
369 def __prepare_realpath(self, cr, file_node, ira, store_path, do_create=True):
370 """ Cleanup path for realstore, create dirs if needed
372 @param file_node the node
373 @param ira ir.attachment browse of the file_node
374 @param store_path the path of the parent storage object, list
375 @param do_create create the directories, if needed
377 @return tuple(path "/var/filestore/real/dir/", npath ['dir','fname.ext'] )
379 file_node.fix_ppath(cr, ira)
380 npath = file_node.full_path() or []
381 # npath may contain empty elements, for root directory etc.
382 npath = filter(lambda x: x is not None, npath)
385 # self._doclog.debug('Npath: %s', npath)
# Reject path traversal and OS-invalid characters in every component
# (the surrounding loop over components is on missing lines; `n` is the
# current component).
388 raise ValueError("Invalid '..' element in path")
389 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?',):
391 raise ValueError("Invalid char %s in path %s" %(ch, n))
392 dpath = [store_path,]
394 path = os.path.join(*dpath)
395 if not os.path.isdir(path):
396 self._doclog.debug("Create dirs: %s", path)
400 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
401 """ retrieve the contents of some file_node having storage_id = id
402 optionally, fil_obj could point to the browse object of the file
# Delegates to __get_data_3 with the storage and attachment browse records.
407 boo = self.browse(cr, uid, id, context)
411 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
412 return self.__get_data_3(cr, uid, boo, ira, context)
# Return a node descriptor (nodefd_*) for reading/writing the node's content,
# dispatching on the storage type.
414 def get_file(self, cr, uid, id, file_node, mode, context=None):
415 """ Return a file-like object for the contents of some node
419 boo = self.browse(cr, uid, id, context)
421 raise RuntimeError('media offline')
423 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
424 if boo.type == 'filestore':
425 if not ira.store_fname:
426 # On a migrated db, some files may have the wrong storage type
427 # try to fix their directory.
428 if mode in ('r','r+'):
# Reading a filestore attachment without a stored name cannot succeed.
430 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
431 raise IOError(errno.ENOENT, 'No file can be located')
# Writing: allocate a fresh random name and record it on the attachment.
433 store_fname = self.__get_random_fname(boo.path)
434 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
435 (store_fname, ira.id))
436 fpath = os.path.join(boo.path, store_fname)
438 fpath = os.path.join(boo.path, ira.store_fname)
439 return nodefd_file(file_node, path=fpath, mode=mode)
441 elif boo.type == 'db':
442 # TODO: we need a better api for large files
443 return nodefd_db(file_node, ira_browse=ira, mode=mode)
445 elif boo.type == 'db64':
446 return nodefd_db64(file_node, ira_browse=ira, mode=mode)
448 elif boo.type == 'realstore':
# NOTE(review): mode[1] checks presuppose a 2+ char mode string ('rb', 'w+'
# etc.); a bare 'r' would raise IndexError -- confirm callers' modes.
449 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path,
450 do_create = (mode[1] in ('w','a')) )
451 fpath = os.path.join(path, npath[-1])
452 if (not os.path.exists(fpath)) and mode[1] == 'r':
453 raise IOError("File not found: %s" % fpath)
454 elif mode[1] in ('w', 'a') and not ira.store_fname:
455 store_fname = os.path.join(*npath)
456 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
457 (store_fname, ira.id))
458 return nodefd_file(file_node, path=fpath, mode=mode)
460 elif boo.type == 'virtual':
461 raise ValueError('Virtual storage does not support static files')
464 raise TypeError("No %s storage" % boo.type)
# Fetch the raw content bytes for one attachment, dispatching on storage type.
466 def __get_data_3(self, cr, uid, boo, ira, context):
468 raise RuntimeError('media offline')
469 if boo.type == 'filestore':
470 if not ira.store_fname:
471 # On a migrated db, some files may have the wrong storage type
472 # try to fix their directory.
474 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
# NOTE(review): Python-2 file() handle is never explicitly closed here.
476 fpath = os.path.join(boo.path, ira.store_fname)
477 return file(fpath, 'rb').read()
478 elif boo.type == 'db64':
479 # TODO: we need a better api for large files
481 out = base64.decodestring(ira.db_datas)
485 elif boo.type == 'db':
486 # We do an explicit query, to avoid type transformations.
487 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
# (fetch + return of the row is on missing lines)
493 elif boo.type == 'realstore':
494 if not ira.store_fname:
495 # On a migrated db, some files may have the wrong storage type
496 # try to fix their directory.
498 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
# Fall back to the display name when no store_fname was recorded.
500 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
501 if os.path.exists(fpath):
502 return file(fpath,'rb').read()
503 elif not ira.store_fname:
506 raise IOError("File not found: %s" % fpath)
508 elif boo.type == 'virtual':
509 raise ValueError('Virtual storage does not support static files')
512 raise TypeError("No %s storage" % boo.type)
# Store `data` for an attachment on this medium (phase 1), then update the
# attachment's metadata (phase 2: size, mime, index content).
514 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
516 This function MUST be used from an ir.attachment. It wouldn't make sense
517 to store things persistently for other types (dynamic).
521 boo = self.browse(cr, uid, id, context)
525 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
528 raise RuntimeError('media offline')
529 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
532 if boo.type == 'filestore':
# NOTE(review): `path` is read before any visible assignment; presumably
# set to boo.path on a missing line -- confirm.
535 store_fname = self.__get_random_fname(path)
536 fname = os.path.join(path, store_fname)
537 fp = file(fname, 'wb')
540 self._doclog.debug( "Saved data to %s" % fname)
541 filesize = len(data) # os.stat(fname).st_size
543 # TODO Here, an old file would be left hanging.
546 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
547 raise except_orm(_('Error!'), str(e))
548 elif boo.type == 'db':
550 # will that work for huge data?
551 out = psycopg2.Binary(data)
552 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
553 (out, file_node.file_id))
554 elif boo.type == 'db64':
556 # will that work for huge data?
557 out = base64.encodestring(data)
558 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
559 (out, file_node.file_id))
560 elif boo.type == 'realstore':
562 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path, do_create=True)
563 fname = os.path.join(path, npath[-1])
564 fp = file(fname,'wb')
567 self._doclog.debug("Saved data to %s", fname)
568 filesize = len(data) # os.stat(fname).st_size
569 store_fname = os.path.join(*npath)
570 # TODO Here, an old file would be left hanging.
572 self._doclog.warning("Couldn't save data:", exc_info=True)
573 raise except_orm(_('Error!'), str(e))
575 elif boo.type == 'virtual':
576 raise ValueError('Virtual storage does not support static files')
579 raise TypeError("No %s storage" % boo.type)
581 # 2nd phase: store the metadata
# NOTE(review): `fname`/`store_fname` are only bound in the file-based
# branches above; presumably initialized for the db branches on missing lines.
588 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
589 ira.file_type or None, fname)
591 self._doclog.debug('Cannot index file:', exc_info=True)
595 icont_u = ustr(icont)
599 # a hack: /assume/ that the calling write operation will not try
600 # to write the fname and size, and update them in the db concurrently.
601 # We cannot use a write() here, because we are already in one.
602 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
603 (store_fname, filesize, icont_u, mime, file_node.file_id))
604 file_node.content_length = filesize
605 file_node.content_type = mime
607 except Exception, e :
608 self._doclog.warning("Couldn't save data:", exc_info=True)
609 # should we really rollback once we have written the actual data?
610 # at the db case (only), that rollback would be safe
611 raise except_orm(_('Error at doc write!'), str(e))
# Compute the (storage_id, kind, path) tuple of real files to delete when an
# attachment is unlinked; db-backed media need no filesystem cleanup.
613 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
614 """ Before we unlink a file (fil_boo), prepare the list of real
615 files that have to be removed, too. """
617 if not storage_bo.online:
618 raise RuntimeError('media offline')
620 if storage_bo.type == 'filestore':
621 fname = fil_bo.store_fname
624 path = storage_bo.path
625 return (storage_bo.id, 'file', os.path.join(path, fname))
626 elif storage_bo.type in ('db', 'db64'):
628 elif storage_bo.type == 'realstore':
629 fname = fil_bo.store_fname
632 path = storage_bo.path
633 return ( storage_bo.id, 'file', os.path.join(path, fname))
635 raise TypeError("No %s storage" % storage_bo.type)
# Execute the unlink plan produced by prepare_unlink; removal failures are
# logged, not raised (the db record is gone either way).
637 def do_unlink(self, cr, uid, unres):
638 for id, ktype, fname in unres:
643 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
645 self._doclog.warning("Unknown unlink key %s" % ktype)
649 def simple_rename(self, cr, uid, file_node, new_name, context=None):
650 """ A preparation for a file rename.
651 It will not affect the database, but merely check and perhaps
652 rename the realstore file.
654 @return the dict of values that can safely be be stored in the db.
656 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
657 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
659 if sbro.type in ('filestore', 'db', 'db64'):
660 # nothing to do for a rename, allow to change the db field
661 return { 'name': new_name, 'datas_fname': new_name }
662 elif sbro.type == 'realstore':
663 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
665 path, npath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
666 fname = ira.store_fname
669 self._doclog.warning("Trying to rename a non-stored file")
670 if fname != os.path.join(*npath):
671 self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(npath))
# Physically rename on disk, then return the new store_fname for the db.
673 oldpath = os.path.join(path, npath[-1])
674 newpath = os.path.join(path, new_name)
675 os.rename(oldpath, newpath)
676 store_path = npath[:-1]
677 store_path.append(new_name)
678 store_fname = os.path.join(*store_path)
679 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
# NOTE(review): BUG -- `boo` is undefined in this method; should presumably be
# `sbro.type`. Only reachable for unknown storage types.
681 raise TypeError("No %s storage" % boo.type)
649
683 def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
684 """ A preparation for a file move.
685 It will not affect the database, but merely check and perhaps
686 move the realstore file.
688 @param ndir_bro a browse object of document.directory, where this
690 @return the dict of values that can safely be be stored in the db.
692 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
693 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
# NOTE(review): `par` is read before any visible assignment; presumably bound
# while walking ndir_bro's parents on missing lines -- confirm.
699 psto = par.storage_id.id
702 if file_node.storage_id != psto:
703 self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
704 raise NotImplementedError('Cannot move files between storage media')
706 if sbro.type in ('filestore', 'db', 'db64'):
707 # nothing to do for a rename, allow to change the db field
708 return { 'parent_id': ndir_bro.id }
709 elif sbro.type == 'realstore':
710 raise NotImplementedError("Cannot move in realstore, yet") # TODO
# NOTE(review): everything below the raise is dead code and references
# undefined names (fil_bo, storage_bo, new_name, ira, dpath); it also
# *returns* (not raises) a ValueError at original line 713. Left as-is since
# it is unreachable, but it should be removed or rewritten when realstore
# moves are implemented.
711 fname = fil_bo.store_fname
713 return ValueError("Tried to rename a non-stored file")
714 path = storage_bo.path
715 oldpath = os.path.join(path, fname)
717 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
719 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
721 file_node.fix_ppath(cr, ira)
722 npath = file_node.full_path() or []
724 dpath.extend(npath[:-1])
725 dpath.append(new_name)
726 newpath = os.path.join(*dpath)
727 # print "old, new paths:", oldpath, newpath
728 os.rename(oldpath, newpath)
729 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
# NOTE(review): same `boo` NameError hazard as in simple_rename.
731 raise TypeError("No %s storage" % boo.type)