1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
29 from StringIO import StringIO
32 from tools.misc import ustr
33 from tools.translate import _
35 from osv.orm import except_orm
41 from content_index import cntIndex
# Root of the filesystem-backed document store: the 'document_path' config
# option if set, otherwise <root_path>/filestore. Per-database subdirectories
# are appended later (see document_storage._get_rootpath).
43 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
46 """ The algorithm of data storage
48 We have to consider 3 cases of data /retrieval/:
49 Given (context,path) we need to access the file (aka. node).
50 given (directory, context), we need one of its children (for listings, views)
51 given (ir.attachment, context), we need its data and metadata (node).
53 For data /storage/ we have the cases:
54 Have (ir.attachment, context), we modify the file (save, update, rename etc).
55 Have (directory, context), we create a file.
56 Have (path, context), we create or modify a file.
58 Note that in all above cases, we don't explicitly choose the storage media,
59 but always require a context to be present.
61 Note that a node will not always have a corresponding ir.attachment. Dynamic
62 nodes, for one, won't. Their metadata will be computed by the parent storage
65 The algorithm says that in any of the above cases, our first goal is to locate
66 the node for any combination of search criteria. It would be wise NOT to
67 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
68 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
70 We also contain all the parenting loop code in one function. This is intentional,
71 because one day this will be optimized in the db (Pg 8.4).
78 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
# Map of characters that are unsafe in stored filenames to replacement tokens:
# '/' becomes '__', every other character is replaced by the string form of its
# Python hash. NOTE(review): str(hash(ch)) is only stable within one
# interpreter build; presumably these names are never regenerated -- confirm.
82 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
# Create a randomly-named subdirectory under `path` for sharding stored files.
# NOTE(review): this listing is an excerpt -- the actual mkdir call and the
# return value (original lines 88-90) are not visible here; presumably it
# creates the directory and returns dir_name. Confirm against the full source.
85 def create_directory(path):
86 dir_name = random_name()
87 path = os.path.join(path, dir_name)
# Descriptor over a real OS file, used by the 'filestore' and 'realstore'
# backends. On close of a writable descriptor it re-indexes the file content
# and pushes size/MIME/index data back onto the ir.attachment row.
# NOTE(review): excerpt only -- the closing docstring quotes, the computation
# of `filename`, the try/except framing around indexing and the SQL updates,
# and the cursor commit/close calls are among the original lines not shown.
91 class nodefd_file(nodes.node_descriptor):
92 """ A descriptor to a real file
94 Inheriting directly from file doesn't work, since file exports
95 some read-only attributes (like 'name') that we don't like.
97 def __init__(self, parent, path, mode):
98 nodes.node_descriptor.__init__(self, parent)
# The real file handle; kept private so its read-only attrs don't leak.
99 self.__file = open(path, mode)
100 if mode.endswith('b'):
# Delegate the common file API straight to the wrapped file object.
104 for attr in ('closed', 'read', 'write', 'seek', 'tell'):
105 setattr(self,attr, getattr(self.__file, attr))
# The lines below appear to belong to a close() method whose 'def' line is
# missing from this excerpt -- TODO confirm.
108 # TODO: locking in init, close()
109 fname = self.__file.name
# Writable modes: index the freshly written file and update the attachment
# metadata via a fresh cursor on the node's database.
112 if self.mode in ('w', 'w+', 'r+'):
113 par = self._get_parent()
114 cr = pooler.get_db(par.context.dbname).cursor()
118 if isinstance(filename, (tuple, list)):
119 filename = '/'.join(filename)
# Index from the on-disk file (data=None, realfname=fname).
122 mime, icont = cntIndex.doIndex(None, filename=filename,
123 content_type=None, realfname=fname)
# Indexing failures are non-fatal: logged at debug level only.
125 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
129 icont_u = ustr(icont)
134 fsize = os.stat(fname).st_size
135 cr.execute("UPDATE ir_attachment " \
136 " SET index_content = %s, file_type = %s, " \
139 (icont_u, mime, fsize, par.file_id))
# Mirror the new metadata on the in-memory node as well.
140 par.content_length = fsize
141 par.content_type = mime
145 logging.getLogger('document.storage').warning('Cannot save file indexed content:', exc_info=True)
# Append modes: only the stored file size is refreshed, no re-indexing.
147 elif self.mode in ('a', 'a+' ):
149 par = self._get_parent()
150 cr = pooler.get_db(par.context.dbname).cursor()
151 fsize = os.stat(fname).st_size
152 cr.execute("UPDATE ir_attachment SET file_size = %s " \
154 (fsize, par.file_id))
155 par.content_length = fsize
159 logging.getLogger('document.storage').warning('Cannot save file appended content:', exc_info=True)
# Descriptor over raw bytea data stored in ir_attachment.db_datas ('db'
# storage type). Reads load the column into a StringIO buffer; close of a
# writable descriptor writes the buffer back with a separate cursor.
# NOTE(review): excerpt only -- the close() 'def' line, try/except framing,
# and cursor commit/close are among the original lines not visible here.
163 class nodefd_db(StringIO, nodes.node_descriptor):
164 """ A descriptor to db data
166 def __init__(self, parent, ira_browse, mode):
167 nodes.node_descriptor.__init__(self, parent)
168 if mode.endswith('b'):
# Read modes: fetch the raw column explicitly to avoid ORM type conversions.
171 if mode in ('r', 'r+'):
172 cr = ira_browse._cr # reuse the cursor of the browse object, just now
173 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
174 data = cr.fetchone()[0]
175 StringIO.__init__(self, data)
176 elif mode in ('w', 'w+'):
177 StringIO.__init__(self, None)
178 # at write, we start at 0 (= overwrite), but have the original
179 # data available, in case of a seek()
181 StringIO.__init__(self, None)
# Any other mode is rejected up front.
183 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
184 raise IOError(errno.EINVAL, "Invalid file mode")
# Lines below belong to close(); its 'def' line is missing from this excerpt.
188 # we now open a *separate* cursor, to update the data.
189 # FIXME: this may be improved, for concurrency handling
190 par = self._get_parent()
191 # uid = par.context.uid
192 cr = pooler.get_db(par.context.dbname).cursor()
# Overwrite modes: index the buffered data, then replace the stored blob.
194 if self.mode in ('w', 'w+', 'r+'):
195 data = self.getvalue()
199 if isinstance(filename, (tuple, list)):
200 filename = '/'.join(filename)
203 mime, icont = cntIndex.doIndex(data, filename=filename,
204 content_type=None, realfname=None)
206 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
210 icont_u = ustr(icont)
# psycopg2.Binary wraps the bytes for safe bytea parameter passing.
214 out = psycopg2.Binary(data)
215 cr.execute("UPDATE ir_attachment " \
216 "SET db_datas = %s, file_size=%s, " \
217 " index_content= %s, file_type=%s " \
219 (out, len(data), icont_u, mime, par.file_id))
# Append mode: concatenate in SQL, tolerating NULL existing data/size.
220 elif self.mode == 'a':
221 data = self.getvalue()
222 out = psycopg2.Binary(data)
223 cr.execute("UPDATE ir_attachment " \
224 "SET db_datas = COALESCE(db_datas,'') || %s, " \
225 " file_size = COALESCE(file_size, 0) + %s " \
227 (out, len(data), par.file_id))
230 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
# Descriptor over base64-encoded data in ir_attachment.db_datas (legacy
# 'db64' storage type). Mirrors nodefd_db but encodes/decodes base64 at the
# boundaries.
# NOTE(review): excerpt only -- close() 'def' line, try/except framing and
# cursor commit/close are among the original lines not visible here.
236 class nodefd_db64(StringIO, nodes.node_descriptor):
237 """ A descriptor to db data, base64 (the old way)
239 It stores the data in base64 encoding at the db. Not optimal, but
240 the transparent compression of Postgres will save the day.
242 def __init__(self, parent, ira_browse, mode):
243 nodes.node_descriptor.__init__(self, parent)
244 if mode.endswith('b'):
# Read modes: decode the stored base64 into the StringIO buffer.
247 if mode in ('r', 'r+'):
248 StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
249 elif mode in ('w', 'w+'):
250 StringIO.__init__(self, None)
251 # at write, we start at 0 (= overwrite), but have the original
252 # data available, in case of a seek()
254 StringIO.__init__(self, None)
256 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
257 raise IOError(errno.EINVAL, "Invalid file mode")
# Lines below belong to close(); its 'def' line is missing from this excerpt.
261 # we now open a *separate* cursor, to update the data.
262 # FIXME: this may be improved, for concurrency handling
263 par = self._get_parent()
264 # uid = par.context.uid
265 cr = pooler.get_db(par.context.dbname).cursor()
# Overwrite modes: index the raw data, store it re-encoded as base64.
267 if self.mode in ('w', 'w+', 'r+'):
268 data = self.getvalue()
272 if isinstance(filename, (tuple, list)):
273 filename = '/'.join(filename)
276 mime, icont = cntIndex.doIndex(data, filename=filename,
277 content_type=None, realfname=None)
279 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
283 icont_u = ustr(icont)
287 cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
288 'index_content = %s, file_type = %s ' \
290 (base64.encodestring(data), len(data), icont_u, mime, par.file_id))
# Append mode: decode existing base64 in SQL, concatenate, re-encode.
291 elif self.mode == 'a':
292 data = self.getvalue()
293 # Yes, we're obviously using the wrong representation for storing our
294 # data as base64-in-bytea
295 cr.execute("UPDATE ir_attachment " \
296 "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
297 " file_size = COALESCE(file_size, 0) + %s " \
299 (base64.encodestring(data), len(data), par.file_id))
302 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
# OpenERP model describing one storage medium (db, db64, filestore or
# realstore). The field definitions below belong to a `_columns = {` dict and
# the lambdas to a `_defaults = {` dict whose opening lines (original 323,
# 342) are not visible in this excerpt -- the structure is inferred; confirm.
308 class document_storage(osv.osv):
309 """ The primary object for data storage.
310 Each instance of this object is a storage media, in which our application
311 can store contents. The object here controls the behaviour of the storage
313 The referring document.directory-ies will control the placement of data
316 It is a bad idea to have multiple document.storage objects pointing to
317 the same tree of filesystem storage.
319 _name = 'document.storage'
320 _description = 'Storage Media'
# Class-level logger shared by all methods of this model.
321 _doclog = logging.getLogger('document')
# --- _columns entries (opening line of the dict not shown) ---
324 'name': fields.char('Name', size=64, required=True, select=1),
325 'write_date': fields.datetime('Date Modified', readonly=True),
326 'write_uid':  fields.many2one('res.users', 'Last Modification User', readonly=True),
327 'create_date': fields.datetime('Date Created', readonly=True),
328 'create_uid':  fields.many2one('res.users', 'Creator', readonly=True),
329 'user_id': fields.many2one('res.users', 'Owner'),
330 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
331 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
332 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
333 ('realstore','External file storage'),], 'Type', required=True),
334 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
335 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
336 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
# Default root path: per-database subdirectory of DMS_ROOT_PATH.
339 def _get_rootpath(self, cr, uid, context=None):
340 return os.path.join(DMS_ROOT_PATH, cr.dbname)
# --- _defaults entries (opening line of the dict not shown) ---
343 'user_id': lambda self, cr, uid, ctx: uid,
344 'online': lambda *args: True,
345 'readonly': lambda *args: False,
346 # Note: the defaults below should only be used ONCE for the default
347 # storage media. All other times, we should create different paths at least.
348 'type': lambda *args: 'filestore',
349 'path': _get_rootpath,
# --- _sql_constraints entry (opening line of the list not shown) ---
352 # SQL note: a path = NULL doesn't have to be unique.
353 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
# Pick (or create) a subdirectory of `path` with fewer than 4000 entries and
# return 'subdir/random_name' relative to `path`, to shard files on disk.
# NOTE(review): excerpt only -- the initialization of `flag` and the loop body
# that selects an existing subdirectory (original lines 357, 362-363) are not
# visible; as shown, `flag` would be unbound. Confirm against full source.
356 def __get_random_fname(self, path):
358 # This can be improved
359 if os.path.isdir(path):
360 for dirs in os.listdir(path):
# Reuse a subdirectory only while it stays below 4000 entries.
361 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
364 flag = flag or create_directory(path)
365 filename = random_name()
366 return os.path.join(flag, filename)
# Resolve and sanitize the on-disk path of a realstore node, optionally
# creating missing directories, and return (abs dir path, relative npath).
# NOTE(review): excerpt only -- the loop over npath elements that applies the
# '..' and invalid-char checks, the os.makedirs call guarded by do_create, and
# the return statement are among the original lines not visible here.
368 def __prepare_realpath(self, cr, file_node, ira, store_path, do_create=True):
369 """ Cleanup path for realstore, create dirs if needed
371 @param file_node the node
372 @param ira ir.attachment browse of the file_node
373 @param store_path the path of the parent storage object, list
374 @param do_create create the directories, if needed
376 @return tuple(path "/var/filestore/real/dir/", npath ['dir','fname.ext'] )
378 file_node.fix_ppath(cr, ira)
379 npath = file_node.full_path() or []
380 # npath may contain empty elements, for root directory etc.
381 npath = filter(lambda x: x is not None, npath)
384 # self._doclog.debug('Npath: %s', npath)
# Reject path traversal and characters unsafe on common filesystems.
387 raise ValueError("Invalid '..' element in path")
388 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?',):
390 raise ValueError("Invalid char %s in path %s" %(ch, n))
391 dpath = [store_path,]
393 path = os.path.join(*dpath)
394 if not os.path.isdir(path):
395 self._doclog.debug("Create dirs: %s", path)
# Return the raw content (a byte string) of `file_node` stored on medium `id`.
# Browses the medium, verifies it is online, resolves the ir.attachment and
# delegates to __get_data_3 for the per-storage-type retrieval.
# NOTE(review): excerpt only -- the `if not boo.online:` guard around line 408
# and any use of `fil_obj` are not visible here.
399 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
400 """ retrieve the contents of some file_node having storage_id = id
401 optionally, fil_obj could point to the browse object of the file
406 boo = self.browse(cr, uid, id, context)
408 raise IOError(errno.EREMOTE, 'medium offline')
413 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
414 return self.__get_data_3(cr, uid, boo, ira, context)
# Return a file-like descriptor (nodefd_*) for `file_node` on medium `id`,
# dispatching on the storage type. Enforces online/readonly checks and, for
# file-backed media, lazily assigns a store_fname when opening for write.
# NOTE(review): excerpt only -- several guard lines (e.g. the `if not
# boo.online:` test) are not visible here.
416 def get_file(self, cr, uid, id, file_node, mode, context=None):
417 """ Return a file-like object for the contents of some node
421 boo = self.browse(cr, uid, id, context)
423 raise IOError(errno.EREMOTE, 'medium offline')
# Readonly media only allow read modes.
425 if boo.readonly and mode not in ('r', 'rb'):
426 raise IOError(errno.EPERM, "Readonly medium")
428 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
429 if boo.type == 'filestore':
430 if not ira.store_fname:
431 # On a migrated db, some files may have the wrong storage type
432 # try to fix their directory.
# Reading a filestore attachment without a filename cannot be recovered.
433 if mode in ('r','r+'):
435 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
436 raise IOError(errno.ENOENT, 'No file can be located')
# Write modes: allocate a random sharded filename and record it.
438 store_fname = self.__get_random_fname(boo.path)
439 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
440 (store_fname, ira.id))
441 fpath = os.path.join(boo.path, store_fname)
443 fpath = os.path.join(boo.path, ira.store_fname)
444 return nodefd_file(file_node, path=fpath, mode=mode)
446 elif boo.type == 'db':
447 # TODO: we need a better api for large files
448 return nodefd_db(file_node, ira_browse=ira, mode=mode)
450 elif boo.type == 'db64':
451 return nodefd_db64(file_node, ira_browse=ira, mode=mode)
453 elif boo.type == 'realstore':
# NOTE(review): mode[1] indexing assumes a two-char mode like 'rb'/'wb';
# a bare 'r' would raise IndexError -- confirm callers always pass 2 chars.
454 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path,
455 do_create = (mode[1] in ('w','a')) )
456 fpath = os.path.join(path, npath[-1])
457 if (not os.path.exists(fpath)) and mode[1] == 'r':
458 raise IOError("File not found: %s" % fpath)
459 elif mode[1] in ('w', 'a') and not ira.store_fname:
460 store_fname = os.path.join(*npath)
461 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
462 (store_fname, ira.id))
463 return nodefd_file(file_node, path=fpath, mode=mode)
# Virtual storages have no static file content at all.
465 elif boo.type == 'virtual':
466 raise ValueError('Virtual storage does not support static files')
469 raise TypeError("No %s storage" % boo.type)
# Internal helper of get_data: fetch and return the raw bytes of attachment
# `ira` from medium `boo`, dispatching on the storage type.
# NOTE(review): excerpt only -- the db-branch fetch/return lines (original
# 491-495) and some guards are not visible here.
471 def __get_data_3(self, cr, uid, boo, ira, context):
472 if boo.type == 'filestore':
473 if not ira.store_fname:
474 # On a migrated db, some files may have the wrong storage type
475 # try to fix their directory.
477 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
479 fpath = os.path.join(boo.path, ira.store_fname)
480 return file(fpath, 'rb').read()
481 elif boo.type == 'db64':
482 # TODO: we need a better api for large files
# Stored base64-encoded; decode before returning.
484 out = base64.decodestring(ira.db_datas)
488 elif boo.type == 'db':
489 # We do an explicit query, to avoid type transformations.
490 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
496 elif boo.type == 'realstore':
497 if not ira.store_fname:
498 # On a migrated db, some files may have the wrong storage type
499 # try to fix their directory.
501 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
# Fall back to the attachment's display name when store_fname is unset.
503 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
504 if os.path.exists(fpath):
505 return file(fpath,'rb').read()
506 elif not ira.store_fname:
509 raise IOError(errno.ENOENT, "File not found: %s" % fpath)
511 elif boo.type == 'virtual':
512 raise ValueError('Virtual storage does not support static files')
515 raise TypeError("No %s storage" % boo.type)
# Store `data` (a byte string) for `file_node` on medium `id`, then index the
# content and update the ir.attachment metadata in a second phase.
# NOTE(review): excerpt only -- online/readonly guard conditions, try/except
# framing, fp.write/fp.close calls and the initialization of `store_fname`/
# `fname` per branch (used in phase 2) are among the lines not visible here.
517 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
519 This function MUST be used from an ir.attachment. It wouldn't make sense
520 to store things persistently for other types (dynamic).
524 boo = self.browse(cr, uid, id, context)
528 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
# Guards: medium must be online and writable.
531 raise IOError(errno.EREMOTE, 'medium offline')
534 raise IOError(errno.EPERM, "Readonly medium")
536 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
# Phase 1: write the payload according to the storage type.
539 if boo.type == 'filestore':
542 store_fname = self.__get_random_fname(path)
543 fname = os.path.join(path, store_fname)
544 fp = file(fname, 'wb')
547 self._doclog.debug( "Saved data to %s" % fname)
548 filesize = len(data) # os.stat(fname).st_size
550 # TODO Here, an old file would be left hanging.
553 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
554 raise except_orm(_('Error!'), str(e))
555 elif boo.type == 'db':
557 # will that work for huge data?
558 out = psycopg2.Binary(data)
559 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
560 (out, file_node.file_id))
561 elif boo.type == 'db64':
563 # will that work for huge data?
564 out = base64.encodestring(data)
565 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
566 (out, file_node.file_id))
567 elif boo.type == 'realstore':
569 path, npath = self.__prepare_realpath(cr, file_node, ira, boo.path, do_create=True)
570 fname = os.path.join(path, npath[-1])
571 fp = file(fname,'wb')
574 self._doclog.debug("Saved data to %s", fname)
575 filesize = len(data) # os.stat(fname).st_size
576 store_fname = os.path.join(*npath)
577 # TODO Here, an old file would be left hanging.
579 self._doclog.warning("Couldn't save data:", exc_info=True)
580 raise except_orm(_('Error!'), str(e))
582 elif boo.type == 'virtual':
583 raise ValueError('Virtual storage does not support static files')
586 raise TypeError("No %s storage" % boo.type)
588 # 2nd phase: store the metadata
# Index the new content; failures are logged at debug level and ignored.
595 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
596 ira.file_type or None, fname)
598 self._doclog.debug('Cannot index file:', exc_info=True)
602 icont_u = ustr(icont)
606 # a hack: /assume/ that the calling write operation will not try
607 # to write the fname and size, and update them in the db concurrently.
608 # We cannot use a write() here, because we are already in one.
609 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
610 (store_fname, filesize, icont_u, mime, file_node.file_id))
611 file_node.content_length = filesize
612 file_node.content_type = mime
614 except Exception, e :
615 self._doclog.warning("Couldn't save data:", exc_info=True)
616 # should we really rollback once we have written the actual data?
617 # at the db case (only), that rollback would be safe
618 raise except_orm(_('Error at doc write!'), str(e))
# Before deleting attachment `fil_bo`, return a (storage_id, kind, path)
# tuple describing the real file that must also be removed by do_unlink, or
# (per the visible db/db64 branch) presumably None when nothing on disk needs
# cleanup -- the return for that branch (original line 637) is not visible.
620 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
621 """ Before we unlink a file (fil_boo), prepare the list of real
622 files that have to be removed, too. """
624 if not storage_bo.online:
625 raise IOError(errno.EREMOTE, 'medium offline')
627 if storage_bo.readonly:
628 raise IOError(errno.EPERM, "Readonly medium")
630 if storage_bo.type == 'filestore':
631 fname = fil_bo.store_fname
634 path = storage_bo.path
635 return (storage_bo.id, 'file', os.path.join(path, fname))
# db/db64 rows live entirely in the database: nothing on disk to remove.
636 elif storage_bo.type in ('db', 'db64'):
638 elif storage_bo.type == 'realstore':
639 fname = fil_bo.store_fname
642 path = storage_bo.path
643 return ( storage_bo.id, 'file', os.path.join(path, fname))
645 raise TypeError("No %s storage" % storage_bo.type)
# Remove the real files listed by prepare_unlink. Failures to delete are
# logged as warnings (manual cleanup requested), not raised.
# NOTE(review): excerpt only -- the `if ktype == 'file':` test, the os.unlink
# call and the try/except framing (original lines 649-652) are not visible.
647 def do_unlink(self, cr, uid, unres):
648 for id, ktype, fname in unres:
653 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
655 self._doclog.warning("Unknown unlink key %s" % ktype)
# Prepare a rename of `file_node` to `new_name`: for db-backed and filestore
# media only the db fields change; for realstore the on-disk file is renamed
# too. Returns the dict of ir.attachment values the caller should write.
# NOTE(review): excerpt only -- the online/readonly guard conditions and the
# `if not fname:` test before the warning are among the lines not visible.
659 def simple_rename(self, cr, uid, file_node, new_name, context=None):
660 """ A preparation for a file rename.
661 It will not affect the database, but merely check and perhaps
662 rename the realstore file.
664 @return the dict of values that can safely be be stored in the db.
666 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
667 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
670 raise IOError(errno.EREMOTE, 'medium offline')
673 raise IOError(errno.EPERM, "Readonly medium")
675 if sbro.type in ('filestore', 'db', 'db64'):
676 # nothing to do for a rename, allow to change the db field
677 return { 'name': new_name, 'datas_fname': new_name }
678 elif sbro.type == 'realstore':
679 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
681 path, npath = self.__prepare_realpath(cr, file_node, ira, sbro.path, do_create=False)
682 fname = ira.store_fname
685 self._doclog.warning("Trying to rename a non-stored file")
# Sanity check: stored path should match the recomputed node path.
686 if fname != os.path.join(*npath):
687 self._doclog.warning("inconsistency in realstore: %s != %s" , fname, repr(npath))
689 oldpath = os.path.join(path, npath[-1])
690 newpath = os.path.join(path, new_name)
691 os.rename(oldpath, newpath)
# Rebuild store_fname with the last path component replaced by new_name.
692 store_path = npath[:-1]
693 store_path.append(new_name)
694 store_fname = os.path.join(*store_path)
695 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
697 raise TypeError("No %s storage" % sbro.type)
# Prepare a move of `file_node` into directory `ndir_bro` on the same medium:
# db-backed and filestore media just change parent_id; realstore moves are
# explicitly not implemented.
# NOTE(review): everything after the NotImplementedError at original line 732
# is dead code from an older rename-style implementation -- it references
# names (fil_bo, path, ira, dpath, new_name) that are never bound in this
# method and even uses `return ValueError(...)` instead of raise. Candidate
# for deletion once confirmed against the full source.
699 def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
700 """ A preparation for a file move.
701 It will not affect the database, but merely check and perhaps
702 move the realstore file.
704 @param ndir_bro a browse object of document.directory, where this
706 @return the dict of values that can safely be be stored in the db.
708 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
709 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
712 raise IOError(errno.EREMOTE, 'medium offline')
715 raise IOError(errno.EPERM, "Readonly medium")
# Moves are only allowed within a single storage medium.
721 psto = par.storage_id.id
724 if file_node.storage_id != psto:
725 self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
726 raise NotImplementedError('Cannot move files between storage media')
728 if sbro.type in ('filestore', 'db', 'db64'):
729 # nothing to do for a rename, allow to change the db field
730 return { 'parent_id': ndir_bro.id }
731 elif sbro.type == 'realstore':
732 raise NotImplementedError("Cannot move in realstore, yet") # TODO
# --- unreachable from here to line 751 (follows the raise above) ---
733 fname = fil_bo.store_fname
735 return ValueError("Tried to rename a non-stored file")
737 oldpath = os.path.join(path, fname)
739 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
741 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
743 file_node.fix_ppath(cr, ira)
744 npath = file_node.full_path() or []
746 dpath.extend(npath[:-1])
747 dpath.append(new_name)
748 newpath = os.path.join(*dpath)
749 # print "old, new paths:", oldpath, newpath
750 os.rename(oldpath, newpath)
751 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
753 raise TypeError("No %s storage" % sbro.type)