1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
29 from StringIO import StringIO
32 from tools.misc import ustr
33 from tools.translate import _
35 from osv.orm import except_orm
42 from content_index import cntIndex
44 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
47 """ The algorithm of data storage
49 We have to consider 3 cases of data /retrieval/:
50 Given (context,path) we need to access the file (aka. node).
51 given (directory, context), we need one of its children (for listings, views)
52 given (ir.attachment, context), we need its data and metadata (node).
54 For data /storage/ we have the cases:
55 Have (ir.attachment, context), we modify the file (save, update, rename etc).
56 Have (directory, context), we create a file.
57 Have (path, context), we create or modify a file.
59 Note that in all above cases, we don't explicitly choose the storage media,
60 but always require a context to be present.
62 Note that a node will not always have a corresponding ir.attachment. Dynamic
63 nodes, for one, won't. Their metadata will be computed by the parent storage
66 The algorithm says that in any of the above cases, our first goal is to locate
67 the node for any combination of search criteria. It would be wise NOT to
68 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
69 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
71 We also contain all the parenting loop code in one function. This is intentional,
72 because one day this will be optimized in the db (Pg 8.4).
79 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
83 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
86 def create_directory(path):
87 dir_name = random_name()
88 path = os.path.join(path, dir_name)
92 class nodefd_file(nodes.node_descriptor):
93 """ A descriptor to a real file
95 Inheriting directly from file doesn't work, since file exports
96 some read-only attributes (like 'name') that we don't like.
98 def __init__(self, parent, path, mode):
99 nodes.node_descriptor.__init__(self, parent)
100 self.__file = open(path, mode)
101 if mode.endswith('b'):
105 for attr in ('closed', 'read', 'write', 'seek', 'tell'):
106 setattr(self,attr, getattr(self.__file, attr))
109 # TODO: locking in init, close()
110 fname = self.__file.name
113 if self.mode in ('w', 'w+', 'r+'):
114 par = self._get_parent()
115 cr = pooler.get_db(par.context.dbname).cursor()
119 if isinstance(filename, (tuple, list)):
120 filename = '/'.join(filename)
123 mime, icont = cntIndex.doIndex(None, filename=filename,
124 content_type=None, realfname=fname)
126 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
130 icont_u = ustr(icont)
135 fsize = os.stat(fname).st_size
136 cr.execute("UPDATE ir_attachment " \
137 " SET index_content = %s, file_type = %s, " \
140 (icont_u, mime, fsize, par.file_id))
141 par.content_length = fsize
142 par.content_type = mime
146 logging.getLogger('document.storage').warning('Cannot save file indexed content:', exc_info=True)
148 elif self.mode in ('a', 'a+' ):
150 par = self._get_parent()
151 cr = pooler.get_db(par.context.dbname).cursor()
152 fsize = os.stat(fname).st_size
153 cr.execute("UPDATE ir_attachment SET file_size = %s " \
155 (fsize, par.file_id))
156 par.content_length = fsize
157 par.content_type = mime
161 logging.getLogger('document.storage').warning('Cannot save file appended content:', exc_info=True)
165 class nodefd_db(StringIO, nodes.node_descriptor):
166 """ A descriptor to db data
168 def __init__(self, parent, ira_browse, mode):
169 nodes.node_descriptor.__init__(self, parent)
170 if mode.endswith('b'):
173 if mode in ('r', 'r+'):
174 cr = ira_browse._cr # reuse the cursor of the browse object, just now
175 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
176 data = cr.fetchone()[0]
177 StringIO.__init__(self, data)
178 elif mode in ('w', 'w+'):
179 StringIO.__init__(self, None)
180 # at write, we start at 0 (= overwrite), but have the original
181 # data available, in case of a seek()
183 StringIO.__init__(self, None)
185 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
186 raise IOError(errno.EINVAL, "Invalid file mode")
190 # we now open a *separate* cursor, to update the data.
191 # FIXME: this may be improved, for concurrency handling
192 par = self._get_parent()
193 uid = par.context.uid
194 cr = pooler.get_db(par.context.dbname).cursor()
196 if self.mode in ('w', 'w+', 'r+'):
197 data = self.getvalue()
201 if isinstance(filename, (tuple, list)):
202 filename = '/'.join(filename)
205 mime, icont = cntIndex.doIndex(data, filename=filename,
206 content_type=None, realfname=None)
208 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
212 icont_u = ustr(icont)
216 out = psycopg2.Binary(data)
217 cr.execute("UPDATE ir_attachment " \
218 "SET db_datas = %s, file_size=%s, " \
219 " index_content= %s, file_type=%s " \
221 (out, len(data), icont_u, mime, par.file_id))
222 elif self.mode == 'a':
223 data = self.getvalue()
224 out = psycopg2.Binary(data)
225 cr.execute("UPDATE ir_attachment " \
226 "SET db_datas = COALESCE(db_datas,'') || %s, " \
227 " file_size = COALESCE(file_size, 0) + %s " \
229 (out, len(data), par.file_id))
232 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
238 class nodefd_db64(StringIO, nodes.node_descriptor):
239 """ A descriptor to db data, base64 (the old way)
241 It stores the data in base64 encoding at the db. Not optimal, but
242 the transparent compression of Postgres will save the day.
244 def __init__(self, parent, ira_browse, mode):
245 nodes.node_descriptor.__init__(self, parent)
246 if mode.endswith('b'):
249 if mode in ('r', 'r+'):
250 StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
251 elif mode in ('w', 'w+'):
252 StringIO.__init__(self, None)
253 # at write, we start at 0 (= overwrite), but have the original
254 # data available, in case of a seek()
256 StringIO.__init__(self, None)
258 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
259 raise IOError(errno.EINVAL, "Invalid file mode")
263 # we now open a *separate* cursor, to update the data.
264 # FIXME: this may be improved, for concurrency handling
265 par = self._get_parent()
266 uid = par.context.uid
267 cr = pooler.get_db(par.context.dbname).cursor()
269 if self.mode in ('w', 'w+', 'r+'):
270 data = self.getvalue()
274 if isinstance(filename, (tuple, list)):
275 filename = '/'.join(filename)
278 mime, icont = cntIndex.doIndex(data, filename=filename,
279 content_type=None, realfname=None)
281 logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)
285 icont_u = ustr(icont)
289 cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
290 'index_content = %s, file_type = %s ' \
292 (base64.encodestring(out), len(out), icont_u, mime, par.file_id))
293 elif self.mode == 'a':
294 out = self.getvalue()
295 # Yes, we're obviously using the wrong representation for storing our
296 # data as base64-in-bytea
297 cr.execute("UPDATE ir_attachment " \
298 "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
299 " file_size = COALESCE(file_size, 0) + %s " \
301 (base64.encodestring(out), len(out), par.file_id))
304 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
310 class document_storage(osv.osv):
311 """ The primary object for data storage.
312 Each instance of this object is a storage media, in which our application
313 can store contents. The object here controls the behaviour of the storage
315 The referring document.directory-ies will control the placement of data
318 It is a bad idea to have multiple document.storage objects pointing to
319 the same tree of filesystem storage.
321 _name = 'document.storage'
322 _description = 'Storage Media'
323 _doclog = logging.getLogger('document')
326 'name': fields.char('Name', size=64, required=True, select=1),
327 'write_date': fields.datetime('Date Modified', readonly=True),
328 'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
329 'create_date': fields.datetime('Date Created', readonly=True),
330 'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
331 'user_id': fields.many2one('res.users', 'Owner'),
332 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
333 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
334 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
335 ('realstore','External file storage'),], 'Type', required=True),
336 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
337 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
338 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
341 def _get_rootpath(self, cr, uid, context=None):
342 return os.path.join(DMS_ROOT_PATH, cr.dbname)
345 'user_id': lambda self, cr, uid, ctx: uid,
346 'online': lambda *args: True,
347 'readonly': lambda *args: False,
348 # Note: the defaults below should only be used ONCE for the default
349 # storage media. All other times, we should create different paths at least.
350 'type': lambda *args: 'filestore',
351 'path': _get_rootpath,
354 # SQL note: a path = NULL doesn't have to be unique.
355 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
358 def __get_random_fname(self, path):
360 # This can be improved
361 if os.path.isdir(path):
362 for dirs in os.listdir(path):
363 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
366 flag = flag or create_directory(path)
367 filename = random_name()
368 return os.path.join(flag, filename)
370 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
371 """ retrieve the contents of some file_node having storage_id = id
372 optionally, fil_obj could point to the browse object of the file
377 boo = self.browse(cr, uid, id, context)
381 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
382 return self.__get_data_3(cr, uid, boo, ira, context)
384 def get_file(self, cr, uid, id, file_node, mode, context=None):
385 """ Return a file-like object for the contents of some node
389 boo = self.browse(cr, uid, id, context)
391 raise RuntimeError('media offline')
393 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
394 if boo.type == 'filestore':
395 if not ira.store_fname:
396 # On a migrated db, some files may have the wrong storage type
397 # try to fix their directory.
398 if mode in ('r','r+'):
400 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
401 raise IOError(errno.ENOENT, 'No file can be located')
403 store_fname = self.__get_random_fname(boo.path)
404 cr.execute('UPDATE ir_attachment SET store_fname = %s WHERE id = %s',
405 (store_fname, ira.id))
406 fpath = os.path.join(boo.path, store_fname)
408 fpath = os.path.join(boo.path, ira.store_fname)
409 return nodefd_file(file_node, path=fpath, mode=mode)
411 elif boo.type == 'db':
412 # TODO: we need a better api for large files
413 return nodefd_db(file_node, ira_browse=ira, mode=mode)
415 elif boo.type == 'db64':
416 return nodefd_db64(file_node, ira_browse=ira, mode=mode)
418 elif boo.type == 'realstore':
419 if not ira.store_fname:
420 # On a migrated db, some files may have the wrong storage type
421 # try to fix their directory.
423 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
425 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
426 if (not os.path.exists(fpath)) and mode in ('r','r+'):
427 raise IOError("File not found: %s" % fpath)
428 return nodefd_file(file_node, path=fpath, mode=mode)
430 elif boo.type == 'virtual':
431 raise ValueError('Virtual storage does not support static files')
434 raise TypeError("No %s storage" % boo.type)
436 def __get_data_3(self, cr, uid, boo, ira, context):
438 raise RuntimeError('media offline')
439 if boo.type == 'filestore':
440 if not ira.store_fname:
441 # On a migrated db, some files may have the wrong storage type
442 # try to fix their directory.
444 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
446 fpath = os.path.join(boo.path, ira.store_fname)
447 return file(fpath, 'rb').read()
448 elif boo.type == 'db64':
449 # TODO: we need a better api for large files
451 out = base64.decodestring(ira.db_datas)
455 elif boo.type == 'db':
456 # We do an explicit query, to avoid type transformations.
457 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
463 elif boo.type == 'realstore':
464 if not ira.store_fname:
465 # On a migrated db, some files may have the wrong storage type
466 # try to fix their directory.
468 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
470 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
471 if os.path.exists(fpath):
472 return file(fpath,'rb').read()
473 elif not ira.store_fname:
476 raise IOError("File not found: %s" % fpath)
478 elif boo.type == 'virtual':
479 raise ValueError('Virtual storage does not support static files')
482 raise TypeError("No %s storage" % boo.type)
484 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
486 This function MUST be used from an ir.attachment. It wouldn't make sense
487 to store things persistently for other types (dynamic).
491 boo = self.browse(cr, uid, id, context)
495 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
498 raise RuntimeError('media offline')
499 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
502 if boo.type == 'filestore':
505 store_fname = self.__get_random_fname(path)
506 fname = os.path.join(path, store_fname)
507 fp = file(fname, 'wb')
510 self._doclog.debug( "Saved data to %s" % fname)
511 filesize = len(data) # os.stat(fname).st_size
513 # TODO Here, an old file would be left hanging.
516 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
517 raise except_orm(_('Error!'), str(e))
518 elif boo.type == 'db':
520 # will that work for huge data?
521 out = psycopg2.Binary(data)
522 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
523 (out, file_node.file_id))
524 elif boo.type == 'db64':
526 # will that work for huge data?
527 out = base64.encodestring(data)
528 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
529 (out, file_node.file_id))
530 elif boo.type == 'realstore':
532 file_node.fix_ppath(cr, ira)
533 npath = file_node.full_path() or []
534 # npath may contain empty elements, for root directory etc.
535 for i, n in enumerate(npath):
539 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
541 raise ValueError("Invalid char %s in path %s" %(ch, n))
544 path = os.path.join(*dpath)
545 if not os.path.isdir(path):
547 fname = os.path.join(path, npath[-1])
548 fp = file(fname,'wb')
551 self._doclog.debug("Saved data to %s", fname)
552 filesize = len(data) # os.stat(fname).st_size
553 store_fname = os.path.join(*npath)
554 # TODO Here, an old file would be left hanging.
556 self._doclog.warning("Couldn't save data:", exc_info=True)
557 raise except_orm(_('Error!'), str(e))
559 elif boo.type == 'virtual':
560 raise ValueError('Virtual storage does not support static files')
563 raise TypeError("No %s storage" % boo.type)
565 # 2nd phase: store the metadata
572 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
573 ira.file_type or None, fname)
575 self._doclog.debug('Cannot index file:', exc_info=True)
579 icont_u = ustr(icont)
583 # a hack: /assume/ that the calling write operation will not try
584 # to write the fname and size, and update them in the db concurrently.
585 # We cannot use a write() here, because we are already in one.
586 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
587 (store_fname, filesize, icont_u, mime, file_node.file_id))
588 file_node.content_length = filesize
589 file_node.content_type = mime
591 except Exception, e :
592 self._doclog.warning("Couldn't save data:", exc_info=True)
593 # should we really rollback once we have written the actual data?
594 # at the db case (only), that rollback would be safe
595 raise except_orm(_('Error at doc write!'), str(e))
597 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
598 """ Before we unlink a file (fil_boo), prepare the list of real
599 files that have to be removed, too. """
601 if not storage_bo.online:
602 raise RuntimeError('media offline')
604 if storage_bo.type == 'filestore':
605 fname = fil_bo.store_fname
608 path = storage_bo.path
609 return (storage_bo.id, 'file', os.path.join(path, fname))
610 elif storage_bo.type in ('db', 'db64'):
612 elif storage_bo.type == 'realstore':
613 fname = fil_bo.store_fname
616 path = storage_bo.path
617 return ( storage_bo.id, 'file', os.path.join(path, fname))
619 raise TypeError("No %s storage" % storage_bo.type)
621 def do_unlink(self, cr, uid, unres):
622 for id, ktype, fname in unres:
627 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
629 self._doclog.warning("Unknown unlink key %s" % ktype)
633 def simple_rename(self, cr, uid, file_node, new_name, context=None):
634 """ A preparation for a file rename.
635 It will not affect the database, but merely check and perhaps
636 rename the realstore file.
638 @return the dict of values that can safely be be stored in the db.
640 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
641 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
643 if sbro.type in ('filestore', 'db', 'db64'):
644 # nothing to do for a rename, allow to change the db field
645 return { 'name': new_name, 'datas_fname': new_name }
646 elif sbro.type == 'realstore':
647 fname = fil_bo.store_fname
649 return ValueError("Tried to rename a non-stored file")
650 path = storage_bo.path
651 oldpath = os.path.join(path, fname)
653 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
655 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
657 file_node.fix_ppath(cr, ira)
658 npath = file_node.full_path() or []
660 dpath.extend(npath[:-1])
661 dpath.append(new_name)
662 newpath = os.path.join(*dpath)
663 # print "old, new paths:", oldpath, newpath
664 os.rename(oldpath, newpath)
665 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
667 raise TypeError("No %s storage" % boo.type)
669 def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
670 """ A preparation for a file move.
671 It will not affect the database, but merely check and perhaps
672 move the realstore file.
674 @param ndir_bro a browse object of document.directory, where this
676 @return the dict of values that can safely be be stored in the db.
678 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
679 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
685 psto = par.storage_id.id
688 if file_node.storage_id != psto:
689 self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
690 raise NotImplementedError('Cannot move files between storage media')
692 if sbro.type in ('filestore', 'db', 'db64'):
693 # nothing to do for a rename, allow to change the db field
694 return { 'parent_id': ndir_bro.id }
695 elif sbro.type == 'realstore':
696 raise NotImplementedError("Cannot move in realstore, yet") # TODO
697 fname = fil_bo.store_fname
699 return ValueError("Tried to rename a non-stored file")
700 path = storage_bo.path
701 oldpath = os.path.join(path, fname)
703 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
705 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
707 file_node.fix_ppath(cr, ira)
708 npath = file_node.full_path() or []
710 dpath.extend(npath[:-1])
711 dpath.append(new_name)
712 newpath = os.path.join(*dpath)
713 # print "old, new paths:", oldpath, newpath
714 os.rename(oldpath, newpath)
715 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
717 raise TypeError("No %s storage" % boo.type)