1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
import base64
import errno
import logging
import os
import random
import string
from StringIO import StringIO

import psycopg2

import nodes
import pooler
import tools
from osv import osv, fields
from osv.orm import except_orm
from tools.misc import ustr
from tools.translate import _
from content_index import cntIndex
# Root of the on-disk document store: the 'document_path' config option,
# falling back to <root_path>/filestore. Per-database subdirectories are
# appended later (see document_storage._get_rootpath).
DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
47 """ The algorithm of data storage
49 We have to consider 3 cases of data /retrieval/:
50 Given (context,path) we need to access the file (aka. node).
51 given (directory, context), we need one of its children (for listings, views)
given (ir.attachment, context), we need its data and metadata (node).
54 For data /storage/ we have the cases:
55 Have (ir.attachment, context), we modify the file (save, update, rename etc).
56 Have (directory, context), we create a file.
57 Have (path, context), we create or modify a file.
59 Note that in all above cases, we don't explicitly choose the storage media,
60 but always require a context to be present.
62 Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
66 The algorithm says that in any of the above cases, our first goal is to locate
67 the node for any combination of search criteria. It would be wise NOT to
68 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
69 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
71 We also contain all the parenting loop code in one function. This is intentional,
72 because one day this will be optimized in the db (Pg 8.4).
def random_name():
    """Return a random 10-character name made of ASCII letters.

    Used to generate collision-unlikely file and directory names for the
    filestore backend. (Reconstructed: the surrounding def/join/return
    lines were truncated in this copy of the file.)
    """
    random.seed()
    d = [random.choice(string.ascii_letters) for x in range(10)]
    name = ''.join(d)
    return name
# Characters that are invalid in (Windows/FAT) filenames, mapped to safe
# replacement strings: '/' becomes '__', every other character becomes the
# str() of its hash(), which is stable within one interpreter run.
INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
def create_directory(path):
    """Create a new randomly-named subdirectory of *path*.

    @param path: parent directory (must exist)
    @return the name (not the full path) of the created subdirectory.

    NOTE(review): the tail of this function (directory creation and the
    return) was truncated in this copy; reconstructed here.
    """
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    os.makedirs(path)
    return dir_name
class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file.

    Inheriting directly from file doesn't work, since file exports
    some read-only attributes (like 'name') that we don't like.
    Instead, we wrap an open file object and re-export its methods.
    """
    def __init__(self, parent, path, mode):
        nodes.node_descriptor.__init__(self, parent)
        self.__file = open(path, mode)
        # Only writable modes require re-indexing the content at close().
        if mode in ('w', 'w+', 'r+'):
            self._need_index = True
        else:
            self._need_index = False

        # Delegate the plain file API to the wrapped file object.
        for attr in ('closed', 'read', 'write', 'seek', 'tell'):
            setattr(self, attr, getattr(self.__file, attr))

    def close(self):
        # TODO: locking in init, close()
        fname = self.__file.name
        self.__file.close()

        if self._need_index:
            # Re-index the (possibly modified) content and store the result
            # on the matching ir.attachment row, through a fresh cursor.
            par = self._get_parent()
            cr = pooler.get_db(par.context.dbname).cursor()
            icont = ''
            mime = ''
            filename = par.path
            if isinstance(filename, (tuple, list)):
                filename = '/'.join(filename)

            try:
                mime, icont = cntIndex.doIndex(None, filename=filename,
                        content_type=None, realfname=fname)
            except Exception:
                # Indexing is best-effort; never fail the close() for it.
                logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)

            try:
                icont_u = ustr(icont)
            except UnicodeError:
                icont_u = ''

            try:
                filesize = os.stat(fname).st_size
                cr.execute('UPDATE ir_attachment SET index_content = %s, file_type = %s WHERE id = %s',
                            (icont_u, mime, par.file_id))
                par.content_length = filesize
                par.content_type = mime
                cr.commit()
            except Exception:
                logging.getLogger('document.storage').debug('Cannot save file indexed content:', exc_info=True)
            finally:
                cr.close()
class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data (raw bytea in ir_attachment.db_datas).

    Data is buffered in memory (StringIO) and flushed to the database
    at close().
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            mode = mode[:-1]  # binary flag is meaningless for StringIO

        if mode in ('r', 'r+'):
            cr = ira_browse._cr # reuse the cursor of the browse object, just now
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s',(ira_browse.id,))
            data = cr.fetchone()[0]
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode

    def close(self):
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                data = self.getvalue()
                icont = ''
                mime = ''
                filename = par.path
                if isinstance(filename, (tuple, list)):
                    filename = '/'.join(filename)

                try:
                    mime, icont = cntIndex.doIndex(data, filename=filename,
                            content_type=None, realfname=None)
                except Exception:
                    # best-effort indexing; close() must not fail here
                    logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)

                try:
                    icont_u = ustr(icont)
                except UnicodeError:
                    icont_u = ''

                out = psycopg2.Binary(data)
                cr.execute("UPDATE ir_attachment " \
                            "SET db_datas = %s, file_size=%s, " \
                            " index_content= %s, file_type=%s " \
                            " WHERE id = %s",
                            (out, len(data), icont_u, mime, par.file_id))
            elif self.mode == 'a':
                data = self.getvalue()
                out = psycopg2.Binary(data)
                # Append mode: concatenate to the existing bytea server-side.
                cr.execute("UPDATE ir_attachment " \
                            "SET db_datas = COALESCE(db_datas,'') || %s, " \
                            " file_size = COALESCE(file_size, 0) + %s " \
                            " WHERE id = %s",
                            (out, len(data), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)
class nodefd_db64(StringIO, nodes.node_descriptor):
    """ A descriptor to db data, base64 (the old way).

    It stores the data in base64 encoding at the db. Not optimal, but
    the transparent compression of Postgres will save the day.
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            mode = mode[:-1]  # binary flag is meaningless for StringIO

        if mode in ('r', 'r+'):
            StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode

    def close(self):
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                data = self.getvalue()
                icont = ''
                mime = ''
                filename = par.path
                if isinstance(filename, (tuple, list)):
                    filename = '/'.join(filename)

                try:
                    mime, icont = cntIndex.doIndex(data, filename=filename,
                            content_type=None, realfname=None)
                except Exception:
                    # best-effort indexing; close() must not fail here
                    logging.getLogger('document.storage').debug('Cannot index file:', exc_info=True)

                try:
                    icont_u = ustr(icont)
                except UnicodeError:
                    icont_u = ''

                # FIX: the original mixed up 'out' and 'data' in this branch;
                # use the buffered value consistently.
                cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s, ' \
                        'index_content = %s, file_type = %s ' \
                        'WHERE id = %s',
                        (base64.encodestring(data), len(data), icont_u, mime, par.file_id))
            elif self.mode == 'a':
                out = self.getvalue()
                # Yes, we're obviously using the wrong representation for storing our
                # data as base64-in-bytea: decode, concatenate, re-encode server-side.
                cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
                    " file_size = COALESCE(file_size, 0) + %s " \
                    " WHERE id = %s",
                    (base64.encodestring(out), len(out), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)
class document_storage(osv.osv):
    """ The primary object for data storage.

    Each instance of this object is a storage media, in which our application
    can store contents. The object here controls the behaviour of the storage
    media.
    The referring document.directory-ies will control the placement of data
    into the storage.

    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    _name = 'document.storage'
    _description = 'Storage Media'
    _doclog = logging.getLogger('document')

    _columns = {
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                ('realstore','External file storage'),], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
    }

    def _get_rootpath(self, cr, uid, context=None):
        """Default filestore path: <DMS_ROOT_PATH>/<database name>."""
        return os.path.join(DMS_ROOT_PATH, cr.dbname)

    _defaults = {
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage media. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,
    }
    _sql_constraints = [
        # SQL note: a path = NULL doesn't have to be unique.
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
    ]
339 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
340 """ retrieve the contents of some file_node having storage_id = id
341 optionally, fil_obj could point to the browse object of the file
346 boo = self.browse(cr, uid, id, context)
350 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
351 return self.__get_data_3(cr, uid, boo, ira, context)
353 def get_file(self, cr, uid, id, file_node, mode, context=None):
354 """ Return a file-like object for the contents of some node
358 boo = self.browse(cr, uid, id, context)
360 raise RuntimeError('media offline')
362 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
363 if boo.type == 'filestore':
364 if not ira.store_fname:
365 # On a migrated db, some files may have the wrong storage type
366 # try to fix their directory.
368 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
369 raise IOError(errno.ENOENT, 'No file can be located')
370 fpath = os.path.join(boo.path, ira.store_fname)
371 return nodefd_file(file_node, path=fpath, mode=mode)
373 elif boo.type == 'db':
374 # TODO: we need a better api for large files
375 return nodefd_db(file_node, ira_browse=ira, mode=mode)
377 elif boo.type == 'db64':
378 return nodefd_db64(file_node, ira_browse=ira, mode=mode)
380 elif boo.type == 'realstore':
381 if not ira.store_fname:
382 # On a migrated db, some files may have the wrong storage type
383 # try to fix their directory.
385 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
387 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
388 if not os.path.exists(fpath):
389 raise IOError("File not found: %s" % fpath)
390 return nodefd_file(file_node, path=fpath, mode=mode)
392 elif boo.type == 'virtual':
393 raise ValueError('Virtual storage does not support static files')
396 raise TypeError("No %s storage" % boo.type)
398 def __get_data_3(self, cr, uid, boo, ira, context):
400 raise RuntimeError('media offline')
401 if boo.type == 'filestore':
402 if not ira.store_fname:
403 # On a migrated db, some files may have the wrong storage type
404 # try to fix their directory.
406 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
408 fpath = os.path.join(boo.path, ira.store_fname)
409 return file(fpath, 'rb').read()
410 elif boo.type == 'db64':
411 # TODO: we need a better api for large files
413 out = base64.decodestring(ira.db_datas)
417 elif boo.type == 'db':
418 # We do an explicit query, to avoid type transformations.
419 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
425 elif boo.type == 'realstore':
426 if not ira.store_fname:
427 # On a migrated db, some files may have the wrong storage type
428 # try to fix their directory.
430 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
432 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
433 if os.path.exists(fpath):
434 return file(fpath,'rb').read()
435 elif not ira.store_fname:
438 raise IOError("File not found: %s" % fpath)
440 elif boo.type == 'virtual':
441 raise ValueError('Virtual storage does not support static files')
444 raise TypeError("No %s storage" % boo.type)
446 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
448 This function MUST be used from an ir.attachment. It wouldn't make sense
449 to store things persistently for other types (dynamic).
453 boo = self.browse(cr, uid, id, context)
457 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
460 raise RuntimeError('media offline')
461 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
464 if boo.type == 'filestore':
468 # This can be improved
469 if os.path.isdir(path):
470 for dirs in os.listdir(path):
471 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
474 flag = flag or create_directory(path)
475 filename = random_name()
476 fname = os.path.join(path, flag, filename)
477 fp = file(fname, 'wb')
480 self._doclog.debug( "Saved data to %s" % fname)
481 filesize = len(data) # os.stat(fname).st_size
482 store_fname = os.path.join(flag, filename)
484 # TODO Here, an old file would be left hanging.
487 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
488 raise except_orm(_('Error!'), str(e))
489 elif boo.type == 'db':
491 # will that work for huge data?
492 out = psycopg2.Binary(data)
493 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
494 (out, file_node.file_id))
495 elif boo.type == 'db64':
497 # will that work for huge data?
498 out = base64.encodestring(data)
499 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
500 (out, file_node.file_id))
501 elif boo.type == 'realstore':
503 file_node.fix_ppath(cr, ira)
504 npath = file_node.full_path() or []
505 # npath may contain empty elements, for root directory etc.
506 for i, n in enumerate(npath):
510 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
512 raise ValueError("Invalid char %s in path %s" %(ch, n))
515 path = os.path.join(*dpath)
516 if not os.path.isdir(path):
518 fname = os.path.join(path, npath[-1])
519 fp = file(fname,'wb')
522 self._doclog.debug("Saved data to %s", fname)
523 filesize = len(data) # os.stat(fname).st_size
524 store_fname = os.path.join(*npath)
525 # TODO Here, an old file would be left hanging.
527 self._doclog.warning("Couldn't save data:", exc_info=True)
528 raise except_orm(_('Error!'), str(e))
530 elif boo.type == 'virtual':
531 raise ValueError('Virtual storage does not support static files')
534 raise TypeError("No %s storage" % boo.type)
536 # 2nd phase: store the metadata
543 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
544 ira.file_type or None, fname)
546 self._doclog.debug('Cannot index file:', exc_info=True)
550 icont_u = ustr(icont)
554 # a hack: /assume/ that the calling write operation will not try
555 # to write the fname and size, and update them in the db concurrently.
556 # We cannot use a write() here, because we are already in one.
557 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
558 (store_fname, filesize, icont_u, mime, file_node.file_id))
559 file_node.content_length = filesize
560 file_node.content_type = mime
562 except Exception, e :
563 self._doclog.warning("Couldn't save data:", exc_info=True)
564 # should we really rollback once we have written the actual data?
565 # at the db case (only), that rollback would be safe
566 raise except_orm(_('Error at doc write!'), str(e))
568 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
569 """ Before we unlink a file (fil_boo), prepare the list of real
570 files that have to be removed, too. """
572 if not storage_bo.online:
573 raise RuntimeError('media offline')
575 if storage_bo.type == 'filestore':
576 fname = fil_bo.store_fname
579 path = storage_bo.path
580 return (storage_bo.id, 'file', os.path.join(path, fname))
581 elif storage_bo.type in ('db', 'db64'):
583 elif storage_bo.type == 'realstore':
584 fname = fil_bo.store_fname
587 path = storage_bo.path
588 return ( storage_bo.id, 'file', os.path.join(path, fname))
590 raise TypeError("No %s storage" % storage_bo.type)
592 def do_unlink(self, cr, uid, unres):
593 for id, ktype, fname in unres:
598 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
600 self._doclog.warning("Unknown unlink key %s" % ktype)
604 def simple_rename(self, cr, uid, file_node, new_name, context=None):
605 """ A preparation for a file rename.
606 It will not affect the database, but merely check and perhaps
607 rename the realstore file.
609 @return the dict of values that can safely be be stored in the db.
611 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
612 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
614 if sbro.type in ('filestore', 'db', 'db64'):
615 # nothing to do for a rename, allow to change the db field
616 return { 'name': new_name, 'datas_fname': new_name }
617 elif sbro.type == 'realstore':
618 fname = fil_bo.store_fname
620 return ValueError("Tried to rename a non-stored file")
621 path = storage_bo.path
622 oldpath = os.path.join(path, fname)
624 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
626 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
628 file_node.fix_ppath(cr, ira)
629 npath = file_node.full_path() or []
631 dpath.extend(npath[:-1])
632 dpath.append(new_name)
633 newpath = os.path.join(*dpath)
634 # print "old, new paths:", oldpath, newpath
635 os.rename(oldpath, newpath)
636 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
638 raise TypeError("No %s storage" % boo.type)
640 def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
641 """ A preparation for a file move.
642 It will not affect the database, but merely check and perhaps
643 move the realstore file.
645 @param ndir_bro a browse object of document.directory, where this
647 @return the dict of values that can safely be be stored in the db.
649 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
650 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
656 psto = par.storage_id.id
659 if file_node.storage_id != psto:
660 self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
661 raise NotImplementedError('Cannot move files between storage media')
663 if sbro.type in ('filestore', 'db', 'db64'):
664 # nothing to do for a rename, allow to change the db field
665 return { 'parent_id': ndir_bro.id }
666 elif sbro.type == 'realstore':
667 raise NotImplementedError("Cannot move in realstore, yet") # TODO
668 fname = fil_bo.store_fname
670 return ValueError("Tried to rename a non-stored file")
671 path = storage_bo.path
672 oldpath = os.path.join(path, fname)
674 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
676 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
678 file_node.fix_ppath(cr, ira)
679 npath = file_node.full_path() or []
681 dpath.extend(npath[:-1])
682 dpath.append(new_name)
683 newpath = os.path.join(*dpath)
684 # print "old, new paths:", oldpath, newpath
685 os.rename(oldpath, newpath)
686 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
688 raise TypeError("No %s storage" % boo.type)