# -*- encoding: utf-8 -*-
##############################################################################
#
# OpenERP, Open Source Management Solution
# Copyright (C) P. Christeas, 2009, all rights reserved
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
import os
import errno
import base64
import random
import string
import logging
from StringIO import StringIO

import psycopg2

import tools
import pooler
import nodes
from osv import osv, fields
from osv.orm import except_orm
from tools.misc import ustr
from tools.translate import _
from content_index import cntIndex
DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
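# Illustrative note (assumed values, not part of the configuration): with
# root_path = '/opt/openerp' and no explicit 'document_path', the filestore
# data of a database named 'mydb' would end up under the per-db default
# returned by document_storage._get_rootpath(), i.e. '/opt/openerp/filestore/mydb'.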
""" The algorithm of data storage

    We have to consider 3 cases of data /retrieval/:
        Given (context, path), we need to access the file (aka. node).
        Given (directory, context), we need one of its children (for listings, views).
        Given (ir.attachment, context), we need its data and metadata (node).

    For data /storage/ we have the cases:
        Have (ir.attachment, context), we modify the file (save, update, rename etc).
        Have (directory, context), we create a file.
        Have (path, context), we create or modify a file.

    Note that in all the above cases, we don't explicitly choose the storage
    media, but always require a context to be present.

    Note that a node will not always have a corresponding ir.attachment. Dynamic
    nodes, for one, won't. Their metadata will be computed by the parent storage
    media.

    The algorithm says that in any of the above cases, our first goal is to locate
    the node for any combination of search criteria. It would be wise NOT to
    represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
    but to jump directly to the end node (like node[/dir1/dir2]) whenever possible.

    We also keep all the parenting-loop code in one function. This is intentional,
    because one day this will be optimized in the db (Pg 8.4). An illustrative
    sketch of the retrieval calls follows below.
"""
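# A minimal sketch of the retrieval cases above, expressed with the APIs defined
# in this module. It assumes a live cursor `cr`, a `uid`, a registry reference
# `pool` and a resolved `file_node` (from the `nodes` module); nothing here runs
# at import time.
#
#   stor = pool.get('document.storage')
#   # (ir.attachment, context) -> raw bytes of the node
#   data = stor.get_data(cr, uid, file_node.storage_id, file_node)
#   # same node, but as a file-like descriptor (nodefd_file / nodefd_db / ...)
#   fd = stor.get_file(cr, uid, file_node.storage_id, file_node, mode='rb')
#   data = fd.read()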
def random_name():
    """ Return a random 10-letter name, used for stored files. """
    d = [random.choice(string.ascii_letters) for x in xrange(10)]
    return ''.join(d)
INVALID_CHARS = {
    '*': str(hash('*')), '|': str(hash('|')), "\\": str(hash("\\")), '/': '__',
    ':': str(hash(':')), '"': str(hash('"')), '<': str(hash('<')),
    '>': str(hash('>')), '?': str(hash('?')),
}
def create_directory(path):
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    os.makedirs(path, 0750)
    return dir_name
class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file

        Inheriting directly from file doesn't work, since file exports
        some read-only attributes (like 'name') that we don't like.
    """
    def __init__(self, parent, path, mode):
        nodes.node_descriptor.__init__(self, parent)
        self.__file = open(path, mode)

        # delegate the usual file operations to the underlying file object
        for attr in ('closed', 'read', 'write', 'seek', 'tell'):
            setattr(self, attr, getattr(self.__file, attr))

    # TODO: locking in init, close()
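    # Illustrative sketch (not executed): document_storage.get_file() builds this
    # descriptor for 'filestore'/'realstore' media and hands it to the caller,
    # which then uses the delegated file methods, e.g.:
    #
    #   fd = nodefd_file(file_node, path=fpath, mode='rb')
    #   header = fd.read(16)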
class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            mode = mode[:-1]

        if mode in ('r', 'r+'):
            cr = ira_browse._cr     # reuse the cursor of the browse object, just now
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira_browse.id,))
            data = cr.fetchone()[0]
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode
    def close(self):
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                data = self.getvalue()
                out = psycopg2.Binary(data)
                cr.execute("UPDATE ir_attachment SET db_datas = %s, file_size = %s WHERE id = %s",
                    (out, len(data), par.file_id))
            elif self.mode == 'a':
                data = self.getvalue()
                out = psycopg2.Binary(data)
                cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = COALESCE(db_datas,'') || %s, " \
                    "    file_size = COALESCE(file_size, 0) + %s " \
                    "WHERE id = %s",
                    (out, len(data), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)
class nodefd_db64(StringIO, nodes.node_descriptor):
    """ A descriptor to db data, base64 (the old way)

        It stores the data in base64 encoding at the db. Not optimal, but
        the transparent compression of Postgres will save the day.
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            mode = mode[:-1]

        if mode in ('r', 'r+'):
            StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode
    def close(self):
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                out = self.getvalue()
                cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size = %s WHERE id = %s',
                    (base64.encodestring(out), len(out), par.file_id))
            elif self.mode == 'a':
                out = self.getvalue()
                # Yes, we're obviously using the wrong representation for storing our
                # data as base64-in-bytea
                cr.execute("UPDATE ir_attachment " \
                    "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
                    "    file_size = COALESCE(file_size, 0) + %s " \
                    "WHERE id = %s",
                    (base64.encodestring(out), len(out), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)
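    # Minimal sketch of the round trip this class performs (illustrative only,
    # plain Python 2 standard library, no database involved):
    #
    #   raw = 'hello world'
    #   stored = base64.encodestring(raw)        # what ends up in db_datas
    #   assert base64.decodestring(stored) == raw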
class document_storage(osv.osv):
    """ The primary object for data storage.

        Each instance of this object is a storage media, in which our application
        can store contents. The object here controls the behaviour of the storage
        media.
        The referring document.directory-ies will control the placement of data
        into the media.

        It is a bad idea to have multiple document.storage objects pointing to
        the same tree of filesystem storage.
    """
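    # Illustrative sketch (assumed values): creating an extra 'filestore' media
    # through the ORM, using only fields declared in _columns below.
    #
    #   stor_obj = pool.get('document.storage')
    #   new_id = stor_obj.create(cr, uid, {
    #       'name': 'Archive 2009',
    #       'type': 'filestore',
    #       'path': '/srv/openerp/archive-2009',
    #       'online': True,
    #   })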
    _name = 'document.storage'
    _description = 'Storage Media'
    _doclog = logging.getLogger('document')
    _columns = {
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                                  ('realstore', 'External file storage'),], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
    }
    def _get_rootpath(self, cr, uid, context=None):
        return os.path.join(DMS_ROOT_PATH, cr.dbname)
    _defaults = {
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage media. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,
    }
    _sql_constraints = [
        # SQL note: a path = NULL doesn't have to be unique.
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!"),
    ]
    def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
        """ retrieve the contents of some file_node having storage_id = id
            optionally, fil_obj could point to the browse object of the file
        """
        boo = self.browse(cr, uid, id, context)
        if fil_obj:
            ira = fil_obj
        else:
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        return self.__get_data_3(cr, uid, boo, ira, context)
    def get_file(self, cr, uid, id, file_node, mode, context=None):
        """ Return a file-like object for the contents of some node
        """
        boo = self.browse(cr, uid, id, context)
        if not boo.online:
            raise RuntimeError('media offline')

        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                self._doclog.warning("ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                raise IOError(errno.ENOENT, 'No file can be located')
            fpath = os.path.join(boo.path, ira.store_fname)
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'db':
            # TODO: we need a better api for large files
            return nodefd_db(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'db64':
            return nodefd_db64(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." % ira.id)
            fpath = os.path.join(boo.path, ira.store_fname or ira.name)
            if not os.path.exists(fpath):
                raise IOError("File not found: %s" % fpath)
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')

        else:
            raise TypeError("No %s storage" % boo.type)
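    # Illustrative sketch (not executed): reading a node through the descriptor
    # returned by get_file(); the concrete class depends on the media type.
    #
    #   fd = self.get_file(cr, uid, storage_id, file_node, mode='rb')
    #   data = fd.read()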
    def __get_data_3(self, cr, uid, boo, ira, context):
        if not boo.online:
            raise RuntimeError('media offline')
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                self._doclog.warning("ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                raise IOError(errno.ENOENT, 'No file can be located')
            fpath = os.path.join(boo.path, ira.store_fname)
            return file(fpath, 'rb').read()
        elif boo.type == 'db64':
            # TODO: we need a better api for large files
            out = base64.decodestring(ira.db_datas)
            return out
        elif boo.type == 'db':
            # We do an explicit query, to avoid type transformations.
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
            return cr.fetchone()[0]
        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." % ira.id)
            fpath = os.path.join(boo.path, ira.store_fname or ira.name)
            if os.path.exists(fpath):
                return file(fpath, 'rb').read()
            elif not ira.store_fname:
                return False
            else:
                raise IOError("File not found: %s" % fpath)
        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')
        else:
            raise TypeError("No %s storage" % boo.type)
    def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
        """ store the data of some file_node having storage_id = id
            This function MUST be used from an ir.attachment. It wouldn't make sense
            to store things persistently for other types (dynamic).
        """
        boo = self.browse(cr, uid, id, context)
        if fil_obj:
            ira = fil_obj
        else:
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)

        if not boo.online:
            raise RuntimeError('media offline')
        self._doclog.debug("Store data for ir.attachment #%d" % ira.id)
        store_fname = None
        fname = None
        if boo.type == 'filestore':
            path = boo.path
            try:
                flag = None
                # This can be improved
                if os.path.isdir(path):
                    for dirs in os.listdir(path):
                        if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
                            flag = dirs
                            break
                flag = flag or create_directory(path)
                filename = random_name()
                fname = os.path.join(path, flag, filename)
                fp = file(fname, 'wb')
                fp.write(data)
                fp.close()
                self._doclog.debug("Saved data to %s" % fname)
                filesize = len(data)  # os.stat(fname).st_size
                store_fname = os.path.join(flag, filename)

                # TODO Here, an old file would be left hanging.
            except Exception, e:
                self._doclog.warning("Couldn't save data to %s", path, exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'db':
            filesize = len(data)
            # will that work for huge data?
            out = psycopg2.Binary(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'db64':
            filesize = len(data)
            # will that work for huge data?
            out = base64.encodestring(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'realstore':
            try:
                file_node.fix_ppath(cr, ira)
                npath = file_node.full_path() or []
                # npath may contain empty elements, for root directory etc.
                for i, n in enumerate(npath):
                    for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
                        if ch in n:
                            raise ValueError("Invalid char %s in path %s" % (ch, n))
                dpath = [boo.path,]
                dpath += npath[:-1]
                path = os.path.join(*dpath)
                if not os.path.isdir(path):
                    os.makedirs(path)
                fname = os.path.join(path, npath[-1])
                fp = file(fname, 'wb')
                fp.write(data)
                fp.close()
                self._doclog.debug("Saved data to %s", fname)
                filesize = len(data)  # os.stat(fname).st_size
                store_fname = os.path.join(*npath)

                # TODO Here, an old file would be left hanging.
            except Exception, e:
                self._doclog.warning("Couldn't save data:", exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')
        else:
            raise TypeError("No %s storage" % boo.type)
        # 2nd phase: store the metadata
        try:
            icont = ''
            mime = ira.file_type
            try:
                mime, icont = cntIndex.doIndex(data, ira.datas_fname,
                        ira.file_type or None, fname)
            except Exception:
                self._doclog.debug('Cannot index file:', exc_info=True)

            try:
                icont_u = ustr(icont)
            except UnicodeError:
                icont_u = ''

            # a hack: /assume/ that the calling write operation will not try
            # to write the fname and size, and update them in the db concurrently.
            # We cannot use a write() here, because we are already in one.
            cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
                (store_fname, filesize, icont_u, mime, file_node.file_id))
            file_node.content_length = filesize
            file_node.content_type = mime
            return True
        except Exception, e:
            self._doclog.warning("Couldn't save data:", exc_info=True)
            # should we really rollback once we have written the actual data?
            # at the db case (only), that rollback would be safe
            raise except_orm(_('Error at doc write!'), str(e))
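    # Illustrative round trip (sketch; assumes a resolved `file_node` and that
    # the caller runs inside an ir.attachment write, as the docstring requires):
    #
    #   self.set_data(cr, uid, storage_id, file_node, 'new file contents')
    #   assert self.get_data(cr, uid, storage_id, file_node) == 'new file contents'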
    def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
        """ Before we unlink a file (fil_bo), prepare the list of real
            files that have to be removed, too. """

        if not storage_bo.online:
            raise RuntimeError('media offline')

        if storage_bo.type == 'filestore':
            fname = fil_bo.store_fname
            if not fname:
                return None
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        elif storage_bo.type in ('db', 'db64'):
            return None
        elif storage_bo.type == 'realstore':
            fname = fil_bo.store_fname
            if not fname:
                return None
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        else:
            raise TypeError("No %s storage" % storage_bo.type)
    def do_unlink(self, cr, uid, unres):
        for id, ktype, fname in unres:
            if ktype == 'file':
                try:
                    os.unlink(fname)
                except Exception:
                    self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
            else:
                self._doclog.warning("Unknown unlink key %s" % ktype)

        return True
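    # Sketch of the two-phase deletion these helpers implement (illustrative;
    # `storage_bo` and `fil_bo` are browse records of the media and attachment):
    #
    #   unres = []
    #   r = self.prepare_unlink(cr, uid, storage_bo, fil_bo)
    #   if r:
    #       unres.append(r)
    #   ...  # unlink the ir.attachment rows first
    #   self.do_unlink(cr, uid, unres)   # then remove the real files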
    def simple_rename(self, cr, uid, file_node, new_name, context=None):
        """ A preparation for a file rename.
            It will not affect the database, but merely check and perhaps
            rename the realstore file.

            @return the dict of values that can safely be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'name': new_name, 'datas_fname': new_name }
        elif sbro.type == 'realstore':
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            fname = ira.store_fname
            if not fname:
                raise ValueError("Tried to rename a non-stored file")
            path = sbro.path
            oldpath = os.path.join(path, fname)

            for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
                if ch in new_name:
                    raise ValueError("Invalid char %s in name %s" % (ch, new_name))

            file_node.fix_ppath(cr, ira)
            npath = file_node.full_path() or []
            dpath = [path,]
            dpath.extend(npath[:-1])
            dpath.append(new_name)
            newpath = os.path.join(*dpath)
            # print "old, new paths:", oldpath, newpath
            os.rename(oldpath, newpath)
            return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
        else:
            raise TypeError("No %s storage" % sbro.type)
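    # Illustrative use (sketch): callers apply the returned values to the
    # ir.attachment themselves, e.g. from its write() override:
    #
    #   vals = self.simple_rename(cr, uid, file_node, 'report-2009.pdf')
    #   # for non-realstore media: {'name': 'report-2009.pdf', 'datas_fname': 'report-2009.pdf'}
    #   self.pool.get('ir.attachment').write(cr, uid, [file_node.file_id], vals)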
    def simple_move(self, cr, uid, file_node, ndir_bro, context=None):
        """ A preparation for a file move.
            It will not affect the database, but merely check and perhaps
            move the realstore file.

            @param ndir_bro  a browse object of document.directory, where this
                             file should go to
            @return the dict of values that can safely be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id

        # locate the storage media of the destination directory, walking up
        # the directory tree until one is found
        par = ndir_bro
        psto = None
        while par:
            if par.storage_id:
                psto = par.storage_id.id
                break
            par = par.parent_id

        if file_node.storage_id != psto:
            self._doclog.debug('Cannot move file %r from %r to %r', file_node, file_node.parent, ndir_bro.name)
            raise NotImplementedError('Cannot move files between storage media')
        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a move, allow to change the db field
            return { 'parent_id': ndir_bro.id }
        elif sbro.type == 'realstore':
            raise NotImplementedError("Cannot move in realstore, yet")  # TODO
            # The draft below is unreachable until the TODO above is resolved.
            # It mirrors simple_rename()'s realstore branch and still refers to
            # names (ira, fname, new_name) that a real move implementation
            # would first have to resolve.
            #
            #   fname = ira.store_fname
            #   if not fname:
            #       raise ValueError("Tried to rename a non-stored file")
            #   path = sbro.path
            #   oldpath = os.path.join(path, fname)
            #
            #   for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
            #       if ch in new_name:
            #           raise ValueError("Invalid char %s in name %s" % (ch, new_name))
            #
            #   file_node.fix_ppath(cr, ira)
            #   npath = file_node.full_path() or []
            #   dpath = [path,]
            #   dpath.extend(npath[:-1])
            #   dpath.append(new_name)
            #   newpath = os.path.join(*dpath)
            #   os.rename(oldpath, newpath)
            #   return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
        else:
            raise TypeError("No %s storage" % sbro.type)
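    # Illustrative use (sketch; `file_node` resolved by the `nodes` layer): the
    # caller writes the returned values on the ir.attachment itself.
    #
    #   vals = self.simple_move(cr, uid, file_node, ndir_bro)
    #   # vals == {'parent_id': ndir_bro.id} for filestore/db/db64 media
    #   self.pool.get('ir.attachment').write(cr, uid, [file_node.file_id], vals)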