1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
29 from StringIO import StringIO
31 from tools.misc import ustr
32 from tools.translate import _
34 from osv.orm import except_orm
41 from content_index import cntIndex
43 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
46 """ The algorithm of data storage
48 We have to consider 3 cases of data /retrieval/:
49 Given (context,path) we need to access the file (aka. node).
50 Given (directory, context), we need one of its children (for listings, views)
51 Given (ir.attachment, context), we need its data and metadata (node).
53 For data /storage/ we have the cases:
54 Have (ir.attachment, context), we modify the file (save, update, rename etc).
55 Have (directory, context), we create a file.
56 Have (path, context), we create or modify a file.
58 Note that in all above cases, we don't explicitly choose the storage media,
59 but always require a context to be present.
61 Note that a node will not always have a corresponding ir.attachment. Dynamic
62 nodes, for one, won't. Their metadata will be computed by the parent storage
65 The algorithm says that in any of the above cases, our first goal is to locate
66 the node for any combination of search criteria. It would be wise NOT to
67 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
68 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
70 We also contain all the parenting loop code in one function. This is intentional,
71 because one day this will be optimized in the db (Pg 8.4).
78 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
82 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
85 def create_directory(path):
86 dir_name = random_name()
87 path = os.path.join(path, dir_name)
91 class nodefd_file(nodes.node_descriptor):
92 """ A descriptor to a real file
94 Inheriting directly from file doesn't work, since file exports
95 some read-only attributes (like 'name') that we don't like.
97 def __init__(self, parent, path, mode):
98 nodes.node_descriptor.__init__(self, parent)
99 self.__file = open(path, mode)
101 for attr in ('closed', 'read', 'write', 'seek', 'tell'):
102 setattr(self,attr, getattr(self.__file, attr))
105 # TODO: locking in init, close()
109 class nodefd_db(StringIO, nodes.node_descriptor):
110 """ A descriptor to db data
112 def __init__(self, parent, ira_browse, mode):
113 nodes.node_descriptor.__init__(self, parent)
114 if mode.endswith('b'):
117 if mode in ('r', 'r+'):
118 cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', ira_browse.id)
119 data = cr.fetchone()[0]
120 StringIO.__init__(self, data)
121 elif mode in ('w', 'w+'):
122 StringIO.__init__(self, None)
123 # at write, we start at 0 (= overwrite), but have the original
124 # data available, in case of a seek()
126 StringIO.__init__(self, None)
128 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
129 raise IOError(errno.EINVAL, "Invalid file mode")
133 # we now open a *separate* cursor, to update the data.
134 # FIXME: this may be improved, for concurrency handling
135 par = self._get_parent()
136 uid = par.context.uid
137 cr = pooler.get_db(par.context.dbname).cursor()
139 if self.mode in ('w', 'w+', 'r+'):
140 out = self.getvalue()
141 cr.execute("UPDATE ir_attachment SET db_datas = decode(%s,'escape'), file_size=%s WHERE id = %s",
142 (out, len(out), par.file_id))
143 elif self.mode == 'a':
144 out = self.getvalue()
145 cr.execute("UPDATE ir_attachment " \
146 "SET db_datas = COALESCE(db_datas,'') || decode(%s, 'escape'), " \
147 " file_size = COALESCE(file_size, 0) + %s " \
149 (out, len(out), par.file_id))
152 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
158 class nodefd_db64(StringIO, nodes.node_descriptor):
159 """ A descriptor to db data, base64 (the old way)
161 It stores the data in base64 encoding at the db. Not optimal, but
162 the transparent compression of Postgres will save the day.
164 def __init__(self, parent, ira_browse, mode):
165 nodes.node_descriptor.__init__(self, parent)
166 if mode.endswith('b'):
169 if mode in ('r', 'r+'):
170 StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
171 elif mode in ('w', 'w+'):
172 StringIO.__init__(self, None)
173 # at write, we start at 0 (= overwrite), but have the original
174 # data available, in case of a seek()
176 StringIO.__init__(self, None)
178 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
179 raise IOError(errno.EINVAL, "Invalid file mode")
183 # we now open a *separate* cursor, to update the data.
184 # FIXME: this may be improved, for concurrency handling
185 par = self._get_parent()
186 uid = par.context.uid
187 cr = pooler.get_db(par.context.dbname).cursor()
189 if self.mode in ('w', 'w+', 'r+'):
190 out = self.getvalue()
191 cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size=%s WHERE id = %s',
192 (base64.encodestring(out), len(out), par.file_id))
193 elif self.mode == 'a':
194 out = self.getvalue()
195 # Yes, we're obviously using the wrong representation for storing our
196 # data as base64-in-bytea
197 cr.execute("UPDATE ir_attachment " \
198 "SET db_datas = encode( (COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea , " \
199 " file_size = COALESCE(file_size, 0) + %s " \
201 (base64.encodestring(out), len(out), par.file_id))
204 logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
210 class document_storage(osv.osv):
211 """ The primary object for data storage.
212 Each instance of this object is a storage media, in which our application
213 can store contents. The object here controls the behaviour of the storage
215 The referring document.directory-ies will control the placement of data
218 It is a bad idea to have multiple document.storage objects pointing to
219 the same tree of filesystem storage.
221 _name = 'document.storage'
222 _description = 'Storage Media'
223 _doclog = logging.getLogger('document')
226 'name': fields.char('Name', size=64, required=True, select=1),
227 'write_date': fields.datetime('Date Modified', readonly=True),
228 'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
229 'create_date': fields.datetime('Date Created', readonly=True),
230 'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
231 'user_id': fields.many2one('res.users', 'Owner'),
232 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
233 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
234 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
235 ('realstore', 'External file storage'), ('virtual', 'Virtual storage')], 'Type', required=True),
236 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
237 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
238 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
241 def _get_rootpath(self, cr, uid, context=None):
242 return os.path.join(DMS_ROOT_PATH, cr.dbname)
245 'user_id': lambda self, cr, uid, ctx: uid,
246 'online': lambda *args: True,
247 'readonly': lambda *args: False,
248 # Note: the defaults below should only be used ONCE for the default
249 # storage media. All other times, we should create different paths at least.
250 'type': lambda *args: 'filestore',
251 'path': _get_rootpath,
254 # SQL note: a path = NULL doesn't have to be unique.
255 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
258 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
259 """ retrieve the contents of some file_node having storage_id = id
260 optionally, fil_obj could point to the browse object of the file
265 boo = self.browse(cr, uid, id, context)
269 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
270 return self.__get_data_3(cr, uid, boo, ira, context)
272 def get_file(self, cr, uid, id, file_node, mode, context=None):
275 boo = self.browse(cr, uid, id, context)
277 raise RuntimeError('media offline')
279 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
280 if boo.type == 'filestore':
281 if not ira.store_fname:
282 # On a migrated db, some files may have the wrong storage type
283 # try to fix their directory.
285 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
286 raise IOError(errno.ENOENT, 'No file can be located')
287 fpath = os.path.join(boo.path, ira.store_fname)
289 self._doclog.debug("Trying to read \"%s\".."% fpath)
290 return nodefd_file(file_node, path=fpath, mode=mode)
292 elif boo.type == 'db':
293 # TODO: we need a better api for large files
295 self._doclog.debug("Trying to obtain db_datas for ir.attachment[%d]", ira.id)
296 return nodefd_db64(file_node, ira_browse=ira, mode=mode)
298 elif boo.type == 'realstore':
299 if not ira.store_fname:
300 # On a migrated db, some files may have the wrong storage type
301 # try to fix their directory.
303 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
305 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
306 if not os.path.exists(fpath):
307 raise IOError("File not found: %s" % fpath)
308 return nodefd_file(file_node, path=fpath, mode=mode)
311 raise TypeError("No %s storage" % boo.type)
313 def __get_data_3(self, cr, uid, boo, ira, context):
315 raise RuntimeError('media offline')
316 if boo.type == 'filestore':
317 if not ira.store_fname:
318 # On a migrated db, some files may have the wrong storage type
319 # try to fix their directory.
321 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
323 fpath = os.path.join(boo.path, ira.store_fname)
324 return file(fpath, 'rb').read()
325 elif boo.type == 'db':
326 # TODO: we need a better api for large files
328 out = base64.decodestring(ira.db_datas)
332 elif boo.type == 'realstore':
333 if not ira.store_fname:
334 # On a migrated db, some files may have the wrong storage type
335 # try to fix their directory.
337 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
339 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
340 if os.path.exists(fpath):
341 return file(fpath,'rb').read()
342 elif not ira.store_fname:
345 raise IOError("File not found: %s" % fpath)
347 raise TypeError("No %s storage" % boo.type)
349 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
351 This function MUST be used from an ir.attachment. It wouldn't make sense
352 to store things persistently for other types (dynamic).
356 boo = self.browse(cr, uid, id, context)
360 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
363 raise RuntimeError('media offline')
364 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
367 if boo.type == 'filestore':
371 # This can be improved
372 if os.path.isdir(path):
373 for dirs in os.listdir(path):
374 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
377 flag = flag or create_directory(path)
378 filename = random_name()
379 fname = os.path.join(path, flag, filename)
380 fp = file(fname, 'wb')
383 self._doclog.debug( "Saved data to %s" % fname)
384 filesize = len(data) # os.stat(fname).st_size
385 store_fname = os.path.join(flag, filename)
387 # TODO Here, an old file would be left hanging.
390 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
391 raise except_orm(_('Error!'), str(e))
392 elif boo.type == 'db':
394 # will that work for huge data? TODO
395 out = base64.encodestring(data)
396 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
397 (out, file_node.file_id))
398 elif boo.type == 'realstore':
400 file_node.fix_ppath(cr, ira)
401 npath = file_node.full_path() or []
402 # npath may contain empty elements, for root directory etc.
403 for i, n in enumerate(npath):
407 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
409 raise ValueError("Invalid char %s in path %s" %(ch, n))
412 path = os.path.join(*dpath)
413 if not os.path.isdir(path):
415 fname = os.path.join(path, npath[-1])
416 fp = file(fname,'wb')
419 self._doclog.debug("Saved data to %s", fname)
420 filesize = len(data) # os.stat(fname).st_size
421 store_fname = os.path.join(*npath)
422 # TODO Here, an old file would be left hanging.
424 self._doclog.warning("Couldn't save data:", exc_info=True)
425 raise except_orm(_('Error!'), str(e))
427 raise TypeError("No %s storage" % boo.type)
429 # 2nd phase: store the metadata
436 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
437 ira.file_type or None, fname)
439 self._doclog.debug('Cannot index file:', exc_info=True)
443 icont_u = ustr(icont)
447 # a hack: /assume/ that the calling write operation will not try
448 # to write the fname and size, and update them in the db concurrently.
449 # We cannot use a write() here, because we are already in one.
450 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
451 (store_fname, filesize, icont_u, mime, file_node.file_id))
452 file_node.content_length = filesize
453 file_node.content_type = mime
455 except Exception, e :
456 self._doclog.warning("Couldn't save data:", exc_info=True)
457 # should we really rollback once we have written the actual data?
458 # at the db case (only), that rollback would be safe
459 raise except_orm(_('Error at doc write!'), str(e))
461 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
462 """ Before we unlink a file (fil_boo), prepare the list of real
463 files that have to be removed, too. """
465 if not storage_bo.online:
466 raise RuntimeError('media offline')
468 if storage_bo.type == 'filestore':
469 fname = fil_bo.store_fname
472 path = storage_bo.path
473 return (storage_bo.id, 'file', os.path.join(path, fname))
474 elif storage_bo.type == 'db':
476 elif storage_bo.type == 'realstore':
477 fname = fil_bo.store_fname
480 path = storage_bo.path
481 return ( storage_bo.id, 'file', os.path.join(path, fname))
483 raise TypeError("No %s storage" % storage_bo.type)
485 def do_unlink(self, cr, uid, unres):
486 for id, ktype, fname in unres:
491 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
493 self._doclog.warning("Unknown unlink key %s" % ktype)
497 def simple_rename(self, cr, uid, file_node, new_name, context=None):
498 """ A preparation for a file rename.
499 It will not affect the database, but merely check and perhaps
500 rename the realstore file.
502 @return the dict of values that can safely be be stored in the db.
504 sbro = self.browse(cr, uid, file_node.storage_id, context=context)
505 assert sbro, "The file #%d didn't provide storage" % file_node.file_id
507 if sbro.type in ('filestore', 'db'):
508 # nothing to do for a rename, allow to change the db field
509 return { 'name': new_name, 'datas_fname': new_name }
510 elif sbro.type == 'realstore':
511 fname = fil_bo.store_fname
513 return ValueError("Tried to rename a non-stored file")
514 path = storage_bo.path
515 oldpath = os.path.join(path, fname)
517 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
519 raise ValueError("Invalid char %s in name %s" %(ch, new_name))
521 file_node.fix_ppath(cr, ira)
522 npath = file_node.full_path() or []
524 dpath.extend(npath[:-1])
525 dpath.append(new_name)
526 newpath = os.path.join(*dpath)
527 # print "old, new paths:", oldpath, newpath
528 os.rename(oldpath, newpath)
529 return { 'name': new_name, 'datas_fname': new_name, 'store_fname': new_name }
531 raise TypeError("No %s storage" % boo.type)