# -*- encoding: utf-8 -*-
##############################################################################
#
#    OpenERP, Open Source Management Solution
#    Copyright (C) P. Christeas, 2009, all rights reserved
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################

import os
import base64, errno, logging, random, string
import psycopg2
import tools
import pooler
import nodes
from StringIO import StringIO
from osv import osv, fields
from osv.orm import except_orm
from tools.misc import ustr
from tools.translate import _
from content_index import cntIndex

DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))

""" The algorithm of data storage

We have to consider 3 cases of data /retrieval/:
 Given (context, path), we need to access the file (aka. node).
 Given (directory, context), we need one of its children (for listings, views).
 Given (ir.attachment, context), we need its data and metadata (node).

For data /storage/ we have the cases:
 Have (ir.attachment, context), we modify the file (save, update, rename etc).
 Have (directory, context), we create a file.
 Have (path, context), we create or modify a file.

Note that in all above cases, we don't explicitly choose the storage media,
but always require a context to be present.

Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
media + directory.

The algorithm says that in any of the above cases, our first goal is to locate
the node for any combination of search criteria. It would be wise NOT to
represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
but to jump directly to the end node (like node[/dir1/dir2]) whenever possible.

We also keep all the parenting loop code in one function. This is intentional,
because one day this will be optimized in the db (Pg 8.4).
"""

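# A minimal sketch of the "jump to the end node" idea above. It assumes the
# node_context/get_uri API of the companion nodes.py module; the helper is
# illustrative only and is not used anywhere in this file:
#
#   def _locate_node(cr, uid, path, context):
#       # Resolve "/dir1/dir2/file" in one step, instead of walking
#       # node[/], node[/dir1] and node[/dir1/dir2] one by one.
#       ncontext = nodes.get_node_context(cr, uid, context)
#       return ncontext.get_uri(cr, path.strip('/').split('/'))
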
def random_name():
    random.seed()
    d = [random.choice(string.ascii_letters) for x in xrange(10)]
    return "".join(d)

INVALID_CHARS = {
    '*': str(hash('*')), '|': str(hash('|')), "\\": str(hash("\\")),
    '/': '__', ':': str(hash(':')), '"': str(hash('"')),
    '<': str(hash('<')), '>': str(hash('>')), '?': str(hash('?')),
}

def create_directory(path):
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    os.makedirs(path)
    return dir_name

class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file

    Inheriting directly from file doesn't work, since file exports
    some read-only attributes (like 'name') that we don't like.
    """
    def __init__(self, parent, path, mode):
        nodes.node_descriptor.__init__(self, parent)
        self.__file = open(path, mode)

        # Delegate the common file operations to the underlying file object.
        for attr in ('closed', 'read', 'write', 'seek', 'tell'):
            setattr(self, attr, getattr(self.__file, attr))

    def close(self):
        # TODO: locking in init, close()
        self.__file.close()

class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            mode = mode[:-1]

        if mode in ('r', 'r+'):
            # reuse the cursor of the browse object, just for this query
            cr = ira_browse._cr
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira_browse.id,))
            data = cr.fetchone()[0]
            StringIO.__init__(self, data)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode
    def close(self):
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                out = self.getvalue()
                cr.execute("UPDATE ir_attachment SET db_datas = decode(%s,'escape'), file_size = %s WHERE id = %s",
                    (out, len(out), par.file_id))
            elif self.mode == 'a':
                out = self.getvalue()
                cr.execute("UPDATE ir_attachment "
                    "SET db_datas = COALESCE(db_datas,'') || decode(%s, 'escape'), "
                    "    file_size = COALESCE(file_size, 0) + %s "
                    "WHERE id = %s",
                    (out, len(out), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)

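# A hypothetical usage sketch for the descriptor above (the storage_obj and
# file_node names are assumptions): get_file() below hands back a nodefd_db
# for 'db' media; reads come straight from the SELECT in __init__, while the
# UPDATE only happens once the descriptor is closed:
#
#   fd = storage_obj.get_file(cr, uid, storage_id, file_node, mode='r')
#   data = fd.read()
#   fd.close()   # for 'w'/'w+'/'a' modes, this is where db_datas is written
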
class nodefd_db64(StringIO, nodes.node_descriptor):
    """ A descriptor to db data, base64 (the old way)

    It stores the data in base64 encoding at the db. Not optimal, but
    the transparent compression of Postgres will save the day.
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            mode = mode[:-1]

        if mode in ('r', 'r+'):
            StringIO.__init__(self, base64.decodestring(ira_browse.db_datas))
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode
    def close(self):
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            if self.mode in ('w', 'w+', 'r+'):
                out = self.getvalue()
                cr.execute('UPDATE ir_attachment SET db_datas = %s::bytea, file_size = %s WHERE id = %s',
                    (base64.encodestring(out), len(out), par.file_id))
            elif self.mode == 'a':
                out = self.getvalue()
                # Yes, we're obviously using the wrong representation for storing our
                # data as base64-in-bytea
                cr.execute("UPDATE ir_attachment "
                    "SET db_datas = encode((COALESCE(decode(encode(db_datas,'escape'),'base64'),'') || decode(%s, 'base64')),'base64')::bytea, "
                    "    file_size = COALESCE(file_size, 0) + %s "
                    "WHERE id = %s",
                    (base64.encodestring(out), len(out), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)

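# Note on the 'db64' append path above: every append decodes and re-encodes
# the whole column, so its cost grows with the file size. A rough stand-alone
# illustration of the storage overhead (plain Python, not server code):
#
#   import base64
#   raw = 'x' * 1000
#   print len(base64.encodestring(raw))   # ~1350 bytes stored per 1000 raw
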
class document_storage(osv.osv):
    """ The primary object for data storage.

    Each instance of this object is a storage media, in which our application
    can store contents. The object here controls the behaviour of the storage
    media.
    The referring document.directory-ies will control the placement of data
    into the storage.

    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    _name = 'document.storage'
    _description = 'Storage Media'
    _doclog = logging.getLogger('document')
    _columns = {
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                                  ('realstore', 'External file storage')], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
    }
    def _get_rootpath(self, cr, uid, context=None):
        return os.path.join(DMS_ROOT_PATH, cr.dbname)
    _defaults = {
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage media. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,
    }
    _sql_constraints = [
        # SQL note: a path = NULL doesn't have to be unique.
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
    ]
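    # A hypothetical sketch of declaring an extra storage media through the
    # ORM (name and path are made up; the default 'filestore' record created
    # by the module data is normally all that is needed):
    #
    #   storage_obj = self.pool.get('document.storage')
    #   sid = storage_obj.create(cr, uid, {
    #       'name': 'Archive disk',
    #       'type': 'filestore',
    #       'path': '/srv/filestore-archive',   # (type, path) must be unique
    #   })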
    def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
        """ retrieve the contents of some file_node having storage_id = id
            optionally, fil_obj could point to the browse object of the file
            (ir.attachment)
        """
        if not context:
            context = {}
        boo = self.browse(cr, uid, id, context)
        ira = fil_obj or self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        return self.__get_data_3(cr, uid, boo, ira, context)
    def get_file(self, cr, uid, id, file_node, mode, context=None):
        """ Return a file-like object for the contents of some node
        """
        if context is None:
            context = {}
        boo = self.browse(cr, uid, id, context)
        if not boo.online:
            raise RuntimeError('media offline')

        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                raise IOError(errno.ENOENT, 'No file can be located')
            fpath = os.path.join(boo.path, ira.store_fname)
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'db':
            # TODO: we need a better api for large files
            return nodefd_db(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'db64':
            return nodefd_db64(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." % ira.id)
            fpath = os.path.join(boo.path, ira.store_fname or ira.name)
            if not os.path.exists(fpath):
                raise IOError("File not found: %s" % fpath)
            return nodefd_file(file_node, path=fpath, mode=mode)

        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')

        else:
            raise TypeError("No %s storage" % boo.type)
    def __get_data_3(self, cr, uid, boo, ira, context):
        if not boo.online:
            raise RuntimeError('media offline')
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                return None
            fpath = os.path.join(boo.path, ira.store_fname)
            return file(fpath, 'rb').read()
        elif boo.type == 'db64':
            # TODO: we need a better api for large files
            if ira.db_datas:
                return base64.decodestring(ira.db_datas)
            else:
                return ''
        elif boo.type == 'db':
            # We do an explicit query, to avoid type transformations.
            cr.execute('SELECT db_datas FROM ir_attachment WHERE id = %s', (ira.id,))
            res = cr.fetchone()
            if res:
                return res[0]
            else:
                return ''
        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." % ira.id)
            fpath = os.path.join(boo.path, ira.store_fname or ira.name)
            if os.path.exists(fpath):
                return file(fpath, 'rb').read()
            elif not ira.store_fname:
                return None
            else:
                raise IOError("File not found: %s" % fpath)
        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')
        else:
            raise TypeError("No %s storage" % boo.type)
    def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
        """ store the data.
            This function MUST be used from an ir.attachment. It wouldn't make sense
            to store things persistently for other types (dynamic).
        """
        if not context:
            context = {}
        boo = self.browse(cr, uid, id, context)
        ira = fil_obj or self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        if not boo.online:
            raise RuntimeError('media offline')
        self._doclog.debug("Store data for ir.attachment #%d" % ira.id)
        store_fname = None
        fname = None
        if boo.type == 'filestore':
            path = boo.path
            try:
                flag = None
                # This can be improved: reuse any subdirectory that still has
                # room (< 4000 entries), else create a new random one.
                if os.path.isdir(path):
                    for dirs in os.listdir(path):
                        if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
                            flag = dirs
                            break
                flag = flag or create_directory(path)
                filename = random_name()
                fname = os.path.join(path, flag, filename)
                fp = file(fname, 'wb')
                fp.write(data)
                fp.close()
                self._doclog.debug("Saved data to %s" % fname)
                filesize = len(data) # os.stat(fname).st_size
                store_fname = os.path.join(flag, filename)

                # TODO Here, an old file would be left hanging.
            except Exception, e:
                self._doclog.warning("Couldn't save data to %s", path, exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'db':
            filesize = len(data)
            # will that work for huge data?
            # pass the data through psycopg2's Binary adapter, so that
            # arbitrary bytes survive the trip into the bytea column
            out = psycopg2.Binary(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'db64':
            filesize = len(data)
            # will that work for huge data?
            out = base64.encodestring(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'realstore':
            try:
                file_node.fix_ppath(cr, ira)
                npath = file_node.full_path() or []
                # npath may contain empty elements, for root directory etc.
                for i, n in enumerate(npath):
                    if not n:
                        continue
                    for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
                        if ch in n:
                            raise ValueError("Invalid char %s in path %s" % (ch, n))
                dpath = [boo.path,]
                dpath += npath[:-1]
                path = os.path.join(*dpath)
                if not os.path.isdir(path):
                    os.makedirs(path)
                fname = os.path.join(path, npath[-1])
                fp = file(fname, 'wb')
                fp.write(data)
                fp.close()
                self._doclog.debug("Saved data to %s", fname)
                filesize = len(data) # os.stat(fname).st_size
                store_fname = os.path.join(*npath)
                # TODO Here, an old file would be left hanging.
            except Exception, e:
                self._doclog.warning("Couldn't save data:", exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'virtual':
            raise ValueError('Virtual storage does not support static files')
        else:
            raise TypeError("No %s storage" % boo.type)
        # 2nd phase: store the metadata
        try:
            icont = ''
            mime = ira.file_type or ''
            try:
                mime, icont = cntIndex.doIndex(data, ira.datas_fname,
                    ira.file_type or None, fname)
            except Exception:
                self._doclog.debug('Cannot index file:', exc_info=True)

            try:
                icont_u = ustr(icont)
            except UnicodeError:
                icont_u = ''

            # a hack: /assume/ that the calling write operation will not try
            # to write the fname and size, and update them in the db concurrently.
            # We cannot use a write() here, because we are already in one.
            cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
                (store_fname, filesize, icont_u, mime, file_node.file_id))
            file_node.content_length = filesize
            file_node.content_type = mime
            return True
        except Exception, e:
            self._doclog.warning("Couldn't save data:", exc_info=True)
            # should we really rollback once we have written the actual data?
            # at the db case (only), that rollback would be safe
            raise except_orm(_('Error at doc write!'), str(e))
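    # For reference, the indexing step above is expected to behave like this
    # hypothetical call (arguments mirror the cntIndex.doIndex() use in
    # set_data; the sample values are made up):
    #
    #   mime, icont = cntIndex.doIndex(data, 'report.pdf', None, fname)
    #   # mime  -> detected content type, e.g. 'application/pdf'
    #   # icont -> extracted plain text, stored in index_content for search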
    def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
        """ Before we unlink a file (fil_bo), prepare the list of real
            files that have to be removed, too. """

        if not storage_bo.online:
            raise RuntimeError('media offline')

        if storage_bo.type == 'filestore':
            fname = fil_bo.store_fname
            if not fname:
                return None
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        elif storage_bo.type in ('db', 'db64'):
            return None
        elif storage_bo.type == 'realstore':
            fname = fil_bo.store_fname
            if not fname:
                return None
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        else:
            raise TypeError("No %s storage" % storage_bo.type)

    def do_unlink(self, cr, uid, unres):
        for id, ktype, fname in unres:
            if ktype == 'file':
                try:
                    os.unlink(fname)
                except Exception:
                    self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
            else:
                self._doclog.warning("Unknown unlink key %s" % ktype)
        return True
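    # prepare_unlink()/do_unlink() form a two-phase delete: collect the real
    # paths while the db rows still exist, drop the rows, then remove the
    # files. A hypothetical caller (variable names are assumptions):
    #
    #   unres = []
    #   for f in attachments:
    #       r = storage_obj.prepare_unlink(cr, uid, sbro, f)
    #       if r:
    #           unres.append(r)
    #   # ... unlink the ir.attachment records here ...
    #   storage_obj.do_unlink(cr, uid, unres)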
    def simple_rename(self, cr, uid, file_node, new_name, context=None):
        """ A preparation for a file rename.
            It will not affect the database, but merely check and perhaps
            rename the realstore file.

            @return the dict of values that can safely be stored in the db.
        """
        sbro = self.browse(cr, uid, file_node.storage_id, context=context)
        assert sbro, "The file #%d didn't provide storage" % file_node.file_id

        if sbro.type in ('filestore', 'db', 'db64'):
            # nothing to do for a rename, allow to change the db field
            return { 'name': new_name, 'datas_fname': new_name }
        elif sbro.type == 'realstore':
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
            fname = ira.store_fname
            if not fname:
                raise ValueError("Tried to rename a non-stored file")
            oldpath = os.path.join(sbro.path, fname)

            for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
                if ch in new_name:
                    raise ValueError("Invalid char %s in name %s" % (ch, new_name))

            file_node.fix_ppath(cr, ira)
            npath = file_node.full_path() or []
            dpath = [sbro.path,]
            dpath.extend(npath[:-1])
            dpath.append(new_name)
            newpath = os.path.join(*dpath)
            os.rename(oldpath, newpath)
            # keep the relative path inside the store in sync with set_data()
            store_fname = os.path.join(*(npath[:-1] + [new_name]))
            return { 'name': new_name, 'datas_fname': new_name, 'store_fname': store_fname }
        else:
            raise TypeError("No %s storage" % sbro.type)

document_storage()
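# A hypothetical end-to-end sketch of the rename helper (ids and names are
# made up): compute the safe db values, then apply them to the attachment:
#
#   storage_obj = pool.get('document.storage')
#   vals = storage_obj.simple_rename(cr, uid, file_node, 'report-2009.pdf')
#   pool.get('ir.attachment').write(cr, uid, [file_node.file_id], vals)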