# -*- encoding: utf-8 -*-
##############################################################################
#
#    OpenERP, Open Source Management Solution
#    Copyright (C) P. Christeas, 2009, all rights reserved
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
import base64
import errno
import logging
import os
import random
import string

from osv import osv, fields
from osv.orm import except_orm
from StringIO import StringIO

import pooler
import tools
from tools.misc import ustr
from tools.translate import _

import nodes
from content_index import cntIndex

DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
""" The algorithm of data storage

We have to consider 3 cases of data /retrieval/:
 Given (context, path), we need to access the file (aka. node).
 Given (directory, context), we need one of its children (for listings, views).
 Given (ir.attachment, context), we need its data and metadata (node).

For data /storage/ we have the cases:
 Have (ir.attachment, context): we modify the file (save, update, rename etc.).
 Have (directory, context): we create a file.
 Have (path, context): we create or modify a file.

Note that in all of the above cases, we don't explicitly choose the storage
media, but always require a context to be present.

Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
media + directory.

The algorithm says that in any of the above cases, our first goal is to locate
the node for any combination of search criteria. It would be wise NOT to
represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
but to jump directly to the end node (like node[/dir1/dir2]) whenever possible.

We also keep all the parenting-loop code in one function. This is intentional,
because one day this will be optimized in the db (Pg 8.4).
"""
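# A minimal sketch (not part of this module) of the retrieval flow described
# above. It assumes the node API of the companion 'nodes' module; the names
# 'get_node_context' and 'get_uri' are taken from it, everything else here
# is illustrative placeholder code:
#
#   ctx = nodes.get_node_context(cr, uid, context)
#   # jump directly to the end node, instead of walking every path element
#   node = ctx.get_uri(cr, ['dir1', 'dir2', 'file.txt'])
#   data = storage_obj.get_data(cr, uid, storage_id, node)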
def random_name():
    """ Return a random 10-letter name, used for filestore file names. """
    random.seed()
    d = [random.choice(string.ascii_letters) for x in xrange(10)]
    return ''.join(d)
INVALID_CHARS = {'*': str(hash('*')), '|': str(hash('|')), "\\": str(hash("\\")),
                 '/': '__', ':': str(hash(':')), '"': str(hash('"')),
                 '<': str(hash('<')), '>': str(hash('>')), '?': str(hash('?'))}
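# Illustrative helper (hypothetical, not used elsewhere in this module):
# INVALID_CHARS maps characters that are unsafe in filesystem names to stable
# replacements, so that a document name can be turned into a valid store
# filename, e.g. for 'realstore' media.
def _example_sanitize_name(name):
    """ Replace every unsafe character in name, per INVALID_CHARS. """
    for ch, repl in INVALID_CHARS.items():
        name = name.replace(ch, repl)
    return name
# e.g. _example_sanitize_name('a/b') == 'a__b'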
def create_directory(path):
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    os.makedirs(path)
    return dir_name
class nodefd_file(nodes.node_descriptor):
    """ A descriptor to a real file.

    Inheriting directly from file doesn't work, since file exports
    some read-only attributes (like 'name') that we don't like.
    """
    def __init__(self, parent, path, mode):
        nodes.node_descriptor.__init__(self, parent)
        self.__file = open(path, mode)

        for attr in ('closed', 'read', 'write', 'seek', 'tell'):
            setattr(self, attr, getattr(self.__file, attr))

    def close(self):
        # TODO: locking in init, close()
        self.__file.close()
class nodefd_db(StringIO, nodes.node_descriptor):
    """ A descriptor to db data.
    """
    def __init__(self, parent, ira_browse, mode):
        nodes.node_descriptor.__init__(self, parent)
        if mode.endswith('b'):
            mode = mode[:-1]

        if mode in ('r', 'r+'):
            StringIO.__init__(self, ira_browse.db_datas)
        elif mode in ('w', 'w+'):
            StringIO.__init__(self, None)
            # at write, we start at 0 (= overwrite), but have the original
            # data available, in case of a seek()
        elif mode == 'a':
            StringIO.__init__(self, None)
        else:
            logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
            raise IOError(errno.EINVAL, "Invalid file mode")
        self.mode = mode
    def close(self):
        # we now open a *separate* cursor, to update the data.
        # FIXME: this may be improved, for concurrency handling
        par = self._get_parent()
        uid = par.context.uid
        cr = pooler.get_db(par.context.dbname).cursor()
        try:
            # NB: psycopg2 only accepts %s placeholders, never %d
            if self.mode in ('w', 'w+', 'r+'):
                out = self.getvalue()
                cr.execute('UPDATE ir_attachment SET db_datas = %s, file_size = %s WHERE id = %s',
                    (out, len(out), par.file_id))
            elif self.mode == 'a':
                out = self.getvalue()
                cr.execute("UPDATE ir_attachment "
                    "SET db_datas = COALESCE(db_datas,'') || %s, "
                    "    file_size = COALESCE(file_size, 0) + %s "
                    "WHERE id = %s",
                    (out, len(out), par.file_id))
            cr.commit()
        except Exception:
            logging.getLogger('document.storage').exception('Cannot update db file #%d for close:', par.file_id)
            raise
        finally:
            cr.close()
        StringIO.close(self)
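# Usage sketch (hypothetical values): a nodefd_db behaves like a StringIO
# until close(), at which point the buffer is written back to
# ir_attachment.db_datas on a separate cursor:
#
#   fd = nodefd_db(file_node, ira_browse=ira, mode='w')
#   fd.write('new contents')
#   fd.close()   # issues the UPDATE and commits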
class document_storage(osv.osv):
    """ The primary object for data storage.

    Each instance of this object is a storage media, on which our application
    can store contents. The object here controls the behaviour of the storage
    media.
    The referring document.directory-ies will control the placement of data
    into the media.

    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    _name = 'document.storage'
    _description = 'Storage Media'
    _doclog = logging.getLogger('document')
    _columns = {
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                                  ('realstore', 'External file storage'), ('virtual', 'Virtual storage')],
                                 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, the media is currently offline and its contents are not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, the media is for reading only"),
    }
    def _get_rootpath(self, cr, uid, context=None):
        return os.path.join(DMS_ROOT_PATH, cr.dbname)
    _defaults = {
        'user_id': lambda self, cr, uid, ctx: uid,
        'online': lambda *args: True,
        'readonly': lambda *args: False,
        # Note: the defaults below should only be used ONCE for the default
        # storage media. All other times, we should create different paths at least.
        'type': lambda *args: 'filestore',
        'path': _get_rootpath,
    }
    # SQL note: a path = NULL doesn't have to be unique.
    _sql_constraints = [
        ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!"),
    ]
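    # Example of that NULL behaviour in PostgreSQL: both of these rows satisfy
    # UNIQUE(type,path), because NULL never compares equal to NULL:
    #   INSERT INTO document_storage(name, type, online) VALUES ('a', 'db', true);
    #   INSERT INTO document_storage(name, type, online) VALUES ('b', 'db', true);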
    def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
        """ Retrieve the contents of some file_node having storage_id = id.

        Optionally, fil_obj could point to the browse object of the file
        (ir.attachment).
        """
        if not context:
            context = {}
        boo = self.browse(cr, uid, id, context)
        if fil_obj:
            ira = fil_obj
        else:
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        return self.__get_data_3(cr, uid, boo, ira, context)
    def get_file(self, cr, uid, id, file_node, mode, context=None):
        if context is None:
            context = {}
        boo = self.browse(cr, uid, id, context)
        if not boo.online:
            raise RuntimeError('media offline')

        ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type;
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                raise IOError(errno.ENOENT, 'No file can be located')
            fpath = os.path.join(boo.path, ira.store_fname)
            self._doclog.debug("Trying to read \"%s\"..", fpath)
            return nodefd_file(file_node, path=fpath, mode=mode)
        elif boo.type == 'db':
            # TODO: we need a better api for large files
            self._doclog.debug("Trying to obtain db_datas for ir.attachment[%d]", ira.id)
            return nodefd_db(file_node, ira_browse=ira, mode=mode)

        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type;
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." % ira.id)
            fpath = os.path.join(boo.path, ira.store_fname or ira.name)
            if not os.path.exists(fpath):
                raise IOError(errno.ENOENT, "File not found: %s" % fpath)
            return nodefd_file(file_node, path=fpath, mode=mode)

        else:
            raise TypeError("No %s storage" % boo.type)
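    # Usage sketch (hypothetical): get_file() returns a file-like descriptor
    # whose concrete class depends on the media type:
    #
    #   fd = storage_obj.get_file(cr, uid, boo_id, file_node, 'rb')
    #   data = fd.read()
    #   fd.close()
    #
    # 'storage_obj', 'boo_id' and 'file_node' are placeholder names here.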
    def __get_data_3(self, cr, uid, boo, ira, context):
        if not boo.online:
            raise RuntimeError('media offline')
        if boo.type == 'filestore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type;
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
                return None
            fpath = os.path.join(boo.path, ira.store_fname)
            return file(fpath, 'rb').read()
        elif boo.type == 'db':
            # TODO: we need a better api for large files
            if ira.db_datas:
                out = base64.decodestring(ira.db_datas)
            else:
                out = ''
            return out
        elif boo.type == 'realstore':
            if not ira.store_fname:
                # On a migrated db, some files may have the wrong storage type;
                # try to fix their directory.
                if ira.file_size:
                    self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." % ira.id)
            fpath = os.path.join(boo.path, ira.store_fname or ira.name)
            if os.path.exists(fpath):
                return file(fpath, 'rb').read()
            elif not ira.store_fname:
                return None
            else:
                raise IOError(errno.ENOENT, "File not found: %s" % fpath)
        else:
            raise TypeError("No %s storage" % boo.type)
    def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
        """ Store the data.

        This function MUST be used from an ir.attachment. It wouldn't make sense
        to store things persistently for other types (dynamic).
        """
        if not context:
            context = {}
        boo = self.browse(cr, uid, id, context)
        if fil_obj:
            ira = fil_obj
        else:
            ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)

        if not boo.online:
            raise RuntimeError('media offline')
        self._doclog.debug("Store data for ir.attachment #%d" % ira.id)
        store_fname = None
        fname = None
        if boo.type == 'filestore':
            path = boo.path
            try:
                flag = None
                # This can be improved: pick the first subdirectory that still
                # has room (fewer than 4000 entries), to keep directories small.
                if os.path.isdir(path):
                    for dirs in os.listdir(path):
                        if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
                            flag = dirs
                            break
                flag = flag or create_directory(path)
                filename = random_name()
                fname = os.path.join(path, flag, filename)
                fp = file(fname, 'wb')
                fp.write(data)
                fp.close()
                self._doclog.debug("Saved data to %s" % fname)
                filesize = len(data) # os.stat(fname).st_size
                store_fname = os.path.join(flag, filename)

                # TODO: here, an old file would be left hanging.
            except Exception, e:
                self._doclog.warning("Couldn't save data to %s", path, exc_info=True)
                raise except_orm(_('Error!'), str(e))
        elif boo.type == 'db':
            filesize = len(data)
            # will that work for huge data? TODO
            out = base64.encodestring(data)
            cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
                (out, file_node.file_id))
        elif boo.type == 'realstore':
            try:
                file_node.fix_ppath(cr, ira)
                npath = file_node.full_path() or []
                # npath may contain empty elements, for root directory etc.
                npath = [n for n in npath if n is not None]
                for n in npath:
                    for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
                        if ch in n:
                            raise ValueError("Invalid char %s in path %s" % (ch, n))
                dpath = [boo.path,]
                dpath += npath[:-1]
                path = os.path.join(*dpath)
                if not os.path.isdir(path):
                    os.makedirs(path)
                fname = os.path.join(path, npath[-1])
                fp = file(fname, 'wb')
                fp.write(data)
                fp.close()
                self._doclog.debug("Saved data to %s", fname)
                filesize = len(data) # os.stat(fname).st_size
                store_fname = os.path.join(*npath)
                # TODO: here, an old file would be left hanging.
            except Exception, e:
                self._doclog.warning("Couldn't save data:", exc_info=True)
                raise except_orm(_('Error!'), str(e))
        else:
            raise TypeError("No %s storage" % boo.type)
        # 2nd phase: store the metadata
        try:
            icont = ''
            mime = ira.file_type
            if not mime:
                mime = ""
            try:
                mime, icont = cntIndex.doIndex(data, ira.datas_fname,
                                               ira.file_type or None, fname)
            except Exception:
                self._doclog.debug('Cannot index file:', exc_info=True)

            try:
                icont_u = ustr(icont)
            except UnicodeError:
                icont_u = ''

            # A hack: /assume/ that the calling write operation will not try
            # to write the fname and size, and update them in the db concurrently.
            # We cannot use a write() here, because we are already in one.
            cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
                (store_fname, filesize, icont_u, mime, file_node.file_id))
            file_node.content_length = filesize
            file_node.content_type = mime
            return True
        except Exception, e:
            self._doclog.warning("Couldn't save data:", exc_info=True)
            # should we really roll back once we have written the actual data?
            # in the db case (only), that rollback would be safe
            raise except_orm(_('Error at doc write!'), str(e))
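    # Note on set_data(): writing happens in two phases. Phase 1 stores the
    # raw bytes on the media; phase 2 records the metadata (store_fname,
    # file_size, index_content, file_type) with a raw UPDATE, because we may
    # already be inside an ir.attachment.write(). A hypothetical caller:
    #
    #   storage_obj = self.pool.get('document.storage')
    #   storage_obj.set_data(cr, uid, boo_id, file_node, 'hello world',
    #                        context=context, fil_obj=ira)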
    def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
        """ Before we unlink a file (fil_bo), prepare the list of real
        files that have to be removed, too. """

        if not storage_bo.online:
            raise RuntimeError('media offline')

        if storage_bo.type == 'filestore':
            fname = fil_bo.store_fname
            if not fname:
                return None
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        elif storage_bo.type == 'db':
            return None
        elif storage_bo.type == 'realstore':
            fname = fil_bo.store_fname
            if not fname:
                return None
            path = storage_bo.path
            return (storage_bo.id, 'file', os.path.join(path, fname))
        else:
            raise TypeError("No %s storage" % storage_bo.type)
    def do_unlink(self, cr, uid, unres):
        for id, ktype, fname in unres:
            if ktype == 'file':
                try:
                    os.unlink(fname)
                except Exception:
                    self._doclog.warning("Could not remove file %s, please remove it manually.", fname, exc_info=True)
            else:
                self._doclog.warning("Unknown unlink key %s" % ktype)

        return True

document_storage()