1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
29 from StringIO import StringIO
31 from tools.misc import ustr
32 from tools.translate import _
34 from osv.orm import except_orm
40 from content_index import cntIndex
# Root directory of the on-disk Document Management System file store:
# the 'document_path' server config option, defaulting to <root_path>/filestore.
42 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
45 """ The algorithm of data storage
47 We have to consider 3 cases of data /retrieval/:
48 Given (context,path) we need to access the file (aka. node).
49 given (directory, context), we need one of its children (for listings, views)
50 given (ir.attachment, context), we need its data and metadata (node).
52 For data /storage/ we have the cases:
53 Have (ir.attachment, context), we modify the file (save, update, rename etc).
54 Have (directory, context), we create a file.
55 Have (path, context), we create or modify a file.
57 Note that in all above cases, we don't explicitly choose the storage media,
58 but always require a context to be present.
60 Note that a node will not always have a corresponding ir.attachment. Dynamic
61 nodes, for one, won't. Their metadata will be computed by the parent storage
64 The algorithm says that in any of the above cases, our first goal is to locate
65 the node for any combination of search criteria. It would be wise NOT to
66 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
67 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
69 We also contain all the parenting loop code in one function. This is intentional,
70 because one day this will be optimized in the db (Pg 8.4).
77 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
# Characters that are not allowed in stored file names, mapped to replacement
# tokens: '/' becomes '__'; every other forbidden character is replaced by the
# decimal string of its hash(). NOTE(review): str(hash(c)) is only stable
# within one interpreter build — verify this is never used as a persistent,
# cross-version identifier.
81 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
# Create a new randomly-named subdirectory under `path`.
# NOTE(review): this chunk is a non-contiguous sample (the fused original line
# numbers skip 87-89); the lines that actually create the directory and return
# its name are not visible here. set_data() below uses the return value as a
# relative directory name, so presumably os.makedirs(path) and
# `return dir_name` follow — confirm against the full file.
84 def create_directory(path):
85 dir_name = random_name()
86 path = os.path.join(path, dir_name)
# Node descriptor backed by a real file on disk. It wraps an open file object
# rather than inheriting from `file`, because `file` exposes read-only
# attributes (like 'name') that conflict with the descriptor's own.
# NOTE(review): sampled chunk — the docstring terminator and, judging by the
# dangling "TODO: locking in init, close()" comment, a close() method appear
# to be missing between the visible lines. Confirm against the full file.
90 class nodefd_file(nodes.node_descriptor):
91 """ A descriptor to a real file
93 Inheriting directly from file doesn't work, since file exports
94 some read-only attributes (like 'name') that we don't like.
96 def __init__(self, parent, path, mode):
97 nodes.node_descriptor.__init__(self, parent)
# Keep the real file private; forward only the selected file operations.
98 self.__file = open(path, mode)
100 for attr in ('closed', 'read', 'write', 'seek', 'tell'):
101 setattr(self,attr, getattr(self.__file, attr))
104 # TODO: locking in init, close()
# Node descriptor whose payload lives in the database (ir_attachment.db_datas),
# buffered in a StringIO while the node is open.
# NOTE(review): sampled chunk — the mode-dispatch keywords (elif/else), the
# close() signature, and any try/finally around the cursor are not visible
# here; comments below describe only what the visible lines show.
108 class nodefd_db(StringIO, nodes.node_descriptor):
109 """ A descriptor to db data
111 def __init__(self, parent, ira_browse, mode):
112 nodes.node_descriptor.__init__(self, parent)
# Binary flag is irrelevant for an in-memory buffer (branch body not visible).
113 if mode.endswith('b'):
# Presumably the read branch: preload the buffer with current db contents.
117 StringIO.__init__(self, ira_browse.db_datas)
# Write branch: same preload, but positioned for overwrite (per comment below).
119 StringIO.__init__(self, ira_browse.db_datas)
120 # at write, we start at 0 (= overwrite), but have the original
121 # data available, in case of a seek()
# Append-like mode starts from an empty buffer.
123 StringIO.__init__(self, None)
# Any other mode is rejected up front.
125 logging.getLogger('document.storage').error("Incorrect mode %s specified", mode)
126 raise IOError(errno.EINVAL, "Invalid file mode")
# close-time flush: write the buffer back into ir_attachment on a new cursor.
130 # we now open a *separate* cursor, to update the data.
131 # FIXME: this may be improved, for concurrency handling
132 uid = self.__parent.context.uid
133 cr = pooler.get_db(self.__parent.context.dbname).cursor()
# Overwrite modes: replace db_datas wholesale and record the new size.
135 out = self.getvalue()
136 cr.execute('UPDATE ir_attachment SET db_datas = %s, file_size=%d WHERE id = %s',
137 (out, len(out), self.__parent.file_id))
# Append mode: concatenate onto the existing payload in SQL (COALESCE guards
# against NULL db_datas / file_size).
139 out = self.getvalue()
140 cr.execute("UPDATE ir_attachment " \
141 "SET db_datas = COALESCE(db_datas,'') || %s, " \
142 " file_size = COALESCE(file_size, 0) + %d " \
144 (out, len(out), self.__parent.file_id))
# OSV model describing one storage medium (db / filestore / realstore /
# virtual). Directories refer to a storage to decide where file data lives.
# NOTE(review): sampled chunk — the `_columns = {` opener and the closing
# brace of the field dict are not visible between these lines.
149 class document_storage(osv.osv):
150 """ The primary object for data storage.
151 Each instance of this object is a storage media, in which our application
152 can store contents. The object here controls the behaviour of the storage
154 The referring document.directory-ies will control the placement of data
157 It is a bad idea to have multiple document.storage objects pointing to
158 the same tree of filesystem storage.
160 _name = 'document.storage'
161 _description = 'Storage Media'
# Shared logger for all storage operations in this model.
162 _doclog = logging.getLogger('document')
165 'name': fields.char('Name', size=64, required=True, select=1),
166 'write_date': fields.datetime('Date Modified', readonly=True),
167 'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
168 'create_date': fields.datetime('Date Created', readonly=True),
169 'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
170 'user_id': fields.many2one('res.users', 'Owner'),
171 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
172 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
# 'type' selects which branch of get_file/set_data/etc. handles this medium.
173 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
174 ('realstore', 'External file storage'), ('virtual', 'Virtual storage')], 'Type', required=True),
175 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
176 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
177 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
def _get_rootpath(self, cr, uid, context=None):
    """Default root path for a filestore medium.

    Joins the module-level DMS_ROOT_PATH with the current database
    name, so every database gets its own filestore subdirectory.
    """
    return os.path.join(DMS_ROOT_PATH, cr.dbname)
# Default field values for new storage records.
# NOTE(review): sampled chunk — the `_defaults = {` opener, its closing brace,
# and the `_sql_constraints = [` opener are not visible between these lines.
184 'user_id': lambda self, cr, uid, ctx: uid,
185 'online': lambda *args: True,
186 'readonly': lambda *args: False,
187 # Note: the defaults below should only be used ONCE for the default
188 # storage media. All other times, we should create different paths at least.
189 'type': lambda *args: 'filestore',
190 'path': _get_rootpath,
193 # SQL note: a path = NULL doesn't have to be unique.
194 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
# Public reader: return the raw contents of `file_node` stored on storage
# medium `id`, delegating to __get_data_3 once both browse records are ready.
# NOTE(review): sampled chunk — the docstring terminator and the branch that
# reuses `fil_obj` instead of re-browsing the attachment are not visible here.
197 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
198 """ retrieve the contents of some file_node having storage_id = id
199 optionally, fil_obj could point to the browse object of the file
204 boo = self.browse(cr, uid, id, context)
208 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
209 return self.__get_data_3(cr, uid, boo, ira, context)
# Open `file_node` on storage medium `id` and return a file-like node
# descriptor (nodefd_file or nodefd_db) honouring `mode`.
# NOTE(review): sampled chunk — the `if not boo.online:` guard before the
# RuntimeError and some try/except wrappers are not visible here.
211 def get_file(self, cr, uid, id, file_node, mode, context=None):
214 boo = self.browse(cr, uid, id, context)
# Media must be online before any data access (guard line not visible).
216 raise RuntimeError('media offline')
218 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
# filestore: data lives under boo.path/ira.store_fname on local disk.
219 if boo.type == 'filestore':
220 if not ira.store_fname:
221 # On a migrated db, some files may have the wrong storage type
222 # try to fix their directory.
224 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
225 raise IOError(errno.ENOENT, 'No file can be located')
226 fpath = os.path.join(boo.path, ira.store_fname)
228 self._doclog.debug("Trying to read \"%s\".."% fpath)
229 return nodefd_file(file_node, path=fpath, mode=mode)
# db: data lives in ir_attachment.db_datas, served through a StringIO.
231 elif boo.type == 'db':
232 # TODO: we need a better api for large files
234 self._doclog.debug("Trying to obtain db_datas for ir.attachment[%d]", ira.id)
235 return nodefd_db(file_node, ira_browse=ira, mode=mode)
# realstore: external tree; fall back to the attachment name if no store_fname.
237 elif boo.type == 'realstore':
238 if not ira.store_fname:
239 # On a migrated db, some files may have the wrong storage type
240 # try to fix their directory.
242 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
244 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
245 if not os.path.exists(fpath):
246 raise IOError("File not found: %s" % fpath)
247 return nodefd_file(file_node, path=fpath, mode=mode)
# Any other storage type is unsupported.
250 raise TypeError("No %s storage" % boo.type)
# Low-level reader: given browse records for the storage medium (`boo`) and
# the attachment (`ira`), return the raw file bytes.
# NOTE(review): sampled chunk — the online guard condition, the return for the
# db branch (presumably returning the decoded `out`), and some branch keywords
# are not visible here.
252 def __get_data_3(self, cr, uid, boo, ira, context):
# Guard: storage must be online (condition line not visible).
254 raise RuntimeError('media offline')
255 if boo.type == 'filestore':
256 if not ira.store_fname:
257 # On a migrated db, some files may have the wrong storage type
258 # try to fix their directory.
260 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
262 fpath = os.path.join(boo.path, ira.store_fname)
263 return file(fpath, 'rb').read()
264 elif boo.type == 'db':
265 # TODO: we need a better api for large files
# db payloads are base64-encoded in ir_attachment.db_datas.
267 out = base64.decodestring(ira.db_datas)
271 elif boo.type == 'realstore':
272 if not ira.store_fname:
273 # On a migrated db, some files may have the wrong storage type
274 # try to fix their directory.
276 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
278 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
279 if os.path.exists(fpath):
280 return file(fpath,'rb').read()
281 elif not ira.store_fname:
284 raise IOError("File not found: %s" % fpath)
286 raise TypeError("No %s storage" % boo.type)
# Writer: persist `data` for `file_node` on storage medium `id`, then update
# the attachment's metadata (store_fname, size, indexed content, mime type).
# NOTE(review): sampled chunk — try/except openers, the `path = boo.path`
# style assignments, loop bodies that pick `flag`/`dpath`, fp.write/close
# calls and os.makedirs lines are not visible between the numbered lines.
288 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
290 This function MUST be used from an ir.attachment. It wouldn't make sense
291 to store things persistently for other types (dynamic).
295 boo = self.browse(cr, uid, id, context)
299 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
# Guard: storage must be online (condition line not visible).
302 raise RuntimeError('media offline')
303 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
# filestore: pick (or create) a subdirectory with fewer than 4000 entries,
# then write the payload to a random file name inside it.
306 if boo.type == 'filestore':
310 # This can be improved
311 if os.path.isdir(path):
312 for dirs in os.listdir(path):
313 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
316 flag = flag or create_directory(path)
317 filename = random_name()
318 fname = os.path.join(path, flag, filename)
319 fp = file(fname, 'wb')
322 self._doclog.debug( "Saved data to %s" % fname)
323 filesize = len(data) # os.stat(fname).st_size
# store_fname is kept relative to the storage root.
324 store_fname = os.path.join(flag, filename)
326 # TODO Here, an old file would be left hanging.
329 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
330 raise except_orm(_('Error!'), str(e))
# db: payload is base64-encoded into ir_attachment.db_datas.
331 elif boo.type == 'db':
333 # will that work for huge data? TODO
334 out = base64.encodestring(data)
335 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
336 (out, file_node.file_id))
# realstore: mirror the node's logical path onto the external tree, after
# validating each path component against forbidden characters.
337 elif boo.type == 'realstore':
339 file_node.fix_ppath(cr, ira)
340 npath = file_node.full_path() or []
341 # npath may contain empty elements, for root directory etc.
342 for i, n in enumerate(npath):
346 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
348 raise ValueError("Invalid char %s in path %s" %(ch, n))
351 path = os.path.join(*dpath)
352 if not os.path.isdir(path):
354 fname = os.path.join(path, npath[-1])
355 fp = file(fname,'wb')
358 self._doclog.debug("Saved data to %s", fname)
359 filesize = len(data) # os.stat(fname).st_size
360 store_fname = os.path.join(*npath)
361 # TODO Here, an old file would be left hanging.
363 self._doclog.warning("Couldn't save data:", exc_info=True)
364 raise except_orm(_('Error!'), str(e))
366 raise TypeError("No %s storage" % boo.type)
368 # 2nd phase: store the metadata
# Content indexing is best-effort: failures are only logged at debug level.
375 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
376 ira.file_type or None, fname)
378 self._doclog.debug('Cannot index file:', exc_info=True)
382 icont_u = ustr(icont)
386 # a hack: /assume/ that the calling write operation will not try
387 # to write the fname and size, and update them in the db concurrently.
388 # We cannot use a write() here, because we are already in one.
389 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
390 (store_fname, filesize, icont_u, mime, file_node.file_id))
391 file_node.content_length = filesize
392 file_node.content_type = mime
394 except Exception, e :
395 self._doclog.warning("Couldn't save data:", exc_info=True)
396 # should we really rollback once we have written the actual data?
397 # at the db case (only), that rollback would be safe
398 raise except_orm(_('Error at doc write!'), str(e))
def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
    """Compute the on-disk removal spec for a file about to be unlinked.

    :param storage_bo: browse record of the document.storage medium
    :param fil_bo: browse record of the ir.attachment being removed
    :return: ``(storage_id, 'file', full_path)`` when a real file must be
             deleted (filestore/realstore with a stored filename), or
             ``None`` when nothing on disk needs removing (db storage, or
             no ``store_fname`` recorded).
    :raises RuntimeError: if the storage medium is offline.
    :raises TypeError: for an unknown storage type.
    """
    if not storage_bo.online:
        raise RuntimeError('media offline')

    if storage_bo.type == 'filestore':
        fname = fil_bo.store_fname
        if not fname:
            # Nothing was ever written to disk for this attachment.
            return None
        path = storage_bo.path
        return (storage_bo.id, 'file', os.path.join(path, fname))
    elif storage_bo.type == 'db':
        # The payload lives in ir_attachment.db_datas and disappears with
        # the row itself; no filesystem cleanup required.
        return None
    elif storage_bo.type == 'realstore':
        fname = fil_bo.store_fname
        if not fname:
            return None
        path = storage_bo.path
        return (storage_bo.id, 'file', os.path.join(path, fname))
    else:
        # BUG FIX: the original referenced `boo.type` here, a name that does
        # not exist in this scope — it would raise NameError instead of the
        # intended TypeError.
        raise TypeError("No %s storage" % storage_bo.type)
# Remove the real files collected by prepare_unlink(). Each entry is a
# (storage_id, kind, filename) tuple; removal failures are only logged, so a
# stale file never blocks the database unlink.
# NOTE(review): sampled chunk — the `if ktype == 'file':` dispatch, the
# try/os.unlink lines, the `else:`, and any trailing return are not visible
# here (the method may also continue past this chunk). Confirm against the
# full file.
424 def do_unlink(self, cr, uid, unres):
425 for id, ktype, fname in unres:
430 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
432 self._doclog.warning("Unknown unlink key %s" % ktype)