1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
import base64
import logging
import os
import random
import string

import tools
from tools.misc import ustr
from tools.translate import _

from osv import osv, fields
from osv.orm import except_orm

from content_index import cntIndex
39 DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
42 """ The algorithm of data storage
44 We have to consider 3 cases of data /retrieval/:
45 Given (context,path) we need to access the file (aka. node).
46 given (directory, context), we need one of its children (for listings, views)
given (ir.attachment, context), we need its data and metadata (node).
49 For data /storage/ we have the cases:
50 Have (ir.attachment, context), we modify the file (save, update, rename etc).
51 Have (directory, context), we create a file.
52 Have (path, context), we create or modify a file.
54 Note that in all above cases, we don't explicitly choose the storage media,
55 but always require a context to be present.
Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
61 The algorithm says that in any of the above cases, our first goal is to locate
62 the node for any combination of search criteria. It would be wise NOT to
63 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
64 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
66 We also contain all the parenting loop code in one function. This is intentional,
67 because one day this will be optimized in the db (Pg 8.4).
74 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
78 INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
81 def create_directory(path):
82 dir_name = random_name()
83 path = os.path.join(path, dir_name)
88 class document_storage(osv.osv):
89 """ The primary object for data storage.
90 Each instance of this object is a storage media, in which our application
91 can store contents. The object here controls the behaviour of the storage
93 The referring document.directory-ies will control the placement of data
96 It is a bad idea to have multiple document.storage objects pointing to
97 the same tree of filesystem storage.
99 _name = 'document.storage'
100 _description = 'Storage Media'
101 _doclog = logging.getLogger('document')
104 'name': fields.char('Name', size=64, required=True, select=1),
105 'write_date': fields.datetime('Date Modified', readonly=True),
106 'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
107 'create_date': fields.datetime('Date Created', readonly=True),
108 'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
109 'user_id': fields.many2one('res.users', 'Owner'),
110 'group_ids': fields.many2many('res.groups', 'document_storage_group_rel', 'item_id', 'group_id', 'Groups'),
111 'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
112 'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
113 ('realstore', 'External file storage'), ('virtual', 'Virtual storage')], 'Type', required=True),
114 'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
115 'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
116 'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
119 def _get_rootpath(self, cr, uid, context=None):
120 return os.path.join(DMS_ROOT_PATH, cr.dbname)
123 'user_id': lambda self, cr, uid, ctx: uid,
124 'online': lambda *args: True,
125 'readonly': lambda *args: False,
126 # Note: the defaults below should only be used ONCE for the default
127 # storage media. All other times, we should create different paths at least.
128 'type': lambda *args: 'filestore',
129 'path': _get_rootpath,
132 # SQL note: a path = NULL doesn't have to be unique.
133 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
136 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
137 """ retrieve the contents of some file_node having storage_id = id
138 optionally, fil_obj could point to the browse object of the file
143 boo = self.browse(cr, uid, id, context)
147 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
148 return self.__get_data_3(cr, uid, boo, ira, context)
150 def __get_data_3(self, cr, uid, boo, ira, context):
152 raise RuntimeError('media offline')
153 if boo.type == 'filestore':
154 if not ira.store_fname:
155 # On a migrated db, some files may have the wrong storage type
156 # try to fix their directory.
158 self._doclog.warning( "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
160 fpath = os.path.join(boo.path, ira.store_fname)
161 return file(fpath, 'rb').read()
162 elif boo.type == 'db':
163 # TODO: we need a better api for large files
165 out = base64.decodestring(ira.db_datas)
169 elif boo.type == 'realstore':
170 if not ira.store_fname:
171 # On a migrated db, some files may have the wrong storage type
172 # try to fix their directory.
174 self._doclog.warning("ir.attachment #%d does not have a filename, trying the name." %ira.id)
176 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
177 if os.path.exists(fpath):
178 return file(fpath,'rb').read()
179 elif not ira.store_fname:
182 raise IOError("File not found: %s" % fpath)
184 raise TypeError("No %s storage" % boo.type)
186 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
188 This function MUST be used from an ir.attachment. It wouldn't make sense
189 to store things persistently for other types (dynamic).
193 boo = self.browse(cr, uid, id, context)
197 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
200 raise RuntimeError('media offline')
201 self._doclog.debug( "Store data for ir.attachment #%d" % ira.id)
204 if boo.type == 'filestore':
208 # This can be improved
209 if os.path.isdir(path):
210 for dirs in os.listdir(path):
211 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
214 flag = flag or create_directory(path)
215 filename = random_name()
216 fname = os.path.join(path, flag, filename)
217 fp = file(fname, 'wb')
220 self._doclog.debug( "Saved data to %s" % fname)
221 filesize = len(data) # os.stat(fname).st_size
222 store_fname = os.path.join(flag, filename)
224 # TODO Here, an old file would be left hanging.
227 self._doclog.warning( "Couldn't save data to %s", path, exc_info=True)
228 raise except_orm(_('Error!'), str(e))
229 elif boo.type == 'db':
231 # will that work for huge data? TODO
232 out = base64.encodestring(data)
233 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
234 (out, file_node.file_id))
235 elif boo.type == 'realstore':
237 file_node.fix_ppath(cr, ira)
238 npath = file_node.full_path() or []
239 # npath may contain empty elements, for root directory etc.
240 for i, n in enumerate(npath):
244 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
246 raise ValueError("Invalid char %s in path %s" %(ch, n))
249 path = os.path.join(*dpath)
250 if not os.path.isdir(path):
252 fname = os.path.join(path, npath[-1])
253 fp = file(fname,'wb')
256 self._doclog.debug("Saved data to %s", fname)
257 filesize = len(data) # os.stat(fname).st_size
258 store_fname = os.path.join(*npath)
259 # TODO Here, an old file would be left hanging.
261 self._doclog.warning("Couldn't save data:", exc_info=True)
262 raise except_orm(_('Error!'), str(e))
264 raise TypeError("No %s storage" % boo.type)
266 # 2nd phase: store the metadata
273 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
274 ira.file_type or None, fname)
276 self._doclog.debug('Cannot index file:', exc_info=True)
279 # a hack: /assume/ that the calling write operation will not try
280 # to write the fname and size, and update them in the db concurrently.
281 # We cannot use a write() here, because we are already in one.
282 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
283 (store_fname, filesize, ustr(icont), mime, file_node.file_id))
284 file_node.content_length = filesize
285 file_node.content_type = mime
287 except Exception, e :
288 self._doclog.warning( "Couldn't save data:", exc_info=True)
289 # should we really rollback once we have written the actual data?
290 # at the db case (only), that rollback would be safe
291 raise except_orm(_('Error at doc write!'), str(e))
293 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
294 """ Before we unlink a file (fil_boo), prepare the list of real
295 files that have to be removed, too. """
297 if not storage_bo.online:
298 raise RuntimeError('media offline')
300 if storage_bo.type == 'filestore':
301 fname = fil_bo.store_fname
304 path = storage_bo.path
305 return (storage_bo.id, 'file', os.path.join(path, fname))
306 elif storage_bo.type == 'db':
308 elif storage_bo.type == 'realstore':
309 fname = fil_bo.store_fname
312 path = storage_bo.path
313 return ( storage_bo.id, 'file', os.path.join(path, fname))
315 raise TypeError("No %s storage" % boo.type)
317 def do_unlink(self, cr, uid, unres):
318 for id, ktype, fname in unres:
323 self._doclog.warning("Could not remove file %s, please remove manually.", fname, exc_info=True)
325 self._doclog.warning("Unknown unlink key %s" % ktype)