1 # -*- encoding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
6 # Copyright (C) P. Christeas, 2009, all rights reserved
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 ##############################################################################
23 from osv import osv, fields
27 from tools.misc import ustr
28 from tools.translate import _
30 from osv.orm import except_orm
35 from content_index import cntIndex
# Root of the document filestore: the 'document_path' config option when set,
# otherwise <root_path>/filestore.  A per-database subdirectory is used under
# this root (see document_storage._get_rootpath).
DMS_ROOT_PATH = tools.config.get('document_path', os.path.join(tools.config.get('root_path'), 'filestore'))
40 """ The algorithm of data storage
42 We have to consider 3 cases of data /retrieval/:
43 Given (context,path) we need to access the file (aka. node).
44 given (directory, context), we need one of its children (for listings, views)
given (ir.attachment, context), we need its data and metadata (node).
47 For data /storage/ we have the cases:
48 Have (ir.attachment, context), we modify the file (save, update, rename etc).
49 Have (directory, context), we create a file.
50 Have (path, context), we create or modify a file.
52 Note that in all above cases, we don't explicitly choose the storage media,
53 but always require a context to be present.
55 Note that a node will not always have a corresponding ir.attachment. Dynamic
nodes, for one, won't. Their metadata will be computed by the parent storage
59 The algorithm says that in any of the above cases, our first goal is to locate
60 the node for any combination of search criteria. It would be wise NOT to
61 represent each node in the path (like node[/] + node[/dir1] + node[/dir1/dir2])
62 but directly jump to the end node (like node[/dir1/dir2]) whenever possible.
64 We also contain all the parenting loop code in one function. This is intentional,
65 because one day this will be optimized in the db (Pg 8.4).
72 d = [random.choice(string.ascii_letters) for x in xrange(10) ]
# Characters that are not allowed in file names, mapped to the replacement
# token to substitute when sanitizing a name.  '/' maps to '__'; every other
# character maps to the decimal string of its own hash().
# NOTE(review): hash() of a str is not stable across interpreter runs or
# versions, so these replacement tokens are not reproducible between
# processes — confirm that is acceptable for persisted file names.
INVALID_CHARS = {'*':str(hash('*')), '|':str(hash('|')) , "\\":str(hash("\\")), '/':'__', ':':str(hash(':')), '"':str(hash('"')), '<':str(hash('<')) , '>':str(hash('>')) , '?':str(hash('?'))}
def create_directory(path):
    """Create a new randomly-named subdirectory under *path*.

    Used to shard the filestore into subdirectories.  Returns the base
    name of the new directory (not the full path).
    """
    dir_name = random_name()
    path = os.path.join(path, dir_name)
    # The visible original computed the path but never created it nor
    # returned anything; callers (set_data) rely on the returned name.
    # NOTE(review): directory mode is the process umask default — confirm.
    os.makedirs(path)
    return dir_name
class document_storage(osv.osv):
    """ The primary object for data storage.

    Each instance of this object is a storage media, in which our application
    can store contents. The object here controls the behaviour of the storage
    media.
    The referring document.directory-ies will control the placement of data
    into the media.
    It is a bad idea to have multiple document.storage objects pointing to
    the same tree of filesystem storage.
    """
    _name = 'document.storage'
    _description = 'Document storage media'

    _columns = {
        'name': fields.char('Name', size=64, required=True, select=1),
        'write_date': fields.datetime('Date Modified', readonly=True),
        'write_uid': fields.many2one('res.users', 'Last Modification User', readonly=True),
        'create_date': fields.datetime('Date Created', readonly=True),
        'create_uid': fields.many2one('res.users', 'Creator', readonly=True),
        'user_id': fields.many2one('res.users', 'Owner'),
        'group_ids': fields.many2many('res.groups', 'document_directory_group_rel', 'item_id', 'group_id', 'Groups'),
        'dir_ids': fields.one2many('document.directory', 'parent_id', 'Directories'),
        'type': fields.selection([('db', 'Database'), ('filestore', 'Internal File storage'),
                ('realstore', 'External file storage'), ('virtual', 'Virtual storage')], 'Type', required=True),
        'path': fields.char('Path', size=250, select=1, help="For file storage, the root path of the storage"),
        'online': fields.boolean('Online', help="If not checked, media is currently offline and its contents not available", required=True),
        'readonly': fields.boolean('Read Only', help="If set, media is for reading only"),
    }
115 def _get_rootpath(self, cr, uid, context=None):
116 return os.path.join(DMS_ROOT_PATH, cr.dbname)
119 'user_id': lambda self, cr, uid, ctx: uid,
120 'online': lambda *args: True,
121 'readonly': lambda *args: False,
122 # Note: the defaults below should only be used ONCE for the default
123 # storage media. All other times, we should create different paths at least.
124 'type': lambda *args: 'filestore',
125 'path': _get_rootpath,
128 # SQL note: a path = NULL doesn't have to be unique.
129 ('path_uniq', 'UNIQUE(type,path)', "The storage path must be unique!")
132 def get_data(self, cr, uid, id, file_node, context=None, fil_obj=None):
133 """ retrieve the contents of some file_node having storage_id = id
134 optionally, fil_obj could point to the browse object of the file
139 boo = self.browse(cr, uid, id, context)
143 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
144 return self.__get_data_3(cr, uid, boo, ira, context)
146 def __get_data_3(self, cr, uid, boo, ira, context):
148 raise RuntimeError('media offline')
149 if boo.type == 'filestore':
150 if not ira.store_fname:
151 # On a migrated db, some files may have the wrong storage type
152 # try to fix their directory.
154 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "ir.attachment #%d does not have a filename, but is at filestore, fix it!" % ira.id)
156 fpath = os.path.join(boo.path, ira.store_fname)
157 return file(fpath, 'rb').read()
158 elif boo.type == 'db':
159 # TODO: we need a better api for large files
161 out = base64.decodestring(ira.db_datas)
165 elif boo.type == 'realstore':
166 if not ira.store_fname:
167 # On a migrated db, some files may have the wrong storage type
168 # try to fix their directory.
170 netsvc.Logger().notifyChannel('document',netsvc.LOG_WARNING,"ir.attachment #%d does not have a filename, trying the name." %ira.id)
172 fpath = os.path.join(boo.path,ira.store_fname or ira.name)
173 if os.path.exists(fpath):
174 return file(fpath,'rb').read()
175 elif not ira.store_fname:
178 raise IOError("File not found: %s" % fpath)
180 raise TypeError("No %s storage" % boo.type)
182 def set_data(self, cr, uid, id, file_node, data, context=None, fil_obj=None):
184 This function MUST be used from an ir.attachment. It wouldn't make sense
185 to store things persistently for other types (dynamic).
189 boo = self.browse(cr, uid, id, context)
190 logger = netsvc.Logger()
194 ira = self.pool.get('ir.attachment').browse(cr, uid, file_node.file_id, context=context)
197 raise RuntimeError('media offline')
198 logger.notifyChannel('document', netsvc.LOG_DEBUG, "Store data for ir.attachment #%d" % ira.id)
201 if boo.type == 'filestore':
205 # This can be improved
206 if os.path.isdir(path):
207 for dirs in os.listdir(path):
208 if os.path.isdir(os.path.join(path, dirs)) and len(os.listdir(os.path.join(path, dirs))) < 4000:
211 flag = flag or create_directory(path)
212 filename = random_name()
213 fname = os.path.join(path, flag, filename)
214 fp = file(fname, 'wb')
217 logger.notifyChannel('document', netsvc.LOG_DEBUG, "Saved data to %s" % fname)
218 filesize = len(data) # os.stat(fname).st_size
219 store_fname = os.path.join(flag, filename)
221 # TODO Here, an old file would be left hanging.
223 except Exception, e :
224 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
225 raise except_orm(_('Error!'), str(e))
226 elif boo.type == 'db':
228 # will that work for huge data? TODO
229 out = base64.encodestring(data)
230 cr.execute('UPDATE ir_attachment SET db_datas = %s WHERE id = %s',
231 (out, file_node.file_id))
232 elif boo.type == 'realstore':
234 file_node.fix_ppath(cr, ira)
235 npath = file_node.full_path() or []
236 # npath may contain empty elements, for root directory etc.
237 for i, n in enumerate(npath):
241 for ch in ('*', '|', "\\", '/', ':', '"', '<', '>', '?', '..'):
243 raise ValueError("Invalid char %s in path %s" %(ch, n))
246 path = os.path.join(*dpath)
247 if not os.path.isdir(path):
249 fname = os.path.join(path, npath[-1])
250 fp = file(fname,'wb')
253 logger.notifyChannel('document',netsvc.LOG_DEBUG,"Saved data to %s" % fname)
254 filesize = len(data) # os.stat(fname).st_size
255 store_fname = os.path.join(*npath)
256 # TODO Here, an old file would be left hanging.
259 traceback.print_exc()
260 netsvc.Logger().notifyChannel('document',netsvc.LOG_WARNING,"Couldn't save data: %s" % e)
261 raise except_orm(_('Error!'), str(e))
263 raise TypeError("No %s storage" % boo.type)
265 # 2nd phase: store the metadata
272 mime, icont = cntIndex.doIndex(data, ira.datas_fname,
273 ira.file_type or None, fname)
275 logger.notifyChannel('document', netsvc.LOG_DEBUG, 'Cannot index file: %s' % str(e))
278 # a hack: /assume/ that the calling write operation will not try
279 # to write the fname and size, and update them in the db concurrently.
280 # We cannot use a write() here, because we are already in one.
281 cr.execute('UPDATE ir_attachment SET store_fname = %s, file_size = %s, index_content = %s, file_type = %s WHERE id = %s',
282 (store_fname, filesize, ustr(icont), mime, file_node.file_id))
283 file_node.content_length = filesize
284 file_node.content_type = mime
286 except Exception, e :
287 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Couldn't save data: %s" % str(e))
288 # should we really rollback once we have written the actual data?
289 # at the db case (only), that rollback would be safe
290 raise except_orm(_('Error at doc write!'), str(e))
292 def prepare_unlink(self, cr, uid, storage_bo, fil_bo):
293 """ Before we unlink a file (fil_boo), prepare the list of real
294 files that have to be removed, too. """
296 if not storage_bo.online:
297 raise RuntimeError('media offline')
299 if storage_bo.type == 'filestore':
300 fname = fil_bo.store_fname
303 path = storage_bo.path
304 return (storage_bo.id, 'file', os.path.join(path, fname))
305 elif storage_bo.type == 'db':
307 elif storage_bo.type == 'realstore':
308 fname = fil_bo.store_fname
311 path = storage_bo.path
312 return ( storage_bo.id, 'file', os.path.join(path,fname))
314 raise TypeError("No %s storage" % boo.type)
316 def do_unlink(self, cr, uid, unres):
317 for id, ktype, fname in unres:
322 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Could not remove file %s, please remove manually." % fname)
324 netsvc.Logger().notifyChannel('document', netsvc.LOG_WARNING, "Unknown unlink key %s" % ktype)