1 # -*- coding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
5 # Copyright (C) 2004-2010 Tiny SPRL (<http://tiny.be>).
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ##############################################################################
24 # A quick hack: if netsvc is not available, emulate it, so that this module also works offline.
# Forward the message to the netsvc logger, on the "index" channel.
28 netsvc.Logger().notifyChannel("index",lvl,msg)
# Log level names as plain strings.
# NOTE(review): presumably these mirror the netsvc.LOG_* constants that the
# log() calls in this module pass as level -- confirm against netsvc.
32 LOG_DEBUG_RPC = 'debug_rpc'
37 LOG_CRITICAL = 'critical'
# "Not handled": raised by an indexer when it cannot handle the given
# content or file (see the raise statements in the indexer methods).
43 class NhException(Exception):
46 from subprocess import Popen, PIPE
49 """ An indexer knows how to parse the content of some file.
51 Typically, one indexer should be instantiated per file
53 Subclass this class to add more functionality. Note that
54 you should only override whichever of the Content or the File
55 methods (_doIndexContent, _doIndexFile) gives an optimal result. """
# contentIndex.register() iterates over the returned value and uses each
# entry as a key of its mimetype table.
57 def _getMimeTypes(self):
58 """ Return the mimetypes supported by this indexer """
# NOTE(review): presumably returns the file extensions this indexer handles;
# contentIndex.register() iterates over the returned value -- confirm.
61 def _getExtensions(self):
# contentIndex.doIndex() calls this with the extension produced by
# os.path.splitext() on the file name.
64 def _getDefMime(self,ext):
65 """ Return a mimetype for this document type, ideally the
66 closest to the extension ext. """
# The candidates are this indexer's own supported mimetypes.
67 mts = self._getMimeTypes();
72 def indexContent(self,content,filename=None, realfile = None):
73 """ Use either content or the real file, to index.
74 Some parsers will work better with the actual
75 content, others parse a file easier. Try the
81 return self._doIndexContent(content)
# File-based attempt: parse directly from the path of the real file.
87 return self._doIndexFile(realfile)
# Open the real file in binary mode.
# NOTE(review): content2 below is presumably the data read from fp -- confirm,
# and confirm that fp is closed on every path.
91 fp = open(realfile,'rb')
95 # The not-handled exception may be raised here
96 return self._doIndexContent(content2)
99 # last try, with a tmp file
# Keep the extension of filename on the temp file; filename may be None,
# in which case the suffix is empty.
102 fname,ext = filename and os.path.splitext(filename) or ('','')
103 fd, rfname = tempfile.mkstemp(suffix=ext)
104 os.write(fd, content)
# NOTE(review): fd has to be closed and rfname unlinked on every path,
# including when _doIndexFile() raises -- confirm.
106 res = self._doIndexFile(rfname)
# Nothing above returned a result: report that the input is not handled.
112 raise NhException('No appropriate method to index file')
def _doIndexContent(self, content):
    """ Index from in-memory content.

        The base implementation handles nothing and always raises
        NhException; a subclass that parses raw content overrides it.
    """
    message = "Content not handled here"
    raise NhException(message)
def _doIndexFile(self, fpath):
    """ Index from a file, given its path.

        The base implementation handles nothing and always raises
        NhException; a subclass that parses files overrides it.
    """
    message = "Content not handled here"
    raise NhException(message)
def mime_match(mime, mdict):
    """ Find the entry of mdict that applies to a mimetype.

        An exact entry for `mime` wins; otherwise the wildcard entry
        '<major>/*' of the same major type is tried.

        :param mime: mimetype string, e.g. 'text/plain'
        :param mdict: mapping of mimetype (or '<major>/*' pattern) to a value
        :return: (mime, matching value), or (None, None) when nothing matches
    """
    # `in` replaces dict.has_key(), which only exists in Python 2.
    if mime in mdict:
        return (mime, mdict[mime])

    # Only a 'major/minor' type can fall back to its 'major/*' wildcard.
    if '/' in mime:
        mpat = mime.split('/')[0] + '/*'
        if mpat in mdict:
            return (mime, mdict[mpat])

    # Callers unpack the result into two names, so always return a pair.
    return (None, None)
# Registry of indexer objects, looked up by mimetype (self.mimes) or by
# file extension (self.exts), plus the entry point that picks one of them
# to index a document.
132 class contentIndex() :
# Register the indexer obj under every mimetype it reports.
137 def register(self, obj):
139 for mime in obj._getMimeTypes():
# A later registration for the same mimetype replaces the earlier one.
140 self.mimes[mime] = obj
143 for ext in obj._getExtensions():
148 log(netsvc.LOG_DEBUG, "Register content indexer: %r" % obj)
# NOTE(review): "suport" in the message below is a typo for "support".
150 raise Exception("Your indexer should at least suport a mimetype or extension")
# Pick an indexer for the document and run it; the result built below is a
# (mime, indexed content) pair.
152 def doIndex(self,content, filename=None, content_type=None, realfname = None, debug=False):
# First choice: an indexer registered for exactly this content_type.
# NOTE(review): dict.has_key() exists only in Python 2; `in` is the
# portable spelling.
156 if content_type and self.mimes.has_key(content_type):
158 fobj = self.mimes[content_type]
# Look the indexer up by the extension of filename.
160 bname,ext = os.path.splitext(filename)
161 if self.exts.has_key(ext):
162 fobj = self.exts[ext]
# Let the indexer say which mimetype to report for this extension.
163 mime = fobj._getDefMime(ext)
# Still no indexer: try the exact-or-wildcard mimetype match.
165 if content_type and not fobj:
166 mime,fobj = mime_match(content_type, self.mimes)
# Write the content to a temp file that keeps the extension, so that the
# external `file` command can inspect it.
# NOTE(review): confirm that fd is closed after the write.
173 bname,ext = os.path.splitext(filename)
174 fd, fname = tempfile.mkstemp(suffix=ext)
175 os.write(fd, content)
# Run `file -b --mime-type <tmpfile>` without a shell and take its stdout.
178 fp = Popen(['file','-b','--mime-type',fname], shell=False, stdout=PIPE).stdout
# NOTE(review): result is presumably what was read from fp -- confirm.
181 mime2 = result.strip()
182 log(netsvc.LOG_DEBUG,"File gave us: %s" % mime2)
183 # Note that the temporary file still exists now.
184 mime,fobj = mime_match(mime2, self.mimes)
188 log(netsvc.LOG_WARNING,"Cannot determine mime type: %s" % str(e))
# The indexer gets the temp file path when one was created, otherwise the
# caller's real file name.
192 res = (mime, fobj.indexContent(content,filename,fname or realfname) )
194 log(netsvc.LOG_DEBUG,"Have no object, return (%s, None)" % mime)
197 log(netsvc.LOG_WARNING,"Could not index file, %s" % e)
200 # If we created a tmp file, unlink it now
201 if not realfname and fname:
205 log(netsvc.LOG_WARNING,"Could not unlink %s, %s" %(fname, e))
# Module-level contentIndex instance, created when the module is loaded.
209 cntIndex = contentIndex()
211 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: