1 # -*- coding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
5 # Copyright (C) 2004-2009 Tiny SPRL (<http://tiny.be>).
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ##############################################################################
# A quick hack: if netsvc is not available, emulate it, so that this module also works offline.
28 netsvc.Logger().notifyChannel("index",lvl,msg)
32 LOG_DEBUG_RPC = 'debug_rpc'
38 LOG_CRITICAL = 'critical'
44 class NhException(Exception):
47 from subprocess import Popen, PIPE
50 """ An indexer knows how to parse the content of some file.
52 Typically, one indexer should be instantiated per file
54 Override this class to add more functionality. Note that
55 you should only override the Content or the File methods
56 that give an optimal result. """
58 def _getMimeTypes(self):
59 """ Return supported mimetypes """
62 def _getExtensions(self):
65 def _getDefMime(self,ext):
66 """ Return a mimetype for this document type, ideally the
67 closest to the extension ext. """
68 mts = self._getMimeTypes();
73 def indexContent(self,content,filename=None, realfile = None):
74 """ Use either content or the real file, to index.
75 Some parsers will work better with the actual
76 content, others parse a file easier. Try the
82 return self._doIndexContent(content)
88 return self._doIndexFile(realfile)
92 fp = open(realfile,'rb')
# The call below may raise the "not handled" exception (NhException)
97 return self._doIndexContent(content2)
# Last attempt: write the content to a temporary file and index that file
103 fname,ext = filename and os.path.splitext(filename) or ('','')
104 fd, rfname = tempfile.mkstemp(suffix=ext)
105 os.write(fd, content)
107 res = self._doIndexFile(rfname)
113 raise NhException('No appropriate method to index file')
def _doIndexContent(self, content):
    """ Index in-memory content.

        This base implementation handles nothing and always raises
        NhException; an indexer that can parse raw content overrides
        this method. """
    reason = "Content not handled here"
    raise NhException(reason)
def _doIndexFile(self, fpath):
    """ Index the file found at path fpath.

        This base implementation handles nothing and always raises
        NhException; an indexer that can parse a file on disk
        overrides this method. """
    reason = "Content not handled here"
    raise NhException(reason)
123 def mime_match(mime, mdict):
124 if mdict.has_key(mime):
125 return (mime, mdict[mime])
127 mpat = mime.split('/')[0]+'/*'
128 if mdict.has_key(mpat):
129 return (mime, mdict[mpat])
133 class contentIndex() :
138 def register(self, obj):
140 for mime in obj._getMimeTypes():
141 self.mimes[mime] = obj
144 for ext in obj._getExtensions():
149 log(netsvc.LOG_DEBUG, "Register content indexer: %r" % obj)
151 raise Exception("Your indexer should at least suport a mimetype or extension")
153 def doIndex(self,content, filename=None, content_type=None, realfname = None, debug=False):
157 if content_type and self.mimes.has_key(content_type):
159 fobj = self.mimes[content_type]
161 bname,ext = os.path.splitext(filename)
162 if self.exts.has_key(ext):
163 fobj = self.exts[ext]
164 mime = fobj._getDefMime(ext)
166 if content_type and not fobj:
167 mime,fobj = mime_match(content_type, self.mimes)
174 bname,ext = os.path.splitext(filename)
175 fd, fname = tempfile.mkstemp(suffix=ext)
176 os.write(fd, content)
179 fp = Popen(['file','-b','--mime-type',fname], shell=False, stdout=PIPE).stdout
182 mime2 = result.strip()
183 log(netsvc.LOG_DEBUG,"File gave us: %s" % mime2)
184 # Note that the temporary file still exists now.
185 mime,fobj = mime_match(mime2, self.mimes)
189 log(netsvc.LOG_WARNING,"Cannot determine mime type: %s" % str(e))
193 res = (mime, fobj.indexContent(content,filename,fname or realfname) )
195 log(netsvc.LOG_DEBUG,"Have no object, return (%s, None)" % mime)
198 log(netsvc.LOG_WARNING,"Could not index file, %s" % e)
201 # If we created a tmp file, unlink it now
202 if not realfname and fname:
206 log(netsvc.LOG_WARNING,"Could not unlink %s, %s" %(fname, e))
210 cntIndex = contentIndex()
212 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: