1 # -*- coding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
5 # Copyright (C) 2004-2010 Tiny SPRL (<http://tiny.be>).
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ##############################################################################
24 from subprocess import Popen, PIPE
# Module-level logger, named after this module; the indexing code below
# reports registration, mimetype detection and failures through it.
_logger = logging.getLogger(__name__)
class NhException(Exception):
    """ "Not handled": raised by an indexer when it cannot process the
        content or the file it was given, so that the caller may try
        another method. """
    pass
class indexer(object):
    """ An indexer knows how to parse the content of some file.

        Typically, one indexer should be instantiated per file
        type.
        Override this class to add more functionality. Note that
        you should only override the Content or the File methods
        that give an optimal result. """

    def _getMimeTypes(self):
        """ Return supported mimetypes """
        return []

    def _getExtensions(self):
        """ Return supported file extensions """
        return []

    def _getDefMime(self, ext):
        """ Return a mimetype for this document type, ideally the
            closest to the extension ext.

            The base implementation ignores ext and returns the first
            supported mimetype, or None when there is none. """
        mts = self._getMimeTypes()
        if mts:
            return mts[0]
        return None

    def indexContent(self, content, filename=None, realfile=None):
        """ Use either content or the real file, to index.
            Some parsers will work better with the actual
            content, others parse a file easier. Try the
            optimal.

            :param content: raw data of the document, or None
            :param filename: original name, only used to pick the suffix
                             of the temporary file
            :param realfile: path of an existing file holding the data
            :return: whatever _doIndexContent / _doIndexFile returns
            :raise NhException: when no method could handle the document
        """
        # First choice: the in-memory content.
        if content is not None:
            try:
                return self._doIndexContent(content)
            except NhException:
                pass

        # Second choice: the file we were pointed at.
        if realfile is not None:
            try:
                return self._doIndexFile(realfile)
            except NhException:
                pass

            with open(realfile, 'rb') as fp:
                content2 = fp.read()

            # The not-handled exception may be raised here
            return self._doIndexContent(content2)

        # last try, with a tmp file
        if content:
            ext = os.path.splitext(filename)[1] if filename else ''
            fd, rfname = tempfile.mkstemp(suffix=ext)
            try:
                try:
                    os.write(fd, content)
                finally:
                    # Never leak the descriptor, even if the write fails.
                    os.close(fd)
                return self._doIndexFile(rfname)
            except NhException:
                pass
            finally:
                # Remove the tmp file on success *and* on failure.
                os.unlink(rfname)

        raise NhException('No appropriate method to index file.')

    def _doIndexContent(self, content):
        """ Index from in-memory data; override in subclasses. """
        raise NhException("Content cannot be handled here.")

    def _doIndexFile(self, fpath):
        """ Index from a file path; override in subclasses. """
        raise NhException("Content cannot be handled here.")

    def __repr__(self):
        return "<indexer %s.%s>" %(self.__module__, self.__class__.__name__)
def mime_match(mime, mdict):
    """ Look up mime in mdict, falling back to a 'major/*' wildcard.

        :param mime: mimetype string, like 'text/plain'
        :param mdict: mapping whose keys are mimetypes or 'major/*' patterns
        :return: (mime, value) for an exact or wildcard hit, else
                 (None, None), so that callers can always unpack a pair
    """
    if mime is None:
        return (None, None)
    if mime in mdict:
        return (mime, mdict[mime])
    if '/' in mime:
        mpat = mime.split('/')[0] + '/*'
        if mpat in mdict:
            return (mime, mdict[mpat])

    return (None, None)
class contentIndex(object):
    """ Registry of indexer objects, keyed by mimetype and by file
        extension, which picks the right one to index a document. """

    def __init__(self):
        self.mimes = {}     # mimetype (or 'major/*' pattern) -> indexer
        self.exts = {}      # extension, as os.path.splitext gives it -> indexer

    def register(self, obj):
        """ Register obj for every mimetype and extension it reports.

            :raise Exception: when obj supports neither a mimetype nor
                              an extension
        """
        mimes = list(obj._getMimeTypes())
        exts = list(obj._getExtensions())
        if not (mimes or exts):
            raise Exception("Your indexer should at least support a mimetype or extension.")

        for mime in mimes:
            self.mimes[mime] = obj
        for ext in exts:
            self.exts[ext] = obj
        _logger.debug('Register content indexer: %r.', obj)

    def doIndex(self, content, filename=None, content_type=None, realfname=None, debug=False):
        """ Find an indexer for the document and run it.

            The indexer is looked up by exact content_type, then by the
            extension of filename, then by a wildcard match on
            content_type and finally by asking the file(1) utility.

            :param content: raw data of the document
            :param filename: name of the document, used for its extension
            :param content_type: mimetype claimed for the document
            :param realfname: path of an existing file holding the data;
                              when missing a temporary file may be created
            :param debug: unused here, kept for API compatibility
            :return: (mime, indexed result), (mime, None) when no indexer
                     was found, or None when indexing raised
        """
        fobj = None
        fname = None
        mime = None
        if content_type and content_type in self.mimes:
            mime = content_type
            fobj = self.mimes[content_type]
        elif filename:
            ext = os.path.splitext(filename)[1]
            if ext in self.exts:
                fobj = self.exts[ext]
                mime = fobj._getDefMime(ext)

        if content_type and not fobj:
            mime, fobj = mime_match(content_type, self.mimes)

        if not fobj:
            try:
                if realfname:
                    fname = realfname
                else:
                    try:
                        ext = os.path.splitext(filename or 'test.tmp')[1]
                    except Exception:
                        ext = 'tmp'
                    fd, fname = tempfile.mkstemp(suffix=ext)
                    try:
                        os.write(fd, content)
                    finally:
                        # Close even when the write fails, the file itself
                        # is removed at the end of this method.
                        os.close(fd)

                # Text mode, so that the output is a str on any Python.
                pop = Popen(['file', '-b', '--mime', fname], shell=False,
                            stdout=PIPE, universal_newlines=True)
                result, _stderr = pop.communicate()

                mime2 = result.split(';')[0].strip()
                _logger.debug('File gives us: %s', mime2)
                # Note that the temporary file still exists now.
                mime, fobj = mime_match(mime2, self.mimes)
                if not mime:
                    mime = mime2
            except Exception:
                _logger.exception('Cannot determine mime type.')

        try:
            if fobj:
                res = (mime, fobj.indexContent(content, filename, fname or realfname))
            else:
                _logger.debug("Have no object, return (%s, None).", mime)
                res = (mime, None)
        except Exception:
            _logger.exception("Cannot index file %s (%s).",
                              filename, fname or realfname)
            res = None

        # If we created a tmp file, unlink it now
        if not realfname and fname:
            try:
                os.unlink(fname)
            except Exception:
                _logger.exception("Cannot unlink %s.", fname)
        return res
# Module-level contentIndex instance, created once at import time.
cntIndex = contentIndex()
200 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: