1 # -*- coding: utf-8 -*-
2 ##############################################################################
4 # OpenERP, Open Source Management Solution
5 # Copyright (C) 2004-2010 Tiny SPRL (<http://tiny.be>).
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ##############################################################################
25 from content_index import indexer, cntIndex
26 from subprocess import Popen, PIPE
30 return s.decode('utf-8')
33 return s.decode('latin')
36 return s.encode('ascii')
40 class TxtIndex(indexer):
41 def _getMimeTypes(self):
42 return ['text/plain','text/html','text/diff','text/xml', 'text/*',
45 def _getExtensions(self):
46 return ['.txt', '.py']
48 def _doIndexContent(self,content):
# Create a TxtIndex instance and pass it to cntIndex.register().
51 cntIndex.register(TxtIndex())
53 class DocIndex(indexer):
54 def _getMimeTypes(self):
55 return [ 'application/ms-word']
57 def _getExtensions(self):
def _doIndexFile(self, fname):
    """Extract the text of the file *fname* by running ``antiword`` on it.

    :param fname: path passed to ``antiword`` as its only argument.
    :return: the tool's standard output, converted by ``_to_unicode``.
    :raises OSError: propagated from ``Popen`` when the program cannot be
        started (for instance when it is not installed).
    """
    converter = Popen(['antiword', fname], shell=False, stdout=PIPE)
    # communicate() drains stdout to EOF, closes the pipe and waits for the
    # child. Reading ``.stdout`` directly left the descriptor open and the
    # finished process unreaped on every call.
    output, unused_stderr = converter.communicate()
    return _to_unicode(output)
# Create a DocIndex instance and pass it to cntIndex.register().
64 cntIndex.register(DocIndex())
66 class PdfIndex(indexer):
67 def _getMimeTypes(self):
68 return [ 'application/pdf']
70 def _getExtensions(self):
def _doIndexFile(self, fname):
    """Extract the text of the file *fname* by running ``pdftotext`` on it.

    :param fname: path of the input file passed to ``pdftotext``.
    :return: the tool's standard output, converted by ``_to_unicode``.
    :raises OSError: propagated from ``Popen`` when the program cannot be
        started (for instance when it is not installed).
    """
    # '-enc UTF-8' selects the output encoding, '-nopgbrk' suppresses the
    # page-break characters between pages, and the final '-' sends the text
    # to stdout instead of a file.
    command = ['pdftotext', '-enc', 'UTF-8', '-nopgbrk', fname, '-']
    extractor = Popen(command, shell=False, stdout=PIPE)
    # communicate() drains stdout to EOF, closes the pipe and waits for the
    # child. Reading ``.stdout`` directly left the descriptor open and the
    # finished process unreaped on every call.
    output, unused_stderr = extractor.communicate()
    return _to_unicode(output)
# Create a PdfIndex instance and pass it to cntIndex.register().
77 cntIndex.register(PdfIndex())
79 class ImageNoIndex(indexer):
80 def _getMimeTypes(self):
83 def _getExtensions(self):
84 #better return no extension, and let 'file' do its magic
86 #return ['.png','.jpg','.gif','.jpeg','.bmp','.tiff']
88 def _doIndexContent(self,content):
# Create an ImageNoIndex instance and pass it to cntIndex.register().
92 cntIndex.register(ImageNoIndex())
95 #def _getDefMime(self,ext):
97 #def content_index(content, filename=None, content_type=None):
98 #fname,ext = os.path.splitext(filename)
100 #elif ext in ('.xls','.ods','.odt','.odp'):
101 #s = StringIO.StringIO(content)
102 #o = odt2txt.OpenDocumentTextFile(s)
103 #result = _to_unicode(o.toString())
105 #elif ext in ('.txt','.py','.patch','.html','.csv','.xml'):