[IMP]document : get improvement of document from xrg-addons branch who made nice...
[odoo/odoo.git] / addons / document / content_index.py
1 # -*- coding: utf-8 -*-
2 ##############################################################################
3 #    
4 #    OpenERP, Open Source Management Solution
5 #    Copyright (C) 2004-2009 Tiny SPRL (<http://tiny.be>).
6 #
7 #    This program is free software: you can redistribute it and/or modify
8 #    it under the terms of the GNU Affero General Public License as
9 #    published by the Free Software Foundation, either version 3 of the
10 #    License, or (at your option) any later version.
11 #
12 #    This program is distributed in the hope that it will be useful,
13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 #    GNU Affero General Public License for more details.
16 #
17 #    You should have received a copy of the GNU Affero General Public License
18 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.     
19 #
20 ##############################################################################
21 import os
22 import tempfile
23
# A quick hack: if netsvc cannot be imported, emulate the small part of it
# that this module uses, so that the indexers also work offline.
try:
    import netsvc

    def log(lvl, msg):
        """ Send msg to the "index" channel of the netsvc logger, at level lvl. """
        netsvc.Logger().notifyChannel("index", lvl, msg)
except ImportError:
    # Only a missing module triggers the fallback; any other failure while
    # importing netsvc is a real error and must not be hidden.
    class netsvc:
        """ Stand-in for the real module: only provides the log level names. """
        LOG_NOTSET = 'notset'
        LOG_DEBUG_RPC = 'debug_rpc'
        LOG_DEBUG = 'debug'
        LOG_DEBUG2 = 'debug2'
        LOG_INFO = 'info'
        LOG_WARNING = 'warn'
        LOG_ERROR = 'error'
        LOG_CRITICAL = 'critical'

    def log(lvl, msg):
        """ Fallback logger: print msg to stdout; the level lvl is ignored. """
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(msg)
42
43
class NhException(Exception):
    """ Raised by an indexer when it does not handle the given content or file. """
46
47 from subprocess import Popen, PIPE
48
class indexer(object):
    """ An indexer knows how to parse the content of some file.

        Typically, one indexer should be instantiated per file type.
        Override this class to add more functionality. Note that you
        should only override the Content or the File methods that give
        an optimal result.
    """

    def _getMimeTypes(self):
        """ Return the list of supported mimetypes (empty by default). """
        return []

    def _getExtensions(self):
        """ Return the list of supported file extensions (empty by default). """
        return []

    def _getDefMime(self, ext):
        """ Return a mimetype for this document type, ideally the
            closest to the extension ext.

            This base implementation ignores ext and returns the first
            entry of _getMimeTypes(), or None when there is none.
        """
        mts = self._getMimeTypes()
        if mts:
            return mts[0]
        return None

    def indexContent(self, content, filename=None, realfile=None):
        """ Use either content or the real file, to index.

            Some parsers will work better with the actual content,
            others parse a file easier. Try the optimal.

            The attempts are, in order:
              1. _doIndexContent(content), if content is not None;
              2. _doIndexFile(realfile), then _doIndexContent() on the
                 data read from realfile, if realfile is not None;
              3. _doIndexFile() on a temporary copy of content.

            @param content  the data of the document, or None
            @param filename optional name; only its extension is used, as
                            suffix of the temporary file
            @param realfile optional path of a file holding the document
            @return whatever the _doIndex* method that succeeded returned
            @raise NhException if no method could handle the document
        """
        if content is not None:
            try:
                return self._doIndexContent(content)
            except NhException:
                pass

        if realfile is not None:
            try:
                return self._doIndexFile(realfile)
            except NhException:
                pass

            # 'with' guarantees the handle is closed even if read() fails
            with open(realfile, 'rb') as fp:
                file_content = fp.read()

            # The not-handled exception may be raised here
            return self._doIndexContent(file_content)

        # last try, with a tmp file
        if content:
            ext = os.path.splitext(filename)[1] if filename else ''
            fd, tmp_path = tempfile.mkstemp(suffix=ext)
            try:
                try:
                    os.write(fd, content)
                finally:
                    # never leak the descriptor, even when the write fails
                    os.close(fd)
                return self._doIndexFile(tmp_path)
            except NhException:
                pass
            finally:
                # Remove the temporary file on every exit path, including
                # the one where the parser raised.
                if os.path.exists(tmp_path):
                    os.unlink(tmp_path)

        raise NhException('No appropriate method to index file')

    def _doIndexContent(self, content):
        """ Index from in-memory data. Override in subclasses; the base
            implementation always raises NhException. """
        raise NhException("Content not handled here")

    def _doIndexFile(self, fpath):
        """ Index from the file at fpath. Override in subclasses; the base
            implementation always raises NhException. """
        raise NhException("Content not handled here")
120                 
121                 
122
def mime_match(mime, mdict):
    """ Look up mime in mdict, falling back to a 'type/*' wildcard key.

        @param mime  a mimetype string such as 'text/plain' (None is
                     tolerated and simply does not match)
        @param mdict dictionary keyed by mimetype or by 'type/*' pattern
        @return (mime, value) for an exact or wildcard hit, where mime is
                the mimetype that was asked for; (None, None) otherwise
    """
    if mime in mdict:
        return (mime, mdict[mime])
    # Guard against None/empty before the substring test
    if mime and '/' in mime:
        mpat = mime.split('/', 1)[0] + '/*'
        if mpat in mdict:
            return (mime, mdict[mpat])

    return (None, None)
132
class contentIndex(object):
    """ Registry of indexers, looked up by mimetype or by file extension.

        Indexers are added with register(); doIndex() then selects the
        one matching a document and returns what it extracted.
    """

    def __init__(self):
        # mimetype (or 'type/*' pattern, see mime_match) -> indexer
        self.mimes = {}
        # extension in the form os.path.splitext() returns it -> indexer
        self.exts = {}

    def register(self, obj):
        """ Register obj for every mimetype and extension it declares.

            @param obj an indexer; its _getMimeTypes() and
                       _getExtensions() are queried
            @raise Exception if obj declares neither a mimetype nor an
                             extension
        """
        registered = False
        for mime in obj._getMimeTypes():
            self.mimes[mime] = obj
            registered = True

        for ext in obj._getExtensions():
            self.exts[ext] = obj
            registered = True

        if not registered:
            raise Exception("Your indexer should at least suport a mimetype or extension")
        # Tuple-wrap so that an obj which is itself a tuple formats correctly
        log(netsvc.LOG_DEBUG, "Register content indexer: %r" % (obj,))

    def _fileMimeType(self, path):
        """ Return the mimetype the external 'file' utility reports for path. """
        proc = Popen(['file', '-b', '--mime-type', path], shell=False, stdout=PIPE)
        # communicate() reads the pipe and waits for the child, so the
        # process is reaped instead of being left behind as a zombie.
        output = proc.communicate()[0]
        if not isinstance(output, str):
            # Python 3 hands back bytes; the registry keys are str
            output = output.decode('ascii', 'replace')
        return output.strip()

    def doIndex(self, content, filename=None, content_type=None, realfname=None, debug=False):
        """ Find the indexer for a document and run it.

            The indexer is chosen, in order, from: an exact content_type
            match, the extension of filename, a 'type/*' match of
            content_type, and finally the mimetype the 'file' utility
            reports for realfname (or for a temporary copy of content).

            @param content      the data of the document
            @param filename     optional name of the document
            @param content_type optional mimetype of the document
            @param realfname    optional path of a file holding the document
            @param debug        unused, kept for interface compatibility
            @return (mime, result) where result is what the indexer
                    returned, or None when no indexer matched; plain None
                    when the indexer raised
        """
        fobj = None
        fname = None
        mime = None
        if content_type and content_type in self.mimes:
            mime = content_type
            fobj = self.mimes[content_type]
        elif filename:
            ext = os.path.splitext(filename)[1]
            if ext in self.exts:
                fobj = self.exts[ext]
                mime = fobj._getDefMime(ext)

        if content_type and not fobj:
            mime, fobj = mime_match(content_type, self.mimes)

        if not fobj:
            try:
                if realfname:
                    fname = realfname
                else:
                    # filename is optional: fall back to no suffix rather
                    # than failing in splitext() and skipping detection
                    ext = os.path.splitext(filename)[1] if filename else ''
                    fd, fname = tempfile.mkstemp(suffix=ext)
                    try:
                        os.write(fd, content)
                    finally:
                        os.close(fd)

                mime2 = self._fileMimeType(fname)
                log(netsvc.LOG_DEBUG, "File gave us: %s" % mime2)
                # Note that the temporary file still exists now.
                mime, fobj = mime_match(mime2, self.mimes)
                if not mime:
                    mime = mime2
            except Exception as e:
                log(netsvc.LOG_WARNING, "Cannot determine mime type: %s" % str(e))

        try:
            if fobj:
                res = (mime, fobj.indexContent(content, filename, fname or realfname))
            else:
                log(netsvc.LOG_DEBUG, "Have no object, return (%s, None)" % mime)
                res = (mime, None)
        except Exception as e:
            log(netsvc.LOG_WARNING, "Could not index file, %s" % e)
            res = None
        finally:
            # If we created a tmp file, unlink it now. fname differs from
            # realfname only when mkstemp() produced it above.
            if not realfname and fname:
                try:
                    os.unlink(fname)
                except Exception as e2:
                    log(netsvc.LOG_WARNING, "Could not unlink %s, %s" % (fname, e2))

        return res


# Module-level registry instance
cntIndex = contentIndex()
211
212 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: