[REF] removed unecessary code. This should be handled by the record rules/access...
[odoo/odoo.git] / addons / base_report_designer / openerp_sxw2rml / openerp_sxw2rml.py
1 # -*- coding: utf-8 -*-
2 ##############################################################################
3 #
4 # Copyright (c):
5 #
6 #     2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
7 #     2005 Fabien Pinckaers, TINY SPRL. (http://tiny.be)
8 #
9 #    This program is free software: you can redistribute it and/or modify
10 #    it under the terms of the GNU Affero General Public License as
11 #    published by the Free Software Foundation, either version 3 of the
12 #    License, or (at your option) any later version.
13 #
14 #    This program is distributed in the hope that it will be useful,
15 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
16 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 #    GNU Affero General Public License for more details.
18 #
19 #    You should have received a copy of the GNU Affero General Public License
20 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 #
22 ##############################################################################
23 #!/usr/bin/python
24 """
25 OpenERP SXW2RML - The OpenERP's report engine
26
27 OpenERP SXW2RML is part of the OpenERP Report Project.
28 OpenERP Report is a module that allows you to render high quality PDF documents
29 from an OpenOffice template (.sxw) and any relational database.
30 """
31 __version__ = '0.9'
32
33
34 import re
35 import string
36 import os
37 import zipfile
38 import xml.dom.minidom
39 from reportlab.lib.units import toLength
40 import base64
41 import copy
42
class DomApiGeneral:
    """General DOM API utilities.

    Small helpers for filtering minidom child nodes and for converting the
    string values found in OpenOffice XML (lengths, percentages, "x,y" pairs).
    """

    def __init__(self, content_string="", file=""):
        """Store ``content_string`` and compile the value/unit pattern.

        ``file`` is accepted for backward compatibility; it is not used.
        """
        self.content_string = content_string
        # Group 1: shortest prefix ending in a digit; group 2: the unit suffix.
        self.re_digits = re.compile(r"(.*?\d)(pt|cm|mm|inch|in)")

    def _unitTuple(self, string):
        """Split values and units to a tuple.

        Returns the first ``(value, unit)`` match of ``re_digits`` in
        ``string``, or ``(string, "")`` when nothing matches.
        """
        matches = self.re_digits.findall(string)
        if matches:
            return matches[0]
        return (string, "")

    def stringPercentToFloat(self, string):
        """Convert a percentage string such as ``"115%"`` to a float (1.15)."""
        return float(string.replace("%", "")) / 100

    def findChildrenByName(self, parent, name, attr_dict=None):
        """Return the direct element children of ``parent`` named ``name``.

        Does not work recursively.
        Optional: when ``attr_dict`` is non-empty, keep only the children whose
        attributes equal every attribute/value pair in it.
        """
        children = [
            child for child in parent.childNodes
            if child.nodeType == child.ELEMENT_NODE and child.nodeName == name
        ]
        if not attr_dict:
            return children
        return self._selectForAttributes(nodelist=children, attr_dict=attr_dict)

    def _selectForAttributes(self, nodelist, attr_dict):
        """Return the nodes of ``nodelist`` matching every pair in ``attr_dict``."""
        return [
            node for node in nodelist
            if all(node.getAttribute(key) == wanted
                   for key, wanted in attr_dict.items())
        ]

    def _stringToTuple(self, s):
        """Parse ``"x,y"`` into ``(int(x), int(y))``; return None if impossible."""
        try:
            # str.split replaces the deprecated string.split() module function.
            parts = s.split(",")
            return int(parts[0]), int(parts[1])
        except (AttributeError, IndexError, TypeError, ValueError):
            return None

    def _tupleToString(self, t):
        """Format the first two items of ``t`` as UTF-8 ``"x,y"``; None on failure."""
        try:
            return self.openOfficeStringUtf8("%s,%s" % (t[0], t[1]))
        except Exception:
            # Best effort by design: any un-indexable or un-encodable input
            # yields None rather than an error.
            return None

    def _lengthToFloat(self, value):
        """Convert a length string to a rounded number via reportlab's toLength.

        Values in which ``re_digits`` finds no number+unit are returned
        unchanged. If the conversion fails the (unit-normalised) string is
        returned instead of a number.
        """
        if not self.re_digits.search(value):
            return value
        length = value
        if length[-4:] == "inch":
            # OO files use "inch" instead of "in" in Reportlab units
            length = length[:-2]
        try:
            return round(toLength(length))
        except (TypeError, ValueError):
            return length

    def openOfficeStringUtf8(self, string):
        """Return ``string`` as a UTF-8 encoded byte string.

        Byte strings are interpreted as cp1252 before being re-encoded; text
        (unicode) strings are encoded directly.
        """
        if isinstance(string, bytes):
            string = string.decode("cp1252")
        return string.encode("utf-8")
123
class DomApi(DomApiGeneral):
    """This class provides a DOM-API for XML-Files from an SXW-Archive.

    It parses ``content.xml`` and ``styles.xml``, indexes every
    ``style:style`` node by name and offers the normalisation passes that are
    run on the content document before it is serialised with ``toxml``.
    """

    def __init__(self, xml_content, xml_styles):
        """Parse both XML strings and build the style lookup tables."""
        DomApiGeneral.__init__(self)
        self.content_dom = xml.dom.minidom.parseString(xml_content)
        self.styles_dom = xml.dom.minidom.parseString(xml_styles)
        body = self.content_dom.getElementsByTagName("office:body")
        self.body = body and body[0]

        # TODO:
        self.style_dict = {}
        self.style_properties_dict = {}

        # ******** always use the following order:
        self.buildStyleDict()
        self.buildStylePropertiesDict()
        # None when styles.xml has neither element; when both exist the
        # "style:page-layout" node wins because it is looked up last.
        self.page_master = None
        for tag in ("style:page-master", "style:page-layout"):
            found = self.styles_dom.getElementsByTagName(tag)
            if found:
                self.page_master = found[0]
        self.document = self.content_dom.getElementsByTagName("office:document-content")[0]

    def _toText(self, value):
        """Decode a UTF-8 byte string to text; return text values unchanged."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return value

    def buildStylePropertiesDict(self):
        """Fill ``style_properties_dict`` for every style in ``style_dict``."""
        for name in list(self.style_dict):
            self.style_properties_dict[name] = self.getStylePropertiesDict(name)

    def updateWithPercents(self, dict, updatedict):
        """Merge ``updatedict`` into ``dict`` in place, resolving percentages.

        Sometimes you find values like "115%" in the style hierarchy. Such a
        value is multiplied with the number already stored in ``dict`` under
        the same key and written back with that entry's unit. Values that
        cannot be resolved are copied over as they are.
        """
        if not updatedict:
            # no style hierarchies for this style? =>
            return
        resolved = copy.copy(updatedict)
        for key in list(resolved):
            value = resolved[key]
            try:
                if value.find("%") != -1 and key in dict:
                    match = self.re_digits.search(dict[key])
                    number = float(match.group(1))
                    unit = match.group(2)
                    scaled = self.stringPercentToFloat(value) * number
                    if unit == "pt":
                        # no floats allowed for "pt"
                        # OOo just takes the int, does not round (try it out!)
                        scaled = int(scaled)
                    resolved[key] = "%s%s" % (scaled, unit)
            except (AttributeError, TypeError, ValueError):
                # Base value without number+unit, or a non-string value:
                # keep the raw entry, it is merged unchanged below.
                continue
        dict.update(resolved)

    def normalizeStyleProperties(self):
        """Write the resolved style properties into content.xml.

        Every ``style:style`` node of styles.xml is cloned into the
        ``office:automatic-styles`` element of content.xml; then each
        ``style:style`` node of the content document gets the attributes from
        ``self.style_properties_dict`` set on its ``style:properties`` child
        (created when missing). Use this function to preprocess content.xml
        for XSLT transformations etc. Do not try to implement this function
        with XSLT - believe me, it's a terrible task...
        """
        styles_styles = self.styles_dom.getElementsByTagName("style:style")
        automatic_styles = self.content_dom.getElementsByTagName("office:automatic-styles")[0]
        for s in styles_styles:
            automatic_styles.appendChild(s.cloneNode(deep=1))
        # these are the content_styles with styles_styles added!!!
        for style in self.content_dom.getElementsByTagName("style:style"):
            holders = self.findChildrenByName(style, "style:properties")
            if holders:
                holder = holders[0]
            else:
                # some derived automatic styles do not have "style:properties":
                holder = self.content_dom.createElement("style:properties")
                style.appendChild(holder)
            name = style.getAttribute("style:name").encode("utf-8")
            props = self.style_properties_dict[name] or {}
            for attribute, value in props.items():
                # Values are stored UTF-8 encoded; decode them as UTF-8 so
                # that non-ASCII values are not mangled by a cp1252 round trip.
                holder.setAttribute(self._toText(attribute), self._toText(value))

    def transferStylesXml(self):
        """Transfer certain sub-trees from styles.xml to the normalized content.xml
        (see above). It is not necessary to do this - for example - with paragraph styles.
        the "normalized" style properties contain all information needed for
        further processing.

        A ``transferredfromstylesxml`` element is inserted before the body.
        It receives a clone of the page master and, if present, of the first
        ``text:outline-style``; when no page master was found it stays empty.
        """
        # TODO: What about table styles etc.?
        outline_styles = self.styles_dom.getElementsByTagName("text:outline-style")
        container = self.content_dom.createElement("transferredfromstylesxml")
        self.document.insertBefore(container, self.body)
        page_master = getattr(self, "page_master", None)
        if page_master is None:
            # Nothing is transferred (not even the outline style) without a
            # page master.
            return
        container.appendChild(page_master.cloneNode(deep=1))
        if outline_styles:
            container.appendChild(outline_styles[0].cloneNode(deep=1))

    def normalizeLength(self):
        """Normalize all lengthes to floats (i.e: 1 inch = 72).
        Always use this after "normalizeContent" and "transferStyles"!

        Every attribute of every element in the content document is passed
        through ``_lengthToFloat`` and stored back as a string.
        """
        # TODO: The complex attributes of table cell styles are not transferred yet.
        for element in self.content_dom.getElementsByTagName("*"):
            # Public ``attributes`` map instead of the private ``_attrs``
            # dict, which may be None for elements without attributes.
            for attr_name in list(element.attributes.keys()):
                value = element.getAttribute(attr_name)
                # convert float to string first!
                element.setAttribute(attr_name, "%s" % self._lengthToFloat(value))

    def normalizeTableColumns(self):
        """Handle this strange table:number-columns-repeated attribute.

        A column repeated N times is replaced by N sibling columns without
        the attribute.
        """
        columns = self.content_dom.getElementsByTagName("table:table-column")
        for column in columns:
            if not column.hasAttribute("table:number-columns-repeated"):
                continue
            number = int(column.getAttribute("table:number-columns-repeated"))
            column.removeAttribute("table:number-columns-repeated")
            for _ in range(number - 1):
                column.parentNode.insertBefore(column.cloneNode(deep=1), column)

    def buildStyleDict(self):
        """Store all style:style-nodes from content.xml and styles.xml in self.style_dict.
        Caution: in this dict the nodes from two dom apis are merged!

        Keys are the UTF-8 encoded style names; a content.xml style replaces
        a styles.xml style of the same name. Always returns True.
        """
        for dom in (self.styles_dom, self.content_dom):
            for style in dom.getElementsByTagName("style:style"):
                name = style.getAttribute("style:name").encode("utf-8")
                self.style_dict[name] = style
        return True

    def toxml(self):
        """Serialise the content document as UTF-8 encoded XML."""
        return self.content_dom.toxml(encoding="utf-8")

    def getStylePropertiesDict(self, style_name):
        """Return the effective properties of ``style_name`` as a dict.

        The properties of the parent style (``style:parent-style-name``) are
        collected first, recursively, and then overridden by the attributes
        of the style's own ``*properties`` child elements. Values are UTF-8
        encoded. Raises KeyError if ``style_name`` (or a referenced parent)
        is not in ``self.style_dict``.
        """
        style = self.style_dict[style_name]
        if style.hasAttribute("style:parent-style-name"):
            parent = style.getAttribute("style:parent-style-name").encode("utf-8")
            res = self.getStylePropertiesDict(parent)
        else:
            res = {}

        for child in style.childNodes:
            if child.nodeType == child.ELEMENT_NODE and child.nodeName.find("properties") > 0:
                for attr in list(child.attributes.keys()):
                    res[attr] = child.getAttribute(attr).encode("utf-8")
        return res
262
class PyOpenOffice(object):
    """This is the main class which provides all functionality.

    It reads an OpenOffice archive, collects its pictures and returns the
    normalised ``content.xml``.
    """

    def __init__(self, path='.', save_pict=False):
        """Remember the picture output directory and whether to write pictures.

        ``self.images`` maps picture file names to their raw content and is
        filled by ``oo_read``.
        """
        self.path = path
        self.save_pict = save_pict
        self.images = {}

    def oo_read(self, fname):
        """Read ``content.xml`` and ``styles.xml`` from the archive ``fname``.

        Members below ``Pictures/`` are stored in ``self.images`` (keyed by
        the name after the prefix) and, when ``save_pict`` is set, also
        written into ``self.path``. Returns the tuple ``(content, style)``.
        """
        archive = zipfile.ZipFile(fname, "r")
        try:
            content = archive.read('content.xml')
            style = archive.read('styles.xml')
            for member in archive.namelist():
                # NOTE(review): the length test also skips one-character
                # picture names, not only the bare "Pictures/" entry -
                # kept as in the original, confirm whether intended.
                if member.startswith('Pictures/') and len(member) > 10:
                    pic_content = archive.read(member)
                    self.images[member[9:]] = pic_content
                    if self.save_pict:
                        target = os.path.join(self.path, os.path.basename(member))
                        with open(target, "wb") as handle:
                            handle.write(pic_content)
        finally:
            # Release the archive even when a member is missing or unreadable.
            archive.close()
        return content, style

    def oo_replace(self, content):
        """Apply the textual ``<para>`` clean-ups to ``content`` and return it.

        Self-closing ``<para .../>`` tags are removed, and a ``<para>`` that
        contains a ``<text:line-break/>`` is split into two paragraphs with
        the same attributes at the first line break.
        """
        rules = [
            (r"<para[^>]*/>", ""),
            # Python backreferences are \1/\2; the Perl-style "$1"/"$2" would
            # be emitted literally. "[^>]*" keeps the attribute group inside
            # the opening tag instead of greedily spanning later tags.
            (r"<para([^>]*)>(.*?)<text:line-break[^>]*/>", r"<para\1>\2</para><para\1>"),
        ]
        for pattern, replacement in rules:
            content = re.sub(pattern, replacement, content)
        return content

    def unpackNormalize(self, sourcefile):
        """Return the normalised ``content.xml`` of ``sourcefile`` as UTF-8 XML.

        The archive is read, cleaned with ``oo_replace`` and run through the
        ``DomApi`` normalisation passes in their required order.
        """
        content, styles = self.oo_read(sourcefile)
        content = self.oo_replace(content)
        dom = DomApi(content, styles)
        dom.normalizeStyleProperties()
        dom.transferStylesXml()
        dom.normalizeLength()
        dom.normalizeTableColumns()
        return dom.toxml()
305
def sxw2rml(sxw_file, xsl, output='.', save_pict=False):
    """Convert the OpenOffice document ``sxw_file`` to RML.

    The normalised content of the document is transformed with the XSLT
    stylesheet given as the string ``xsl``. When the result contains a
    ``/document/stylesheet`` element, an ``<images>`` element holding every
    picture of the archive (base64 encoded, one ``<image name=...>`` each)
    is appended to it.

    ``output`` and ``save_pict`` are passed on to ``PyOpenOffice``.
    Returns the result as a string, or the lxml result object itself when it
    cannot be converted with ``str()``.
    """
    from lxml import etree
    from StringIO import StringIO

    tool = PyOpenOffice(output, save_pict=save_pict)
    normalized = tool.unpackNormalize(sxw_file)

    transform = etree.XSLT(etree.parse(StringIO(xsl)))
    result = transform(etree.parse(StringIO(normalized)))
    stylesheets = etree.XPathEvaluator(result)("/document/stylesheet")

    if stylesheets:
        images = etree.Element("images")
        # Sorted so that the generated RML does not depend on dict ordering.
        for name in sorted(tool.images):
            node = etree.Element('image', name=name)
            node.text = base64.encodestring(tool.images[name])
            images.append(node)
        stylesheets[0].append(images)

    try:
        return str(result)
    except Exception:
        # Best effort: hand back the raw result object when it cannot be
        # serialised, without swallowing KeyboardInterrupt/SystemExit.
        return result
336
# Command line entry point: convert one .sxw/.odt file and print the RML.
if __name__ == "__main__":
    import optparse
    import sys

    parser = optparse.OptionParser(
        version="OpenERP Report v%s" % __version__,
        usage='openerp_sxw2rml.py [options] file.sxw')
    # A plain flag: without action="store_true" optparse would consume the
    # next argument (typically the input file) as the option's value.
    parser.add_option("-v", "--verbose", default=False, dest="verbose",
                      action="store_true", help="enable basic debugging")
    parser.add_option("-o", "--output", dest="output", default='.', help="directory of image output")
    (opt, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments.")

    # Use the parsed positional argument; sys.argv[1] is an option string
    # whenever options are given before the file name.
    fname = args[0]

    # Pick the stylesheet from the archive's mimetype: ODT documents use
    # their own stylesheet, everything else the SXW one.
    xsl_file = 'normalized_oo2rml.xsl'
    z = zipfile.ZipFile(fname, "r")
    try:
        mimetype = z.read('mimetype')
    finally:
        z.close()
    if mimetype.split('/')[-1] == 'vnd.oasis.opendocument.text':
        xsl_file = 'normalized_odt2rml.xsl'

    # The stylesheets are looked up next to this script.
    xsl_path = os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), xsl_file)
    with open(xsl_path) as xsl_handle:
        xsl = xsl_handle.read()

    # NOTE(review): save_pict is fixed to False, so the -o directory is only
    # passed through and no picture is written - confirm whether intended.
    result = sxw2rml(fname, xsl, output=opt.output, save_pict=False)

    sys.stdout.write("%s\n" % (result,))
# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4:
361 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4:
362