1 # -*- coding: utf-8 -*-
2 ##############################################################################
6 # 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
7 # 2005 Fabien Pinckaers, TINY SPRL. (http://tiny.be)
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU Affero General Public License as
11 # published by the Free Software Foundation, either version 3 of the
12 # License, or (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU Affero General Public License for more details.
19 # You should have received a copy of the GNU Affero General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ##############################################################################
25 OpenERP SXW2RML - The OpenERP's report engine
27 OpenERP SXW2RML is part of the OpenERP Report Project.
28 OpenERP Report is a module that allows you to render high-quality PDF documents
29 from an OpenOffice template (.sxw) and any relational database.
38 import xml.dom.minidom
39 from reportlab.lib.units import toLength
44 """General DOM API utilities."""
def __init__(self, content_string="", file=""):
    """Store the raw content and compile the length-matching pattern.

    content_string -- text kept on the instance as ``content_string``.
    file -- accepted for backward compatibility; this constructor does
        not use it.
    """
    self.content_string = content_string
    # Group 1 is the shortest prefix that ends in a digit, group 2 the unit
    # suffix. "inch" is listed before "in" so the longer spelling is the
    # one captured.
    self.re_digits = re.compile(r"(.*?\d)(pt|cm|mm|inch|in)")
# Split a length string into its numeric part and its unit using
# self.re_digits; findall yields one (number, unit) tuple per match.
# NOTE(review): the listing's embedded numbering jumps from 51 to 57, so how
# `temp` becomes the return value is not visible here -- confirm in the full
# file.
49 def _unitTuple(self, string):
50 """Split values and units to a tuple."""
51 temp = self.re_digits.findall(string)
def stringPercentToFloat(self, string):
    """Convert a percentage string such as "115%" to a fraction (1.15).

    Every "%" character is removed before the conversion, so a bare
    number ("25") is accepted as well and yields 0.25.

    Raises ValueError when the remaining text is not a valid float.
    """
    return float(string.replace("%", "")) / 100
# Look up element children of `parent` by tag name, optionally filtered by
# attribute values.
# NOTE(review): the listing's embedded numbering skips 64-66 and 69-72, so the
# creation and filling of `children` and any early return are not visible
# here -- confirm in the full file.
61 def findChildrenByName(self, parent, name, attr_dict=None):
62 """Helper functions. Does not work recursively.
63 Optional: also test for certain attribute/value pairs."""
# Only the direct children in parent.childNodes are inspected, and only
# element nodes whose nodeName equals `name` pass this test.
67 for c in parent.childNodes:
68 if c.nodeType == c.ELEMENT_NODE and c.nodeName == name:
# The attribute/value filtering is delegated to _selectForAttributes.
73 return self._selectForAttributes(nodelist=children,attr_dict=attr_dict)
# Select nodes from `nodelist` by comparing their attributes with `attr_dict`.
# NOTE(review): the listing's embedded numbering skips 76-79, 83-84 and 86-87;
# the loop that binds `n`, the handling of a mismatch and the return value
# are not visible here -- confirm in the full file.
75 def _selectForAttributes(self, nodelist, attr_dict):
# Compare every requested attribute with the node's actual value.
80 for a in attr_dict.keys():
81 if n.getAttribute(a) != attr_dict[a]:
82 # at least one incorrect attribute value?
# presumably reached only when all attributes matched -- TODO confirm
85 selected_nodes.append(n)
# Parse a comma-separated pair such as "3,4" into a tuple of two ints.
# NOTE(review): the listing's embedded numbering skips 90 and 93-95, so any
# surrounding error handling is not visible here.
88 def _stringToTuple(self, s):
89 """Helper function."""
# string.split(s, ",") is a Python 2-only function of the `string` module
# (presumably `string` is that module here; its import is not in view).
91 temp = string.split(s,",")
# Only the first two fields are used; both must parse as integers.
92 return int(temp[0]),int(temp[1])
# Format the first two items of `t` as the text "a,b".
# NOTE(review): the listing's embedded numbering skips 97 and 99-101, so any
# surrounding error handling is not visible here.
96 def _tupleToString(self, t):
# The joined text is passed through openOfficeStringUtf8 before it is returned.
98 return self.openOfficeStringUtf8("%s,%s" % (t[0],t[1]))
# Convert a length value to a number with reportlab's toLength.
# NOTE(review): the listing's embedded numbering skips 103, 105-107, 109-112
# and 114-117; the definition of `v`, the branch taken for non-length values
# and the return value are not visible here -- confirm in the full file.
102 def _lengthToFloat(self, value):
# Values that do not match the number+unit pattern are handled separately.
104 if not self.re_digits.search(v):
108 # OO files use "inch" instead of "in" in Reportlab units
# The converted length is rounded, so `c` holds a whole number.
113 c = round(toLength(v))
# Normalise `string` to UTF-8 encoded bytes.
# NOTE(review): `unicode` is a Python 2 builtin. The listing's embedded
# numbering skips 122, so what is done with `tempstring` afterwards is not
# visible here (presumably it is returned -- TODO confirm).
118 def openOfficeStringUtf8(self, string):
# Unicode text is encoded directly.
119 if type(string) == unicode:
120 return string.encode("utf-8")
# Anything else is decoded as cp1252 first and then re-encoded as UTF-8.
121 tempstring = unicode(string,"cp1252").encode("utf-8")
124 class DomApi(DomApiGeneral):
125 """This class provides a DOM-API for XML-Files from an SXW-Archive."""
# Parse the two XML sources and index their styles.
#   xml_content -- XML text parsed into self.content_dom
#   xml_styles  -- XML text parsed into self.styles_dom
# NOTE(review): the listing's embedded numbering skips 132-134 and 136; other
# attribute initialisation (e.g. self.style_dict, which buildStyleDict writes
# to) is presumably on those lines -- confirm in the full file.
126 def __init__(self, xml_content, xml_styles):
127 DomApiGeneral.__init__(self)
128 self.content_dom = xml.dom.minidom.parseString(xml_content)
129 self.styles_dom = xml.dom.minidom.parseString(xml_styles)
130 body = self.content_dom.getElementsByTagName("office:body")
# Stays an empty node list when there is no office:body element, otherwise
# holds the first such element.
131 self.body = body and body[0]
135 self.style_properties_dict = {}
137 # ******** always use the following order:
138 self.buildStyleDict()
139 self.buildStylePropertiesDict()
# `<>` is the Python 2 spelling of `!=`. self.page_master is taken from the
# first style:page-master or style:page-layout element found in styles.xml;
# in the visible lines it is not assigned when neither element exists.
140 if self.styles_dom.getElementsByTagName("style:page-master").__len__()<>0:
141 self.page_master = self.styles_dom.getElementsByTagName("style:page-master")[0]
142 if self.styles_dom.getElementsByTagName("style:page-layout").__len__()<>0 :
143 self.page_master = self.styles_dom.getElementsByTagName("style:page-layout")[0]
# Raises IndexError when content.xml has no office:document-content element.
144 self.document = self.content_dom.getElementsByTagName("office:document-content")[0]
def buildStylePropertiesDict(self):
    """Fill self.style_properties_dict with the properties of every style.

    For each name in self.style_dict, the result of
    self.getStylePropertiesDict(name) is stored under that same name.
    """
    # Iterate over a snapshot of the names so the loop is unaffected if the
    # lookup ever touches self.style_dict.
    for style_name in list(self.style_dict):
        self.style_properties_dict[style_name] = self.getStylePropertiesDict(style_name)
# Merge `updatedict` into `dict`, resolving percentage values such as "115%"
# against the length already stored in `dict` under the same key.
# NOTE(review): the listing's embedded numbering skips 152, 154, 157, 162, 167
# and 169, so the branch structure around the lines below is only partly
# visible -- confirm in the full file.
150 def updateWithPercents(self, dict, updatedict):
151 """Sometimes you find values like "115%" in the style hierarchy."""
153 # no style hierarchies for this style? =>
# Work on a shallow copy so the caller's `updatedict` is not modified.
155 new_updatedict = copy.copy(updatedict)
156 for u in new_updatedict.keys():
# A "%" value is resolved only when `dict` already holds a value for the
# key (has_key is Python 2 only).
158 if new_updatedict[u].find("""%""") != -1 and dict.has_key(u):
# Split the existing value into number and unit with self.re_digits, then
# scale the number by the percentage.
159 number = float(self.re_digits.search(dict[u]).group(1))
160 unit = self.re_digits.search(dict[u]).group(2)
161 new_number = self.stringPercentToFloat(new_updatedict[u]) * number
163 new_number = int(new_number)
164 # no floats allowed for "pt"
165 # OOo just takes the int, does not round (try it out!)
166 new_updatedict[u] = "%s%s" % (new_number,unit)
168 dict[u] = new_updatedict[u]
170 dict[u] = new_updatedict[u]
171 dict.update(new_updatedict)
# NOTE(review): the listing's embedded numbering skips 187, 190 and 192, so the
# condition guarding the creation of a missing style:properties child and
# the way `c` becomes a single element are not visible here.
173 def normalizeStyleProperties(self):
174 """Transfer all style:style-properties attributes from the
175 self.style_properties_hierarchical dict to the automatic-styles
176 from content.xml. Use this function to preprocess content.xml for
177 XSLT transformations etc.Do not try to implement this function
178 with XSlT - believe me, it's a terrible task..."""
# Deep-copy every style:style from styles.xml into the first
# office:automatic-styles element of content.xml.
179 styles_styles = self.styles_dom.getElementsByTagName("style:style")
180 automatic_styles = self.content_dom.getElementsByTagName("office:automatic-styles")[0]
181 for s in styles_styles:
182 automatic_styles.appendChild(s.cloneNode(deep=1))
183 content_styles = self.content_dom.getElementsByTagName("style:style")
184 # these are the content_styles with styles_styles added!!!
185 for s in content_styles:
186 c = self.findChildrenByName(s,"style:properties")
188 # some derived automatic styles do not have "style:properties":
189 temp = self.content_dom.createElement("style:properties")
191 c = self.findChildrenByName(s,"style:properties")
# Look up the precomputed properties by the style's UTF-8 encoded name; an
# unknown name raises KeyError, a falsy entry is replaced by {}.
193 dict = self.style_properties_dict[(s.getAttribute("style:name")).encode("utf-8")] or {}
# Write every property back onto the element as an attribute.
194 for attribute in dict.keys():
195 c.setAttribute(self.openOfficeStringUtf8(attribute),self.openOfficeStringUtf8(dict[attribute]))
# NOTE(review): the listing's embedded numbering skips 207 and 211-213, so any
# guard or error handling around the cloning below is not visible here.
197 def transferStylesXml(self):
198 """Transfer certain sub-trees from styles.xml to the normalized content.xml
199 (see above). It is not necessary to do this - for example - with paragraph styles.
200 the "normalized" style properties contain all information needed for
201 further processing."""
202 # TODO: What about table styles etc.?
203 outline_styles = self.styles_dom.getElementsByTagName("text:outline-style")
# Create a <transferredfromstylesxml> container and insert it directly
# before the body inside the document element.
204 t = self.content_dom.createElement("transferredfromstylesxml")
205 self.document.insertBefore(t,self.body)
# Fetch the inserted container again through the body's previous sibling.
206 t_new = self.body.previousSibling
# Deep-copy the page master and the first text:outline-style element into
# the container.
208 page_master = self.page_master
209 t_new.appendChild(page_master.cloneNode(deep=1))
210 t_new.appendChild(outline_styles[0].cloneNode(deep=1))
# NOTE(review): the listing's embedded numbering skips 221, presumably the loop
# header that binds `s` to each element of `all_styles` -- TODO confirm.
214 def normalizeLength(self):
215 """Normalize all lengthes to floats (i.e: 1 inch = 72).
216 Always use this after "normalizeContent" and "transferStyles"!"""
217 # TODO: The complex attributes of table cell styles are not transferred yet.
218 #all_styles = self.content_dom.getElementsByTagName("style:properties")
219 #all_styles += self.content_dom.getElementsByTagName("draw:image")
# "*" selects every element of content.xml.
220 all_styles = self.content_dom.getElementsByTagName("*")
# `_attrs` is a private minidom attribute holding the element's attributes.
222 for x in s._attrs.keys():
223 v = s.getAttribute(x)
# Each attribute value is replaced by the string form of what
# _lengthToFloat returns for it.
224 s.setAttribute(x,"%s" % self._lengthToFloat(v))
225 # convert float to string first!
# NOTE(review): the listing's embedded numbering skips 230, presumably the loop
# header that binds `c` to each entry of `columns` -- TODO confirm.
227 def normalizeTableColumns(self):
228 """Handle this strange table:number-columns-repeated attribute."""
229 columns = self.content_dom.getElementsByTagName("table:table-column")
231 if c.hasAttribute("table:number-columns-repeated"):
232 number = int(c.getAttribute("table:number-columns-repeated"))
# The attribute is removed before cloning, so the copies do not carry it.
233 c.removeAttribute("table:number-columns-repeated")
# Insert number-1 deep copies before `c`, giving `number` columns in total.
234 for i in range(number-1):
235 (c.parentNode).insertBefore(c.cloneNode(deep=1),c)
# NOTE(review): the listing's embedded numbering skips 244-246 after the loop,
# so whether anything follows it (e.g. a return value) is not visible here.
237 def buildStyleDict(self):
238 """Store all style:style-nodes from content.xml and styles.xml in self.style_dict.
239 Caution: in this dict the nodes from two dom apis are merged!"""
# styles.xml is scanned first and content.xml second, so a content.xml style
# replaces a styles.xml style that has the same name.
240 for st in (self.styles_dom,self.content_dom):
241 for s in st.getElementsByTagName("style:style"):
# Keys are the UTF-8 encoded style names; values are the DOM nodes.
242 name = s.getAttribute("style:name").encode("utf-8")
243 self.style_dict[name] = s
247 return self.content_dom.toxml(encoding="utf-8")
# Collect the property attributes of the style named `style_name`, starting
# from the properties of its parent style when it has one.
# NOTE(review): the listing's embedded numbering skips 250-251, 255, 257 and
# 261; the initial value of `res`, the loop header binding `c` and the
# return are not visible here -- confirm in the full file.
249 def getStylePropertiesDict(self, style_name):
# A style with style:parent-style-name is resolved recursively through its
# parent first; an unknown name raises KeyError on self.style_dict.
252 if self.style_dict[style_name].hasAttribute("style:parent-style-name"):
253 parent = self.style_dict[style_name].getAttribute("style:parent-style-name").encode("utf-8")
254 res = self.getStylePropertiesDict(parent)
256 children = self.style_dict[style_name].childNodes
# Any element child whose node name contains "properties" after its first
# character contributes its attributes (values stored UTF-8 encoded).
# `_attrs` is a private minidom attribute.
258 if c.nodeType == c.ELEMENT_NODE and c.nodeName.find("properties")>0 :
259 for attr in c._attrs.keys():
260 res[attr] = c.getAttribute(attr).encode("utf-8")
263 class PyOpenOffice(object):
264 """This is the main class which provides all functionality."""
# NOTE(review): the listing's embedded numbering skips 266, 268-269, 274-275,
# 279, 281-285, 287, 290, 293-294, 298 and 303-305 inside this class; the
# comments below describe only the statements that are visible.
265 def __init__(self, path='.', save_pict=False):
267 self.save_pict = save_pict
# Read content.xml and styles.xml from the zip archive `fname` and collect
# the pictures stored in it.
270 def oo_read(self, fname):
271 z = zipfile.ZipFile(fname,"r")
272 content = z.read('content.xml')
273 style = z.read('styles.xml')
# Only archive members under "Pictures/" with a name longer than ten
# characters are treated as pictures.
276 if a[:9]=='Pictures/' and len(a)>10:
277 pic_content = z.read(a)
# Stored under the member name without its "Pictures/" prefix.
278 self.images[a[9:]] = pic_content
# Opens a file for binary writing under self.path, named after the member's
# base name (presumably only when save_pict is set -- TODO confirm).
280 f=open(os.path.join(self.path, os.path.basename(a)),"wb")
# Apply a list of (pattern, replacement) regex substitutions to `content`.
286 def oo_replace(self, content):
288 (r"<para[^>]*/>", ""),
# NOTE(review): Python's re.sub does not expand "$1"/"$2"; group references
# are written "\1" or "\g<1>", so this replacement inserts the literal
# text "$1" and "$2" -- confirm the intent.
289 (r"<para(.*)>(.*?)<text:line-break[^>]*/>", "<para$1>$2</para><para$1>"),
291 for key,val in regex:
292 content = re.sub(key, val, content)
# Read the archive, clean the content text and run the DOM normalisation
# steps in a fixed order.
295 def unpackNormalize(self, sourcefile):
296 c,s = self.oo_read(sourcefile)
297 c = self.oo_replace(c)
299 dom.normalizeStyleProperties()
300 dom.transferStylesXml()
301 dom.normalizeLength()
302 dom.normalizeTableColumns()
# Normalise `sxw_file` through PyOpenOffice and run an XSLT stylesheet with
# lxml; extracted pictures are attached as base64 text.
# NOTE(review): the listing's embedded numbering skips 309, 312-313, 316-319,
# 321-323 and 328-336, so how `xsl`, `f` and `result` are built and what is
# returned are not visible here -- confirm in the full file.
306 def sxw2rml(sxw_file, xsl, output='.', save_pict=False):
307 from lxml import etree
# StringIO is the Python 2 module name.
308 from StringIO import StringIO
310 tool = PyOpenOffice(output, save_pict = save_pict)
311 res = tool.unpackNormalize(sxw_file)
314 styledoc = etree.parse(f)
315 style = etree.XSLT(styledoc)
320 root = etree.XPathEvaluator(result)("/document/stylesheet")
# Build an <images> element with one <image name=...> node per extracted
# picture, whose text is the base64-encoded picture data.
324 images = etree.Element("images")
325 for img in tool.images:
326 node = etree.Element('image', name=img)
# base64.encodestring is a deprecated alias that was removed in Python 3.9.
327 node.text = base64.encodestring(tool.images[img])
# Command-line entry point.
# NOTE(review): the listing's embedded numbering skips 338, 345, 347-351 and
# 359-360, so the argument-count check and the binding of `fname` are not
# visible here.
337 if __name__ == "__main__":
339 parser = optparse.OptionParser(
340 version="OpenERP Report v%s" % __version__,
341 usage = 'openerp_sxw2rml.py [options] file.sxw')
342 parser.add_option("-v", "--verbose", default=False, dest="verbose", help="enable basic debugging")
343 parser.add_option("-o", "--output", dest="output", default='.', help="directory of image output")
344 (opt, args) = parser.parse_args()
346 parser.error("Incorrect number of arguments.")
# The default stylesheet is replaced when the archive's mimetype entry ends
# in "vnd.oasis.opendocument.text".
352 xsl_file = 'normalized_oo2rml.xsl'
353 z = zipfile.ZipFile(fname,"r")
354 mimetype = z.read('mimetype')
355 if mimetype.split('/')[-1] == 'vnd.oasis.opendocument.text' :
356 xsl_file = 'normalized_odt2rml.xsl'
# The stylesheet is read from the directory of sys.argv[0], resolved against
# the current working directory. file() is a Python 2 builtin.
357 xsl = file(os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), xsl_file)).read()
# NOTE(review): `f` is not bound in the visible lines (the archive path above
# is `fname`), and save_pict is hard-coded to False -- confirm both.
358 result = sxw2rml(f, xsl, output=opt.output, save_pict=False)
361 # vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: