bin/report/preprocess.py

   1
   2 from lxml import etree
   3 import re
   4 rml_parents = ['tr','story','section']
   5 html_parents = ['tr','body','div']
   6 sxw_parents = ['{http://openoffice.org/2000/table}table-row','{http://openoffice.org/2000/office}body','{http://openoffice.org/2000/text}section']
   7
   8 class report(object):
   9     def preprocess_rml(self, root_node,ntype='pdf'):
  10         _regex1 = re.compile("\[\[(.*?)(repeatIn\(.*?\s*,\s*[\'\"].*?[\'\"]\s*(?:,\s*(.*?)\s*)?\s*\))(.*?)\]\]")
  11         _regex11= re.compile("\[\[(.*?)(repeatIn\(.*?\s*\(.*?\s*[\'\"].*?[\'\"]\s*\),[\'\"].*?[\'\"](?:,\s*(.*?)\s*)?\s*\))(.*?)\]\]")
  12         _regex2 = re.compile("\[\[(.*?)(removeParentNode\(\s*(?:['\"](.*?)['\"])\s*\))(.*?)\]\]")
  13         _regex3 = re.compile("\[\[\s*(.*?setTag\(\s*['\"](.*?)['\"]\s*,\s*['\"].*?['\"]\s*(?:,.*?)?\).*?)\s*\]\]")
  14         for node in root_node:
  15             if node.text:
  16                 def _sub3(txt):
  17                     n = node
  18                     while n.tag != txt.group(2):
  19                         n = n.getparent()
  20                     n.set('rml_tag', txt.group(1))
  21                     return "[[ '' ]]"
  22                 def _sub2(txt):
  23                     if txt.group(3):
  24                         n = node
  25                         try:
  26                             while n.tag != txt.group(3):
  27                                 n = n.getparent()
  28                         except:
  29                             n = node
  30                     else:
  31                         n = node.getparent()
  32                     n.set('rml_except', txt.group(0)[2:-2])
  33                     return txt.group(0)
  34                 def _sub1(txt):
  35                     if len(txt.group(4)) > 1:
  36                         return " "
  37                     match = rml_parents
  38                     if ntype in ['odt','sxw']:
  39                         match = sxw_parents
  40                     if ntype =='html2html':
  41                         match = html_parents
  42                     if txt.group(3):
  43                         match = [txt.group(3)]
  44                     n = node
  45                     while n.tag not in match:
  46                         n = n.getparent()
  47                     n.set('rml_loop', txt.group(2))
  48                     return '[['+txt.group(1)+"''"+txt.group(4)+']]'
  49                 t = _regex1.sub(_sub1, node.text)
  50                 if t == " ":
  51                     t = _regex11.sub(_sub1, node.text)
  52                 t = _regex3.sub(_sub3, t)
  53                 node.text = _regex2.sub(_sub2, t)
  54             self.preprocess_rml(node,ntype)
  55         return root_node
  56
  57 if __name__=='__main__':
  58     node = etree.XML('''<story>
  59     <para>This is a test[[ setTag('para','xpre') ]]</para>
  60     <blockTable>
  61     <tr>
  62         <td><para>Row 1 [[ setTag('tr','tr',{'style':'TrLevel'+str(a['level']), 'paraStyle':('Level'+str(a['level']))}) ]] </para></td>
  63         <td>Row 2 [[ True and removeParentNode('td') ]] </td>
  64     </tr><tr>
  65         <td>Row 1 [[repeatIn(o.order_line,'o')]] </td>
  66         <td>Row 2</td>
  67     </tr>
  68     </blockTable>
  69     <p>This isa test</p>
  70 </story>''')
  71     a = report()
  72     result = a.preprocess_rml(node)
  73     print etree.tostring(result)
  74