bin/report/preprocess.py

   1
   2 from lxml import etree
   3 import re
   4 rml_parents = ['tr','story','section']
   5 html_parents = ['tr','body','div']
   6 sxw_parents = ['{http://openoffice.org/2000/table}table-row','{http://openoffice.org/2000/office}body','{http://openoffice.org/2000/text}section']
   7
   8 class report(object):
   9     def preprocess_rml(self, root_node,type='pdf'):
  10         _regex1 = re.compile("\[\[(.*?)(repeatIn\(.*?\s*,\s*[\'\"].*?[\'\"]\s*(?:,\s*(.*?)\s*)?\s*\))(.*?)\]\]")
  11         _regex11= re.compile("\[\[(.*?)(repeatIn\(.*?\s*\(.*?\s*[\'\"].*?[\'\"]\s*\),[\'\"].*?[\'\"](?:,\s*(.*?)\s*)?\s*\))(.*?)\]\]")
  12         _regex2 = re.compile("\[\[(.*?)(removeParentNode\(\s*(?:['\"](.*?)['\"])\s*\))(.*?)\]\]")
  13         _regex3 = re.compile("\[\[\s*(.*?setTag\(\s*['\"](.*?)['\"]\s*,\s*['\"].*?['\"]\s*(?:,.*?)?\).*?)\s*\]\]")
  14         for node in root_node:
  15             if node.tag == etree.Comment:
  16                 continue
  17             if node.text:
  18                 def _sub3(txt):
  19                     n = node
  20                     while n.tag != txt.group(2):
  21                         n = n.getparent()
  22                     n.set('rml_tag', txt.group(1))
  23                     return "[[ '' ]]"
  24                 def _sub2(txt):
  25                     if txt.group(3):
  26                         n = node
  27                         try:
  28                             while n.tag != txt.group(3):
  29                                 n = n.getparent()
  30                         except:
  31                             n = node
  32                     else:
  33                         n = node.getparent()
  34                     n.set('rml_except', txt.group(0)[2:-2])
  35                     return txt.group(0)
  36                 def _sub1(txt):
  37                     if len(txt.group(4)) > 1:
  38                         return " "
  39                     match = rml_parents
  40                     if type in ['odt','sxw']:
  41                         match = sxw_parents
  42                     if type =='html2html':
  43                         match = html_parents
  44                     if txt.group(3):
  45                         match = [txt.group(3)]
  46                     n = node
  47                     while n.tag not in match:
  48                         n = n.getparent()
  49                     n.set('rml_loop', txt.group(2))
  50                     return '[['+txt.group(1)+"''"+txt.group(4)+']]'
  51                 t = _regex1.sub(_sub1, node.text)
  52                 if t == " ":
  53                     t = _regex11.sub(_sub1, node.text)
  54                 t = _regex3.sub(_sub3, t)
  55                 node.text = _regex2.sub(_sub2, t)
  56             self.preprocess_rml(node,type)
  57         return root_node
  58
  59 if __name__=='__main__':
  60     node = etree.XML('''<story>
  61     <para>This is a test[[ setTag('para','xpre') ]]</para>
  62     <blockTable>
  63     <tr>
  64         <td><para>Row 1 [[ setTag('tr','tr',{'style':'TrLevel'+str(a['level']), 'paraStyle':('Level'+str(a['level']))}) ]] </para></td>
  65         <td>Row 2 [[ True and removeParentNode('td') ]] </td>
  66     </tr><tr>
  67         <td>Row 1 [[repeatIn(o.order_line,'o')]] </td>
  68         <td>Row 2</td>
  69     </tr>
  70     </blockTable>
  71     <p>This isa test</p>
  72 </story>''')
  73     a = report()
  74     result = a.preprocess_rml(node)
  75     print etree.tostring(result)
  76