# Lists of candidate parent tag names, one list per document flavour.
# NOTE(review): presumably preprocess_rml picks one of these lists according
# to its ntype argument when it looks for the element that should carry a
# repeatIn(...) loop; the selecting lines are not visible here -- confirm.
4 rml_parents = ['tr','story','section']
5 html_parents = ['tr','body','div']
# Namespace-qualified tag names written as '{namespace-uri}local-name'.
6 sxw_parents = ['{http://openoffice.org/2000/table}table-row','{http://openoffice.org/2000/office}body','{http://openoffice.org/2000/text}section']
# Pre-process a parsed report template: for every child of root_node, rewrite
# the [[ ... ]] expressions found in its text, record what was found as
# attributes ('rml_tag', 'rml_except', 'rml_loop') on the element n reached by
# the tag-matching loops, then recurse into that child.
#
# root_node -- element whose children are iterated; each child is used through
#              .tag, .text and .set(), and is passed back in recursively.
# ntype     -- document flavour, 'pdf' by default; 'odt', 'sxw' and
#              'html2html' are the other values tested below.
#
# NOTE(review): this listing is sparse (the embedded numbering jumps, e.g.
# 14 -> 18). The headers of the _sub1/_sub2/_sub3 callbacks used near the end,
# the assignments to n, the bodies of the while loops and any return statement
# are not visible here; the comments about them below are hedged accordingly.
9 def preprocess_rml(self, root_node,ntype='pdf'):
# [[ <before> repeatIn(<expr>, '<name>'[, <extra>]) <after> ]]
# groups: 1 = <before>, 2 = the whole repeatIn(...) call,
#         3 = the optional <extra> argument, 4 = <after>.
10 _regex1 = re.compile("\[\[(.*?)(repeatIn\(.*?\s*,\s*[\'\"].*?[\'\"]\s*(?:,\s*(.*?)\s*)?\s*\))(.*?)\]\]")
# Same group layout as _regex1, for the form whose first argument itself
# contains a parenthesised call with a quoted argument, e.g.
# repeatIn(f('x'),'o').
11 _regex11= re.compile("\[\[(.*?)(repeatIn\(.*?\s*\(.*?\s*[\'\"].*?[\'\"]\s*\),[\'\"].*?[\'\"](?:,\s*(.*?)\s*)?\s*\))(.*?)\]\]")
# [[ <before> removeParentNode('<tag>') <after> ]]
# groups: 1 = <before>, 2 = the removeParentNode(...) call, 3 = <tag>,
#         4 = <after>.
12 _regex2 = re.compile("\[\[(.*?)(removeParentNode\(\s*(?:['\"](.*?)['\"])\s*\))(.*?)\]\]")
# [[ ... setTag('<tag>', '<new>'[, ...]) ... ]]
# groups: 1 = the whole expression between the brackets (surrounding
#         whitespace excluded), 2 = <tag>, the first quoted argument.
13 _regex3 = re.compile("\[\[\s*(.*?setTag\(\s*['\"](.*?)['\"]\s*,\s*['\"].*?['\"]\s*(?:,.*?)?\).*?)\s*\]\]")
14 for node in root_node:
# setTag handling (presumably the body of _sub3, the callback given to
# _regex3.sub below): loop until n.tag equals setTag's first argument, then
# store the complete expression on that element as 'rml_tag'.
18 while n.tag != txt.group(2):
20 n.set('rml_tag', txt.group(1))
# removeParentNode handling (presumably the body of _sub2): loop until n.tag
# equals the quoted tag name, then store the matched text without its leading
# '[[' and trailing ']]' as 'rml_except'.
26 while n.tag != txt.group(3):
32 n.set('rml_except', txt.group(0)[2:-2])
# repeatIn handling (presumably the body of _sub1). Group 4 is whatever
# follows the repeatIn(...) call inside the brackets; what is done when it is
# longer than one character is on a line not shown here.
35 if len(txt.group(4)) > 1:
# The document flavour is tested here; presumably each test selects the list
# of candidate parent tags (the assignments are not shown -- confirm).
38 if ntype in ['odt','sxw']:
40 if ntype =='html2html':
# Use the single tag taken from repeatIn's optional third argument as the
# only candidate (the guarding condition, if any, is not shown).
43 match = [txt.group(3)]
# Loop until n is an element whose tag is one of the candidates.
45 while n.tag not in match:
# Store the repeatIn(...) call text on that element as 'rml_loop'.
47 n.set('rml_loop', txt.group(2))
# Give back the expression with the repeatIn(...) call replaced by ''
# (an empty string literal), keeping the text before and after it.
48 return '[['+txt.group(1)+"''"+txt.group(4)+']]'
# Apply the repeatIn substitution to the child's text.
49 t = _regex1.sub(_sub1, node.text)
# NOTE(review): this starts again from node.text rather than from t, so it
# replaces the result of the previous line; the condition under which it runs
# is on an omitted line -- confirm that discarding t is intended there.
51 t = _regex11.sub(_sub1, node.text)
# Then handle setTag and removeParentNode, and write the final text back.
52 t = _regex3.sub(_sub3, t)
53 node.text = _regex2.sub(_sub2, t)
# Recurse into the child with the same document flavour.
54 self.preprocess_rml(node,ntype)
# Ad-hoc smoke test, run only when the file is executed as a script: it parses
# an inline sample <story> document containing setTag, removeParentNode and
# repeatIn expressions, calls preprocess_rml on it through the object a
# (created on a line not shown here) and prints the serialized result.
57 if __name__=='__main__':
58 node = etree.XML('''<story>
59 <para>This is a test[[ setTag('para','xpre') ]]</para>
62 <td><para>Row 1 [[ setTag('tr','tr',{'style':'TrLevel'+str(a['level']), 'paraStyle':('Level'+str(a['level']))}) ]] </para></td>
63 <td>Row 2 [[ True and removeParentNode('td') ]] </td>
65 <td>Row 1 [[repeatIn(o.order_line,'o')]] </td>
72 result = a.preprocess_rml(node)
73 print etree.tostring(result)