[IMP] convert qweb to use lxml instead of minidom
authorXavier Morel <xmo@openerp.com>
Wed, 2 Jul 2014 14:01:36 +0000 (16:01 +0200)
committerXavier Morel <xmo@openerp.com>
Wed, 2 Jul 2014 14:01:36 +0000 (16:01 +0200)
should be >20x faster at parsing document strings and gain close to 40s on crawling

addons/website/models/ir_qweb.py
addons/website/tests/test_converter.py
openerp/addons/base/ir/ir_qweb.py
openerp/addons/base/tests/test_qweb.py
openerp/addons/test_converter/tests/test_html.py

index cb2e8f1..132ee14 100644 (file)
@@ -48,17 +48,17 @@ class QWeb(orm.AbstractModel):
     def add_template(self, qcontext, name, node):
         # preprocessing for multilang static urls
         if request.website:
-            for tag, attr in self.URL_ATTRS.items():
-                for e in node.getElementsByTagName(tag):
-                    url = e.getAttribute(attr)
+            for tag, attr in self.URL_ATTRS.iteritems():
+                for e in node.iterdescendants(tag=tag):
+                    url = e.get(attr)
                     if url:
-                        e.setAttribute(attr, qcontext.get('url_for')(url))
+                        e.set(attr, qcontext.get('url_for')(url))
         super(QWeb, self).add_template(qcontext, name, node)
 
     def render_att_att(self, element, attribute_name, attribute_value, qwebcontext):
         att, val = super(QWeb, self).render_att_att(element, attribute_name, attribute_value, qwebcontext)
 
-        if request.website and att == self.URL_ATTRS.get(element.nodeName) and isinstance(val, basestring):
+        if request.website and att == self.URL_ATTRS.get(element.tag) and isinstance(val, basestring):
             val = qwebcontext.get('url_for')(val)
         return att, val
 
@@ -78,7 +78,7 @@ class Field(orm.AbstractModel):
         attrs = [('data-oe-translate', 1 if column.translate else 0)]
 
         placeholder = options.get('placeholder') \
-                   or source_element.getAttribute('placeholder') \
+                   or source_element.get('placeholder') \
                    or getattr(column, 'placeholder', None)
         if placeholder:
             attrs.append(('placeholder', placeholder))
@@ -275,7 +275,7 @@ class Image(orm.AbstractModel):
 
     def to_html(self, cr, uid, field_name, record, options,
                 source_element, t_att, g_att, qweb_context, context=None):
-        assert source_element.nodeName != 'img',\
+        assert source_element.tag != 'img',\
             "Oddly enough, the root tag of an image field can not be img. " \
             "That is because the image goes into the tag, or it gets the " \
             "hose again."
index bcc6ef1..a1177a3 100644 (file)
@@ -1,9 +1,8 @@
 # -*- coding: utf-8 -*-
 import textwrap
 import unittest2
-from xml.dom.minidom import getDOMImplementation
 
-from lxml import html
+from lxml import etree, html
 from lxml.builder import E
 
 from openerp.tests import common
@@ -11,9 +10,6 @@ from openerp.addons.base.ir import ir_qweb
 from openerp.addons.website.models.ir_qweb import html_to_text
 from openerp.addons.website.models.website import slugify, unslug
 
-impl = getDOMImplementation()
-document = impl.createDocument(None, None, None)
-
 class TestUnslug(unittest2.TestCase):
     def test_unslug(self):
         tests = {
@@ -150,9 +146,9 @@ class TestConvertBack(common.TransactionCase):
             })
         [record] = Model.browse(self.cr, self.uid, [id])
 
-        e = document.createElement('span')
+        e = etree.Element('span')
         field_value = 'record.%s' % field
-        e.setAttribute('t-field', field_value)
+        e.set('t-field', field_value)
 
         rendered = self.registry('website.qweb').render_tag_field(
             e, {'field': field_value}, '', ir_qweb.QWebContext(self.cr, self.uid, {
@@ -231,9 +227,9 @@ class TestConvertBack(common.TransactionCase):
         id = Model.create(self.cr, self.uid, {field: sub_id})
         [record] = Model.browse(self.cr, self.uid, [id])
 
-        e = document.createElement('span')
+        e = etree.Element('span')
         field_value = 'record.%s' % field
-        e.setAttribute('t-field', field_value)
+        e.set('t-field', field_value)
 
         rendered = self.registry('website.qweb').render_tag_field(
             e, {'field': field_value}, '', ir_qweb.QWebContext(self.cr, self.uid, {
index 279a8eb..5379742 100644 (file)
@@ -4,6 +4,7 @@ import cStringIO
 import datetime
 import hashlib
 import json
+import itertools
 import logging
 import math
 import os
@@ -11,15 +12,13 @@ import re
 import sys
 import textwrap
 import uuid
-import xml  # FIXME use lxml and etree
-import itertools
-import lxml.html
-import werkzeug
 from subprocess import Popen, PIPE
 from urlparse import urlparse
 
 import babel
 import babel.dates
+import werkzeug
+from lxml import etree, html
 from PIL import Image
 
 import openerp.http
@@ -116,7 +115,6 @@ class QWeb(orm.AbstractModel):
 
     _name = 'ir.qweb'
 
-    node = xml.dom.Node
     _void_elements = frozenset([
         'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen',
         'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'])
@@ -164,18 +162,17 @@ class QWeb(orm.AbstractModel):
         if hasattr(document, 'documentElement'):
             dom = document
         elif document.startswith("<?xml"):
-            dom = xml.dom.minidom.parseString(document)
+            dom = etree.fromstring(document)
         else:
-            dom = xml.dom.minidom.parse(document)
+            dom = etree.parse(document)
 
-        for node in dom.documentElement.childNodes:
-            if node.nodeType == self.node.ELEMENT_NODE:
-                if node.getAttribute('t-name'):
-                    name = str(node.getAttribute("t-name"))
-                    self.add_template(qwebcontext, name, node)
-                if res_id and node.tagName == "t":
-                    self.add_template(qwebcontext, res_id, node)
-                    res_id = None
+        for node in dom:
+            if node.get('t-name'):
+                name = str(node.get("t-name"))
+                self.add_template(qwebcontext, name, node)
+            if res_id and node.tag == "t":
+                self.add_template(qwebcontext, res_id, node)
+                res_id = None
 
     def get_template(self, name, qwebcontext):
         origin_template = qwebcontext.get('__caller__') or qwebcontext['__stack__'][0]
@@ -247,52 +244,51 @@ class QWeb(orm.AbstractModel):
 
     def render_node(self, element, qwebcontext):
         result = ""
-        if element.nodeType == self.node.TEXT_NODE or element.nodeType == self.node.CDATA_SECTION_NODE:
-            result = element.data.encode("utf8")
-        elif element.nodeType == self.node.ELEMENT_NODE:
-            generated_attributes = ""
-            t_render = None
-            template_attributes = {}
-            for (attribute_name, attribute_value) in element.attributes.items():
-                attribute_name = str(attribute_name)
-                if attribute_name == "groups":
-                    cr = qwebcontext.get('request') and qwebcontext['request'].cr or None
-                    uid = qwebcontext.get('request') and qwebcontext['request'].uid or None
-                    can_see = self.user_has_groups(cr, uid, groups=attribute_value) if cr and uid else False
-                    if not can_see:
-                        if qwebcontext.get('editable') and not qwebcontext.get('editable_no_editor'):
-                            errmsg = _("Editor disabled because some content can not be seen by a user who does not belong to the groups %s")
-                            raise openerp.http.Retry(
-                                _("User does not belong to groups %s") % attribute_value, {
-                                    'editable_no_editor': errmsg % attribute_value
-                                })
-                        return ''
-
-                if isinstance(attribute_value, unicode):
-                    attribute_value = attribute_value.encode("utf8")
-                else:
-                    attribute_value = attribute_value.nodeValue.encode("utf8")
-
-                if attribute_name.startswith("t-"):
-                    for attribute in self._render_att:
-                        if attribute_name[2:].startswith(attribute):
-                            att, val = self._render_att[attribute](self, element, attribute_name, attribute_value, qwebcontext)
-                            generated_attributes += val and ' %s="%s"' % (att, escape(val)) or " "
-                            break
-                    else:
-                        if attribute_name[2:] in self._render_tag:
-                            t_render = attribute_name[2:]
-                        template_attributes[attribute_name[2:]] = attribute_value
-                else:
-                    generated_attributes += ' %s="%s"' % (attribute_name, escape(attribute_value))
 
-            if 'debug' in template_attributes:
-                debugger = template_attributes.get('debug', 'pdb')
-                __import__(debugger).set_trace()  # pdb, ipdb, pudb, ...
-            if t_render:
-                result = self._render_tag[t_render](self, element, template_attributes, generated_attributes, qwebcontext)
+        generated_attributes = ""
+        t_render = None
+        template_attributes = {}
+        for (attribute_name, attribute_value) in element.attrib.iteritems():
+            attribute_name = str(attribute_name)
+            if attribute_name == "groups":
+                cr = qwebcontext.get('request') and qwebcontext['request'].cr or None
+                uid = qwebcontext.get('request') and qwebcontext['request'].uid or None
+                can_see = self.user_has_groups(cr, uid, groups=attribute_value) if cr and uid else False
+                if not can_see:
+                    if qwebcontext.get('editable') and not qwebcontext.get('editable_no_editor'):
+                        errmsg = _("Editor disabled because some content can not be seen by a user who does not belong to the groups %s")
+                        raise openerp.http.Retry(
+                            _("User does not belong to groups %s") % attribute_value, {
+                                'editable_no_editor': errmsg % attribute_value
+                            })
+                    return ''
+
+            attribute_value = attribute_value.encode("utf8")
+
+            if attribute_name.startswith("t-"):
+                for attribute in self._render_att:
+                    if attribute_name[2:].startswith(attribute):
+                        att, val = self._render_att[attribute](self, element, attribute_name, attribute_value, qwebcontext)
+                        generated_attributes += val and ' %s="%s"' % (att, escape(val)) or " "
+                        break
+                else:
+                    if attribute_name[2:] in self._render_tag:
+                        t_render = attribute_name[2:]
+                    template_attributes[attribute_name[2:]] = attribute_value
             else:
-                result = self.render_element(element, template_attributes, generated_attributes, qwebcontext)
+                generated_attributes += ' %s="%s"' % (attribute_name, escape(attribute_value))
+
+        if 'debug' in template_attributes:
+            debugger = template_attributes.get('debug', 'pdb')
+            __import__(debugger).set_trace()  # pdb, ipdb, pudb, ...
+        if t_render:
+            result = self._render_tag[t_render](self, element, template_attributes, generated_attributes, qwebcontext)
+        else:
+            result = self.render_element(element, template_attributes, generated_attributes, qwebcontext)
+
+        if element.tail:
+            result += element.tail
+
         if isinstance(result, unicode):
             return result.encode('utf-8')
         return result
@@ -306,16 +302,16 @@ class QWeb(orm.AbstractModel):
         if inner:
             g_inner = inner
         else:
-            g_inner = []
-            for current_node in element.childNodes:
+            g_inner = [] if element.text is None else [element.text]
+            for current_node in element:
                 try:
                     g_inner.append(self.render_node(current_node, qwebcontext))
                 except (QWebException, openerp.http.Retry):
                     raise
                 except Exception:
                     template = qwebcontext.get('__template__')
-                    raise_qweb_exception(message="Could not render element %r" % element.nodeName, node=element, template=template)
-        name = str(element.nodeName)
+                    raise_qweb_exception(message="Could not render element %r" % element.tag, node=element, template=template)
+        name = str(element.tag)
         inner = "".join(g_inner)
         trim = template_attributes.get("trim", 0)
         if trim == 0:
@@ -417,7 +413,7 @@ class QWeb(orm.AbstractModel):
 
     def render_tag_call_assets(self, element, template_attributes, generated_attributes, qwebcontext):
         """ This special 't-call' tag can be used in order to aggregate/minify javascript and css assets"""
-        if element.childNodes:
+        if len(element):
             # An asset bundle is rendered in two differents contexts (when genereting html and
             # when generating the bundle itself) so they must be qwebcontext free
             # even '0' variable is forbidden
@@ -441,7 +437,7 @@ class QWeb(orm.AbstractModel):
 
     def render_tag_field(self, element, template_attributes, generated_attributes, qwebcontext):
         """ eg: <span t-record="browse_record(res.partner, 1)" t-field="phone">+1 555 555 8069</span>"""
-        node_name = element.nodeName
+        node_name = element.tag
         assert node_name not in ("table", "tbody", "thead", "tfoot", "tr", "td",
                                  "li", "ul", "ol", "dl", "dt", "dd"),\
             "RTE widgets do not work correctly on %r elements" % node_name
@@ -1039,11 +1035,11 @@ class AssetsBundle(object):
         self.parse()
 
     def parse(self):
-        fragments = lxml.html.fragments_fromstring(self.html)
+        fragments = html.fragments_fromstring(self.html)
         for el in fragments:
             if isinstance(el, basestring):
                 self.remains.append(el)
-            elif isinstance(el, lxml.html.HtmlElement):
+            elif isinstance(el, html.HtmlElement):
                 src = el.get('src', '')
                 href = el.get('href', '')
                 atype = el.get('type')
@@ -1063,10 +1059,10 @@ class AssetsBundle(object):
                 elif el.tag == 'script' and self.can_aggregate(src):
                     self.javascripts.append(JavascriptAsset(self, url=src))
                 else:
-                    self.remains.append(lxml.html.tostring(el))
+                    self.remains.append(html.tostring(el))
             else:
                 try:
-                    self.remains.append(lxml.html.tostring(el))
+                    self.remains.append(html.tostring(el))
                 except Exception:
                     # notYETimplementederror
                     raise NotImplementedError
@@ -1256,7 +1252,7 @@ class WebAsset(object):
                 except Exception:
                     raise AssetNotFound("Could not find %s" % self.name)
 
-    def to_html():
+    def to_html(self):
         raise NotImplementedError()
 
     @lazy_property
index ca1503a..9c393f1 100644 (file)
@@ -1,13 +1,11 @@
 # -*- coding: utf-8 -*-
 import cgi
-from xml.dom import minidom as dom
+
+from lxml import etree
 
 from openerp.tests import common
 from openerp.addons.base.ir import ir_qweb
 
-impl = dom.getDOMImplementation()
-document = impl.createDocument(None, None, None)
-
 class TestQWebTField(common.TransactionCase):
     def setUp(self):
         super(TestQWebTField, self).setUp()
@@ -18,8 +16,7 @@ class TestQWebTField(common.TransactionCase):
             self.cr, self.uid, values, context={'inherit_branding': True})
 
     def test_trivial(self):
-        field = document.createElement('span')
-        field.setAttribute('t-field', u'company.name')
+        field = etree.Element('span', {'t-field': u'company.name'})
 
         Companies = self.registry('res.company')
         company_id = Companies.create(self.cr, self.uid, {
@@ -38,8 +35,7 @@ class TestQWebTField(common.TransactionCase):
                 "My Test Company",))
 
     def test_i18n(self):
-        field = document.createElement('span')
-        field.setAttribute('t-field', u'company.name')
+        field = etree.Element('span', {'t-field': u'company.name'})
 
         Companies = self.registry('res.company')
         s = u"Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!"
@@ -59,8 +55,7 @@ class TestQWebTField(common.TransactionCase):
                 cgi.escape(s.encode('utf-8')),))
 
     def test_reject_crummy_tags(self):
-        field = document.createElement('td')
-        field.setAttribute('t-field', u'company.name')
+        field = etree.Element('td', {'t-field': u'company.name'})
 
         with self.assertRaisesRegexp(
                 AssertionError,
@@ -70,8 +65,7 @@ class TestQWebTField(common.TransactionCase):
             }))
 
     def test_reject_t_tag(self):
-        field = document.createElement('t')
-        field.setAttribute('t-field', u'company.name')
+        field = etree.Element('t', {'t-field': u'company.name'})
 
         with self.assertRaisesRegexp(
                 AssertionError,
index c3fbca1..300fea5 100644 (file)
@@ -1,18 +1,16 @@
 # -*- encoding: utf-8 -*-
 import json
 import os
-import xml.dom.minidom
 import datetime
 
+from lxml import etree
+
 from openerp.tests import common
 from openerp.tools import html_escape as e
 from openerp.addons.base.ir import ir_qweb
 
 directory = os.path.dirname(__file__)
 
-impl = xml.dom.minidom.getDOMImplementation()
-doc = impl.createDocument(None, None, None)
-
 class TestExport(common.TransactionCase):
     _model = None
 
@@ -114,7 +112,7 @@ class TestCurrencyExport(TestExport):
         context = dict(inherit_branding=True)
         converted = converter.to_html(
             self.cr, self.uid, 'value', obj, options,
-            doc.createElement('span'),
+            etree.Element('span'),
             {'field': 'obj.value', 'field-options': json.dumps(options)},
             '', ir_qweb.QWebContext(self.cr, self.uid, {'obj': obj, 'c2': dest, }),
             context=context,