X-Git-Url: http://git.inspyration.org/?a=blobdiff_plain;f=npybabel.py;h=14ac115e338463f6cd6fa5460c9caea5ae9e37bb;hb=c5dc5cef00cd596d6c0b6c9899bf1d8de16e9026;hp=24a97be2cacfab4552554be9006ddfbe184ffad1;hpb=968decea96874e708bf95d63d65dadf159936074;p=odoo%2Fodoo.git

diff --git a/npybabel.py b/npybabel.py
index 24a97be..14ac115 100755
--- a/npybabel.py
+++ b/npybabel.py
@@ -17,6 +17,10 @@ XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.
 
 TRANSLATION_FLAG_COMMENT = "openerp-web"
 
+# List of etree._Element subclasses that we choose to ignore when parsing XML.
+# We include the *Base ones just in case; currently they seem to be subclasses of the _* ones.
+SKIPPED_ELEMENT_TYPES = (elt._Comment, elt._ProcessingInstruction, elt.CommentBase, elt.PIBase)
+
 def extract_xmljs(fileobj, keywords, comment_tags, options):
     """Extract messages from Javascript code embedded into XML documents.
     This complements the ``extract_javascript`` extractor which works
@@ -34,6 +38,34 @@ def extract_xmljs(fileobj, keywords, comment_tags, options):
              tuples
     :rtype: ``iterator``
     """
+    assert False, """ the XMLJS extractor does not work and was removed:
+
+    * Babel apparently does not accept two extractors for the same set of files
+      so it would not run the xmljs extractor at all, extraction of JS stuff
+      needs to be done from the XML extractor
+    * The regex above fails if there are backslash-escaped quotes within the
+      translatable string (the string marked with _t): it just won't match the
+      string
+    * While extraction succeeds on XML entities (e.g. &quot;), translation
+      matching will fail if those entities are kept in the PO msgid as the
+      XML parser will get an un-escaped string, without those entities (so a
+      text extractor will extract ``Found match &quot;%s&quot;``, but the msgid
+      of the PO file must be ``Found match "%s"`` or the translation will fail)
+    * Single-quoted strings are not valid JSON strings, so single-quoted strings
+      matched by the regex (likely since XML attributes are double-quoted,
+      single quotes within them don't have to be escaped) will blow up when
+      json-parsed for their content
+
+    I think that's about it.
+
+    If this extractor is reimplemented, it should be integrated into
+    extract_qweb, either in the current pass (probably not a good idea) or as
+    a separate pass using iterparse, matching either elements with t-js or
+    some other kinds of t-* directives (@t-esc, @t-raw, @t-att, others?),
+    shove the attribute content into a StringIO and pass *that* to Babel's
+    own extract_javascript; then add a line offset in order to yield the
+    correct line number.
+    """
     content = fileobj.read()
     found = XMLJS_EXPR.finditer(content)
     index = 0
@@ -70,10 +102,14 @@ def extract_qweb(fileobj, keywords, comment_tags, options):
     # the ancestor element had a reason to be skipped
     def iter_elements(current_element):
         for el in current_element:
+            if isinstance(el, SKIPPED_ELEMENT_TYPES): continue
             if "t-js" not in el.attrib and \
                     not ("t-jquery" in el.attrib and "t-operation" not in el.attrib) and \
                     not ("t-translation" in el.attrib and el.attrib["t-translation"].strip() == "off"):
                 handle_text(el.text, el.sourceline)
+                for att in ('title', 'alt', 'label', 'placeholder'):
+                    if att in el.attrib:
+                        handle_text(el.attrib[att], el.sourceline)
                 iter_elements(el)
             handle_text(el.tail, el.sourceline)