X-Git-Url: http://git.inspyration.org/?a=blobdiff_plain;f=npybabel.py;h=93ff5bcb9dc3e6ea0bf5114b7c3388e38b47bf22;hb=0d3bf8d1a21cae4d8bb70009fc34ad3520ca224c;hp=d2a5553fc801863b4207885a15c4fcd21d97b687;hpb=5b522f5d062a526b7c065ebe39b1879b20c3cf12;p=odoo%2Fodoo.git

diff --git a/npybabel.py b/npybabel.py
index d2a5553..93ff5bc 100755
--- a/npybabel.py
+++ b/npybabel.py
@@ -1,16 +1,32 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # EASY-INSTALL-ENTRY-SCRIPT: 'Babel==0.9.6','console_scripts','pybabel'
 __requires__ = 'Babel==0.9.6'
 import sys
 from pkg_resources import load_entry_point
+import re
+import json
+from lxml import etree as elt
+from babel.messages import extract
 
 if __name__ == '__main__':
     sys.exit(
         load_entry_point('Babel==0.9.6', 'console_scripts', 'pybabel')()
     )
-    
-def extract_qweb(fileobj, keywords, comment_tags, options):
-    """Extract messages from XXX files.
+
+XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.)*')) *\))""")
+
+TRANSLATION_FLAG_COMMENT = "openerp-web"
+
+# List of etree._Element subclasses that we choose to ignore when parsing XML.
+# We include the *Base ones just in case; currently they seem to be subclasses of the _* ones.
+SKIPPED_ELEMENT_TYPES = (elt._Comment, elt._ProcessingInstruction, elt.CommentBase, elt.PIBase)
+
+def extract_xmljs(fileobj, keywords, comment_tags, options):
+    """Extract messages from Javascript code embedded into XML documents.
+    This complements the ``extract_javascript`` extractor which works
+    only on pure .js files, and the ``extract_qweb`` extractor, which only
+    extracts XML text.
+
     :param fileobj: the file-like object the messages should be extracted
                     from
     :param keywords: a list of keywords (i.e. function names) that should
@@ -22,5 +38,103 @@ def extract_qweb(fileobj, keywords, comment_tags, options):
              tuples
     :rtype: ``iterator``
     """
+    assert False, """ the XMLJS extractor does not work and was removed:
+
+    * Babel apparently does not accept two extractors for the same set of files
+      so it would not run the xmljs extractor at all, extraction of JS stuff
+      needs to be done from the XML extractor
+    * The regex above fails up if there are back-slashed quotes within the
+      translatable string (the string marked with _t), it just won't match the
+      string
+    * While extraction succeeds on XML entities (e.g. &quot;), translation
+      matching will fail if those entities are kept in the PO msgid as the
+      XML parser will get an un-escaped string, without those entities (so a
+      text extractor will extract ``Found match &quot;%s&quot;``, but the msgid
+      of the PO file must be ``Found match "%s"`` or the translation will fail
+    * single-quoted strings are not valid JSON string, so single-quoted strings
+      matched by the regex (likely since XML attributes are double-quoted,
+      single quotes within them don't have to be escaped) will blow up when
+      json-parsed for their content
+
+    I think that's about it.
+
+    If this extractor is reimplemented, it should be integrated into
+    extract_qweb, either in the current pass (probably not a good idea) or as
+    a separate pass using iterparse, matching either elements with t-js or
+    some other kinds of t-* directives (@t-esc, @t-raw, @t-att, others?),
+    shove the attribute content into a StringIO and pass *that* to Babel's
+    own extract_javascript; then add a line offset in order to yield the
+    correct line number.
+    """
     content = fileobj.read()
-    return []
+    found = XMLJS_EXPR.finditer(content)
+    index = 0
+    line_nbr = 0
+    for f in found:
+        msg = f.group(1)
+        msg = json.loads(msg)
+        while index < f.start():
+            if content[index] == "\n":
+                line_nbr += 1
+            index += 1
+        yield (line_nbr, None, msg, [TRANSLATION_FLAG_COMMENT])
+
+def extract_qweb(fileobj, keywords, comment_tags, options):
+    """Extract messages from qweb template files.
+    :param fileobj: the file-like object the messages should be extracted
+                    from
+    :param keywords: a list of keywords (i.e. function names) that should
+                     be recognized as translation functions
+    :param comment_tags: a list of translator tags to search for and
+                         include in the results
+    :param options: a dictionary of additional options (optional)
+    :return: an iterator over ``(lineno, funcname, message, comments)``
+             tuples
+    :rtype: ``iterator``
+    """
+    result = []
+    def handle_text(text, lineno):
+        text = (text or "").strip()
+        if len(text) > 1: # Avoid mono-char tokens like ':' ',' etc.
+            result.append((lineno, None, text, [TRANSLATION_FLAG_COMMENT]))
+
+    # not using elementTree.iterparse because we need to skip sub-trees in case
+    # the ancestor element had a reason to be skipped
+    def iter_elements(current_element):
+        for el in current_element:
+            if isinstance(el, SKIPPED_ELEMENT_TYPES): continue
+            if "t-js" not in el.attrib and \
+                    not ("t-jquery" in el.attrib and "t-operation" not in el.attrib) and \
+                    not ("t-translation" in el.attrib and el.attrib["t-translation"].strip() == "off"):
+                handle_text(el.text, el.sourceline)
+                for att in ('title', 'alt', 'label'):
+                    if att in el.attrib:
+                        handle_text(el.attrib[att], el.sourceline)
+                iter_elements(el)
+            handle_text(el.tail, el.sourceline)
+
+    tree = elt.parse(fileobj)
+    iter_elements(tree.getroot())
+
+    return result
+
+def extract_javascript(fileobj, keywords, comment_tags, options):
+    """Extract messages from Javascript source files. This extractor delegates
+    to babel's built-in javascript extractor, but adds a special comment
+    used as a flag to identify web translations.
+
+    :param fileobj: the file-like object the messages should be extracted
+                    from
+    :param keywords: a list of keywords (i.e. function names) that should
+                     be recognized as translation functions
+    :param comment_tags: a list of translator tags to search for and
+                         include in the results
+    :param options: a dictionary of additional options (optional)
+    :return: an iterator over ``(lineno, funcname, message, comments)``
+             tuples
+    :rtype: ``iterator``
+    """
+    for (message_lineno, funcname, messages, comments) in \
+        extract.extract_javascript(fileobj, keywords, comment_tags, options):
+        comments.append(TRANSLATION_FLAG_COMMENT)
+        yield (message_lineno, funcname, messages, comments)