npybabel.py

   1 #!/usr/bin/env python
   2 # EASY-INSTALL-ENTRY-SCRIPT: 'Babel==0.9.6','console_scripts','pybabel'
   3 __requires__ = 'Babel==0.9.6'
   4 import sys
   5 from pkg_resources import load_entry_point
   6 import re
   7 import json
   8 from lxml import etree as elt
   9 from babel.messages import extract
  10
  11 if __name__ == '__main__':
  12     sys.exit(
  13         load_entry_point('Babel==0.9.6', 'console_scripts', 'pybabel')()
  14     )
  15
# Matches a ``_t( ... )`` call whose only argument is a double-quoted or a
# single-quoted string literal (backslash escapes are allowed inside the
# literal). Group 1 captures the literal, surrounding quotes included.
XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.)*')) *\))""")

# Comment added to the messages produced by the extractors in this module;
# the extract_javascript docstring describes it as the flag used to identify
# web translations.
TRANSLATION_FLAG_COMMENT = "openerp-web"

# List of etree._Element subclasses that we choose to ignore when parsing XML.
# We include the *Base ones just in case, currently they seem to be subclasses of the _* ones.
SKIPPED_ELEMENT_TYPES = (elt._Comment, elt._ProcessingInstruction, elt.CommentBase, elt.PIBase)
  23
  24 def extract_xmljs(fileobj, keywords, comment_tags, options):
  25     """Extract messages from Javascript code embedded into XML documents.
  26     This complements the ``extract_javascript`` extractor which works
  27     only on pure .js files, and the``extract_qweb`` extractor, which only
  28     extracts XML text.
  29
  30     :param fileobj: the file-like object the messages should be extracted
  31                     from
  32     :param keywords: a list of keywords (i.e. function names) that should
  33                      be recognized as translation functions
  34     :param comment_tags: a list of translator tags to search for and
  35                          include in the results
  36     :param options: a dictionary of additional options (optional)
  37     :return: an iterator over ``(lineno, funcname, message, comments)``
  38              tuples
  39     :rtype: ``iterator``
  40     """
  41     assert False, """ the XMLJS extractor does not work and was removed:
  42
  43     * Babel apparently does not accept two extractors for the same set of files
  44       so it would not run the xmljs extractor at all, extraction of JS stuff
  45       needs to be done from the XML extractor
  46     * The regex above fails up if there are back-slashed quotes within the
  47       translatable string (the string marked with _t), it just won't match the
  48       string
  49     * While extraction succeeds on XML entities (e.g. &quot;), translation
  50       matching will fail if those entities are kept in the PO msgid as the
  51       XML parser will get an un-escaped string, without those entities (so a
  52       text extractor will extract ``Found match &quot;%s&quot;``, but the msgid
  53       of the PO file must be ``Found match "%s"`` or the translation will fail
  54     * single-quoted strings are not valid JSON string, so single-quoted strings
  55       matched by the regex (likely since XML attributes are double-quoted,
  56       single quotes within them don't have to be escaped) will blow up when
  57       json-parsed for their content
  58
  59     I think that's about it.
  60
  61     If this extractor is reimplemented, it should be integrated into
  62     extract_qweb, either in the current pass (probably not a good idea) or as
  63     a separate pass using iterparse, matching either elements with t-js or
  64     some other kinds of t-* directives (@t-esc, @t-raw, @t-att, others?),
  65     shove the attribute content into a StringIO and pass *that* to Babel's
  66     own extract_javascript; then add a line offset in order to yield the
  67     correct line number.
  68     """
  69     content = fileobj.read()
  70     found = XMLJS_EXPR.finditer(content)
  71     index = 0
  72     line_nbr = 0
  73     for f in found:
  74         msg = f.group(1)
  75         msg = json.loads(msg)
  76         while index < f.start():
  77             if content[index] == "\n":
  78                 line_nbr += 1
  79             index += 1
  80         yield (line_nbr, None, msg, [TRANSLATION_FLAG_COMMENT])
  81
  82 def extract_qweb(fileobj, keywords, comment_tags, options):
  83     """Extract messages from qweb template files.
  84     :param fileobj: the file-like object the messages should be extracted
  85                     from
  86     :param keywords: a list of keywords (i.e. function names) that should
  87                      be recognized as translation functions
  88     :param comment_tags: a list of translator tags to search for and
  89                          include in the results
  90     :param options: a dictionary of additional options (optional)
  91     :return: an iterator over ``(lineno, funcname, message, comments)``
  92              tuples
  93     :rtype: ``iterator``
  94     """
  95     result = []
  96     def handle_text(text, lineno):
  97         text = (text or "").strip()
  98         if len(text) > 1: # Avoid mono-char tokens like ':' ',' etc.
  99             result.append((lineno, None, text, [TRANSLATION_FLAG_COMMENT]))
 100
 101     # not using elementTree.iterparse because we need to skip sub-trees in case
 102     # the ancestor element had a reason to be skipped
 103     def iter_elements(current_element):
 104         for el in current_element:
 105             if isinstance(el, SKIPPED_ELEMENT_TYPES): continue
 106             if "t-js" not in el.attrib and \
 107                     not ("t-jquery" in el.attrib and "t-operation" not in el.attrib) and \
 108                     not ("t-translation" in el.attrib and el.attrib["t-translation"].strip() == "off"):
 109                 handle_text(el.text, el.sourceline)
 110                 for att in ('title', 'alt', 'label'):
 111                     if att in el.attrib:
 112                         handle_text(el.attrib[att], el.sourceline)
 113                 iter_elements(el)
 114             handle_text(el.tail, el.sourceline)
 115
 116     tree = elt.parse(fileobj)
 117     iter_elements(tree.getroot())
 118
 119     return result
 120
 121 def extract_javascript(fileobj, keywords, comment_tags, options):
 122     """Extract messages from Javascript source files. This extractor delegates
 123     to babel's buit-in javascript extractor, but adds a special comment
 124     used as a flag to identify web translations.
 125
 126     :param fileobj: the file-like object the messages should be extracted
 127                     from
 128     :param keywords: a list of keywords (i.e. function names) that should
 129                      be recognized as translation functions
 130     :param comment_tags: a list of translator tags to search for and
 131                          include in the results
 132     :param options: a dictionary of additional options (optional)
 133     :return: an iterator over ``(lineno, funcname, message, comments)``
 134              tuples
 135     :rtype: ``iterator``
 136     """
 137     for (message_lineno, funcname, messages, comments) in \
 138         extract.extract_javascript(fileobj, keywords, comment_tags, options):
 139         comments.append(TRANSLATION_FLAG_COMMENT)
 140         yield (message_lineno, funcname, messages, comments)