npybabel.py

   1 #!/usr/bin/env python
   2 # EASY-INSTALL-ENTRY-SCRIPT: 'Babel==0.9.6','console_scripts','pybabel'
   3 __requires__ = 'Babel==0.9.6'
   4 import sys
   5 from pkg_resources import load_entry_point
   6 import re
   7 import json
   8 from lxml import etree as elt
   9 from babel.messages import extract
  10
  11 if __name__ == '__main__':
  12     sys.exit(
  13         load_entry_point('Babel==0.9.6', 'console_scripts', 'pybabel')()
  14     )
  15
  16 XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.)*')) *\))""")
  17
  18 TRANSLATION_FLAG_COMMENT = "openerp-web"
  19
  20 # List of etree._Element subclasses that we choose to ignore when parsing XML.
  21 # We include the *Base ones just in case, currently they seem to be subclasses of the _* ones.
  22 SKIPPED_ELEMENT_TYPES = (elt._Comment, elt._ProcessingInstruction, elt.CommentBase, elt.PIBase)
  23
  24 def extract_xmljs(fileobj, keywords, comment_tags, options):
  25     """Extract messages from Javascript code embedded into XML documents.
  26     This complements the ``extract_javascript`` extractor which works
  27     only on pure .js files, and the``extract_qweb`` extractor, which only
  28     extracts XML text.
  29
  30     :param fileobj: the file-like object the messages should be extracted
  31                     from
  32     :param keywords: a list of keywords (i.e. function names) that should
  33                      be recognized as translation functions
  34     :param comment_tags: a list of translator tags to search for and
  35                          include in the results
  36     :param options: a dictionary of additional options (optional)
  37     :return: an iterator over ``(lineno, funcname, message, comments)``
  38              tuples
  39     :rtype: ``iterator``
  40     """
  41     content = fileobj.read()
  42     found = XMLJS_EXPR.finditer(content)
  43     index = 0
  44     line_nbr = 0
  45     for f in found:
  46         msg = f.group(1)
  47         msg = json.loads(msg)
  48         while index < f.start():
  49             if content[index] == "\n":
  50                 line_nbr += 1
  51             index += 1
  52         yield (line_nbr, None, msg, [TRANSLATION_FLAG_COMMENT])
  53
  54 def extract_qweb(fileobj, keywords, comment_tags, options):
  55     """Extract messages from qweb template files.
  56     :param fileobj: the file-like object the messages should be extracted
  57                     from
  58     :param keywords: a list of keywords (i.e. function names) that should
  59                      be recognized as translation functions
  60     :param comment_tags: a list of translator tags to search for and
  61                          include in the results
  62     :param options: a dictionary of additional options (optional)
  63     :return: an iterator over ``(lineno, funcname, message, comments)``
  64              tuples
  65     :rtype: ``iterator``
  66     """
  67     result = []
  68     def handle_text(text, lineno):
  69         text = (text or "").strip()
  70         if len(text) > 1: # Avoid mono-char tokens like ':' ',' etc.
  71             result.append((lineno, None, text, [TRANSLATION_FLAG_COMMENT]))
  72
  73     # not using elementTree.iterparse because we need to skip sub-trees in case
  74     # the ancestor element had a reason to be skipped
  75     def iter_elements(current_element):
  76         for el in current_element:
  77             if isinstance(el, SKIPPED_ELEMENT_TYPES): continue
  78             if "t-js" not in el.attrib and \
  79                     not ("t-jquery" in el.attrib and "t-operation" not in el.attrib) and \
  80                     not ("t-translation" in el.attrib and el.attrib["t-translation"].strip() == "off"):
  81                 handle_text(el.text, el.sourceline)
  82                 for att in ('title', 'alt', 'label'):
  83                     if att in el.attrib:
  84                         handle_text(el.attrib[att], el.sourceline)
  85                 iter_elements(el)
  86             handle_text(el.tail, el.sourceline)
  87
  88     tree = elt.parse(fileobj)
  89     iter_elements(tree.getroot())
  90
  91     return result
  92
  93 def extract_javascript(fileobj, keywords, comment_tags, options):
  94     """Extract messages from Javascript source files. This extractor delegates
  95     to babel's buit-in javascript extractor, but adds a special comment
  96     used as a flag to identify web translations.
  97
  98     :param fileobj: the file-like object the messages should be extracted
  99                     from
 100     :param keywords: a list of keywords (i.e. function names) that should
 101                      be recognized as translation functions
 102     :param comment_tags: a list of translator tags to search for and
 103                          include in the results
 104     :param options: a dictionary of additional options (optional)
 105     :return: an iterator over ``(lineno, funcname, message, comments)``
 106              tuples
 107     :rtype: ``iterator``
 108     """
 109     for (message_lineno, funcname, messages, comments) in \
 110         extract.extract_javascript(fileobj, keywords, comment_tags, options):
 111         comments.append(TRANSLATION_FLAG_COMMENT)
 112         yield (message_lineno, funcname, messages, comments)