2 # EASY-INSTALL-ENTRY-SCRIPT: 'Babel==0.9.6','console_scripts','pybabel'
3 __requires__ = 'Babel==0.9.6'
5 from pkg_resources import load_entry_point
8 from lxml import etree as elt
9 from babel.messages import extract
if __name__ == '__main__':
    # Look up the ``pybabel`` console-script entry point exposed by the pinned
    # Babel distribution, then run it as this script's main program.
    pybabel_main = load_entry_point('Babel==0.9.6', 'console_scripts', 'pybabel')
    pybabel_main()
# Matches a call of the form ``_t("...")`` or ``_t('...')`` (optional spaces
# around the parentheses). Group 1 captures the quoted string literal, quotes
# included; backslash-escaped characters are accepted inside the literal.
XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.)*')) *\))""")

# Comment attached to every message produced by the extractors in this module;
# it serves as a flag identifying web translations.
TRANSLATION_FLAG_COMMENT = "openerp-web"

# etree._Element subclasses that are ignored while walking an XML tree.
# The *Base classes are listed as well just in case; they currently appear to
# be subclasses of the corresponding _* classes.
SKIPPED_ELEMENT_TYPES = (
    elt._Comment,
    elt._ProcessingInstruction,
    elt.CommentBase,
    elt.PIBase,
)
# NOTE(review): extract_xmljs is deliberately disabled. The first statement
# visible after its docstring is `assert False, ...`; the assertion message
# lists the reasons the extractor was removed and how a re-implementation
# should be folded into extract_qweb. The regex-scanning code that follows the
# assertion therefore appears to be unreachable in normal runs.
# NOTE(review): `assert` statements are stripped when Python runs with -O, so
# this guard would disappear in optimized mode - confirm whether an explicit
# `raise` is wanted here.
24 def extract_xmljs(fileobj, keywords, comment_tags, options):
25 """Extract messages from Javascript code embedded into XML documents.
26 This complements the ``extract_javascript`` extractor which works
27 only on pure .js files, and the``extract_qweb`` extractor, which only
30 :param fileobj: the file-like object the messages should be extracted
32 :param keywords: a list of keywords (i.e. function names) that should
33 be recognized as translation functions
34 :param comment_tags: a list of translator tags to search for and
35 include in the results
36 :param options: a dictionary of additional options (optional)
37 :return: an iterator over ``(lineno, funcname, message, comments)``
41 assert False, """ the XMLJS extractor does not work and was removed:
43 * Babel apparently does not accept two extractors for the same set of files
44 so it would not run the xmljs extractor at all, extraction of JS stuff
45 needs to be done from the XML extractor
46 * The regex above fails up if there are back-slashed quotes within the
47 translatable string (the string marked with _t), it just won't match the
49 * While extraction succeeds on XML entities (e.g. "), translation
50 matching will fail if those entities are kept in the PO msgid as the
51 XML parser will get an un-escaped string, without those entities (so a
52 text extractor will extract ``Found match "%s"``, but the msgid
53 of the PO file must be ``Found match "%s"`` or the translation will fail
54 * single-quoted strings are not valid JSON string, so single-quoted strings
55 matched by the regex (likely since XML attributes are double-quoted,
56 single quotes within them don't have to be escaped) will blow up when
57 json-parsed for their content
59 I think that's about it.
61 If this extractor is reimplemented, it should be integrated into
62 extract_qweb, either in the current pass (probably not a good idea) or as
63 a separate pass using iterparse, matching either elements with t-js or
64 some other kinds of t-* directives (@t-esc, @t-raw, @t-att, others?),
65 shove the attribute content into a StringIO and pass *that* to Babel's
66 own extract_javascript; then add a line offset in order to yield the
69 content = fileobj.read()
70 found = XMLJS_EXPR.finditer(content)
76 while index < f.start():
77 if content[index] == "\n":
80 yield (line_nbr, None, msg, [TRANSLATION_FLAG_COMMENT])
def extract_qweb(fileobj, keywords, comment_tags, options):
    """Extract messages from qweb template files.

    The document is parsed with lxml and walked recursively. For every
    element that is not excluded, its text and its ``title``, ``alt`` and
    ``label`` attributes are collected. Each collected string is stripped,
    and strings of one character or less are dropped.

    An element (together with its whole sub-tree) is excluded when it

    * is a comment or processing instruction,
    * carries a ``t-js`` attribute,
    * carries ``t-jquery`` without ``t-operation``, or
    * carries ``t-translation="off"``.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions (unused here)
    :param comment_tags: a list of translator tags to search for and
                         include in the results (unused here)
    :param options: a dictionary of additional options (unused here)
    :return: a list of ``(lineno, funcname, message, comments)`` tuples,
             where ``funcname`` is always ``None`` and ``comments`` holds
             only ``TRANSLATION_FLAG_COMMENT``
    """
    result = []

    def handle_text(text, lineno):
        # ``text`` may be None (e.g. an element without text or tail).
        text = (text or "").strip()
        if len(text) > 1:  # Avoid mono-char tokens like ':' ',' etc.
            result.append((lineno, None, text, [TRANSLATION_FLAG_COMMENT]))

    def is_translatable(el):
        # Mirrors the three exclusion rules listed in the docstring.
        attrib = el.attrib
        if "t-js" in attrib:
            return False
        if "t-jquery" in attrib and "t-operation" not in attrib:
            return False
        if "t-translation" in attrib and attrib["t-translation"].strip() == "off":
            return False
        return True

    # not using elementTree.iterparse because we need to skip sub-trees in
    # case the ancestor element had a reason to be skipped
    def iter_elements(current_element):
        for el in current_element:
            if isinstance(el, SKIPPED_ELEMENT_TYPES):
                continue
            if is_translatable(el):
                handle_text(el.text, el.sourceline)
                for att in ('title', 'alt', 'label'):
                    # Only look at attributes the element actually has;
                    # an unguarded lookup would raise KeyError.
                    if att in el.attrib:
                        handle_text(el.attrib[att], el.sourceline)
                # Descend only below elements that were not excluded.
                iter_elements(el)
            # NOTE(review): the tail is handled even when the element itself
            # is excluded, on the assumption that tail text belongs to the
            # parent's content - confirm this placement.
            handle_text(el.tail, el.sourceline)

    tree = elt.parse(fileobj)
    iter_elements(tree.getroot())
    return result
def extract_javascript(fileobj, keywords, comment_tags, options):
    """Extract messages from Javascript source files.

    All the work is delegated to Babel's built-in javascript extractor;
    this wrapper only appends ``TRANSLATION_FLAG_COMMENT`` to the comments
    of each extracted message, as a flag identifying web translations.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    """
    babel_messages = extract.extract_javascript(
        fileobj, keywords, comment_tags, options)
    for entry in babel_messages:
        message_lineno, funcname, messages, comments = entry
        # Tag the message in place, on the list handed over by Babel.
        comments.append(TRANSLATION_FLAG_COMMENT)
        yield (message_lineno, funcname, messages, comments)