X-Git-Url: http://git.inspyration.org/?a=blobdiff_plain;f=npybabel.py;h=93ff5bcb9dc3e6ea0bf5114b7c3388e38b47bf22;hb=0d3bf8d1a21cae4d8bb70009fc34ad3520ca224c;hp=d2a5553fc801863b4207885a15c4fcd21d97b687;hpb=5b522f5d062a526b7c065ebe39b1879b20c3cf12;p=odoo%2Fodoo.git diff --git a/npybabel.py b/npybabel.py index d2a5553..93ff5bc 100755 --- a/npybabel.py +++ b/npybabel.py @@ -1,16 +1,32 @@ -#!/usr/bin/python +#!/usr/bin/env python # EASY-INSTALL-ENTRY-SCRIPT: 'Babel==0.9.6','console_scripts','pybabel' __requires__ = 'Babel==0.9.6' import sys from pkg_resources import load_entry_point +import re +import json +from lxml import etree as elt +from babel.messages import extract if __name__ == '__main__': sys.exit( load_entry_point('Babel==0.9.6', 'console_scripts', 'pybabel')() ) - -def extract_qweb(fileobj, keywords, comment_tags, options): - """Extract messages from XXX files. + +XMLJS_EXPR = re.compile(r"""(?:\_t *\( *((?:"(?:[^"\\]|\\.)*")|(?:'(?:[^'\\]|\\.)*')) *\))""") + +TRANSLATION_FLAG_COMMENT = "openerp-web" + +# List of etree._Element subclasses that we choose to ignore when parsing XML. +# We include the *Base ones just in case, currently they seem to be subclasses of the _* ones. +SKIPPED_ELEMENT_TYPES = (elt._Comment, elt._ProcessingInstruction, elt.CommentBase, elt.PIBase) + +def extract_xmljs(fileobj, keywords, comment_tags, options): + """Extract messages from Javascript code embedded into XML documents. + This complements the ``extract_javascript`` extractor which works + only on pure .js files, and the``extract_qweb`` extractor, which only + extracts XML text. + :param fileobj: the file-like object the messages should be extracted from :param keywords: a list of keywords (i.e. function names) that should @@ -22,5 +38,103 @@ def extract_qweb(fileobj, keywords, comment_tags, options): tuples :rtype: ``iterator`` """ + assert False, """ the XMLJS extractor does not work and was removed: + + * Babel apparently does not accept two extractors for the same set of files + so it would not run the xmljs extractor at all, extraction of JS stuff + needs to be done from the XML extractor + * The regex above fails up if there are back-slashed quotes within the + translatable string (the string marked with _t), it just won't match the + string + * While extraction succeeds on XML entities (e.g. "), translation + matching will fail if those entities are kept in the PO msgid as the + XML parser will get an un-escaped string, without those entities (so a + text extractor will extract ``Found match "%s"``, but the msgid + of the PO file must be ``Found match "%s"`` or the translation will fail + * single-quoted strings are not valid JSON string, so single-quoted strings + matched by the regex (likely since XML attributes are double-quoted, + single quotes within them don't have to be escaped) will blow up when + json-parsed for their content + + I think that's about it. + + If this extractor is reimplemented, it should be integrated into + extract_qweb, either in the current pass (probably not a good idea) or as + a separate pass using iterparse, matching either elements with t-js or + some other kinds of t-* directives (@t-esc, @t-raw, @t-att, others?), + shove the attribute content into a StringIO and pass *that* to Babel's + own extract_javascript; then add a line offset in order to yield the + correct line number. + """ content = fileobj.read() - return [] + found = XMLJS_EXPR.finditer(content) + index = 0 + line_nbr = 0 + for f in found: + msg = f.group(1) + msg = json.loads(msg) + while index < f.start(): + if content[index] == "\n": + line_nbr += 1 + index += 1 + yield (line_nbr, None, msg, [TRANSLATION_FLAG_COMMENT]) + +def extract_qweb(fileobj, keywords, comment_tags, options): + """Extract messages from qweb template files. + :param fileobj: the file-like object the messages should be extracted + from + :param keywords: a list of keywords (i.e. function names) that should + be recognized as translation functions + :param comment_tags: a list of translator tags to search for and + include in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` + tuples + :rtype: ``iterator`` + """ + result = [] + def handle_text(text, lineno): + text = (text or "").strip() + if len(text) > 1: # Avoid mono-char tokens like ':' ',' etc. + result.append((lineno, None, text, [TRANSLATION_FLAG_COMMENT])) + + # not using elementTree.iterparse because we need to skip sub-trees in case + # the ancestor element had a reason to be skipped + def iter_elements(current_element): + for el in current_element: + if isinstance(el, SKIPPED_ELEMENT_TYPES): continue + if "t-js" not in el.attrib and \ + not ("t-jquery" in el.attrib and "t-operation" not in el.attrib) and \ + not ("t-translation" in el.attrib and el.attrib["t-translation"].strip() == "off"): + handle_text(el.text, el.sourceline) + for att in ('title', 'alt', 'label'): + if att in el.attrib: + handle_text(el.attrib[att], el.sourceline) + iter_elements(el) + handle_text(el.tail, el.sourceline) + + tree = elt.parse(fileobj) + iter_elements(tree.getroot()) + + return result + +def extract_javascript(fileobj, keywords, comment_tags, options): + """Extract messages from Javascript source files. This extractor delegates + to babel's buit-in javascript extractor, but adds a special comment + used as a flag to identify web translations. + + :param fileobj: the file-like object the messages should be extracted + from + :param keywords: a list of keywords (i.e. function names) that should + be recognized as translation functions + :param comment_tags: a list of translator tags to search for and + include in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` + tuples + :rtype: ``iterator`` + """ + for (message_lineno, funcname, messages, comments) in \ + extract.extract_javascript(fileobj, keywords, comment_tags, options): + comments.append(TRANSLATION_FLAG_COMMENT) + yield (message_lineno, funcname, messages, comments)