1 """gettext message extraction via Babel: http://babel.edgewall.org/"""
2 from StringIO import StringIO
4 from babel.messages.extract import extract_python
6 from mako import lexer, parsetree
def extract(fileobj, keywords, comment_tags, options):
    """Extract messages from Mako templates.

    The template text is read from ``fileobj``, parsed with Mako's lexer,
    and the resulting top-level nodes are handed to :func:`extract_nodes`.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options; ``input_encoding``
                    is looked up first, then ``encoding``, and ``None`` is
                    passed to the lexer when neither is present
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # 'input_encoding' takes precedence over the more generic 'encoding'.
    encoding = options.get('input_encoding', options.get('encoding', None))

    template_node = lexer.Lexer(fileobj.read(),
                                input_encoding=encoding).parse()
    for extracted in extract_nodes(template_node.get_children(),
                                   keywords, comment_tags, options):
        yield extracted
def extract_nodes(nodes, keywords, comment_tags, options):
    """Extract messages from Mako's lexer node objects.

    Nodes are visited in order. ``parsetree.Comment`` nodes that start with
    one of ``comment_tags`` open a run of translator comments; the Python
    source carried by the following code-bearing node is passed to Babel's
    ``extract_python`` and the collected comments are attached to every
    message found there. Child nodes of ``DefTag`` and ``CallTag`` nodes are
    processed recursively.

    :param nodes: an iterable of Mako parsetree.Node objects to extract from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # Pending translator comments, stored as (lineno, text) pairs until they
    # are attached to a message or discarded.
    translator_comments = []
    in_translator_comments = False

    for node in nodes:
        child_nodes = None
        if in_translator_comments and isinstance(node, parsetree.Text) and \
                not node.content.strip():
            # Ignore whitespace within translator comments
            continue

        if isinstance(node, parsetree.Comment):
            value = node.text.strip()
            if in_translator_comments:
                # Continuation of an already opened translator comment run.
                translator_comments.extend(_split_comment(node.lineno, value))
                continue
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.extend(_split_comment(node.lineno,
                                                              value))
                    # Stop at the first matching tag so that a comment
                    # matching several tags is not recorded more than once.
                    break
            continue

        if isinstance(node, parsetree.DefTag):
            code = node.function_decl.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.CallTag):
            # NOTE(review): attribute path inferred -- confirm against
            # mako.parsetree.CallTag.
            code = node.code.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.PageTag):
            code = node.body_decl.code
        elif isinstance(node, parsetree.ControlLine):
            # NOTE(review): `isend` / `text` inferred -- confirm against
            # mako.parsetree.ControlLine.
            if node.isend:
                # A closing control line carries no code; drop any pending
                # comments.
                translator_comments = []
                in_translator_comments = False
                continue
            code = node.text
        elif isinstance(node, parsetree.Code):
            # <% and <%! blocks would provide their own translator comments
            translator_comments = []
            in_translator_comments = False

            # NOTE(review): attribute path inferred -- confirm against
            # mako.parsetree.Code.
            code = node.code.code
        elif isinstance(node, parsetree.Expression):
            # NOTE(review): attribute path inferred -- confirm against
            # mako.parsetree.Expression.
            code = node.code.code
        else:
            # Any other node type ends the current translator comment run.
            translator_comments = []
            in_translator_comments = False
            continue

        # Comments don't apply unless they immediately precede the message
        if translator_comments and \
                translator_comments[-1][0] < node.lineno - 1:
            translator_comments = []
        else:
            # Keep only the comment text; line numbers are no longer needed.
            translator_comments = \
                [comment[1] for comment in translator_comments]

        if isinstance(code, unicode):
            code = code.encode('ascii', 'backslashreplace')
        # extract_python reads from a file-like object.
        code = StringIO(code)
        for lineno, funcname, messages, python_translator_comments \
                in extract_python(code, keywords, comment_tags, options):
            # extract_python numbers lines from 1 within the snippet; shift
            # them so they are relative to the template.
            yield (node.lineno + (lineno - 1), funcname, messages,
                   translator_comments + python_translator_comments)

        translator_comments = []
        in_translator_comments = False

        if child_nodes:
            for extracted in extract_nodes(child_nodes, keywords, comment_tags,
                                           options):
                yield extracted
114 def _split_comment(lineno, comment):
115 """Return the multiline comment at lineno split into a list of comment line
116 numbers and the accompanying comment line"""
117 return [(lineno + index, line) for index, line in
118 enumerate(comment.splitlines())]