5 return x # It seems that our test suite doesn't care.
10 x = unicode(x, "utf8", "replace")
12 result = handle_element(root[0])
# Tag names that handle_element tests el.tag against before doing anything
# else with an element (presumably such elements are dropped entirely --
# confirm against handle_element).
to_remove = set(["script", "head", "meta", "title", "link", "img"])

# Tag names for which handle_element returns the concatenation of the
# processed children instead of building a new element for the tag itself.
to_unwrap = set(["html", "body"])

# Matches an href whose scheme is "javascript:", allowing leading whitespace
# and whitespace before the colon.
#   * re.IGNORECASE: URL schemes are case-insensitive, so "JavaScript:" and
#     "JAVASCRIPT:" must be detected as well.
#   * re.DOTALL: lets ".*" run across embedded newlines, so a value such as
#     "javascript:...\n..." still matches through to the end of the string.
#   * raw string: keeps "\s" a regex escape rather than an (invalid) Python
#     string escape.
javascript_regex = re.compile(
    r"^\s*javascript\s*:.*$", re.IGNORECASE | re.DOTALL
)
20 def handle_a(el, new):
21 href = el.get("href", "#")
22 if javascript_regex.search(href):
29 def handle_element(el):
30 if type(el) == str or type(el) == unicode:
32 if el.tag in to_remove:
34 if el.tag in to_unwrap:
35 return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)])
36 new = pq("<%s />" % el.tag)[0]
37 for i in children(el):
38 append_to(handle_element(i), new)
40 special[el.tag](el, new)
45 if el.text is not None:
47 for i in el.getchildren():
49 if i.tail is not None:
53 def append_to(new_ones, el):
55 if type(i) == str or type(i) == unicode:
56 children = el.getchildren()
57 if len(children) == 0: