html = re.sub('<br\s*/?>', '\n', html)
html = re.sub('<.*?>', ' ', html)
html = html.replace(' ' * 2, ' ')
+ html = html.replace('&gt;', '>')
+ html = html.replace('&lt;', '<')
# strip all lines
- html = '\n'.join([x.strip() for x in html.splitlines()])
- html = html.replace('\n' * 2, '\n')
+ html = ''.join([x.strip() for x in html.splitlines(True)])
for i, url in enumerate(url_index):
if i == 0: