def process(word):
    """Fetch the page for *word*, reduce it to ASCII text, and print it.

    The URL is built by formatting the module-level ``_template`` with
    ``word``. The page is downloaded with ``web.get_page``, converted to
    text with ``web.html_to_text``, decoded as UTF-8, stripped of non-ASCII
    characters and of bracketed ``.gif`` references, and finally handed to
    ``print_result``. Nothing is returned.

    NOTE(review): the decode/encode round trip looks like Python 2 idiom;
    confirm the target interpreter before porting.
    """
    url = _template.format(word=word)
    html = web.get_page(url, user_agent=True)
    txt = web.html_to_text(html).decode("utf-8")

    # Turn middle dots (U+00B7) into hyphens first; presumably they would
    # otherwise be dropped by remove_non_ascii below.
    # (A transliteration step via ascii.unicode_to_ascii used to sit here
    # and was disabled by the original author.)
    txt = txt.replace(u"\xb7", "-")
    txt = ascii.remove_non_ascii(txt).encode("ascii")

    # Remove "[... .gif]" references (presumably image placeholders).
    # Raw string avoids the invalid "\[" escape, and the dot is escaped so
    # only a literal ".gif" suffix matches rather than any character + "gif".
    txt = re.sub(r"\[.*?\.gif\]", "", txt)
    print_result(txt)
def process(word):
    '''Fetch the page for *word*, reduce it to ASCII text, and print it.

    The URL is built by formatting the module-level ``_template`` with
    ``word``. The page is downloaded with ``web.get_page``, converted to
    text with ``web.html_to_text``, decoded as UTF-8, stripped of non-ASCII
    characters and of bracketed ``.gif`` references, and finally handed to
    ``print_result``. Nothing is returned.

    NOTE(review): the decode/encode round trip looks like Python 2 idiom;
    confirm the target interpreter before porting.
    '''
    url = _template.format(word=word)
    html = web.get_page(url, user_agent=True)
    txt = web.html_to_text(html).decode('utf-8')

    # Turn middle dots (U+00B7) into hyphens first; presumably they would
    # otherwise be dropped by remove_non_ascii below.
    # (A transliteration step via ascii.unicode_to_ascii used to sit here
    # and was disabled by the original author.)
    txt = txt.replace(u'\xb7', '-')
    txt = ascii.remove_non_ascii(txt).encode('ascii')

    # Remove "[... .gif]" references (presumably image placeholders).
    # Raw string avoids the invalid "\[" escape, and the dot is escaped so
    # only a literal ".gif" suffix matches rather than any character + "gif".
    txt = re.sub(r'\[.*?\.gif\]', '', txt)
    print_result(txt)
def test_remove_non_ascii():
    """Check that remove_non_ascii drops accented letters and keeps ASCII."""
    # (input, expected) pairs, checked in the listed order.
    cases = (
        ('László', 'Lszl'),
        ('ünnep', 'nnep'),
        ('áéíóöőúüű-ok', '-ok'),
        ('ÁÉÍÓÖŐÚÜŰ-ok', '-ok'),
    )
    for raw, expected in cases:
        assert ascii.remove_non_ascii(raw) == expected