def cleaner_Both(wikidoc):
    '''Clean the text with WikiExtractor and pull links with WikiTextProcessor.

    Both tools are fed the raw ``wikidoc.wiki_text``. The cleaned text is
    stored on ``wikidoc.clean_text`` and the links under
    ``wikidoc.meta[WdNames.LINKS]``. The same ``wikidoc`` object is
    modified in place and returned.
    '''
    # Text cleaning is delegated to WikiExtractor.
    cleaned = WikiExtractor.clean(wikidoc.wiki_text)
    wikidoc.clean_text = cleaned

    # Link extraction is delegated to a separate WikiTextProcessor pass
    # over the same raw wiki text.
    link_parser = WikiTextProcessor(wikidoc.wiki_text)
    links = link_parser.get_links()
    wikidoc.meta[WdNames.LINKS] = links

    return wikidoc
def cleaner_WikiTextProcessor(wikidoc):
    '''Clean the text and pull links using a single WikiTextProcessor.

    One processor is built from the raw ``wikidoc.wiki_text``. Its
    ``get_clean_text()`` result is stored on ``wikidoc.clean_text`` and its
    ``get_links()`` result under ``wikidoc.meta[WdNames.LINKS]``. The same
    ``wikidoc`` object is modified in place and returned.
    '''
    processor = WikiTextProcessor(wikidoc.wiki_text)

    # NOTE: processor.get_text_only() was previously considered here as an
    # alternative source for the clean text.
    cleaned = processor.get_clean_text()
    wikidoc.clean_text = cleaned

    links = processor.get_links()
    wikidoc.meta[WdNames.LINKS] = links

    return wikidoc