def complex_processing(text):
    """Run the full cleaning pipeline on ``text`` and return the result.

    The input is passed, in order, through ``tp.unicode_normalize``,
    ``tp.lowercasing``, ``tp.general_regex``, ``remove_referenced_name``,
    ``tp.get_decontracted_form``, ``tp.keep_alpha_space``,
    ``tp.remove_repeating_chars`` and ``tp.remove_stopwords``.  If anything
    other than the empty string is left afterwards, the result is also passed
    through ``tp.perform_lemmatization``.
    """
    # Order matters: every stage consumes the output of the previous one.
    stages = (
        tp.unicode_normalize,
        tp.lowercasing,
        tp.general_regex,
        remove_referenced_name,
        tp.get_decontracted_form,
        tp.keep_alpha_space,
        tp.remove_repeating_chars,
        tp.remove_stopwords,
    )
    cleaned = text
    for stage in stages:
        cleaned = stage(cleaned)

    # Lemmatization is skipped when the earlier stages left an empty string.
    if cleaned != "":
        return tp.perform_lemmatization(cleaned)
    return cleaned
def simple_processing(text):
    """Run the light cleaning pipeline on ``text`` and return the result.

    Applies ``tp.unicode_normalize``, then ``tp.general_regex``, then
    ``remove_referenced_name``, each on the output of the previous step.
    """
    normalized = tp.unicode_normalize(text)
    regex_cleaned = tp.general_regex(normalized)
    return remove_referenced_name(regex_cleaned)
def test_unicode_normalize(self):
    """Check that ``tp.unicode_normalize`` returns a string for plain input."""
    result = tp.unicode_normalize("word word")
    # isinstance is the idiomatic type check (PEP 8); it also accepts
    # ``str`` subclasses, which an exact ``type(...) == str`` would reject.
    assert isinstance(result, str)