示例#1
0
def complex_processing(text):
    """Run the full cleaning pipeline on ``text`` and return the result.

    The steps listed in ``pipeline`` are applied one after another, each
    receiving the previous step's output. ``tp.perform_lemmatization`` is
    applied last, and only when the cleaned text is not the empty string.
    """
    pipeline = (
        tp.unicode_normalize,
        tp.lowercasing,
        tp.general_regex,
        remove_referenced_name,
        tp.get_decontracted_form,
        tp.keep_alpha_space,
        tp.remove_repeating_chars,
        tp.remove_stopwords,
    )
    for step in pipeline:
        text = step(text)

    # Lemmatization is skipped when cleaning left an empty string.
    if text != "":
        return tp.perform_lemmatization(text)
    return text
示例#2
0
def simple_processing(text):
    """Run the light cleaning pipeline on ``text`` and return the result.

    Calls ``tp.unicode_normalize``, then ``tp.general_regex``, then
    ``remove_referenced_name``, feeding each result into the next call.
    """
    normalized = tp.unicode_normalize(text)
    regex_cleaned = tp.general_regex(normalized)
    return remove_referenced_name(regex_cleaned)
 def test_unicode_normalize(self):
     """Check that ``tp.unicode_normalize`` returns exactly ``str`` for a plain input."""
     normalized = tp.unicode_normalize("word word")
     # Exact type match on purpose: subclasses of str do not satisfy this check.
     assert type(normalized) is str