def is_oov(word):
    """Return True when *word* is not in ``known_words`` and is not numeric-looking.

    The word is normalized with ``TextNormalizer.normalize_word`` before the
    membership test against ``known_words``; the emptiness and leading-digit
    checks are done on the raw, un-normalized *word*.

    NOTE(review): "oov" presumably stands for "out of vocabulary" -- confirm.
    """
    normalized = TextNormalizer.normalize_word(word)
    if normalized in known_words:
        return False
    # An empty token has no first character to inspect.
    if len(word) == 0:
        return False
    return not word[0].isdigit()
def is_NE(word):
    """Tell whether *word* passes the capitalized, non-function-word checks.

    A word qualifies when the raw *word* is longer than one character, its
    normalized form (``TextNormalizer.normalize_word``) is not in
    ``funcwords``, ``nonstop`` accepts the normalized form, and the raw
    *word* starts with an uppercase character.

    NOTE(review): "NE" presumably means "named entity" -- confirm.
    """
    normalized = TextNormalizer.normalize_word(word)
    if len(word) <= 1:
        return False
    if normalized in funcwords:
        return False
    # Kept as a single ``and`` expression so that a falsy result from
    # ``nonstop`` is returned as-is and ``isupper`` is only consulted after it.
    return nonstop(normalized) and word[0].isupper()
def is_FW(word):
    """Return True when the normalized *word* is non-empty and is rejected by
    both ``is_cyrword`` and ``is_digit``.

    All checks operate on the form returned by
    ``TextNormalizer.normalize_word``.

    NOTE(review): "FW" presumably means "foreign word" (i.e. non-Cyrillic,
    non-numeric token) -- confirm.
    """
    normalized = TextNormalizer.normalize_word(word)
    if len(normalized) == 0:
        return False
    # ``is_digit`` is only evaluated when ``is_cyrword`` is falsy.
    return not is_cyrword(normalized) and not is_digit(normalized)