Exemplo n.º 1
0
def replace(text):
    """Substitute every ``UNICODE_EMOJI`` key found in *text* with its value.

    A new ``KeywordProcessor`` is built on each call.  Each key of
    ``UNICODE_EMOJI`` is registered as a keyword, and the matching value,
    padded with one space on each side, is used as the replacement text.
    The substituted string is returned with leading and trailing
    whitespace stripped.
    """
    processor = KeywordProcessor()
    # An empty set means no character is considered part of a word, so a
    # keyword can match even when it is glued to neighbouring characters.
    processor.non_word_boundaries = set()

    # Shape expected by add_keywords_from_dict: replacement -> [keywords].
    # presumably UNICODE_EMOJI maps emoji characters to their textual names
    # -- verify against the emoji package version in use.
    replacements = {}
    for emoji_char, alias in UNICODE_EMOJI.items():
        replacements[" {} ".format(alias)] = [emoji_char]
    processor.add_keywords_from_dict(replacements)

    substituted = processor.replace_keywords(text)
    return substituted.strip()
Exemplo n.º 2
0
def load_stopwords_processor(stopwords_file):
    """Build a ``KeywordProcessor`` that blanks out Portuguese stopwords.

    Stopwords are taken from two sources: *stopwords_file* (one entry per
    line, surrounding whitespace stripped) and NLTK's ``'portuguese'``
    stopword list.  Every stopword is registered with a single space as
    its replacement.

    Args:
        stopwords_file: File to read the extra stopwords from; passed
            straight to ``open()``.

    Returns:
        The configured processor.  It additionally carries a
        ``transform(txt)`` attribute that replaces the stopwords in *txt*
        and collapses every run of whitespace into a single space.
    """
    pt_chars = set('áãâéêíóõôúç')
    kp = KeywordProcessor()
    # Extend the default word characters with accented Portuguese letters
    # so they are treated as part of a word rather than as a boundary.
    kp.non_word_boundaries = kp.non_word_boundaries | pt_chars

    # The context manager guarantees the file handle is closed once the
    # lines have been read.
    # NOTE(review): the file is decoded with the platform default encoding;
    # confirm whether an explicit encoding (e.g. UTF-8) should be required.
    with open(stopwords_file) as handle:
        file_stopwords = [line.strip() for line in handle]

    for word in file_stopwords:
        kp.add_keyword(word, ' ')
    for word in nltk.corpus.stopwords.words('portuguese'):
        kp.add_keyword(word, ' ')

    def transform(txt):
        """Replace stopwords in *txt* and normalise whitespace."""
        return " ".join(kp.replace_keywords(txt).split())

    # Attached to the instance so callers can use kp.transform(text).
    kp.transform = transform
    return kp