# NOTE(review): the string literal below holds what looks like disabled legacy
# code. It is preserved verbatim and is never executed.
""" cliente = base.iniciar_conexao() con_colecao = base.iniciar_colecao(cliente, "colecao_processada") def load_base(): all_textos = base.carrega_colecao_completo("brumadinhoinflux", "colecao_completa") return all_textos # print('spaCy Version: %s' % (spacy.__version__)) spacy_nlp = spacy.load('pt') nlp = spacy.load("pt_core_news_sm") spacy_stopwords = spacy.lang.pt.stop_words.STOP_WORDS set_stop = stopWords.load_stop_words() # combina as duas bases de stopWords set_stop.union(spacy_stopwords) all_textos = load_base() def to_int_str(data): return str(int(data)) def remover_acentos(txt): return normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII') def common_words(tokens):
"""
import ast
import re
# import base
import string
from collections import Counter
from unicodedata import normalize

import emoji
import spacy

import stopWords.StopWords as stopWords

# print('spaCy Version: %s' % (spacy.__version__))

# Portuguese spaCy pipeline, loaded once at import time.
nlp = spacy.load("pt_core_news_sm")

# spaCy's built-in Portuguese stop words. The attribute chain resolves because
# loading the Portuguese pipeline above imports ``spacy.lang.pt``.
spacy_stopwords = spacy.lang.pt.stop_words.STOP_WORDS

# Stop words from the stopWords helper (default source).
set_stop = stopWords.load_stop_words()

# Load the adjective list.
set_adjetivos = stopWords.load_stop_words("adjetivos.txt")

# Combine the stop-word sources: helper defaults + spaCy + adjectives.
# ``union`` returns a new collection, so the result is rebound each time.
set_stop = set_stop.union(spacy_stopwords)
set_stop = set_stop.union(set_adjetivos)


def remove_emoji(text):
    """Return ``text`` with every emoji removed.

    ``emoji.get_emoji_regexp`` was deprecated and then removed in emoji 2.0,
    so ``emoji.replace_emoji`` is used whenever the installed package
    provides it; the regexp helper is kept only as a fallback for old
    releases.

    Args:
        text: String that may contain emoji.

    Returns:
        A copy of ``text`` in which each emoji is replaced by an empty string.
    """
    if hasattr(emoji, "replace_emoji"):
        return emoji.replace_emoji(text, replace="")
    # Legacy emoji releases without ``replace_emoji``.
    return emoji.get_emoji_regexp().sub("", text)


def to_int_str(data):
    """Convert ``data`` to an integer and return it as a string.

    Args:
        data: Any value accepted by ``int()`` (e.g. ``3.0`` or ``"7"``).

    Returns:
        The decimal string of ``int(data)``; floats are truncated toward zero.

    Raises:
        ValueError: If ``data`` cannot be parsed as an integer.
        TypeError: If ``data`` is of a type ``int()`` does not accept.
    """
    return str(int(data))