示例#1
0
 def __init__(self):
     """Load the training dataset and split it into feature/label arrays."""
     self.MODULE_PATH = pales_index()
     self.HEADERS = Config.values()['headers']
     # Reuse the already-resolved module path instead of calling pales_index() again.
     dataset_path = os.path.normpath(self.MODULE_PATH + os.path.normcase("/data/dataset.csv"))
     dataset = pandas.read_csv(dataset_path, names=self.HEADERS)
     array = dataset.values
     # The last header column is the label; everything before it is a feature.
     n_features = len(self.HEADERS) - 1
     self.X = array[:, 0:n_features]
     self.Y = array[:, n_features]
     # Fraction of samples held out for validation.
     self.validation_size = 0.20
示例#2
0
def translate_to_keras(url: str) -> numpy.ndarray:
    """Score *url* and flatten the result into a 1xN feature matrix for Keras.

    :param url: URL to be scored
    :return: 2-D numpy array of shape (1, n_features) whose column order
             follows the configured headers (minus the 'result' label column)
    """
    # Start every known header at 0 so the column order matches the training data.
    info = {head: 0 for head in Config.values()['headers']}
    # 'result' is the label column; it is not a model input.
    info.pop('result', None)
    data = score(url)
    # Overlay the computed feature values onto the zeroed template.
    info.update(data['comparators'])
    info.update(data['semantic'])
    return numpy.array([list(info.values())])
示例#3
0
 def getCache(url: str) -> Union[dict, bool]:
     """Return the cached result for *url*, or False when absent or expired.

     :param url: URL whose cached entry should be looked up
     :return: the unpickled cache payload, or False if there is no valid entry
     """
     hashed = hashlib.sha224(str(url).encode('utf-8')).hexdigest()
     cache = os.path.normpath(jano_index() + os.path.normcase("/cache/{0}.jcache".format(hashed)))
     if not os.path.isfile(cache):
         return False
     time = pendulum.from_timestamp(os.path.getmtime(cache))
     now = pendulum.now(Config.values()['timezone'])
     # Cache entries expire after two days; expired files are removed eagerly.
     if now.diff(time).in_days() >= 2:
         os.remove(cache)
         return False
     # NOTE(review): pickle is only safe because this cache dir is written by
     # this application itself — never point it at untrusted files.
     with open(cache, "rb") as handle:  # fix: the file handle was previously leaked
         return pickle.load(handle)
示例#4
0
 def polaridade(texto: str) -> dict:
     """Measure the polarity (good/bad sentiment) of a text.

     :param texto: Text to be analysed
     :return: Dict with 'good' and 'bad' word ratios rounded to 3 places;
              empty dict when the text is empty or has no scorable words
     """
     resultado = dict(good=0, bad=0)
     if len(texto) == 0:
         return dict()
     polyglot_text = Text(filter_stopwords(texto), hint_language_code=Config().values()['language'][:2])
     total = len(polyglot_text.words)
     # Guard: stopword filtering can leave zero words, which previously caused
     # a ZeroDivisionError in the ratio computation below.
     if total == 0:
         return dict()
     for w in polyglot_text.words:
         pol = w.polarity
         if pol < 0:
             resultado['bad'] += 1
         elif pol > 0:
             resultado['good'] += 1
     resultado['bad'] = round(resultado['bad'] / total, 3)
     resultado['good'] = round(resultado['good'] / total, 3)
     return resultado
示例#5
0
 def gramatica(texto: str) -> dict:
     """Count each POS token of a text (tag set: http://polyglot.readthedocs.io/en/latest/POS.html).

     :param texto: Text to be analysed
     :return: Dict mapping each tag present to its fraction of all tags,
              rounded to 3 places; empty dict for empty/untaggable text
     """
     resposta = dict()
     if len(texto) == 0:
         return dict()
     polyglot_text = Text(texto, hint_language_code=Config().values()['language'][:2])
     for word, tag in polyglot_text.pos_tags:
         # dict.get collapses the previous if/else counting branch.
         resposta[tag] = resposta.get(tag, 0) + 1
     total = sum(resposta.values())
     # Guard: a text yielding no POS tags previously raised ZeroDivisionError.
     if total == 0:
         return dict()
     # Convert raw counts into fractions of the whole.
     for tag in resposta:
         resposta[tag] = round(resposta[tag] / total, 3)
     return resposta
示例#6
0
 def __init__(self):
     """Load the dataset and define the hyper-parameter search space."""
     self.MODULE_PATH = pales_index()
     self.HEADERS = Config.values()['headers']
     # Reuse the already-resolved module path instead of calling pales_index() again.
     dataset_path = os.path.normpath(self.MODULE_PATH + os.path.normcase("/data/dataset.csv"))
     dataset = pandas.read_csv(dataset_path, names=self.HEADERS)
     array = dataset.values
     # The last header column is the label; everything before it is a feature.
     n_features = len(self.HEADERS) - 1
     self.X = array[:, 0:n_features]
     self.Y = array[:, n_features]
     # Hyper-parameter search space.
     # NOTE(review): the 3-tuples look like (min, max, steps) scan ranges and the
     # lists are explicit choice sets — confirm against the tuning framework used.
     self.p = {'lr': (0.5, 5, 8),
               'first_neuron': [8, 16, 32, 64],
               'hidden_layers': [2, 3, 4, 5],
               'batch_size': (1, 5, 5),
               'epochs': [50, 100, 150],
               'dropout': (0, 0.2, 0.5),
               'weight_regulizer': [None],
               'emb_output_dims': [None],
               'shape': ['brick', 'long_funnel'],
               'kernel_initializer': ['uniform', 'normal'],
               'optimizer': [Adam, Nadam],
               'losses': [binary_crossentropy],
               'activation': [relu, elu],
               'last_activation': [sigmoid]}
示例#7
0
def count_words(texto: str) -> int:
    """Return how many non-stopword tokens *texto* contains."""
    lang = Config.values()['language'][:2]
    tokens = Text(filter_stopwords(texto), hint_language_code=lang).words
    return len(tokens)