def __init__(self):
    self.MODULE_PATH = pales_index()
    self.HEADERS = Config.values()['headers']
    dataset_path = os.path.normpath(pales_index() + os.path.normcase("/data/dataset.csv"))
    dataset = pandas.read_csv(dataset_path, names=self.HEADERS)
    array = dataset.values
    # Every column except the last holds a feature; the last column is the label.
    self.X = array[:, 0:(len(self.HEADERS) - 1)]
    self.Y = array[:, (len(self.HEADERS) - 1)]
    self.validation_size = 0.20
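# Usage sketch (illustrative, not from the source): with X, Y and
# validation_size prepared by the __init__ above, the hold-out split would
# typically be taken with scikit-learn. The class name "Pales" and the seed
# are assumptions made for this example only.
from sklearn.model_selection import train_test_split

pales = Pales()  # hypothetical name for the class defined above
X_train, X_validation, Y_train, Y_validation = train_test_split(
    pales.X, pales.Y,
    test_size=pales.validation_size,  # reserve 20% of the rows for validation
    random_state=7)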
def translate_to_keras(url: str) -> numpy.ndarray:
    # Start from a zeroed feature vector with one slot per configured header,
    # so the column order always matches the training dataset.
    info = dict()
    for head in Config.values()['headers']:
        info[head] = 0
    info.pop('result', None)  # 'result' is the label column, not a feature
    data = score(url)
    for key in data['comparators'].keys():
        info[key] = data['comparators'][key]
    for key in data['semantic'].keys():
        info[key] = data['semantic'][key]
    # Wrap in an extra list so the result has shape (1, n_features).
    return numpy.array([[v for k, v in info.items()]])
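# Usage sketch (illustrative): the (1, n_features) array built above is the
# shape a Keras model expects for a single sample. "model.h5" is a
# hypothetical path; the source does not say where the trained model lives.
from keras.models import load_model

model = load_model("model.h5")
features = translate_to_keras("http://example.com")
prediction = model.predict(features)  # e.g. shape (1, 1) for a sigmoid output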
def getCache(url: str) -> Union[dict, bool]:
    # Cache files are keyed by the SHA-224 hash of the URL.
    hashed = hashlib.sha224(str(url).encode('utf-8')).hexdigest()
    cache = os.path.normpath(jano_index() + os.path.normcase("/cache/{0}.jcache".format(hashed)))
    if os.path.isfile(cache):
        time = pendulum.from_timestamp(os.path.getmtime(cache))
        now = pendulum.now(Config.values()['timezone'])
        # Evict entries older than two days instead of serving stale data.
        if now.diff(time).in_days() >= 2:
            os.remove(cache)
            return False
        with open(cache, "rb") as handle:
            return pickle.load(handle)
    else:
        return False
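# Companion writer sketch (hypothetical; the source only shows the reader).
# It mirrors getCache: the same SHA-224 file naming under jano's cache
# directory, with the payload pickled to disk. The name "setCache" is an
# assumption, not confirmed by the source.
def setCache(url: str, data: dict) -> None:
    hashed = hashlib.sha224(str(url).encode('utf-8')).hexdigest()
    cache = os.path.normpath(jano_index() + os.path.normcase("/cache/{0}.jcache".format(hashed)))
    with open(cache, "wb") as handle:
        pickle.dump(data, handle)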
def polaridade(texto: str) -> dict:
    """
    Checks the polarity of a text (good/bad sentiment).

    :param texto: Text to analyze
    :return: Dict with the proportion of positive and negative words
    """
    resultado = dict(good=0, bad=0)
    if len(texto) == 0:
        return dict()
    polyglot_text = Text(filter_stopwords(texto), hint_language_code=Config().values()['language'][:2])
    if len(polyglot_text.words) == 0:  # everything was a stopword; avoid dividing by zero
        return dict()
    for w in polyglot_text.words:
        pol = w.polarity
        if pol < 0:
            resultado['bad'] += 1
        elif pol > 0:
            resultado['good'] += 1
    # Normalize the counts into proportions of the (stopword-filtered) word count.
    resultado['bad'] = round(resultado['bad'] / len(polyglot_text.words), 3)
    resultado['good'] = round(resultado['good'] / len(polyglot_text.words), 3)
    return resultado
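# Example (illustrative): polyglot scores each word as -1, 0 or +1, so a
# four-word input with one positive and one negative token would come back
# as {'good': 0.25, 'bad': 0.25}. Actual values depend on polyglot's
# sentiment lexicon for the configured language.
print(polaridade("great service but terrible prices"))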
def gramatica(texto: str) -> dict:
    """
    Analyzes and counts each token of a text, in the format explained at
    http://polyglot.readthedocs.io/en/latest/POS.html

    :param texto: Text to analyze
    :return: Dict with the POS tags found and their relative frequency
    """
    resposta = dict()
    if len(texto) == 0:
        return dict()
    polyglot_text = Text(texto, hint_language_code=Config().values()['language'][:2])
    for word, tag in polyglot_text.pos_tags:
        if tag in resposta.keys():
            resposta[tag] += 1
        else:
            resposta[tag] = 1
    # Convert the raw counts into a share of the total token count.
    total = sum(resposta.values())
    for tag in resposta.keys():
        resposta[tag] = round(resposta[tag] / total, 3)
    return resposta
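# Example (illustrative): polyglot emits universal POS tags, so a short
# sentence might come back as something like
# {'DET': 0.25, 'ADJ': 0.25, 'NOUN': 0.25, 'VERB': 0.25}; the exact tags
# depend on the tagger model for the configured language.
print(gramatica("the quick fox jumps"))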
def __init__(self):
    self.MODULE_PATH = pales_index()
    self.HEADERS = Config.values()['headers']
    dataset_path = os.path.normpath(pales_index() + os.path.normcase("/data/dataset.csv"))
    dataset = pandas.read_csv(dataset_path, names=self.HEADERS)
    array = dataset.values
    self.X = array[:, 0:(len(self.HEADERS) - 1)]
    self.Y = array[:, (len(self.HEADERS) - 1)]
    # Hyperparameter search space: lists enumerate discrete choices,
    # tuples describe ranges (talos-style).
    self.p = {
        'lr': (0.5, 5, 8),
        'first_neuron': [8, 16, 32, 64],
        'hidden_layers': [2, 3, 4, 5],
        'batch_size': (1, 5, 5),
        'epochs': [50, 100, 150],
        'dropout': (0, 0.2, 0.5),
        'weight_regulizer': [None],
        'emb_output_dims': [None],
        'shape': ['brick', 'long_funnel'],
        'kernel_initializer': ['uniform', 'normal'],
        'optimizer': [Adam, Nadam],
        'losses': [binary_crossentropy],
        'activation': [relu, elu],
        'last_activation': [sigmoid]
    }
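# Usage sketch (illustrative; the search space above is shaped the way the
# talos package expects, which is an assumption). "build_model" is a minimal
# talos-style model function that consumes only a subset of the params, and
# "PalesTuner" is a hypothetical name for the class defined above:
import talos
from keras.models import Sequential
from keras.layers import Dense, Dropout

def build_model(x_train, y_train, x_val, y_val, params):
    # Build a small dense network from one sampled parameter combination
    # and return (history, model), as talos requires.
    model = Sequential()
    model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1],
                    kernel_initializer=params['kernel_initializer'],
                    activation=params['activation']))
    model.add(Dropout(params['dropout']))
    model.add(Dense(1, kernel_initializer=params['kernel_initializer'],
                    activation=params['last_activation']))
    # A real setup would also feed params['lr'] through talos's lr_normalizer;
    # the default learning rate is used here to keep the sketch short.
    model.compile(optimizer=params['optimizer'](), loss=params['losses'],
                  metrics=['accuracy'])
    out = model.fit(x_train, y_train, validation_data=(x_val, y_val),
                    batch_size=params['batch_size'], epochs=params['epochs'],
                    verbose=0)
    return out, model

tuner = PalesTuner()
talos.Scan(x=tuner.X, y=tuner.Y, params=tuner.p, model=build_model)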
def count_words(texto: str) -> int:
    # Word count after stopword filtering, using the configured language.
    polyglot_text = Text(filter_stopwords(texto), hint_language_code=Config.values()['language'][:2])
    return len(polyglot_text.words)