def main_validacao():
    """Validation run: fit on the training folder, predict the validation folder.

    Seeds ``random`` and ``numpy.random`` with ``SEED``, builds the training
    and validation sets from the audio files via ``create_set``, fits a
    75-tree random forest and prints, in this order: the per-class
    classification report, the labeled confusion matrix and the accuracy
    (as a percentage) obtained after regrouping the labels into captchas.
    """
    random.seed(SEED)
    np.random.seed(SEED)

    # Build the datasets from the audio files.
    train_features, train_labels = create_set(PATH_TREINAMENTO)
    val_features, val_labels = create_set(PATH_VALIDACAO)

    # L2 normalisation is currently disabled:
    # train_features = normalize(train_features, axis=0, norm='l2')
    # val_features = normalize(val_features, axis=0, norm='l2')

    model = RandomForestClassifier(n_estimators=75)
    model.fit(train_features, train_labels)

    # Final classification and evaluation of the results.
    print("\nAvaliação dos Resultados\n")
    predicted_labels = model.predict(val_features)

    print("\nPerformance do Modelo para cada classe de Caractér\n")
    print(classification_report(val_labels, predicted_labels))

    print("\nMatriz de confusão\n")
    print(labeled_confusion_matrix(val_labels, predicted_labels))

    # Regroup the per-audio labels so that each group forms one captcha.
    expected_captchas = rearrange(val_labels, AUDIOS_POR_ARQUIVO)
    predicted_captchas = rearrange(predicted_labels, AUDIOS_POR_ARQUIVO)
    captcha_accuracy_pct = accuracy_score(expected_captchas, predicted_captchas) * 100

    message = "\nA acurácia obtida com o modelo para a predição dos captchas no conjunto de validação foi de %2.2f%%.\n"
    print(message % captcha_accuracy_pct)
import re
import os
import util

# Gets all lines in xls or csv files by given regex and folder.
#
# Prompts for a folder and a file-name regex, extracts every e-mail-like
# token from the matching files and copies the sorted result of
# util.create_set(...) to the clipboard. The regex ending in '.csv' selects
# plain-text (CSV) mode; anything else is opened as a workbook through xlrd.

folderName = raw_input('Folder: ')
filePattern = raw_input('Regex: ')
is_csv = filePattern.endswith('.csv')

if not is_csv:
    # xlrd was used without ever being imported (NameError on every non-CSV
    # run). Import it only on this path so CSV-only runs still work on
    # machines where xlrd is not installed.
    import xlrd

# Compile both patterns once instead of on every file / row / cell.
file_regex = re.compile(filePattern)
mail_regex = re.compile(r'[\w\.-]+@[\w\.-]+')

mails = []
read = 1
for file_name in os.listdir(folderName):
    if not file_regex.match(file_name):
        continue
    path = os.path.join(folderName, file_name)
    if is_csv:
        with open(path) as csvfile:
            for row in csvfile:
                mails.extend(mail_regex.findall(row))
    else:
        book = xlrd.open_workbook(path)
        for sheet in book.sheets():
            for row in sheet.get_rows():
                for cell in row:
                    # Only text cells can contain addresses; skip numbers,
                    # dates and empty cells.
                    if isinstance(cell.value, unicode):
                        mails.extend(mail_regex.findall(cell.value))
    # NOTE(review): progress is reported once per matched file; the original
    # indentation was lost, so confirm this placement.
    print('Files read: ' + repr(read))
    read += 1

print('Filtering double mails (total mails ' + repr(len(mails)) + ')')
# CSV rows are byte strings while xlrd text cells are unicode, so the sort key
# has to match the mode.
sort_key = str.lower if is_csv else unicode.lower
unique_mails = util.create_set(mails, lambda s: s.lower())
util.copy_to_clipboard(sorted(unique_mails, key=sort_key))
__author__ = 'jens'

import util

# Remove all duplicate entries, and every entry that ends with one of the
# extensions given in the second list. The sorted remainder is copied to the
# clipboard.


def _lowered(text):
    # Key function handed to util.create_set.
    return text.lower()


print('Base set')
base = util.create_set(util.raw_multi_line_input(), _lowered)
print('Extensions to remove from base')
to_del = util.create_set(util.raw_multi_line_input(), _lowered)

for suffix in to_del:
    # Collect the matches from a snapshot first, so base is never mutated
    # while it is being iterated.
    doomed = [candidate for candidate in set(base)
              if candidate.lower().endswith(suffix.lower())]
    for candidate in doomed:
        base.remove(candidate)

remaining = sorted(base, key=str.lower)
util.copy_to_clipboard(remaining)
__author__ = 'jens'

import util

# Read multi-line input and copy all unique lines to the clipboard.
# Leading and trailing spaces are trimmed as well.
# NOTE(review): util.trim runs after util.create_set and the sort, so lines
# that differ only in surrounding whitespace may both survive - confirm
# against util.trim / util.create_set.

multiLineInput = util.raw_multi_line_input()

uniqueLines = util.create_set(multiLineInput, lambda s: s.lower())
sortedLines = sorted(uniqueLines, key=str.lower)
uniqueSorted = util.trim(sortedLines)

util.copy_to_clipboard(uniqueSorted)
__author__ = 'jens'

import util

# Take the multi-line input, keep one copy of each line (as decided by
# util.create_set with a lower-casing key), sort case-insensitively, trim
# leading/trailing spaces and put the result on the clipboard.
# NOTE(review): trimming is the last step, after de-duplication and sorting;
# verify that this order is intended.


def _lower_key(line):
    # Key function handed to util.create_set.
    return line.lower()


multiLineInput = util.raw_multi_line_input()
distinctLines = util.create_set(multiLineInput, _lower_key)
orderedLines = sorted(distinctLines, key=str.lower)
uniqueSorted = util.trim(orderedLines)
util.copy_to_clipboard(uniqueSorted)