import os
import re

import pandas as pd

import metrics

p_dict = {}
r_dict = {}
f_dict = {}
w_dict = {}

# Collect every result CSV in the results directory
files = [f for f in os.listdir(result_dir) if '.csv' in f]

for f in files:
    # The training-set percentage is encoded in the file name
    pattern = re.compile('[0-9]+')
    percentage = int(pattern.search(f).group())

    result = pd.read_csv(result_dir + 'result_' + str(percentage) + '.csv',
                         index_col=0)

    if how == 'soft':
        matrix = metrics.confusion_matrix(result)
    elif how == 'hard':
        matrix = metrics.hard_matrix(result)

    matrix.to_csv(path + 'Metricas/' + how + '_matrix_' + str(percentage) + '.csv')

    w_dict[percentage] = metrics.weighted_f_score(matrix)

    # Per-class precision, recall and f-score
    clases = matrix.columns.tolist()
    p = [metrics.precision(matrix, c) for c in clases]
    r = [metrics.recall(matrix, c) for c in clases]
    f = [metrics.f_score(matrix, c) for c in clases]

    p_dict[percentage] = p
    r_dict[percentage] = r
    f_dict[percentage] = f

save_dir = path + 'Metricas/'
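# A minimal sketch of how the collected dicts might be persisted under
# save_dir; the output file names below are illustrative assumptions, not
# taken from the original script. Rows are indexed by percentage, columns by
# the per-class order in `clases`.
pd.DataFrame.from_dict(p_dict, orient='index').to_csv(save_dir + 'precision_' + how + '.csv')
pd.DataFrame.from_dict(r_dict, orient='index').to_csv(save_dir + 'recall_' + how + '.csv')
pd.DataFrame.from_dict(f_dict, orient='index').to_csv(save_dir + 'f_score_' + how + '.csv')
pd.Series(w_dict).to_csv(save_dir + 'weighted_f_score_' + how + '.csv')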
from functools import partial
from multiprocessing import Pool

import metrics
import parallel

feature_filter = args.feature_filter

paths = [sets_path + catalog + '_sampled_' + str(i) + '.csv'
         for i in xrange(100)]
paths = paths[0:10]

# Train and classify with decision trees
partial_fit = partial(parallel.fit_tree, feature_filter=feature_filter,
                      folds=folds)

pool = Pool(processes=n_processes, maxtasksperchild=2)
resultados_tree = pool.map(partial_fit, paths)
pool.close()
pool.join()

# Print and save the results obtained
for i, r in enumerate(resultados_tree):
    r.to_csv(result_path + 'result_tree_' + str(i) + '.csv')
    matrix = metrics.hard_matrix(r)
    print 'Tree ' + str(i) + ' f_score: ' + str(metrics.weighted_f_score(matrix))

# Train and classify with random forests
partial_fit = partial(parallel.fit_rf, feature_filter=feature_filter,
                      folds=folds)

pool = Pool(processes=n_processes, maxtasksperchild=2)
resultados_rf = pool.map(partial_fit, paths)
pool.close()
pool.join()

# Print and save the results obtained
for i, r in enumerate(resultados_rf):
    r.to_csv(result_path + 'result_rf_' + str(i) + '.csv')
    matrix = metrics.hard_matrix(r)
    print 'RF ' + str(i) + ' f_score: ' + str(metrics.weighted_f_score(matrix))
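# For reference, a hypothetical sketch of the interface parallel.fit_tree (and
# fit_rf) would need for the pool.map calls above: one set path in, plus the
# keyword arguments bound with functools.partial, and a DataFrame out whose
# 'trust' column holds the classifier's confidence. The body is an assumption
# built on scikit-learn's pre-0.18 cross_validation API (to match the Python 2
# code above); the 'class', 'original' and 'predicted' names are illustrative,
# not the project's actual column names.
import pandas as pd


def fit_tree(train_path, feature_filter=None, folds=10):
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.cross_validation import KFold

    data = pd.read_csv(train_path, index_col=0)
    if feature_filter is not None:
        data = data[feature_filter + ['class']]

    X = data.drop('class', axis=1).values
    y = data['class'].values

    chunks = []
    for train_index, test_index in KFold(len(y), n_folds=folds):
        clf = DecisionTreeClassifier()
        clf.fit(X[train_index], y[train_index])

        # Keep the predicted class together with its probability as 'trust'
        probs = clf.predict_proba(X[test_index])
        chunks.append(pd.DataFrame(
            {'original': y[test_index],
             'predicted': clf.classes_[probs.argmax(axis=1)],
             'trust': probs.max(axis=1)},
            index=data.index[test_index]))

    return pd.concat(chunks)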
valores_accuracy = []
valores_recall = []
valores_fscore = []
x_values = []
x_values_fscore = []

# For each confidence percentage
for i in xrange(100):
    # Keep only the predictions whose confidence exceeds the threshold
    porcentaje = float(i) / 100
    aux = result[result['trust'] > porcentaje]

    # matrix = metrics.confusion_matrix(aux)
    matrix = metrics.hard_matrix(aux)

    # A negative value means no predictions exceeded this confidence level
    precision = metrics.accuracy(matrix, clase)
    if precision >= 0:
        valores_accuracy.append(precision)
        valores_recall.append(metrics.recall(matrix, clase))
        x_values.append(porcentaje)

    # A negative value means no predictions exceeded this confidence level
    f_score = metrics.f_score(matrix, clase)
    if f_score >= 0:
        valores_fscore.append(f_score)
        x_values_fscore.append(porcentaje)

# graf(clase, x_values, valores_accuracy, 'Accuracy')
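# A minimal plotting sketch for the curves collected above. The project's own
# helper `graf` is commented out, so matplotlib is used here as an assumed
# stand-in; labels and layout are illustrative.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(x_values, valores_accuracy, label='Accuracy')
plt.plot(x_values, valores_recall, label='Recall')
plt.plot(x_values_fscore, valores_fscore, label='F-score')
plt.xlabel('Confidence threshold')
plt.ylabel('Metric value')
plt.title(clase)
plt.legend(loc='lower left')
plt.show()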