def get_cross_validation_results(self, param_dict, print_per_fold=False):
    """Evaluate one parameter setting over every cross-validation fold.

    For each fold the train/validation tasks are loaded with
    ``helper.loadCrossValData`` and handed, together with ``param_dict``, to
    ``self.get_preds_true_for_task``. A fold is skipped (and contributes to
    none of the averages) when either the predictions or the true labels
    come back as ``None``.

    Args:
        param_dict: Dictionary describing the setting under evaluation. It
            is forwarded to ``self.get_preds_true_for_task`` and updated in
            place with the averaged validation metrics.
        print_per_fold: When true, print the metrics of every evaluated fold
            followed by the per-fold accuracy and AUC lists.

    Returns:
        The same ``param_dict`` object, with ``val_acc``, ``val_auc``,
        ``val_f1``, ``val_precision`` and ``val_recall`` set to the
        NaN-ignoring mean over the evaluated folds. If every fold was
        skipped the lists are empty and ``np.nanmean`` yields NaN (emitting
        a RuntimeWarning).
    """
    metric_names = ('acc', 'auc', 'f1', 'precision', 'recall')
    fold_scores = {name: [] for name in metric_names}

    for f in range(self.num_cross_folds):
        train_tasks, val_tasks = helper.loadCrossValData(
            self.datasets_path, self.file_prefix, f, fix_y=True)

        preds, true_y = self.get_preds_true_for_task(
            train_tasks, val_tasks, param_dict)
        if preds is None or true_y is None:
            continue

        # Explicit five-way unpacking keeps the original failure mode if the
        # helper ever returns a different number of values.
        acc, auc, f1, precision, recall = helper.computeAllMetricsForPreds(
            preds, true_y)
        for name, value in zip(metric_names,
                               (acc, auc, f1, precision, recall)):
            fold_scores[name].append(value)

        if print_per_fold:
            # One pre-formatted string prints identically under the Python 2
            # print statement and the Python 3 print function.
            print("Fold %s acc %s auc %s f1 %s precision %s recall %s"
                  % (f, acc, auc, f1, precision, recall))

    if print_per_fold:
        print("accs for all folds %s" % (fold_scores['acc'],))
        print("aucs for all folds %s" % (fold_scores['auc'],))

    # Add results to the dictionary (same key order as before).
    for name in metric_names:
        param_dict['val_' + name] = np.nanmean(fold_scores[name])

    return param_dict
def getCrossValidationResults(self, results_dict, C, beta, kernel, v,
                              regularizer, save_plots=False,
                              print_per_fold=True):
    """Cross-validate one MTMKL parameter setting and record mean metrics.

    For every fold the data is loaded with ``helper.loadCrossValData`` and a
    model is trained through ``self.initializeAndTrainMTMKL``. When training
    reports no convergence, NaN is recorded for each fold-level metric (so
    ``np.nanmean`` ignores that fold) and nothing is recorded per task.
    Otherwise predictions from ``self.classifier.predictOneTask`` are pooled
    over all tasks and scored with ``helper.computeAllMetricsForPreds``.
    When ``self.users_as_tasks`` is false, each task is also scored on its
    own.

    Args:
        results_dict: Dictionary updated in place with the averaged metrics.
        C, beta, kernel, v, regularizer: Forwarded unchanged to
            ``self.initializeAndTrainMTMKL``.
        save_plots: Not used by this method; kept so the signature stays
            compatible with existing callers.
        print_per_fold: When true, print the per-task (if tracked) and
            per-fold metrics. The accuracy/AUC lists over all folds are
            printed regardless of this flag.

    Returns:
        The same ``results_dict`` object with ``val_acc``, ``val_auc``,
        ``val_f1``, ``val_precision`` and ``val_recall`` set, plus, when
        ``self.users_as_tasks`` is false, ``TaskAcc-<label>``,
        ``TaskAuc-<label>``, ``TaskF1-<label>``, ``TaskPrecision-<label>``
        and ``TaskRecall-<label>`` for every task, where ``<label>`` is
        ``helper.getFriendlyLabelName`` applied to the task's ``'Name'``.
    """
    metric_names = ('acc', 'auc', 'f1', 'precision', 'recall')
    task_key_prefixes = ('TaskAcc-', 'TaskAuc-', 'TaskF1-',
                         'TaskPrecision-', 'TaskRecall-')
    track_per_task = not self.users_as_tasks

    fold_scores = {name: [] for name in metric_names}
    if track_per_task:
        per_task_scores = [{name: [] for name in metric_names}
                           for _ in range(self.n_tasks)]

    for f in range(self.num_cross_folds):
        train_tasks, val_tasks = helper.loadCrossValData(
            self.datasets_path, self.file_prefix, f, reshape=False,
            fix_y=True)
        converged = self.initializeAndTrainMTMKL(
            train_tasks, C, beta, kernel, v, regularizer)
        if not converged:
            # Keep one entry per fold; nanmean skips these placeholders.
            for name in metric_names:
                fold_scores[name].append(np.nan)
            continue

        # Get results!
        fold_preds = []
        fold_true_y = []
        for t in range(self.n_tasks):
            preds = self.classifier.predictOneTask(val_tasks, t)
            true_y = list(val_tasks[t]['Y'].flatten())

            # A task without validation labels in this fold has nothing to
            # score; skip it instead of computing metrics on empty input.
            if not true_y:
                continue

            if track_per_task:
                # save the per-task results
                (t_acc, t_auc, t_f1, t_precision,
                 t_recall) = helper.computeAllMetricsForPreds(preds, true_y)
                task_values = (t_acc, t_auc, t_f1, t_precision, t_recall)
                for name, value in zip(metric_names, task_values):
                    per_task_scores[t][name].append(value)
                if print_per_fold:
                    # One pre-formatted string prints identically under the
                    # Python 2 print statement and Python 3 print function.
                    print("Fold %s Task %s acc %s auc %s f1 %s "
                          "precision %s recall %s"
                          % (f, val_tasks[t]['Name'], t_acc, t_auc, t_f1,
                             t_precision, t_recall))

            fold_preds.extend(preds)
            fold_true_y.extend(true_y)

        acc, auc, f1, precision, recall = helper.computeAllMetricsForPreds(
            fold_preds, fold_true_y)
        for name, value in zip(metric_names,
                               (acc, auc, f1, precision, recall)):
            fold_scores[name].append(value)
        if print_per_fold:
            print("Fold %s acc %s auc %s f1 %s precision %s recall %s"
                  % (f, acc, auc, f1, precision, recall))

    print("accs for all folds %s" % (fold_scores['acc'],))
    print("aucs for all folds %s" % (fold_scores['auc'],))

    # Add results to the dictionary
    for name in metric_names:
        results_dict['val_' + name] = np.nanmean(fold_scores[name])

    # Add per-task results to the dictionary. Task names are read from the
    # validation tasks of the last fold that was loaded.
    if track_per_task:
        for t in range(self.n_tasks):
            label = helper.getFriendlyLabelName(val_tasks[t]['Name'])
            for prefix, name in zip(task_key_prefixes, metric_names):
                results_dict[prefix + label] = np.nanmean(
                    per_task_scores[t][name])

    return results_dict
def getCrossValidationResults(self, results_dict, tau10, tau20, sigma_mult,
                              mu_mult, save_plots=False,
                              print_per_fold=False):
    """Cross-validate one HBLR parameter setting and record mean metrics.

    For every fold the data is loaded with ``helper.loadCrossValData``, the
    model is rebuilt via ``self.initializeHBLRModel``, configured through
    ``self.setClassifierToSetting`` and trained with
    ``self.classifier.trainUntilConverged``. The value of
    ``self.classifier.K`` after training is stored as that fold's cluster
    count. Predictions from ``self.classifier.predictBinary`` are pooled
    over all tasks and scored with ``helper.computeAllMetricsForPreds``;
    tasks with no predictions or no labels in a fold are skipped. When
    ``self.users_as_tasks`` is false, each task is also scored on its own.

    Args:
        results_dict: Dictionary updated in place with the averaged metrics.
        tau10, tau20, sigma_mult, mu_mult: Forwarded unchanged to
            ``self.setClassifierToSetting``.
        save_plots: When true, a same-task matrix is accumulated over the
            folds with ``self.updateSameTaskMatrix``, then
            ``self.plotAccuracyAucAndClusters`` and ``self.saveHintonPlot``
            are called and the matrix is written to
            ``results_path + save_prefix + "-same_task_matrix.csv"``.
        print_per_fold: When true, print the per-task (if tracked) and
            per-fold metrics. The accuracy/AUC/cluster lists over all folds
            are printed regardless of this flag.

    Returns:
        The same ``results_dict`` object with ``val_acc``, ``val_auc``,
        ``val_f1``, ``val_precision``, ``val_recall`` and ``num_clusters``
        set, plus, when ``self.users_as_tasks`` is false,
        ``TaskAcc-<label>``, ``TaskAuc-<label>``, ``TaskF1-<label>``,
        ``TaskPrecision-<label>`` and ``TaskRecall-<label>`` for every task,
        where ``<label>`` is ``helper.getFriendlyLabelName`` applied to the
        task's ``'Name'``.
    """
    metric_names = ('acc', 'auc', 'f1', 'precision', 'recall')
    task_key_prefixes = ('TaskAcc-', 'TaskAuc-', 'TaskF1-',
                         'TaskPrecision-', 'TaskRecall-')
    track_per_task = not self.users_as_tasks

    if save_plots:
        same_task_matrix = np.zeros((self.n_tasks, self.n_tasks))

    clusters = [0] * self.num_cross_folds

    fold_scores = {name: [] for name in metric_names}
    if track_per_task:
        per_task_scores = [{name: [] for name in metric_names}
                           for _ in range(self.n_tasks)]

    for f in range(self.num_cross_folds):
        train_tasks, val_tasks = helper.loadCrossValData(
            self.datasets_path, self.file_prefix, f, reshape=True)

        self.initializeHBLRModel(train_tasks)
        self.setClassifierToSetting(tau10, tau20, sigma_mult, mu_mult)
        self.classifier.trainUntilConverged()

        clusters[f] = self.classifier.K

        if save_plots:
            same_task_matrix = self.updateSameTaskMatrix(same_task_matrix)

        # Get results!
        fold_preds = []
        fold_true_y = []
        for t in range(self.n_tasks):
            preds = self.classifier.predictBinary(val_tasks[t]['X'], t)
            true_y = list(val_tasks[t]['Y'].flatten())

            # Nothing to score for this task in this fold.
            if len(preds) == 0 or len(true_y) == 0:
                continue

            if track_per_task:
                # save the per-task results
                (t_acc, t_auc, t_f1, t_precision,
                 t_recall) = helper.computeAllMetricsForPreds(preds, true_y)
                task_values = (t_acc, t_auc, t_f1, t_precision, t_recall)
                for name, value in zip(metric_names, task_values):
                    per_task_scores[t][name].append(value)
                if print_per_fold:
                    # One pre-formatted string prints identically under the
                    # Python 2 print statement and Python 3 print function.
                    print("Fold %s Task %s acc %s auc %s f1 %s "
                          "precision %s recall %s"
                          % (f, val_tasks[t]['Name'], t_acc, t_auc, t_f1,
                             t_precision, t_recall))

            fold_preds.extend(preds)
            fold_true_y.extend(true_y)

        acc, auc, f1, precision, recall = helper.computeAllMetricsForPreds(
            fold_preds, fold_true_y)
        for name, value in zip(metric_names,
                               (acc, auc, f1, precision, recall)):
            fold_scores[name].append(value)
        if print_per_fold:
            print("Fold %s acc %s auc %s f1 %s precision %s recall %s"
                  % (f, acc, auc, f1, precision, recall))

    print("accs for all folds %s" % (fold_scores['acc'],))
    print("aucs for all folds %s" % (fold_scores['auc'],))
    print("clusters for all folds %s" % (clusters,))

    if save_plots:
        self.plotAccuracyAucAndClusters(
            fold_scores['acc'], fold_scores['auc'], clusters)
        self.saveHintonPlot(same_task_matrix, self.num_cross_folds)
        pd.DataFrame(same_task_matrix).to_csv(
            self.results_path + self.save_prefix + "-same_task_matrix.csv")

    # Add results to the dictionary
    for name in metric_names:
        results_dict['val_' + name] = np.nanmean(fold_scores[name])
    results_dict['num_clusters'] = np.nanmean(clusters)

    # Add per-task results to the dictionary. Task names are read from the
    # validation tasks of the last fold that was loaded.
    if track_per_task:
        for t in range(self.n_tasks):
            label = helper.getFriendlyLabelName(val_tasks[t]['Name'])
            for prefix, name in zip(task_key_prefixes, metric_names):
                results_dict[prefix + label] = np.nanmean(
                    per_task_scores[t][name])

    return results_dict