def eval_features(probs, labels, data_loader, num_features, split, is_acc):
    if is_acc:
        ImageNet_folder_label_dict = misc.load_ImageNet_label_dict()
        loader_label_folder_dict = {
            v: k for k, v in data_loader.dataset.data.class_to_idx.items()
        }
        loader_label_holder = labels
    else:
        top1, top5 = "N/A", "N/A"

    m_scores, m_std = calculate_kl_div(probs[:num_features], splits=split)

    if is_acc:
        # Map loader label indices back to ImageNet label ids.
        converted_labels = []
        for loader_label in loader_label_holder:
            converted_labels.append(
                ImageNet_folder_label_dict[loader_label_folder_dict[loader_label]])
        pred = torch.argmax(probs, 1).detach().cpu().numpy() - 1  # (unused below)
        # Columns 1..1000 of probs hold the 1000 ImageNet class scores,
        # hence the +1 offset on the converted labels.
        top1 = top_k_accuracy_score([i + 1 for i in converted_labels],
                                    probs[:, 1:1001].detach().cpu().numpy(),
                                    k=1)
        top5 = top_k_accuracy_score([i + 1 for i in converted_labels],
                                    probs[:, 1:1001].detach().cpu().numpy(),
                                    k=5)
    return m_scores, m_std, top1, top5
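
# A minimal sketch (not from the project above) of why `labels` must be passed
# to top_k_accuracy_score when y_true does not cover every column of y_score;
# without it, scikit-learn raises a ValueError about mismatched class counts.
import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 1, 2, 2])            # class 3 never occurs in y_true
y_score = np.array([[0.5, 0.2, 0.2, 0.1],
                    [0.3, 0.4, 0.2, 0.1],
                    [0.2, 0.4, 0.3, 0.1],
                    [0.7, 0.2, 0.1, 0.0]])
print(top_k_accuracy_score(y_true, y_score, k=2, labels=np.arange(4)))  # 0.75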
def compute_and_log_metrics(self, y_true: np.ndarray, y_pred: np.ndarray,
                            y_proba: np.ndarray, subset: str):
    self.log(f"{subset}_macro_f1",
             f1_score(y_true, y_pred, average="macro", zero_division=0))
    # If there are only two classes, skip top-k accuracy: there are not enough
    # classes for it to be meaningful.
    if self.num_classes > 2:
        for k in DEFAULT_TOP_K:
            if k < self.num_classes:
                self.log(
                    f"{subset}_top_{k}_accuracy",
                    top_k_accuracy_score(
                        y_true.argmax(axis=1),  # top-k accuracy only supports the single-label case
                        y_proba,
                        labels=np.arange(y_proba.shape[1]),
                        k=k,
                    ),
                )
    else:
        self.log(f"{subset}_accuracy", accuracy_score(y_true, y_pred))
    for metric_name, label, metric in compute_species_specific_metrics(
            y_true, y_pred, self.species):
        self.log(f"species/{subset}_{metric_name}/{label}", metric)
def accuracy(y_true, y_pred, top_k=2):
    y_pred = bug_probability(y_pred)
    y_true = np.argmax(y_true, axis=1)
    top_k_acc_score = top_k_accuracy_score(y_true, y_pred, k=top_k,
                                           labels=np.arange(80))
    return top_k_acc_score
def main():
    args = parse_arguments()
    path_to_csv = os.path.join(args.out_dir, 'perf_clf.csv')
    suffix = '_q' if args.quantize else ''
    model_name = f'model_{args.model_arch}.pt'

    embeddings_train = np.load(
        os.path.join(args.input_dir, f'embeddings_{args.model_arch}_train{suffix}.npy'))
    embeddings_val = np.load(
        os.path.join(args.input_dir, f'embeddings_{args.model_arch}_val{suffix}.npy'))

    # The last column holds the integer label; the rest is the embedding.
    X_train = embeddings_train[:, :-1]
    y_train = embeddings_train[:, -1].astype(np.int64)
    X_val = embeddings_val[:, :-1]
    y_val = embeddings_val[:, -1].astype(np.int64)

    clf = GaussianNaiveBayes()
    clf.fit(X_train, y_train)
    y_jll_train = clf.predict_likelihood(X_train)
    y_jll_val = clf.predict_likelihood(X_val)

    acc_train = accuracy_score(y_train, y_jll_train.argmax(-1))
    acc_val = accuracy_score(y_val, y_jll_val.argmax(-1))

    print(model_name)
    print('\tAccuracy@1: {:.2%}'.format(top_k_accuracy_score(y_val, y_jll_val, k=1)))
    print('\tAccuracy@5: {:.2%}'.format(top_k_accuracy_score(y_val, y_jll_val, k=5)))
    print('\tAccuracy@10: {:.2%}'.format(top_k_accuracy_score(y_val, y_jll_val, k=10)))

    header = ''
    mode = 'a'
    if not os.path.exists(path_to_csv):
        header += 'architecture;train_acc;val_acc'
        mode += '+'
    with open(path_to_csv, mode) as csv:
        if header:
            csv.write(f'{header}\n')
        result = f'{args.model_arch}{suffix};{acc_train};{acc_val}'
        csv.write(f'{result}\n')
def test_top_k_accuracy_evaluator(self):
    import sklearn.metrics as sm

    gts = [[0.4, 1.0, 1.0], [0.4666666, 0.7333333, 1.0]]
    for k_idx, top_k in enumerate([1, 2, 5]):
        for i, (targets, predictions) in enumerate(zip(self.TARGETS, self.PREDICTIONS)):
            evaluator = TopKAccuracyEvaluator(top_k)
            evaluator.add_predictions(predictions, targets)
            top_k_acc = evaluator.get_report()[f"accuracy_top{top_k}"]

            # sklearn expects 1-D scores of the greater label in the binary case.
            if predictions.shape[1] == 2:
                predictions = predictions[:, 1]
            self.assertAlmostEqual(sm.top_k_accuracy_score(targets, predictions, k=top_k),
                                   top_k_acc)
            self.assertAlmostEqual(top_k_acc, gts[i][k_idx], places=5)
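
# A minimal sketch of the binary special case handled in the test above: with
# two classes, scikit-learn expects y_score as a 1-D array of scores for the
# greater label; a two-column array raises a ValueError. For probabilities,
# top-1 then reduces to thresholding at 0.5.
import numpy as np
from sklearn.metrics import top_k_accuracy_score

targets = np.array([0, 1, 1, 0])
probs = np.array([[0.9, 0.1], [0.3, 0.7], [0.6, 0.4], [0.8, 0.2]])
print(top_k_accuracy_score(targets, probs[:, 1], k=1))  # 0.75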
def multiclass_report(x_train, y_train, x_val, y_val, clf=None, dataset_name=None):
    """Utility function to score a classifier.

    Pass in the classifier if you also want to measure train and test times.
    """
    n_classes = len(set(y_train))
    labels = sorted(set(y_train))

    with Timer() as train_time:
        clf.fit(x_train, y_train)
    with Timer() as test_time:
        y_pred_proba = clf.predict_proba(x_val)
    y_pred = np.argmax(y_pred_proba, axis=1)

    results = {
        'Train time': train_time.elapsed,
        'Test time': test_time.elapsed,
    }
    results['clf'] = clf.__class__.__name__
    results['dataset'] = dataset_name
    results['Weighted Fscore'] = metrics.f1_score(y_val, y_pred, average='weighted')
    results['Top-1 score'] = metrics.top_k_accuracy_score(y_val, y_pred_proba, k=1)
    results['Top-5 score'] = metrics.top_k_accuracy_score(
        y_val, y_pred_proba, k=5) if n_classes > 5 else None
    results['n_classes'] = n_classes
    results['n_train_samples'] = len(x_train)
    results['n_test_samples'] = len(x_val)
    return results
def get_metrics(self):
    all_true = self.true_labels
    all_scores = self.pred_scores
    metric_dict = {}
    for k in [1, 3, 5]:
        metric_dict[f"Acc@{k}"] = metrics.top_k_accuracy_score(
            y_true=all_true,
            y_score=all_scores,
            k=k,
            labels=list(range(all_scores.shape[1])))
    return metric_dict
def test(self, images, targets, use_aux=False):
    result = self.net.forward(images)
    result = self.wrap_result(result, use_aux)
    loss_record = self.calculate_losses(result, targets, use_aux)

    # Accuracy counts: normalize=False returns the number of hits, not a fraction.
    model_prediction = result['final'].cpu().numpy()
    targets_np = targets.cpu().numpy()
    accuracy = {
        'top1': top_k_accuracy_score(targets_np, model_prediction, k=1,
                                     normalize=False, labels=range(self.n_classes)),
        'top5': top_k_accuracy_score(targets_np, model_prediction, k=5,
                                     normalize=False, labels=range(self.n_classes)),
    }
    return [loss_record, accuracy]
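
# A small sketch of normalize=False as used above: it returns the raw number
# of correctly ranked samples rather than a fraction (example adapted from the
# scikit-learn docstring).
import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 1, 2, 2])
y_score = np.array([[0.5, 0.2, 0.2],
                    [0.3, 0.4, 0.2],
                    [0.2, 0.4, 0.3],
                    [0.7, 0.2, 0.1]])
print(top_k_accuracy_score(y_true, y_score, k=2, normalize=False))  # 3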
def evaluate_objective(self, data_split, neg_sampling_strategy=None, negative_factor=1):
    at = [1, 3, 5, 10]
    count = 0
    scores = defaultdict(list)

    for input_nodes, seeds, blocks in getattr(self, f"{data_split}_loader"):
        blocks = [blk.to(self.device) for blk in blocks]

        if self.masker is None:
            masked = None
        else:
            masked = self.masker.get_mask(self.seeds_to_python(seeds))

        src_embs = self._graph_embeddings(input_nodes, blocks, masked=masked)
        node_embs_, element_embs_, labels = self.prepare_for_prediction(
            src_embs, seeds, self.target_embedding_fn,
            negative_factor=negative_factor,
            neg_sampling_strategy=neg_sampling_strategy,
            train_embeddings=False
        )
        # indices = self.seeds_to_global(seeds).tolist()
        # labels = self.target_embedder[indices]
        # labels = torch.LongTensor(labels).to(self.device)

        acc, loss, logits = self.compute_acc_loss(
            node_embs_, element_embs_, labels, return_logits=True)

        y_pred = nn.functional.softmax(logits, dim=-1).to("cpu").numpy()
        y_true = np.zeros(y_pred.shape)
        y_true[np.arange(0, y_true.shape[0]), labels.to("cpu").numpy()] = 1.

        if self.measure_scores:
            if count % self.dilate_scores == 0:
                y_true_onehot = np.array(y_true)
                labels = list(range(y_true_onehot.shape[1]))
                for k in at:
                    scores[f"ndcg@{k}"].append(ndcg_score(y_true, y_pred, k=k))
                    scores[f"acc@{k}"].append(
                        top_k_accuracy_score(y_true_onehot.argmax(-1), y_pred,
                                             k=k, labels=labels)
                    )

        scores["Loss"].append(loss.item())
        scores["Accuracy"].append(acc)
        count += 1

    if count == 0:
        count += 1

    scores = {key: sum_scores(val) for key, val in scores.items()}
    return scores
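
# A brief sketch contrasting the two label formats used above: ndcg_score
# consumes one-hot (graded) relevance directly, while top_k_accuracy_score
# takes integer class indices plus an explicit label set.
import numpy as np
from sklearn.metrics import ndcg_score, top_k_accuracy_score

y_pred = np.array([[0.1, 0.6, 0.3], [0.7, 0.2, 0.1]])
y_true_onehot = np.array([[0., 1., 0.], [0., 0., 1.]])
print(ndcg_score(y_true_onehot, y_pred, k=2))             # 0.5, one-hot input
print(top_k_accuracy_score(y_true_onehot.argmax(-1), y_pred,
                           k=2, labels=[0, 1, 2]))        # 0.5, index input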
def validation_step(self, batch, batch_idx):
    sequence = batch["sequence"]
    sequence_lengths = batch["sequence_lengths"]
    target = batch["target"]
    last_items = batch["last_item"]

    logits = self.forward(batch)
    loss = self.criterion(logits, target, sequence_lengths)

    last_item_predictions = torch.softmax(logits[:, -1], dim=1)
    accuracies = {
        f"valid_acc@{k}": top_k_accuracy_score(
            last_items.detach().cpu().numpy(),
            last_item_predictions.detach().cpu().numpy(),
            k=k,
            labels=np.arange(self.num_items))
        for k in [20, 50, 100]
    }
    return {"valid_loss": loss, **accuracies}
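
# A small sketch of the k vs. n_classes interaction relevant to the large k
# values above: when k >= n_classes, scikit-learn warns that the score is
# trivially perfect.
import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 1, 2])
y_score = np.eye(3)
print(top_k_accuracy_score(y_true, y_score, k=3))  # 1.0, with a warning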
def _update_metrics(self, prediction, ground_truth):
    top_k_accuracy = top_k_accuracy_score(ground_truth, prediction, k=self._k)
    prediction = prediction.argmax(axis=1)
    accuracy = accuracy_score(ground_truth, prediction)
    precision, recall, fscore, _ = precision_recall_fscore_support(
        ground_truth, prediction, average='macro', zero_division=1)

    self.accuracy = self._compute_moving_average(self.accuracy, accuracy * 100)
    self.top_k_accuracy = self._compute_moving_average(
        self.top_k_accuracy, top_k_accuracy * 100)
    self.precision = self._compute_moving_average(self.precision, precision * 100)
    self.recall = self._compute_moving_average(self.recall, recall * 100)
    self.fscore = self._compute_moving_average(self.fscore, fscore * 100)
def test_model(self, test_category_to_docIDs: dict, categories_to_corpus: dict) -> dict:
    """
    Test model performance on the test set.

    Calculates metrics: accuracy, top2, top3, precision, recall and prediction time.

    Parameters
    ----------
    test_category_to_docIDs: dict
        Category to DocIDs for test data.
    categories_to_corpus: dict
        Category to category market matrix corpus.

    Returns
    -------
    metrics: dict
        Dictionary with accuracy, top2, top3, precision, recall and time.
    """
    n_batches = 10
    metrics = {
        "accuracy": 0,
        "top2": 0,
        "top3": 0,
        "precision": 0,
        "recall": 0,
        "time": 0,
    }

    # Load data:
    X_test_all, y_test_all = self.load_batch_data(categories_to_corpus,
                                                  test_category_to_docIDs, -1)
    n_docs_per_batch = int(len(X_test_all) / n_batches)
    # Note: only the first n_batches - 1 slices are evaluated; the final slice
    # (including any remainder) is skipped.
    for i in range(n_batches - 1):
        print(f"Test {i}")
        X_test = X_test_all[i * n_docs_per_batch:(i + 1) * n_docs_per_batch]
        y_test = y_test_all[i * n_docs_per_batch:(i + 1) * n_docs_per_batch]

        # Predict data:
        y_predicted = self.skmodel.predict(X_test)
        start = time()
        y_score = self.skmodel.predict_proba(X_test)
        end = time()

        # Calculate time elapsed
        total_time = end - start

        # Calculate metrics:
        test_accuracy = accuracy_score(y_true=y_test, y_pred=y_predicted)
        test_precision = precision_score(y_true=y_test, y_pred=y_predicted, average="macro")
        test_recall = recall_score(y_true=y_test, y_pred=y_predicted, average="macro")
        test_top2 = top_k_accuracy_score(y_true=y_test, y_score=y_score, k=2,
                                         labels=np.unique(y_test))
        test_top3 = top_k_accuracy_score(y_true=y_test, y_score=y_score, k=3,
                                         labels=np.unique(y_test))

        # Save and report metrics. Note: this (old + new) / 2 update is an
        # exponential moving average that weights later batches more heavily,
        # not a true mean over batches.
        metrics["accuracy"] = (metrics["accuracy"] + test_accuracy) / 2
        metrics["top2"] = (metrics["top2"] + test_top2) / 2
        metrics["top3"] = (metrics["top3"] + test_top3) / 2
        metrics["precision"] = (metrics["precision"] + test_precision) / 2
        metrics["recall"] = (metrics["recall"] + test_recall) / 2
        metrics["time"] = (metrics["time"] + total_time) / 2
        print(metrics)

        # Delete to save space:
        del X_test
        del y_test
    return metrics
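
# A hedged alternative to the (old + new) / 2 update above: an incremental
# true mean over all batches seen so far (hypothetical helper, not part of
# the class above).
def update_running_mean(current_mean: float, new_value: float, n_seen: int) -> float:
    """Return the mean after folding new_value into a mean of n_seen values."""
    return current_mean + (new_value - current_mean) / (n_seen + 1)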
# normalization
x_test = x_test.astype('float32') / 255

# load model
model = load_model('../Models/fruits_keras_model6.h5')

# evaluate model on test dataset
pred_prob = model.predict(x_test)
# pred_class = model.predict_classes(x_test)  # removed in newer Keras
pred_class = np.argmax(pred_prob, axis=-1)

# reduce to 1D array
# pred_class = pred_class[:, 0]
# print(pred_class)

# metrics
accuracy = accuracy_score(y_test, pred_class)
k_accuracy = top_k_accuracy_score(y_test, pred_prob, k=5)
print('accuracy = %.3f' % (accuracy * 100.0),
      'top-5 accuracy = %.3f' % (k_accuracy * 100))

# precision = precision_score(y_test, pred_class)
# recall = recall_score(y_test, pred_class)

report = classification_report(y_test, pred_class)
print("Classification Report: ")
print(report)

# f1 = f1_score(y_test, pred_class, average='macro')
# print("f1 score: ", f1)

confusionMatrix = confusion_matrix(y_test, pred_class)  # rows: true, columns: predicted
np.set_printoptions(threshold=sys.maxsize)
print("Confusion matrix: ")
print(confusionMatrix)
np.set_printoptions(threshold=1000)  # restore NumPy's default summarization threshold
def run(config: dict, holdout: bool, debug: bool) -> None:
    log("Run with configuration:")
    log(f"{config}")
    seed_everything(config["seed"])

    with span("Load train and test set:"):
        train_test_set = load_train_test_set(config)
        log(f"{train_test_set.shape}")

    emb_df = pd.read_csv("./data/interim/emb_df.csv")
    n_emb = emb_df.shape[1] - 1
    emb_cols = [str(i) for i in range(n_emb)]
    emb_df.rename(columns={"city_id": "past_city_id"}, inplace=True)

    with span("Preprocessing:"):
        with span("Shift target values for input sequence."):
            unk_city_id = 0
            train_test_set["past_city_id"] = (
                train_test_set.groupby("utrip_id")["city_id"]
                .shift(1).fillna(unk_city_id).astype(int))
            unk_hotel_country = "UNK"
            train_test_set["past_hotel_country"] = (
                train_test_set.groupby("utrip_id")["hotel_country"]
                .shift(1).fillna(unk_hotel_country).astype(str))
            train_test_set = pd.merge(train_test_set, emb_df,
                                      on="past_city_id", how="left")
            train_test_set[emb_cols] = train_test_set[emb_cols].fillna(0)
            train_test_set["city_embedding"] = train_test_set[emb_cols].apply(
                lambda x: list(x), axis=1)

        with span("Encode of target values."):
            target_le = preprocessing.LabelEncoder()
            train_test_set["city_id"] = target_le.fit_transform(
                train_test_set["city_id"])
            train_test_set["past_city_id"] = target_le.transform(
                train_test_set["past_city_id"])

        with span("Add features."):
            log("Convert data type of checkin and checkout.")
            train_test_set["checkin"] = pd.to_datetime(train_test_set["checkin"])
            train_test_set["checkout"] = pd.to_datetime(train_test_set["checkout"])

            log("Create month_checkin feature.")
            train_test_set["month_checkin"] = train_test_set["checkin"].dt.month
            train_test_set["year_checkin"] = train_test_set["checkin"].dt.year

            log("Create days_stay feature.")
            train_test_set["days_stay"] = (
                train_test_set["checkout"] -
                train_test_set["checkin"]).dt.days.apply(lambda x: np.log10(x))

            log("Create num_checkin feature.")
            train_test_set["num_checkin"] = (
                train_test_set.groupby("utrip_id")["checkin"]
                .rank().apply(lambda x: np.log10(x)))

            log("Create days_move feature.")
            train_test_set["past_checkout"] = train_test_set.groupby(
                "utrip_id")["checkout"].shift(1)
            train_test_set["days_move"] = (
                (train_test_set["checkin"] - train_test_set["past_checkout"])
                .dt.days.fillna(0).apply(lambda x: np.log1p(x)))

            log("Create aggregation features.")
            num_visit_drop_duplicates = train_test_set.query("city_id != 0")[[
                "user_id", "city_id"
            ]].drop_duplicates().groupby("city_id").size().apply(
                lambda x: np.log1p(x)).reset_index()
            num_visit_drop_duplicates.columns = [
                "past_city_id", "num_visit_drop_duplicates"
            ]
            num_visit = train_test_set.query("city_id != 0")[[
                "user_id", "city_id"
            ]].groupby("city_id").size().apply(
                lambda x: np.log1p(x)).reset_index()
            num_visit.columns = ["past_city_id", "num_visit"]
            num_visit_same_city = train_test_set[
                train_test_set['city_id'] == train_test_set['city_id'].shift(1)
            ].groupby("city_id").size().apply(
                lambda x: np.log1p(x)).reset_index()
            num_visit_same_city.columns = ["past_city_id", "num_visit_same_city"]

            train_test_set = pd.merge(train_test_set, num_visit_drop_duplicates,
                                      on="past_city_id", how="left")
            train_test_set = pd.merge(train_test_set, num_visit,
                                      on="past_city_id", how="left")
            train_test_set = pd.merge(train_test_set, num_visit_same_city,
                                      on="past_city_id", how="left")
            train_test_set["num_visit_drop_duplicates"].fillna(0, inplace=True)
            train_test_set["num_visit"].fillna(0, inplace=True)
            train_test_set["num_visit_same_city"].fillna(0, inplace=True)
            train_test_set["num_stay_consecutively"] = train_test_set.groupby(
                ["utrip_id", "past_city_id"])["past_city_id"].rank(
                    method="first").fillna(1).apply(lambda x: np.log1p(x))

        with span("Encode of categorical values."):
            cat_le = {}
            for c in CATEGORICAL_COLS:
                le = preprocessing.LabelEncoder()
                train_test_set[c] = le.fit_transform(
                    train_test_set[c].fillna("UNK").astype(str).values)
                cat_le[c] = le

    train = train_test_set[train_test_set["row_num"].isnull()]
    test = train_test_set[~train_test_set["row_num"].isnull()]

    with span("aggregate features by utrip_id"):
        x_train, x_test_using_train, x_test = [], [], []
        for c in ["city_id", "past_city_id"] + CATEGORICAL_COLS + NUMERICAL_COLS:
            x_train.append(train.groupby("utrip_id")[c].apply(list))
            x_test.append(test.groupby("utrip_id")[c].apply(list))
            x_test_using_train.append(
                test.groupby("utrip_id")[c].apply(lambda x: list(x)[:-1]))
        x_train = pd.concat(x_train, axis=1)
        x_test = pd.concat(x_test, axis=1)
        x_test_using_train = pd.concat(x_test_using_train, axis=1)

    with span("sampling training data"):
        x_train["n_trips"] = x_train["city_id"].map(lambda x: len(x))
        x_test_using_train["n_trips"] = x_test_using_train["city_id"].map(
            lambda x: len(x))
        x_train = (x_train.query("n_trips > 2")
                   .sort_values("n_trips").reset_index(drop=True))
        x_test_using_train = (
            x_test_using_train.sort_values("n_trips").reset_index(drop=True))
        x_test = x_test.reset_index(drop=True)
        log(f"x_train: {x_train.shape}, x_test: {x_test.shape}")

    if debug:
        log("'--debug' specified. Shrink data size into 1000.")
        x_train = x_train.iloc[:1000]
        x_test = x_test.iloc[:1000]
        config["params"]["num_epochs"] = 2
        log(f"x_train: {x_train.shape}, x_test: {x_test.shape}")

    with span("Prepare data loader for test:"):
        test_dataset = Dataset(x_test, is_train=False)
        test_dataloader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=1,
            num_workers=os.cpu_count(),
            pin_memory=True,
            collate_fn=Collator(is_train=False),
            shuffle=False,
        )

    with span("Get folds:"):
        cv = StratifiedKFold(
            n_splits=config["fold"]["n_splits"],
            shuffle=config["fold"]["shuffle"],
        )
        folds = cv.split(x_train, pd.cut(x_train["n_trips"], 5, labels=False))

    log("Training:")
    oof_preds = np.zeros((len(x_train), len(target_le.classes_)), dtype=np.float32)
    test_preds = np.zeros((len(x_test), len(target_le.classes_)), dtype=np.float32)

    for i_fold, (trn_idx, val_idx) in enumerate(folds):
        if holdout and i_fold > 0:
            break
        with span(f"Fold = {i_fold}"):
            x_trn = x_train.loc[trn_idx, :]
            x_val = x_train.loc[val_idx, :]
            x_trn = pd.concat([x_trn, x_test_using_train], axis=0,
                              ignore_index=True)

            train_dataset = Dataset(x_trn, is_train=True)
            valid_dataset = Dataset(x_val, is_train=True)
            train_dataloader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=config["params"]["bacth_size"],  # (sic) key name as defined in the config
                num_workers=os.cpu_count(),
                pin_memory=True,
                collate_fn=Collator(is_train=True),
                shuffle=True,
            )
            valid_dataloader = torch.utils.data.DataLoader(
                valid_dataset,
                batch_size=1,
                num_workers=os.cpu_count(),
                pin_memory=True,
                collate_fn=Collator(is_train=True),
                shuffle=False,
            )

            model_cls = MODELS[config["model_name"]]
            model = model_cls(
                n_city_id=len(target_le.classes_),
                n_booker_country=len(cat_le["booker_country"].classes_),
                n_device_class=len(cat_le["device_class"].classes_),
                n_affiliate_id=len(cat_le["affiliate_id"].classes_),
                n_month_checkin=len(cat_le["month_checkin"].classes_),
                n_hotel_country=len(cat_le["past_hotel_country"].classes_),
                emb_dim=config["params"]["emb_dim"],
                rnn_dim=config["params"]["rnn_dim"],
                dropout=config["params"]["dropout"],
                rnn_dropout=config["params"]["rnn_dropout"],
            )
            if i_fold == 0:
                log(f"{summary(model)}")

            criterion = FocalLossWithOutOneHot(gamma=0.5)

            # Prepare optimizer: no weight decay on biases and LayerNorm parameters.
            param_optimizer = list(model.named_parameters())
            no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
            optimizer_grouped_parameters = [
                {
                    "params": [
                        p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)
                    ],
                    "weight_decay": 0.01,
                },
                {
                    "params": [
                        p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)
                    ],
                    "weight_decay": 0.0,
                },
            ]
            optimizer = AdamW(
                optimizer_grouped_parameters,
                lr=1e-4,
                weight_decay=0.01,
            )
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=30, eta_min=1e-6)

            logdir = (Path(config["output_dir_path"]) / config["exp_name"] /
                      f"fold{i_fold}")
            loaders = {"train": train_dataloader, "valid": valid_dataloader}
            runner = CustomRunner(device=DEVICE)
            runner.train(
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                scheduler=scheduler,
                loaders=loaders,
                main_metric="accuracy04",
                minimize_metric=False,
                logdir=logdir,
                num_epochs=config["params"]["num_epochs"],
                verbose=True,
            )

            log("Predictions using validation data")
            oof_preds[val_idx, :] = np.array(
                list(
                    map(
                        lambda x: x.cpu().numpy()[-1, :],
                        runner.predict_loader(
                            loader=valid_dataloader,
                            resume=f"{logdir}/checkpoints/best.pth",
                            model=model,
                        ),
                    )))
            y_val = x_val["city_id"].map(lambda x: x[-1]).values
            score = top_k_accuracy_score(
                y_val, oof_preds[val_idx, :], k=4,
                labels=np.arange(len(target_le.classes_)))
            log(f"val acc@4: {score}")
            np.save(
                Path(config["output_dir_path"]) / config["exp_name"] /
                f"y_val_pred_fold{i_fold}",
                oof_preds[val_idx, :],
            )

            test_preds_ = np.array(
                list(
                    map(
                        lambda x: x.cpu().numpy()[-1, :],
                        runner.predict_loader(
                            loader=test_dataloader,
                            resume=f"{logdir}/checkpoints/best.pth",
                            model=model,
                        ),
                    )))
            test_preds += test_preds_ / cv.n_splits
            np.save(
                Path(config["output_dir_path"]) / config["exp_name"] /
                f"y_test_pred_fold{i_fold}",
                test_preds_,
            )

    log("Evaluation OOF values:")
    y_train = x_train["city_id"].map(lambda x: x[-1])
    score = top_k_accuracy_score(y_train, oof_preds, k=4,
                                 labels=np.arange(len(target_le.classes_)))
    log(f"oof acc@4: {score}")

    log("Save files:")
    np.save(
        Path(config["output_dir_path"]) / config["exp_name"] / "y_oof_pred",
        oof_preds,
    )
    np.save(
        Path(config["output_dir_path"]) / config["exp_name"] / "y_test_pred",
        test_preds,
    )
# (Excerpt: the opening branch of this if/else precedes it.)
    verb_labels = list(verbs_classes.values())
    noun_labels = list(nouns_classes.values())
else:
    verb_labels = list(verbs_categories_classes.values())
    noun_labels = list(nouns_categories_classes.values())

# compute and display the metrics
print('\nCompute metrics')
y_true_verb_labels = np.argmax(y_true_verb, axis=-1)
y_pred_verb_labels = np.argmax(y_pred_verb, axis=-1)
metrics = {
    'verb_top_1_accuracy': [accuracy_score(y_true_verb_labels, y_pred_verb_labels)],
    'verb_top_3_accuracy': [
        top_k_accuracy_score(y_true_verb_labels, y_pred_verb, k=3,
                             labels=verb_labels)
    ],
    'verb_top_5_accuracy': [
        top_k_accuracy_score(y_true_verb_labels, y_pred_verb, k=5,
                             labels=verb_labels)
    ],
    'verb_confusion_matrix': [
        confusion_matrix(y_true_verb_labels, y_pred_verb_labels,
                         labels=verb_labels)
    ]
}
def analyse_od(model: str, dataset: str, split: str, pivot_file: TextIO):
    """
    TODO
    """
    if split == "kh":
        return

    source_dataset = load_dataset(f"{dataset}.txt")
    label_indices = get_label_indices(source_dataset)
    numeric_labels = list(range(len(label_indices)))
    num_labels = len(numeric_labels)

    # Note: split can no longer be "kh" here because of the early return above,
    # so this always resolves to the plain split name.
    split_name = split if split != "kh" else f"kh-{model}"
    split_path = f"{dataset}.strat-0.15.{split_name}.splits"
    holdout_dataset = load_dataset(os.path.join(split_path, "holdout.txt"))
    schedule_dataset = load_dataset(os.path.join(split_path, "schedule.txt"))
    y_true = [label_indices[label] for label in holdout_dataset.values()]

    splitter = TopNSplitter(50)
    iteration = 0
    cumulative_corrections = 0
    _, remaining_dataset = splitter(schedule_dataset)

    while True:
        holdout_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/predictions")
        if not os.path.exists(holdout_predictions_path):
            break
        holdout_predictions = load_rois_predictions(
            holdout_predictions_path, holdout_dataset, num_labels)
        y_score = list(holdout_predictions.values())
        y_score = [
            coerce_incorrect(num_labels, truth, prediction)
            for truth, prediction in zip(y_true, y_score)
        ]
        top_1 = top_k_accuracy_score(y_true, y_score, k=1,
                                     labels=numeric_labels, normalize=True)
        pivot_file.write(",".join(
            map(str, [
                model, dataset, split, iteration, "holdout", "accuracy", top_1
            ])) + "\n")

        update_dataset, remaining_dataset = splitter(remaining_dataset)
        update_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/update_predictions")
        if os.path.exists(update_predictions_path):
            update_y_true = [
                label_indices[label] for label in update_dataset.values()
            ]
            update_predictions = load_rois_predictions(
                update_predictions_path, update_dataset, num_labels)
            update_y_score = list(update_predictions.values())
            update_y_score = [
                coerce_incorrect(num_labels, truth, prediction)
                for truth, prediction in zip(update_y_true, update_y_score)
            ]
            update_top_1 = top_k_accuracy_score(update_y_true, update_y_score,
                                                k=1, labels=numeric_labels,
                                                normalize=True)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update", "accuracy",
                    update_top_1
                ])) + "\n")
            cumulative_corrections += int((1 - update_top_1) * 50)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update",
                    "cumulative_corrections", cumulative_corrections
                ])) + "\n")
        iteration += 1
def train_and_validate(model, train_data_loader, val_data_loader, cfg, experiment=None):
    # Set visible devices
    parallel_model = cfg.performance.parallel_mode
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

    # Set cuda
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        torch.cuda.empty_cache()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model.to(device)

    # CUDNN auto-tuner. Use True when input size and model are static.
    torch.backends.cudnn.benchmark = cfg.performance.cuddn_auto_tuner

    if cfg.training.freeze_lower:
        for p in model.parameters():
            p.requires_grad = False
        model.Linear_layer.weight.requires_grad = True
        model.Linear_layer.bias.requires_grad = True

    # Create criterion and optimizer
    if cfg.optimizer.loss_function == 'hinge':
        metric_name = 'R2'
        goal_type = 'regression'
        # Set loss criterion
        criterion = HingeLossRegression(cfg.optimizer.loss_epsilon, reduction=None)
        # Hinge loss is dependent on L2 regularization so we cannot use AdamW
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                     lr=cfg.optimizer.learning_rate,
                                     weight_decay=cfg.optimizer.weight_decay)
    elif cfg.optimizer.loss_function == 'mse':
        metric_name = 'R2'
        goal_type = 'regression'
        # Set loss criterion
        criterion = nn.MSELoss(reduction='none')
        # Set optimizer
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                      lr=cfg.optimizer.learning_rate,
                                      weight_decay=cfg.optimizer.weight_decay)
    elif cfg.optimizer.loss_function == 'cross-entropy':
        metric_name = 'Accuracy'
        goal_type = 'classification'
        # Get counts for each class.
        # Instantiate class counts to 1 instead of 0 to prevent division by zero
        # in case data is missing.
        class_counts = np.array(cfg.model.n_classes * [1])
        for i in train_data_loader.dataset.unique_exams['target'].value_counts().index:
            class_counts[i] = train_data_loader.dataset.unique_exams['target'].value_counts().loc[i]
        # Calculate the inverse normalized ratio for each class
        weights = class_counts / class_counts.sum()
        weights = 1 / weights
        weights = weights / weights.sum()
        weights = torch.FloatTensor(weights).cuda()
        criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                      lr=cfg.optimizer.learning_rate,
                                      weight_decay=cfg.optimizer.weight_decay)
    elif cfg.optimizer.loss_function == 'all-threshold':
        metric_name = 'Accuracy'
        goal_type = 'ordinal-regression'
        # Get counts for each class (same initialization trick as above).
        class_counts = np.array(len(train_data_loader.dataset.unique_exams['target'].unique()) * [1])
        for i in train_data_loader.dataset.unique_exams['target'].value_counts().index:
            class_counts[i] = train_data_loader.dataset.unique_exams['target'].value_counts().loc[i]
        # Calculate the inverse normalized ratio for each class
        weights = class_counts / class_counts.sum()
        weights = 1 / weights
        weights = weights / weights.sum()
        weights = torch.FloatTensor(weights).cuda()
        criterion = OrdinalRegressionAT(sample_weights=weights, reduction=None)
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                      lr=cfg.optimizer.learning_rate,
                                      weight_decay=cfg.optimizer.weight_decay)

    if parallel_model:
        print("Available GPUS: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)

    use_half_prec = cfg.performance.half_precision

    # Initialize GradScaler for autocasting
    scaler = GradScaler(enabled=use_half_prec)

    print('Model parameters: {}'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))
    mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])
    mem_buffs = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])
    mem = mem_params + mem_buffs  # in bytes
    print('Model memory size: {}'.format(mem))

    # Initialize scheduler
    use_scheduler = cfg.optimizer.use_scheduler
    if use_scheduler:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, patience=cfg.optimizer.s_patience, factor=cfg.optimizer.s_factor)
        # scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, total_steps=len(train_data_loader))
        # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,150,350], gamma=0.1)

    # Maximum value used for gradient clipping = max fp16/2
    gradient_clipping = cfg.performance.gradient_clipping
    max_norm = cfg.performance.gradient_clipping_max_norm

    # Set anomaly detection
    torch.autograd.set_detect_anomaly(cfg.performance.anomaly_detection)

    # Begin training
    max_val_metric = -10000
    for i in range(cfg.training.epochs):
        start_time_epoch = time.time()
        batch_time_t = AverageMeter()
        data_time_t = AverageMeter()
        losses_t = AverageMeter()
        metric_values_t = AverageMeter()
        batch_time_v = AverageMeter()
        data_time_v = AverageMeter()
        losses_v = AverageMeter()
        metric_values_v = AverageMeter()
        if goal_type == 'classification':
            top3_values_v = AverageMeter()
            top5_values_v = AverageMeter()

        end_time_t = time.time()
        # Training
        model.train()
        for j, (inputs_t, targets_t, indexes_t, _, _) in enumerate(train_data_loader):
            # Update timer for data retrieval
            data_time_t.update(time.time() - end_time_t)

            # Move input to CUDA if available
            if cuda_available:
                if len(inputs_t) > 1:
                    for p, inp in enumerate(inputs_t):
                        if not torch.isfinite(inp).all():
                            raise ValueError('Input from dataloader not finite')
                        inputs_t[p] = inp.to(device, non_blocking=True)
                else:
                    if not torch.isfinite(inputs_t).all():
                        raise ValueError('Input from dataloader not finite')
                    inputs_t = inputs_t.to(device, non_blocking=True)
                if goal_type == 'classification':
                    targets_t = targets_t.long().squeeze()
                targets_t = targets_t.to(device, non_blocking=True)

            # Do forward and backwards pass.
            # Get model train output and train loss
            with autocast(enabled=use_half_prec):
                outputs_t = model(inputs_t)
                if goal_type == 'ordinal-regression':
                    loss_t = criterion(outputs_t, targets_t, model.module.thresholds)
                else:
                    loss_t = criterion(outputs_t, targets_t)
                loss_mean_t = loss_t.mean()

            if cfg.data_loader.weighted_sampler:
                for index, loss in zip(indexes_t, loss_t.cpu().detach()):
                    loss_ratio = loss / loss_mean_t.cpu().detach()
                    loss_ratio = torch.clamp(loss_ratio, min=0.1, max=3)
                    train_data_loader.sampler.weights[index] = loss_ratio

            # Zero grads
            optimizer.zero_grad()
            # Backwards pass
            scaler.scale(loss_mean_t).backward()

            # Gradient clipping. Note: clip_grad_value_ clips each gradient
            # element to [-max_norm, max_norm]; despite the variable name this
            # is clipping by value, not by norm.
            if gradient_clipping:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_value_(model.parameters(), max_norm)

            # Step and update
            scaler.step(optimizer)
            scaler.update()

            # Calculate and update metrics
            try:
                if not torch.isfinite(outputs_t).all():
                    raise ValueError('Output from model not finite')
                metric_targets_t = targets_t.cpu().detach().numpy()
                metric_outputs_t = outputs_t.cpu().detach().numpy()
                if goal_type == 'regression':
                    metric_t = r2_score(metric_targets_t, metric_outputs_t)
                elif goal_type == 'classification':
                    predictions_t = np.argmax(metric_outputs_t, 1)
                    metric_t = accuracy_score(metric_targets_t, predictions_t)
                elif goal_type == 'ordinal-regression':
                    labels_t = get_ORAT_labels(metric_outputs_t, model.module.thresholds)
                    metric_t = accuracy_score(metric_targets_t, labels_t)
                metric_values_t.update(metric_t)
                losses_t.update(loss_mean_t)
            except ValueError as ve:
                print('Failed to calculate {} with error: {} and output: {}'.format(
                    metric_name, ve, outputs_t))

            # Update timer for batch
            batch_time_t.update(time.time() - end_time_t)

            if j % 100 == 0:
                print('Training Batch: [{}/{}] in epoch: {} \t '
                      'Training Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) \t '
                      'Training Data Time: {data_time.val:.3f} ({data_time.avg:.3f}) \t '
                      'Training Loss: {loss.val:.4f} ({loss.avg:.4f}) \t '
                      'Training {metric_name} Score: {metric.val:.3f} ({metric.avg:.3f}) \t'
                      .format(j + 1, len(train_data_loader), i + 1,
                              batch_time=batch_time_t, data_time=data_time_t,
                              loss=losses_t, metric_name=metric_name,
                              metric=metric_values_t))

            # Reset end timer
            end_time_t = time.time()

        # End of training epoch prints and updates
        print('Finished Training Epoch: {} \t '
              'Training Time: {batch_time.avg:.3f} \t '
              'Training Data Time: {data_time.avg:.3f} \t '
              'Training Loss: {loss.avg:.4f} \t '
              'Training {metric_name} score: {metric.avg:.3f} \t'
              .format(i + 1, batch_time=batch_time_t, data_time=data_time_t,
                      loss=losses_t, metric_name=metric_name, metric=metric_values_t))
        if cfg.logging.logging_enabled:
            log_train_metrics(experiment, losses_t.avg, metric_values_t.avg,
                              optimizer.param_groups[0]['lr'])

        # Validation
        # Only run validation every 10 epochs to save training time
        if i % 10 == 0:
            end_time_v = time.time()
            model.eval()
            if goal_type == 'classification':
                all_result_v = np.zeros((0, cfg.model.n_classes))
            else:
                all_result_v = np.zeros((0))
            all_target_v = np.zeros((0))
            all_uids_v = np.zeros((0))
            all_loss_v = np.zeros((0))
            for k, (inputs_v, targets_v, _, uids_v, _) in enumerate(val_data_loader):
                # Update timer for data retrieval
                data_time_v.update(time.time() - end_time_v)

                # Move input to CUDA if available
                if cuda_available:
                    if len(inputs_v) > 1:
                        for p, inp in enumerate(inputs_v):
                            inputs_v[p] = inp.to(device, non_blocking=True)
                    else:
                        inputs_v = inputs_v.to(device, non_blocking=True)
                    if goal_type == 'classification':
                        targets_v = targets_v.long().squeeze()
                    targets_v = targets_v.to(device, non_blocking=True)

                with torch.no_grad():
                    # Get model validation output and validation loss
                    with autocast(enabled=use_half_prec):
                        outputs_v = model(inputs_v)
                        if goal_type == 'ordinal-regression':
                            loss_v = criterion(outputs_v, targets_v, model.module.thresholds)
                        else:
                            loss_v = criterion(outputs_v, targets_v)
                        loss_mean_v = loss_v.mean()

                # Update timer for batch
                batch_time_v.update(time.time() - end_time_v)

                # Update metrics
                if cfg.evaluation.use_best_sample:
                    if goal_type == 'classification':
                        all_result_v = np.concatenate((all_result_v, outputs_v.cpu().detach().numpy()))
                        all_target_v = np.concatenate((all_target_v, targets_v.cpu().detach().numpy()))
                        all_loss_v = np.concatenate((all_loss_v, loss_v.cpu().detach().numpy()))
                    else:
                        all_result_v = np.concatenate((all_result_v, outputs_v.squeeze(dim=1).cpu().detach().numpy()))
                        all_target_v = np.concatenate((all_target_v, targets_v.squeeze(dim=1).cpu().detach().numpy()))
                        all_loss_v = np.concatenate((all_loss_v, loss_v.cpu().squeeze(dim=1).detach().numpy()))
                    all_uids_v = np.concatenate((all_uids_v, uids_v))
                else:
                    metric_targets_v = targets_v.cpu().detach().numpy()
                    metric_outputs_v = outputs_v.cpu().detach().numpy()
                    if goal_type == 'regression':
                        metric_v = r2_score(metric_targets_v, metric_outputs_v)
                    elif goal_type == 'classification':
                        predictions_v = np.argmax(metric_outputs_v, 1)
                        metric_v = accuracy_score(metric_targets_v, predictions_v)
                    elif goal_type == 'ordinal-regression':
                        labels_v = get_ORAT_labels(metric_outputs_v, model.module.thresholds)
                        metric_v = accuracy_score(metric_targets_v, labels_v)
                    metric_values_v.update(metric_v)
                    losses_v.update(loss_mean_v)

                if k % 100 == 0:
                    print('Validation Batch: [{}/{}] in epoch: {} \t '
                          'Validation Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) \t '
                          'Validation Data Time: {data_time.val:.3f} ({data_time.avg:.3f}) \t '
                          'Validation Loss: {loss.val:.4f} ({loss.avg:.4f}) \t '
                          'Validation {metric_name}: {metric.val:.3f} ({metric.avg:.3f})\t'
                          .format(k + 1, len(val_data_loader), i + 1,
                                  batch_time=batch_time_v, data_time=data_time_v,
                                  loss=losses_v, metric_name=metric_name,
                                  metric=metric_values_v))

                end_time_v = time.time()

            if cfg.evaluation.use_best_sample:
                # As results are over all possible combinations of views in each
                # examination, each different combination needs to have a weight
                # equal to its ratio.
                val_data = np.array((all_uids_v, all_loss_v))
                val_data = val_data.transpose(1, 0)
                pd_val_data = pd.DataFrame(val_data, columns=['us_id', 'loss'])
                pd_val_data['loss'] = pd_val_data['loss'].astype(np.float32)
                val_ue = pd_val_data.drop_duplicates(subset='us_id')[['us_id', 'loss']]
                all_mean_loss = []
                for ue in val_ue.itertuples():
                    exam_results = pd_val_data[pd_val_data['us_id'] == ue.us_id]
                    num_combinations = len(exam_results)
                    weight = 1 / num_combinations
                    mean_exam_loss = exam_results['loss'].mean()
                    all_mean_loss.append(mean_exam_loss)
                    for indx in exam_results.index:
                        pd_val_data.loc[indx, 'metric_weight'] = weight
                np_loss = np.array(all_mean_loss, dtype=np.float32)
                loss_mean_v = np_loss.mean()
                weights = pd_val_data['metric_weight'].to_numpy()
                if goal_type == 'regression':
                    metric_v = r2_score(all_target_v, all_result_v, sample_weight=weights)
                elif goal_type == 'classification':
                    # np.int was removed in NumPy 1.24; the builtin int is equivalent here.
                    top3_v = top_k_accuracy_score(all_target_v.astype(int), all_result_v,
                                                  k=3, sample_weight=weights)
                    top5_v = top_k_accuracy_score(all_target_v.astype(int), all_result_v,
                                                  k=5, sample_weight=weights)
                    predictions_v = np.argmax(all_result_v, 1)
                    metric_v = accuracy_score(all_target_v.astype(int), predictions_v,
                                              sample_weight=weights)
                elif goal_type == 'ordinal-regression':
                    labels_v = get_ORAT_labels(all_result_v, model.module.thresholds)
                    metric_v = accuracy_score(all_target_v.astype(int), labels_v,
                                              sample_weight=weights)
            else:
                loss_mean_v = losses_v.avg
                metric_v = metric_values_v.avg

            # End of validation epoch prints and updates
            print('Finished Validation Epoch: {} \t '
                  'Validation Time: {batch_time.avg:.3f} \t '
                  'Validation Data Time: {data_time.avg:.3f} \t '
                  'Validation Loss: {loss:.4f} \t '
                  'Validation {metric_name}: {metric:.3f}\t'
                  .format(i + 1, batch_time=batch_time_v, data_time=data_time_v,
                          loss=loss_mean_v, metric_name=metric_name, metric=metric_v))
            if goal_type == 'regression':
                print('Example targets: {} \n Example outputs: {}'.format(
                    torch.squeeze(targets_v), torch.squeeze(outputs_v)))

            if use_scheduler:
                scheduler.step(loss_mean_v)

            if cfg.training.checkpointing_enabled and cfg.logging.logging_enabled:
                experiment_id = experiment["sys/id"].fetch()
                if metric_v > max_val_metric:
                    checkpoint_name = cfg.training.checkpoint_save_path + cfg.model.name + '_' \
                        + cfg.data.type + '_' + cfg.data.name + '_exp_' + experiment_id + '.pth'
                    save_checkpoint(checkpoint_name, model, optimizer)
                    max_val_metric = metric_v
            elif cfg.training.checkpointing_enabled:
                if metric_v > max_val_metric:
                    checkpoint_name = cfg.training.checkpoint_save_path + cfg.model.name + '_' \
                        + cfg.data.type + '_' + cfg.data.name + '_test' + '.pth'
                    save_checkpoint(checkpoint_name, model, optimizer)
                    max_val_metric = metric_v

            if cfg.logging.logging_enabled:
                log_val_metrics(experiment, loss_mean_v, metric_v, max_val_metric)

        epoch_time = time.time() - start_time_epoch
        rem_epochs = cfg.training.epochs - (i + 1)
        rem_time = rem_epochs * epoch_time
        print('Epoch {} completed. Time to complete: {}. Estimated remaining time: {}'.format(
            i + 1, epoch_time, format_time(rem_time)))