def main():
    """Assemble the training components from the options and run the control flow.

    The options object is threaded through initialisation, dataset creation
    (called once with ``train=True`` and once with ``train=False``) and
    checkpoint resumption. The model and criterion are passed through
    ``convert_dtype`` and, when ``options.use_cuda`` is set, have ``.cuda()``
    called on them before the selected control flow is invoked.
    """
    opts = initialize(get_options())
    for is_train in (True, False):
        opts = create_dataset(opts, train=is_train)

    net = get_model(opts)
    optim = get_optimizer(opts, net)
    lr_schedule = get_scheduler(opts, optim)
    # Criterions are like `torch.nn.CrossEntropyLoss()`
    loss_fn = get_criterion(opts, net)
    metric_fns = get_metrics(opts)

    # Cast the model first, then the criterion, to the configured dtype.
    net = convert_dtype(opts.dtype, net)
    loss_fn = convert_dtype(opts.dtype, loss_fn)

    if opts.use_cuda:
        net.cuda()
        loss_fn.cuda()

    opts = checkpoint.maybe_resume(opts, net, optim, lr_schedule)

    run = get_controlflow(opts)
    run(
        model=net,
        optimizer=optim,
        criterion=loss_fn,
        metrics=metric_fns,
        scheduler=lr_schedule,
        options=opts,
    )
def main():
    """Assemble the training components from the options and run the control flow.

    The options object is threaded through initialisation, dataset creation
    (called once with ``train=True`` and once with ``train=False``) and
    checkpoint resumption. When ``options.use_cuda`` is set, ``.cuda()`` is
    called on the model and the criterion before the selected control flow
    is invoked.
    """
    opts = initialize(get_options())
    for is_train in (True, False):
        opts = create_dataset(opts, train=is_train)

    net = get_model(opts)
    optim = get_optimizer(opts, net)
    lr_schedule = get_scheduler(opts, optim)
    # Criterions are like `torch.nn.CrossEntropyLoss()`
    loss_fn = get_criterion(opts)
    metric_fns = get_metrics(opts)

    if opts.use_cuda:
        net.cuda()
        loss_fn.cuda()

    opts = checkpoint.maybe_resume(opts, net, optim, lr_schedule)

    run = get_controlflow(opts)
    run(
        model=net,
        optimizer=optim,
        criterion=loss_fn,
        metrics=metric_fns,
        scheduler=lr_schedule,
        options=opts,
    )
def construct_policy_frontier(preds_df, outcomes_df, reward_params,
                              validate=True, test_outcomes_df=None,
                              num_trials=20):
    """Average policy metrics over splits for every reward-parameter setting.

    For each cohort and each split index in ``range(num_trials)``, the rows of
    ``preds_df`` for that split are turned into actions with
    ``get_policy_for_row`` (once per entry of ``reward_params``), merged with
    the outcomes on ``example_id`` and scored with ``get_metrics``. The
    per-split metrics are then averaged per parameter setting.

    Args:
        preds_df: DataFrame with ``example_id``, ``split_ct``, ``is_train`` and
            one ``predicted_prob_<outcome>`` column per outcome column.
        outcomes_df: DataFrame with ``example_id`` plus the outcome columns;
            merged for the ``train`` and ``val`` cohorts.
        reward_params: non-empty list of dicts sharing the same keys.
        validate: evaluate ``['train', 'val']`` when true, otherwise
            ``['train', 'test']``.
        test_outcomes_df: outcomes merged for the ``test`` cohort; required
            when ``validate`` is false.
        num_trials: number of split indices to evaluate.

    Returns:
        dict mapping cohort name to a DataFrame of mean metrics with one row
        per reward-parameter combination.

    Raises:
        ValueError: if ``validate`` is false and ``test_outcomes_df`` is None.
    """
    # Fail before any work is done instead of deep inside ``merge`` after the
    # whole train cohort has already been evaluated.
    if not validate and test_outcomes_df is None:
        raise ValueError("test_outcomes_df is required when validate=False")

    # Get names of outcome columns
    outcome_cols = list(outcomes_df.drop(columns=['example_id']).columns)
    prob_to_outcome = {
        f'predicted_prob_{outcome}': outcome for outcome in outcome_cols
    }

    # Get all reward parameter combinations to be tested
    param_names = list(reward_params[0].keys())
    metric_names = None

    # Cohorts for which we are going to compute policy performance frontiers
    cohorts_to_evaluate = ['train', 'val'] if validate else ['train', 'test']

    frontiers_dict = {}
    for cohort in cohorts_to_evaluate:
        logging.info(f"Evaluating models on {cohort} cohort...")
        is_train = (cohort == 'train')
        outcomes_to_merge_df = (
            outcomes_df if cohort != 'test' else test_outcomes_df
        )

        all_stats = []
        for trial in range(num_trials):
            logging.info(f"Calculating metrics for split {trial}")
            in_split = (
                (preds_df['split_ct'] == trial)
                & (preds_df['is_train'] == is_train)
            )
            # The rename/subset does not depend on the reward setting, so it
            # is done once per split rather than once per combination.
            preds_for_split_df = (
                preds_df[in_split]
                .reset_index(drop=True)
                .rename(columns=prob_to_outcome)
            )[['example_id'] + outcome_cols]

            for i, combo in enumerate(reward_params):
                logging.info(
                    f"Evaluating at parameter setting {i} / {len(reward_params)}"
                )
                actions = preds_for_split_df.apply(
                    lambda row: get_policy_for_row(row, combo), axis=1
                )
                # Build a fresh frame instead of writing into the shared one.
                policy_outcomes_df = (
                    preds_for_split_df[['example_id']]
                    .assign(action=actions)
                    .merge(outcomes_to_merge_df, on='example_id', how='inner')
                )

                metrics = get_metrics(policy_outcomes_df)
                if metric_names is None:
                    # Fix the metric order from the first result.
                    metric_names = list(metrics.keys())

                curr_reward_param_list = [combo[name] for name in param_names]
                stats_for_trial_combo = [metrics[name] for name in metric_names]
                all_stats.append(curr_reward_param_list + stats_for_trial_combo)

        all_stats = pd.DataFrame(all_stats, columns=param_names + metric_names)
        frontiers_dict[cohort] = (
            all_stats.groupby(param_names).mean().reset_index()
        )

    return frontiers_dict
def get_stats_for_train_val_preds(train_preds_outcomes, val_preds_outcomes,
                                  threshold_setting):
    """Assign a policy action to every row and score both cohorts.

    Both input frames are modified in place: an ``action`` column is written
    with the result of ``get_policy`` for each row.

    Returns:
        ``(train_metrics, val_metrics)`` as produced by ``get_metrics``.
    """
    outcome_order = ['NIT', 'SXT', 'CIP', 'LVX']

    def choose_action(row):
        return get_policy(row, threshold_setting, outcomes=outcome_order)

    # Train is processed before validation, matching the metric order below.
    for frame in (train_preds_outcomes, val_preds_outcomes):
        frame['action'] = frame.apply(choose_action, axis=1)

    train_metrics = get_metrics(train_preds_outcomes)
    val_metrics = get_metrics(val_preds_outcomes)
    return train_metrics, val_metrics
def import_losses_and_metrics(config):
    """Resolve the losses and metrics named in the configuration.

    ``config['losses']`` and ``config['metrics']`` are iterables of mappings
    with ``task_id`` and ``name`` entries. Each is reduced to a
    ``{task_id: name}`` dict and handed to ``custom_losses.get_losses`` /
    ``custom_metrics.get_metrics``.

    Returns:
        ``(losses, metrics)`` as returned by those two helpers.
    """
    loss_names = {entry['task_id']: entry['name'] for entry in config['losses']}
    losses = custom_losses.get_losses(loss_names)

    metric_names = {
        entry['task_id']: entry['name'] for entry in config['metrics']
    }
    metrics = custom_metrics.get_metrics(metric_names)

    return losses, metrics
def on_epoch_end(self, epoch, logs={}):
    """Record the epoch losses, redraw the loss plot and track the best weights.

    ``logs`` must contain ``'loss'`` and ``'val_loss'``. When the validation
    loss is lower than ``self.best_val_loss`` the current model weights are
    kept in ``self.best_weights``; every
    ``MetricsCallback.EPOCHS_BEFORE_VALIDATION``-th such improvement the model
    is also run over the validation data and the resulting metrics are stored
    in ``self.metrics_dict`` under the current epoch index.

    NOTE(review): the source of this method had lost its indentation; the
    nesting below (validation run inside the "new best loss" branch) is a
    reconstruction and should be confirmed.
    """
    # The callback keeps its own 1-based epoch counter; the `epoch` argument
    # is not used.
    self.epoch_index += 1
    self.losses.append(logs['loss'])
    self.val_losses.append(logs['val_loss'])

    # Plot both loss curves against epochs 1..epoch_index.
    loss_line, = self.ax.plot(range(1, self.epoch_index + 1), self.losses, 'g-', label='Training Loss')
    val_loss_line, = self.ax.plot(range(1, self.epoch_index + 1), self.val_losses, 'r-', label='Validation Loss')
    self.ax.legend(handles=[loss_line, val_loss_line])
    self.ax.set_ylim(
        (MetricsCallback.GRAPH_MIN, MetricsCallback.GRAPH_MAX))
    self.fig.canvas.draw()

    if logs['val_loss'] < self.best_val_loss:
        # New best validation loss: remember it together with the weights.
        self.val_loss_reductions += 1
        self.best_val_loss = logs['val_loss']
        self.best_weights = self.model.get_weights()
        print '\r \r' # to remove the previous line of verbose output of model fit
        #time.sleep(0.1)
        info('Found lower val loss for epoch {} => {}'.format(
            self.epoch_index, round(logs['val_loss'], 5)))
        if self.val_loss_reductions % MetricsCallback.EPOCHS_BEFORE_VALIDATION == 0:
            info('Validation Loss Reduced {} times'.format(
                self.val_loss_reductions))
            info('Evaluating on Validation Data')
            Xv_file, yv_file = get_data_files(self.base_load_directory, self.classifications_type, self.level, 'validation')
            # Xv is loaded but not used below; only yv is read.
            Xv, yv = get_data(Xv_file, yv_file, mmap=True)
            # NOTE(review): val_samples is taken from yv.shape[1]; confirm
            # that axis 1 of yv is the sample axis.
            yvp = self.model.predict_generator(generator=batch_generator(
                Xv_file, yv_file, self.batch_size, is_mlp=self.is_mlp, validate=True), max_q_size=QUEUE_SIZE, val_samples=yv.shape[1])
            yvp_binary = get_binary_0_5(yvp)
            info('Generating Validation Metrics')
            validation_metrics = get_metrics(yv, yvp, yvp_binary)
            print "****** Validation Metrics: Cov Err: {:.3f} | Top 3: {:.3f} | Top 5: {:.3f} | F1 Micro: {:.3f} | F1 Macro: {:.3f}".format(
                validation_metrics['coverage_error'], validation_metrics['top_3'], validation_metrics['top_5'], validation_metrics['f1_micro'], validation_metrics['f1_macro'])
            self.metrics_dict[self.epoch_index] = validation_metrics
def test_detect_metrics():
    """Print precision, recall and F1 of ``engine.diff_areas`` over the test folder.

    Every ``*.jpg`` in ``test_folder`` is paired with the ``.json`` file of the
    same stem. Predicted boxes are compared with the ground-truth boxes by
    ``get_metrics`` (IoU threshold 0.3) and the true-positive, false-negative
    and false-positive counts are summed over all images.

    A ratio whose denominator is zero (no images, no detections, or no ground
    truths) is reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    tps, fns, fps = 0, 0, 0
    for img_path in glob(test_folder + "/*.jpg"):
        # Swap the trailing "jpg" for "json" to find the annotation file.
        json_path = str(img_path)[:-3] + "json"
        img = cv2.imread(str(img_path))
        results = engine.diff_areas(img)
        predictions = [item['bbox'] for item in results]
        ground_truths = get_bbox_from_json(read_json_file(json_path))
        tp, fn, fp = get_metrics(predictions, ground_truths, iou_thres=0.3)
        tps += tp
        fns += fn
        fps += fp

    predicted_total = tps + fps
    actual_total = tps + fns
    precision = tps / predicted_total if predicted_total else 0.0
    recall = tps / actual_total if actual_total else 0.0
    pr_sum = precision + recall
    f1_score = 2 * precision * recall / pr_sum if pr_sum else 0.0
    print(
        f"precision: {precision:.4f}, recall: {recall:.4f}, f1_score: {f1_score:.4f}"
    )
def run_nb_streams(
    stream_trained,
    stream_untrained,
    model,
    drift_detector,
    batch_size=32,
    print_every=1,
    device="cpu",
):
    """
    Runs the trained stream to collect the model's predictions as reference
    labels, then runs the untrained stream and scores the model's predictions
    on it against those reference labels, feeding the error to the drift
    detector.

    Args:
        stream_trained (WOSStream): the Web of Science stream on which the model was trained
        stream_untrained (WOSStream): the Web of Science stream to be compared against the trained one
        model (NaiveBayes): the Naive Bayes model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): size passed to ``next_sample`` for every batch
        print_every (int): how often we print
        device (str): cpu or cuda (not used by this function)

    Returns:
        two lists (trained, untrained) of ``(accuracy, level)`` tuples, where
        level is "W" (warning), "D" (drift) or "N" (neither)

    """
    reference_labels = []
    trained_accuracies = []

    print("Running trained stream...")
    batch_no = 0
    while stream_trained.has_more_samples():
        # Stop as soon as a full batch can no longer be drawn.
        if stream_trained.n_remaining_samples() < batch_size:
            break
        x_, _ = stream_trained.next_sample(batch_size)

        # Only the first element of x_ is needed (no sequence lengths for NB);
        # reduce it with a maximum over axis 1.
        features = np.amax(x_[0].numpy(), axis=1)
        reference_labels.append(model.predict(features))

        if batch_no % print_every == print_every - 1:
            print("Accuracy: {}".format(1.0))

        # The trained pass has no real error, so a random value is fed in.
        drift_detector.add_element(1 - np.random.uniform(low=0.9, high=1.0))
        if drift_detector.detected_warning_zone():
            level, message = "W", "Warning zone"
        elif drift_detector.detected_change():
            level, message = "D", "Drift detected"
        else:
            level, message = "N", None
        trained_accuracies.append((1.0, level))
        if message is not None:
            print(message)

        batch_no += 1

    untrained_accuracies = []
    window_acc = 0.0

    print("Running untrained stream...")
    batch_no = 0
    while stream_untrained.has_more_samples():
        if stream_untrained.n_remaining_samples() < batch_size:
            break
        x_, _ = stream_untrained.next_sample(batch_size)
        # The reference labels come from the same batch index of the first pass.
        y = reference_labels[batch_no]

        features = np.amax(x_[0].numpy(), axis=1)
        y_pred = model.predict(features)
        accuracy = get_metrics(
            labels=y, predictions=y_pred, no_labels=stream_untrained.n_classes
        )["accuracy"]

        window_acc += accuracy
        if batch_no % print_every == print_every - 1:
            print("Accuracy: {}".format(window_acc / print_every))
            window_acc = 0.0

        drift_detector.add_element(1 - accuracy)
        if drift_detector.detected_warning_zone():
            level, message = "W", "Warning zone"
        elif drift_detector.detected_change():
            level, message = "D", "Drift detected"
        else:
            level, message = "N", None
        untrained_accuracies.append((accuracy, level))
        if message is not None:
            print(message)

        batch_no += 1

    return trained_accuracies, untrained_accuracies
# Run python evaluate_model.py [path to array of predictions] [path to test/validation dataframe]
import sys
from PIL import Image
from random import randint
import numpy as np
import pandas as pd
import math
import warnings
import pdb
from matplotlib import pyplot as plt
from utils.metrics import get_metrics
from utils.data_loader_utils import read_imgs_keraspp, read_imgs_keraspp_stacked

if len(sys.argv) != 3:
    print(
        "You should run: python evaluate_model.py [path to array of predictions] [path to test dataframe]"
    )
    # A usage error must not look like success to the calling shell.
    sys.exit(1)

# Saved predictions array and the dataframe describing the evaluation set.
predictions = np.load(sys.argv[1])
test_df = pd.read_csv(sys.argv[2])

# Only the targets are needed here; both arrays are flattened before scoring.
x_true, y_true = read_imgs_keraspp(test_df)
y_true = y_true.flatten()
y_pred = predictions.flatten()

get_metrics(y_true, y_pred, binarized=False)
signature=False, threshold=threshold) for compare in metric_comparisons: metric_df = shuffle_metric_results[compare].assign(permutation=i) all_shuffle_results[compare].append(metric_df) # In[7]: # Get ROC curve information for model sets roc_scores = [] roc_curve_data = [] for split in roc_model_split_focus: results_subset_df = results_df.query("Metadata_model_split == @split") for shuffle in [True, False]: roc_auc_val, roc_df = get_metrics(df=results_subset_df, return_roc_curve=True, shuffle=shuffle) roc_scores.append(pd.Series([roc_auc_val, split, shuffle])) roc_curve_data.append( roc_df.assign(model_split=split, shuffled=shuffle)) roc_scores_df = pd.DataFrame(roc_scores) roc_scores_df.columns = ["roc_auc", "model_split", "shuffled"] roc_curve_data_df = pd.concat(roc_curve_data).reset_index(drop=True) # In[8]: # Output performance results for compare in metric_comparisons: full_results_df = real_metric_results[compare]
def run_stream_lstm(
    stream,
    model,
    drift_detector,
    batch_size=1,
    print_every=1,
    noise_stds=None,
    warm_start=None,
    device="cpu",
):
    """
    Runs a stream on the LSTM model using the given drift detector.

    Args:
        stream (WOSStream): the Web of Science stream to be run
        model (LSTM): the LSTM model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): size passed to ``next_sample`` for every batch
        print_every (int): how often we print
        noise_stds (list): a list of standard deviations for the gradual noise,
            indexed by ``batch index - warm_start``. If None, no noise is added
        warm_start (int): batch index from which noise is added. None is
            treated as 0 (noise starts with the first batch).
        device (str): cpu or cuda

    Returns:
        a list of ``(accuracy, level)`` tuples, where level is "W" (warning),
        "D" (drift) or "N" (neither)

    """
    # With the default warm_start=None the original `i >= warm_start` check
    # raised TypeError on the very first batch, so resolve the offset once.
    noise_start = 0 if warm_start is None else warm_start

    i = 0
    running_acc = 0.0
    # Accuracies list (tuples of accuracy, and drift level)
    accuracies = []

    while stream.has_more_samples():
        # Get the batch from the stream; stop when a full batch is not left
        if stream.n_remaining_samples() >= batch_size:
            x_, y = stream.next_sample(batch_size)
        else:
            break

        x, seq_lens = x_

        # Add noise if we have standard deviations
        if noise_stds is not None and i >= noise_start:
            print("Adding noise")
            std = torch.zeros_like(x) + noise_stds[i - noise_start]
            noise = torch.normal(0, std)
            x = x + noise

        # Move the batch to device
        x = x.to(device)
        y = torch.from_numpy(y).to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and accuracy
        predictions, _ = model((x, seq_lens))
        metrics = get_metrics(
            labels=y, predictions=predictions, no_labels=stream.n_classes
        )
        accuracy = metrics["accuracy"]

        # Print the mean accuracy of the last `print_every` batches
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            running_acc = 0.0

        # Add the error rate to the drift detector
        drift_detector.add_element(1 - accuracy)
        if drift_detector.detected_warning_zone():
            accuracies.append((accuracy, "W"))
            print("Warning zone")
        elif drift_detector.detected_change():
            accuracies.append((accuracy, "D"))
            print("Drift detected")
        else:
            accuracies.append((accuracy, "N"))

        i += 1

    return accuracies
def run_stream_nb(
    stream,
    model,
    drift_detector,
    batch_size=1,
    print_every=1,
    noise_stds=None,
    warm_start=None,
    device="cpu",
):
    """
    Runs a stream on the Naive Bayes model using the given drift detector.

    Args:
        stream (WOSStream): the Web of Science stream to be run
        model (NaiveBayes): the Naive Bayes model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): size passed to ``next_sample`` for every batch
        print_every (int): how often we print
        noise_stds (list): a list of standard deviations for the gradual noise,
            indexed by ``batch index - warm_start``. If None, no noise is added
        warm_start (int): batch index from which noise is added. None is
            treated as 0 (noise starts with the first batch).
        device (str): cpu or cuda (not used by this function)

    Returns:
        a list of ``(accuracy, level)`` tuples, where level is "W" (warning),
        "D" (drift) or "N" (neither)

    """
    # With the default warm_start=None the original `i >= warm_start` check
    # raised TypeError on the very first batch, so resolve the offset once.
    noise_start = 0 if warm_start is None else warm_start

    i = 0
    running_acc = 0.0
    # Accuracies list (tuples of accuracy, and drift level)
    accuracies = []

    while stream.has_more_samples():
        # Get the batch from the stream; stop when a full batch is not left
        if stream.n_remaining_samples() >= batch_size:
            x_, y = stream.next_sample(batch_size)
        else:
            break

        # Unpack x_ (we do not need the sequence lengths for NB)
        x = x_[0].numpy()
        # Take the maximum over the axis 1
        x = np.amax(x, axis=1)

        # Add noise if we have standard deviations
        if noise_stds is not None and i >= noise_start:
            print("Adding noise")
            noise = np.random.normal(0, noise_stds[i - noise_start], x.shape)
            x = x + noise

        # Get the predictions and metrics
        y_pred = model.predict(x)
        metrics = get_metrics(
            labels=y, predictions=y_pred, no_labels=stream.n_classes
        )
        accuracy = metrics["accuracy"]

        # Print the mean accuracy of the last `print_every` batches
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            running_acc = 0.0

        # Add the error rate to the drift detector
        drift_detector.add_element(1 - accuracy)
        if drift_detector.detected_warning_zone():
            accuracies.append((accuracy, "W"))
            print("Warning zone")
        elif drift_detector.detected_change():
            accuracies.append((accuracy, "D"))
            print("Drift detected")
        else:
            accuracies.append((accuracy, "N"))

        i += 1

    return accuracies
def _train_epoch(self, epoch):
    """Run one training epoch with mixed precision and log metrics on rank 0.

    Each batch yields ``(img, gt, Sgt, Lgt, mask)``. Depending on
    ``self.gt_num`` the model returns one, two or three outputs and the loss
    receives the matching ground truths. Every ``self.log_step`` batches the
    masked predictions are thresholded at 0.5 and, on rank 0, folded into the
    running confusion counts and shown in the progress bar. After the loop
    rank 0 writes the epoch metrics and learning rates to ``self.writer`` and
    the learning-rate scheduler is stepped.

    NOTE(review): the source of this method had lost its indentation; the
    nesting below is a reconstruction and should be confirmed, in particular
    which statements sit under the ``rank == 0`` and ``log_step`` guards.
    """
    self.model.train()
    if self.rank == 0:
        wrt_mode = 'train'
    y_true = []
    y_score = []
    y_score_b = []
    tic = time.time()
    self._reset_metrics()
    tbar = tqdm(self.train_loader, ncols=160)
    for batch_idx, (img, gt, Sgt, Lgt, mask) in enumerate(tbar):
        if self.rank == 0:
            # Time elapsed since `tic` was last reset, before the batch is used.
            self.data_time.update(time.time() - tic)
        img = img.to(self.rank, non_blocking=True)
        gt = gt.to(self.rank, non_blocking=True)
        mask = mask.to(self.rank, non_blocking=True)
        Sgt = Sgt.to(self.rank, non_blocking=True)
        Lgt = Lgt.to(self.rank, non_blocking=True)
        # LOSS & OPTIMIZE
        self.optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=True):
            # The number of model outputs and loss arguments follows gt_num.
            if self.gt_num == 1:
                predict = self.model(img)
                loss = self.loss(predict, gt)
            elif self.gt_num == 2:
                s, predict = self.model(img)
                loss = self.loss(predict, gt, s, Sgt)
            else:
                l, s, predict = self.model(img)
                loss = self.loss(predict, gt, s, Sgt, l, Lgt)
        # Scaled backward pass and optimizer step through the gradient scaler.
        self.scaler.scale(loss).backward()
        self.scaler.step(self.optimizer)
        self.scaler.update()
        if self.rank == 0:
            self.total_loss.update(loss.item())
            # measure elapsed time
            self.batch_time.update(time.time() - tic)
            tic = time.time()
        # LOGGING & TENSORBOARD
        if batch_idx % self.log_step == 0:
            # wrt_step is computed here but not read again in this method.
            wrt_step = (epoch - 1) * len(self.train_loader) + batch_idx
            predict = torch.sigmoid(predict).cpu().detach().numpy().ravel()
            predict_b = np.where(predict >= 0.5, 1, 0)
            # predict_b = torch.where(predict >= 0.5, torch.full_like(predict, 1), torch.full_like(predict, 0))
            mask = mask.cpu().detach().numpy().ravel()
            # Keep only the positions where the flattened mask equals 1.
            y_true = gt.cpu().detach().numpy().ravel()[mask == 1]
            y_score = predict[mask == 1]
            y_score_b = predict_b[mask == 1]
            # FOR EVAL and INFO
            if self.rank == 0:
                self._update_seg_metrics(*eval_metrics(y_true, y_score_b))
                metrics = get_metrics(self.tn, self.fp, self.fn, self.tp)
                tbar.set_description(
                    'TRAIN ({}) | Loss: {:.4f} | Acc {:.4f} Pre {:.4f} Sen {:.4f} Spe {:.4f} f1 {:.4f} IOU {:.4f} |B {:.2f} D {:.2f} |'.format(
                        epoch, self.total_loss.average, *metrics.values(), self.batch_time.average,
                        self.data_time.average))
    # METRICS TO TENSORBOARD
    if self.rank == 0:
        # y_true / y_score / y_score_b hold the values of the last logged batch.
        metrics = get_metrics_full(self.tn, self.fp, self.fn, self.tp, y_true, y_score, y_score_b)
        self.writer.add_scalar(f'{wrt_mode}/loss', self.total_loss.average, epoch)
        # The last entry of `metrics` is skipped when writing scalars.
        for k, v in list(metrics.items())[:-1]:
            self.writer.add_scalar(f'{wrt_mode}/{k}', v, epoch)
        for i, opt_group in enumerate(self.optimizer.param_groups):
            self.writer.add_scalar(f'{wrt_mode}/Learning_rate_{i}', opt_group['lr'], epoch)
            # self.writer.add_scalar(f'{self.wrt_mode}/Momentum_{k}', opt_group['momentum'], self.wrt_step)
    self.lr_scheduler.step()
n_jobs=1, #eta0 is the learning rate when we use constant configuration random_state=SVM_SEED, learning_rate='optimal', eta0=0.0, class_weight=SVM_CLASS_WEIGHTS, warm_start=False), n_jobs=1) clf.fit(X, y) # Training Metrics info('Evaluating on Training Data') yp = clf.predict(X) yp_score = clf.decision_function(X) info('Calculating training metrics') training_metrics = get_metrics(y, yp_score, yp) print "** Training Metrics: Cov Err: {:.3f}, Avg Labels: {:.3f}, \n\t\t Top 1: {:.3f}, Top 3: {:.3f}, Top 5: {:.3f}, \n\t\t F1 Micro: {:.3f}, F1 Macro: {:.3f}, Total Pos: {:,d}".format( training_metrics['coverage_error'], training_metrics['average_num_of_labels'], training_metrics['top_1'], training_metrics['top_3'], training_metrics['top_5'], training_metrics['f1_micro'], training_metrics['f1_macro'], training_metrics['total_positive']) # Get the validation data info('Getting Valdiation Data') Xv = pickle.load(open(data_validation_location, 'r')) validation_data_docids = pickle.load(open(data_validation_docids_location, "r")) yv = get_label_data(classifications, validation_data_docids, doc_classification_map)
def run_lstm_streams(
    stream_trained,
    stream_untrained,
    model,
    drift_detector,
    batch_size=32,
    print_every=1,
    device="cpu",
):
    """
    Runs the trained stream to collect the model's predictions as reference
    labels, then runs the untrained stream and scores the model's predictions
    on it against those reference labels, feeding the error to the drift
    detector.

    Args:
        stream_trained (WOSStream): the Web of Science stream on which the model was trained
        stream_untrained (WOSStream): the Web of Science stream to be compared against the trained one
        model (LSTM): the LSTM model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): size passed to ``next_sample`` for every batch
        print_every (int): how often we print
        device (str): cpu or cuda

    Returns:
        two lists (trained, untrained) of ``(accuracy, level)`` tuples, where
        level is "W" (warning), "D" (drift) or "N" (neither)

    """
    i = 0
    # Accuracies list (tuples of accuracy, and drift level)
    trained_accuracies = []
    labels = []

    print("Running trained stream...")
    while stream_trained.has_more_samples():
        # Get the batch from the stream; stop when a full batch is not left
        if stream_trained.n_remaining_samples() >= batch_size:
            x_, _ = stream_trained.next_sample(batch_size)
        else:
            break

        x, seq_lens = x_

        # Move the batch to device
        x = x.to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and add them to labels
        predictions, _ = model((x, seq_lens))
        labels.append(predictions.argmax(dim=1))

        # Print if necessary
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(1.0))

        # The trained pass has no real error, so a random value is fed in
        drift_detector.add_element(1 - np.random.uniform(low=0.9, high=1.0))
        if drift_detector.detected_warning_zone():
            trained_accuracies.append((1.0, "W"))
            print("Warning zone")
        elif drift_detector.detected_change():
            trained_accuracies.append((1.0, "D"))
            print("Drift detected")
        else:
            trained_accuracies.append((1.0, "N"))

        i += 1

    i = 0
    running_acc = 0.0
    # Accuracies list (tuples of accuracy, and drift level)
    untrained_accuracies = []

    print("Running untrained stream...")
    while stream_untrained.has_more_samples():
        # Get the batch from the stream; stop when a full batch is not left
        if stream_untrained.n_remaining_samples() >= batch_size:
            x_, _ = stream_untrained.next_sample(batch_size)
            # Reference labels come from the same batch index of the first pass
            y = labels[i]
        else:
            break

        x, seq_lens = x_

        # Move the batch to device
        x = x.to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and accuracy
        predictions, _ = model((x, seq_lens))
        metrics = get_metrics(
            # The stored labels live on `device`; a CUDA tensor must be copied
            # to host memory before it can be converted to a NumPy array.
            labels=y.detach().cpu().numpy(),
            predictions=predictions,
            no_labels=stream_untrained.n_classes,
        )
        accuracy = metrics["accuracy"]

        # Print the mean accuracy of the last `print_every` batches
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            running_acc = 0.0

        # Add the error rate to the drift detector
        drift_detector.add_element(1 - accuracy)
        if drift_detector.detected_warning_zone():
            untrained_accuracies.append((accuracy, "W"))
            print("Warning zone")
        elif drift_detector.detected_change():
            untrained_accuracies.append((accuracy, "D"))
            print("Drift detected")
        else:
            untrained_accuracies.append((accuracy, "N"))

        i += 1

    return trained_accuracies, untrained_accuracies
def main():
    """Evaluate a saved model on the test partition and store the results.

    Loads ``<checkpoint>/<arc>/<arc>_entire.pt``, runs ``train.test_model``
    over the test partition, writes a confusion-matrix image next to the
    model and records loss, accuracy and the report returned by
    ``analyzer.get_metrics`` under ``'test_metrics'`` in the experiment report.
    """
    args = parse_arguments()

    # Model and experiment identification
    model_id = args.arc
    experiment_path = os.path.join(args.checkpoint, args.arc)
    model_path = os.path.join(experiment_path, model_id + '_entire.pt')
    print("Testing model: " + args.arc + "\n")

    # Reporter
    reporter = Reporter(experiment_path, model_id + '_report.json')
    reporter.load(os.path.join(experiment_path, model_id + '_report.json'))

    # Augments: every augmentation is switched off for testing
    augments = ['white_noise']
    augments = {key: False for key in augments}

    # Set seed
    train.set_seed(args.seed)

    # Read test df (the first returned partition is not needed here)
    _, test_dataset = loader.load_train_partitions(
        args.partition_path,
        window_size=int(args.time_window * args.sampling_rate),
        fs=args.sampling_rate,
        augments=augments)
    print("Test data information")
    print(test_dataset)

    # Generate data loaders
    test_loader = DataLoader(test_dataset, shuffle=False,
                             batch_size=args.batch_size, drop_last=False)

    # Build model
    model = torch.load(model_path)
    if ("sgru" in args.arc):
        net_class = 'rnn'
    else:
        net_class = 'cnn'

    # Select device
    if (torch.cuda.is_available() and args.cuda):
        device = torch.device("cuda")
        model.to(device)
        print("Running on GPU")
    else:
        device = torch.device("cpu")
        print("Running on CPU")

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Test model
    labels, predictions, metrics = train.test_model(
        model, test_loader, criterion, device, args.batch_size,
        net_class=net_class)

    # Save confusion matrix
    dict_path = os.path.join(args.partition_path, 'classes_index.json')
    # Close the index file deterministically instead of leaking the handle.
    with open(dict_path, 'r') as index_file:
        classes_index = json.load(index_file)
    # One class name per entry of a prediction, looked up by its string index.
    target_names = [
        classes_index[str(index)] for index in range(len(predictions[0]))
    ]

    result = analyzer.get_metrics(labels, predictions,
                                  target_names=target_names)
    analyzer.plot_confusion_matrix(
        result[1], target_names,
        os.path.join(experiment_path, model_id + '_confusion.png'),
        normalize=False)

    # Store metrics
    test_metrics = {
        'loss': metrics['test_loss'],
        'accuracy': metrics['test_accuracy'],
        'report': result[0]
    }
    reporter.report('test_metrics', test_metrics)
def _valid_epoch(self, epoch):
    """Run one validation epoch without gradients and log results on rank 0.

    Each batch yields ``(img, gt, Sgt, Lgt, mask)``. Depending on
    ``self.gt_num`` the model returns one, two or three outputs and the loss
    receives the matching ground truths. Masked predictions are thresholded
    at 0.5 and folded into the running confusion counts. Afterwards rank 0
    writes an image grid, the loss and the metrics to ``self.writer``.

    Returns:
        a dict with ``'val_loss'`` plus every entry of the full metrics dict.

    NOTE(review): the source of this method had lost its indentation; the
    nesting below is a reconstruction and should be confirmed, in particular
    whether ``return log`` belongs inside the final ``rank == 0`` block.
    """
    if self.rank == 0:
        logger.info('\n###### EVALUATION ######')
        wrt_mode = 'val'
    self._reset_metrics()
    val_img = []
    y_true = []
    y_score = []
    y_score_b = []
    self.model.eval()
    tbar = tqdm(self.val_loader, ncols=160)
    with torch.no_grad():
        for batch_idx, (img, gt, Sgt, Lgt, mask) in enumerate(tbar):
            img = img.to(self.rank, non_blocking=True)
            gt = gt.to(self.rank, non_blocking=True)
            mask = mask.to(self.rank, non_blocking=True)
            Sgt = Sgt.to(self.rank, non_blocking=True)
            Lgt = Lgt.to(self.rank, non_blocking=True)
            # LOSS
            with torch.cuda.amp.autocast(enabled=True):
                # The number of model outputs and loss arguments follows gt_num.
                if self.gt_num == 1:
                    predict = self.model(img)
                    loss = self.loss(predict, gt)
                elif self.gt_num == 2:
                    s, predict = self.model(img)
                    loss = self.loss(predict, gt, s, Sgt)
                else:
                    l, s, predict = self.model(img)
                    loss = self.loss(predict, gt, s, Sgt, l, Lgt)
            if self.rank == 0:
                self.total_loss.update(loss.item())
                # predict keeps its shape here (no ravel) because predict_b[0]
                # is stored as an image further down.
                predict = torch.sigmoid(predict).cpu().detach().numpy()
                predict_b = np.where(predict >= 0.5, 1, 0)
                mask = mask.cpu().detach().numpy().ravel()
                # Keep only the positions where the flattened mask equals 1.
                y_true = gt.cpu().detach().numpy().ravel()[mask == 1]
                y_score = predict.ravel()[mask == 1]
                y_score_b = predict_b.ravel()[mask == 1]
                # FOR EVAL and INFO
                self._update_seg_metrics(*eval_metrics(y_true, y_score_b))
                metrics = get_metrics(self.tn, self.fp, self.fn, self.tp)
                tbar.set_description(
                    'EVAL ({}) | Loss: {:.4f} | Acc {:.4f} Pre {:.4f} Sen {:.4f} Spe {:.4f} f1 {:.4f} IOU {:.4f} |'.format(
                        epoch, self.total_loss.average, *metrics.values()))
                # LIST OF IMAGE TO VIZ (15 images)
                # NOTE(review): the condition below keeps the first sample of
                # each of the first 10 batches, not 15 images.
                if batch_idx < 10:
                    val_img.extend([img[0].data.cpu(), gt[0].data.cpu(), torch.tensor(predict_b[0])])
    if self.rank == 0:
        # Rows of three: input, target, binarised prediction.
        val_img = torch.stack(val_img, 0)
        val_img = make_grid(val_img, nrow=3, padding=2)
        if self.show is True:
            plt.figure(figsize=(12, 36))
            plt.imshow(transforms.ToPILImage()(val_img.squeeze(0)).convert('L'), cmap='gray')
            plt.show()
        # LOGGING & TENSORBOARD
        wrt_step = epoch
        # y_true / y_score / y_score_b hold the values of the last batch.
        metrics = get_metrics_full(self.tn, self.fp, self.fn, self.tp, y_true, y_score, y_score_b)
        self.writer.add_image(f'{wrt_mode}/inputs_targets_predictions', val_img, wrt_step)
        self.writer.add_scalar(f'{wrt_mode}/loss', self.total_loss.average, wrt_step)
        # The last entry of `metrics` is skipped when writing scalars.
        for k, v in list(metrics.items())[:-1]:
            self.writer.add_scalar(f'{wrt_mode}/{k}', v, wrt_step)
        log = {
            'val_loss': self.total_loss.average,
            **metrics
        }
        return log
max_q_size=QUEUE_SIZE) # using the recorded weights of the best recorded validation loss last_model_weights = model.get_weights() info('Evaluating on Validation Data using saved best weights') model.set_weights(metrics_callback.best_weights) yvp = model.predict_generator(generator=batch_generator(Xv_file, yv_file, NN_BATCH_SIZE, is_mlp=True, validate=True), max_q_size=QUEUE_SIZE, val_samples=len(validation_docs_list)) yvp_binary = get_binary_0_5(yvp) info('Generating Validation Metrics') validation_metrics = get_metrics(yv, yvp, yvp_binary) print "****** Validation Metrics: Cov Err: {:.3f} | Top 3: {:.3f} | Top 5: {:.3f} | F1 Micro: {:.3f} | F1 Macro: {:.3f}".format( validation_metrics['coverage_error'], validation_metrics['top_3'], validation_metrics['top_5'], validation_metrics['f1_micro'], validation_metrics['f1_macro']) best_validation_metrics = validation_metrics time.sleep(0.2) param_results_dict[GLOBAL_VARS.NN_MODEL_NAME] = dict() param_results_dict[GLOBAL_VARS.NN_MODEL_NAME][ 'best_validation_metrics'] = best_validation_metrics param_results_dict[GLOBAL_VARS.NN_MODEL_NAME]['epochs'] = len( history.history['val_loss']) param_results_dict[GLOBAL_VARS.NN_MODEL_NAME][ 'best_weights'] = metrics_callback.best_weights param_results_dict[GLOBAL_VARS.NN_MODEL_NAME][
def evaluate_model(model_dicts_list, train_val_splits, outcomes_df,
                   reward_params, test_outcomes_df=None, include_defer=False):
    """Summarise policy metrics per cohort as mean and standard deviation.

    For every (split, model dict) pair and every cohort in the split, the
    model stored under ``tuple(param_setting.items())`` produces actions for
    the cohort, which are merged with the outcomes on ``example_id`` and
    scored. Scores are then aggregated over the splits.

    Args:
        model_dicts_list: one dict per split, keyed by
            ``tuple(param_setting.items())``.
        train_val_splits: one dict per split mapping cohort name to a frame.
        outcomes_df: outcomes merged for every cohort except ``'test'``.
        reward_params: non-empty list of dicts sharing the same keys.
        test_outcomes_df: outcomes merged for the ``'test'`` cohort.
        include_defer: score with ``get_metrics_with_deferral`` instead of
            ``get_metrics``.

    Returns:
        dict mapping cohort name to a DataFrame whose columns are the sorted
        reward-parameter names, the metric means and ``<metric>_stdev``.
    """
    reward_param_names = sorted(reward_params[0].keys())
    metric_names = None
    param_settings_array = np.array([
        [setting[name] for name in reward_param_names]
        for setting in reward_params
    ])

    per_cohort_stats = defaultdict(list)
    for split, models_by_setting in zip(train_val_splits, model_dicts_list):
        for cohort_name, cohort_df in split.items():
            rows = []
            for setting in reward_params:
                logging.info("Storing primary outcomes...")

                # Compute metrics of interest here
                policy_model = models_by_setting[tuple(setting.items())]
                actions_df = policy_model.get_actions(cohort_df)

                if cohort_name == 'test':
                    outcome_source = test_outcomes_df
                else:
                    outcome_source = outcomes_df
                actions_outcomes_df = actions_df.merge(
                    outcome_source, on='example_id')

                if include_defer:
                    metrics = get_metrics_with_deferral(actions_outcomes_df)
                else:
                    metrics = get_metrics(actions_outcomes_df)

                if metric_names is None:
                    # The first result fixes the metric order for all rows.
                    metric_names = list(metrics.keys())
                rows.append([metrics[name] for name in metric_names])

            per_cohort_stats[cohort_name].append(np.array(rows))

    columns = (
        reward_param_names
        + metric_names
        + [f'{metric}_stdev' for metric in metric_names]
    )

    stats_dict_final = {}
    for cohort_name, split_stats in per_cohort_stats.items():
        stacked = np.array(split_stats)
        # Aggregate across splits (axis 0).
        means = stacked.mean(axis=0)
        stdevs = stacked.std(axis=0)
        table = np.hstack([np.array(param_settings_array), means, stdevs])
        logging.info("Completed calculating means")
        stats_dict_final[cohort_name] = pd.DataFrame(table, columns=columns)

    return stats_dict_final
def run_stream_with_mapping(
    stream,
    model,
    mapping,
    batch_size=1,
    print_every=1,
    device="cpu",
):
    """
    Runs a stream with a mapping to convert from the stream's inputs
    embeddings space to the embedding space outputted by the mapping.

    Args:
        stream (WOSStream): the Web of Science stream to be run
        model (LSTM): the LSTM model to evaluate
        mapping (Mapping or np.ndarray): either a matrix applied as
            ``x.matmul(mapping.T)``, or an object whose ``.mapping`` attribute
            is called on the batch
        batch_size (int): size passed to ``next_sample`` for every batch
        print_every (int): how often we print
        device (str): cpu or cuda

    Returns:
        a list with the mean accuracy of each group of ``print_every`` batches

    """
    # Initialize variables for tracking
    i = 0
    running_acc = 0.0
    accuracies = []

    if isinstance(mapping, np.ndarray):
        # A raw matrix has no `.mapping` attribute; convert the array itself.
        mapping = torch.tensor(mapping, dtype=torch.float)
    else:
        mapping = mapping.mapping
        mapping.eval()
    # `torch.tensor` is a factory function, not a type, so the matrix case has
    # to be detected with isinstance against `torch.Tensor`.
    mapping_is_matrix = isinstance(mapping, torch.Tensor)

    # Run stream
    while stream.has_more_samples():
        # Get the batch from the stream; stop when a full batch is not left
        if stream.n_remaining_samples() >= batch_size:
            x_, y = stream.next_sample(batch_size)
        else:
            break

        x, seq_lens = x_

        # Put in the mapping to transform to the other embedding space
        if mapping_is_matrix:
            x = x.matmul(mapping.T)
        else:
            with torch.no_grad():
                x = mapping(x)
        # Both branches must end up on the same device as y and seq_lens.
        x = x.to(device)

        y = torch.from_numpy(y).to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and accuracy
        predictions, _ = model((x, seq_lens))
        metrics = get_metrics(
            labels=y, predictions=predictions, no_labels=stream.n_classes
        )
        accuracy = metrics["accuracy"]

        # Record the mean accuracy of the last `print_every` batches
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            accuracies.append(running_acc / print_every)
            running_acc = 0.0

        i += 1

    return accuracies
def train_nb_wos_holdout(
    epochs=1,
    batch_size=utils.BATCH_SIZE,
    transform=True,
    transformer_model=TransformerModel.BERT,
    print_every=10,
    device="cpu",
):
    """
    Trains the Naive Bayes model on the Web of Science dataset.

    The model is fitted incrementally batch by batch. Every ``print_every``
    batches it is evaluated on the stream's test set and the statistics are
    printed and recorded. The fitted model is saved as ``model.joblib``.

    Args:
        epochs (int): number of times the stream is run
        batch_size (int): the batch size
        transform (bool): transform the dataset or not
        transformer_model (TransformerModel): the transformer model to use
        print_every (int): print stats parameter
        device (string): the device to run the training on (cpu or gpu)

    Returns:
        ``(train_accuracies, test_metrics_list)``, one entry of each per
        printed group of batches

    """
    # Prepare the stream
    stream = WOSStream(transformer_model=transformer_model,
                       transform=transform, device=device)
    stream.prepare_for_use()

    # Define model
    model = GaussianNB()
    model_name = "naive-bayes-wos-{}-ver-{}-holdout".format(
        transformer_model.name, stream.version)
    model_path = os.path.join(PATH, model_name)
    os.makedirs(model_path, exist_ok=True)
    # partial_fit needs the full label set on every call
    all_labels = np.arange(stream.n_classes)

    print("Starting training...")
    train_accuracies, test_metrics_list = [], []
    for epoch in range(epochs):
        # Initialize the running accuracy
        running_accuracy = 0.0
        # Start iterating over the dataset
        i = 0
        while stream.has_more_samples():
            # Get the batch from the stream; stop when a full batch is not left
            if stream.n_remaining_samples() >= batch_size:
                x_, y = stream.next_sample(batch_size)
            else:
                break

            # Unpack x_ (we do not need the sequence lengths for NB)
            x = x_[0].numpy()
            # Take the maximum over the axis 1
            x = np.amax(x, axis=1)

            # Partial fit the model
            model.partial_fit(x, y, classes=all_labels)

            # Update running accuracy
            running_accuracy += accuracy_score(y, model.predict(x))

            # Print statistics
            if i % print_every == print_every - 1:
                # Evaluate the model on the test set
                x_test_, y_test = stream.get_test_set()
                x_test = x_test_[0].numpy()
                x_test = np.amax(x_test, axis=1)
                y_pred = model.predict(x_test)
                # Pass by keyword: the positional call handed the predictions
                # in as labels, which swaps precision and recall.
                test_metrics = get_metrics(labels=y_test,
                                           predictions=y_pred,
                                           no_labels=stream.n_classes)

                accuracy = running_accuracy / print_every
                # Print every `print_every` batches
                print("[{}/{} epochs, {}/{} batches] train accuracy: {:.4f}, "
                      "test (accuracy: {:.4f}, precision: {:.4f}, "
                      "recall: {:.4f}, f1: {:.4f})".format(
                          epoch + 1,
                          epochs,
                          i + 1,
                          stream.n_samples // batch_size + 1,
                          accuracy,
                          test_metrics["accuracy"],
                          test_metrics["precision"],
                          test_metrics["recall"],
                          test_metrics["macro_f1"],
                      ))
                train_accuracies.append(accuracy)
                test_metrics_list.append(test_metrics)
                running_accuracy = 0

            # Increment i
            i += 1

        stream.restart()

    # Save model
    print("Finished training. Saving model..")
    dump(model, os.path.join(model_path, "model.joblib"))
    print("Done!")

    return train_accuracies, test_metrics_list