def validate(
    conf,
    coordinator,
    model,
    criterion,
    metrics,
    data_loader,
    label="test_loader",
    display=True,
):
    """A function for model evaluation."""
    if data_loader is None:
        return None

    # switch to evaluation mode.
    model.eval()

    # place the model to the device.
    if conf.graph.on_cuda:
        model = model.cuda()

    # evaluate on test_loader.
    tracker_te = RuntimeTracker(metrics_to_track=metrics.metric_names)

    for _input, _target in data_loader:
        # load data and check performance.
        data_batch = create_dataset.load_data_batch(
            conf, _input, _target, is_training=False
        )

        with torch.no_grad():
            inference(
                conf,
                model,
                criterion,
                metrics,
                data_batch,
                tracker_te,
                is_training=False,
            )

    # place the model back on the cpu.
    if conf.graph.on_cuda:
        model = model.cpu()

    # display the test stat.
    perf = tracker_te()
    if label is not None:
        display_test_stat(conf, coordinator, tracker_te, label)
    if display:
        conf.logger.log(f"The validation performance = {perf}.")
    return perf
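# `RuntimeTracker` and `display_test_stat` are defined elsewhere in the code base.
# Conceptually, the tracker keeps a sample-weighted running mean for every tracked
# metric and reports those means when called. A minimal sketch of that idea follows;
# the class name and internals are hypothetical, not the repository's implementation.
class _RunningMeanTrackerSketch:
    def __init__(self, metrics_to_track):
        # per-metric (weighted sum, sample count); the loss is tracked alongside.
        self.stats = {name: [0.0, 0] for name in ["loss"] + list(metrics_to_track)}

    def update_metrics(self, values, n_samples):
        # `values` lists one value per tracked metric, in the same order.
        for name, value in zip(self.stats, values):
            self.stats[name][0] += value * n_samples
            self.stats[name][1] += n_samples

    def __call__(self):
        # report the sample-weighted mean of each metric.
        return {name: s / max(n, 1) for name, (s, n) in self.stats.items()}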
def _evaluate(_model, label):
    # define stat.
    tracker_te = RuntimeTracker(metrics_to_track=metrics.metric_names)

    # switch to evaluation mode.
    _model.eval()

    # define the hidden state for the RNN.
    _hidden = (
        model.module.init_hidden(conf.batch_size)
        if "DataParallel" == model.__class__.__name__
        else model.init_hidden(conf.batch_size)
    )

    for batch in data_loader["val_loader"]:
        # load data and check performance.
        _input, _target = batch.text, batch.target

        # repackage the hidden state.
        _hidden = (
            model.module.repackage_hidden(_hidden)
            if "DataParallel" == model.__class__.__name__
            else model.repackage_hidden(_hidden)
        )

        with torch.no_grad():
            _, _hidden = inference(
                conf,
                _model,
                criterion,
                metrics,
                _input,
                _target,
                _hidden,
                tracker_te,
            )

    # display the test stat.
    display_test_stat(conf, scheduler, tracker_te, label)

    # get the global (mean) performance.
    global_performance = tracker_te.evaluate_global_metrics()
    return global_performance
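# The hidden-state repackaging above relies on a `repackage_hidden` method defined on
# the RNN model, which is not shown here. As an illustration only (hypothetical helper,
# not the model's actual method), such a routine typically detaches the hidden state
# from the computation graph so it can be reused across batches without keeping the
# previous graph alive:
def _repackage_hidden_sketch(h):
    """Detach a hidden state tensor (or a nested tuple of tensors, as for LSTMs)."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(_repackage_hidden_sketch(v) for v in h)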
def _evaluate(_model, label):
    # define stat.
    tracker_te = RuntimeTracker(metrics_to_track=metrics.metric_names)

    # switch to evaluation mode.
    _model.eval()

    for _input, _target in data_loader["val_loader"]:
        # load data and check performance.
        _input, _target = load_data_batch(conf, _input, _target)

        with torch.no_grad():
            inference(_model, criterion, metrics, _input, _target, tracker_te)

    # display the test stat.
    display_test_stat(conf, scheduler, tracker_te, label)

    # get the global (mean) performance.
    global_performance = tracker_te.evaluate_global_metrics()
    return global_performance
def ensembled_validate(
    conf,
    coordinator,
    models,
    criterion,
    metrics,
    data_loader,
    label="test_loader",
    ensemble_scheme=None,
):
    """A function for evaluating an ensemble of models."""
    if data_loader is None:
        return None

    # switch to evaluation mode.
    for model in models:
        model.eval()

        # place the model to the device.
        if conf.graph.on_cuda:
            model = model.cuda()

    # evaluate on test_loader.
    tracker_te = RuntimeTracker(metrics_to_track=metrics.metric_names)

    for _input, _target in data_loader:
        # load data and check performance.
        data_batch = create_dataset.load_data_batch(
            conf, _input, _target, is_training=False
        )

        with torch.no_grad():
            # ensemble the model outputs.
            if ensemble_scheme in (None, "avg_losses", "avg_logits"):
                # average the raw logits.
                outputs = [model(data_batch["input"]) for model in models]
                output = sum(outputs) / len(outputs)
            elif ensemble_scheme == "avg_probs":
                # average the softmax probabilities (softmax over the class dimension).
                outputs = [
                    F.softmax(model(data_batch["input"]), dim=1) for model in models
                ]
                output = sum(outputs) / len(outputs)

            # eval the performance.
            loss = torch.FloatTensor([0])
            performance = metrics.evaluate(loss, output, data_batch["target"])

            # update the tracker.
            tracker_te.update_metrics(
                [loss.item()] + performance, n_samples=data_batch["input"].size(0)
            )

    # place the models back on the cpu.
    for model in models:
        if conf.graph.on_cuda:
            model = model.cpu()

    # display the test stat.
    if label is not None:
        display_test_stat(conf, coordinator, tracker_te, label)
    perf = tracker_te()
    conf.logger.log(f"The performance of the ensembled model: {perf}.")
    return perf
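# For clarity, a self-contained sketch of the "avg_probs" ensembling used above,
# assuming `models` is a list of classifiers mapping an input batch to 2-D logits
# (batch x classes). The function name is hypothetical and not part of the pipeline.
def _average_probs_sketch(models, inputs):
    """Average the softmax probabilities of several models for one input batch."""
    with torch.no_grad():
        probs = [F.softmax(model(inputs), dim=1) for model in models]
    return sum(probs) / len(probs)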