def inference_epoch_end(self, outputs: EPOCH_OUTPUT, stage: Stage) -> Dict[str, float]:
    targets_all = aggregate_over_epoch(outputs=outputs, metric="targets")
    subgroup_inf_all = aggregate_over_epoch(outputs=outputs, metric="subgroup_inf")
    logits_y_all = aggregate_over_epoch(outputs=outputs, metric="logits_y")
    preds_y_all = hard_prediction(logits_y_all)

    dt = em.DataTuple(
        x=pd.DataFrame(
            # Dummy features: rand_like needs a floating-point dtype, and the
            # subgroup labels are integer-typed.
            torch.rand_like(subgroup_inf_all, dtype=torch.float).detach().cpu().numpy(),
            columns=["x0"],
        ),
        s=pd.DataFrame(subgroup_inf_all.detach().cpu().numpy(), columns=["s"]),
        y=pd.DataFrame(targets_all.detach().cpu().numpy(), columns=["y"]),
    )

    return em.run_metrics(
        predictions=em.Prediction(hard=pd.Series(preds_y_all.detach().cpu().numpy())),
        actual=dt,
        metrics=[em.Accuracy(), em.RenyiCorrelation(), em.Yanovich()],
        per_sens_metrics=[em.Accuracy(), em.ProbPos(), em.TPR()],
    )
def inference_epoch_end(self, outputs: EPOCH_OUTPUT, stage: Stage) -> Dict[str, float]:
    targets_all = aggregate_over_epoch(outputs=outputs, metric="targets")
    subgroup_inf_all = aggregate_over_epoch(outputs=outputs, metric="subgroup_inf")
    preds_all = aggregate_over_epoch(outputs=outputs, metric="preds")

    mean_preds = preds_all.mean(-1)
    mean_preds_s0 = preds_all[subgroup_inf_all == 0].mean(-1)
    mean_preds_s1 = preds_all[subgroup_inf_all == 1].mean(-1)

    dt = em.DataTuple(
        x=pd.DataFrame(
            torch.rand_like(subgroup_inf_all, dtype=torch.float).detach().cpu().numpy(),
            columns=["x0"],
        ),
        s=pd.DataFrame(subgroup_inf_all.detach().cpu().numpy(), columns=["s"]),
        y=pd.DataFrame(targets_all.detach().cpu().numpy(), columns=["y"]),
    )

    results_dict = em.run_metrics(
        # Hard predictions are obtained by thresholding the raw outputs at zero.
        predictions=em.Prediction(hard=pd.Series((preds_all > 0).detach().cpu().numpy())),
        actual=dt,
        metrics=[em.Accuracy(), em.RenyiCorrelation(), em.Yanovich()],
        per_sens_metrics=[em.Accuracy(), em.ProbPos(), em.TPR()],
    )
    results_dict.update(
        {
            "DP_Gap": float((mean_preds_s0 - mean_preds_s1).abs().item()),
            "mean_pred": float(mean_preds.item()),
        }
    )
    return results_dict
def _inference_epoch_end(self, output_results: List[Dict[str, Tensor]], stage: str) -> None:
    all_y = torch.cat([_r["y"] for _r in output_results], dim=0)
    all_s = torch.cat([_r["s"] for _r in output_results], dim=0)
    all_preds = torch.cat([_r["preds"] for _r in output_results], dim=0)

    dt = em.DataTuple(
        x=pd.DataFrame(
            torch.rand_like(all_s, dtype=torch.float).detach().cpu().numpy(),
            columns=["x0"],
        ),
        s=pd.DataFrame(all_s.detach().cpu().numpy(), columns=["s"]),
        y=pd.DataFrame(all_y.detach().cpu().numpy(), columns=["y"]),
    )

    results = em.run_metrics(
        predictions=em.Prediction(hard=pd.Series(all_preds.detach().cpu().numpy())),
        actual=dt,
        metrics=[em.Accuracy(), em.RenyiCorrelation(), em.Yanovich()],
        per_sens_metrics=[em.Accuracy(), em.ProbPos(), em.TPR()],
    )

    tm_acc = self.val_acc if stage == "val" else self.test_acc
    acc = tm_acc.compute().item()
    results_dict = {f"{stage}/acc": acc}
    results_dict.update({f"{stage}/{self.target}_{k}": v for k, v in results.items()})
    self.log_dict(results_dict)
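# The three epoch-end hooks above share one pattern: bundle the sensitive
# attribute and target into an EthicML DataTuple (the features are dummies,
# since only s and y matter for these metrics) and score hard predictions
# with em.run_metrics. A minimal, self-contained sketch of that pattern —
# the tensors below are made up purely for illustration, and the printed
# result keys are indicative only:
import ethicml as em
import pandas as pd
import torch

preds = torch.tensor([1, 0, 1, 1])
s = torch.tensor([0, 0, 1, 1])  # sensitive attribute
y = torch.tensor([1, 0, 1, 0])  # ground-truth target

dt = em.DataTuple(
    x=pd.DataFrame(torch.rand(len(s)).numpy(), columns=["x0"]),  # dummy features
    s=pd.DataFrame(s.numpy(), columns=["s"]),
    y=pd.DataFrame(y.numpy(), columns=["y"]),
)
results = em.run_metrics(
    predictions=em.Prediction(hard=pd.Series(preds.numpy())),
    actual=dt,
    metrics=[em.Accuracy()],
    per_sens_metrics=[em.ProbPos(), em.TPR()],  # computed per subgroup of s
)
print(results)  # e.g. {"Accuracy": ..., "prob_pos_s_0": ..., "TPR_s_1": ..., ...}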
def _run_epoch(
    self,
    model,
    dataloader,
    optimize=False,
    save_activations=False,
    reweight=False,
    bit_pretrained=False,
    adv_metrics=False,
):
    """Runs the model on a given dataloader.

    Note:
        The latter item in the returned tuple is what is necessary to run
        GEORGECluster.train and GEORGECluster.evaluate.

    Args:
        model(nn.Module): A PyTorch model.
        dataloader(DataLoader): The dataloader. The dataset within must
            subclass GEORGEDataset.
        optimize(bool, optional): If True, the model is trained on
            self.criterion.
        save_activations(bool, optional): If True, saves the activations in
            `outputs`. Default is False.
        bit_pretrained(bool, optional): If True, assumes bit_pretrained and
            does not evaluate performance metrics.

    Returns:
        metrics(Dict[str, Any]): A dictionary object that stores the metrics
            defined in self.config['metric_types'].
        outputs(Dict[str, Any]): A dictionary object that stores artifacts
            necessary for model analysis, including labels, activations, and
            predictions.
    """
    dataset = dataloader.dataset
    self._check_dataset(dataset)
    type_to_num_classes = {
        label_type: dataset.get_num_classes(label_type)
        for label_type in LABEL_TYPES
        if label_type in dataset.Y_dict.keys()
    }
    outputs = defaultdict(list)
    # The hook fills outputs["activations"] as a side effect of each forward pass.
    activations_handle = self._init_activations_hook(model, outputs["activations"])
    if optimize:
        progress_prefix = "Training"
        model.train()
    else:
        progress_prefix = "Evaluation"
        model.eval()

    with tqdm(desc=progress_prefix, total=len(dataloader)) as pbar:
        for inputs, targets in dataloader:
            if self.use_cuda:
                inputs, targets = move_to_device([inputs, targets], device=self.device)

            # Record the labels for each label type.
            type_to_labels = {}
            for label_type in type_to_num_classes.keys():
                type_to_labels[label_type] = targets[label_type]
                outputs[label_type].append(targets[label_type])

            if optimize and not bit_pretrained:
                logits = model(inputs)
                loss_targets = targets["superclass"]
                co = self.criterion(logits, loss_targets, targets["subclass"].long())
                loss, (losses, corrects), _ = co
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            else:
                with torch.no_grad():
                    logits = model(inputs)
                    loss_targets = targets["superclass"]
                    co = self.criterion(logits, loss_targets, targets["subclass"].long())
                    loss, (losses, corrects), _ = co

            if logits.size(1) == 1:
                # Binary head: a single logit per example.
                probs = logits.sigmoid().squeeze()
                preds = probs.round()
            else:
                probs = logits.softmax(dim=1)
                preds = logits.argmax(dim=1)

            outputs["probs"].append(probs.detach().cpu())
            outputs["preds"].append(preds.detach().cpu())
            outputs["losses"].append(losses.detach().cpu())
            outputs["targets"].append(loss_targets.detach().cpu())

            pbar.set_postfix(loss=loss.item(), acc=corrects.float().mean().item())
            pbar.update()

    if not save_activations:
        outputs["activations"].pop()  # delete activations

    outputs_cat = {}
    for key, value in outputs.items():
        if value:
            value = torch.cat(value, dim=0).detach().cpu().numpy()
        outputs_cat[key] = value
    del outputs

    superclass_labels = pd.DataFrame(outputs_cat["superclass"], columns=["superclass"])
    subclass_labels = pd.DataFrame(outputs_cat["true_subclass"], columns=["subclass"])
    # The true subclass doubles as both features and sensitive attribute here.
    actual = em.DataTuple(x=subclass_labels, s=subclass_labels, y=superclass_labels)
    predictions = em.Prediction(pd.Series(outputs_cat["preds"]))
    outputs_cat["metrics"] = compute_metrics(
        predictions=predictions,
        actual=actual,
        s_dim=dataset.get_num_classes("true_subclass"),
    )

    if activations_handle:
        activations_handle.remove()

    return outputs_cat["metrics"], outputs_cat
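# `_init_activations_hook` is not shown above; all that _run_epoch relies on
# is that it fills the given list during forward passes and returns a handle
# whose .remove() detaches the hook. A plausible minimal sketch of such a
# helper — the helper name and layer choice are assumptions for illustration,
# not the actual GEORGE implementation:
import torch
import torch.nn as nn

def init_activations_hook(model: nn.Module, storage: list):
    """Register a forward hook that appends a layer's activations to `storage`."""
    def hook(module, inputs, output):
        storage.append(output.detach().cpu())
    # Assumes the penultimate child module produces the features of interest;
    # adapt the layer selection to the model at hand.
    layer = list(model.children())[-2]
    return layer.register_forward_hook(hook)

# Usage mirrors _run_epoch: keep the returned handle and call handle.remove()
# once the epoch is finished so later forward passes stop accumulating tensors.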
def evaluate(
    cfg: Config,
    step: int,
    train_data: "Dataset[Tuple[Tensor, Tensor, Tensor]]",
    test_data: "Dataset[Tuple[Tensor, Tensor, Tensor]]",
    name: str,
    eval_on_recon: bool = True,
    pred_s: bool = False,
    save_to_csv: Optional[Path] = None,
    cluster_test_metrics: Optional[Dict[str, float]] = None,
    cluster_context_metrics: Optional[Dict[str, float]] = None,
):
    input_shape = next(iter(train_data))[0].shape

    additional_entries = {}
    if cluster_test_metrics is not None:
        additional_entries.update({f"Clust/Test {k}": v for k, v in cluster_test_metrics.items()})
    if cluster_context_metrics is not None:
        additional_entries.update(
            {f"Clust/Context {k}": v for k, v in cluster_context_metrics.items()}
        )

    if cfg.data.dataset in (DS.cmnist, DS.celeba, DS.genfaces):
        # Image datasets: train a PyTorch classifier and evaluate its predictions.
        train_loader = DataLoader(
            train_data, batch_size=cfg.fdm.batch_size, shuffle=True, pin_memory=True
        )
        test_loader = DataLoader(
            test_data, batch_size=cfg.fdm.test_batch_size, shuffle=False, pin_memory=True
        )
        clf: Classifier = fit_classifier(
            cfg,
            input_shape,
            train_data=train_loader,
            train_on_recon=eval_on_recon,
            pred_s=pred_s,
            test_data=test_loader,
        )
        preds, labels, sens = clf.predict_dataset(
            test_loader, device=torch.device(cfg.misc._device)
        )
        preds = em.Prediction(hard=pd.Series(preds))
        if cfg.data.dataset == DS.cmnist:
            sens_name = "colour"
        elif cfg.data.dataset == DS.celeba:
            sens_name = cfg.data.celeba_sens_attr
        else:
            sens_name = "sens_Label"
        sens_pd = pd.DataFrame(sens.numpy().astype(np.float32), columns=[sens_name])
        labels_pd = pd.DataFrame(labels, columns=["labels"])
        actual = em.DataTuple(x=sens_pd, s=sens_pd, y=sens_pd if pred_s else labels_pd)
        compute_metrics(
            cfg,
            preds,
            actual,
            name,
            "pytorch_classifier",
            step=step,
            save_to_csv=save_to_csv,
            results_csv=cfg.misc.results_csv,
            use_wandb=cfg.misc.use_wandb,
            additional_entries=additional_entries,
        )
    else:
        # Tabular datasets: fall back to EthicML's sklearn-backed baselines.
        if not isinstance(train_data, em.DataTuple):
            train_data, test_data = get_data_tuples(train_data, test_data)
        train_data, test_data = make_tuple_from_data(train_data, test_data, pred_s=pred_s)
        for eth_clf in [em.LR(), em.LRCV()]:  # could also include em.SVM(kernel="linear")
            preds = eth_clf.run(train_data, test_data)
            compute_metrics(
                cfg,
                preds,
                test_data,
                name,
                eth_clf.name,
                step=step,
                save_to_csv=save_to_csv,
                results_csv=cfg.misc.results_csv,
                use_wandb=cfg.misc.use_wandb,
                additional_entries=additional_entries,
            )
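# The non-image branch defers entirely to EthicML baselines. A standalone
# sketch of that loop, assuming `train` and `test` are em.DataTuple objects
# (e.g. as produced by make_tuple_from_data above); the metric selection is
# illustrative:
import ethicml as em

for eth_clf in [em.LR(), em.LRCV()]:
    preds = eth_clf.run(train, test)  # fits on train, returns an em.Prediction for test
    results = em.run_metrics(
        predictions=preds,
        actual=test,
        metrics=[em.Accuracy()],
        per_sens_metrics=[em.ProbPos(), em.TPR()],
    )
    print(eth_clf.name, results)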