def compute_predictions(model, dataset, batch_size=1, workers=0) -> pd.DataFrame:
    """Run ``model`` over ``dataset`` and collect its predictions in a DataFrame.

    Batches are produced by a non-shuffling ``DataLoader`` (no samples are
    dropped), moved to the ``"cuda"`` device and passed to the model as
    keyword arguments.

    Args:
        model: Callable invoked as ``model(**batch)``; its result is queried
            with ``in`` and indexed by the ``OUTPUT_PRED_*`` keys.
        dataset: Dataset handed to ``DataLoader``.
        batch_size: Batch size forwarded to ``DataLoader``.
        workers: Forwarded to ``DataLoader`` as ``num_workers``.

    Returns:
        A DataFrame with an image-id column plus one column for each of the
        flag / type prediction keys that the model actually returned.
    """
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=workers,
        shuffle=False,
        drop_last=False,
        pin_memory=True,
    )

    columns = defaultdict(list)
    for batch in tqdm(loader):
        batch = any2device(batch, device="cuda")
        # Image ids are recorded before the forward pass, as in the original flow.
        columns[INPUT_IMAGE_ID_KEY].extend(batch[INPUT_IMAGE_ID_KEY])

        outputs = model(**batch)

        # The flag output is flattened to one value per element; the type
        # output is converted with ``tolist()``.
        for key, as_flat in (
            (OUTPUT_PRED_MODIFICATION_FLAG, True),
            (OUTPUT_PRED_MODIFICATION_TYPE, False),
        ):
            if key not in outputs:
                continue
            values = to_numpy(outputs[key])
            columns[key].extend(values.flatten() if as_flat else values.tolist())

    return pd.DataFrame.from_dict(columns)
def main():
    """Print the mean and std computed over every image in the data directory.

    The data directory comes from ``-dd/--data-dir`` and defaults to the
    ``KAGGLE_2020_ALASKA2`` environment variable. Images are collected from
    its ``Cover``, ``JMiPOD``, ``JUNIWARD`` and ``UERD`` sub-directories (in
    that order) and passed to ``compute_mean_std``.

    Exits with an argparse usage error when neither the option nor the
    environment variable supplies a directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd", "--data-dir", type=str, default=os.environ.get("KAGGLE_2020_ALASKA2"))
    args = parser.parse_args()

    data_dir = args.data_dir
    if data_dir is None:
        # Without this guard os.path.join(None, ...) fails with an opaque TypeError.
        parser.error("no data directory: pass --data-dir or set the KAGGLE_2020_ALASKA2 environment variable")

    dataset = []
    for subdir in ("Cover", "JMiPOD", "JUNIWARD", "UERD"):
        dataset += fs.find_images_in_dir(os.path.join(data_dir, subdir))

    # For a quick smoke run, truncate the list here: dataset = dataset[:500]

    mean, std = compute_mean_std(tqdm(dataset))
    print(mean.size())
    print(std.size())
    print("Mean", np.array2string(to_numpy(mean), precision=2, separator=",", max_line_width=119))
    print("Std ", np.array2string(to_numpy(std), precision=2, separator=",", max_line_width=119))
def compute_trn_predictions(model, dataset, fp16=False, batch_size=1, workers=0) -> pd.DataFrame:
    """Run ``model`` over ``dataset`` and collect targets and predictions.

    Batches come from a non-shuffling ``DataLoader`` (no samples dropped) and
    are moved to the ``"cuda"`` device before being passed to the model as
    keyword arguments.

    Args:
        model: Callable invoked as ``model(**batch)``.
        dataset: Dataset handed to ``DataLoader``.
        fp16: When true and the batch contains ``INPUT_FEATURES_JPEG_FLOAT``,
            that entry is converted with ``.half()`` before the forward pass.
        batch_size: Batch size forwarded to ``DataLoader``.
        workers: Forwarded to ``DataLoader`` as ``num_workers``.

    Returns:
        A DataFrame with the image ids and a column for each ground-truth,
        prediction, embedding and ``*_tta`` key that was present.
    """
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=workers,
        shuffle=False,
        drop_last=False,
        pin_memory=True,
    )
    flag_tta_key = OUTPUT_PRED_MODIFICATION_FLAG + "_tta"
    type_tta_key = OUTPUT_PRED_MODIFICATION_TYPE + "_tta"

    columns = defaultdict(list)
    for batch in tqdm(loader):
        batch = any2device(batch, device="cuda")

        if fp16 and INPUT_FEATURES_JPEG_FLOAT in batch:
            batch[INPUT_FEATURES_JPEG_FLOAT] = batch[INPUT_FEATURES_JPEG_FLOAT].half()

        # Ground-truth columns are optional and stored flattened.
        for target_key in (INPUT_TRUE_MODIFICATION_FLAG, INPUT_TRUE_MODIFICATION_TYPE):
            if target_key in batch:
                columns[target_key].extend(to_numpy(batch[target_key]).flatten())

        columns[INPUT_IMAGE_ID_KEY].extend(batch[INPUT_IMAGE_ID_KEY])

        outputs = model(**batch)

        if OUTPUT_PRED_MODIFICATION_FLAG in outputs:
            flags = to_numpy(outputs[OUTPUT_PRED_MODIFICATION_FLAG])
            columns[OUTPUT_PRED_MODIFICATION_FLAG].extend(flags.flatten())

        # These two outputs are converted with their own ``tolist()`` directly.
        for raw_key in (OUTPUT_PRED_MODIFICATION_TYPE, OUTPUT_PRED_EMBEDDING):
            if raw_key in outputs:
                columns[raw_key].extend(outputs[raw_key].tolist())

        # Save also TTA predictions for future use
        for tta_key in (flag_tta_key, type_tta_key):
            if tta_key in outputs:
                columns[tta_key].extend(to_numpy(outputs[tta_key]).tolist())

    return pd.DataFrame.from_dict(columns)
def on_batch_end(self, runner: IRunner):
    """Add this batch's per-image intersection and union to the running totals.

    Predictions and targets are read from ``runner.output`` /
    ``runner.input``, detached and converted with ``to_numpy``. Each sample is
    mapped through ``self.outputs_to_labels`` / ``self.inputs_to_labels`` and
    its intersection and union sums are accumulated in
    ``self.scores_per_image`` under the sample's image id.
    """
    batch_ids = runner.input[self.image_id_key]
    batch_preds = to_numpy(runner.output[self.output_key].detach())
    batch_trues = to_numpy(runner.input[self.input_key].detach())

    for image_id, true_item, pred_item in zip(batch_ids, batch_trues, batch_preds):
        # First time this image is seen: start its totals at zero.
        if image_id not in self.scores_per_image:
            self.scores_per_image[image_id] = {"intersection": 0, "union": 0}

        true_labels = self.inputs_to_labels(true_item)
        pred_labels = self.outputs_to_labels(pred_item)

        overlap = (true_labels * pred_labels).sum()
        combined = true_labels.sum() + pred_labels.sum() - overlap

        totals = self.scores_per_image[image_id]
        totals["intersection"] += float(overlap)
        totals["union"] += float(combined)
def valid_fn(epoch, valid_dataloader, criterion, device):
    """Evaluate the enclosing-scope ``model`` on ``valid_dataloader``.

    For every batch the loss is printed via ``xm.master_print``; predicted
    scores and true labels are accumulated, gathered with ``xla_all_gather``
    and scored with ``alaska_weighted_auc``.

    Args:
        epoch: Epoch number, used only in the summary message.
        valid_dataloader: Iterable of batches.
        criterion: Loss callable invoked as ``criterion(y_pred, y_true)``.
        device: Device passed to ``any2device`` and ``xla_all_gather``.

    Returns:
        The weighted AUC computed over the gathered scores.
    """
    model.eval()

    collected_true, collected_pred = [], []
    for step, inputs in enumerate(valid_dataloader):
        inputs = any2device(inputs, device)
        outputs = model(**inputs)

        type_pred = outputs[OUTPUT_PRED_MODIFICATION_TYPE]
        type_true = inputs[INPUT_TRUE_MODIFICATION_TYPE]
        loss = criterion(type_pred, type_true)

        collected_pred.extend(to_numpy(parse_classifier_probas(type_pred)))
        collected_true.extend(to_numpy(type_true))
        xm.master_print(f"Batch: {step}, loss: {loss.item()}")

    # Gather true labels first, then predictions (same order as the metric arguments).
    gathered_true = xla_all_gather(collected_true, device)
    gathered_pred = xla_all_gather(collected_pred, device)
    val_wauc = alaska_weighted_auc(gathered_true, gathered_pred)

    xm.master_print(f"Valid epoch: {epoch}, wAUC: {val_wauc}")
    return val_wauc
def on_loader_end(self, runner: IRunner):
    """Reduce the accumulated per-image scores to a best-threshold IoU metric.

    Per-image intersection/union totals are gathered from all nodes and
    summed per image id. An IoU vector (one entry per threshold) is computed
    for each image and averaged over images; the threshold with the highest
    mean IoU is written to ``runner.loader_metrics`` together with that IoU.
    On rank -1 or 0 the mean-IoU vector is also logged as a histogram.
    """
    eps = 1e-7

    # Gather statistics from all nodes
    gathered = all_gather(self.scores_per_image)

    num_thresholds = len(self.thresholds)
    totals = defaultdict(
        lambda: {"intersection": np.zeros(num_thresholds), "union": np.zeros(num_thresholds)}
    )
    for node_scores in gathered:
        for image_id, values in node_scores.items():
            image_totals = totals[image_id]
            image_totals["intersection"] += values["intersection"]
            image_totals["union"] += values["union"]

    # eps keeps the division finite when an image's union is zero.
    ious_per_image = [
        image_totals["intersection"] / (image_totals["union"] + eps)
        for image_totals in totals.values()
    ]

    thresholds = to_numpy(self.thresholds)
    iou = np.mean(ious_per_image, axis=0)
    assert len(iou) == len(thresholds)

    best_index = np.argmax(iou)
    best_iou = iou[best_index]
    best_threshold = thresholds[best_index]

    runner.loader_metrics[self.prefix + "/threshold"] = float(best_threshold)
    runner.loader_metrics[self.prefix] = float(best_iou)

    if get_rank() in {-1, 0}:
        tb_logger = get_tensorboard_logger(runner)
        tb_logger.add_histogram(self.prefix, iou, global_step=runner.epoch)
def test_calibartion():
    """Print the weighted AUC of stored OOF predictions, raw and after sharpening.

    Reads the ``y_true`` / ``y_pred`` columns from a fixed CSV path, prints
    the metric for the raw predictions, then for predictions passed through
    ``sigmoid(logit(p) * 100)``.
    """
    oof = pd.read_csv("/old_models/May07_16_48_rgb_resnet34_fold0/oof_predictions.csv")
    y_true = oof["y_true"].values
    y_pred = oof["y_pred"].values

    print("Uncalibrated", alaska_weighted_auc(y_true, y_pred))

    # Alternative calibration experiments, currently disabled:
    # ir = IR(out_of_bounds="clip")
    # ir.fit(oof_predictions["y_pred"].values, oof_predictions["y_true"].values)
    # p_calibrated = ir.transform(oof_predictions["y_pred"].values)
    # print("IR", alaska_weighted_auc(oof_predictions["y_true"].values, p_calibrated))
    #
    # lr = LR()
    # lr.fit(oof_predictions["y_pred"].values.reshape(-1, 1), oof_predictions["y_true"].values)
    # p_calibrated = lr.predict_proba(oof_predictions["y_pred"].values.reshape(-1, 1))
    # print("LR", alaska_weighted_auc(oof_predictions["y_true"].values, p_calibrated[:, 1]))

    # Scale the logits by 100 before mapping back through the sigmoid.
    scaled = torch.sigmoid(logit(torch.from_numpy(y_pred)) * 100)
    print("Temp", alaska_weighted_auc(y_true, to_numpy(scaled)))
def on_batch_end(self, runner):
    """Append this batch's true labels and predicted probabilities to the buffers.

    The model output under ``self.output_key`` is mapped through
    ``self.outputs_to_probas``; both it and the input under ``self.input_key``
    are converted with ``to_numpy`` and added to ``self.y_preds`` /
    ``self.y_trues``.
    """
    batch_probas = self.outputs_to_probas(runner.output[self.output_key])
    batch_labels = runner.input[self.input_key]

    self.y_trues.extend(to_numpy(batch_labels))
    self.y_preds.extend(to_numpy(batch_probas))
def parse_and_softmax(x):
    """Return the softmax (over dim 0) of ``x`` as converted by ``to_numpy``.

    When ``x`` is a string, its first and last characters are stripped and
    the remainder is parsed as comma-separated float32 values; any other
    input is passed to ``torch.tensor`` as is.
    """
    values = np.fromstring(x[1:-1], dtype=np.float32, sep=",") if isinstance(x, str) else x
    return to_numpy(torch.tensor(values).softmax(dim=0))