# --- Example #1 ---
def evaluate(gpu: int, config: dict, shared_dict, barrier, eval_ds, backbone):
    """Run distributed evaluation of a FrameFieldModel on one GPU process.

    Initializes the NCCL process group, shards the evaluation dataset with a
    DistributedSampler, builds the model (optionally AMP-wrapped), wraps it in
    DistributedDataParallel and delegates to Evaluator.evaluate.

    Args:
        gpu: Local GPU index for this process.
        config: Experiment configuration. Reads "nr", "gpus", "world_size",
            "num_workers", "optim_params"["eval_batch_size"],
            "eval_params"["run_dirpath"], "fold", and optional "samples".
        shared_dict: Cross-process dict, passed through to Evaluator.
        barrier: Cross-process barrier, passed through to Evaluator.
        eval_ds: Evaluation dataset (torch Dataset).
        backbone: Backbone network for FrameFieldModel.
    """
    # --- Setup DistributedDataParallel --- #
    # Global rank = node index * GPUs per node + local GPU index.
    rank = config["nr"] * config["gpus"] + gpu
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         world_size=config["world_size"],
                                         rank=rank)

    if gpu == 0:
        print("# --- Start evaluating --- #")

    # Choose device
    torch.cuda.set_device(gpu)

    # --- Online transform performed on the device (GPU):
    eval_online_cuda_transform = data_transforms.get_eval_online_cuda_transform(
        config)

    if "samples" in config:
        # Fixed seed so every process draws the same random subset.
        rng_samples = random.Random(0)
        eval_ds = torch.utils.data.Subset(
            eval_ds, rng_samples.sample(range(len(eval_ds)),
                                        config["samples"]))

    eval_sampler = torch.utils.data.distributed.DistributedSampler(
        eval_ds, num_replicas=config["world_size"], rank=rank)

    # FIX: do not shadow the dataset variable with the DataLoader; use a
    # distinct name (eval_dl), consistent with train()'s train_dl/val_dl.
    eval_dl = torch.utils.data.DataLoader(
        eval_ds,
        batch_size=config["optim_params"]["eval_batch_size"],
        pin_memory=True,
        sampler=eval_sampler,
        num_workers=config["num_workers"])

    model = FrameFieldModel(config,
                            backbone=backbone,
                            eval_transform=eval_online_cuda_transform)
    model.cuda(gpu)

    if config["use_amp"] and APEX_AVAILABLE:
        # Register sigmoid to run in fp32 under AMP.
        amp.register_float_function(torch, 'sigmoid')
        model = amp.initialize(model, opt_level="O1")
    elif config["use_amp"] and not APEX_AVAILABLE and gpu == 0:
        print_utils.print_warning(
            "WARNING: Cannot use amp because the apex library is not available!"
        )

    # Wrap the model for distributed training
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])

    evaluator = Evaluator(gpu,
                          config,
                          shared_dict,
                          barrier,
                          model,
                          run_dirpath=config["eval_params"]["run_dirpath"])
    split_name = config["fold"][0]
    evaluator.evaluate(split_name, eval_dl)
def get_stat_from_all(stat_filepath_format, method_info, tolerances,
                      stat_name):
    """Collect one named statistic across all tolerance values.

    Args:
        stat_filepath_format: Format string with two placeholders
            (method name, tolerance) producing the stats JSON filepath.
        method_info: Dict with at least a "name" key.
        tolerances: Iterable of tolerance values to look up.
        stat_name: Key of the statistic to extract from each stats file.

    Returns:
        A list aligned with `tolerances`; an entry stays 0 when the
        corresponding stats file cannot be loaded (a warning is printed).
    """
    collected = []
    for tolerance in tolerances:
        filepath = stat_filepath_format.format(method_info["name"], tolerance)
        stats = python_utils.load_json(filepath)
        if not stats:
            print_utils.print_warning(
                "WARNING: could not open {}".format(filepath))
            collected.append(0)
        else:
            collected.append(stats[stat_name])
    return collected
def compute_geom_prob(geom, prob_map, output_debug=False):
    """Compute the mean probability of `prob_map` over the interior of `geom`.

    Args:
        geom: A shapely Polygon, or an iterable of geometries (handled
            recursively, returning a list of per-geometry results).
        prob_map: 2D array of shape (H, W) with per-pixel probabilities.
        output_debug: If True, print a warning when a polygon rasterizes
            to an empty mask.

    Returns:
        A scalar mean probability over the rasterized polygon interior
        (0 if the raster is empty), or a list of such values when `geom`
        is an iterable.

    Raises:
        NotImplementedError: For unsupported geometry types.
    """
    assert len(prob_map.shape
               ) == 2, "prob_map should have size (H, W), not {}".format(
                   prob_map.shape)

    if isinstance(geom, Iterable):
        # Recurse over collections of geometries.
        return [
            compute_geom_prob(_geom, prob_map, output_debug=output_debug)
            for _geom in geom
        ]
    elif isinstance(geom, shapely.geometry.Polygon):
        # --- Cut with geom bounds: crop prob_map to the polygon's bounding
        # box and translate the polygon into the crop's coordinate frame.
        minx, miny, maxx, maxy = geom.bounds
        minx = int(minx)
        miny = int(miny)
        maxx = int(maxx) + 1  # +1 so the floored max coordinate is included
        maxy = int(maxy) + 1
        geom = shapely.affinity.translate(geom, xoff=-minx, yoff=-miny)
        prob_map = prob_map[miny:maxy, minx:maxx]

        # --- Rasterize TODO: better rasterization (or sampling) of polygon ?
        raster = np.zeros(prob_map.shape, dtype=np.uint8)
        exterior_array = np.round(np.array(geom.exterior.coords)).astype(
            np.int32)
        interior_array_list = [
            np.round(np.array(interior.coords)).astype(np.int32)
            for interior in geom.interiors
        ]
        # Fill the exterior with 1s, then carve out the holes with 0s.
        cv2.fillPoly(raster, [exterior_array], color=1)
        cv2.fillPoly(raster, interior_array_list, color=0)

        raster_sum = np.sum(raster)
        if 0 < raster_sum:
            # Mean probability over the polygon's interior pixels.
            polygon_prob = np.sum(raster * prob_map) / raster_sum
        else:
            # Degenerate polygon: rasterized to no pixels at all.
            polygon_prob = 0
            if output_debug:
                print_utils.print_warning(
                    "WARNING: empty polygon raster in polygonize_tracing.compute_polygon_prob()."
                )

        return polygon_prob
    else:
        raise NotImplementedError(
            f"Geometry of type {type(geom)} not implemented!")
# --- Example #4 ---
def plot_metric(dirpath, info_list):
    """Plot cumulative max tangent angle error curves for several methods.

    For each entry in `info_list`, loads its metrics JSON and plots the
    fraction of detections whose max angle difference falls under each
    threshold in [0, 90] degrees, then saves and shows the figure.
    """
    legend = []
    for info in info_list:
        metrics = python_utils.load_json(
            os.path.join(dirpath, info["metrics_filepath"]))
        if not metrics:
            print_utils.print_warning("WARNING: could not open {}".format(
                info["metrics_filepath"]))
            continue

        max_angle_diffs = np.array(metrics["max_angle_diffs"])
        total = len(max_angle_diffs)
        angle_thresholds = range(0, 91)
        # Cumulative fraction of detections under each angle threshold.
        fraction_under_threshold_list = [
            np.sum(max_angle_diffs < threshold) / total
            for threshold in angle_thresholds
        ]
        plt.plot(angle_thresholds, fraction_under_threshold_list)

        # Mean error goes in the legend next to the method name.
        mean_error = np.mean(max_angle_diffs)
        legend.append(f"{info['name']}: {mean_error:.1f}°")

    plt.legend(legend, loc='lower right')
    plt.xlabel("Threshold (degrees)")
    plt.ylabel("Fraction of detections")
    axes = plt.gca()
    axes.set_xlim([0, 90])
    axes.set_ylim([0, 1])
    title = "Cumulative max tangent angle error per detection"
    plt.title(title)
    plt.savefig(title.lower().replace(" ", "_") + ".pdf")
    plt.show()
# --- Example #5 ---
def train(gpu, config, shared_dict, barrier, train_ds, val_ds, backbone):
    """Distributed training entry point, executed once per GPU process.

    Seeds torch, initializes the NCCL process group, builds sharded data
    loaders, constructs the FrameFieldModel with its combined loss and
    optimizer, optionally enables apex AMP, wraps the model in
    DistributedDataParallel and runs Trainer.fit.

    Args:
        gpu: Local GPU index for this process.
        config: Experiment configuration. Reads "nr", "gpus", "world_size",
            "num_workers", "optim_params", "data_aug_params", "use_amp",
            and optional "samples".
        shared_dict: Cross-process dict; process 0 fills in the run dirpaths.
        barrier: Cross-process barrier used to synchronize shared_dict.
        train_ds: Training dataset.
        val_ds: Validation dataset, or None to skip validation.
        backbone: Backbone network for FrameFieldModel.
    """
    # --- Set seeds --- #
    torch.manual_seed(
        2
    )  # For DistributedDataParallel: make sure all models are initialized identically
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # os.environ['CUDA_LAUNCH_BLOCKING'] = 1
    # NOTE(review): anomaly detection adds significant overhead — consider
    # disabling it for production runs.
    torch.autograd.set_detect_anomaly(True)

    # --- Setup DistributedDataParallel --- #
    # Global rank = node index * GPUs per node + local GPU index.
    rank = config["nr"] * config["gpus"] + gpu
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         world_size=config["world_size"],
                                         rank=rank)

    if gpu == 0:
        print("# --- Start training --- #")

    # --- Setup run --- #
    # Setup run on process 0:
    if gpu == 0:
        shared_dict["run_dirpath"], shared_dict[
            "init_checkpoints_dirpath"] = local_utils.setup_run(config)
    barrier.wait(
    )  # Wait on all processes so that shared_dict is synchronized.

    # Choose device
    torch.cuda.set_device(gpu)

    # --- Online transform performed on the device (GPU):
    train_online_cuda_transform = data_transforms.get_online_cuda_transform(
        config, augmentations=config["data_aug_params"]["enable"])
    if val_ds is not None:
        # Evaluation transform: no data augmentation.
        eval_online_cuda_transform = data_transforms.get_online_cuda_transform(
            config, augmentations=False)
    else:
        eval_online_cuda_transform = None

    if "samples" in config:
        # Fixed seed so every process draws the same random subset.
        rng_samples = random.Random(0)
        train_ds = torch.utils.data.Subset(
            train_ds,
            rng_samples.sample(range(len(train_ds)), config["samples"]))
        if val_ds is not None:
            val_ds = torch.utils.data.Subset(
                val_ds,
                rng_samples.sample(range(len(val_ds)), config["samples"]))
        # test_ds = torch.utils.data.Subset(test_ds, list(range(config["samples"])))

    if gpu == 0:
        print(f"Train dataset has {len(train_ds)} samples.")

    # Distributed samplers shard each dataset across processes.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_ds, num_replicas=config["world_size"], rank=rank)
    val_sampler = None
    if val_ds is not None:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_ds, num_replicas=config["world_size"], rank=rank)
    # Eval batches are twice the train batch size, capped by the sample
    # count when evaluating a subset.
    if "samples" in config:
        eval_batch_size = min(2 * config["optim_params"]["batch_size"],
                              config["samples"])
    else:
        eval_batch_size = 2 * config["optim_params"]["batch_size"]

    # Loader over the training set used at initialization time by the Trainer.
    init_dl = torch.utils.data.DataLoader(train_ds,
                                          batch_size=eval_batch_size,
                                          pin_memory=True,
                                          sampler=train_sampler,
                                          num_workers=config["num_workers"],
                                          drop_last=True)
    # shuffle=False: DistributedSampler handles per-epoch shuffling.
    train_dl = torch.utils.data.DataLoader(
        train_ds,
        batch_size=config["optim_params"]["batch_size"],
        shuffle=False,
        pin_memory=True,
        sampler=train_sampler,
        num_workers=config["num_workers"],
        drop_last=True)
    if val_ds is not None:
        val_dl = torch.utils.data.DataLoader(val_ds,
                                             batch_size=eval_batch_size,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             num_workers=config["num_workers"],
                                             drop_last=True)
    else:
        val_dl = None

    model = FrameFieldModel(config,
                            backbone=backbone,
                            train_transform=train_online_cuda_transform,
                            eval_transform=eval_online_cuda_transform)
    model.cuda(gpu)
    if gpu == 0:
        print("Model has {} trainable params".format(
            count_trainable_params(model)))

    loss_func = losses.build_combined_loss(config).cuda(gpu)
    # Compute learning rate
    # Scale base_lr by the global batch size, capped at max_lr.
    lr = min(
        config["optim_params"]["base_lr"] *
        config["optim_params"]["batch_size"] * config["world_size"],
        config["optim_params"]["max_lr"])

    if config["optim_params"]["optimizer"] == "Adam":
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=lr,
            # weight_decay=config["optim_params"]["weight_decay"],
            eps=1e-8  # Increase if instability is detected
        )
    elif config["optim_params"]["optimizer"] == "RMSProp":
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    else:
        raise NotImplementedError(
            f"Optimizer {config['optim_params']['optimizer']} not recognized")
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    if config["use_amp"] and APEX_AVAILABLE:
        # Register sigmoid to run in fp32 under AMP.
        amp.register_float_function(torch, 'sigmoid')
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    elif config["use_amp"] and not APEX_AVAILABLE and gpu == 0:
        print_utils.print_warning(
            "WARNING: Cannot use amp because the apex library is not available!"
        )

    # Wrap the model for distributed training
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[gpu], find_unused_parameters=True)

    # def lr_warmup_func(epoch):
    #     if epoch < config["warmup_epochs"]:
    #         coef = 1 + (config["warmup_factor"] - 1) * (config["warmup_epochs"] - epoch) / config["warmup_epochs"]
    #     else:
    #         coef = 1
    #     return coef
    # lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_warmup_func)
    # lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer, config["optim_params"]["gamma"])

    trainer = Trainer(
        rank,
        gpu,
        config,
        model,
        optimizer,
        loss_func,
        run_dirpath=shared_dict["run_dirpath"],
        init_checkpoints_dirpath=shared_dict["init_checkpoints_dirpath"],
        lr_scheduler=lr_scheduler)
    trainer.fit(train_dl, val_dl=val_dl, init_dl=init_dl)
# --- Example #6 ---
def eval_one(annotation_filename, run_results_dirpath, cocoGt, config, annType, pool=None):
    """Evaluate one detection result file against COCO ground truth.

    Computes COCO detection stats (AP/AR variants) and contour-based max
    tangent angle metrics, caching each result to a JSON file so re-runs
    skip already-computed work.

    Args:
        annotation_filename: Filename (relative to run_results_dirpath) of
            the detection results JSON.
        run_results_dirpath: Directory holding results and output JSON files.
        cocoGt: COCO ground-truth object (pycocotools COCO).
        config: Experiment configuration; optional "samples" key is encoded
            into the output filenames.
        annType: Annotation type passed to COCOeval (e.g. "segm" or "bbox").
        pool: Optional multiprocessing pool forwarded to ContourEval.evaluate.
    """
    print("---eval_one")
    annotation_name = os.path.splitext(annotation_filename)[0]
    # Output filepaths include the sample count when evaluating a subset.
    if "samples" in config:
        stats_filepath = os.path.join(
            run_results_dirpath,
            "{}.stats.{}.{}.json".format("test", annotation_name, config["samples"]))
        metrics_filepath = os.path.join(
            run_results_dirpath,
            "{}.metrics.{}.{}.json".format("test", annotation_name, config["samples"]))
    else:
        stats_filepath = os.path.join(run_results_dirpath, "{}.stats.{}.json".format("test", annotation_name))
        metrics_filepath = os.path.join(run_results_dirpath, "{}.metrics.{}.json".format("test", annotation_name))

    res_filepath = os.path.join(run_results_dirpath, annotation_filename)
    if not os.path.exists(res_filepath):
        print_utils.print_warning("WARNING: result not found at filepath {}".format(res_filepath))
        return
    print_utils.print_info("Evaluate {} annotations:".format(annotation_filename))
    try:
        cocoDt = cocoGt.loadRes(res_filepath)
    except AssertionError as e:
        # loadRes asserts that every result image id exists in the ground
        # truth; recover by dropping annotations for unknown images.
        print_utils.print_error("ERROR: {}".format(e))
        print_utils.print_info("INFO: continuing by removing unrecognised images")
        # FIX: close the results file (the original leaked the handle via
        # json.load(open(res_filepath))).
        with open(res_filepath) as res_file:
            res = json.load(res_file)
        print("Initial res length:", len(res))
        annsImgIds = [ann["image_id"] for ann in res]
        image_id_rm = set(annsImgIds) - set(cocoGt.getImgIds())
        print_utils.print_warning("Remove {} image ids!".format(len(image_id_rm)))
        new_res = [ann for ann in res if ann["image_id"] not in image_id_rm]
        print("New res length:", len(new_res))
        cocoDt = cocoGt.loadRes(new_res)

    if not os.path.exists(stats_filepath):
        # Run COCOeval
        cocoEval = COCOeval(cocoGt, cocoDt, annType)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        # Save stats keyed by name, in COCOeval's summary order.
        stats = {}
        stat_names = ["AP", "AP_50", "AP_75", "AP_S", "AP_M", "AP_L", "AR", "AR_50", "AR_75", "AR_S", "AR_M", "AR_L"]
        assert len(stat_names) == cocoEval.stats.shape[0]
        for i, stat_name in enumerate(stat_names):
            stats[stat_name] = cocoEval.stats[i]

        python_utils.save_json(stats_filepath, stats)
    else:
        print("COCO stats already computed, skipping...")

    if not os.path.exists(metrics_filepath):
        # Verify that cocoDt has polygonal segmentation masks and not raster masks:
        if isinstance(cocoDt.loadAnns(cocoDt.getAnnIds(imgIds=cocoDt.getImgIds()[0]))[0]["segmentation"], list):
            metrics = {}
            # Run additional contour metrics (max tangent angle differences).
            print_utils.print_info("INFO: Running contour metrics")
            contour_eval = ContourEval(cocoGt, cocoDt)
            max_angle_diffs = contour_eval.evaluate(pool=pool)
            metrics["max_angle_diffs"] = list(max_angle_diffs)
            python_utils.save_json(metrics_filepath, metrics)
    else:
        print("Contour metrics already computed, skipping...")