def evaluate(gpu: int, config: dict, shared_dict, barrier, eval_ds, backbone):
    """Run distributed evaluation of a FrameFieldModel on one GPU process.

    Joins the NCCL process group, builds the evaluation data pipeline
    (optional deterministic subsampling, distributed sampler, data loader),
    optionally enables apex AMP, wraps the model in DDP and delegates the
    actual evaluation loop to Evaluator.evaluate().
    """
    # --- Setup DistributedDataParallel --- #
    rank = config["nr"] * config["gpus"] + gpu
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         world_size=config["world_size"],
                                         rank=rank)
    if gpu == 0:
        print("# --- Start evaluating --- #")

    # Bind this process to its GPU:
    torch.cuda.set_device(gpu)

    # --- Online transform performed on the device (GPU):
    cuda_transform = data_transforms.get_eval_online_cuda_transform(config)

    # Deterministic subsampling (seed 0) when a sample count is configured:
    if "samples" in config:
        subset_rng = random.Random(0)
        picked_indices = subset_rng.sample(range(len(eval_ds)), config["samples"])
        eval_ds = torch.utils.data.Subset(eval_ds, picked_indices)

    sampler = torch.utils.data.distributed.DistributedSampler(
        eval_ds, num_replicas=config["world_size"], rank=rank)
    eval_ds = torch.utils.data.DataLoader(
        eval_ds,
        batch_size=config["optim_params"]["eval_batch_size"],
        pin_memory=True,
        sampler=sampler,
        num_workers=config["num_workers"])

    model = FrameFieldModel(config,
                            backbone=backbone,
                            eval_transform=cuda_transform)
    model.cuda(gpu)

    # Mixed precision through apex, when requested and the library is present:
    if config["use_amp"]:
        if APEX_AVAILABLE:
            amp.register_float_function(torch, 'sigmoid')
            model = amp.initialize(model, opt_level="O1")
        elif gpu == 0:
            print_utils.print_warning(
                "WARNING: Cannot use amp because the apex library is not available!"
            )

    # Wrap the model for distributed training
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])

    evaluator = Evaluator(gpu, config, shared_dict, barrier, model,
                          run_dirpath=config["eval_params"]["run_dirpath"])
    evaluator.evaluate(config["fold"][0], eval_ds)
def get_stat_from_all(stat_filepath_format, method_info, tolerances, stat_name):
    """Collect one named stat across all tolerance values for a method.

    For each tolerance, loads the JSON stats file obtained by formatting
    ``stat_filepath_format`` with the method name and the tolerance, and
    extracts ``stat_name`` from it. A missing/unreadable file yields 0 for
    that tolerance and prints a warning.

    Returns a list of stat values, one per entry of ``tolerances``.
    """
    values = []
    for tolerance in tolerances:
        filepath = stat_filepath_format.format(method_info["name"], tolerance)
        stats = python_utils.load_json(filepath)
        if not stats:
            # Keep the 0 placeholder so the output stays aligned with tolerances.
            print_utils.print_warning(
                "WARNING: could not open {}".format(filepath))
            values.append(0)
        else:
            values.append(stats[stat_name])
    return values
def compute_geom_prob(geom, prob_map, output_debug=False):
    """Compute the mean of ``prob_map`` over the rasterized interior of ``geom``.

    Args:
        geom: a shapely Polygon, or an iterable of geometries which is
            processed recursively (returning a list of results).
        prob_map: 2D array of shape (H, W) holding per-pixel probabilities.
        output_debug: if True, warn when a polygon rasterizes to an empty mask.

    Returns:
        A float (mean probability over the polygon's pixels, 0 for an empty
        raster), or a possibly nested list of floats for iterables.

    Raises:
        NotImplementedError: for unsupported geometry types.
    """
    assert len(prob_map.shape
               ) == 2, "prob_map should have size (H, W), not {}".format(
                   prob_map.shape)
    if isinstance(geom, Iterable):
        return [
            compute_geom_prob(_geom, prob_map, output_debug=output_debug)
            for _geom in geom
        ]
    elif isinstance(geom, shapely.geometry.Polygon):
        # --- Cut with geom bounds:
        minx, miny, maxx, maxy = geom.bounds
        # Bugfix: clamp the lower bounds to 0. For a geometry extending past
        # the origin, int(minx)/int(miny) are negative and Python's negative
        # indexing would slice prob_map from the wrong end, while the raster
        # would still be translated as if the crop started at (minx, miny).
        # Clamping is a no-op for in-bounds geometries.
        minx = max(0, int(minx))
        miny = max(0, int(miny))
        maxx = int(maxx) + 1
        maxy = int(maxy) + 1
        geom = shapely.affinity.translate(geom, xoff=-minx, yoff=-miny)
        prob_map = prob_map[miny:maxy, minx:maxx]
        # --- Rasterize TODO: better rasterization (or sampling) of polygon ?
        raster = np.zeros(prob_map.shape, dtype=np.uint8)
        exterior_array = np.round(np.array(geom.exterior.coords)).astype(
            np.int32)
        interior_array_list = [
            np.round(np.array(interior.coords)).astype(np.int32)
            for interior in geom.interiors
        ]
        cv2.fillPoly(raster, [exterior_array], color=1)
        cv2.fillPoly(raster, interior_array_list, color=0)  # carve out holes
        raster_sum = np.sum(raster)
        if 0 < raster_sum:
            polygon_prob = np.sum(raster * prob_map) / raster_sum
        else:
            # Degenerate polygon (or fully outside the map): no pixel covered.
            polygon_prob = 0
            if output_debug:
                print_utils.print_warning(
                    "WARNING: empty polygon raster in polygonize_tracing.compute_polygon_prob()."
                )
        return polygon_prob
    else:
        raise NotImplementedError(
            f"Geometry of type {type(geom)} not implemented!")
def plot_metric(dirpath, info_list):
    """Plot cumulative max tangent angle error curves for several methods.

    For each entry of ``info_list`` (a dict with "name" and
    "metrics_filepath" keys), loads the metrics JSON from ``dirpath``, plots
    the fraction of detections whose max angle diff falls under each
    threshold in [0, 90] degrees, and saves/shows the combined figure.
    Unreadable metrics files are skipped with a warning.
    """
    legend = []
    for info in info_list:
        metrics_filepath = os.path.join(dirpath, info["metrics_filepath"])
        metrics = python_utils.load_json(metrics_filepath)
        if not metrics:
            print_utils.print_warning("WARNING: could not open {}".format(
                info["metrics_filepath"]))
            continue
        max_angle_diffs = np.array(metrics["max_angle_diffs"])
        total = len(max_angle_diffs)
        angle_thresholds = range(0, 91)
        # Cumulative fraction of detections under each angle threshold:
        fractions = [
            np.sum(max_angle_diffs < threshold) / total
            for threshold in angle_thresholds
        ]
        plt.plot(angle_thresholds, fractions)
        # Legend entry carries the mean error for this method:
        legend.append(f"{info['name']}: {np.mean(max_angle_diffs):.1f}°")

    plt.legend(legend, loc='lower right')
    plt.xlabel("Threshold (degrees)")
    plt.ylabel("Fraction of detections")
    axes = plt.gca()
    axes.set_xlim([0, 90])
    axes.set_ylim([0, 1])
    title = f"Cumulative max tangent angle error per detection"
    plt.title(title)
    plt.savefig(title.lower().replace(" ", "_") + ".pdf")
    plt.show()
def train(gpu, config, shared_dict, barrier, train_ds, val_ds, backbone):
    """Train a FrameFieldModel with DistributedDataParallel on one GPU process.

    Called once per GPU process. Sets seeds, joins the NCCL process group,
    sets up the run directories (process 0 only, synchronized via barrier),
    builds datasets/loaders, the model, loss, optimizer and LR scheduler,
    then hands off to Trainer.fit().

    Args:
        gpu: local GPU index of this process.
        config: run configuration dict. NOTE(review): config["nr"] looks like
            the node rank and config["gpus"] the GPUs per node — confirm
            against the launcher.
        shared_dict: cross-process dict used to share run paths set by rank 0.
        barrier: synchronization barrier for all processes.
        train_ds: training dataset.
        val_ds: validation dataset, or None to skip validation.
        backbone: backbone network passed to FrameFieldModel.
    """
    # --- Set seeds --- #
    torch.manual_seed(
        2
    )  # For DistributedDataParallel: make sure all models are initialized identically
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # os.environ['CUDA_LAUNCH_BLOCKING'] = 1
    torch.autograd.set_detect_anomaly(True)

    # --- Setup DistributedDataParallel --- #
    # Global rank of this process across all nodes:
    rank = config["nr"] * config["gpus"] + gpu
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         world_size=config["world_size"],
                                         rank=rank)
    if gpu == 0:
        print("# --- Start training --- #")

    # --- Setup run --- #
    # Setup run on process 0 only; other processes read the paths from
    # shared_dict after the barrier:
    if gpu == 0:
        shared_dict["run_dirpath"], shared_dict[
            "init_checkpoints_dirpath"] = local_utils.setup_run(config)
    barrier.wait(
    )  # Wait on all processes so that shared_dict is synchronized.

    # Choose device
    torch.cuda.set_device(gpu)

    # --- Online transform performed on the device (GPU):
    train_online_cuda_transform = data_transforms.get_online_cuda_transform(
        config, augmentations=config["data_aug_params"]["enable"])
    # Eval transform never augments:
    if val_ds is not None:
        eval_online_cuda_transform = data_transforms.get_online_cuda_transform(
            config, augmentations=False)
    else:
        eval_online_cuda_transform = None

    # Deterministic subsampling (seed 0) when a sample count is configured:
    if "samples" in config:
        rng_samples = random.Random(0)
        train_ds = torch.utils.data.Subset(
            train_ds,
            rng_samples.sample(range(len(train_ds)), config["samples"]))
        if val_ds is not None:
            val_ds = torch.utils.data.Subset(
                val_ds,
                rng_samples.sample(range(len(val_ds)), config["samples"]))
        # test_ds = torch.utils.data.Subset(test_ds, list(range(config["samples"])))

    if gpu == 0:
        print(f"Train dataset has {len(train_ds)} samples.")

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_ds, num_replicas=config["world_size"], rank=rank)
    val_sampler = None
    if val_ds is not None:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_ds, num_replicas=config["world_size"], rank=rank)

    # Eval batches are twice the train batch size (no backward pass), capped
    # by the subsample count when subsampling is on:
    if "samples" in config:
        eval_batch_size = min(2 * config["optim_params"]["batch_size"],
                              config["samples"])
    else:
        eval_batch_size = 2 * config["optim_params"]["batch_size"]

    # init_dl reuses the train sampler with the (larger) eval batch size:
    init_dl = torch.utils.data.DataLoader(train_ds,
                                          batch_size=eval_batch_size,
                                          pin_memory=True,
                                          sampler=train_sampler,
                                          num_workers=config["num_workers"],
                                          drop_last=True)
    # shuffle=False because the DistributedSampler already shuffles:
    train_dl = torch.utils.data.DataLoader(
        train_ds,
        batch_size=config["optim_params"]["batch_size"],
        shuffle=False,
        pin_memory=True,
        sampler=train_sampler,
        num_workers=config["num_workers"],
        drop_last=True)
    if val_ds is not None:
        val_dl = torch.utils.data.DataLoader(val_ds,
                                             batch_size=eval_batch_size,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             num_workers=config["num_workers"],
                                             drop_last=True)
    else:
        val_dl = None

    model = FrameFieldModel(config,
                            backbone=backbone,
                            train_transform=train_online_cuda_transform,
                            eval_transform=eval_online_cuda_transform)
    model.cuda(gpu)
    if gpu == 0:
        print("Model has {} trainable params".format(
            count_trainable_params(model)))

    loss_func = losses.build_combined_loss(config).cuda(gpu)

    # Compute learning rate: scale the base LR linearly with the global batch
    # size, capped at max_lr.
    lr = min(
        config["optim_params"]["base_lr"] *
        config["optim_params"]["batch_size"] * config["world_size"],
        config["optim_params"]["max_lr"])
    if config["optim_params"]["optimizer"] == "Adam":
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=lr,
            # weight_decay=config["optim_params"]["weight_decay"],
            eps=1e-8  # Increase if instability is detected
        )
    elif config["optim_params"]["optimizer"] == "RMSProp":
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    else:
        raise NotImplementedError(
            f"Optimizer {config['optim_params']['optimizer']} not recognized")
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    # Mixed precision through apex, when requested and available:
    if config["use_amp"] and APEX_AVAILABLE:
        amp.register_float_function(torch, 'sigmoid')
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    elif config["use_amp"] and not APEX_AVAILABLE and gpu == 0:
        print_utils.print_warning(
            "WARNING: Cannot use amp because the apex library is not available!"
        )

    # Wrap the model for distributed training
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[gpu], find_unused_parameters=True)

    # def lr_warmup_func(epoch):
    #     if epoch < config["warmup_epochs"]:
    #         coef = 1 + (config["warmup_factor"] - 1) * (config["warmup_epochs"] - epoch) / config["warmup_epochs"]
    #     else:
    #         coef = 1
    #     return coef
    # lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_warmup_func)
    # lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer, config["optim_params"]["gamma"])

    trainer = Trainer(
        rank,
        gpu,
        config,
        model,
        optimizer,
        loss_func,
        run_dirpath=shared_dict["run_dirpath"],
        init_checkpoints_dirpath=shared_dict["init_checkpoints_dirpath"],
        lr_scheduler=lr_scheduler)
    trainer.fit(train_dl, val_dl=val_dl, init_dl=init_dl)
def eval_one(annotation_filename, run_results_dirpath, cocoGt, config, annType, pool=None):
    """Evaluate one detection-results file against the COCO ground truth.

    Computes (and caches as JSON files next to the results) the standard
    COCO stats and, when detections carry polygonal segmentations, the
    contour max-angle-diff metrics. Already-existing output files are not
    recomputed.

    Args:
        annotation_filename: file name of the detection results (JSON).
        run_results_dirpath: directory containing the results and outputs.
        cocoGt: pycocotools COCO object with the ground-truth annotations.
        config: run configuration; "samples" (if present) tags output names.
        annType: COCO annotation type passed to COCOeval (e.g. "segm").
        pool: optional multiprocessing pool forwarded to ContourEval.
    """
    print("---eval_one")
    annotation_name = os.path.splitext(annotation_filename)[0]
    # Tag output files with the sample count when subsampling is on so that
    # subsampled runs do not overwrite full-run outputs:
    if "samples" in config:
        stats_filepath = os.path.join(run_results_dirpath,
                                      "{}.stats.{}.{}.json".format("test", annotation_name, config["samples"]))
        metrics_filepath = os.path.join(run_results_dirpath,
                                        "{}.metrics.{}.{}.json".format("test", annotation_name, config["samples"]))
    else:
        stats_filepath = os.path.join(run_results_dirpath,
                                      "{}.stats.{}.json".format("test", annotation_name))
        metrics_filepath = os.path.join(run_results_dirpath,
                                        "{}.metrics.{}.json".format("test", annotation_name))

    res_filepath = os.path.join(run_results_dirpath, annotation_filename)
    if not os.path.exists(res_filepath):
        print_utils.print_warning("WARNING: result not found at filepath {}".format(res_filepath))
        return

    print_utils.print_info("Evaluate {} annotations:".format(annotation_filename))
    try:
        cocoDt = cocoGt.loadRes(res_filepath)
    except AssertionError as e:
        # loadRes asserts every result image id exists in the ground truth;
        # recover by dropping detections for unrecognised images.
        print_utils.print_error("ERROR: {}".format(e))
        print_utils.print_info("INFO: continuing by removing unrecognised images")
        # Bugfix: json.load(open(...)) leaked the file handle; close it.
        with open(res_filepath) as res_file:
            res = json.load(res_file)
        print("Initial res length:", len(res))
        annsImgIds = [ann["image_id"] for ann in res]
        image_id_rm = set(annsImgIds) - set(cocoGt.getImgIds())
        print_utils.print_warning("Remove {} image ids!".format(len(image_id_rm)))
        new_res = [ann for ann in res if ann["image_id"] not in image_id_rm]
        print("New res length:", len(new_res))
        cocoDt = cocoGt.loadRes(new_res)

    if not os.path.exists(stats_filepath):
        # Run COCOeval
        cocoEval = COCOeval(cocoGt, cocoDt, annType)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        # Save stats: names match the 12 values of COCOeval.stats, in order.
        stats = {}
        stat_names = ["AP", "AP_50", "AP_75", "AP_S", "AP_M", "AP_L",
                      "AR", "AR_50", "AR_75", "AR_S", "AR_M", "AR_L"]
        assert len(stat_names) == cocoEval.stats.shape[0]
        for i, stat_name in enumerate(stat_names):
            stats[stat_name] = cocoEval.stats[i]
        python_utils.save_json(stats_filepath, stats)
    else:
        print("COCO stats already computed, skipping...")

    if not os.path.exists(metrics_filepath):
        # Contour metrics only apply to polygonal (list) segmentations, not
        # RLE/raster masks — probe the first annotation of the first image:
        if isinstance(cocoDt.loadAnns(cocoDt.getAnnIds(imgIds=cocoDt.getImgIds()[0]))[0]["segmentation"], list):
            metrics = {}
            # Run additionnal metrics
            print_utils.print_info("INFO: Running contour metrics")
            contour_eval = ContourEval(cocoGt, cocoDt)
            max_angle_diffs = contour_eval.evaluate(pool=pool)
            metrics["max_angle_diffs"] = list(max_angle_diffs)
            python_utils.save_json(metrics_filepath, metrics)
    else:
        print("Contour metrics already computed, skipping...")