def test(cfg, model, distributed):
    """Evaluate `model` on every dataset listed in cfg.DATASETS.TEST.

    Unwraps a DistributedDataParallel model, prepares one output folder per
    dataset under <OUTPUT_DIR>/inference/, and runs `inference` on each
    validation loader, synchronizing ranks after each dataset.
    """
    if distributed:
        model = model.module
    # Release cached GPU memory before evaluation.  # TODO check if it helps
    torch.cuda.empty_cache()
    # Box AP is always computed; add mask AP when the mask head is enabled.
    iou_types = ("bbox",) + (("segm",) if cfg.MODEL.MASK_ON else ())
    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    if cfg.OUTPUT_DIR:
        output_folders = []
        for name in dataset_names:
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", name)
            mkdir(folder)
            output_folders.append(folder)
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for folder, name, loader in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            loader,
            dataset_name=name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=folder,
        )
        synchronize()
def main():
    """CLI entry point: parse arguments, set up distributed training,
    load/freeze the config, configure logging, train, then optionally test."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file", default="", metavar="FILE",
        help="path to config file", type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test", dest="skip_test",
        help="Do not test the final model", action="store_true",
    )
    parser.add_argument(
        "opts", help="Modify config options using the command-line",
        default=None, nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # torch.distributed.launch exports WORLD_SIZE; absent means single GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        # NOTE(review): uses the legacy `deprecated` process-group API.
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    # Config precedence: file first, then command-line overrides, then freeze.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)
    if not args.skip_test:
        test(cfg, model, args.distributed)
def do_train(
    cfg,
    model,
    data_loader_support,
    data_loader_query,
    data_loader_val_support,
    data_loader_val_test,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    meters,
    meters_val,
):
    """Few-shot detection training loop with alternating support/query passes.

    Each support batch yields per-class "novel" classifier weights, which are
    all-gathered across ranks (via diffdist, so gradients flow through the
    collective) and accumulated; every `iter_size` support iterations the
    accumulated weights condition `iter_size_qry` query forward passes whose
    losses are backpropagated (AMP-scaled, retain_graph) before one optimizer
    step.  Periodically rebuilds the novel-class classifier from the
    validation support set and runs COCO-style inference.

    NOTE(review): `iter_size`, `iter_size_qry` and `nGPU` are not defined in
    this function nor passed as parameters -- presumably module-level globals;
    confirm before refactoring.

    Args:
        cfg: frozen yacs config node.
        model: DDP-wrapped detector (`model.module` is accessed when updating
            `roi_heads.box.cls_weights` during validation).
        data_loader_support / data_loader_query: training loaders.
        data_loader_val_support / data_loader_val_test: validation loaders
            (the val-loss loop over `data_loader_val_test` is disabled below).
        optimizer / scheduler / checkpointer: training plumbing.
        device: target device for images and targets.
        checkpoint_period: iterations between checkpoint saves.
        test_period: iterations between validation runs (<= 0 disables).
        arguments: mutable dict checkpointed with the model; holds "iteration".
        meters / meters_val: MetricLogger-style accumulators.
    """
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    # meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader_support)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    # Per-rank, per-iteration support class-batch assignment, precomputed
    # offline and loaded from JSON: batch_cls_sup[rank][iteration] -> batch id.
    batch_cls_json_file = cfg.MODEL.FEW_SHOT.SUP_INDICE_CLS
    with open(batch_cls_json_file, 'r') as f:
        batch_cls_sup = json.load(f)
    if cfg.MODEL.QRY_BALANCE:
        # Same idea for query batches when class-balanced querying is on.
        qry_cls_json_file = cfg.MODEL.QRY_INDICE_CLS
        with open(qry_cls_json_file, 'r') as f:
            batch_cls_qry = json.load(f)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    rank = dist.get_rank()
    # if is_main_process():
    #     import pdb
    #     pdb.set_trace()
    # else:
    #     return
    # for name, param in model.named_parameters():
    #     print(name, param, True if param.grad is not None else False)

    # Manually-driven query iterator, advanced only on accumulation steps.
    # NOTE(review): `.next()` is the pre-0.4-style iterator method -- confirm
    # the installed torch DataLoader iterator still exposes it.
    query_iterator = data_loader_query.__iter__()
    # print('len(data_loader_query):', len(data_loader_query))
    # import pdb; pdb.set_trace()
    weights_novel_all = []
    iteration_qry = 0
    for iteration, (images_sup, targets_sup, idx) in enumerate(data_loader_support, start_iter):
        # Skip batches containing an image with no annotations.
        if any(len(target) < 1 for target in targets_sup):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx} || targets Length={[len(target) for target in targets_sup]}")
            continue
        data_time = time.time() - end
        batch_id = batch_cls_sup[rank][iteration]
        iteration = iteration + 1
        arguments["iteration"] = iteration
        scheduler.step()

        images_sup = images_sup.to(device)
        targets_sup = [target.to(device) for target in targets_sup]
        # update weight:
        # print(targets_sup)
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        # print(iteration, idx, batch_id, targets_sup[0].extra_fields)
        # Support pass: produce novel-class classifier weights for this batch.
        weight_novel = model(images_sup, targets_sup, is_support=True, batch_id=batch_id)
        # weights_novel[rank] = weight_novel
        # print('batch_id', batch_id, weight_novel[:10])
        # weight_novel = {batch_id:weight_novel}
        torch.cuda.empty_cache()
        # synchronize()
        # Differentiable all-gather so the support pass receives gradients
        # from every rank's query loss.
        weights_novel = [torch.empty_like(weight_novel) for i in range(dist.get_world_size())]
        weights_novel = torch.cat(
            diffdist.functional.all_gather(weights_novel, weight_novel))
        # print(weights_novel[:,:10])
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        weights_novel_all.append(weights_novel)
        # # print(weights_novel_all)
        # print(torch.cat(weights_novel_all).size())
        # print(torch.cat(weights_novel_all)[:,:10])
        # (torch.cat(gather_list) * torch.cat(gather_list)).mean().backward()
        # print(weights_novel)
        if iteration % iter_size == 0:
            # Accumulation boundary: run the query passes and take one step.
            optimizer.zero_grad()
            losses_reduced = 0
            loss_dict_all = {}
            for i in range(iter_size_qry):
                images_qry, targets_qry, idx = query_iterator.next()
                images_qry = images_qry.to(device)
                targets_qry = [target.to(device) for target in targets_qry]
                if cfg.MODEL.QRY_BALANCE:
                    batch_id_qry = batch_cls_qry[rank][iteration_qry]
                    iteration_qry += 1
                    loss_dict = model(images_qry, targets_qry, is_query=True,
                                      batch_id=batch_id_qry,
                                      weights_novel=torch.cat(weights_novel_all))
                else:
                    loss_dict = model(images_qry, targets_qry, is_query=True,
                                      weights_novel=torch.cat(weights_novel_all))
                # if is_main_process():
                #     print('loss_dict', loss_dict)
                # Average over the query accumulation steps.
                losses = sum(loss for loss in loss_dict.values()) / iter_size_qry
                # losses.backward(retain_graph=True)
                # retain_graph: the gathered support weights are reused by
                # every query pass in this accumulation window.
                with amp.scale_loss(losses, optimizer) as scaled_losses:
                    scaled_losses.backward(retain_graph=True)
                torch.cuda.empty_cache()
                loss_dict_all = add_dict(loss_dict_all, loss_dict)
            loss_dict_all = avg_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_all', loss_dict_all)
            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_reduced', loss_dict_reduced)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            # losses_dict_reduced = add_dict(losses_dict_reduced, loss_dict_reduced)
            meters.update(iteration / iter_size_qry, loss=losses_reduced,
                          lr=optimizer.param_groups[0]["lr"], **loss_dict_reduced)
            # Reset the accumulated support weights for the next window.
            weights_novel_all = []
            # (weights_novel * weights_novel).mean().backward()
            # for name, param in model.named_parameters():
            #     if 'backbone' not in name:
            #         print(name, True if param.grad is not None else False)
            optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(iteration, time=batch_time, data=data_time)
        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
        torch.cuda.empty_cache()

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)

        if data_loader_val_support is not None and test_period > 0 and iteration % test_period == 0:
            # Periodic validation: rebuild the novel-class classifier from the
            # validation support set, then run inference.
            # meters_val = MetricLogger(delimiter="  ")
            synchronize()
            # """
            model.train()
            with torch.no_grad():
                weights_novel_val_sup_all = []
                # Running sum of gathered support weights; averaged below.
                current_classifier_novel = torch.zeros(
                    [iter_size * nGPU, 1024]).to(device)
                # print(current_classifier_novel)
                avg_steps = 0
                for iteration_val_sup, (images_val_sup, targets_val_sup, idx_val_sup) in enumerate(tqdm(data_loader_val_support)):
                    if any(len(target) < 1 for target in targets_val_sup):
                        logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx_val_sup} || targets Length={[len(target) for target in targets_val_sup]}")
                        continue
                    batch_id_val_sup = batch_cls_sup[rank][int(iteration_val_sup)]
                    # print(iteration_val_sup)
                    images_val_sup = images_val_sup.to(device)
                    targets_val_sup = [target.to(device) for target in targets_val_sup]
                    weight_novel_val_sup = model(images_val_sup, targets_val_sup,
                                                 is_support=True, batch_id=batch_id_val_sup)
                    # weights_novel[rank] = weight_novel_val_sup
                    # print(weight_novel_val_sup.size())
                    # print('before', weight_novel_val_sup)
                    # print('batch_id', batch_id, weight_novel_val_sup[:10])
                    # weight_novel_val_sup = {batch_id:weight_novel_val_sup}
                    torch.cuda.empty_cache()
                    # synchronize()
                    # No gradients needed here, so the plain (non-diffdist)
                    # collective suffices.
                    weights_novel_val_sup = [torch.empty_like(weight_novel_val_sup)
                                             for i in range(dist.get_world_size())]
                    dist.all_gather(weights_novel_val_sup, weight_novel_val_sup)
                    # weights_novel_val_sup = torch.cat(
                    #     all_gather(weight_novel_val_sup))
                    # print('after', weights_novel_val_sup)
                    # print(idx, weights_novel_val_sup)
                    # print(weights_novel_val_sup[:,:10])
                    # if is_main_process():
                    #     import pdb
                    #     pdb.set_trace()
                    # else:
                    #     return
                    weights_novel_val_sup_all.append(
                        torch.cat(weights_novel_val_sup))
                    # print('length', len(weights_novel_val_sup_all))
                    if (iteration_val_sup + 1) % iter_size_qry == 0:
                        # print(torch.cat(weights_novel_val_sup_all).size())
                        # weights_novel_val_sup_all = []
                        avg_steps += 1
                        # print('current_classifier_novel', current_classifier_novel)
                        # print('weights_novel_val_sup_all', weights_novel_val_sup_all)
                        current_classifier_novel = current_classifier_novel + \
                            torch.cat(weights_novel_val_sup_all)
                        weights_novel_val_sup_all = []
                # if is_main_process():
                #     import pdb
                #     pdb.set_trace()
                # else:
                #     return
                # print(iteration_val_sup)
                current_classifier_novel_avg = current_classifier_novel / avg_steps
                # Splice averaged novel-class weights after the base-class
                # classifier weights.
                model.module.roi_heads.box.cls_weights = torch.cat([
                    model.module.roi_heads.box.predictor.cls_score.weight,
                    current_classifier_novel_avg])
            # """
            output_folder = os.path.join(cfg.OUTPUT_DIR, "Validation")
            mkdir(output_folder)
            np.save(os.path.join(output_folder, 'cls_weights_' + str(iteration / iter_size_qry)),
                    np.array(model.module.roi_heads.box.cls_weights.cpu().data))
            # The result can be used for additional logging, e. g. for TensorBoard
            res_infer = inference(
                model,
                iteration / iter_size,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
            )
            # import pdb; pdb.set_trace()
            if res_infer:
                meters_val.update(iteration / iter_size, **res_infer)
            synchronize()
            # print('eval')
            model.train()
            # Validation-loss pass intentionally disabled (string statement).
            """
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val_test)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(
                        iteration / iter_size, loss=losses_reduced, **loss_dict_reduced)
            """
            synchronize()
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration / iter_size,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
            # """
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
            # import json
            # json.dump(model.module.roi_heads.box.cls_weights, open(os.path.join(output_folder, 'cls_weights.json'), 'w'))
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
def main():
    """MLPerf-instrumented training entry point.

    Parses CLI arguments, configures compliance logging on the main process,
    initializes distributed training, derives and broadcasts a master random
    seed, seeds each worker, then runs training.

    Fix: the compliance-logging guard previously read ``if is_main_process:``,
    which tests the *function object* (always truthy), so every rank opened the
    compliance file handler.  It must call the function: ``is_main_process()``.
    """
    mlperf_log.ROOT_DIR_MASKRCNN = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file", default="", metavar="FILE",
        help="path to config file", type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts", help="Modify config options using the command-line",
        default=None, nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    # BUGFIX: call is_main_process() -- the bare function object is always
    # truthy, which made every rank set up compliance logging.
    if is_main_process():
        # Setting logging file parameters for compliance logging
        os.environ["COMPLIANCE_FILE"] = './MASKRCNN_complVv0.5.0_' + str(
            datetime.datetime.now())
        mlperf_log.LOG_FILE = os.getenv("COMPLIANCE_FILE")
        mlperf_log._FILE_HANDLER = logging.FileHandler(mlperf_log.LOG_FILE)
        mlperf_log._FILE_HANDLER.setLevel(logging.DEBUG)
        mlperf_log.LOGGER.addHandler(mlperf_log._FILE_HANDLER)

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()
        print_mlperf(key=mlperf_log.RUN_START)
        # setting seeds - needs to be timed, so after RUN_START
        if is_main_process():
            master_seed = random.SystemRandom().randint(0, 2**32 - 1)
            seed_tensor = torch.tensor(master_seed, dtype=torch.float32,
                                       device=torch.device("cuda"))
        else:
            seed_tensor = torch.tensor(0, dtype=torch.float32,
                                       device=torch.device("cuda"))
        # Rank 0's seed wins; every rank then shares the same master seed.
        torch.distributed.broadcast(seed_tensor, 0)
        master_seed = int(seed_tensor.item())
    else:
        print_mlperf(key=mlperf_log.RUN_START)
        # random master seed, random.SystemRandom() uses /dev/urandom on Unix
        master_seed = random.SystemRandom().randint(0, 2**32 - 1)

    # actually use the random seed
    args.seed = master_seed
    # random number generator with seed set to master_seed
    random_number_generator = random.Random(master_seed)
    print_mlperf(key=mlperf_log.RUN_SET_RANDOM_SEED, value=master_seed)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # generate worker seeds, one seed for every distributed worker
    worker_seeds = generate_seeds(
        random_number_generator,
        torch.distributed.get_world_size()
        if torch.distributed.is_initialized() else 1)  # todo sharath what if CPU

    # broadcast seeds from rank=0 to other workers
    worker_seeds = broadcast_seeds(worker_seeds, device='cuda')

    # Setting worker seeds
    logger.info("Worker {}: Setting seed {}".format(
        args.local_rank, worker_seeds[args.local_rank]))
    torch.manual_seed(worker_seeds[args.local_rank])

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)
    print_mlperf(key=mlperf_log.RUN_FINAL)
def main():
    """Training entry point that writes results into a per-run output folder.

    The output directory is <OUTPUT_DIR>/<OUTPUT_SUB_DIR> when a sub-dir is
    configured, otherwise <OUTPUT_DIR>/<YYYYmmdd-HHMMSS> from the local time.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default=
        "../configs/kidney/e2e_mask_rcnn_X_101_32x8d_FPN_1x_liver_using_pretrained_model.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test", dest="skip_test",
        help="Do not test the final model", action="store_true",
    )
    parser.add_argument(
        "opts", help="Modify config options using the command-line",
        default=None, nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    # Pick the run directory: explicit sub-dir, or a timestamped one.
    if cfg.OUTPUT_SUB_DIR:
        output_dir = os.path.join(cfg.OUTPUT_DIR, cfg.OUTPUT_SUB_DIR)
    else:
        stamp = time.strftime("%Y%m%d-%H%M%S", time.localtime())
        output_dir = os.path.join(cfg.OUTPUT_DIR, stamp)
    cfg.merge_from_list(["OUTPUT_DIR", output_dir])
    cfg.freeze()

    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)
def main():
    """Training entry point that also saves the merged config to the output
    directory and optionally runs the final test."""
    # NOTE(review): hard-codes GPU 7 before arguments are parsed; fails on
    # machines with fewer than 8 GPUs and is overridden per-rank below when
    # running distributed -- looks like leftover debug code, confirm intent.
    torch.cuda.set_device(7)

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default=
        "/home/SelfDriving/maskrcnn/maskrcnn-benchmark/configs/e2e_faster_rcnn_R_50_C4_1x.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test", dest="skip_test",
        help="Do not test the final model", action="store_true",
    )
    parser.add_argument(
        "opts", help="Modify config options using the command-line",
        default=None, nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args.local_rank, args.distributed)
    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """Inference entry point: load a trained checkpoint (given via --weight)
    and evaluate it on every dataset in cfg.DATASETS.TEST."""
    args = parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.merge_from_list(["MODEL.WEIGHT", args.weight])

    # Results land next to the checkpoint file.
    output_dir = os.path.dirname(cfg.MODEL.WEIGHT)
    cfg.OUTPUT_DIR = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.MODEL.WEIGHT)
    _ = checkpointer.load(cfg.MODEL.WEIGHT, cfg.TRAIN.IGNORE_LIST)

    iou_types = ("bbox", ) + (("segm", ) if cfg.MODEL.MASK_ON else ())

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    if cfg.OUTPUT_DIR:
        output_folders = []
        for name in dataset_names:
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", name)
            mkdir(folder)
            output_folders.append(folder)

    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    for folder, name, loader in zip(output_folders, dataset_names,
                                    data_loaders_val):
        inference(
            model,
            loader,
            dataset_name=name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=folder,
            cfg=cfg,
        )
        synchronize()
def main():
    """Action-prediction training entry point: parse CLI options, merge the
    maskrcnn_benchmark config, create the output directory and train."""
    parser = argparse.ArgumentParser(description="Action Prediction Training")
    parser.add_argument(
        "--config-file",
        default=
        "/home/SelfDriving/maskrcnn/maskrcnn-benchmark/configs/baseline.yaml",
        metavar="FILE",
        help="path to maskrcnn_benchmark config file",
        type=str,
    )
    parser.add_argument("--weight_decay", default=1e-4, help="Weight decay")
    parser.add_argument(
        "opts", help="Modify config options using the command-line",
        default=None, nargs=argparse.REMAINDER,
    )
    parser.add_argument("--initLR", help="Initial learning rate",
                        default=0.0001)
    parser.add_argument("--freeze", default=False,
                        help="If freeze faster rcnn")
    parser.add_argument("--imageroot", type=str,
                        help="Directory to the images",
                        default="/data6/SRIP19_SelfDriving/bdd12k/data1/")
    parser.add_argument(
        "--gtroot", type=str, help="Directory to the groundtruth",
        default=
        "/data6/SRIP19_SelfDriving/bdd12k/annotations/12k_gt_train_5_actions.json"
    )
    parser.add_argument(
        "--reasonroot", type=str, help="Directory to the explanations",
        default=
        "/data6/SRIP19_SelfDriving/bdd12k/annotations/train_reason_img.json")
    parser.add_argument("--imWidth", type=int, help="Crop to width",
                        default=1280)
    parser.add_argument("--imHeight", type=int, help="Crop to height",
                        default=720)
    parser.add_argument("--batch_size", type=int, help="Batch Size", default=1)
    parser.add_argument("--experiment", type=str,
                        help="Give this experiment a name",
                        default=str(datetime.datetime.now()))
    parser.add_argument(
        "--model_root", type=str, help="Directory to the trained model",
        default=
        "/data6/SRIP19_SelfDriving/bdd100k/trained_model/Outputs/model_final_apt.pth"
    )
    parser.add_argument("--val", action='store_true', default=False,
                        help='Validation or not')
    parser.add_argument("--num_epoch", default=20,
                        help="The number of epoch for training", type=int)
    parser.add_argument("--from_checkpoint", default=False,
                        help="If we need load weights from checkpoint.")
    parser.add_argument(
        "--checkpoint",
        default=".",
        help="The path to the checkpoint weights.",
        type=str,
    )
    args = parser.parse_args()
    print(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    if torch.cuda.is_available():
        print("CUDA device is available.")

    # output directory
    save_path = cfg.OUTPUT_DIR
    print("Save path:", save_path)
    if save_path:
        mkdir(save_path)
    # logger = setup_logger("training", outdir)

    train(cfg, args)
def run_test(cfg, model, distributed):
    """Evaluate either a single trained model or a NAS supernet.

    With cfg.NAS.TRAIN_SINGLE_MODEL: run standard inference on every test
    dataset (plus optional COCO panoptic scoring on rank 0).  Otherwise run
    the path-priority NAS search over cached results, then hard-exit the
    process (os._exit) once the top-k paths are saved.
    """
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    # output_folders = [None] * len(cfg.DATASETS.TEST)
    # dataset_names = cfg.DATASETS.TEST
    # Single-model runs use the regular test split; NAS runs use the NAS
    # validation split.
    dataset_names = cfg.DATASETS.NAS_VAL if not cfg.NAS.TRAIN_SINGLE_MODEL else cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    if cfg.OUTPUT_DIR:
        for i, name in enumerate(dataset_names):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", name)
            mkdir(folder)
            output_folders[i] = folder

    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    rank = get_rank()

    if cfg.NAS.TRAIN_SINGLE_MODEL:
        if rank == 0:
            print('==' * 20, 'Evaluating single model...', '==' * 20)
        for folder, name, loader in zip(output_folders, dataset_names,
                                        data_loaders_val):
            inference(
                model,
                loader,
                dataset_name=name,
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=folder,
                c2d_json_path=cfg.MODEL.SEG_BRANCH.JSON_PATH,
                cfg=cfg,
            )
            synchronize()
        if rank == 0:
            if 'coco' in cfg.DATASETS.NAME.lower():
                print('Evaluating panoptic results on COCO...')
                os.system(
                    'sh panoptic_scripts/bash_coco_val_evaluate.sh {} | tee pq_results'
                    .format(cfg.OUTPUT_DIR))
    elif not cfg.NAS.SKIP_NAS_TEST:
        if rank == 0:
            print('==' * 10, 'Start NAS testing', '==' * 10)
        timer = Timer()
        timer.tic()
        searcher = PathPrioritySearch(cfg, base_dir='./nas_test')
        searcher.generate_fair_test()  # load cache results and generate new model for test
        searcher.search(model, output_folders, dataset_names, distributed)
        searcher.save_topk()
        total_time_str = get_time_str(timer.toc())
        if rank == 0:
            print('Finish NAS testing, total time:{}'.format(total_time_str))
        # Hard exit: skip any further teardown once NAS results are saved.
        os._exit(0)
    else:
        print('Skipping NAS testing...')
def main():
    """Inference entry point with optional mixed precision: build the model,
    load a checkpoint (--ckpt or latest), and evaluate every test dataset."""
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts", help="Modify config options using the command-line",
        default=None, nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # Explicit --ckpt wins; otherwise fall back to the latest checkpoint.
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    if cfg.OUTPUT_DIR:
        for i, name in enumerate(dataset_names):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", name)
            mkdir(folder)
            output_folders[i] = folder

    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    for folder, name, loader in zip(output_folders, dataset_names,
                                    data_loaders_val):
        inference(
            model,
            loader,
            dataset_name=name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=folder,
        )
        synchronize()
def main():
    """Evaluate an existing prediction-bbox JSON file (no model is built or
    run): for each test dataset, score the predictions in --json-file with
    COCO bbox metrics."""
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--json-file",
        default="",
        metavar="FILE",
        help="path to prediction bbox json file",
    )
    parser.add_argument(
        "opts", help="Modify config options using the command-line",
        default=None, nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    output_dir = cfg.OUTPUT_DIR

    # Only bbox evaluation: predictions come from a JSON file, so mask and
    # keypoint results are unavailable.
    iou_types = ("bbox", )

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    if cfg.OUTPUT_DIR:
        for i, name in enumerate(dataset_names):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", name)
            mkdir(folder)
            output_folders[i] = folder

    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=False)
    for folder, name, loader in zip(output_folders, dataset_names,
                                    data_loaders_val):
        dataset = loader.dataset
        do_coco_json_evaluation(
            dataset=dataset,
            json_file=args.json_file,
            box_only=False,
            output_folder=folder,
            iou_types=iou_types,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL)
        synchronize()
def main():
    """Entry point: parse CLI arguments, load the maskrcnn-benchmark config,
    and run ``test(cfg, args)`` for the action-prediction model.

    Fix: ``--weight_decay`` and ``--initLR`` now declare ``type=float`` so a
    value passed on the command line is parsed as a number instead of a
    string (the defaults were already floats, so default behavior is
    unchanged).
    """
    # Build a parser for arguments
    parser = argparse.ArgumentParser(description="Action Prediction Training")
    parser.add_argument(
        "--config-file",
        default="/home/selfdriving/maskrcnn-benchmark/configs/e2e_faster_rcnn_I3D_resnet101.yaml",
        metavar="FILE",
        help="path to maskrcnn_benchmark config file",
        type=str,
    )
    parser.add_argument(
        "--weight_decay",
        default=1e-4,
        type=float,  # was missing: CLI values arrived as str
        help="Weight decay",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument("--initLR",
                        type=float,  # was missing: CLI values arrived as str
                        help="Initial learning rate",
                        default=0.0001)
    # NOTE(review): passing any value on the CLI (even "False") yields a
    # truthy string; consider action="store_true" if callers only use the flag
    # form — left unchanged to preserve the existing interface.
    parser.add_argument(
        "--freeze",
        default=False,
        help="If freeze faster rcnn",
    )
    parser.add_argument("--featureRoot",
                        type=str,
                        help="Directory to the features",
                        default="/home/selfdriving/I3D/features_val/")
    parser.add_argument(
        "--bboxRoot",
        type=str,
        help="Directory to the bbox groundtruth",
        default="/home/selfdriving/mrcnn/output/inference/bdd100k_val/last_preds.pth")
    parser.add_argument(
        "--actionRoot",
        type=str,
        help="Directory to the action label groundtruth",
        default="/home/selfdriving/I3D/data/4action_reason.json")
    parser.add_argument("--batch_size", type=int, help="Batch Size", default=1)
    # Experiment name defaults to the launch timestamp.
    parser.add_argument("--experiment",
                        type=str,
                        help="Give this experiment a name",
                        default=str(datetime.datetime.now()))
    parser.add_argument(
        "--model_root",
        type=str,
        help="Directory to the trained model",
        default="/home/selfdriving/mrcnn/video_output/model3/net_Final.pth")
    parser.add_argument("--val",
                        action='store_true',
                        default=False,
                        help='Validation or not')
    parser.add_argument("--num_epoch",
                        default=20,
                        help="The number of epoch for training",
                        type=int)
    # NOTE(review): same string-truthiness caveat as --freeze.
    parser.add_argument("--from_checkpoint",
                        default=False,
                        help="If we need load weights from checkpoint.")
    parser.add_argument(
        "--checkpoint",
        default=".",
        help="The path to the checkpoint weights.",
        type=str,
    )
    args = parser.parse_args()
    print(args)

    # Merge the config file and any "opts" overrides, then lock the config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    if torch.cuda.is_available():
        print("CUDA device is available.")

    # output directory
    outdir = cfg.OUTPUT_DIR
    print("Save path:", outdir)
    if outdir:
        mkdir(outdir)

    # logger = setup_logger("training", outdir)
    test(cfg, args)
def main():
    """MLPerf-instrumented training entry point.

    Sets up compliance logging, derives one master random seed (rank 0 draws
    it and broadcasts it in the distributed case), seeds every worker, merges
    the config, and launches ``train``.
    """
    mlperf_log.ROOT_DIR_MASKRCNN = os.path.dirname(os.path.abspath(__file__))
    # mlperf_log.LOGGER.propagate = False
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch/configs/rpn_r50.py",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--skip-test',
                        dest='skip_test',
                        help='Do not test the model',
                        action='store_true')
    parser.add_argument("--fp16",
                        action="store_true",
                        help="Enable multi-precision training")
    parser.add_argument("--min_bbox_map",
                        type=float,
                        default=0.377,
                        help="Target BBOX MAP")
    parser.add_argument("--min_mask_map",
                        type=float,
                        default=0.339,
                        help="Target SEGM/MASK MAP")
    # NOTE: the --seed value is overwritten below by the generated master
    # seed; it is effectively ignored.
    parser.add_argument("--seed", type=int, default=1, help="Seed")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    args.distributed = (int(os.environ["WORLD_SIZE"]) > 1
                        if "WORLD_SIZE" in os.environ else False)

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        # to synchronize start of time
        torch.distributed.broadcast(torch.tensor([1], device="cuda"), 0)
        torch.cuda.synchronize()

        if torch.distributed.get_rank() == 0:
            # Setting logging file parameters for compliance logging
            os.environ["COMPLIANCE_FILE"] = '/MASKRCNN_complVv0.5.0_' + str(
                datetime.datetime.now())
            mlperf_log.LOG_FILE = os.getenv("COMPLIANCE_FILE")
            mlperf_log._FILE_HANDLER = logging.FileHandler(mlperf_log.LOG_FILE)
            mlperf_log._FILE_HANDLER.setLevel(logging.DEBUG)
            mlperf_log.LOGGER.addHandler(mlperf_log._FILE_HANDLER)

            print_mlperf(key=mlperf_log.RUN_START)

        # Setting seed: rank 0 draws the master seed and broadcasts it so
        # every process agrees on the same value.
        # NOTE(review): the seed travels as a float32 tensor; values near
        # 2**32 lose precision in float32 — confirm this matches the
        # reference implementation before changing.
        seed_tensor = torch.tensor(0,
                                   dtype=torch.float32,
                                   device=torch.device("cuda"))
        if torch.distributed.get_rank() == 0:
            # seed = int(time.time())
            # random master seed, random.SystemRandom() uses /dev/urandom on Unix
            master_seed = random.SystemRandom().randint(0, 2**32 - 1)
            seed_tensor = torch.tensor(master_seed,
                                       dtype=torch.float32,
                                       device=torch.device("cuda"))
        torch.distributed.broadcast(seed_tensor, 0)
        master_seed = int(seed_tensor.item())
    else:
        # Single-process path: same compliance-logging setup, local seed.
        # Setting logging file parameters for compliance logging
        os.environ["COMPLIANCE_FILE"] = '/MASKRCNN_complVv0.5.0_' + str(
            datetime.datetime.now())
        mlperf_log.LOG_FILE = os.getenv("COMPLIANCE_FILE")
        mlperf_log._FILE_HANDLER = logging.FileHandler(mlperf_log.LOG_FILE)
        mlperf_log._FILE_HANDLER.setLevel(logging.DEBUG)
        mlperf_log.LOGGER.addHandler(mlperf_log._FILE_HANDLER)

        print_mlperf(key=mlperf_log.RUN_START)
        # random master seed, random.SystemRandom() uses /dev/urandom on Unix
        master_seed = random.SystemRandom().randint(0, 2**32 - 1)

    # Record the actually-used seed on args (overrides --seed).
    args.seed = master_seed
    # random number generator with seed set to master_seed
    random_number_generator = random.Random(master_seed)
    print_mlperf(key=mlperf_log.RUN_SET_RANDOM_SEED, value=master_seed)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    if args.skip_test:
        cfg.DO_ONLINE_MAP_EVAL = False
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    #logger = setup_logger("maskrcnn_benchmark", output_dir, args.local_rank)
    logger = setup_logger("maskrcnn_benchmark", None, args.local_rank)
    logger.info(args)

    # generate worker seeds, one seed for every distributed worker
    worker_seeds = generate_seeds(
        random_number_generator,
        torch.distributed.get_world_size()
        if torch.distributed.is_initialized() else 1)  # todo sharath what if CPU

    # broadcast seeds from rank=0 to other workers
    worker_seeds = broadcast_seeds(worker_seeds, device='cuda')

    # Setting worker seeds
    logger.info("Worker {}: Setting seed {}".format(
        args.local_rank, worker_seeds[args.local_rank]))
    torch.manual_seed(worker_seeds[args.local_rank])

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, random_number_generator, args.local_rank,
                  args.distributed, args, args.fp16)

    print_mlperf(key=mlperf_log.RUN_FINAL)
def main():
    """Inference entry point with optional multi-scale test-time augmentation.

    In multi-scale mode the config is temporarily defrosted to rebuild a
    validation loader per (min, max) test size, per-scale predictions are
    collected, merged by bbox voting, and evaluated once; otherwise each
    dataset is inferred and evaluated independently.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Empty save_dir: log to stdout only.
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    if cfg.TEST.MULTI_SCALE:
        # Build one validation loader per test scale; the frozen config is
        # temporarily defrosted to swap the min/max test sizes.
        data_loaders_val = []
        for min_size_test, max_size_test in cfg.TEST.MULTI_SIZES:
            cfg.defrost()
            cfg.INPUT.MIN_SIZE_TEST = min_size_test
            cfg.INPUT.MAX_SIZE_TEST = max_size_test
            cfg.freeze()
            data_loaders_val.extend(
                make_data_loader(cfg, is_train=False,
                                 is_distributed=distributed))
        # Replicate folders/names so the zip below pairs every scale with the
        # same output folder and dataset name.
        output_folders = output_folders * len(cfg.TEST.MULTI_SIZES)
        dataset_names = dataset_names * len(cfg.TEST.MULTI_SIZES)
    else:
        data_loaders_val = make_data_loader(cfg,
                                            is_train=False,
                                            is_distributed=distributed)
    predictions = []

    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        prediction = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
        predictions.append(prediction)
    if cfg.TEST.MULTI_SCALE:
        # Merge the per-scale predictions into one set via bbox voting, then
        # evaluate once against the (single) underlying dataset.
        logger.info("Processing multi-scale bbox voting....")
        voted_predictions = voting(
            predictions,
            args.local_rank)  # box_voting(predictions, args.local_rank)
        torch.save(voted_predictions,
                   os.path.join(output_folders[0], 'predictions.pth'))

        extra_args = dict(
            box_only=cfg.MODEL.RPN_ONLY,
            iou_types=iou_types,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        )
        evaluate(dataset=data_loaders_val[0].dataset,
                 predictions=voted_predictions,
                 output_folder=output_folders[0],
                 **extra_args)
    else:
        # Evaluate each dataset's predictions independently.
        for prediction, output_folder, dataset_name, data_loader_val in zip(
                predictions, output_folders, dataset_names, data_loaders_val):
            extra_args = dict(
                box_only=cfg.MODEL.RPN_ONLY,
                iou_types=iou_types,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            )
            evaluate(dataset=data_loader_val.dataset,
                     predictions=prediction,
                     output_folder=output_folder,
                     **extra_args)
    return 0
def main():
    """Standard maskrcnn-benchmark training entry point.

    Parses CLI arguments, initializes (optional) distributed training, merges
    and freezes the config, saves it to the output directory, then runs
    ``train`` and (unless ``--skip-test``) ``run_test``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    # path to the configuration file
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",  # placeholder shown in --help output
        help="path to config file",
        type=str,
    )
    # GPU index used by the current process (set by the distributed launcher)
    parser.add_argument("--local_rank", type=int, default=0)
    # skip evaluating the final model after training
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",  # flag: presence on the command line means True
    )
    # Override individual config entries from the command line,
    # e.g. opts=['SOLVER.IMS_PER_BATCH', '2', 'SOLVER.BASE_LR', '0.0025']
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1  # multi-GPU training
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # merge the config file into cfg
    cfg.merge_from_file(args.config_file)
    # merge the opts list into cfg
    cfg.merge_from_list(args.opts)
    # make cfg and all of its child nodes immutable
    cfg.freeze()

    # training output directory (config default: _C.OUTPUT_DIR = ".")
    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    # log run information
    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # path where the resolved config is dumped
    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """Inference entry point that evaluates one or many checkpoints.

    If ``cfg.MODEL.WEIGHT`` is a ``.pth``/``.pkl`` file only that checkpoint
    is tested; if it is a directory, every ``*.pth`` inside it is tested in
    sorted order.

    Fixes:
    - ``logger.info("Testing on checkpoints:", all_ckpy_names)`` passed the
      list as a %-format argument with no placeholder in the message, so the
      record could not be formatted; the list is now embedded via
      ``str.format``.
    - The two chained ``endswith`` calls are collapsed into one call with a
      tuple of suffixes (equivalent behavior).
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = "."
    logger = setup_logger("maskrcnn_benchmark",
                          save_dir,
                          get_rank(),
                          filename='test_all_BDD_ckpts_log.txt')
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # initialize model
    model = build_detection_model(cfg, save_features=False)
    model.to(cfg.MODEL.DEVICE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)

    # initialize test type, output folders and dataloader
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    # Testing on multiple checkpoints if the weight is a directory instead of a .pth file
    if cfg.MODEL.WEIGHT.endswith(('.pth', '.pkl')):
        all_ckpy_names = [cfg.MODEL.WEIGHT]
    else:
        all_ckpy_names = sorted(
            glob.glob(os.path.join(cfg.MODEL.WEIGHT, '*.pth')))
    # Fixed: the list must be embedded in the message string, not passed as a
    # stray %-format argument.
    logger.info("Testing on checkpoints: {}".format(all_ckpy_names))

    for ckpt_name in all_ckpy_names:
        logger.info("Testing {}".format(ckpt_name))
        _ = checkpointer.load(ckpt_name)  #cfg.MODEL.WEIGHT)
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            # if the inference is done, only do the evaluation
            # if os.path.isfile(os.path.join(output_folder, "predictions.pth")):
            #     logger.info("Inference was done, only do evaluation!")
            #     predictions = torch.load(os.path.join(output_folder, "predictions.pth"))
            #     extra_args = dict(
            #         box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            #         iou_types=iou_types,
            #         expected_results=cfg.TEST.EXPECTED_RESULTS,
            #         expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            #     )
            #     evaluate(dataset=data_loader_val.dataset,
            #              predictions=predictions,
            #              output_folder=output_folder,
            #              **extra_args)
            # else:
            #     logger.info("No inference was done, run inference first")
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                convert_pred_coco2cityscapes=cfg.DATASETS.CONVERT,
            )
            synchronize()
def main():
    """Knowledge-distillation training entry point (teacher / student / kd).

    Hard-coded settings at the top select the mode and common solver options;
    both the teacher and student configs are merged with the same ``--opts``
    overrides, and ``train`` receives the list of active configs.
    """
    # setting default
    class_num = 21  # change numbers of class
    batch_size = 2  # change training batch size
    save_period = 50  # each 5000 iterations save and test once
    max_iteration = 400000  # train how much iterations
    lr_reduce_step = (300000, 340000)  # reduce learning rate at 300000 and 340000 iterations
    save_path = 'checkpoints/test'  # where to save the model (ex. modify checkpoint/XXXX)
    train_mode = 'kd'  # choose training mode (teacher/student/kd)

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--teacher-config-file",
        default="../configs/centermask/centermask_V_19_eSE_FPN_ms_3x.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "--student-config-file",
        default="../configs/centermask/centermask_V_19_eSE_FPN_lite_res600_ms_bs16_4x.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    # NOTE(review): unusual combination — an optional --opts with
    # nargs=REMAINDER whose default list mixes strings and ints; the default
    # is only well-formed because merge_from_list accepts raw Python values.
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line",
        default=['MODEL.FCOS.NUM_CLASSES', class_num,
                 'SOLVER.CHECKPOINT_PERIOD', save_period,
                 'SOLVER.TEST_PERIOD', save_period,
                 'SOLVER.IMS_PER_BATCH', batch_size,
                 'SOLVER.MAX_ITER', max_iteration,
                 'SOLVER.STEPS', lr_reduce_step,
                 'OUTPUT_DIR', save_path],
        nargs=argparse.REMAINDER,
    )
    # setting kd loss: which distillation losses are enabled depends on the
    # hard-coded train_mode above.
    if train_mode == 'kd':
        parser.add_argument('--loss_head', default=True)
        parser.add_argument('--loss_correlation', default=True)
        parser.add_argument('--loss_featuremap', default=False)
    else:
        # always False
        parser.add_argument('--loss_head', default=False)
        parser.add_argument('--loss_correlation', default=False)
        parser.add_argument('--loss_featuremap', default=False)
    global args
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Teacher config starts as a deep copy of the global cfg taken BEFORE the
    # student file is merged into it.
    t_cfg = copy.deepcopy(cfg)
    cfg.merge_from_file(args.student_config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    t_cfg.merge_from_file(args.teacher_config_file)
    t_cfg.merge_from_list(args.opts)
    t_cfg.freeze()

    # total_cfg[0] drives output dir / logging; kd mode trains both.
    if train_mode == 'teacher':
        total_cfg = [t_cfg]
    elif train_mode == 'student':
        total_cfg = [cfg]
    else:
        total_cfg = [cfg, t_cfg]

    output_dir = total_cfg[0].OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    if train_mode == 'teacher':
        logger.info("Loaded configuration file {}".format(
            args.teacher_config_file))
    else:
        logger.info("Loaded configuration file {}".format(
            args.student_config_file))
    with open(args.student_config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(total_cfg[0]))

    model = train(total_cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(total_cfg[0], model, args.distributed)
def main():
    """Occlusion-net training entry point.

    Parses CLI arguments (including an optional Comet.ml experiment tag),
    initializes distributed training when launched with multiple processes,
    merges/freezes the config, then runs ``train`` and optionally
    ``run_test``.

    Fix: ``if USE_COMETML == True:`` replaced with the idiomatic truthiness
    test ``if USE_COMETML:`` (same behavior for the boolean flag).
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="data/occlusion_net_train.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--cometml-tag",
        dest="cometml_tag",
        default="occlusion-net",
    )
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # Tag the Comet.ml experiment only when Comet.ml support is enabled.
    if USE_COMETML:
        experiment.add_tag(args.cometml_tag)

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """DRG human-object-interaction inference entry point.

    Loads a detection model plus per-dataset pickled detections / word
    embeddings / thresholds, runs ``run_test`` for each test dataset, and
    computes the HICO mAP from the saved detections.
    """
    # Mapping of prior_flag values to which priors are applied:
    #   prior_flag   apply_prior   prior_mask
    #   0            -             -
    #   1            Y             -
    #   2            -             Y
    #   3            Y             Y
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--num_iteration',
                        dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1,
                        type=int)
    parser.add_argument('--object_thres',
                        dest='object_thres',
                        help='Object threshold',
                        default=0.4,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres',
                        dest='human_thres',
                        help='Human threshold',
                        default=0.6,
                        type=float)
    parser.add_argument('--prior_flag',
                        dest='prior_flag',
                        help='whether use prior_flag',
                        default=1,
                        type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1 and torch.cuda.is_available()
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    print('prior flag: {}'.format(args.prior_flag))

    # Resolve --config-file relative to the repository root (parent of this
    # script's directory).
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DRG.inference", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision,
                          verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # --num_iteration overrides --ckpt with a specific snapshot path.
    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    opt['word_dim'] = 300
    opt['use_thres_dic'] = 1
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        # NOTE(review): pickle.load on catalog-provided files — only safe for
        # trusted, locally generated data; the file handles opened inline here
        # are never explicitly closed.
        test_detection = pickle.load(open(data_args['test_detection_file'],
                                          "rb"),
                                     encoding='latin1')
        word_embeddings = pickle.load(open(data_args['word_embedding_file'],
                                           "rb"),
                                      encoding='latin1')
        opt['thres_dic'] = pickle.load(open(data_args['threshold_dic'], "rb"),
                                       encoding='latin1')
        output_file = os.path.join(output_folder, 'detection.pkl')
        # hico_folder = os.path.join(output_folder, 'HICO')
        output_map_folder = os.path.join(output_folder, 'map')

        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset.".format(dataset_name))

        run_test(model,
                 dataset_name=dataset_name,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 output_file=output_file,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 device=device,
                 cfg=cfg,
                 opt=opt)

        # Generate_HICO_detection(output_file, hico_folder)
        compute_hico_map(output_map_folder, output_file, 'test')
def main():
    """NAS-aware training entry point.

    Same flow as the standard trainer, plus: when
    ``cfg.NAS.TRAIN_SINGLE_MODEL`` is set, a single architecture description
    is loaded from the ``--build-model`` json file and passed to ``train``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument(
        "--build-model",
        default="",
        metavar="FILE",
        help="path to NAS model build file",
        type=str,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    if cfg.NAS.TRAIN_SINGLE_MODEL:
        assert len(
            args.build_model) != 0, 'args.build_model should be provided'
        model_config = json.load(open(args.build_model, 'r'))
        # A single-element list is unwrapped to its sole architecture dict.
        if isinstance(model_config, list):
            assert len(model_config) == 1
            model_config = model_config[0]
        print('Training single model:', model_config)
        model = train(cfg, args.local_rank, args.distributed, model_config)
    else:
        model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """DLLogger-instrumented training entry point.

    Parses CLI arguments (``--fp16``/``--amp`` both enable mixed precision),
    merges config overrides (``--max_steps``, ``--skip_checkpoint``),
    initializes DLLogger on the main process, trains, and optionally tests.

    Fix: the redundant ``if args.fp16: fp16 = True else: fp16 = False``
    branch is replaced by the direct assignment ``fp16 = args.fp16``
    (identical behavior — ``args.fp16`` is already a bool from argparse).
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    # NOTE(review): when LOCAL_RANK is set in the environment the default is a
    # string, not an int (type= only applies to CLI-supplied values) — confirm
    # downstream consumers tolerate that before changing.
    parser.add_argument("--local_rank",
                        type=int,
                        default=os.getenv('LOCAL_RANK', 0))
    parser.add_argument("--max_steps",
                        type=int,
                        default=0,
                        help="Override number of training steps in the config")
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument("--fp16",
                        help="Mixed precision training",
                        action="store_true")
    parser.add_argument("--amp",
                        help="Mixed precision training",
                        action="store_true")
    parser.add_argument('--skip_checkpoint',
                        default=False,
                        action='store_true',
                        help="Whether to save checkpoints")
    parser.add_argument(
        "--json-summary",
        help="Out file for DLLogger",
        default="dllogger.out",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    # --amp is an alias for --fp16.
    args.fp16 = args.fp16 or args.amp

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    # Redundant option - Override config parameter with command line input
    if args.max_steps > 0:
        cfg.SOLVER.MAX_ITER = args.max_steps

    if args.skip_checkpoint:
        cfg.SAVE_CHECKPOINT = False

    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    # Only the main process writes the JSON/stdout DLLogger backends.
    if is_main_process():
        dllogger.init(backends=[
            dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                       filename=args.json_summary),
            dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                   step_format=format_step)
        ])
    else:
        dllogger.init(backends=[])

    dllogger.log(step="PARAMETER", data={"gpu_count": num_gpus})
    # dllogger.log(step="PARAMETER", data={"environment_info": collect_env_info()})
    dllogger.log(step="PARAMETER", data={"config_file": args.config_file})

    # Read the raw config for the record (the string itself is not logged;
    # kept for parity with the reference script).
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()

    dllogger.log(step="PARAMETER", data={"config": cfg})

    fp16 = args.fp16

    model, iters_per_epoch = train(cfg, args.local_rank, args.distributed,
                                   fp16, dllogger)

    if not args.skip_test:
        if not cfg.PER_EPOCH_EVAL:
            test_model(cfg, model, args.distributed, iters_per_epoch,
                       dllogger)
_ = checkpointer.load_pth_file(args.load_pth) else: _ = checkpointer.load(cfg.MODEL.WEIGHT) iou_types = ("bbox", ) if cfg.MODEL.MASK_ON: iou_types = iou_types + ("segm", ) if cfg.MODEL.KEYPOINT_ON: iou_types = iou_types + ("keypoints", ) output_folders = [None] * len(cfg.DATASETS.TEST) dataset_names = cfg.DATASETS.TEST if cfg.OUTPUT_DIR: for idx, dataset_name in enumerate(dataset_names): output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) mkdir(output_folder) output_folders[idx] = output_folder data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed) for output_folder, dataset_name, data_loader_val in zip( output_folders, dataset_names, data_loaders_val): inference( model, data_loader_val, dataset_name=dataset_name, iou_types=iou_types, box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, device=cfg.MODEL.DEVICE, expected_results=cfg.TEST.EXPECTED_RESULTS, expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
def train_maskscoring_rcnn(config_file):
    """Train (and optionally test) a Mask Scoring R-CNN model.

    Reads all settings from the YAML file at ``config_file`` (loaded as a yacs
    CfgNode, used in place of CLI args) and drives the maskrcnn_benchmark
    train/test loop. Distributed mode is enabled when WORLD_SIZE > 1.
    """
    import sys
    # Make the vendored maskscoring_rcnn package importable.
    sys.path.append('./detection_model/maskscoring_rcnn')
    # Set up custom environment before nearly anything else is imported
    # NOTE: this should be the first import (do not reorder)
    import argparse
    import os
    # Pin the visible GPUs for this training run.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'
    import torch
    from maskrcnn_benchmark.data import make_data_loader
    from maskrcnn_benchmark.solver import make_lr_scheduler
    from maskrcnn_benchmark.solver import make_optimizer
    from maskrcnn_benchmark.engine.inference import inference
    from maskrcnn_benchmark.engine.trainer import do_train
    from maskrcnn_benchmark.modeling.detector import build_detection_model
    from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
    from maskrcnn_benchmark.utils.collect_env import collect_env_info
    from maskrcnn_benchmark.utils.comm import synchronize, get_rank
    from maskrcnn_benchmark.utils.imports import import_file
    from maskrcnn_benchmark.utils.logger import setup_logger
    from maskrcnn_benchmark.utils.miscellaneous import mkdir
    from yacs.config import CfgNode as CN

    # Module-level iteration counter shared with other code in this file.
    global total_iter
    total_iter = 0

    def read_config_file(config_file):
        """Read config information from a YAML file into a yacs CfgNode."""
        f = open(config_file)
        opt = CN.load_cfg(f)
        return opt

    # `opt` replaces argparse args: all options come from the YAML config.
    opt = read_config_file(config_file)

    def train(cfg, local_rank, distributed):
        """Build model/optimizer/scheduler, resume from checkpoint, and run
        the training loop. Returns the trained model."""
        model = build_detection_model(cfg)
        device = torch.device(cfg.MODEL.DEVICE)
        model.to(device)
        optimizer = make_optimizer(cfg, model)
        scheduler = make_lr_scheduler(cfg, optimizer)
        if distributed:
            model = torch.nn.parallel.deprecated.DistributedDataParallel(
                model, device_ids=[local_rank], output_device=local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            )
        arguments = {}
        arguments["iteration"] = 0
        output_dir = cfg.OUTPUT_DIR
        # Only rank 0 writes checkpoints to disk.
        save_to_disk = get_rank() == 0
        checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                             output_dir, save_to_disk)
        # Resume: checkpoint data (e.g. last iteration) overrides the defaults.
        extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
        arguments.update(extra_checkpoint_data)
        data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )
        checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
        do_train(
            model,
            data_loader,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
        )
        return model

    def test(cfg, model, distributed):
        """Run inference on every cfg.DATASETS.TEST dataset, writing results
        under <OUTPUT_DIR>/inference/<dataset_name>."""
        if distributed:
            # Unwrap DistributedDataParallel before single-process inference.
            model = model.module
        torch.cuda.empty_cache()  # TODO check if it helps
        iou_types = ("bbox", )
        if cfg.MODEL.MASK_ON:
            iou_types = iou_types + ("segm", )
        output_folders = [None] * len(cfg.DATASETS.TEST)
        if cfg.OUTPUT_DIR:
            dataset_names = cfg.DATASETS.TEST
            for idx, dataset_name in enumerate(dataset_names):
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                             dataset_name)
                mkdir(output_folder)
                output_folders[idx] = output_folder
        data_loaders_val = make_data_loader(cfg, is_train=False,
                                            is_distributed=distributed)
        for output_folder, data_loader_val in zip(output_folders,
                                                  data_loaders_val):
            inference(
                model,
                data_loader_val,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                maskiou_on=cfg.MODEL.MASKIOU_ON)
            synchronize()

    # (A commented-out argparse block from the original standalone script was
    # removed here; all of its options now come from the YAML config `opt`.)
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    print('num_gpus = ', num_gpus)
    opt.distributed = num_gpus > 1
    if opt.distributed:
        torch.cuda.set_device(opt.local_rank)
        torch.distributed.deprecated.init_process_group(backend="nccl",
                                                        init_method="env://")
    output_dir = opt.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)
    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(config_file))
    logger.info("Running with config:\n{}".format(opt))
    model = train(opt, opt.local_rank, opt.distributed)
    if not opt.skip_test:
        test(opt, model, opt.distributed)
def main():
    """Run COCO-style inference for a trained detector on every test dataset
    listed in the config, then synchronize all ranks."""
    arg_parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    arg_parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    arg_parser.add_argument("--local_rank", type=int, default=0)
    arg_parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = arg_parser.parse_args()

    world_size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = world_size > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    # Layer CLI overrides on top of the YAML config, then lock it.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(world_size))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Always evaluate boxes; add masks when the model predicts them.
    iou_types = ("bbox",) + (("segm",) if cfg.MODEL.MASK_ON else ())

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    if cfg.OUTPUT_DIR:
        for idx, name in enumerate(dataset_names):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", name)
            mkdir(folder)
            output_folders[idx] = folder

    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for folder, name, loader in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            loader,
            dataset_name=name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=folder,
        )
        synchronize()
def main():
    """DRG human-object-interaction inference over the configured test
    datasets, followed by V-COCO evaluation of the saved detections."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/e2e_faster_rcnn_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--num_iteration', dest='num_iteration',
                        help='Specify which weight to load', default=-1, type=int)
    parser.add_argument('--object_thres', dest='object_thres',
                        help='Object threshold', default=0.1,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres', dest='human_thres',
                        help='Human threshold', default=0.8, type=float)
    parser.add_argument('--prior_flag', dest='prior_flag',
                        help='whether use prior_flag', default=1, type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    # Distributed only makes sense with CUDA (NCCL backend).
    distributed = num_gpus > 1 and torch.cuda.is_available()
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    # Resolve the config path relative to the repository root (parent of this file).
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DRG", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary.
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    if args.num_iteration != -1:
        # A specific iteration was requested: load that snapshot, not the latest.
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    opt['word_dim'] = 300
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        im_dir = data_args['im_dir']
        # Fixed: all files below were opened with bare open() and never closed
        # (handles leaked until GC); use context managers instead.
        with open(data_args['test_detection_file'], "rb") as f:
            test_detection = pickle.load(f, encoding='latin1')
        with open(data_args['prior_mask'], "rb") as f:
            prior_mask = pickle.load(f, encoding='latin1')
        with open(data_args['action_index']) as f:
            action_dic = json.load(f)
        action_dic_inv = {y: x for x, y in action_dic.items()}
        with open(data_args['vcoco_test_ids_file'], 'r') as f:
            test_image_id_list = [int(line.rstrip()) for line in f]
        vcocoeval = VCOCOeval(data_args['vcoco_test_file'],
                              data_args['ann_file'],
                              data_args['vcoco_test_ids_file'])
        with open(data_args['word_embedding_file'], "rb") as f:
            word_embeddings = pickle.load(f, encoding='latin1')
        output_file = os.path.join(output_folder, 'detection.pkl')
        output_dict_file = os.path.join(
            output_folder, 'detection_app_{}_new.pkl'.format(dataset_name))
        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset({} images).".format(
            dataset_name, len(test_image_id_list)))
        run_test(model,
                 dataset_name=dataset_name,
                 im_dir=im_dir,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 test_image_id_list=test_image_id_list,
                 prior_mask=prior_mask,
                 action_dic_inv=action_dic_inv,
                 output_file=output_file,
                 output_dict_file=output_dict_file,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 prior_flag=args.prior_flag,
                 device=device,
                 cfg=cfg)
        synchronize()
        vcocoeval._do_eval(output_file, ovr_thresh=0.5)
def main(): parser = argparse.ArgumentParser(description="PyTorch Object Detection Training") parser.add_argument( "--config-file", default="", metavar="FILE", help="path to config file", type=str, ) parser.add_argument("--local_rank", type=int, default=0) parser.add_argument( "--skip-test", dest="skip_test", help="Do not test the final model", action="store_true", ) parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 args.distributed = num_gpus > 1 size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 "MASTER_ADDR" "MASTER_PORT" "RANK" "WORLD_SIZE" if args.distributed: dist_url = "tcp://"+os.environ["MASTER_ADDR"]+":"+os.environ["MASTER_PORT"] rank = os.environ["RANK"] print("dist_url: ",dist_url) print("rank: " ,rank) torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group( backend="nccl", init_method=dist_url,rank=rank,world_size=size ) synchronize() cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = cfg.OUTPUT_DIR if output_dir: mkdir(output_dir) logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(args) logger.info("Collecting env info (might take some time)") logger.info("\n" + collect_env_info()) logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, "r") as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) model = train(cfg, args.local_rank, args.distributed) if not args.skip_test: run_test(cfg, model, args.distributed)
dataNew = save_dir2 + '/' + str(i) + '.mat' scio.savemat(dataNew, {'data': output}) if __name__ == "__main__": parser = argparse.ArgumentParser( description="PyTorch Visual Relationship Detection Training") parser.add_argument( "--config-file", default="/configs/RLM_Net_predicate_recognition_stage.yaml", metavar="FILE", help="path to config file", type=str, ) args = parser.parse_args() cfg.merge_from_file(os.getcwd() + args.config_file) cfg.FILTERMODE = True cfg.OUTPUT_DIR = "outputs/output_predicate_recognition_stage" cfg.freeze() if cfg.OUTPUT_DIR: mkdir(cfg.OUTPUT_DIR) logger = setup_logger("maskrcnn_benchmark", cfg.OUTPUT_DIR, get_rank()) logger.info(args) logger.info("Running with config:\n{}".format(cfg)) step = str(65000).zfill(7) main(step, cfg)
def main():
    """Run inference with a trained detector on every dataset in
    cfg.DATASETS.TEST, then evaluate the predictions."""
    # NOTE(review): the logger is created before args/cfg exist, so it always
    # uses the empty save_dir rather than cfg.OUTPUT_DIR — confirm intended.
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Layer CLI overrides on top of the YAML config, then lock it.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Evaluate boxes, plus masks/keypoints when the model predicts them.
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, split=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        predictions = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            device=cfg.MODEL.DEVICE,
            output_folder=output_folder,
        )
        # NOTE(review): `eval` here must be a project-imported evaluation
        # helper that shadows the builtin — the builtin eval() accepts no
        # keyword arguments. Verify the import at the top of the file.
        eval(
            predictions,
            data_loader_val,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        )
        synchronize()
def main(step, cfg):
    """Evaluate the three-stage pipeline (backbone detector, rating model,
    predicate model) at training step ``step``, then export the saved
    predictions as per-image MATLAB .mat files.

    Args:
        step: zero-padded iteration string selecting which model3 snapshot to load.
        cfg:  frozen yacs config.
    """
    # Stage 1: detection backbone.
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    # Stage 2: rating model.
    model2 = rating_model(cfg)
    model2.to(cfg.MODEL.DEVICE)
    print(model2)
    # Stage 3: predicate model.
    model3 = predicate_model(cfg)
    model3.to(cfg.MODEL.DEVICE)
    print(model3)

    # Load each model's weights from its own checkpoint file. `removekey`
    # with an empty key list copies the state dict unchanged.
    backbone_parameters = torch.load(os.getcwd() + cfg.CONFIG.backbone_weight,
                                     map_location=torch.device("cpu"))
    newdict = {}
    newdict['model'] = removekey(backbone_parameters['model'], [])
    load_state_dict(model, newdict.pop("model"))
    rating_parameters = torch.load(os.getcwd() + cfg.CONFIG.rating_weight,
                                   map_location=torch.device("cpu"))
    newdict = {}
    newdict['model'] = removekey(rating_parameters['model'], [])
    load_state_dict(model2, newdict.pop("model"))
    # The predicate model snapshot is selected by `step`.
    predicate_parameters = torch.load(
        os.getcwd() + "/outputs/output_predicate_recognition_stage/model3_" +
        step + ".pth",
        map_location=torch.device("cpu"))
    newdict = {}
    newdict['model'] = removekey(predicate_parameters['model'], [])
    load_state_dict(model3, newdict.pop("model"))

    # One output folder per test dataset, namespaced by step.
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name, 'step', step)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=False)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        entire_test(
            model,
            model2,
            model3,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=("bbox", ),
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()

    # Transform results into MATLAB .mat files.
    # NOTE(review): `dataset_name` leaks out of the loop above, so only the
    # LAST dataset's predictions are exported; assumes at least one dataset.
    output_folder2 = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name,
                                  'step', step, 'predictions2.pth')
    predictions2 = torch.load(output_folder2)
    save_dir2 = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name,
                             'extraction/predicate_eval', step)
    if not os.path.exists(save_dir2):
        os.makedirs(save_dir2)
    for i in range(len(predictions2)):
        output = predictions2[i]
        output = output.numpy()
        # One .mat file per prediction tensor, named by its index.
        dataNew = save_dir2 + '/' + str(i) + '.mat'
        scio.savemat(dataNew, {'data': output})
def main():
    """Parse args, derive a descriptive OUTPUT_DIR from the config, snapshot
    config/split files into it, then alternate train and test epochs."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "--only-test",
        dest="only_test",
        help="test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    intact_cfg(cfg)
    cfg.freeze()

    if cfg.MODEL.RPN__ONLY:
        args.skip_test = True
        # Item assignment bypasses yacs freeze(); deliberate post-freeze override.
        cfg['DEBUG']['eval_in_train'] = -1
    #check_data(cfg)

    # Encode the experiment settings into the output directory name.
    train_example_num = get_train_example_num(cfg)
    croi = '_CROI' if cfg.MODEL.CORNER_ROI else ''
    cfg['OUTPUT_DIR'] = f'{cfg.OUTPUT_DIR}_Tr{train_example_num}{croi}'
    if not cfg.MODEL.CLASS_SPECIFIC:
        cfg['OUTPUT_DIR'] += '_CA'
    if cfg.MODEL.RPN__ONLY:
        cfg['OUTPUT_DIR'] += '_RpnOnly'
    loss_weights = cfg.MODEL.LOSS.WEIGHTS
    if loss_weights[4] > 0:
        k = int(loss_weights[4] * 100)
        cfg['OUTPUT_DIR'] += f'_CorGeo{k}'
    if loss_weights[5] > 0:
        k = int(loss_weights[5])
        p = int(loss_weights[6])
        cfg['OUTPUT_DIR'] += f'_CorSem{k}-{p}'

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)
    # Snapshot config and the train/val split files next to the run outputs.
    cfn = os.path.basename(args.config_file)
    shutil.copyfile(args.config_file, f"{output_dir}/{cfn}")
    default_cfn = 'maskrcnn_benchmark/config/defaults.py'
    shutil.copyfile(default_cfn, f"{output_dir}/default.py")
    train_fns = 'data3d/suncg_utils/SuncgTorch/train_test_splited/train.txt'
    shutil.copyfile(train_fns, f"{output_dir}/train.txt")
    val_fns = 'data3d/suncg_utils/SuncgTorch/train_test_splited/val.txt'
    # Fixed: the original copied train_fns here, so val.txt ended up a duplicate
    # of train.txt and val_fns was never used.
    shutil.copyfile(val_fns, f"{output_dir}/val.txt")

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    min_loss = 10000
    epochs_between_test = cfg.SOLVER.EPOCHS_BETWEEN_TEST
    # Alternate: train for EPOCHS_BETWEEN_TEST epochs, then (optionally) test.
    for loop in range(cfg.SOLVER.EPOCHS // cfg.SOLVER.EPOCHS_BETWEEN_TEST):
        model, min_loss = train(cfg, args.local_rank, args.distributed, loop,
                                args.only_test, min_loss)
        if not args.skip_test:
            test(
                cfg,
                model,
                args.distributed,
                epoch=(1 + loop) * epochs_between_test - 1,
            )
        if args.only_test:
            break
def main(): # Build a parser for arguments parser = argparse.ArgumentParser(description="Action Prediction Training") parser.add_argument( "--config-file", default="/home/SelfDriving/maskrcnn/maskrcnn-benchmark/configs/baseline.yaml", metavar="FILE", help="path to maskrcnn_benchmark config file", type=str, ) parser.add_argument( "--is_cat", default=False, help="If we use concatenation on object features", type=bool, ) parser.add_argument( "--side", default=False, help="If we use side task", type=bool, ) parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) parser.add_argument( "--imageroot", type=str, help="Directory to the images", default="/data6/SRIP19_SelfDriving/bdd12k/data1/" ) parser.add_argument( "--gtroot", type=str, help="Directory to the groundtruth", default="/data6/SRIP19_SelfDriving/bdd12k/annotations/12k_gt_val_5_actions.json" ) parser.add_argument( "--reasonroot", type=str, help="Directory to the reason gt", default="/data6/SRIP19_SelfDriving/bdd12k/annotations/val_reason_img.json" ) parser.add_argument( "--imWidth", type=int, help="Crop to width", default=1280 ) parser.add_argument( "--imHeight", type=int, help="Crop to height", default=720 ) parser.add_argument( "--batch_size", type=int, help="Batch Size", default=1 ) parser.add_argument( "--is_savemaps", type=bool, help="Whether save attention maps", default=False ) parser.add_argument( "--model_root", type=str, help="Directory to the trained model", default="/data6/SRIP19_SelfDriving/bdd12k/Outputs/08_28_side_v1/finetune/net_10.pth" ) parser.add_argument( "--output_dir", type=str, help="Directory to the trained model", default="/data6/SRIP19_SelfDriving/bdd12k/Outputs/08_28_side_v1/" ) args = parser.parse_args() print(args) cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() if torch.cuda.is_available(): print("CUDA device is available.") # output directory outdir = cfg.OUTPUT_DIR print("Save 
path:", outdir) if outdir: mkdir(outdir) # logger = setup_logger("training", outdir) test(cfg, args)
def main(): parser = argparse.ArgumentParser(description="PyTorch Object Detection Training") parser.add_argument( "--config-file", default="", metavar="FILE", help="path to config file", type=str, ) parser.add_argument("--local_rank", type=int, default=0) parser.add_argument( "--skip-test", dest="skip_test", help="Do not test the final model", action="store_true", default=True # add by hui ) # ################################################ add by hui ################################################# parser.add_argument( "--temp", help="whether generate to temp output", default=False, type=bool ) # ################################################################################################# parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 args.distributed = num_gpus > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group( backend="nccl", init_method="env://" ) synchronize() cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) # ################### change by hui ################################################# if args.temp: if os.path.exists("./outputs/temp"): shutil.rmtree('./outputs/temp') adaptive_config_change("OUTPUT_DIR", cfg.OUTPUT_DIR, './outputs/temp') cfg.freeze() some_pre_deal() ################################################################################################## output_dir = cfg.OUTPUT_DIR if output_dir: mkdir(output_dir) logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(args) logger.info("Collecting env info (might take some time)") logger.info("\n" + collect_env_info()) logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, "r") as cf: config_str = "\n" + cf.read() 
logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) model = train(cfg, args.local_rank, args.distributed) if not args.skip_test: run_test(cfg, model, args.distributed)
def main(): parser = argparse.ArgumentParser( description="PyTorch Object Detection Training") parser.add_argument( "--config-file", default="", metavar="FILE", help="path to config file", type=str, ) parser.add_argument("--local_rank", type=int, default=0) parser.add_argument( "--skip-test", dest="skip_test", help="Do not test the final model", action="store_true", ) parser.add_argument( "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER, ) args = parser.parse_args() import random import torch.backends.cudnn as cudnn import numpy as np seed = 1 torch.manual_seed(seed) torch.cuda.manual_seed_all(seed + 1) random.seed(seed + 2) np.random.seed(seed + 3) print('use seed') cudnn.deterministic = True num_gpus = int( os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 args.distributed = num_gpus > 1 if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") synchronize() cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() output_dir = os.path.join( cfg.OUTPUT_DIR, cfg.SUBDIR, 'GPU' + str(num_gpus) + '_LR' + str(cfg.SOLVER.BASE_LR)) if output_dir: mkdir(output_dir) logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(args) logger.info("Collecting env info (might take some time)") logger.info("\n" + collect_env_info()) logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, "r") as cf: config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) model = train(cfg, args.local_rank, args.distributed) if not args.skip_test: run_test(cfg, model, args.distributed)