def __init__(
    self,
    cfg,
    confidence_threshold=0.7,
    show_mask_heatmaps=False,
    masks_per_dim=2,
    min_image_size=224,
):
    """Build a demo predictor: model, weights, transforms, mask post-processing.

    Args:
        cfg: maskrcnn-benchmark config node; cloned so later mutation by the
            caller cannot affect this object.
        confidence_threshold: minimum detection score kept at display time.
        show_mask_heatmaps: if True keep soft mask heatmaps (Masker threshold
            -1) instead of binarizing masks at 0.5.
        masks_per_dim: grid size used when tiling mask heatmaps for display.
        min_image_size: smallest image side used by the input transform.
    """
    self.cfg = cfg.clone()
    self.model = build_detection_model(cfg)
    self.model.eval()  # inference only
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.model.to(self.device)
    self.min_image_size = min_image_size

    # Restore trained weights.
    checkpointer = DetectronCheckpointer(cfg, self.model)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    self.transforms = self.build_transform()

    # threshold -1 keeps raw probabilities so heatmaps can be rendered
    mask_threshold = -1 if show_mask_heatmaps else 0.5
    self.masker = Masker(threshold=mask_threshold, padding=1)

    # used to make colors for each class
    self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])

    self.cpu_device = torch.device("cpu")
    self.confidence_threshold = confidence_threshold
    self.show_mask_heatmaps = show_mask_heatmaps
    self.masks_per_dim = masks_per_dim
def train(cfg, local_rank, distributed):
    """Build the detection model, restore any checkpoint, and run training.

    Args:
        cfg: frozen maskrcnn-benchmark config node.
        local_rank: this process's GPU index for distributed training.
        distributed: whether to wrap the model in DistributedDataParallel.

    Returns:
        The trained model.
    """
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        # BUG FIX: torch.nn.parallel.deprecated.DistributedDataParallel was
        # removed from PyTorch; use the supported DistributedDataParallel.
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    # Resuming restores weights plus optimizer/scheduler/iteration state.
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
def main():
    """Demo inference entry point.

    Loads a trained rotated-box detector and runs it on every image in
    ``demo_imgs``, drawing kept polygons and writing results to ``demo_res``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # torch.distributed launch sets WORLD_SIZE; more than one process means
    # distributed mode.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # NOTE(review): the block below is dead code disabled via a bare string
    # literal -- it is the original dataset-evaluation path.
    """
    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
    """

    from maskrcnn_benchmark.data.transforms.build import build_transforms
    from PIL import Image
    import torchvision.transforms.functional as F
    transform = build_transforms(cfg, is_train=False)

    img_dir = "demo_imgs"
    res_dir = "demo_res"
    model.eval()
    imgs = os.listdir(img_dir)
    for img in imgs:
        img_path = os.path.join(img_dir, img)
        img_pil = Image.open(img_path)
        # for i in range( 360 ):
        original_img = img_pil
        # original_img = F.rotate( img_pil, 45, expand=True )
        origin_w, origin_h = original_img.size
        img, target = transform(original_img, None)
        print(img.shape)
        img = img.view((1, img.shape[0], img.shape[1], img.shape[2]))
        h, w = img.shape[2:]
        # Pad H and W up to multiples of 32 before the forward pass
        # (presumably the network stride requirement -- TODO confirm).
        if h % 32 != 0:
            new_h = (h // 32 + 1) * 32
        else:
            new_h = h
        if w % 32 != 0:
            new_w = (w // 32 + 1) * 32
        else:
            new_w = w
        ratio_w = 1. * new_w / w
        ratio_h = 1. * new_h / h
        padded_img = torch.zeros((1, 3, new_h, new_w)).float()
        padded_img[:, :, :h, :w] = img
        prediction = model(padded_img.cuda())[0]
        # Rescale predictions back to the (ratio-adjusted) original size.
        prediction = prediction.resize(
            (origin_w * ratio_w, origin_h * ratio_h))
        hboxes = prediction.bbox.cpu()
        rboxes = prediction.get_field("rboxes").cpu()
        ratios = prediction.get_field("ratios").cpu()
        scores = prediction.get_field("scores").cpu()
        # labels = prediction.get_field( "labels" ).cpu()
        for rbox, ratio, score in zip(rboxes, ratios, scores):
            print(rbox)
            print(ratio, score)
        # Detections with ratio > 0.8 replace the rotated box with the
        # axis-aligned box's four corners.
        h_idx = ratios > 0.8
        # print(hboxes)
        h = hboxes[h_idx]
        hboxes_vtx = torch.stack([
            h[:, 0], h[:, 1], h[:, 2], h[:, 1], h[:, 2], h[:, 3], h[:, 0],
            h[:, 3]
        ]).permute(1, 0)
        rboxes[h_idx] = hboxes_vtx
        # rboxes = rboxes.data.numpy().astype( np.int32 )
        rboxes = rboxes.data.numpy()
        # Polygon NMS at IoU 0.1, then keep detections scoring above 0.6.
        keep = poly_nms(
            np.hstack([rboxes, scores.cpu().data.numpy()[:, np.newaxis]
                       ]).astype(np.double), 0.1)

        rboxes = rboxes[keep].astype(np.int32)
        scores = scores[keep]
        hboxes = hboxes[keep]

        keep = np.where(scores > 0.6)
        rboxes = rboxes[keep]
        scores = scores[keep].tolist()
        hboxes = hboxes[keep]

        # rboxes = list( map( minAreaRect, rboxes ) )
        if len(rboxes) > 0:
            rboxes = np.vstack(rboxes)
        else:
            rboxes = np.array(rboxes)

        # vis( img_info["file_name"], rboxes )
        # img = cv2.imread( original_img )
        # PIL gives RGB; OpenCV drawing expects BGR, hence the channel flip.
        img = np.array(original_img.convert("RGB"))[:, :, ::-1].copy()
        cv2.polylines(img,
                      rboxes.reshape(-1, 4, 2).astype(np.int32),
                      True, (0, 255, 255),
                      thickness=2,
                      lineType=cv2.LINE_AA)
        filename = img_path.split("/")[-1]
        cv2.imwrite("{}/{}".format(res_dir, filename), img)
def train(cfg, local_rank, distributed):
    """Train a detection model with apex mixed precision and optional
    periodic validation; returns the trained model.

    Args:
        cfg: frozen maskrcnn-benchmark config node.
        local_rank: this process's GPU index for distributed training.
        distributed: whether to wrap the model in DistributedDataParallel.
    """
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training (O1 = mixed, O0 = pure FP32).
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    # Only rank 0 writes checkpoints to disk.
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        # Validation loader used every `test_period` iterations during training.
        data_loader_val = make_data_loader(cfg, is_train=False,
                                           is_distributed=distributed,
                                           is_for_period=True)
    else:
        data_loader_val = None

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        cfg,
        model,
        data_loader,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        test_period,
        arguments,
    )

    return model
# Training-setup fragment: build a dataloader, model, optimizer and amp state.
# Single-image batches starting from iteration 0.
images_per_gpu = 1
start_iter = 0
sampler = torch.utils.data.sampler.RandomSampler(dataset)
batch_sampler = make_batch_data_sampler(dataset, sampler, False,
                                        images_per_gpu, num_iters, start_iter)
# NOTE(review): `not True and ...` is always False, so BBoxAugCollator is
# (obscurely) disabled and BatchCollator is always used.
collator = BBoxAugCollator() if not True and cfg.TEST.BBOX_AUG.ENABLED else \
    BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)
num_workers = cfg.DATALOADER.NUM_WORKERS
data_loader = torch.utils.data.DataLoader(
    dataset,
    num_workers=num_workers,
    batch_sampler=batch_sampler,
    collate_fn=collator,
)

model = build_detection_model(cfg)
device = torch.device(cfg.MODEL.DEVICE)
model.to(device)

optimizer = make_optimizer(cfg, model)
scheduler = make_lr_scheduler(cfg, optimizer)

# Apex mixed precision: O1 when cfg requests float16, otherwise pure FP32.
use_mixed_precision = cfg.DTYPE == "float16"
amp_opt_level = 'O1' if use_mixed_precision else 'O0'
model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)

arguments = {}
arguments["iteration"] = 0

# NOTE(review): hard-coded absolute output path -- machine-specific.
output_dir = "/home/lab/github/maskrcnn-benchmark/egohands_output"
def main():
    """Inference entry point: load config and weights, then evaluate the
    model on every dataset in cfg.DATASETS.TEST."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # torch.distributed launch sets WORLD_SIZE; >1 process means distributed.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Evaluation types depend on which heads the model has enabled.
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        # One inference subfolder per test dataset.
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def build_model(self):
    """Construct the base detection model from this object's config and
    return it switched to evaluation mode."""
    # BASE MODEL
    detector = build_detection_model(self.cfg)
    detector.eval()
    return detector
def main():
    """Inference entry point with explicit weight-source selection.

    Chooses weights from --model-path, the latest checkpoint, or
    cfg.MODEL.WEIGHT (in that order), builds a per-model output directory,
    and runs evaluation on every dataset in cfg.DATASETS.TEST.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        required=True,
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        '--model-path',
        type=Path,
        help=('Path to model pickle file. If not specified, the latest '
              'checkpoint, if it exists, or cfg.MODEL.WEIGHT is loaded.'))
    parser.add_argument(
        '--output-dir',
        default='{cfg_OUTPUT_DIR}/inference-{model_stem}',
        help=('Output directory. Can use variables {cfg_OUTPUT_DIR}, which is '
              'replaced by cfg.OUTPUT_DIR, and {model_stem}, which is '
              'replaced by the stem of the file used to load weights.'))
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    assert cfg.OUTPUT_DIR, 'cfg.OUTPUT_DIR must not be empty.'
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    # Decide which weights to load: --model-path wins, then the latest
    # checkpoint, then cfg.MODEL.WEIGHT.
    if args.model_path:
        load_path = str(args.model_path.resolve())
        load_msg = 'Loading model from --model-path: %s' % load_path
    else:
        if checkpointer.has_checkpoint():
            load_path = checkpointer.get_checkpoint_file()
            load_msg = 'Loading model from latest checkpoint: %s' % load_path
        else:
            load_path = cfg.MODEL.WEIGHT
            load_msg = 'Loading model from cfg.MODEL.WEIGHT: %s' % load_path

    output_dir = Path(
        args.output_dir.format(cfg_OUTPUT_DIR=cfg.OUTPUT_DIR,
                               model_stem=Path(load_path).stem))
    output_dir.mkdir(exist_ok=True, parents=True)
    file_logger = common_setup(__file__, output_dir, args)
    # We can't log the load_msg until we setup the output directory, but we
    # can't get the output directory until we figure out which model to load.
    # So we save load_msg and log it here.
    logging.info(load_msg)
    logging.info('Output inference results to: %s' % output_dir)

    logger = logging.getLogger("maskrcnn_benchmark")
    logger.info("Using {} GPUs".format(num_gpus))
    file_logger.info('Config:')
    file_logger.info(cfg)
    file_logger.info("Collecting env info (might take some time)")
    file_logger.info("\n" + collect_env_info())

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    # BUG FIX: the original re-created the checkpointer and reassigned
    # `output_dir = cfg.OUTPUT_DIR` (a str), which clobbered the Path built
    # above and made `output_dir / dataset_name` below raise TypeError; it
    # also ignored --model-path when actually loading weights.  Load the
    # path selected above instead.
    if args.model_path:
        _ = checkpointer.load(load_path, use_latest=False)
    else:
        ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
        _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # Evaluation types depend on which heads the model has enabled.
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    for idx, dataset_name in enumerate(dataset_names):
        output_folder = output_dir / dataset_name
        mkdir(str(output_folder))
        output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def train(cfg, local_rank, distributed):
    """MLPerf-instrumented training entry point.

    Logs hyperparameters via print_mlperf, trains the model, optionally
    evaluating (and possibly early-exiting) each epoch, and prints a
    throughput metric at the end.  Returns the trained model.
    """
    # Model logging
    print_mlperf(key=mlperf_log.INPUT_BATCH_SIZE, value=cfg.SOLVER.IMS_PER_BATCH)
    print_mlperf(key=mlperf_log.BATCH_SIZE_TEST, value=cfg.TEST.IMS_PER_BATCH)

    print_mlperf(key=mlperf_log.INPUT_MEAN_SUBTRACTION, value=cfg.INPUT.PIXEL_MEAN)
    print_mlperf(key=mlperf_log.INPUT_NORMALIZATION_STD, value=cfg.INPUT.PIXEL_STD)
    print_mlperf(key=mlperf_log.INPUT_RESIZE)
    print_mlperf(key=mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING)
    print_mlperf(key=mlperf_log.MIN_IMAGE_SIZE, value=cfg.INPUT.MIN_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.MAX_IMAGE_SIZE, value=cfg.INPUT.MAX_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.INPUT_RANDOM_FLIP)
    print_mlperf(key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
    print_mlperf(key=mlperf_log.FG_IOU_THRESHOLD, value=cfg.MODEL.RPN.FG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.BG_IOU_THRESHOLD, value=cfg.MODEL.RPN.BG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.ASPECT_RATIOS, value=cfg.MODEL.RPN.ASPECT_RATIOS)
    print_mlperf(key=mlperf_log.BACKBONE, value=cfg.MODEL.BACKBONE.CONV_BODY)
    print_mlperf(key=mlperf_log.NMS_THRESHOLD, value=cfg.MODEL.RPN.NMS_THRESH)

    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    # Optimizer logging
    print_mlperf(key=mlperf_log.OPT_NAME, value=mlperf_log.SGD_WITH_MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_LR, value=cfg.SOLVER.BASE_LR)
    print_mlperf(key=mlperf_log.OPT_MOMENTUM, value=cfg.SOLVER.MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_WEIGHT_DECAY, value=cfg.SOLVER.WEIGHT_DECAY)

    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    # Only rank 0 writes checkpoints to disk.
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    arguments["save_checkpoints"] = cfg.SAVE_CHECKPOINTS
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    # This make_data_loader variant also returns iterations per epoch,
    # used below for the per-epoch MLPerf callbacks.
    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    # set the callback function to evaluate and potentially
    # early exit each epoch
    if cfg.PER_EPOCH_EVAL:
        per_iter_callback_fn = functools.partial(
            mlperf_test_early_exit,
            iters_per_epoch=iters_per_epoch,
            tester=functools.partial(test, cfg=cfg),
            model=model,
            distributed=distributed,
            min_bbox_map=cfg.MLPERF.MIN_BBOX_MAP,
            min_segm_map=cfg.MLPERF.MIN_SEGM_MAP)
    else:
        per_iter_callback_fn = None

    start_train_time = time.time()

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        per_iter_start_callback_fn=functools.partial(
            mlperf_log_epoch_start, iters_per_epoch=iters_per_epoch),
        per_iter_end_callback_fn=per_iter_callback_fn,
    )

    end_train_time = time.time()
    total_training_time = end_train_time - start_train_time
    print("&&&& MLPERF METRIC THROUGHPUT per GPU={:.4f} iterations / s".format(
        (arguments["iteration"] * 1.0) / total_training_time))

    return model
def train(cfg, local_rank, distributed, fp16, dllogger, args):
    """Training entry point with optional amp mixed precision and apex DDP.

    Args:
        cfg: frozen config node.
        local_rank: this process's GPU index.
        distributed: whether to wrap the model for distributed training.
        fp16: if True, force mixed precision regardless of cfg.DTYPE.
        dllogger: DLLogger instance threaded through to testing/training.
        args: CLI namespace; mutated here with `iters_per_epoch`.

    Returns:
        (model, iters_per_epoch) tuple.
    """
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # NOTE(review): `use_amp` and `use_apex_ddp` are not parameters of this
    # function -- presumably module-level flags; confirm where they are set.
    if use_amp:
        # Initialize mixed-precision training
        if fp16:
            use_mixed_precision = True
        else:
            use_mixed_precision = cfg.DTYPE == "float16"

        amp_opt_level = 'O1' if use_mixed_precision else 'O0'
        model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)

    if distributed:
        if use_apex_ddp:
            # DDP here is presumably apex's DistributedDataParallel
            # (delay_allreduce is an apex-specific argument).
            model = DDP(model, delay_allreduce=True)
        else:
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[local_rank], output_device=local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            )

    if is_main_process():
        print(model)

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    # Only rank 0 writes checkpoints to disk.
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    args.iters_per_epoch = iters_per_epoch

    # set the callback function to evaluate and potentially
    # early exit each epoch
    if cfg.PER_EPOCH_EVAL:
        per_iter_callback_fn = functools.partial(
            mlperf_test_early_exit,
            iters_per_epoch=iters_per_epoch,
            tester=functools.partial(test, cfg=cfg, dllogger=dllogger, args=args),
            model=model,
            distributed=distributed,
            min_bbox_map=cfg.MIN_BBOX_MAP,
            min_segm_map=cfg.MIN_MASK_MAP,
            args=args)
    else:
        per_iter_callback_fn = None

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        use_amp,
        cfg,
        dllogger,
        args,
        per_iter_end_callback_fn=per_iter_callback_fn,
    )

    return model, iters_per_epoch
def train(cfg, args):
    """Train an action-prediction model built on a detection backbone.

    Loads weights from args.model_root or args.checkpoint, optionally
    freezes the first three child modules, then trains with a weighted
    BCE loss (plus a side "reason" task when cfg.MODEL.SIDE), logging
    per-epoch to text files and saving snapshots every two epochs.
    """
    # torch.cuda.set_device(7)
    # Initialize the network
    model = build_detection_model(cfg)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    # model load weights
    # print(torch.load(cfg.MODEL.WEIGHT))
    # checkpoint = torch.load(cfg.MODEL.WEIGHT)['model']
    print("Load from checkpoint?", bool(args.from_checkpoint))
    if not bool(args.from_checkpoint):
        # path = '/data6/SRIP19_SelfDriving/bdd100k/trained_model/Outputs/model_final_apt.pth'
        path = args.model_root
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint)
    else:
        checkpoint = torch.load(args.checkpoint)
        model.load_state_dict(checkpoint)
    #model.train()

    print("Freeze faster rcnn?", bool(args.freeze))
    # Optionally freeze the first three child modules (presumably the
    # detector backbone/heads -- confirm against the model definition).
    for i, child in enumerate(model.children()):
        #print(i)
        #print(child)
        if i < 3:
            for param in child.parameters():
                # param.requires_grad = False
                param.requires_grad = not (bool(args.freeze))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    outdir = cfg.OUTPUT_DIR

    # Per-class positive weights for the action BCE loss.
    class_weights = [1, 2, 2, 2]
    w = torch.FloatTensor(class_weights).cuda()
    # criterion = nn.MultiLabelSoftMarginLoss()
    criterion = nn.BCEWithLogitsLoss(pos_weight=w).cuda()
    criterion2 = nn.BCEWithLogitsLoss().cuda()

    # Initialize the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=float(args.initLR),
                           weight_decay=float(args.weight_decay))

    # Initialize DataLoader
    Dataset = BatchLoader(imageRoot=args.imageroot,
                          gtRoot=args.gtroot,
                          reasonRoot=args.reasonroot,
                          cropSize=(args.imHeight, args.imWidth))
    dataloader = DataLoader(Dataset,
                            batch_size=int(args.batch_size),
                            num_workers=0,
                            shuffle=True)

    # lossArr = []
    # AccuracyArr = []

    for epoch in range(0, args.num_epoch):
        # NOTE(review): the log file is opened per epoch but never closed.
        trainingLog = open(
            outdir + ('trainingLogTogether_{0}.txt'.format(epoch)), 'w')
        trainingLog.write(str(args))
        lossArr = []
        AccuracyArr = []
        AccSideArr = []

        for i, dataBatch in enumerate(dataloader):
            # Read data
            img_cpu = dataBatch['img']
            imBatch = img_cpu.to(device)
            target_cpu = dataBatch['target']
            targetBatch = target_cpu.to(device)
            # ori_img_cpu = dataBatch['ori_img']
            if cfg.MODEL.SIDE:
                reason_cpu = (dataBatch['reason']).type(torch.FloatTensor)
                reasonBatch = reason_cpu.to(device)

            optimizer.zero_grad()
            if cfg.MODEL.SIDE:
                pred, pred_reason = model(imBatch)
                #print(pred, pred_reason)
                # Joint loss
                loss1 = criterion(pred, targetBatch)
                loss2 = criterion2(pred_reason, reasonBatch)
                loss = loss1 + loss2
            else:
                pred = model(imBatch)
                loss = criterion(pred, targetBatch)
            # torch.cuda.empty_cache()
            # pred, selected_boxes = model(imBatch)
            # DrawBbox(ori_img_cpu[0], selected_boxes[0])
            # plt.clf()
            # plt.close()
            # print(pred)
            # print(targetBatch)
            loss.backward()
            optimizer.step()
            loss_cpu = loss.cpu().data.item()
            lossArr.append(loss_cpu)
            meanLoss = np.mean(np.array(lossArr))

            # Calculate accuracy
            # Sample-averaged F1 over the multi-label action predictions.
            predict = torch.sigmoid(pred) > 0.5
            f1 = f1_score(target_cpu.data.numpy(),
                          predict.cpu().data.numpy(),
                          average='samples')
            AccuracyArr.append(f1)
            meanAcc = np.mean(np.array(AccuracyArr))

            if cfg.MODEL.SIDE:
                predict_reason = torch.sigmoid(pred_reason) > 0.5
                f1_side = f1_score(reason_cpu.data.numpy(),
                                   predict_reason.cpu().data.numpy(),
                                   average='samples')
                AccSideArr.append(f1_side)

            if i % 50 == 0:
                print('prediction logits:', pred)
                print('ground truth:', targetBatch.cpu().data.numpy())
                print(
                    'Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f'
                    % (epoch, i, loss_cpu, meanLoss))
                trainingLog.write(
                    'Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f \n'
                    % (epoch, i, loss_cpu, meanLoss))
                print(
                    'Epoch %d Iteration %d Action Prediction: F1 %.5f Accumulated F1 %.5f'
                    % (epoch, i, AccuracyArr[-1], meanAcc))
                if cfg.MODEL.SIDE:
                    meanAccSide = np.mean(AccSideArr)
                    print(
                        'Epoch %d Iteration %d Side Task: F1 %.5f Accumulated F1 %.5f'
                        % (epoch, i, AccSideArr[-1], meanAccSide))

            # Learning-rate decay at 40% and 70% of the epoch schedule.
            if epoch in [int(0.4 * args.num_epoch),
                         int(0.7 * args.num_epoch)] and i == 0:
                # NOTE(review): this print passes `i` as a second argument
                # instead of %-formatting it into the string.
                print('The learning rate is being decreased at Iteration %d',
                      i)
                trainingLog.write(
                    'The learning rate is being decreased at Iteration %d \n'
                    % i)
                for param_group in optimizer.param_groups:
                    param_group['lr'] /= 10

        # Snapshot every second epoch.
        if (epoch + 1) % 2 == 0:
            torch.save(model.state_dict(),
                       (outdir + 'net_%d.pth' % (epoch + 1)))

        #if args.val and epoch % 10 == 0:
        #    print("Validation...")
        #    run_test(cfg, args)

    print("Saving final model...")
    torch.save(model.state_dict(), (outdir + 'net_Final.pth'))
    print("Done!")
def train(cfg, local_rank, distributed, use_tensorboard=False):
    """Train a detection model, optionally logging metrics to TensorBoard.

    Args:
        cfg: frozen config node.
        local_rank: this process's GPU index for distributed training.
        distributed: whether to wrap the model in DistributedDataParallel.
        use_tensorboard: if True, use a TensorboardLogger for meters instead
            of the plain MetricLogger.

    Returns:
        The trained model.
    """
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    # Only rank 0 writes checkpoints to disk.
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    # load_scheduler_only_epoch will prefer the scheduler specified in the
    # config rather than the one in the checkpoint, and will load only the
    # last_epoch from the checkpoint.
    extra_checkpoint_data = checkpointer.load(
        cfg.MODEL.WEIGHT,
        load_model_only=cfg.MODEL.LOAD_ONLY_WEIGHTS,
        load_scheduler_only_epoch=True)
    if not cfg.MODEL.LOAD_ONLY_WEIGHTS:
        # Only resume iteration/optimizer state when doing a full resume.
        arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if use_tensorboard:
        meters = TensorboardLogger(log_dir=output_dir,
                                   exp_name=cfg.TENSORBOARD_EXP_NAME,
                                   start_iter=arguments['iteration'],
                                   delimiter=" ")
    else:
        meters = MetricLogger(delimiter=" ")

    do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, meters)

    return model
def main(cfg_text, cfg_segment):
    """Evaluate a language model + segmentation model baseline.

    For each test dataset: run the language model to pick a class label per
    instance, run the segmentation model on the unique images, match the
    top-scoring segment of the predicted class against the annotated target,
    and write mean/per-class box and mask IoU to a report file.
    """
    # Load saved LSTM network
    language_model = build_detection_model(cfg_text)
    language_model.to(cfg_text.MODEL.DEVICE)
    output_dir = cfg_text.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg_text, language_model,
                                         save_dir=output_dir)
    _ = checkpointer.load(cfg_text.MODEL.WEIGHT)
    language_model.eval()

    # Load saved segmentation network
    seg_model = SegmentationHelper(cfg_segment)

    # Split=False is Test set
    data_loaders = make_data_loader(cfg_text, split=False,
                                    is_distributed=False)
    for dataset_index, data_loader in enumerate(data_loaders):
        fine_gt = []
        seg_iou = []
        bbox_iou = []
        for index, instance in tqdm(
                enumerate(data_loader),
                desc=cfg_text.DATASETS.TEST[dataset_index]):
            #Group images
            # Several instances can share one image; segment each unique
            # image once and map results back via unique_inverse.
            image_indexes = [x.get_field('img_id')[0] for x in instance[0][2]]
            unique_indexes, unique_mask, unique_inverse = np.unique(
                image_indexes, return_index=True, return_inverse=True)
            with torch.no_grad():
                prediction = language_model(instance[0],
                                            device=cfg_text.MODEL.DEVICE)
                segmentation_prediction = seg_model.run_on_image(
                    instance[0][0][unique_mask])
            # Class choice = argmax over the final-step logits.
            _, pred_ind = prediction[:, -1, :].max(1)
            for j in range(len(pred_ind)):
                segs = segmentation_prediction[unique_inverse[j]]
                label = pred_ind[j]
                ann_seg = instance[0][2][j].get_field('ann_target')[0]
                fine_gt.append(ann_seg.get_field('labels').item())
                label_mask = segs.get_field('labels') == label
                if any(label_mask):
                    # Highest-scoring segment of the predicted class.
                    score, top_ind = segs[label_mask].get_field('scores').max(
                        0)
                    top_seg = segs[label_mask][top_ind]
                    bbox_iou.append(
                        IOU(top_seg.bbox.tolist()[0],
                            ann_seg.bbox.tolist()[0]))
                    if top_seg.has_field('mask'):
                        top_mask = top_seg.get_field('mask').squeeze()
                        ann_mask = ann_seg.get_field('masks').masks[0].mask
                        seg_iou.append(IOU(top_mask, ann_mask))
                    else:
                        seg_iou.append(0.0)
                else:
                    # Predicted class absent from the segmentation: count a miss.
                    bbox_iou.append(0.0)
                    seg_iou.append(0.0)
        with open(
                '{}/{}_baseline_report.txt'.format(
                    cfg_text.OUTPUT_DIR,
                    cfg_text.DATASETS.TEST[dataset_index]), 'w') as f:
            f.write("Mean Segmentation IOU: {}\n".format(np.mean(seg_iou)))
            f.write("Mean Bounding Box IOU: {}\n".format(np.mean(bbox_iou)))
            f.write("\n Class \t Seg IOU \t BBox IOU \t Support")
            # Per-class breakdown keyed by the ground-truth labels.
            for label in data_loaders[0].dataset.coco.cats.values():
                mask = torch.Tensor(fine_gt) == label['id']
                seg_iou = torch.Tensor(seg_iou)
                bbox_iou = torch.Tensor(bbox_iou)
                f.write("\n{} \t {:.2f} \t {:.2f} \t{:d}".format(
                    label['name'], torch.mean(seg_iou[mask]),
                    torch.mean(bbox_iou[mask]), torch.sum(mask)))
def train(cfg, local_rank, distributed):
    """Build a detection model, restore state from cfg.MODEL.WEIGHT, and
    train it; returns the trained model.

    Args:
        cfg: frozen maskrcnn-benchmark config node.
        local_rank: this process's GPU index for distributed training.
        distributed: whether to wrap the model in DistributedDataParallel.
    """
    detector = build_detection_model(cfg)
    target_device = torch.device(cfg.MODEL.DEVICE)
    detector.to(target_device)

    opt = make_optimizer(cfg, detector)
    lr_sched = make_lr_scheduler(cfg, opt)

    # Apex mixed precision: O1 for float16 runs, O0 (pure FP32) otherwise.
    if cfg.DTYPE == "float16":
        opt_level = 'O1'
    else:
        opt_level = 'O0'
    detector, opt = amp.initialize(detector, opt, opt_level=opt_level)

    if distributed:
        detector = torch.nn.parallel.DistributedDataParallel(
            detector,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    train_state = {"iteration": 0}

    # Only rank 0 persists checkpoints to cfg.OUTPUT_DIR.
    ckpt = DetectronCheckpointer(cfg, detector, opt, lr_sched,
                                 cfg.OUTPUT_DIR, get_rank() == 0)
    train_state.update(ckpt.load(cfg.MODEL.WEIGHT))

    loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=train_state["iteration"],
    )

    do_train(
        detector,
        loader,
        opt,
        lr_sched,
        ckpt,
        target_device,
        cfg.SOLVER.CHECKPOINT_PERIOD,
        train_state,
    )

    return detector
def train(cfg, args):
    """Train the ``baseline`` action classifier on features from a frozen detector.

    The Mask R-CNN detector built from ``cfg`` is used purely as a feature
    extractor: forward hooks capture the RoI-pooled features, the backbone
    features and the box-classification logits, and only ``model`` (a
    ``baseline()`` network) is optimized with Adam.

    Parameters
    ----------
    cfg : detector config node (MODEL.DEVICE, OUTPUT_DIR, ...).
    args : namespace with initLR, batch_size, imHeight, imWidth, imageroot,
        gtroot and MaxIteration attributes.
    """
    # Frozen detector, kept in eval mode; its outputs are read through hooks.
    detector = build_detection_model(cfg)
    detector.eval()
    device = torch.device(cfg.MODEL.DEVICE)
    detector.to(device)
    # NOTE(review): paths below are built by plain string concatenation, so
    # OUTPUT_DIR is expected to end with a path separator — confirm.
    outdir = cfg.OUTPUT_DIR

    # Network under training and its class-weighted loss.
    model = baseline()
    class_weights = [1, 1, 5, 5]  # could be adjusted
    class_weights = torch.FloatTensor(class_weights).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = optim.Adam(model.parameters(), lr=float(args.initLR),
                           weight_decay=0.001)

    # Pre-allocated input/target buffers, moved to the GPU once.
    imBatch = Variable(torch.FloatTensor(args.batch_size, 3,
                                         args.imHeight, args.imWidth))
    targetBatch = Variable(torch.LongTensor(args.batch_size, 1))
    imBatch = imBatch.cuda(device)
    targetBatch = targetBatch.cuda(device)
    model = model.cuda(device)

    # Initialize dataloader.
    Dataset = BatchLoader(
        imageRoot=args.imageroot,
        gtRoot=args.gtroot,
        cropSize=(args.imWidth, args.imHeight),
    )
    dataloader = DataLoader(Dataset, batch_size=int(args.batch_size),
                            num_workers=0, shuffle=True)

    # Register the forward hooks ONCE, outside the loop: re-creating them per
    # iteration (as before) piles up duplicate hooks on the detector modules.
    # Assumes SimpleHook records the module's latest forward output — confirm.
    RoIPool_module = detector.roi_heads.box.feature_extractor.pooler
    RoIPredictor = detector.roi_heads.box.predictor
    Backbone = detector.backbone
    hook_roi = SimpleHook(RoIPool_module)
    hook_backbone = SimpleHook(Backbone)
    hook_pred = SimpleHook(RoIPredictor)

    # Loop-invariant score threshold used to keep confident boxes only.
    thresh = torch.FloatTensor([10]).to(device)

    lossArr = []
    AccuracyArr = []
    accuracy = 0
    iteration = 0
    for epoch in range(0, 10):
        # The log file is now closed deterministically at the end of each
        # epoch (the original leaked one open handle per epoch).
        with open(outdir + ('trainingLog_{0}.txt'.format(epoch)), 'w') as trainingLog:
            accuracy = 0
            for i, dataBatch in enumerate(dataloader):
                iteration = i + 1

                # Read data.
                img_cpu = dataBatch['img']
                imBatch.data.copy_(img_cpu)  # Tensor.shape(BatchSize, 3, Height, Width)
                target_cpu = dataBatch['target']
                targetBatch.data.copy_(target_cpu)

                # Run the frozen detector; the hooks capture the features.
                out_detector = detector(imBatch)
                features_roi = hook_roi.output.data
                features_backbone = hook_backbone.output[0].data  # only use the bottom one

                # Keep RoI features whose max class logit clears the threshold.
                cls_logit = hook_pred.output[0].data
                cls_logit = torch.max(cls_logit, dim=1)
                ind = torch.ge(cls_logit[0], thresh)
                features_roi = features_roi[ind]

                optimizer.zero_grad()
                pred = model(features_roi, features_backbone)
                loss = criterion(pred, targetBatch[0, :])
                action = pred.cpu().argmax().data.numpy()
                loss.backward()
                optimizer.step()

                if action == target_cpu.data.numpy()[0]:
                    accuracy += 1

                lossArr.append(loss.cpu().data.item())
                AccuracyArr.append(accuracy / iteration)
                meanLoss = np.mean(np.array(lossArr))

                if iteration % 100 == 0:
                    print('prediction:', pred)
                    print('predicted action:', action)
                    print('ground truth:', target_cpu.data.numpy()[0])
                    print('Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f'
                          % (epoch, iteration, lossArr[-1], meanLoss))
                    trainingLog.write(
                        'Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f \n'
                        % (epoch, iteration, lossArr[-1], meanLoss))
                    print('Epoch %d Iteration %d: Accumulated Accuracy %.5f'
                          % (epoch, iteration, AccuracyArr[-1]))
                    trainingLog.write(
                        'Epoch %d Iteration %d: Accumulated Accuracy %.5f \n'
                        % (epoch, iteration, AccuracyArr[-1]))

                # Decay the learning rate at the first iteration of epochs 4 and 7.
                if epoch in [4, 7] and iteration == 1:
                    # Fixed: previously `iteration` was passed as a second
                    # print argument instead of being %-formatted into the message.
                    print('The learning rate is being decreased at Iteration %d'
                          % iteration)
                    trainingLog.write(
                        'The learning rate is being decreased at Iteration %d \n'
                        % iteration)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] /= 10

                if iteration == args.MaxIteration:
                    torch.save(model.state_dict(),
                               (outdir + 'netFinal_%d.pth' % (epoch + 1)))
                    break

        if iteration >= args.MaxIteration:
            break

        if (epoch + 1) % 2 == 0:
            torch.save(model.state_dict(),
                       (outdir + 'netFinal_%d.pth' % (epoch + 1)))
def main():
    """Entry point: load a trained detector and run COCO-style inference
    over every dataset listed in cfg.DATASETS.TEST."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is set by the distributed launcher; default to one GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        # Use the current distributed API (the `torch.distributed.deprecated`
        # namespace has been removed); matches the other entry point in this file.
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Evaluate boxes always; add segmentation IoU only when masks are enabled.
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    # One output folder per test dataset, created under OUTPUT_DIR/inference/.
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
        output_folders, dataset_names, data_loaders_val
    ):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        # Wait for all ranks before moving on to the next dataset.
        synchronize()
def test(cfg, args): # load detector detector = build_detection_model(cfg) detector.eval() device = torch.device(cfg.MODEL.DEVICE) detector.to(device) outdir = cfg.OUTPUT_DIR # load network model = baseline() model.load_state_dict(torch.load(args.model_root)) # Initialize image batch # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, args.imHeight, args.imWidth)) imBatch = Variable(torch.FloatTensor(args.batch_size, 3, 736, 1280)) targetBatch = Variable(torch.LongTensor(args.batch_size, 1)) # Move network and batch to gpu imBatch = imBatch.cuda(device) targetBatch = targetBatch.cuda(device) model = model.cuda(device) # Initialize dataloader Dataset = BatchLoader(imageRoot=args.imageroot, gtRoot=args.gtroot, cropSize=(args.imWidth, args.imHeight)) dataloader = DataLoader(Dataset, batch_size=args.batch_size, num_workers=0, shuffle=True) length = Dataset.__len__() AccuracyArr = [] accuracy = 0 # test SaveFilename = (outdir + 'TestingLog.txt') TestingLog = open(SaveFilename, 'w') print('Save to ', SaveFilename) for i, dataBatch in enumerate(dataloader): # Read data, under construction. 
now it is hard-code img_cpu = dataBatch['img'] img_list = to_image_list(img_cpu[0, :, :], cfg.DATALOADER.SIZE_DIVISIBILITY) imBatch.data.copy_( img_list.tensors) # Tensor.shape(BatchSize, 3, Height, Width) target_cpu = dataBatch['target'] # print(target_cpu) targetBatch.data.copy_(target_cpu) # grap features from detector RoIPool_module = detector.roi_heads.box.feature_extractor.pooler Backbone = detector.backbone hook_roi = SimpleHook(RoIPool_module) hook_backbone = SimpleHook(Backbone) out_detector = detector(imBatch) features_roi = hook_roi.output.data features_backbone = hook_backbone.output[ 0].data # only use the bottom one pred = model(features_roi, features_backbone) action = pred.cpu().argmax().data.numpy() print('predicted action:', action) print('ground truth:', target_cpu.data.numpy()[0]) if action == target_cpu.data.numpy()[0]: accuracy += 1 AccuracyArr.append(accuracy / (i + 1)) print('Iteration %d / %d: Accumulated Accuracy %.5f' % (i + 1, length, AccuracyArr[-1])) TestingLog.write('Iteration %d / %d: Accumulated Accuracy %.5f \n' % (i + 1, length, AccuracyArr[-1]))