def main():
    """Entry point: parse CLI args, finalize the config, create the output
    directories, dump the config, and launch the run."""
    # Parse cmd line args
    args = parse_args()

    # Load config options: file first, then command-line overrides.
    cfg.merge_from_file(args.cfg_file)
    cfg.merge_from_list(args.opts)
    assert_and_infer_cfg()

    # Derive per-experiment output paths before freezing the config.
    # Note OUT_DIR is rewritten first, so the tb/model dirs nest under it.
    cfg.PATHS.OUT_DIR = os.path.join(cfg.PATHS.OUT_DIR,
                                     cfg.PATHS.EXPERIMENT_NAME)
    cfg.PATHS.TB_OUT_DIR = os.path.join(cfg.PATHS.OUT_DIR, "tb_logs",
                                        cfg.PATHS.TIMESTAMP)
    cfg.PATHS.MODEL_OUT_DIR = os.path.join(cfg.PATHS.OUT_DIR, "saved_models",
                                           cfg.PATHS.TIMESTAMP)
    cfg.freeze()

    # Ensure that the output dir exists. The timestamped subdirs use
    # exist_ok=False so two runs started within the same timestamp
    # granularity cannot silently share a directory.
    try:
        os.makedirs(cfg.PATHS.OUT_DIR, exist_ok=True)
        os.makedirs(cfg.PATHS.MODEL_OUT_DIR, exist_ok=False)
        os.makedirs(cfg.PATHS.TB_OUT_DIR, exist_ok=False)
    except FileExistsError:
        print("Wait for a minute and try again :)")
        # BUG FIX: the bare exit() builtin is meant for interactive use and
        # reported success (status 0); use sys.exit with a non-zero status
        # so wrapping scripts can detect the abort.
        import sys
        sys.exit(1)

    if cfg.TUNE_LR:
        pass  # TODO: LR-tuning hook is not implemented yet

    # Save the config
    dump_cfg()
    # let's gooo
    run()
def parse_args():
    """Parse the validation script's command line.

    As a side effect, when ``--config-file`` names an existing file the
    global ``cfg`` is updated from it (plus any trailing ``opts``
    overrides) and frozen.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    cli = argparse.ArgumentParser(
        description="Validate the correctness of the model's outputs")
    cli.add_argument('--config-file', default='', type=str,
                     help='path to the configuration file')
    cli.add_argument('--model', default='', type=str,
                     help='path to the model')
    cli.add_argument('--score-thresh', default=0.15, type=float,
                     help='detection score threshold')
    cli.add_argument('--baseline-path', default='', type=str,
                     help='path to the baseline')
    cli.add_argument('opts', nargs=argparse.REMAINDER, default=None,
                     help='modify configuration using the command line')
    args = cli.parse_args()

    # NOTE(review): cfg is only configured when the config file exists —
    # confirm that running without one is intended to leave cfg untouched.
    if os.path.isfile(args.config_file):
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
        cfg.freeze()
    return args
def parse_args():
    """Parse the image-demo command line.

    When ``--config-file`` names an existing file, the global ``cfg`` is
    updated from it (then from the trailing ``opts`` overrides) and frozen.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    cli = argparse.ArgumentParser(description='YOLACT training script')
    cli.add_argument('--config-file', default='', type=str,
                     help='path to the configuration file')
    cli.add_argument('--model', default='', type=str,
                     help='path to the model')
    cli.add_argument('--image', default='', type=str,
                     help='path to the image or folder of images')
    cli.add_argument('--score-thresh', default=0.15, type=float,
                     help='detection score threshold')
    cli.add_argument('--alpha', default=0.45, type=float,
                     help='alpha')
    cli.add_argument('opts', nargs=argparse.REMAINDER, default=None,
                     help='modify configuration using the command line')
    args = cli.parse_args()

    # NOTE(review): cfg is only configured when the config file exists.
    if os.path.isfile(args.config_file):
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
        cfg.freeze()
    return args
def main():
    """Evaluate a saved baseline checkpoint on the test split and print
    the metrics of both tasks."""
    cli = argparse.ArgumentParser(description="Baseline Experiment Eval")
    cli.add_argument(
        "--config-file",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    cli.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = cli.parse_args()

    # Config: file first, experiment name derived from the file name
    # (basename minus its 5-char extension), then CLI overrides.
    cfg.merge_from_file(args.config_file)
    cfg.EXPERIMENT_NAME = args.config_file.split('/')[-1][:-5]
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Seeding — make the evaluation run reproducible.
    random.seed(cfg.SEED)
    np.random.seed(cfg.SEED)
    torch.manual_seed(cfg.SEED)
    torch.cuda.manual_seed(cfg.SEED)
    torch.cuda.manual_seed_all(cfg.SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # This can slow down training

    # Test-split loader; no shuffling during evaluation.
    test_loader = torch.utils.data.DataLoader(get_dataset(cfg, 'test'),
                                              batch_size=cfg.TEST.BATCH_SIZE,
                                              shuffle=False,
                                              pin_memory=True)

    task1, task2 = get_tasks(cfg)
    model = get_model(cfg, task1, task2)

    # Restore the requested checkpoint.
    checkpoint_path = os.path.join(
        cfg.SAVE_DIR, cfg.EXPERIMENT_NAME,
        'ckpt-%s.pth' % str(cfg.TEST.CKPT_ID).zfill(5))
    print("Evaluating Checkpoint at %s" % checkpoint_path)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    if cfg.CUDA:
        model = model.cuda()

    task1_metric, task2_metric = evaluate(test_loader, model, task1, task2)
    for metrics in (task1_metric, task2_metric):
        for name, value in metrics.items():
            print('{}: {:.3f}'.format(name, value))
def parse_args():
    """Parse the weight-export command line.

    When ``--config-file`` names an existing file, the global ``cfg`` is
    updated from it (then from the trailing ``opts`` overrides) and frozen.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    cli = argparse.ArgumentParser(
        description='Export weights of trained model from D. Bolya')
    cli.add_argument('--config-file', default='', type=str,
                     help='path to the configuration file')
    cli.add_argument('--trained-model', default='', type=str,
                     help='path to the trained model')
    cli.add_argument('--exported-model', default='', type=str,
                     help='path to the exported model')
    cli.add_argument('opts', nargs=argparse.REMAINDER, default=None,
                     help='modify configuration using the command line')
    args = cli.parse_args()

    # NOTE(review): cfg is only configured when the config file exists.
    if os.path.isfile(args.config_file):
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
        cfg.freeze()
    return args
def main():
    """Run segmentation inference over every configured test dataset,
    writing results to a per-dataset folder under OUTPUT_DIR."""
    cli = argparse.ArgumentParser(description="PyTorch Segmentation Inference")
    cli.add_argument(
        "--config-file",
        default="./configs/Encoder_UNet.yaml",
        metavar="FILE",
        help="path to config file",
    )
    cli.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = cli.parse_args()

    # Merge config file then command-line overrides, and lock the config.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = cfg.OUTPUT_DIR
    logger = setup_logger("core", save_dir)
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_segmentation_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Restore the weights named in the config.
    checkpointer = SegmentationCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(cfg.DATASETS.TEST)
    # Guard clause: without an output dir there is nowhere to put results.
    if not cfg.OUTPUT_DIR:
        raise RuntimeError("Output directory is missing!")
    model_tag = cfg.MODEL.ENCODER + '_' + cfg.MODEL.ARCHITECTURE
    for i, dataset_name in enumerate(dataset_names):
        folder = os.path.join(cfg.OUTPUT_DIR, "inference", model_tag,
                              dataset_name)
        mkdir(folder)
        output_folders[i] = folder

    test_data_loaders = make_data_loader(cfg, split='test')
    for folder, dataset_name, loader in zip(output_folders, dataset_names,
                                            test_data_loaders):
        inference(
            model,
            loader,
            dataset_name=dataset_name,
            device=cfg.MODEL.DEVICE,
            output_folder=folder,
        )
def str2bool(value):
    """Interpret a command-line string (or bool) as a boolean.

    ``argparse`` with ``type=bool`` is a classic pitfall: any non-empty
    string — including ``"False"`` — is truthy. This converter accepts the
    usual spellings instead.
    """
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y')


def main():
    """NNI-driven training entry point: pull the next hyper-parameter
    trial, fold it into the config, set up logging, train, and optionally
    run the test script on the best model."""
    parser = argparse.ArgumentParser(description="PyTorch Segmentation")
    parser.add_argument(
        "--config-file",
        default="./configs/Encoder_UNet.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    # BUG FIX: was ``type=bool``, which parses every non-empty string
    # (e.g. "--skip-test False") as True. The default of True is kept.
    parser.add_argument(
        "--skip-test",
        default=True,
        help="whether to run testing script with the best model",
        type=str2bool,
    )
    args = parser.parse_args()

    # Flatten the tuner's {key: value} suggestions into the
    # [k1, v1, k2, v2, ...] shape expected by cfg.merge_from_list.
    tuner_params = nni.get_next_parameter()
    tuner_params_list = [
        item for pair in tuner_params.items() for item in pair
    ]

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(tuner_params_list)
    # One output directory per (config, trial-parameters) combination.
    cfg.update({'OUTPUT_DIR': os.path.join(
        './training_dir',
        os.path.basename(args.config_file).split('.yaml')[0],
        '_'.join([str(i) for i in tuner_params_list]))})
    cfg.freeze()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.MODEL.GPU_NUM)

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("core", output_dir)
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    logger.debug(tuner_params)

    best_model = train(cfg)
    if not args.skip_test:
        run_test(cfg, best_model)
def parse_args():
    """Parse the training command line and configure the global ``cfg``.

    When ``--config-file`` names an existing file, ``cfg`` is updated from
    it (then from the trailing ``opts`` overrides) and frozen.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    parser = argparse.ArgumentParser(description='YOLACT training script')
    parser.add_argument('--config-file', type=str, default='',
                        help='path to the configuration file')
    parser.add_argument('opts', default=None, nargs=argparse.REMAINDER,
                        help='modify configuration using the command line')
    args = parser.parse_args()
    if os.path.isfile(args.config_file):
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
        cfg.freeze()
    # BUG FIX: the parsed namespace was never returned, unlike the other
    # parse_args helpers in this project; callers now receive it.
    return args
def main():
    """Live webcam demo: run COCO detection on frames from camera 0 and
    display the annotated stream until ESC is pressed."""
    cli = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    cli.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    cli.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    cli.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    cli.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    cli.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    cli.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = cli.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    camera = cv2.VideoCapture(0)
    while True:
        frame_start = time.time()
        _, frame = camera.read()
        annotated = coco_demo.run_on_opencv_image(frame)
        print("Time: {:.2f} s / img".format(time.time() - frame_start))
        cv2.imshow("COCO detections", annotated)
        if cv2.waitKey(1) == 27:
            break  # esc to quit
    cv2.destroyAllWindows()
# NOTE(review): the statements below reference names (pre_test_label,
# prediction, model, acc) that are not defined at this level — this looks
# like a fragment of an ROC-plotting helper whose enclosing definition was
# lost; restore the original function wrapper if possible.
fpr, tpr, thresholds = roc_curve(pre_test_label, prediction)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1,
         label=model.model_name + '(area = {0:.2f})'.format(roc_auc))
print(model.model_name + "测试集正确率为:" + "{:.2f}".format(acc * 100) + "%")
plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6),
         label='Luck')  # diagonal reference line (chance performance)
plt.xlim([-0.05, 1.05])  # pad the axes a little so curves at the border stay visible
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    args = parser.parse_args()
    # BUG FIX: `args.config_file is not ''` compared object identity, not
    # equality (and raises a SyntaxWarning on modern Python); use `!=`.
    if args.config_file != '':
        cfg.merge_from_file(args.config_file)
    # NOTE(review): compare() is assumed to run even without a config file
    # (using cfg defaults) — confirm against the original source.
    compare(cfg)
def main():
    """Run PackDet detection on every image in a directory and save a
    plotted visualization of each result to the results directory."""
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/packdet/packdet_R_50_FPN_1x_fe-128-12-2_m4_sep.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weights",
        default="models/packdet_R_50_FPN_1x_fe-128-12-2_m4_sep.pth",
        metavar="FILE",
        help="path to the trained model",
    )
    parser.add_argument(
        "--images-dir",
        default="demo/images",
        metavar="DIR",
        help="path to demo images directory",
    )
    parser.add_argument(
        "--results-dir",
        default="demo/results",
        metavar="DIR",
        help="path to save the results",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHT = args.weights
    cfg.freeze()

    # The following per-class thresholds are computed by maximizing
    # per-class f-measure in their precision-recall curve.
    # Please see compute_thresholds_for_classes() in coco_eval.py for details.
    # you could copy the thrs in packdet/packdet.py here
    thresholds_for_classes = [
        0.4902425706386566, 0.5381519794464111, 0.5067052841186523,
        0.5437142252922058, 0.5588839054107666, 0.5276558995246887,
        0.49406325817108154, 0.49073269963264465, 0.4806738495826721,
        0.4823538064956665, 0.6076132655143738, 0.6440929770469666,
        0.5771225690841675, 0.5104134678840637, 0.518393337726593,
        0.5853402018547058, 0.5871560573577881, 0.5754503607749939,
        0.5476232767105103, 0.5239601135253906, 0.528354823589325,
        0.5842380523681641, 0.529585063457489, 0.5392818450927734,
        0.4744971990585327, 0.5273094773292542, 0.47029709815979004,
        0.47505930066108704, 0.4859939515590668, 0.5534765124320984,
        0.44751793146133423, 0.586391031742096, 0.5289603471755981,
        0.4418090879917145, 0.49789053201675415, 0.5277994871139526,
        0.5256999731063843, 0.49595320224761963, 0.4759668707847595,
        0.5057507753372192, 0.47086426615715027, 0.5383470058441162,
        0.5014472603797913, 0.4778696298599243, 0.4438300132751465,
        0.5047871470451355, 0.4818137586116791, 0.4980989098548889,
        0.5201641917228699, 0.493553102016449, 0.4972984790802002,
        0.49542945623397827, 0.5166008472442627, 0.5381780862808228,
        0.45813074707984924, 0.4879375994205475, 0.4892300069332123,
        0.525837779045105, 0.4814700484275818, 0.4686848223209381,
        0.4951763153076172, 0.5807622075080872, 0.572121262550354,
        0.5322854518890381, 0.4996817708015442, 0.47158145904541016,
        0.5802334547042847, 0.5267660617828369, 0.5192787647247314,
        0.5104144811630249, 0.492986798286438, 0.5321716070175171,
        0.5432604551315308, 0.43284663558006287, 0.572494626045227,
        0.4790349006652832, 0.5585607886314392, 0.5282813310623169,
        0.3120556175708771, 0.5559245347976685
    ]

    # Deterministic processing order for the demo images.
    demo_im_names = os.listdir(args.images_dir)
    demo_im_names.sort()
    print('{} images to test'.format(len(demo_im_names)))

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_thresholds_for_classes=thresholds_for_classes,
        min_image_size=args.min_image_size)

    # CLEANUP: a large commented-out cv2.imshow-based demo loop was removed
    # here; the plt-based saving loop below is the live code path.
    if not os.path.exists(args.results_dir):
        os.makedirs(args.results_dir)

    for i, im_name in enumerate(demo_im_names):
        img = cv2.imread(os.path.join(args.images_dir, im_name))
        if img is None:
            continue  # skip files that are not readable images
        start_time = time.time()
        coco_demo.run_det_on_opencv_image_plt(
            img, os.path.join(args.results_dir, im_name))
        print("{}, {}\tinference time: {:.2f}s".format(
            i, im_name, time.time() - start_time))
    print("Done!")
def main():
    """Distributed evaluation entry point for MTLNAS.

    Parses the config, optionally initializes NCCL DDP (one process per
    GPU), seeds all RNGs, loads the requested checkpoint, and prints both
    tasks' evaluation metrics on rank 0.
    """
    parser = argparse.ArgumentParser(description="PyTorch MTLNAS Eval")
    parser.add_argument(
        "--config-file",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--port", type=int, default=29502)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Preparing for DDP training: only rank 0 prints ("logging" is a bool,
    # not the stdlib module). WORLD_SIZE is set by the distributed launcher.
    logging = args.local_rank == 0
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        os.environ['MASTER_ADDR'] = '127.0.0.1'
        os.environ['MASTER_PORT'] = str(args.port)
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Config: file first, experiment name derived from the config file name
    # (basename minus the 5-char ".yaml" suffix), then CLI overrides.
    cfg.merge_from_file(args.config_file)
    cfg.EXPERIMENT_NAME = args.config_file.split('/')[-1][:-5]
    cfg.merge_from_list(args.opts)

    # Adjust batch size for distributed training: the configured batch size
    # is global, so each rank gets an equal shard.
    assert cfg.TRAIN.BATCH_SIZE % num_gpus == 0
    cfg.TRAIN.BATCH_SIZE = int(cfg.TRAIN.BATCH_SIZE // num_gpus)
    assert cfg.TEST.BATCH_SIZE % num_gpus == 0
    cfg.TEST.BATCH_SIZE = int(cfg.TEST.BATCH_SIZE // num_gpus)
    cfg.freeze()

    # Seeding — make evaluation deterministic across runs.
    random.seed(cfg.SEED)
    np.random.seed(cfg.SEED)
    torch.manual_seed(cfg.SEED)
    torch.cuda.manual_seed(cfg.SEED)
    torch.cuda.manual_seed_all(cfg.SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # This can slow down training

    # Only rank 0 creates the save directory to avoid a mkdir race.
    if not os.path.exists(os.path.join(cfg.SAVE_DIR,
                                       cfg.EXPERIMENT_NAME)) and logging:
        os.makedirs(os.path.join(cfg.SAVE_DIR, cfg.EXPERIMENT_NAME))

    # load the data; under DDP each rank evaluates its own shard.
    test_data = get_dataset(cfg, 'test')
    if distributed:
        test_sampler = torch.utils.data.distributed.DistributedSampler(
            test_data)
    else:
        test_sampler = None
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=cfg.TEST.BATCH_SIZE,
                                              shuffle=False,
                                              sampler=test_sampler)

    task1, task2 = get_tasks(cfg)
    model = get_model(cfg, task1, task2)
    if cfg.CUDA:
        model = model.cuda()

    ckpt_path = os.path.join(cfg.SAVE_DIR, cfg.EXPERIMENT_NAME,
                             'ckpt-%s.pth' % str(cfg.TEST.CKPT_ID).zfill(5))
    print("Evaluating Checkpoint at %s" % ckpt_path)
    ckpt = torch.load(ckpt_path)
    # compatibility with ddp saved checkpoint when evaluating without ddp:
    # strip the "module." prefix DDP adds to parameter names, then merge
    # into the current state dict so non-checkpoint buffers survive.
    pretrain_dict = {
        k.replace('module.', ''): v
        for k, v in ckpt['model_state_dict'].items()
    }
    model_dict = model.state_dict()
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)

    if distributed:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = MyDataParallel(model,
                               device_ids=[args.local_rank],
                               output_device=args.local_rank,
                               find_unused_parameters=True)
    model.eval()

    task1_metric, task2_metric = evaluate(test_loader, model, task1, task2,
                                          distributed, args.local_rank)
    # Print metrics on rank 0 only.
    if logging:
        for k, v in task1_metric.items():
            print('{}: {:.9f}'.format(k, v))
        for k, v in task2_metric.items():
            print('{}: {:.9f}'.format(k, v))
def main():
    """MTLNAS training entry point.

    Bi-level (DARTS-style) optimization: each iteration first updates the
    architecture parameters on a held-out "search" split, then updates the
    network weights on the train split. Supports NCCL DDP (one process per
    GPU), optional apex mixed precision, TensorBoard logging, periodic
    evaluation and checkpointing.
    """
    parser = argparse.ArgumentParser(description="PyTorch MTLNAS Training")
    parser.add_argument(
        "--config-file",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--port", type=int, default=29501)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Preparing for DDP training: only rank 0 logs/saves ("logging" is a
    # bool here, not the stdlib module).
    logging = args.local_rank == 0
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        os.environ['MASTER_ADDR'] = '127.0.0.1'
        os.environ['MASTER_PORT'] = str(args.port)
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Config: file, then experiment name from the config file name
    # (basename minus the 5-char ".yaml" suffix), then CLI overrides.
    cfg.merge_from_file(args.config_file)
    cfg.EXPERIMENT_NAME = args.config_file.split('/')[-1][:-5]
    cfg.merge_from_list(args.opts)

    # Adjust batch size for distributed training: configured sizes are
    # global and get sharded evenly across ranks.
    assert cfg.TRAIN.BATCH_SIZE % num_gpus == 0
    cfg.TRAIN.BATCH_SIZE = int(cfg.TRAIN.BATCH_SIZE // num_gpus)
    assert cfg.TEST.BATCH_SIZE % num_gpus == 0
    cfg.TEST.BATCH_SIZE = int(cfg.TEST.BATCH_SIZE // num_gpus)
    cfg.freeze()

    # Per-run, timestamped TensorBoard log directory.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d~%H:%M:%S")
    experiment_log_dir = os.path.join(cfg.LOG_DIR, cfg.EXPERIMENT_NAME,
                                      timestamp)
    if not os.path.exists(experiment_log_dir) and logging:
        os.makedirs(experiment_log_dir)
    writer = SummaryWriter(logdir=experiment_log_dir)
    printf = get_print(experiment_log_dir)
    printf("Training with Config: ")
    printf(cfg)

    # Seeding — fix every RNG the run touches (incl. hash randomization).
    os.environ['PYTHONHASHSEED'] = str(cfg.SEED)
    random.seed(cfg.SEED)
    np.random.seed(cfg.SEED)
    torch.manual_seed(cfg.SEED)
    torch.cuda.manual_seed(cfg.SEED)
    torch.cuda.manual_seed_all(cfg.SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # This can slow down training

    # Only rank 0 creates the checkpoint directory.
    if not os.path.exists(os.path.join(cfg.SAVE_DIR,
                                       cfg.EXPERIMENT_NAME)) and logging:
        os.makedirs(os.path.join(cfg.SAVE_DIR, cfg.EXPERIMENT_NAME))

    # load the data: the train set is later split into a weight-training
    # part ([:split]) and an arch-search part ([split:]).
    train_full_data = get_dataset(cfg, 'train')
    num_train = len(train_full_data)
    indices = list(range(num_train))
    split = int(np.floor(cfg.ARCH.TRAIN_SPLIT * num_train))

    # load the data used for periodic evaluation.
    if cfg.TRAIN.EVAL_CKPT:
        test_data = get_dataset(cfg, 'val')
        if distributed:
            test_sampler = torch.utils.data.distributed.DistributedSampler(
                test_data)
        else:
            test_sampler = None
        test_loader = torch.utils.data.DataLoader(
            test_data,
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            sampler=test_sampler)

    task1, task2 = get_tasks(cfg)
    model = get_model(cfg, task1, task2)
    if cfg.CUDA:
        model = model.cuda()

    if distributed:
        # Important: Double check if BN is working as expected
        if cfg.TRAIN.APEX:
            printf("using apex synced BN")
            model = apex.parallel.convert_syncbn_model(model)
        else:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = MyDataParallel(model,
                               device_ids=[args.local_rank],
                               output_device=args.local_rank,
                               find_unused_parameters=True)

    # hacky way to pick params: route parameters into per-group learning
    # rates by matching substrings of their names.
    nddr_params = []
    fc8_weights = []
    fc8_bias = []
    base_params = []
    for k, v in model.named_net_parameters():
        if 'paths' in k:
            nddr_params.append(v)
        elif model.net1.fc_id in k:
            if 'weight' in k:
                fc8_weights.append(v)
            else:
                assert 'bias' in k
                fc8_bias.append(v)
        else:
            # Arch parameters must not leak into the weight optimizer.
            assert 'alpha' not in k
            base_params.append(v)
    assert len(nddr_params) > 0 and len(fc8_weights) > 0 and len(fc8_bias) > 0

    parameter_dict = [{
        'params': base_params
    }, {
        'params': fc8_weights,
        'lr': cfg.TRAIN.LR * cfg.TRAIN.FC8_WEIGHT_FACTOR
    }, {
        'params': fc8_bias,
        'lr': cfg.TRAIN.LR * cfg.TRAIN.FC8_BIAS_FACTOR
    }, {
        'params': nddr_params,
        'lr': cfg.TRAIN.LR * cfg.TRAIN.NDDR_FACTOR
    }]
    optimizer = optim.SGD(parameter_dict,
                          lr=cfg.TRAIN.LR,
                          momentum=cfg.TRAIN.MOMENTUM,
                          weight_decay=cfg.TRAIN.WEIGHT_DECAY)

    # Separate optimizer for the architecture parameters.
    if cfg.ARCH.OPTIMIZER == 'sgd':
        arch_optimizer = torch.optim.SGD(
            model.arch_parameters(),
            lr=cfg.ARCH.LR,
            momentum=cfg.TRAIN.MOMENTUM,  # TODO: separate this param
            weight_decay=cfg.ARCH.WEIGHT_DECAY)
    else:
        arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                          lr=cfg.ARCH.LR,
                                          betas=(0.5, 0.999),
                                          weight_decay=cfg.ARCH.WEIGHT_DECAY)

    # Learning-rate schedules (stepped once per iteration, not per epoch).
    if cfg.TRAIN.SCHEDULE == 'Poly':
        if cfg.TRAIN.WARMUP > 0.:
            # Linear warmup multiplied into the polynomial decay.
            scheduler = optim.lr_scheduler.LambdaLR(
                optimizer,
                lambda step: min(1., float(step) / cfg.TRAIN.WARMUP) *
                (1 - float(step) / cfg.TRAIN.STEPS)**cfg.TRAIN.POWER,
                last_epoch=-1)
            arch_scheduler = optim.lr_scheduler.LambdaLR(
                arch_optimizer,
                lambda step: min(1., float(step) / cfg.TRAIN.WARMUP) *
                (1 - float(step) / cfg.TRAIN.STEPS)**cfg.TRAIN.POWER,
                last_epoch=-1)
        else:
            scheduler = optim.lr_scheduler.LambdaLR(
                optimizer,
                lambda step:
                (1 - float(step) / cfg.TRAIN.STEPS)**cfg.TRAIN.POWER,
                last_epoch=-1)
            arch_scheduler = optim.lr_scheduler.LambdaLR(
                arch_optimizer,
                lambda step:
                (1 - float(step) / cfg.TRAIN.STEPS)**cfg.TRAIN.POWER,
                last_epoch=-1)
    elif cfg.TRAIN.SCHEDULE == 'Cosine':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, cfg.TRAIN.STEPS)
        arch_scheduler = optim.lr_scheduler.CosineAnnealingLR(
            arch_optimizer, cfg.TRAIN.STEPS)
    elif cfg.TRAIN.SCHEDULE == 'Step':
        milestones = (np.array([0.6, 0.9]) * cfg.TRAIN.STEPS).astype('int')
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones,
                                                   gamma=0.1)
        # NOTE(review): this arch_scheduler wraps `optimizer`, not
        # `arch_optimizer`, unlike every other branch — looks like a latent
        # copy-paste bug; confirm before changing behavior.
        arch_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones,
                                                        gamma=0.1)
    else:
        raise NotImplementedError

    if cfg.TRAIN.APEX:
        model, [arch_optimizer, optimizer] = amp.initialize(
            model, [arch_optimizer, optimizer], opt_level="O1", num_losses=2)

    model.train()
    steps = 0
    while steps < cfg.TRAIN.STEPS:
        # Initialize train/val dataloader below this shuffle operation
        # to ensure both arch and weights gets to see all the data,
        # but not at the same time during mixed data training
        if cfg.ARCH.MIXED_DATA:
            np.random.shuffle(indices)
        train_data = torch.utils.data.Subset(train_full_data, indices[:split])
        val_data = torch.utils.data.Subset(train_full_data,
                                           indices[split:num_train])
        if distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_data)
            val_sampler = torch.utils.data.distributed.DistributedSampler(
                val_data)
        else:
            train_sampler = None
            val_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=cfg.TRAIN.BATCH_SIZE,
            pin_memory=True,
            sampler=train_sampler)
        val_loader = torch.utils.data.DataLoader(
            val_data,
            batch_size=cfg.TRAIN.BATCH_SIZE,
            pin_memory=True,
            sampler=val_sampler)
        val_iter = iter(val_loader)
        if distributed:
            train_sampler.set_epoch(steps)  # steps is used to seed RNG
            val_sampler.set_epoch(steps)

        for batch_idx, (image, label_1, label_2) in enumerate(train_loader):
            if cfg.CUDA:
                image, label_1, label_2 = image.cuda(), label_1.cuda(
                ), label_2.cuda()

            # get a random minibatch from the search queue without replacement
            val_batch = next(val_iter, None)
            if val_batch is None:
                # val_iter has reached its end: re-seed the sampler and
                # restart the search-split iterator.
                val_sampler.set_epoch(steps)
                val_iter = iter(val_loader)
                val_batch = next(val_iter)
            image_search, label_1_search, label_2_search = val_batch
            image_search = image_search.cuda()
            label_1_search, label_2_search = label_1_search.cuda(
            ), label_2_search.cuda()

            # setting flag for training arch parameters (phase 1 of the
            # bi-level update: architecture on the search split).
            model.arch_train()
            assert model.arch_training
            arch_optimizer.zero_grad()
            arch_result = model.loss(image_search,
                                     (label_1_search, label_2_search))
            arch_loss = arch_result.loss
            # Mixed Precision
            if cfg.TRAIN.APEX:
                with amp.scale_loss(arch_loss, arch_optimizer,
                                    loss_id=0) as scaled_loss:
                    scaled_loss.backward()
            else:
                arch_loss.backward()
            arch_optimizer.step()

            # Phase 2: network weights on the train split.
            model.arch_eval()
            assert not model.arch_training
            optimizer.zero_grad()
            result = model.loss(image, (label_1, label_2))
            out1, out2 = result.out1, result.out2
            loss1 = result.loss1
            loss2 = result.loss2
            loss = result.loss
            # Mixed Precision
            if cfg.TRAIN.APEX:
                with amp.scale_loss(loss, optimizer,
                                    loss_id=1) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
            if cfg.ARCH.SEARCHSPACE == 'GeneralizedMTLNAS':
                model.step()  # update model temperature
            scheduler.step()
            if cfg.ARCH.OPTIMIZER == 'sgd':
                arch_scheduler.step()

            # Print out the loss periodically.
            if steps % cfg.TRAIN.LOG_INTERVAL == 0 and logging:
                printf(
                    'Train Step: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLoss1: {:.6f}\tLoss2: {:.6f}'
                    .format(steps, batch_idx * len(image),
                            len(train_loader.dataset),
                            100. * batch_idx / len(train_loader),
                            loss.data.item(), loss1.data.item(),
                            loss2.data.item()))

                # Log to tensorboard
                writer.add_scalar('lr', scheduler.get_lr()[0], steps)
                writer.add_scalar('arch_lr', arch_scheduler.get_lr()[0],
                                  steps)
                writer.add_scalar('loss/overall', loss.data.item(), steps)
                writer.add_image(
                    'image',
                    process_image(image[0], train_full_data.image_mean),
                    steps)
                task1.log_visualize(out1, label_1, loss1, writer, steps)
                task2.log_visualize(out2, label_2, loss2, writer, steps)

                if cfg.ARCH.ENTROPY_REGULARIZATION:
                    writer.add_scalar('loss/entropy_weight',
                                      arch_result.entropy_weight, steps)
                    writer.add_scalar('loss/entropy_loss',
                                      arch_result.entropy_loss.data.item(),
                                      steps)
                if cfg.ARCH.L1_REGULARIZATION:
                    writer.add_scalar('loss/l1_weight',
                                      arch_result.l1_weight, steps)
                    writer.add_scalar('loss/l1_loss',
                                      arch_result.l1_loss.data.item(), steps)
                if cfg.ARCH.SEARCHSPACE == 'GeneralizedMTLNAS':
                    # Visualize the (sigmoid-squashed) connection strengths
                    # of both networks and the resulting connectivity graph.
                    writer.add_scalar('temperature', model.get_temperature(),
                                      steps)
                    alpha1 = torch.sigmoid(
                        model.net1_alphas).detach().cpu().numpy()
                    alpha2 = torch.sigmoid(
                        model.net2_alphas).detach().cpu().numpy()
                    alpha1_path = os.path.join(experiment_log_dir, 'alpha1')
                    if not os.path.isdir(alpha1_path):
                        os.makedirs(alpha1_path)
                    alpha2_path = os.path.join(experiment_log_dir, 'alpha2')
                    if not os.path.isdir(alpha2_path):
                        os.makedirs(alpha2_path)
                    heatmap1 = save_heatmap(
                        alpha1,
                        os.path.join(alpha1_path,
                                     "%s_alpha1.png" % str(steps).zfill(5)))
                    heatmap2 = save_heatmap(
                        alpha2,
                        os.path.join(alpha2_path,
                                     "%s_alpha2.png" % str(steps).zfill(5)))
                    writer.add_image('alpha/net1', heatmap1, steps)
                    writer.add_image('alpha/net2', heatmap2, steps)
                    network_path = os.path.join(experiment_log_dir, 'network')
                    if not os.path.isdir(network_path):
                        os.makedirs(network_path)
                    connectivity_plot = save_connectivity(
                        alpha1, alpha2, model.net1_connectivity_matrix,
                        model.net2_connectivity_matrix,
                        os.path.join(network_path,
                                     "%s_network.png" % str(steps).zfill(5)))
                    writer.add_image('network', connectivity_plot, steps)

            # Periodic evaluation + checkpointing.
            if steps % cfg.TRAIN.EVAL_INTERVAL == 0:
                # Unwrap the DDP module so checkpoints load without DDP too.
                if distributed:
                    state_dict = model.module.state_dict()
                else:
                    state_dict = model.state_dict()
                checkpoint = {
                    'cfg': cfg,
                    'step': steps,
                    'model_state_dict': state_dict,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'loss': loss,
                    'loss1': loss1,
                    'loss2': loss2,
                    'task1_metric': None,
                    'task2_metric': None,
                }
                if cfg.TRAIN.EVAL_CKPT:
                    model.eval()
                    torch.cuda.empty_cache()  # TODO check if it helps
                    task1_metric, task2_metric = evaluate(
                        test_loader, model, task1, task2, distributed,
                        args.local_rank)
                    if logging:
                        for k, v in task1_metric.items():
                            writer.add_scalar('eval/{}'.format(k), v, steps)
                        for k, v in task2_metric.items():
                            writer.add_scalar('eval/{}'.format(k), v, steps)
                        for k, v in task1_metric.items():
                            printf('{}: {:.3f}'.format(k, v))
                        for k, v in task2_metric.items():
                            printf('{}: {:.3f}'.format(k, v))
                    checkpoint['task1_metric'] = task1_metric
                    checkpoint['task2_metric'] = task2_metric
                    model.train()
                    torch.cuda.empty_cache()  # TODO check if it helps
                # Only rank 0 writes checkpoints.
                if logging and steps % cfg.TRAIN.SAVE_INTERVAL == 0:
                    torch.save(
                        checkpoint,
                        os.path.join(cfg.SAVE_DIR, cfg.EXPERIMENT_NAME,
                                     'ckpt-%s.pth' % str(steps).zfill(5)))
            if steps >= cfg.TRAIN.STEPS:
                break
            # train for one extra iteration to allow time for tensorboard logging..
            steps += 1
def main():
    """Run COCO-style inference for a trained detector on every configured
    test dataset, optionally under multi-GPU distributed evaluation."""
    cli = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    cli.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = cli.parse_args()

    # WORLD_SIZE is set by the distributed launcher; one process per GPU.
    num_gpus = int(os.environ.get("WORLD_SIZE", "1"))
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("core", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    num_parameters = sum(param.nelement() for param in model.parameters())
    logger.info('# parameters totally: ' + str(num_parameters))

    # Load the weights named in the config; the inference output folder is
    # suffixed with the weight file's basename (extension stripped).
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT, is_train=False)
    suffix = cfg.MODEL.WEIGHT.split('/')[-1][:-4]

    # Evaluation types follow the enabled heads.
    iou_types = ["bbox"]
    if cfg.MODEL.MASK_ON:
        iou_types.append("segm")
    if cfg.MODEL.KEYPOINT_ON:
        iou_types.append("keypoints")
    iou_types = tuple(iou_types)

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference_" + suffix,
                                  dataset_name)
            mkdir(folder)
            output_folders[idx] = folder

    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    # Dense one-stage heads produce final boxes directly, so box_only only
    # applies to the plain RPN-only case.
    has_dense_head = (cfg.MODEL.FCOS_ON or cfg.MODEL.PACKDET_ON
                      or cfg.MODEL.RETINAPACK_ON or cfg.MODEL.RETINANET_ON)
    for folder, dataset_name, loader in zip(output_folders, dataset_names,
                                            data_loaders_val):
        inference(
            model,
            loader,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=(not has_dense_head) and cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=folder,
        )
        synchronize()
def main():
    """Train an object detector (optionally distributed) and, unless
    --skip-test is given, evaluate the final model."""
    cli = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    cli.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    cli.add_argument("--local_rank", type=int, default=0)
    cli.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    cli.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = cli.parse_args()

    # WORLD_SIZE is set by the distributed launcher; one process per GPU.
    num_gpus = int(os.environ.get("WORLD_SIZE", "1"))
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("core", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)
    if not args.skip_test:
        run_test(cfg, model, args.distributed)
def main():
    """Entry point for the multi-task baseline training experiment.

    Loads and freezes the config, seeds all RNGs, builds the two-task model,
    splits its parameters into per-group learning rates, then runs a
    step-based training loop with periodic logging, optional evaluation, and
    checkpointing.
    """
    parser = argparse.ArgumentParser(description="Baseline Experiment Training")
    parser.add_argument(
        "--config-file",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Config precedence: file first, then CLI overrides, then freeze.
    cfg.merge_from_file(args.config_file)
    # Experiment name is the config filename without its extension.
    # NOTE(review): [:-5] assumes a 5-char suffix (e.g. ".yaml") and a POSIX
    # path separator — confirm configs always follow this convention.
    cfg.EXPERIMENT_NAME = args.config_file.split('/')[-1][:-5]
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # One timestamped log directory per run, under LOG_DIR/EXPERIMENT_NAME.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d~%H:%M:%S")
    experiment_log_dir = os.path.join(cfg.LOG_DIR, cfg.EXPERIMENT_NAME, timestamp)
    if not os.path.exists(experiment_log_dir):
        os.makedirs(experiment_log_dir)
    writer = SummaryWriter(logdir=experiment_log_dir)
    # printf logs to both stdout and the experiment directory (via get_print).
    printf = get_print(experiment_log_dir)
    printf("Training with Config: ")
    printf(cfg)

    # Seed every RNG source for reproducibility.
    random.seed(cfg.SEED)
    np.random.seed(cfg.SEED)
    torch.manual_seed(cfg.SEED)
    torch.cuda.manual_seed(cfg.SEED)
    torch.cuda.manual_seed_all(cfg.SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # determinism over speed; this can slow down training

    # Checkpoints go under SAVE_DIR/EXPERIMENT_NAME.
    if not os.path.exists(os.path.join(cfg.SAVE_DIR, cfg.EXPERIMENT_NAME)):
        os.makedirs(os.path.join(cfg.SAVE_DIR, cfg.EXPERIMENT_NAME))

    # Load the training data.
    train_data = get_dataset(cfg, 'train')
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        shuffle=True,
        pin_memory=True)

    # Validation loader is only needed when checkpoints are evaluated.
    if cfg.TRAIN.EVAL_CKPT:
        test_loader = torch.utils.data.DataLoader(
            get_dataset(cfg, 'val'),
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            pin_memory=True)

    task1, task2 = get_tasks(cfg)
    model = get_model(cfg, task1, task2)

    if cfg.CUDA:
        model = model.cuda()

    # Hacky parameter picking by name: NDDR-layer params, the task heads'
    # final-classifier (fc8) weights/biases, and everything else (backbone).
    nddr_params = []
    fc8_weights = []
    fc8_bias = []
    base_params = []
    for k, v in model.named_parameters():
        if 'nddrs' in k:
            nddr_params.append(v)
        elif model.net1.fc_id in k:
            if 'weight' in k:
                fc8_weights.append(v)
            else:
                assert 'bias' in k
                fc8_bias.append(v)
        else:
            base_params.append(v)

    # Multi-task (non-single-task, non-shared-feature) models must expose
    # NDDR layers and a final classifier — otherwise the grouping above failed.
    if not cfg.MODEL.SINGLETASK and not cfg.MODEL.SHAREDFEATURE:
        assert len(nddr_params) > 0 and len(fc8_weights) > 0 and len(fc8_bias) > 0

    # Per-group learning-rate multipliers relative to the base TRAIN.LR.
    parameter_dict = [
        {'params': fc8_weights, 'lr': cfg.TRAIN.LR * cfg.TRAIN.FC8_WEIGHT_FACTOR},
        {'params': fc8_bias, 'lr': cfg.TRAIN.LR * cfg.TRAIN.FC8_BIAS_FACTOR},
        {'params': nddr_params, 'lr': cfg.TRAIN.LR * cfg.TRAIN.NDDR_FACTOR}
    ]
    # Backbone params train at the base LR unless explicitly frozen.
    if not cfg.TRAIN.FREEZE_BASE:
        parameter_dict.append({'params': base_params})
    else:
        printf("Frozen net weights")
    optimizer = optim.SGD(parameter_dict,
                          lr=cfg.TRAIN.LR,
                          momentum=cfg.TRAIN.MOMENTUM,
                          weight_decay=cfg.TRAIN.WEIGHT_DECAY)

    # LR schedule: polynomial decay (optionally with linear warmup) or cosine.
    if cfg.TRAIN.SCHEDULE == 'Poly':
        if cfg.TRAIN.WARMUP > 0.:
            scheduler = optim.lr_scheduler.LambdaLR(
                optimizer,
                lambda step: min(1., float(step) / cfg.TRAIN.WARMUP) * (1 - float(step) / cfg.TRAIN.STEPS) ** cfg.TRAIN.POWER,
                last_epoch=-1)
        else:
            scheduler = optim.lr_scheduler.LambdaLR(
                optimizer,
                lambda step: (1 - float(step) / cfg.TRAIN.STEPS) ** cfg.TRAIN.POWER,
                last_epoch=-1)
    elif cfg.TRAIN.SCHEDULE == 'Cosine':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, cfg.TRAIN.STEPS)
    else:
        raise NotImplementedError

    # Optional NVIDIA Apex mixed-precision (opt level O1).
    if cfg.TRAIN.APEX:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    model.train()
    steps = 0
    # Step-based (not epoch-based) training: keep cycling the loader until
    # TRAIN.STEPS optimisation steps have been taken.
    while steps < cfg.TRAIN.STEPS:
        for batch_idx, (image, label_1, label_2) in enumerate(train_loader):
            if cfg.CUDA:
                image, label_1, label_2 = image.cuda(), label_1.cuda(), label_2.cuda()
            optimizer.zero_grad()
            # model.loss returns per-task outputs and losses plus the combined loss.
            result = model.loss(image, (label_1, label_2))
            out1, out2 = result.out1, result.out2
            loss1 = result.loss1
            loss2 = result.loss2
            loss = result.loss

            # Mixed precision: scale the loss before backward when Apex is on.
            if cfg.TRAIN.APEX:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
            model.step()  # update model-internal step count
            scheduler.step()

            # Print out the loss periodically.
            if steps % cfg.TRAIN.LOG_INTERVAL == 0:
                printf('Train Step: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLoss1: {:.6f}\tLoss2: {:.6f}'.format(
                    steps, batch_idx * len(image), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.data.item(), loss1.data.item(), loss2.data.item()))

                # Log LR, losses, per-task visualisations and a sample image
                # to tensorboard.
                writer.add_scalar('lr', scheduler.get_lr()[0], steps)
                writer.add_scalar('loss/overall', loss.data.item(), steps)
                task1.log_visualize(out1, label_1, loss1, writer, steps)
                task2.log_visualize(out2, label_2, loss2, writer, steps)
                writer.add_image('image', process_image(image[0], train_data.image_mean), steps)

            # Periodic checkpoint (also fires at step 0), optionally preceded
            # by a validation pass whose metrics are stored in the checkpoint.
            if steps % cfg.TRAIN.SAVE_INTERVAL == 0:
                checkpoint = {
                    'cfg': cfg,
                    'step': steps,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'loss': loss,
                    'loss1': loss1,
                    'loss2': loss2,
                    'task1_metric': None,
                    'task2_metric': None,
                }
                if cfg.TRAIN.EVAL_CKPT:
                    model.eval()
                    task1_metric, task2_metric = evaluate(test_loader, model, task1, task2)
                    for k, v in task1_metric.items():
                        writer.add_scalar('eval/{}'.format(k), v, steps)
                    for k, v in task2_metric.items():
                        writer.add_scalar('eval/{}'.format(k), v, steps)
                    for k, v in task1_metric.items():
                        printf('{}: {:.3f}'.format(k, v))
                    for k, v in task2_metric.items():
                        printf('{}: {:.3f}'.format(k, v))
                    checkpoint['task1_metric'] = task1_metric
                    checkpoint['task2_metric'] = task2_metric
                    model.train()  # restore training mode after evaluation
                torch.save(checkpoint,
                           os.path.join(cfg.SAVE_DIR, cfg.EXPERIMENT_NAME,
                                        'ckpt-%s.pth' % str(steps).zfill(5)))

            # Stop mid-epoch once the step budget is reached.
            if steps >= cfg.TRAIN.STEPS:
                break
            steps += 1