예제 #1
0
def main():
    """Run training with the COCO FPN/DCN end-to-end OHEM YAML configuration.

    Loads the YAML file into the shared ``config``, starts file logging at
    ``<model_prefix>train.log``, logs the pretty-printed configuration, sets
    two MXNet environment variables, and calls ``train_net`` with the GPU
    context list.
    """
    update_config("configs/coco/resnet_v1_101_coco_trainval_fpn_dcn_end2end_ohem.yaml")
    log_init(filename=config.TRAIN.model_prefix + "train.log")
    logging.info(pprint.pformat(config))
    os.environ.update({
        "MXNET_CUDNN_AUTOTUNE_DEFAULT": "0",
        "MXNET_GPU_MEM_POOL_TYPE": "Round",
    })

    # One context per comma-separated GPU id; the whole list is then
    # repeated IM_PER_GPU times.
    devices = [mx.gpu(int(gpu_id)) for gpu_id in config.gpus.split(',')]
    devices = devices * config.network.IM_PER_GPU
    train_cfg = config.TRAIN
    train_net(devices, train_cfg.begin_epoch, train_cfg.lr, train_cfg.lr_step)
예제 #2
0
def main():
    """Run training with the VOC R-FCN/DCN one-GPU OHEM YAML configuration.

    Loads the YAML file into the shared ``config``, starts file logging at
    ``<model_prefix>train.log``, logs the pretty-printed configuration, sets
    two MXNet environment variables, and calls ``train_net`` with the GPU
    contexts plus the pretrained-model and schedule settings from ``config``.
    """
    update_config(
        "configs/voc/resnet_v1_50_voc0712_rfcn_dcn_end2end_ohem_one_gpu.yaml")
    log_init(filename=config.TRAIN.model_prefix + "train.log")
    logging.info(pprint.pformat(config))
    os.environ.update({
        "MXNET_CUDNN_AUTOTUNE_DEFAULT": "0",
        "MXNET_GPU_MEM_POOL_TYPE": "Round",
    })

    # One context per comma-separated GPU id.
    devices = [mx.gpu(int(gpu_id)) for gpu_id in config.gpus.split(',')]
    net_cfg = config.network
    train_cfg = config.TRAIN
    train_net(
        devices,
        net_cfg.pretrained,
        net_cfg.pretrained_epoch,
        train_cfg.model_prefix,
        train_cfg.begin_epoch,
        train_cfg.end_epoch,
        train_cfg.lr,
        train_cfg.lr_step,
    )
예제 #3
0
def main():
    """Build the RetinaNet configuration from CLI arguments and run it.

    Sets two MXNet environment variables, parses the command line, fills an
    ``EasyDict`` with dataset, anchor, training and network settings, and
    then either runs ``demo_net`` (when ``args.demo`` is truthy) or creates
    the log directory, initialises file logging and runs ``train_net``.
    """
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"
    args = parse_args()
    setattr(mobula.config, "NVCC", args.nvcc)

    config = easydict.EasyDict()
    config.gpus = [int(x) for x in str(args.gpus).split(',')]
    config.dataset = easydict.EasyDict()
    config.dataset.NUM_CLASSES = args.num_classes
    config.dataset.dataset_type = args.dataset_type
    config.dataset.dataset_path = args.dataset_root
    config.retinanet = easydict.EasyDict()
    config.retinanet.network = easydict.EasyDict()
    config.retinanet.network.FPN_STRIDES = [8, 16, 32, 64, 128]
    config.retinanet.network.BASE_SIZES = [(32, 32), (64, 64), (128, 128),
                                           (256, 256), (512, 512)]
    # Three anchor scales per octave: 2^0, 2^(1/3), 2^(2/3).  The middle
    # exponent was previously 1/2, which breaks the even octave spacing.
    config.retinanet.network.SCALES = [2**0, 2**(1 / 3), 2**(2 / 3)]
    config.retinanet.network.RATIOS = [1 / 2, 1, 2]
    config.retinanet.network.bbox_norm_coef = [0.1, 0.1, 0.2, 0.2]

    config.TRAIN = easydict.EasyDict()
    config.TRAIN.batch_size = 2 * len(config.gpus)
    # Learning rate scales linearly with batch size (0.01 at batch size 16).
    config.TRAIN.lr = 0.01 * config.TRAIN.batch_size / 16
    config.TRAIN.warmup_lr = config.TRAIN.lr
    config.TRAIN.warmup_step = 1000
    config.TRAIN.wd = 1e-4
    config.TRAIN.momentum = .9
    # The learning rate was passed to format() without a placeholder and was
    # silently dropped; it is now part of the output directory name.
    config.TRAIN.log_path = "output/{}/RetinaNet-hflip-{}".format(
        config.dataset.dataset_type, config.TRAIN.lr)
    config.TRAIN.log_interval = 100
    config.TRAIN.cls_focal_loss_alpha = .25
    config.TRAIN.cls_focal_loss_gamma = 2
    config.TRAIN.image_short_size = 600
    config.TRAIN.image_max_long_size = 1333
    config.TRAIN.aspect_grouping = True
    # if aspect_grouping is set to False, all images will be pad to (PAD_H, PAD_W)
    config.TRAIN.PAD_H = 768
    config.TRAIN.PAD_W = 768
    config.TRAIN.begin_epoch = 0
    config.TRAIN.end_epoch = 28
    config.TRAIN.lr_step = [6, 8]
    config.TRAIN.FLIP = True
    config.TRAIN.resume = None
    config.TRAIN.trainer_resume = None

    config.network = easydict.EasyDict()
    config.network.FIXED_PARAMS = []
    config.network.sync_bn = True
    # Global statistics are used exactly when sync BN is off.
    config.network.use_global_stats = not config.network.sync_bn
    config.network.merge_backbone_bn = False

    config.val = easydict.EasyDict()
    if args.demo:
        config.val.params_file = args.demo_params
        config.val.viz = args.viz
        demo_net(config)
    else:
        os.makedirs(config.TRAIN.log_path, exist_ok=True)
        # A timestamped file name gives every run its own log file.
        log_init(filename=os.path.join(config.TRAIN.log_path,
                                       "train_{}.log".format(time.time())))
        msg = pprint.pformat(config)
        logging.info(msg)
        train_net(config)
예제 #4
0
def main():
    """Build the RetinaNet (resnetv1b backbone) configuration and run it.

    Sets an MXNet environment variable, loads the mobula operators, parses
    the command line, fills an ``EasyDict`` with dataset, anchor, training
    and backbone settings, and then either runs ``demo_net`` (when
    ``args.demo`` is truthy) or creates the log directory, initialises file
    logging and runs ``train_net``.
    """
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    load_mobula_ops()

    args = parse_args()
    setattr(mobula.config, "NVCC", args.nvcc)
    setattr(mobula.config, "SHOW_BUILDING_COMMAND", True)
    config = easydict.EasyDict()
    config.gpus = [int(x) for x in str(args.gpus).split(',')]
    config.dataset = easydict.EasyDict()
    config.dataset.NUM_CLASSES = args.num_classes
    config.dataset.dataset_type = args.dataset_type
    config.dataset.dataset_path = args.dataset_root
    config.dataset.max_bbox_number = 200

    config.retinanet = easydict.EasyDict()
    config.retinanet.network = easydict.EasyDict()
    config.retinanet.network.FPN_STRIDES = [8, 16, 32, 64, 128]
    config.retinanet.network.BASE_SIZES = [(32, 32), (64, 64), (128, 128),
                                           (256, 256), (512, 512)]
    config.retinanet.network.SCALES = [2**0, 2**(1 / 3), 2**(2 / 3)]
    config.retinanet.network.RATIOS = [1 / 2, 1, 2]
    config.retinanet.network.bbox_norm_coef = [1, 1, 1, 1]

    config.TRAIN = easydict.EasyDict()
    config.TRAIN.batch_size = args.im_per_gpu * len(config.gpus)
    # Learning rate scales linearly with batch size (0.01 at batch size 16);
    # the warmup step count scales inversely with it.
    config.TRAIN.lr = 0.01 * config.TRAIN.batch_size / 16
    config.TRAIN.warmup_lr = config.TRAIN.lr * 1 / 3
    config.TRAIN.warmup_step = int(1000 * 16 / config.TRAIN.batch_size)
    config.TRAIN.wd = 1e-4
    config.TRAIN.momentum = .9
    config.TRAIN.log_interval = 100
    config.TRAIN.cls_focal_loss_alpha = .25
    config.TRAIN.cls_focal_loss_gamma = 2
    config.TRAIN.image_short_size = 800
    config.TRAIN.image_max_long_size = 1333
    config.TRAIN.aspect_grouping = True
    config.TRAIN.negative_iou_threshold = .4
    config.TRAIN.positive_iou_threshold = .5
    # if aspect_grouping is set to False, all images will be pad to (PAD_H, PAD_W)
    config.TRAIN.PAD_H = 768
    config.TRAIN.PAD_W = 768
    config.TRAIN.begin_epoch = 0
    config.TRAIN.end_epoch = 28
    config.TRAIN.lr_step = [5]
    config.TRAIN.FLIP = True
    config.TRAIN.resume = None
    config.TRAIN.trainer_resume = None

    config.network = easydict.EasyDict()
    config.network.BACKBONE = easydict.EasyDict()
    config.network.BACKBONE.name = "resnetv1b"
    config.network.BACKBONE.kwargs = easydict.EasyDict()
    config.network.BACKBONE.kwargs.num_layers = 50
    config.network.BACKBONE.kwargs.pretrained = True
    config.network.BACKBONE.kwargs.norm_kwargs = {
        "num_devices": len(config.gpus)
    }
    config.network.BACKBONE.kwargs.norm_layer = FrozenBatchNorm2d
    config.network.FIXED_PARAMS = [".*layers1.*", ".*resnetv1b_conv0.*"]
    config.network.fpn_neck_feature_dim = 256
    # The template had three placeholders for four arguments, so the maximum
    # long-side size was silently dropped; all four values are now encoded
    # in the output directory name.
    config.TRAIN.log_path = "output/{}/RetinaNet-hflip-{}-{}-{}".format(
        config.dataset.dataset_type, config.TRAIN.lr,
        config.TRAIN.image_short_size, config.TRAIN.image_max_long_size)

    config.val = easydict.EasyDict()
    if args.demo:
        config.val.params_file = args.demo_params
        config.val.viz = args.viz
        demo_net(config)
    else:
        os.makedirs(config.TRAIN.log_path, exist_ok=True)
        # A timestamped file name gives every run its own log file.
        log_init(filename=os.path.join(config.TRAIN.log_path,
                                       "train_{}.log".format(time.time())))
        msg = pprint.pformat(config)
        logging.info(msg)
        train_net(config)
예제 #5
0
def main():
    """Build the FCOS configuration, apply YAML overrides, and run it.

    Sets an MXNet environment variable, loads the mobula operators, parses
    the command line and fills an ``EasyDict`` with default dataset, FCOS,
    training and backbone settings.  If ``args.config`` names an existing
    file, its YAML content is merged over the defaults.  The FP16-dependent
    setup is applied to the merged configuration.  Finally the function runs
    ``demo_net`` (when ``args.demo`` is truthy) or creates the log directory,
    initialises file logging and runs ``train_net``.

    Raises:
        AssertionError: if FP16 training is enabled together with sync BN.
    """

    def merge_overrides(target, overrides):
        """Recursively copy ``overrides`` into ``target``, descending into dicts."""
        for key, value in overrides.items():
            if isinstance(value, dict):
                merge_overrides(target[key], value)
            else:
                target[key] = value

    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    # os.environ["MXNET_GPU_MEM_POOL_TYPE"] = "Round"
    load_mobula_ops()
    args = parse_args()
    setattr(mobula.config, "NVCC", args.nvcc)
    setattr(mobula.config, "SHOW_BUILDING_COMMAND", True)

    config = easydict.EasyDict()
    config.gpus = [int(x) for x in str(args.gpus).split(',')]
    config.use_hvd = args.hvd
    if config.use_hvd:
        import horovod.mxnet as hvd
        hvd.init()
        # With Horovod the GPU list is replaced by this process's local rank.
        config.gpus = [hvd.local_rank()]

    config.dataset = easydict.EasyDict()
    config.dataset.NUM_CLASSES = args.num_classes
    config.dataset.dataset_type = args.dataset_type
    config.dataset.dataset_path = args.dataset_root

    config.FCOS = easydict.EasyDict()
    config.FCOS.network = easydict.EasyDict()

    config.FCOS.network.FPN_SCALES = [8, 16, 32, 64, 128]
    config.FCOS.network.FPN_MINIMUM_DISTANCES = [0, 64, 128, 256, 512]
    config.FCOS.network.FPN_MAXIMUM_DISTANCES = [64, 128, 256, 512, 4096]
    config.TRAIN = easydict.EasyDict()
    config.TRAIN.batch_size = args.im_per_gpu * len(config.gpus)
    # Learning rate scales linearly with batch size (0.01 at batch size 16);
    # the warmup step count scales inversely with it.
    config.TRAIN.lr = 0.01 * config.TRAIN.batch_size / 16
    config.TRAIN.warmup_lr = config.TRAIN.lr * 1 / 3
    config.TRAIN.warmup_step = int(1000 * 16 / config.TRAIN.batch_size)
    config.TRAIN.wd = 1e-4
    config.TRAIN.momentum = .9
    config.TRAIN.log_interval = 100
    config.TRAIN.cls_focal_loss_alpha = .25
    config.TRAIN.cls_focal_loss_gamma = 2
    config.TRAIN.image_short_size = 800
    config.TRAIN.image_max_long_size = 1333

    config.TRAIN.aspect_grouping = True
    # if aspect_grouping is set to False, all images will be pad to (PAD_H, PAD_W)
    config.TRAIN.PAD_H = 768
    config.TRAIN.PAD_W = 768
    config.TRAIN.begin_epoch = 0
    config.TRAIN.end_epoch = 28
    config.TRAIN.lr_step = [5]
    config.TRAIN.FLIP = True
    config.TRAIN.resume = None
    config.TRAIN.trainer_resume = None
    config.TRAIN.USE_FP16 = False
    config.network = easydict.EasyDict()
    config.network.BACKBONE = easydict.EasyDict()
    config.network.BACKBONE.name = "resnetv1"
    config.network.BACKBONE.kwargs = easydict.EasyDict()
    config.network.BACKBONE.kwargs.pretrained = True

    config.network.FIXED_PARAMS = [".*stage1.*", ".*resnetv10_conv0.*"]
    config.network.use_global_stats = True
    config.network.sync_bn = False
    config.network.fpn_neck_feature_dim = 256

    config.val = easydict.EasyDict()
    if os.path.exists(args.config):
        # CLoader exists only when PyYAML was built with LibYAML; fall back
        # to the pure-Python Loader, which parses the same documents.
        # NOTE(review): both loaders can construct arbitrary Python objects,
        # so only trusted config files should be passed here.
        yaml_loader = getattr(yaml, "CLoader", yaml.Loader)
        with open(args.config) as f:
            config_loaded = yaml.load(f, yaml_loader)
        # An empty YAML document loads as None; there is nothing to merge.
        if config_loaded is not None:
            merge_overrides(config, config_loaded)
    else:
        logging.info("Escape loading config since it does not exist.")

    # These checks used to run right after the hard-coded defaults, where
    # USE_FP16 was always False, so they never fired and ignored any YAML
    # override.  They now see the effective configuration.
    if config.TRAIN.USE_FP16:
        os.environ["MXNET_SAFE_ACCUMULATION"] = "1"
        assert config.network.sync_bn is False, "Sync BatchNorm is not supported by amp."

    config.TRAIN.log_path = "output/{}/{}-{}-{}-{}/reg_weighted_by_centerness_focal_alpha_gamma_lr_{}_{}_{}".format(
        "FCOS-{}-p5-{}".format(config.network.BACKBONE.name, args.extra_flag),
        "fp16" if config.TRAIN.USE_FP16 else "fp32",
        "sync_bn" if config.network.sync_bn else "normal_bn",
        "hvd" if config.use_hvd else "", config.dataset.dataset_type,
        config.TRAIN.lr, config.TRAIN.image_short_size,
        config.TRAIN.image_max_long_size)

    if args.demo:
        config.val.params_file = args.demo_params
        config.val.viz = args.viz
        demo_net(config)
    else:
        os.makedirs(config.TRAIN.log_path, exist_ok=True)
        # A timestamped file name gives every run its own log file.
        log_init(filename=os.path.join(config.TRAIN.log_path,
                                       "train_{}.log".format(time.time())))
        msg = pprint.pformat(config)
        logging.info(msg)
        train_net(config)