Example #1
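Trains a ResNet-101 DeepLab segmentation network on PASCAL VOC 2012 (note `PascalVOC12.class_num()` and the `train_aug` split) with the Adam optimizer. A separate evaluation symbol is built with `is_train=False` and `use_global_stats=True` so validation runs with frozen batch-norm statistics.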
def train_net(args, ctx):
    logger.auto_set_dir()

    sym_instance = resnet101_deeplab_new()
    sym = sym_instance.get_symbol(NUM_CLASSES, is_train=True, use_global_stats=False)
    eval_sym_instance = resnet101_deeplab_new()
    eval_sym = eval_sym_instance.get_symbol(NUM_CLASSES, is_train=False, use_global_stats=True)

    # setup multi-gpu
    gpu_nums = len(ctx)
    input_batch_size = args.batch_size * gpu_nums

    train_data = get_data("train_aug", DATA_DIR, LIST_DIR, len(ctx))
    test_data = get_data("val", DATA_DIR, LIST_DIR, len(ctx))

    # infer shape
    data_shape_dict = {
        'data': (args.batch_size, 3, args.crop_size[0], args.crop_size[1]),
        'label': (args.batch_size, 1, args.crop_size[0], args.crop_size[1])
    }

    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)


    # load and initialize params
    epoch_string = args.load.rsplit("-", 2)[1]
    begin_epoch = 1
    if not args.scratch:
        begin_epoch = int(epoch_string)
        logger.info('continue training from {}'.format(begin_epoch))
        arg_params, aux_params = load_init_param(args.load, convert=True)
    else:
        logger.info(args.load)
        arg_params, aux_params = load_init_param(args.load, convert=True)
        sym_instance.init_weights(arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    data_names = ['data']
    label_names = ['label']

    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        context=ctx,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    fcn_loss_metric = metric.FCNLogLossMetric(args.frequent, PascalVOC12.class_num())
    eval_metrics = mx.metric.CompositeEvalMetric()
    eval_metrics.add(fcn_loss_metric)

    # callback
    batch_end_callbacks = [callback.Speedometer(input_batch_size, frequent=args.frequent)]
    #batch_end_callbacks = [mx.callback.ProgressBar(total=train_data.size/train_data.batch_size)]
    epoch_end_callbacks = [
        mx.callback.module_checkpoint(mod,
                                      os.path.join(logger.get_logger_dir(), "mxnetgo"),
                                      period=1,
                                      save_optimizer_states=True)
    ]


    lr_scheduler = StepScheduler(train_data.size() * EPOCH_SCALE, lr_step_list)

    # optimizer
    optimizer_params = {
        'learning_rate': init_lr,
        'lr_scheduler': lr_scheduler,
    }

    logger.info("epoch scale = {}".format(EPOCH_SCALE))
    mod.fit(train_data=train_data,
            args=args,
            eval_sym=eval_sym,
            eval_sym_instance=eval_sym_instance,
            eval_data=test_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callbacks,
            batch_end_callback=batch_end_callbacks,
            kvstore=kvstore,
            optimizer='adam',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            epoch_scale=EPOCH_SCALE,
            validation_on_last=validation_on_last)
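For reference, a minimal invocation sketch. The flag names below (`--batch_size`, `--crop_size`, `--load`, `--scratch`, `--frequent`) mirror the attributes `train_net` reads from `args`, but they are assumptions; the repo's real argument parser may define them differently.

import argparse
import mxnet as mx

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=1)  # per-GPU batch size
    parser.add_argument('--crop_size', type=int, nargs=2, default=[473, 473])
    parser.add_argument('--load', type=str, default='resnet101-0000')  # checkpoint prefix-epoch
    parser.add_argument('--scratch', action='store_true')  # initialize from pretrained weights
    parser.add_argument('--frequent', type=int, default=100)  # logging frequency
    args = parser.parse_args()

    train_net(args, ctx=[mx.gpu(0), mx.gpu(1)])  # one context per GPU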
Example #2
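A variant of the same loop that copies the symbol definition into the log directory for reproducibility, declares explicit maximum data/label shapes so `MutableModule` can pre-allocate its executors, and trains with SGD (momentum 0.9, weight decay 0.0005).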
def train_net(args, ctx):
    logger.auto_set_dir()

    from symbols.symbol_resnet import resnet101_deeplab_new

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', 'symbol_resnet.py'),
                 logger.get_logger_dir())  # copy the symbol file to the log dir for debugging convenience

    sym_instance = resnet101_deeplab_new()
    sym = sym_instance.get_symbol(NUM_CLASSES, is_train=True, memonger=False)

    #digraph = mx.viz.plot_network(sym, save_format='pdf')
    #digraph.render()

    # setup multi-gpu
    gpu_nums = len(ctx)
    input_batch_size = args.batch_size * gpu_nums

    train_data = get_data("train", DATA_DIR, LIST_DIR, len(ctx))
    test_data = get_data("val", DATA_DIR, LIST_DIR, len(ctx))

    # infer max shape
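    # MutableModule rebinds its executors whenever input shapes change;
    # declaring the largest data/label shapes up front lets it allocate
    # memory once. With a single fixed crop size the max() below is
    # trivial, but the structure accommodates multi-scale training.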
    max_scale = [args.crop_size]
    max_data_shape = [('data', (args.batch_size, 3,
                                max([v[0] for v in max_scale]),
                                max([v[1] for v in max_scale])))]
    max_label_shape = [('label', (args.batch_size, 1,
                                  max([v[0] for v in max_scale]),
                                  max([v[1] for v in max_scale])))]

    # infer shape
    data_shape_dict = {
        'data': (args.batch_size, 3, args.crop_size[0], args.crop_size[1]),
        'label': (args.batch_size, 1, args.crop_size[0], args.crop_size[1])
    }

    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    eval_sym_instance = resnet101_deeplab_new()

    # load and initialize params
    epoch_string = args.load.rsplit("-", 2)[1]
    begin_epoch = 1
    if not args.scratch:
        begin_epoch = int(epoch_string)
        logger.info('continue training from {}'.format(begin_epoch))
        arg_params, aux_params = load_init_param(args.load, convert=True)
    else:
        logger.info(args.load)
        arg_params, aux_params = load_init_param(args.load, convert=True)
        sym_instance.init_weights(arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    data_names = ['data']
    label_names = ['label']

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(gpu_nums)],
        max_label_shapes=[max_label_shape for _ in range(gpu_nums)],
        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    fcn_loss_metric = metric.FCNLogLossMetric(args.frequent)
    eval_metrics = mx.metric.CompositeEvalMetric()

    for child_metric in [fcn_loss_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callbacks = [
        callback.Speedometer(input_batch_size, frequent=args.frequent)
    ]
    #batch_end_callbacks = [mx.callback.ProgressBar(total=train_data.size/train_data.batch_size)]
    epoch_end_callbacks = [
        mx.callback.module_checkpoint(mod,
                                      os.path.join(logger.get_logger_dir(), "mxnetgo"),
                                      period=1,
                                      save_optimizer_states=True)
    ]

    lr_scheduler = StepScheduler(train_data.size() * EPOCH_SCALE, lr_step_list)

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': 2.5e-4,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    logger.info("epoch scale = {}".format(EPOCH_SCALE))
    mod.fit(train_data=train_data,
            args=args,
            eval_sym_instance=eval_sym_instance,
            eval_data=test_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callbacks,
            batch_end_callback=batch_end_callbacks,
            kvstore=kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            epoch_scale=EPOCH_SCALE,
            validation_on_last=validation_on_last)
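The `StepScheduler` above is constructed from the number of iterations per epoch and `lr_step_list`; its implementation is not shown here. The following is a hypothetical sketch under the assumption that `lr_step_list` holds `(epoch, learning_rate)` pairs sorted by epoch; the real mxnetgo class may differ.

import mxnet as mx

class StepScheduler(mx.lr_scheduler.LRScheduler):
    """Piecewise-constant learning-rate schedule (hypothetical sketch)."""

    def __init__(self, iters_per_epoch, lr_step_list):
        super(StepScheduler, self).__init__()
        self.iters_per_epoch = iters_per_epoch
        self.lr_step_list = lr_step_list  # assumed: [(epoch, lr), ...] sorted by epoch

    def __call__(self, num_update):
        # Return the rate of the first step whose epoch boundary the
        # current update count has not yet crossed.
        cur_epoch = num_update // self.iters_per_epoch
        lr = self.lr_step_list[-1][1]
        for epoch, step_lr in self.lr_step_list:
            if cur_epoch < epoch:
                lr = step_lr
                break
        return lr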
Example #3
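The same pipeline adapted to Cityscapes (`Cityscapes.class_num()`), using the DeepLab-v2 symbol and the same SGD settings as Example #2.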
def train_net(args, ctx):
    logger.auto_set_dir()

    from symbols.symbol_resnet_deeplabv2 import resnet101_deeplab_new
    sym_instance = resnet101_deeplab_new()
    sym = sym_instance.get_symbol(NUM_CLASSES,
                                  is_train=True,
                                  use_global_stats=False)

    # setup multi-gpu
    gpu_nums = len(ctx)
    input_batch_size = args.batch_size * gpu_nums

    train_dataflow = get_data("train", LIST_DIR, len(ctx))
    val_dataflow = get_data("val", LIST_DIR, len(ctx))

    eval_sym_instance = resnet101_deeplab_new()
    eval_sym = eval_sym_instance.get_symbol(args.class_num,
                                            is_train=False,
                                            use_global_stats=True)

    # infer shape
    data_shape_dict = {
        'data': (args.batch_size, 3, args.crop_size[0], args.crop_size[1]),
        'label': (args.batch_size, 1, args.crop_size[0], args.crop_size[1])
    }

    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    epoch_string = args.load.rsplit("-", 2)[1]
    begin_epoch = 1
    if not args.scratch:
        begin_epoch = int(epoch_string)
        logger.info('continue training from {}'.format(begin_epoch))
        arg_params, aux_params = load_init_param(args.load, convert=True)
    else:
        logger.info(args.load)
        arg_params, aux_params = load_init_param(args.load, convert=True)
        sym_instance.init_weights(arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)
    mod = MutableModule(sym,
                        data_names=['data'],
                        label_names=['label'],
                        context=ctx,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    fcn_loss_metric = metric.FCNLogLossMetric(args.frequent,
                                              Cityscapes.class_num())
    eval_metrics = mx.metric.CompositeEvalMetric()

    for child_metric in [fcn_loss_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callbacks = [
        callback.Speedometer(input_batch_size, frequent=args.frequent)
    ]
    epoch_end_callbacks = [
        mx.callback.module_checkpoint(mod,
                                      os.path.join(logger.get_logger_dir(), "mxnetgo"),
                                      period=1,
                                      save_optimizer_states=True)
    ]

    lr_scheduler = StepScheduler(train_dataflow.size() * EPOCH_SCALE,
                                 lr_step_list)

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': 2.5e-4,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    logger.info("epoch scale = {}".format(EPOCH_SCALE))
    mod.fit(train_data=train_dataflow,
            args=args,
            eval_sym=eval_sym,
            eval_sym_instance=eval_sym_instance,
            eval_data=val_dataflow,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callbacks,
            batch_end_callback=batch_end_callbacks,
            kvstore=kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            epoch_scale=EPOCH_SCALE,
            validation_on_last=validation_on_last)
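All three examples share the same skeleton: build a training symbol and a separate evaluation symbol, infer and check parameter shapes against the configured crop size, resume from a checkpoint or initialize from pretrained weights, and drive training through `MutableModule.fit` with a step learning-rate schedule. They differ mainly in dataset (PASCAL VOC in Example #1, Cityscapes in Example #3), optimizer (Adam in Example #1, SGD in #2 and #3), and whether maximum shapes are pre-declared (Example #2). Module-level names such as `NUM_CLASSES`, `EPOCH_SCALE`, `kvstore`, `lr_step_list`, `end_epoch`, and `fixed_param_prefix` are configuration globals defined elsewhere in each script.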