def __init__(self, network, prefix, epoch, ctx_id=0, mask_nms=True):
        self.ctx_id = ctx_id
        self.ctx = mx.gpu(self.ctx_id)
        self.mask_nms = mask_nms
        #self.nms_threshold = 0.3
        #self._bbox_pred = nonlinear_pred
        if not self.mask_nms:
            self.nms = gpu_nms_wrapper(config.TEST.NMS, self.ctx_id)
        else:
            self.nms = gpu_nms_wrapper(config.TEST.RPN_NMS_THRESH, self.ctx_id)
        #self.nms = py_nms_wrapper(config.TEST.NMS)

        sym = eval('get_' + network + '_mask_test')(
            num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
        #arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        arg_params, aux_params = load_param(prefix,
                                            epoch,
                                            convert=True,
                                            ctx=self.ctx,
                                            process=True)
        split = False
        max_image_shape = (1, 3, 1600, 1600)
        #max_image_shape = (1,3,1200,2200)
        max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
        mod = MutableModule(symbol=sym,
                            data_names=["data", "im_info"],
                            label_names=None,
                            max_data_shapes=max_data_shapes,
                            context=self.ctx)
        mod.bind(data_shapes=max_data_shapes,
                 label_shapes=None,
                 for_training=False)
        mod.init_params(arg_params=arg_params, aux_params=aux_params)
        self.model = mod
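A minimal usage sketch for this constructor, assuming it is the __init__ of a detector wrapper class; the class name, network name, and checkpoint prefix below are hypothetical:

# Hypothetical class/checkpoint names; 'resnet' must match an existing
# get_<network>_mask_test symbol factory.
detector = MaskDetector(network='resnet', prefix='model/e2e', epoch=0,
                        ctx_id=0, mask_nms=True)
# detector.model is a bound MutableModule that accepts inputs up to the
# declared maximum of (1, 3, 1600, 1600).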
Example #2
def get_net(symbol, prefix, epoch, ctx):
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # infer shape
    data_shape_dict = dict(DATA_SHAPES)
    arg_names, aux_names = symbol.list_arguments(), symbol.list_auxiliary_states()
    arg_shape, _, aux_shape = symbol.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(arg_names, arg_shape))
    aux_shape_dict = dict(zip(aux_names, aux_shape))

    # check shapes
    for k in symbol.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(
                arg_params[k].shape)
    for k in symbol.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(
                aux_params[k].shape)

    predictor = Predictor(symbol, DATA_NAMES, LABEL_NAMES, context=ctx,
                          provide_data=DATA_SHAPES, provide_label=LABEL_SHAPES,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
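Note: this get_net relies on module-level constants that the snippet does not show. A plausible definition for a single-image Faster R-CNN test setup (shapes are illustrative assumptions, not taken from the source):

# Assumed module-level constants referenced by get_net; the 600x1000
# test shape is illustrative only.
DATA_NAMES = ['data', 'im_info']
LABEL_NAMES = None
DATA_SHAPES = [('data', (1, 3, 600, 1000)), ('im_info', (1, 3))]
LABEL_SHAPES = None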
Example #3
def get_net(symbol, prefix, epoch, ctx):
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # infer shape
    data_shape_dict = dict(DATA_SHAPES)
    arg_names, aux_names = symbol.list_arguments(), symbol.list_auxiliary_states()
    arg_shape, _, aux_shape = symbol.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(arg_names, arg_shape))
    aux_shape_dict = dict(zip(aux_names, aux_shape))

    # check shapes
    for k in symbol.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in symbol.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    predictor = Predictor(symbol, DATA_NAMES, LABEL_NAMES, context=ctx,
                          provide_data=DATA_SHAPES, provide_label=LABEL_SHAPES,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
Example #4
def test_rcnn(network, dataset, image_set,
              dataset_path,
              ctx, prefix, epoch,
              vis, shuffle, has_rpn, proposal, max_box, thresh):
    # set config
    assert has_rpn, "only end-to-end case was checked in this project."
    config.TEST.HAS_RPN = True

    # load symbol and testing data
    sym = eval('get_' + network)(is_train=False, num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    imdb = eval(dataset)(image_set, dataset_path)
    roidb = imdb.gt_roidb()
    roidb = filter_roidb(roidb)
    imdb.num_images = len(roidb)

    # get test data iter
    test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn, nThreads=default.prefetch_thread_num)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # infer shape
    data_shape_dict = dict(test_data.provide_data)
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

    # check parameters
    for k in sym.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data]
    label_names = None  # [k[0] for k in test_data.provide_label]
    max_data_shape = [('data', (config.NUM_IMAGES_3DCE, config.NUM_SLICES, config.MAX_SIZE, config.MAX_SIZE))]
    if not has_rpn:
        max_data_shape.append(('rois', (1, config.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, #provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection
    acc = pred_eval(predictor, test_data, imdb, vis=vis, max_box=max_box, thresh=thresh)

    return acc
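The parameter-shape check above recurs nearly verbatim across these examples and could be factored into a small helper. A sketch (the helper name and the factoring are ours; the logic is copied from the examples):

def check_param_shapes(sym, arg_params, aux_params,
                       arg_shape_dict, aux_shape_dict, data_shape_dict):
    # Assert every loaded parameter matches the shape inferred from the symbol.
    for k in sym.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for %s inferred %s provided %s' % (
                k, arg_shape_dict[k], arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for %s inferred %s provided %s' % (
                k, aux_shape_dict[k], aux_params[k].shape)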
Example #5
def demo_maskrcnn(network, dataset, image_set, root_path, dataset_path, result_path,
              ctx, prefix, epoch,
              vis, shuffle, has_rpn, proposal, thresh):
    # set config
    if has_rpn:
        config.TEST.HAS_RPN = True

    # print config
    pprint.pprint(config)

    # load symbol and testing data
    if has_rpn:
        sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
        imdb = eval(dataset)(image_set, root_path, dataset_path)
        roidb = imdb.gt_roidb()
    else:
        raise NotImplementedError

    test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # infer shape
    data_shape_dict = dict(test_data.provide_data)
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

    # check parameters
    for k in sym.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data]
    label_names = None
    max_data_shape = [('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    if not has_rpn:
        max_data_shape.append(('rois', (1, config.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    pred_demo_mask(predictor, test_data, imdb, roidb, result_path, vis=vis, thresh=thresh)
Example #6
def get_net(data, sym, prefix, epoch, ctx):
    # get predictor
    data = [[mx.nd.array(data[i][name]) for name in DATA_NAMES] for i in xrange(len(data))]
    max_data_shape = [('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    provide_data = [[(k, v.shape) for k, v in zip(DATA_NAMES, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    #print DATA_NAMES, LABEL_NAMES, ctx, max_data_shape, provide_data, provide_label
    predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES,
                          context=[ctx], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
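This variant expects data as a list of dicts keyed by DATA_NAMES. A hypothetical input, assuming DATA_NAMES = ['data', 'im_info'] and illustrative shapes:

import numpy as np

# One single-image batch; keys and shapes are assumptions for illustration.
batch = {'data': np.zeros((1, 3, 600, 1000), dtype=np.float32),
         'im_info': np.array([[600.0, 1000.0, 1.0]], dtype=np.float32)}
data = [batch]  # get_net then wraps each entry with mx.nd.array per DATA_NAMES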
Example #7
def get_net(symbol, prefix, epoch, ctx):
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)
    predictor = Predictor(symbol,
                          DATA_NAMES,
                          LABEL_NAMES,
                          context=ctx,
                          provide_data=DATA_SHAPES,
                          provide_label=LABEL_SHAPES,
                          arg_params=arg_params,
                          aux_params=aux_params)
    return predictor
Example #8
def get_net(prefix, epoch, ctx):
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)

    predictor = Predictor(gensym.gen_sym_infer,
                          DATA_NAMES,
                          LABEL_NAMES,
                          context=ctx,
                          max_data_shapes=dict(DATA_SHAPES),
                          provide_data=DATA_SHAPES,
                          provide_label=LABEL_SHAPES,
                          arg_params=arg_params,
                          aux_params=aux_params)
    return predictor
Example #9
def get_net(prefix, epoch, ctx):
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)

    # infer shape
    data_shape_dict = dict(DATA_SHAPES)

    symbol = gen_sym_infer(data_shape_dict,
                           len(ctx) if isinstance(ctx, list) else 1)
    # data = mx.symbol.Variable(name="data", shape=(1,3,600,903))
    # im_info = mx.symbol.Variable(name="im_info", shape=(1,3))
    # symbol = get_vgg_text_rpn_test(data, im_info)

    arg_names, aux_names = symbol.list_arguments(), symbol.list_auxiliary_states()
    arg_shape, _, aux_shape = symbol.infer_shape_partial()
    arg_shape_dict = dict(zip(arg_names, arg_shape))
    aux_shape_dict = dict(zip(aux_names, aux_shape))

    # check shapes
    for k in symbol.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in symbol.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    predictor = Predictor(gen_sym_infer,
                          DATA_NAMES,
                          LABEL_NAMES,
                          context=ctx,
                          max_data_shapes=data_shape_dict,
                          provide_data=DATA_SHAPES,
                          provide_label=LABEL_SHAPES,
                          arg_params=arg_params,
                          aux_params=aux_params)
    return predictor
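Unlike infer_shape, infer_shape_partial tolerates missing input shapes, which is why it can be called with no arguments here: the symbol returned by gen_sym_infer already carries its input shapes. A tiny standalone illustration (independent of the example above):

import mxnet as mx

x = mx.sym.Variable('data')
fc = mx.sym.FullyConnected(x, num_hidden=8, name='fc')
# infer_shape would fail without a data shape; the partial variant returns
# whatever it can deduce and leaves unknown shapes as empty placeholders.
arg_shapes, _, _ = fc.infer_shape_partial()
print(dict(zip(fc.list_arguments(), arg_shapes)))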
Example #10
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + args.network + '_train')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              batch_size=input_batch_size,
                              shuffle=not args.no_shuffle,
                              ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=args.frequent,
                                                 auto_reset=False)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
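The learning-rate bookkeeping above converts epoch milestones into iteration counts for MultiFactorScheduler. A worked example with illustrative numbers (8 GPUs, 80,000 roidb entries, lr_step='5', begin_epoch=0; none of these values come from the source):

base_lr, lr_factor = 0.001, 0.1
begin_epoch, batch_size, num_roidb = 0, 8, 80000   # illustrative only
lr_epoch = [5]                                     # parsed from lr_step='5'
lr_epoch_diff = [e - begin_epoch for e in lr_epoch if e > begin_epoch]  # [5]
lr = base_lr * lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))        # 0.001
lr_iters = [int(e * num_roidb / batch_size) for e in lr_epoch_diff]     # [50000]
# MultiFactorScheduler(lr_iters, lr_factor) then scales lr by 0.1 at step 50000.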
Example #11
def test_rpn(network, dataset, image_set, root_path, dataset_path, ctx, prefix,
             epoch, vis, shuffle, thresh):
    # rpn generate proposal config
    config.TEST.HAS_RPN = True

    # print config
    pprint(config)

    # load symbol
    sym = eval('get_' + network + '_rpn_test')(num_anchors=config.NUM_ANCHORS)

    # load dataset and prepare imdb for training
    imdb = eval(dataset)(image_set, root_path, dataset_path)
    roidb = imdb.gt_roidb()

    # (possibly) group the roidb by aspect
    horizontal_inds, vertical_inds = [], []
    for ind, roirec in enumerate(roidb):
        if roirec['width'] > roirec['height']:
            horizontal_inds.append(ind)
        else:
            vertical_inds.append(ind)

    aspect_group = len(horizontal_inds) > 0 and len(vertical_inds) > 0
    print("aspect_group={}".format(aspect_group))

    if aspect_group:
        horizontal_roidb = [roidb[ind] for ind in horizontal_inds]
        vertical_roidb = [roidb[ind] for ind in vertical_inds]
        l1 = TestLoader(horizontal_roidb,
                        batch_size=len(ctx),
                        shuffle=shuffle,
                        has_rpn=True)
        l2 = TestLoader(vertical_roidb,
                        batch_size=len(ctx),
                        shuffle=shuffle,
                        has_rpn=True)
        test_data = SequentialLoader(iters=[l1, l2])
    else:
        test_data = TestLoader(roidb,
                               batch_size=len(ctx),
                               shuffle=shuffle,
                               has_rpn=True)

    # sanity check
    _, out_shape, _ = sym.get_internals().infer_shape(
        **dict(test_data.provide_data))
    out_names = sym.get_internals().list_outputs()
    pprint_with_newlines(zip(out_names, out_shape), "output shape: ")

    # load model
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=None)

    # infer shape
    data_shape_dict = dict(test_data.provide_data)
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

    # check parameters
    for k in sym.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(
                arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(
                aux_params[k].shape)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data]
    label_names = None if test_data.provide_label is None else [
        k[0] for k in test_data.provide_label
    ]
    max_data_shape = [('data', (len(ctx), 3, max([v[0]
                                                  for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]

    # create predictor
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # start testing
    imdb_boxes, original_boxes = generate_proposals(predictor,
                                                    test_data,
                                                    imdb,
                                                    vis=vis,
                                                    thresh=thresh)

    if aspect_group:
        # imdb_boxes = [imdb_boxes[ind] for ind in (horizontal_inds + vertical_inds)]
        # original_boxes = [original_boxes[ind] for ind in (horizontal_inds + vertical_inds)]
        reordered_imdb_boxes = [None] * len(imdb_boxes)
        reordered_original_boxes = [None] * len(imdb_boxes)
        for i, orig_ind in enumerate(horizontal_inds + vertical_inds):
            reordered_imdb_boxes[orig_ind] = imdb_boxes[i]
            reordered_original_boxes[orig_ind] = original_boxes[i]
        imdb_boxes, original_boxes = reordered_imdb_boxes, reordered_original_boxes

    # save results
    rpn_folder = os.path.join(imdb.root_path, 'rpn_data')
    if not os.path.exists(rpn_folder):
        os.mkdir(rpn_folder)

    rpn_file = os.path.join(rpn_folder, imdb.name + '_rpn.pkl')
    with open(rpn_file, 'wb') as f:
        cPickle.dump(imdb_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if thresh > 0:
        full_rpn_file = os.path.join(rpn_folder, imdb.name + '_full_rpn.pkl')
        with open(full_rpn_file, 'wb') as f:
            cPickle.dump(original_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('wrote rpn proposals to {}'.format(rpn_file))

    imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
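The aspect-grouping branch above inverts the permutation introduced by concatenating the horizontal and vertical roidbs before detection. A toy illustration of that reordering (all indices made up):

horizontal_inds, vertical_inds = [0, 2], [1, 3]  # made-up split
results = ['h0', 'h2', 'v1', 'v3']               # order produced by SequentialLoader
reordered = [None] * len(results)
for i, orig_ind in enumerate(horizontal_inds + vertical_inds):
    reordered[orig_ind] = results[i]
assert reordered == ['h0', 'v1', 'h2', 'v3']     # back in original roidb order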
Example #12
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path,
                            flip=not args.no_flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
Example #13
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    # setup config
    #init_config()
    #print(config)
    # setup multi-gpu

    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load symbol
    #sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    #feat_sym = sym.get_internals()['rpn_cls_score_output']
    #train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
    #                          ctx=ctx, work_load_list=args.work_load_list,
    #                          feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
    #                          anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    sym = eval('get_' + args.network + '_train')()
    #print(sym.get_internals())
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' %
                                            stride])

    #train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
    #                              ctx=ctx, work_load_list=args.work_load_list)
    train_data = CropLoader(feat_sym,
                            roidb,
                            batch_size=input_batch_size,
                            shuffle=not args.no_shuffle,
                            ctx=ctx,
                            work_load_list=args.work_load_list)

    # infer max shape
    max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    #max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        #for k in ['rpn_conv_3x3', 'rpn_cls_score', 'rpn_bbox_pred', 'cls_score', 'bbox_pred']:
        #  _k = k+"_weight"
        #  if _k in arg_shape_dict:
        #    v = 0.001 if _k.startswith('bbox_') else 0.01
        #    arg_params[_k] = mx.random.normal(0, v, shape=arg_shape_dict[_k])
        #    print('init %s with normal %.5f'%(_k,v))
        #  _k = k+"_bias"
        #  if _k in arg_shape_dict:
        #    arg_params[_k] = mx.nd.zeros(shape=arg_shape_dict[_k])
        #    print('init %s with zero'%(_k))

        for k, v in arg_shape_dict.items():
            if k.find('upsampling') >= 0:
                print('initializing upsampling_weight', k)
                arg_params[k] = mx.nd.zeros(shape=v)
                init = mx.init.Initializer()
                init._init_bilinear(k, arg_params[k])
                #print(args[k])

    # check parameter shapes
    #for k in sym.list_arguments():
    #    if k in data_shape_dict:
    #        continue
    #    assert k in arg_params, k + ' not initialized'
    #    assert arg_params[k].shape == arg_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    #for k in sym.list_auxiliary_states():
    #    assert k in aux_params, k + ' not initialized'
    #    assert aux_params[k].shape == aux_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    #mod = MutableModule(sym, data_names=data_names, label_names=label_names,
    #                    logger=logger, context=ctx, work_load_list=args.work_load_list,
    #                    max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
    #                    fixed_param_prefix=fixed_param_prefix)
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx,
                 work_load_list=args.work_load_list,
                 fixed_param_names=fixed_param_names)

    # decide training params
    # metric
    eval_metrics = mx.metric.CompositeEvalMetric()
    #if len(sym.list_outputs())>4:
    #  metric_names = ['RPNAccMetric', 'RPNLogLossMetric', 'RPNL1LossMetric', 'RCNNAccMetric', 'RCNNLogLossMetric', 'RCNNL1LossMetric']
    #else:#train rpn only
    #print('sym', sym.list_outputs())
    #metric_names = ['RPNAccMetric', 'RPNLogLossMetric', 'RPNL1LossMetric']
    mids = [0, 4, 8]
    for mid in mids:
        _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1)
        eval_metrics.add(_metric)
        #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
        #eval_metrics.add(_metric)
        _metric = metric.RPNL1LossMetric(loss_idx=mid + 2, weight_idx=mid + 3)
        eval_metrics.add(_metric)

    #rpn_eval_metric = metric.RPNAccMetric()
    #rpn_cls_metric = metric.RPNLogLossMetric()
    #rpn_bbox_metric = metric.RPNL1LossMetric()
    #eval_metric = metric.RCNNAccMetric()
    #cls_metric = metric.RCNNLogLossMetric()
    #bbox_metric = metric.RCNNL1LossMetric()
    #for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
    #    eval_metrics.add(child_metric)
    # callback
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    #epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    epoch_end_callback = None
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff
    ]

    #lr_iters = [36000,42000] #TODO
    #lr_iters = [40000,50000,60000] #TODO
    #lr_iters = [40,50,60] #TODO
    end_epoch = 10000
    #lr_iters = [4,8] #TODO
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    #lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    opt = optimizer.SGD(learning_rate=lr,
                        momentum=0.9,
                        wd=0.0005,
                        rescale_grad=1.0 / len(ctx),
                        clip_gradient=None)
    initializer = mx.init.Xavier()
    #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style

    if len(ctx) > 1:
        train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size,
                                  frequent=args.frequent,
                                  auto_reset=False)
    global_step = [0]

    def save_model(epoch):
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            _name = 'rpn_cls_score_stride%d_output' % stride
            rpn_cls_score = all_layers[_name]

            # prepare rpn data
            rpn_cls_score_reshape = mx.symbol.Reshape(
                data=rpn_cls_score,
                shape=(0, 2, -1, 0),
                name="rpn_cls_score_reshape_stride%d" % stride)

            rpn_cls_prob = mx.symbol.SoftmaxActivation(
                data=rpn_cls_score_reshape,
                mode="channel",
                name="rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(
                data=rpn_cls_prob,
                shape=(0, 2 * num_anchors, -1, 0),
                name='rpn_cls_prob_reshape_stride%d' % stride)
            _name = 'rpn_bbox_pred_stride%d_output' % stride
            rpn_bbox_pred = all_layers[_name]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        #global global_step
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        for _iter in lr_iters:
            if mbatch == _iter:
                opt.lr *= 0.1
                print('lr change to',
                      opt.lr,
                      ' in batch',
                      mbatch,
                      file=sys.stderr)
                break

        if mbatch % 1000 == 0:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(mbatch)

        if mbatch == lr_iters[-1]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            #arg, aux = mod.get_params()
            #mx.model.save_checkpoint(prefix, 99, mod.symbol, arg, aux)
            sys.exit(0)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=_batch_callback,
            kvstore=args.kvstore,
            optimizer=opt,
            initializer=initializer,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
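save_model above writes checkpoints via mx.model.save_checkpoint, using the running batch count as the epoch number and 0 for the final save. Reloading is symmetric; a sketch assuming the same prefix string used during training:

import mxnet as mx

prefix = 'model/rpn'  # hypothetical; must match the training prefix
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, 0)  # 0 = final save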
Example #14
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol

    if args.use_global_context or args.use_roi_align:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES,
            num_anchors=config.NUM_ANCHORS,
            use_global_context=args.use_global_context,
            use_roi_align=args.use_roi_align)
    else:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)

    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    if not args.use_ava_recordio:
        # load dataset and prepare imdb for training
        image_sets = [iset for iset in args.image_set.split('+')]
        roidbs = [
            load_gt_roidb(args.dataset,
                          image_set,
                          args.root_path,
                          args.dataset_path,
                          flip=not args.no_flip) for image_set in image_sets
        ]
        roidb = merge_roidb(roidbs)
        roidb = filter_roidb(roidb)

        # load training data
        train_data = AnchorLoader(
            feat_sym,
            roidb,
            batch_size=input_batch_size,
            shuffle=not args.no_shuffle,
            ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)
    else:
        f = open(args.classes_names)
        classes = ['__background__']
        for line in f.readlines():
            classes.append(line.strip().split(' ')[0])

        path_imgidx = args.ava_recordio_name + '.idx'
        path_imgrec = args.ava_recordio_name + '.rec'

        record = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type

        train_data = AnchorLoaderAvaRecordIO(
            feat_sym,
            record,
            classes,
            batch_size=input_batch_size,
            shuffle=not args.no_shuffle,
            ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)
    print('arg shape')
    pprint.pprint(arg_shape_dict)
    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    if args.use_global_context:
        # additional params for using global context
        """
        for arg_param_name in sym.list_arguments():
            if 'stage5' in arg_param_name:
                # print(arg_param_name, arg_param_name.replace('stage5', 'stage4'))
                arg_params[arg_param_name] = arg_params[arg_param_name.replace('stage5', 'stage4')].copy()  # params of stage5 is initialized from stage4
        arg_params['bn2_gamma'] = arg_params['bn1_gamma'].copy()
        arg_params['bn2_beta'] = arg_params['bn1_beta'].copy()
        """
        for aux_param_name in sym.list_auxiliary_states():
            if 'stage5' in aux_param_name:
                # print(aux_param_name, aux_param_name.replace('stage5', 'stage4'))
                aux_params[aux_param_name] = aux_params[aux_param_name.replace(
                    'stage5', 'stage4')].copy()  # stage5 params are initialized from stage4
        aux_params['bn2_moving_mean'] = aux_params['bn1_moving_mean'].copy()
        aux_params['bn2_moving_var'] = aux_params['bn1_moving_var'].copy()

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    if not args.use_ava_recordio:
        lr_iters = [
            int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
        ]
    else:
        lr_iters = [
            int(epoch * train_data.provide_size() / batch_size)
            for epoch in lr_epoch_diff
        ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Example #15
File: demo.py  Project: kekedan/mx-rcnn
def get_net(prefix, epoch, ctx):
    args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
    sym = get_vgg_test()
    detector = Detector(sym, ctx, args, auxs)
    return detector
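A hypothetical call for this variant (the checkpoint prefix and epoch depend on the trained model):

import mxnet as mx

detector = get_net('model/vgg16', 10, mx.gpu(0))  # prefix/epoch assumed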
Example #16
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym_instance = eval('symbol_' + args.network)()
    sym_gen = sym_instance.get_symbol
    sym = sym_gen(46, config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    dataset = Dataset(args.root_path, args.dataset, args.subset, split=args.split)
    roidb = dataset.gt_roidb()
    W = dataset.W

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    #get a new symbol
    bucket_key = train_data.bucket_key
    print(train_data.provide_data)
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    sym_instance.infer_shape(data_shape_dict)
    # recover the shape dictionaries explicitly; arg_shape_dict is needed below
    # to size embed_weight when word2vec initialization is disabled
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

    # load and initialize params
    if args.resume:
        print("continue training from epoch {}".format(begin_epoch))
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        if config.RNN.USE_W2V:
            arg_params['embed_weight'] = mx.nd.array(W)
        else:
            arg_params['embed_weight'] = mx.random.uniform(0, 0.01, shape=arg_shape_dict['embed_weight'])
        sym_instance.init_weight(config, arg_params, aux_params)
    # shape checking is intentionally skipped here:
    # for k in arg_shape_dict:
    #     if k in data_shape_dict:
    #         continue
    #     assert k in arg_params, k + ' not initialized'
    #     assert arg_params[k].shape == arg_shape_dict[k], \
    #         'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    # for k in sym.list_auxiliary_states():
    #     assert k in aux_params, k + ' not initialized'
    #     assert aux_params[k].shape == aux_shape_dict[k], \
    #         'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym_gen, config, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(config.ENCODER_CELL, prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}
    # initializer for the fused RNN
    # TODO: not successfully wired in yet; consider asking about it in a GitHub issue
    initializer = mx.initializer.FusedRNN(init=mx.init.Xavier(factor_type='in', magnitude=2.34),
                                          num_hidden=1024, num_layers=2, mode='lstm')
    # train
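    # allow_missing=True lets parameters absent from the loaded checkpoint
    # (e.g. the new RNN weights) be filled in by the Xavier initializer below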
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params, allow_missing=True,
            initializer=mx.init.Xavier(factor_type='in', magnitude=2.34),
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
Example #17
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step=50000):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.HAS_RPN = True
    config.TRAIN.BATCH_SIZE = 1
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    config.TRAIN.BG_THRESH_LO = 0.0

    # load symbol
    sym = eval('get_' + args.network + '_train')()
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    if args.flip:
        roidb = imdb.append_flipped_images(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              batch_size=config.TRAIN.BATCH_SIZE,
                              shuffle=True,
                              ctx=ctx,
                              work_load_list=args.work_load_list)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_SIZE, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # load pretrained
    arg_params, aux_params = load_param(pretrained, epoch, convert=True)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)

    # initialize params
    if not args.resume:
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), imdb.num_classes)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), imdb.num_classes)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # optimizer
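    # note: lr_step here is an iteration count (default 50000), not an epoch list;
    # FactorScheduler multiplies the learning rate by 0.1 every lr_step updates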
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': mx.lr_scheduler.FactorScheduler(lr_step, 0.1),
        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Example #18
def init_params(args, sym, train_data):
    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    if args.resume:  # load params from previously trained model
        arg_params, aux_params = load_param(args.e2e_prefix, args.begin_epoch, convert=True)
    else:  # initialize weights from pretrained model and random numbers
        arg_params, aux_params = load_param(args.pretrained, args.pretrained_epoch, convert=True)

        # deal with multiple input CT slices, see 3DCE paper.
        # if NUM_SLICES = 3, pretrained weights won't be changed
        # if NUM_SLICES > 3, extra input channels in conv1_1 will be initialized to 0
        nCh = config.NUM_SLICES
        w1 = arg_params['conv1_1_weight'].asnumpy()
        w1_new = np.zeros((64, nCh, 3, 3), dtype=float)
        w1_new[:, (nCh - 3) // 2:(nCh - 3) // 2 + 3, :, :] = w1
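        # integer division keeps the slice indices ints under Python 3;
        # e.g. with nCh = 5 this becomes w1_new[:, 1:4, :, :] = w1, leaving the
        # two extra slice channels initialized to zero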

        arg_params['conv1_1_new_weight'] = mx.nd.array(w1_new)
        arg_params['conv1_1_new_bias'] = arg_params['conv1_1_bias']
        del arg_params['conv1_1_weight']

        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])

        if config.FRAMEWORK == '3DCE':
            arg_params['conv_new_1_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['conv_new_1_weight'])
            arg_params['conv_new_1_bias'] = mx.nd.zeros(shape=arg_shape_dict['conv_new_1_bias'])
            arg_params['fc6_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['fc6_weight'])
            arg_params['fc6_bias'] = mx.nd.zeros(shape=arg_shape_dict['fc6_bias'])

            arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
            arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
            arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
            arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

        elif config.FRAMEWORK == 'RFCN':
            arg_params['conv_new_1_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['conv_new_1_weight'])
            arg_params['conv_new_1_bias'] = mx.nd.zeros(shape=arg_shape_dict['conv_new_1_bias'])
            arg_params['rfcn_cls_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rfcn_cls_weight'])
            arg_params['rfcn_cls_bias'] = mx.nd.zeros(shape=arg_shape_dict['rfcn_cls_bias'])
            arg_params['rfcn_bbox_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rfcn_bbox_weight'])
            arg_params['rfcn_bbox_bias'] = mx.nd.zeros(shape=arg_shape_dict['rfcn_bbox_bias'])

        elif config.FRAMEWORK == 'Faster':
            arg_params['fc6_small_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['fc6_small_weight'])
            arg_params['fc6_small_bias'] = mx.nd.zeros(shape=arg_shape_dict['fc6_small_bias'])
            arg_params['fc7_small_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['fc7_small_weight'])
            arg_params['fc7_small_bias'] = mx.nd.zeros(shape=arg_shape_dict['fc7_small_bias'])

            arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
            arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
            arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
            arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    logger.info('load param done')
    return arg_params, aux_params
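
The repeated pattern above (Gaussian-initialized weights, zero biases, shapes taken
from symbol inference) could be factored into a small helper; a sketch, using only
the mx.random / mx.nd calls already present in this listing (the helper name is
hypothetical):

def init_weight_bias(arg_params, arg_shape_dict, name, sigma=0.01):
    # Normal(0, sigma) weight and zero bias for layer `name`
    arg_params[name + '_weight'] = mx.random.normal(0, sigma, shape=arg_shape_dict[name + '_weight'])
    arg_params[name + '_bias'] = mx.nd.zeros(shape=arg_shape_dict[name + '_bias'])

# e.g. init_weight_bias(arg_params, arg_shape_dict, 'cls_score') replaces two lines above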
Example #19
def train_net(network,
              dataset,
              image_set,
              root_path,
              dataset_path,
              frequent,
              kvstore,
              work_load_list,
              no_flip,
              no_shuffle,
              resume,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              train_shared,
              lr,
              lr_step,
              proposal,
              maskrcnn_stage=None):

    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + network + '_train')(num_classes=config.NUM_CLASSES,
                                            num_anchors=config.NUM_ANCHORS)
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' %
                                            stride])

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    logger.info(pprint.pformat(config))

    roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask.pkl'
    mean_file = root_path + '/cache/' + dataset + '_roidb_mean.pkl'
    std_file = root_path + '/cache/' + dataset + '_roidb_std.pkl'
    if maskrcnn_stage is not None:
        roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask_' + maskrcnn_stage + '.pkl'
        mean_file = root_path + '/cache/' + dataset + '_roidb_mean_' + maskrcnn_stage + '.pkl'
        std_file = root_path + '/cache/' + dataset + '_roidb_std_' + maskrcnn_stage + '.pkl'

    if osp.exists(roidb_file) and osp.exists(mean_file) and osp.exists(
            std_file):
        print('Load ' + roidb_file)
        with open(roidb_file, 'rb') as f:  # pickle files need binary mode
            roidb = pkl.load(f)
        print('Load ' + mean_file)
        with open(mean_file, 'rb') as f:
            means = pkl.load(f)
        print('Load ' + std_file)
        with open(std_file, 'rb') as f:
            stds = pkl.load(f)
    else:
        # load dataset and prepare imdb for training
        image_sets = image_set.split('+')
        roidbs = [
            load_proposal_roidb(dataset,
                                iset,
                                root_path,
                                dataset_path,
                                proposal=proposal,
                                append_gt=True,
                                flip=not no_flip) for iset in image_sets
        ]
        roidb = merge_roidb(roidbs)

        def filter_roidb(roidb):
            """ remove roidb entries without usable rois """
            def is_valid(entry):
                """ valid images have at least 1 fg or bg roi """
                overlaps = entry['max_overlaps']
                fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
                bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) &
                                   (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
                valid = len(fg_inds) > 0 and len(bg_inds) > 0
                return valid

            num = len(roidb)
            filtered_roidb = [entry for entry in roidb if is_valid(entry)]
            num_after = len(filtered_roidb)
            print('filtered %d roidb entries: %d -> %d' %
                  (num - num_after, num, num_after))

            return filtered_roidb

        roidb = filter_roidb(roidb)
        means, stds = add_bbox_regression_targets(roidb)
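        # means/stds are the bbox-target normalization statistics; do_checkpoint
        # below folds them back into the saved bbox_pred weights at epoch end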
        add_assign_targets(roidb)
        add_mask_targets(roidb)
        for fname, obj in zip([roidb_file, mean_file, std_file],
                              [roidb, means, stds]):
            with open(fname, 'wb') as f:
                pkl.dump(obj, f, -1)  # -1 selects the highest pickle protocol

    # load training data
    train_data = MaskROIIter(roidb,
                             batch_size=input_batch_size,
                             shuffle=not no_shuffle,
                             ctx=ctx,
                             work_load_list=work_load_list,
                             aspect_grouping=config.TRAIN.ASPECT_GROUPING)
    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_label_shape = []
    for s in config.RCNN_FEAT_STRIDE:
        max_data_shape.append(('rois_stride%s' % s,
                               (input_batch_size, config.TRAIN.BATCH_ROIS, 5)))
        max_label_shape.append(('label_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS)))
        max_label_shape.append(
            ('bbox_target_stride%s' % s,
             (input_batch_size,
              config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(
            ('bbox_weight_stride%s' % s,
             (input_batch_size,
              config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(('mask_target_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS,
                                 config.NUM_CLASSES, 28, 28)))
        max_label_shape.append(('mask_weight_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS,
                                 config.NUM_CLASSES, 1, 1)))
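        # per ROI and stride: class-wise bbox targets/weights (4 values per class)
        # and 28x28 per-class mask targets with 1x1 per-class selection weights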
    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)

    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape:')
    pprint.pprint(out_shape_dict)

    # load and initialize params
    if resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        init_bbox_pred = mx.init.Normal(sigma=0.001)
        init_internal = mx.init.Normal(sigma=0.01)
        init = mx.init.Xavier(factor_type="in",
                              rnd_type='gaussian',
                              magnitude=2)
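        # init_internal seeds new weights, init_bbox_pred gives the bbox branch a
        # tighter variance, and the Xavier init covers any new auxiliary states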
        for k in sym.list_arguments():
            if k in data_shape_dict:
                continue
            if k not in arg_params:
                print('init', k)
                arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
                init_internal(k, arg_params[k])
                if k in ['rcnn_fc_bbox_weight', 'bbox_pred_weight']:
                    init_bbox_pred(k, arg_params[k])
                if k.endswith('bias'):
                    arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
                if 'ctx_red_weight' in k:
                    # initialize the context-reduction conv as [I | 0]: identity on
                    # the first half of its input channels, zeros on the second half
                    ctx_shape = np.array(arg_shape_dict[k])
                    ctx_shape[1] //= 2  # integer division keeps the shape an int
                    arg_params[k][:] = np.concatenate(
                        (np.eye(ctx_shape[1]).reshape(ctx_shape),
                         np.zeros(ctx_shape)), axis=1)

        for k in sym.list_auxiliary_states():
            if k not in aux_params:
                print('init', k)
                aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k])
                init(k, aux_params[k])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    if train_shared:
        fixed_param_prefix = config.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = config.FIXED_PARAMS
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNRegLossMetric()
    mask_acc_metric = metric.MaskAccMetric()
    mask_log_metric = metric.MaskLogLossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            eval_metric, cls_metric, bbox_metric, mask_acc_metric,
            mask_log_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=frequent)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0001,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)