def __init__(self, network, prefix, epoch, ctx_id=0, mask_nms=True):
    """Build the GPU inference module for the ``get_<network>_mask_test`` symbol.

    Args:
        network: Name fragment used to look up the symbol factory
            ``get_<network>_mask_test``.
        prefix: Checkpoint prefix handed to ``load_param``.
        epoch: Checkpoint epoch handed to ``load_param``.
        ctx_id: GPU index used for the MXNet context and the NMS wrapper.
        mask_nms: When true the NMS wrapper is built with
            ``config.TEST.RPN_NMS_THRESH``; otherwise with ``config.TEST.NMS``.
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.mask_nms = mask_nms

    # Only the threshold differs between the two modes; the wrapper is the same.
    nms_thresh = config.TEST.RPN_NMS_THRESH if self.mask_nms else config.TEST.NMS
    self.nms = gpu_nms_wrapper(nms_thresh, self.ctx_id)

    # NOTE(review): eval() on a caller-supplied name executes arbitrary code if
    # `network` is ever untrusted -- confirm it only comes from trusted config.
    symbol_factory = eval('get_' + network + '_mask_test')
    sym = symbol_factory(num_classes=config.NUM_CLASSES,
                         num_anchors=config.NUM_ANCHORS)
    arg_params, aux_params = load_param(prefix, epoch, convert=True,
                                        ctx=self.ctx, process=True)

    # The module is bound once with these shapes, which are also passed as the
    # maximum data shapes.
    max_data_shapes = [("data", (1, 3, 1600, 1600)), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym,
                        data_names=["data", "im_info"],
                        label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=self.ctx)
    mod.bind(data_shapes=max_data_shapes, label_shapes=None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)
    self.model = mod
def get_net(symbol, prefix, epoch, ctx):
    """Load a checkpoint, validate it against ``symbol`` and wrap it in a Predictor.

    Shapes are inferred from the module-level ``DATA_SHAPES``. Every argument
    that is neither a data input nor has ``'label'`` in its name, and every
    auxiliary state, must be present in the loaded parameters with the
    inferred shape.

    Args:
        symbol: Network symbol to run.
        prefix: Checkpoint prefix passed to ``load_param``.
        epoch: Checkpoint epoch passed to ``load_param``.
        ctx: Context forwarded to ``load_param`` and ``Predictor``.

    Returns:
        The constructed ``Predictor``.

    Raises:
        AssertionError: If a parameter is missing or has an unexpected shape.
    """
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # Infer the shapes the symbol expects for the configured inputs.
    data_shape_dict = dict(DATA_SHAPES)
    arg_names = symbol.list_arguments()
    aux_names = symbol.list_auxiliary_states()
    arg_shape, _, aux_shape = symbol.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(arg_names, arg_shape))
    aux_shape_dict = dict(zip(aux_names, aux_shape))

    # Learnable arguments: data inputs and label-like names are not parameters.
    for name in arg_names:
        if name not in data_shape_dict and 'label' not in name:
            assert name in arg_params, name + ' not initialized'
            assert arg_params[name].shape == arg_shape_dict[name], \
                'shape inconsistent for ' + name + ' inferred ' + str(arg_shape_dict[name]) + \
                ' provided ' + str(arg_params[name].shape)

    # Auxiliary states are all checked.
    for name in aux_names:
        assert name in aux_params, name + ' not initialized'
        assert aux_params[name].shape == aux_shape_dict[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(aux_shape_dict[name]) + \
            ' provided ' + str(aux_params[name].shape)

    return Predictor(symbol, DATA_NAMES, LABEL_NAMES,
                     context=ctx,
                     provide_data=DATA_SHAPES,
                     provide_label=LABEL_SHAPES,
                     arg_params=arg_params,
                     aux_params=aux_params)
def get_net(symbol, prefix, epoch, ctx):
    """Create a ``Predictor`` for ``symbol`` from a saved checkpoint.

    The loaded parameters are checked against the shapes inferred from the
    module-level ``DATA_SHAPES`` before the predictor is built.

    Args:
        symbol: Network symbol to run.
        prefix: Checkpoint prefix passed to ``load_param``.
        epoch: Checkpoint epoch passed to ``load_param``.
        ctx: Context forwarded to ``load_param`` and ``Predictor``.

    Returns:
        The constructed ``Predictor``.

    Raises:
        AssertionError: If a parameter is missing or has an unexpected shape.
    """

    def _assert_match(names, params, inferred):
        # Each listed name must be present and carry the inferred shape.
        for k in names:
            assert k in params, k + ' not initialized'
            assert params[k].shape == inferred[k], \
                'shape inconsistent for ' + k + ' inferred ' + str(inferred[k]) + \
                ' provided ' + str(params[k].shape)

    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    input_shapes = dict(DATA_SHAPES)
    arg_names = symbol.list_arguments()
    aux_names = symbol.list_auxiliary_states()
    arg_shape, _, aux_shape = symbol.infer_shape(**input_shapes)

    # Data inputs and anything with 'label' in its name are not parameters.
    learnable = [k for k in arg_names
                 if not (k in input_shapes or 'label' in k)]
    _assert_match(learnable, arg_params, dict(zip(arg_names, arg_shape)))
    _assert_match(aux_names, aux_params, dict(zip(aux_names, aux_shape)))

    predictor = Predictor(
        symbol, DATA_NAMES, LABEL_NAMES,
        context=ctx,
        provide_data=DATA_SHAPES, provide_label=LABEL_SHAPES,
        arg_params=arg_params, aux_params=aux_params,
    )
    return predictor
def test_rcnn(network, dataset, image_set, dataset_path, ctx, prefix, epoch,
              vis, shuffle, has_rpn, proposal, max_box, thresh):
    """Evaluate a trained end-to-end detector on one image set.

    Args:
        network: Name fragment of the symbol factory ``get_<network>``.
        dataset: Name of the imdb class, resolved with ``eval``.
        image_set: Image-set identifier passed to the imdb constructor.
        dataset_path: Dataset location passed to the imdb constructor.
        ctx: Context for parameter loading and prediction.
        prefix: Checkpoint prefix.
        epoch: Checkpoint epoch.
        vis: Forwarded to ``pred_eval``.
        shuffle: Whether the test loader shuffles.
        has_rpn: Must be truthy; only the end-to-end path is supported.
        proposal: Unused here.
        max_box: Forwarded to ``pred_eval``.
        thresh: Forwarded to ``pred_eval``.

    Returns:
        Whatever ``pred_eval`` returns.

    Raises:
        AssertionError: If ``has_rpn`` is falsy, or a parameter is missing or
            has an unexpected shape.
    """
    assert has_rpn, "only end-to-end case was checked in this project."
    config.TEST.HAS_RPN = True

    # NOTE(review): both eval() calls run caller-supplied names.
    sym = eval('get_' + network)(is_train=False,
                                 num_classes=config.NUM_CLASSES,
                                 num_anchors=config.NUM_ANCHORS)
    imdb = eval(dataset)(image_set, dataset_path)
    roidb = filter_roidb(imdb.gt_roidb())
    imdb.num_images = len(roidb)

    test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn,
                           nThreads=default.prefetch_thread_num)

    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # Shapes the symbol expects for the loader's inputs.
    data_shape_dict = dict(test_data.provide_data)
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

    # Validate the checkpoint against those shapes.
    for name in sym.list_arguments():
        if name not in data_shape_dict and 'label' not in name:
            assert name in arg_params, name + ' not initialized'
            assert arg_params[name].shape == arg_shape_dict[name], \
                'shape inconsistent for ' + name + ' inferred ' + str(arg_shape_dict[name]) + \
                ' provided ' + str(arg_params[name].shape)
    for name in sym.list_auxiliary_states():
        assert name in aux_params, name + ' not initialized'
        assert aux_params[name].shape == aux_shape_dict[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(aux_shape_dict[name]) + \
            ' provided ' + str(aux_params[name].shape)

    # Largest input the predictor has to accommodate.
    data_names = [entry[0] for entry in test_data.provide_data]
    label_names = None
    max_data_shape = [
        ('data', (config.NUM_IMAGES_3DCE, config.NUM_SLICES,
                  config.MAX_SIZE, config.MAX_SIZE)),
    ]
    if not has_rpn:
        max_data_shape.append(('rois', (1, config.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    predictor = Predictor(sym, data_names, label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          arg_params=arg_params,
                          aux_params=aux_params)

    return pred_eval(predictor, test_data, imdb, vis=vis, max_box=max_box, thresh=thresh)
def demo_maskrcnn(network, dataset, image_set, root_path, dataset_path, result_path,
                  ctx, prefix, epoch, vis, shuffle, has_rpn, proposal, thresh):
    """Run the Mask R-CNN demo over an image set.

    Args:
        network: Name fragment of the factory ``get_<network>_mask_test``.
        dataset: Name of the imdb class, resolved with ``eval``.
        image_set: Image-set identifier passed to the imdb constructor.
        root_path: Root path passed to the imdb constructor.
        dataset_path: Dataset path passed to the imdb constructor.
        result_path: Forwarded to ``pred_demo_mask``.
        ctx: Context for parameter loading and prediction.
        prefix: Checkpoint prefix.
        epoch: Checkpoint epoch.
        vis: Forwarded to ``pred_demo_mask``.
        shuffle: Whether the test loader shuffles.
        has_rpn: Must be truthy; the non-RPN path is not implemented.
        proposal: Unused here.
        thresh: Forwarded to ``pred_demo_mask``.

    Raises:
        NotImplementedError: If ``has_rpn`` is falsy (raised after the config
            has been printed).
        AssertionError: If a parameter is missing or has an unexpected shape.
    """
    if has_rpn:
        config.TEST.HAS_RPN = True

    pprint.pprint(config)

    if not has_rpn:
        raise NotImplementedError

    # NOTE(review): both eval() calls run caller-supplied names.
    sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES,
                                                num_anchors=config.NUM_ANCHORS)
    imdb = eval(dataset)(image_set, root_path, dataset_path)
    roidb = imdb.gt_roidb()

    test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn)

    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    # Shapes the symbol expects for the loader's inputs.
    data_shape_dict = dict(test_data.provide_data)
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

    # Validate the checkpoint; data inputs and label-like names are skipped.
    for k in sym.list_arguments():
        if k in data_shape_dict or 'label' in k:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + \
            ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + \
            ' provided ' + str(aux_params[k].shape)

    # Largest input over all configured scales. No 'rois' entry is needed:
    # this point is only reached when has_rpn is truthy.
    data_names = [k[0] for k in test_data.provide_data]
    label_names = None
    max_data_shape = [('data', (1, 3,
                                max(v[0] for v in config.SCALES),
                                max(v[1] for v in config.SCALES)))]

    predictor = Predictor(sym, data_names, label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    pred_demo_mask(predictor, test_data, imdb, roidb, result_path, vis=vis, thresh=thresh)
def get_net(data, sym, prefix, epoch, ctx):
    """Build a ``Predictor`` whose input shapes are taken from sample data.

    Args:
        data: Sequence of mappings; each one is indexed by every name in the
            module-level ``DATA_NAMES``.
        sym: Network symbol to run.
        prefix: Checkpoint prefix passed to ``load_param``.
        epoch: Checkpoint epoch passed to ``load_param``.
        ctx: A single context; it is wrapped in a list for ``Predictor``.

    Returns:
        The constructed ``Predictor``.
    """
    # Convert each sample to NDArrays in DATA_NAMES order. Iterating directly
    # avoids xrange, which does not exist on Python 3.
    data = [[mx.nd.array(sample[name]) for name in DATA_NAMES] for sample in data]

    max_data_shape = [('data', (1, 3,
                                max(v[0] for v in config.SCALES),
                                max(v[1] for v in config.SCALES)))]
    provide_data = [[(name, arr.shape) for name, arr in zip(DATA_NAMES, sample)]
                    for sample in data]
    # One (absent) label description per sample.
    provide_label = [None] * len(data)

    arg_params, aux_params = load_param(prefix, epoch, process=True)
    predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES,
                          context=[ctx],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    return predictor
def get_net(symbol, prefix, epoch, ctx):
    """Load checkpoint parameters and return a ``Predictor`` for ``symbol``.

    Input and label descriptions come from the module-level ``DATA_NAMES``,
    ``LABEL_NAMES``, ``DATA_SHAPES`` and ``LABEL_SHAPES``.

    Args:
        symbol: Network symbol to run.
        prefix: Checkpoint prefix passed to ``load_param``.
        epoch: Checkpoint epoch passed to ``load_param``.
        ctx: Context forwarded to ``load_param`` and ``Predictor``.

    Returns:
        The constructed ``Predictor``.
    """
    loaded = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)
    arg_params, aux_params = loaded
    return Predictor(
        symbol,
        DATA_NAMES,
        LABEL_NAMES,
        context=ctx,
        provide_data=DATA_SHAPES,
        provide_label=LABEL_SHAPES,
        arg_params=arg_params,
        aux_params=aux_params,
    )
def get_net(prefix, epoch, ctx):
    """Load checkpoint parameters and return a ``Predictor``.

    ``gensym.gen_sym_infer`` itself (not the result of calling it) is passed
    to ``Predictor`` as the symbol argument, and the maximum data shapes are
    the configured ``DATA_SHAPES``.

    Args:
        prefix: Checkpoint prefix passed to ``load_param``.
        epoch: Checkpoint epoch passed to ``load_param``.
        ctx: Context forwarded to ``load_param`` and ``Predictor``.

    Returns:
        The constructed ``Predictor``.
    """
    loaded = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)
    arg_params, aux_params = loaded
    max_shapes = dict(DATA_SHAPES)
    return Predictor(
        gensym.gen_sym_infer,
        DATA_NAMES,
        LABEL_NAMES,
        context=ctx,
        max_data_shapes=max_shapes,
        provide_data=DATA_SHAPES,
        provide_label=LABEL_SHAPES,
        arg_params=arg_params,
        aux_params=aux_params,
    )
def get_net(prefix, epoch, ctx):
    """Validate a checkpoint against the generated symbol and build a Predictor.

    A concrete symbol is generated with ``gen_sym_infer`` only for the shape
    check; the ``Predictor`` receives ``gen_sym_infer`` itself.

    Args:
        prefix: Checkpoint prefix passed to ``load_param``.
        epoch: Checkpoint epoch passed to ``load_param``.
        ctx: A context or a list of contexts.

    Returns:
        The constructed ``Predictor``.

    Raises:
        AssertionError: If a parameter is missing or has an unexpected shape.
    """
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)

    data_shape_dict = dict(DATA_SHAPES)
    # A list of contexts contributes its length; anything else counts as one.
    if isinstance(ctx, list):
        num_ctx = len(ctx)
    else:
        num_ctx = 1
    symbol = gen_sym_infer(data_shape_dict, num_ctx)

    arg_names = symbol.list_arguments()
    aux_names = symbol.list_auxiliary_states()
    # Partial inference is called without explicit input shapes.
    arg_shape, _, aux_shape = symbol.infer_shape_partial()
    arg_shape_dict = dict(zip(arg_names, arg_shape))
    aux_shape_dict = dict(zip(aux_names, aux_shape))

    # Data inputs and label-like names are not checkpoint parameters.
    for name in symbol.list_arguments():
        if name not in data_shape_dict and 'label' not in name:
            assert name in arg_params, name + ' not initialized'
            assert arg_params[name].shape == arg_shape_dict[name], \
                'shape inconsistent for ' + name + ' inferred ' + str(arg_shape_dict[name]) + \
                ' provided ' + str(arg_params[name].shape)
    for name in symbol.list_auxiliary_states():
        assert name in aux_params, name + ' not initialized'
        assert aux_params[name].shape == aux_shape_dict[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(aux_shape_dict[name]) + \
            ' provided ' + str(aux_params[name].shape)

    return Predictor(gen_sym_infer, DATA_NAMES, LABEL_NAMES,
                     context=ctx,
                     max_data_shapes=data_shape_dict,
                     provide_data=DATA_SHAPES,
                     provide_label=LABEL_SHAPES,
                     arg_params=arg_params,
                     aux_params=aux_params)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train the end-to-end ``get_<args.network>_train`` symbol with SGD.

    Args:
        args: Parsed options. This function reads ``network``, ``image_set``,
            ``dataset``, ``root_path``, ``dataset_path``, ``no_flip``,
            ``no_shuffle``, ``work_load_list``, ``resume``, ``frequent`` and
            ``kvstore``.
        ctx: List of contexts; its length is used as the batch size.
        pretrained: Checkpoint prefix loaded when not resuming.
        epoch: Checkpoint epoch loaded when not resuming.
        prefix: Checkpoint prefix used for resuming and for saved checkpoints.
        begin_epoch: Epoch to start from (also the epoch loaded on resume).
        end_epoch: Passed to ``fit`` as ``num_epoch``.
        lr: Base learning rate.
        lr_step: Comma-separated epochs at which the rate is scaled by 0.1.

    Raises:
        AssertionError: If a parameter is missing or has an unexpected shape.
    """
    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    # NOTE(review): eval() runs a name built from args.network.
    sym = eval('get_' + args.network + '_train')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu: one batch slot group per context
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training; image sets are '+'-separated
    roidbs = [
        load_gt_roidb(args.dataset, image_set, args.root_path,
                      args.dataset_path, flip=not args.no_flip)
        for image_set in args.image_set.split('+')
    ]
    roidb = filter_roidb(merge_roidb(roidbs))

    # load training data
    train_data = AnchorLoader(feat_sym, roidb,
                              batch_size=input_batch_size,
                              shuffle=not args.no_shuffle,
                              ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max(v[0] for v in config.SCALES),
                                max(v[1] for v in config.SCALES)))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # These layers get fresh values: normal weights (the paired value is
        # the second argument of mx.random.normal) and zero biases. The order
        # is kept so the random draws happen in the same sequence.
        fresh_layers = (
            ('rpn_conv_3x3', 0.01),
            ('rpn_cls_score', 0.01),
            ('rpn_bbox_pred', 0.01),
            ('cls_score', 0.01),
            ('bbox_pred', 0.001),
        )
        for layer, sigma in fresh_layers:
            weight, bias = layer + '_weight', layer + '_bias'
            arg_params[weight] = mx.random.normal(0, sigma, shape=arg_shape_dict[weight])
            arg_params[bias] = mx.nd.zeros(shape=arg_shape_dict[bias])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + \
            ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + \
            ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # metric
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            metric.RPNAccMetric(), metric.RPNLogLossMetric(), metric.RPNL1LossMetric(),
            metric.RCNNAccMetric(), metric.RCNNLogLossMetric(), metric.RCNNL1LossMetric()]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=args.frequent,
                                                 auto_reset=False)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # decide learning rate: steps at or before begin_epoch are applied up
    # front, the remaining ones are converted to iteration counts
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(step) for step in lr_step.split(',')]
    lr_epoch_diff = [step - begin_epoch for step in lr_epoch if step > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(step * len(roidb) / batch_size) for step in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5,
    }
    # MultiFactorScheduler rejects an empty step list, which is what remains
    # when begin_epoch is at or past every entry of lr_step. In that case train
    # at the already-decayed constant rate instead of crashing.
    if lr_iters:
        optimizer_params['lr_scheduler'] = mx.lr_scheduler.MultiFactorScheduler(
            lr_iters, lr_factor)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def test_rpn(network, dataset, image_set, root_path, dataset_path, ctx, prefix,
             epoch, vis, shuffle, thresh):
    """Generate RPN proposals for an image set, save them and evaluate recall.

    Proposals are pickled to ``<imdb.root_path>/rpn_data/<imdb.name>_rpn.pkl``;
    when ``thresh > 0`` the second result of ``generate_proposals`` is also
    written to ``<imdb.name>_full_rpn.pkl``.

    Args:
        network: Name fragment of the factory ``get_<network>_rpn_test``.
        dataset: Name of the imdb class, resolved with ``eval``.
        image_set: Image-set identifier passed to the imdb constructor.
        root_path: Root path passed to the imdb constructor.
        dataset_path: Dataset path passed to the imdb constructor.
        ctx: List of contexts; its length is the loader batch size.
        prefix: Checkpoint prefix.
        epoch: Checkpoint epoch.
        vis: Forwarded to ``generate_proposals``.
        shuffle: Whether the test loaders shuffle.
        thresh: Forwarded to ``generate_proposals``.

    Raises:
        AssertionError: If a parameter is missing or has an unexpected shape.
    """
    config.TEST.HAS_RPN = True
    pprint(config)

    # NOTE(review): both eval() calls run caller-supplied names.
    sym = eval('get_' + network + '_rpn_test')(num_anchors=config.NUM_ANCHORS)
    imdb = eval(dataset)(image_set, root_path, dataset_path)
    roidb = imdb.gt_roidb()

    # Split record indices by orientation; width == height counts as vertical.
    horizontal_inds, vertical_inds = [], []
    for ind, roirec in enumerate(roidb):
        bucket = horizontal_inds if roirec['width'] > roirec['height'] else vertical_inds
        bucket.append(ind)
    # Grouping is only used when both orientations are present.
    aspect_group = bool(horizontal_inds) and bool(vertical_inds)
    print("aspect_group={}".format(aspect_group))

    num_ctx = len(ctx)
    if aspect_group:
        horizontal_roidb = [roidb[ind] for ind in horizontal_inds]
        vertical_roidb = [roidb[ind] for ind in vertical_inds]
        l1 = TestLoader(horizontal_roidb, batch_size=num_ctx, shuffle=shuffle, has_rpn=True)
        l2 = TestLoader(vertical_roidb, batch_size=num_ctx, shuffle=shuffle, has_rpn=True)
        test_data = SequentialLoader(iters=[l1, l2])
    else:
        test_data = TestLoader(roidb, batch_size=num_ctx, shuffle=shuffle, has_rpn=True)

    # Sanity check: report the shape of every internal output.
    _, out_shape, _ = sym.get_internals().infer_shape(**dict(test_data.provide_data))
    out_names = sym.get_internals().list_outputs()
    pprint_with_newlines(zip(out_names, out_shape), "output shape: ")

    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=None)

    # Shapes the symbol expects for the loader's inputs.
    data_shape_dict = dict(test_data.provide_data)
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))

    # Validate the checkpoint; data inputs and label-like names are skipped.
    for name in sym.list_arguments():
        if name not in data_shape_dict and 'label' not in name:
            assert name in arg_params, name + ' not initialized'
            assert arg_params[name].shape == arg_shape_dict[name], \
                'shape inconsistent for ' + name + ' inferred ' + str(arg_shape_dict[name]) + \
                ' provided ' + str(arg_params[name].shape)
    for name in sym.list_auxiliary_states():
        assert name in aux_params, name + ' not initialized'
        assert aux_params[name].shape == aux_shape_dict[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(aux_shape_dict[name]) + \
            ' provided ' + str(aux_params[name].shape)

    # Largest input over all configured scales.
    data_names = [entry[0] for entry in test_data.provide_data]
    if test_data.provide_label is None:
        label_names = None
    else:
        label_names = [entry[0] for entry in test_data.provide_label]
    max_data_shape = [('data', (num_ctx, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]

    predictor = Predictor(sym, data_names, label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    imdb_boxes, original_boxes = generate_proposals(predictor, test_data, imdb,
                                                    vis=vis, thresh=thresh)

    if aspect_group:
        # Results arrive in horizontal-then-vertical order; scatter them back
        # to the positions the records have in roidb.
        total = len(imdb_boxes)
        restored_imdb, restored_original = [None] * total, [None] * total
        for pos, orig_ind in enumerate(horizontal_inds + vertical_inds):
            restored_imdb[orig_ind] = imdb_boxes[pos]
            restored_original[orig_ind] = original_boxes[pos]
        imdb_boxes, original_boxes = restored_imdb, restored_original

    # save results
    rpn_folder = os.path.join(imdb.root_path, 'rpn_data')
    if not os.path.exists(rpn_folder):
        os.mkdir(rpn_folder)

    rpn_file = os.path.join(rpn_folder, imdb.name + '_rpn.pkl')
    with open(rpn_file, 'wb') as out:
        cPickle.dump(imdb_boxes, out, cPickle.HIGHEST_PROTOCOL)

    if thresh > 0:
        full_rpn_file = os.path.join(rpn_folder, imdb.name + '_full_rpn.pkl')
        with open(full_rpn_file, 'wb') as out:
            cPickle.dump(original_boxes, out, cPickle.HIGHEST_PROTOCOL)

    print('wrote rpn proposals to {}'.format(rpn_file))
    imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train the end-to-end ``get_<args.network>_train`` symbol with SGD.

    The root logger is configured with ``logging.basicConfig()`` and set to
    INFO level as a side effect.

    Args:
        args: Parsed options. This function reads ``network``, ``image_set``,
            ``dataset``, ``root_path``, ``dataset_path``, ``no_flip``,
            ``no_shuffle``, ``work_load_list``, ``resume``, ``frequent`` and
            ``kvstore``.
        ctx: List of contexts; its length is used as the batch size.
        pretrained: Checkpoint prefix loaded when not resuming.
        epoch: Checkpoint epoch loaded when not resuming.
        prefix: Checkpoint prefix used for resuming and for saved checkpoints.
        begin_epoch: Epoch to start from (also the epoch loaded on resume).
        end_epoch: Passed to ``fit`` as ``num_epoch``.
        lr: Base learning rate.
        lr_step: Comma-separated epochs at which the rate is scaled by 0.1.

    Raises:
        AssertionError: If a parameter is missing or has an unexpected shape.
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    # NOTE(review): eval() runs a name built from args.network.
    sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES,
                                                 num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training; image sets are '+'-separated
    roidbs = [load_gt_roidb(args.dataset, image_set, args.root_path,
                            args.dataset_path, flip=not args.no_flip)
              for image_set in args.image_set.split('+')]
    roidb = filter_roidb(merge_roidb(roidbs))

    # load training data
    train_data = AnchorLoader(feat_sym, roidb,
                              batch_size=input_batch_size,
                              shuffle=not args.no_shuffle,
                              ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max(v[0] for v in config.SCALES),
                                max(v[1] for v in config.SCALES)))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # These layers get fresh values: normal weights (the paired value is
        # the second argument of mx.random.normal) and zero biases. The order
        # is kept so the random draws happen in the same sequence.
        fresh_layers = (
            ('rpn_conv_3x3', 0.01),
            ('rpn_cls_score', 0.01),
            ('rpn_bbox_pred', 0.01),
            ('cls_score', 0.01),
            ('bbox_pred', 0.001),
        )
        for layer, sigma in fresh_layers:
            weight, bias = layer + '_weight', layer + '_bias'
            arg_params[weight] = mx.random.normal(0, sigma, shape=arg_shape_dict[weight])
            arg_params[bias] = mx.nd.zeros(shape=arg_shape_dict[bias])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + \
            ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + \
            ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # metric
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            metric.RPNAccMetric(), metric.RPNLogLossMetric(), metric.RPNL1LossMetric(),
            metric.RCNNAccMetric(), metric.RCNNLogLossMetric(), metric.RCNNL1LossMetric()]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # decide learning rate: steps at or before begin_epoch are applied up
    # front, the remaining ones are converted to iteration counts
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(step) for step in lr_step.split(',')]
    lr_epoch_diff = [step - begin_epoch for step in lr_epoch if step > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(step * len(roidb) / batch_size) for step in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)

    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}
    # MultiFactorScheduler rejects an empty step list, which is what remains
    # when begin_epoch is at or past every entry of lr_step. In that case train
    # at the already-decayed constant rate instead of crashing.
    if lr_iters:
        optimizer_params['lr_scheduler'] = mx.lr_scheduler.MultiFactorScheduler(
            lr_iters, lr_factor)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train the multi-stride RPN symbol ``get_<args.network>_train``.

    The learning rate is decayed and checkpoints are written from the batch
    callback rather than per epoch: a checkpoint is saved every 1000 batches,
    and when the last scheduled step is reached a final checkpoint (epoch 0)
    is saved and the process exits via ``sys.exit(0)``.

    Args:
        args: Parsed options. This function reads ``network``, ``image_set``,
            ``dataset``, ``root_path``, ``dataset_path``, ``no_flip``,
            ``no_shuffle``, ``work_load_list``, ``resume``, ``frequent`` and
            ``kvstore``.
        ctx: List of contexts.
        pretrained: Checkpoint prefix loaded when not resuming.
        epoch: Checkpoint epoch loaded when not resuming.
        prefix: Checkpoint prefix used for resuming and for saved checkpoints.
        begin_epoch: Epoch to start from (also the epoch loaded on resume).
        end_epoch: Ignored; the value is overridden with 10000 below.
        lr: Base learning rate.
        lr_step: Comma-separated epochs at which the rate is scaled by 0.1.
    """
    # setup multi-gpu
    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training; image sets are '+'-separated
    roidbs = [
        load_gt_roidb(args.dataset, image_set, args.root_path,
                      args.dataset_path, flip=not args.no_flip)
        for image_set in args.image_set.split('+')
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load symbol; one RPN classification output per configured stride
    # NOTE(review): eval() runs a name built from args.network.
    sym = eval('get_' + args.network + '_train')()
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' % stride])

    train_data = CropLoader(feat_sym, roidb,
                            batch_size=input_batch_size,
                            shuffle=not args.no_shuffle,
                            ctx=ctx,
                            work_load_list=args.work_load_list)

    # infer max shape
    # NOTE(review): both spatial dimensions use v[1] -- presumably intentional
    # (square crops); confirm against CropLoader.
    max_data_shape = [('data', (1, 3,
                                max([v[1] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # Upsampling weights get bilinear initialisation. items() is used
        # instead of the Python-2-only iteritems().
        for k, v in arg_shape_dict.items():
            if k.find('upsampling') >= 0:
                print('initializing upsampling_weight', k)
                arg_params[k] = mx.nd.zeros(shape=v)
                init = mx.init.Initializer()
                init._init_bilinear(k, arg_params[k])

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym, data_names=data_names, label_names=label_names,
                 logger=logger, context=ctx,
                 work_load_list=args.work_load_list,
                 fixed_param_names=fixed_param_names)

    # metric: each base index addresses four consecutive outputs
    # (pred, label, loss, weight)
    eval_metrics = mx.metric.CompositeEvalMetric()
    mids = [0, 4, 8]
    for mid in mids:
        _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1)
        eval_metrics.add(_metric)
        _metric = metric.RPNL1LossMetric(loss_idx=mid + 2, weight_idx=mid + 3)
        eval_metrics.add(_metric)

    # Checkpointing happens in the batch callback, so there is no epoch callback.
    epoch_end_callback = None

    # decide learning rate: steps at or before begin_epoch are applied up
    # front, the remaining ones are converted to iteration counts
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(step) for step in lr_step.split(',')]
    lr_epoch_diff = [step - begin_epoch for step in lr_epoch if step > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(step * len(roidb) / input_batch_size) for step in lr_epoch_diff]
    # Training normally stops from the batch callback, so the epoch bound is
    # just set high enough not to interfere.
    end_epoch = 10000
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))

    # optimizer
    opt = optimizer.SGD(learning_rate=lr, momentum=0.9, wd=0.0005,
                        rescale_grad=1.0 / len(ctx), clip_gradient=None)
    initializer = mx.init.Xavier()
    if len(ctx) > 1:
        train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size, frequent=args.frequent,
                                  auto_reset=False)
    # One-element list so the nested callback can mutate the counter.
    global_step = [0]

    def save_model(epoch):
        # Save a checkpoint whose symbol groups, per stride, the reshaped RPN
        # class probabilities and the RPN bbox predictions.
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            _name = 'rpn_cls_score_stride%d_output' % stride
            rpn_cls_score = all_layers[_name]

            # prepare rpn data
            rpn_cls_score_reshape = mx.symbol.Reshape(
                data=rpn_cls_score,
                shape=(0, 2, -1, 0),
                name="rpn_cls_score_reshape_stride%d" % stride)
            rpn_cls_prob = mx.symbol.SoftmaxActivation(
                data=rpn_cls_score_reshape,
                mode="channel",
                name="rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(
                data=rpn_cls_prob,
                shape=(0, 2 * num_anchors, -1, 0),
                name='rpn_cls_prob_reshape_stride%d' % stride)
            _name = 'rpn_bbox_pred_stride%d_output' % stride
            rpn_bbox_pred = all_layers[_name]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        # Log speed, decay the rate at scheduled batches, checkpoint
        # periodically and stop after the last scheduled step.
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        for _iter in lr_iters:
            if mbatch == _iter:
                opt.lr *= 0.1
                print('lr change to', opt.lr, ' in batch', mbatch, file=sys.stderr)
                break

        if mbatch % 1000 == 0:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(mbatch)

        # Guard the empty schedule: indexing lr_iters[-1] would otherwise
        # raise IndexError on the very first batch.
        if lr_iters and mbatch == lr_iters[-1]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            sys.exit(0)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=_batch_callback,
            kvstore=args.kvstore,
            optimizer=opt,
            initializer=initializer,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train an end-to-end detector (RPN + RCNN heads) with ``MutableModule``.

    Args:
        args: Parsed command-line namespace. The function reads ``network``,
            ``use_global_context``, ``use_roi_align``, ``use_ava_recordio``,
            ``image_set``, ``dataset``, ``root_path``, ``dataset_path``,
            ``no_flip``, ``no_shuffle``, ``work_load_list``,
            ``use_data_augmentation``, ``classes_names``,
            ``ava_recordio_name``, ``resume``, ``frequent`` and ``kvstore``.
        ctx: Sequence of device contexts; ``len(ctx)`` is used as the batch
            size (one image per context).
        pretrained: Prefix passed to ``load_param`` when not resuming.
        epoch: Epoch passed to ``load_param`` when not resuming.
        prefix: Checkpoint prefix; read on resume and handed to the
            checkpoint callback.
        begin_epoch: Epoch to start from (also the epoch loaded on resume).
        end_epoch: Passed to ``mod.fit`` as ``num_epoch``.
        lr: Base learning rate.
        lr_step: Comma-separated epochs at which the learning rate is
            multiplied by 0.1.
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol; the factory function is resolved by name from args.network
    if args.use_global_context or args.use_roi_align:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES,
            num_anchors=config.NUM_ANCHORS,
            use_global_context=args.use_global_context,
            use_roi_align=args.use_roi_align)
    else:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES,
            num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    if not args.use_ava_recordio:
        # load dataset and prepare imdb for training
        image_sets = args.image_set.split('+')
        roidbs = [
            load_gt_roidb(args.dataset, image_set, args.root_path,
                          args.dataset_path, flip=not args.no_flip)
            for image_set in image_sets
        ]
        roidb = merge_roidb(roidbs)
        roidb = filter_roidb(roidb)

        # load training data
        train_data = AnchorLoader(
            feat_sym, roidb,
            batch_size=input_batch_size,
            shuffle=not args.no_shuffle,
            ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)
    else:
        # Class names: first space-separated token of every line, with the
        # background class prepended. The file is closed as soon as it is read.
        with open(args.classes_names) as classes_file:
            classes = ['__background__'] + [
                line.strip().split(' ')[0] for line in classes_file
            ]
        path_imgidx = args.ava_recordio_name + '.idx'
        path_imgrec = args.ava_recordio_name + '.rec'
        record = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
        # pylint: disable=redefined-variable-type
        train_data = AnchorLoaderAvaRecordIO(
            feat_sym, record, classes,
            batch_size=input_batch_size,
            shuffle=not args.no_shuffle,
            ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)
    print('arg shape')
    # pprint.pprint(arg_shape_dict)

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # Newly added heads are not in the pretrained model: weights are drawn
        # from a normal distribution, biases are zeroed.
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

        if args.use_global_context:
            # additional params for using global context
            # NOTE: only auxiliary states are copied from stage4 to stage5;
            # copying the stage5/bn2 *argument* weights is disabled, so those
            # must already be present in the loaded parameters (the shape
            # check below asserts it).
            for aux_param_name in sym.list_auxiliary_states():
                if 'stage5' in aux_param_name:
                    # params of stage5 is initialized from stage4
                    aux_params[aux_param_name] = aux_params[
                        aux_param_name.replace('stage5', 'stage4')].copy()
            aux_params['bn2_moving_mean'] = aux_params['bn1_moving_mean'].copy()
            aux_params['bn2_moving_var'] = aux_params['bn1_moving_var'].copy()

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    # Steps at or before begin_epoch have already happened: fold them into
    # the starting learning rate and schedule only the remaining ones.
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    if not args.use_ava_recordio:
        lr_iters = [
            int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
        ]
    else:
        lr_iters = [
            int(epoch * train_data.provide_size() / batch_size)
            for epoch in lr_epoch_diff
        ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def get_net(prefix, epoch, ctx):
    """Load a checkpoint and wrap the VGG test symbol in a ``Detector``.

    Args:
        prefix: Checkpoint prefix passed to ``load_param``.
        epoch: Checkpoint epoch passed to ``load_param``.
        ctx: Context passed both to ``load_param`` and to ``Detector``.

    Returns:
        The ``Detector`` built from the test symbol and the loaded parameters.
    """
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx)
    test_symbol = get_vgg_test()
    return Detector(test_symbol, ctx, arg_params, aux_params)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train an end-to-end detector whose symbol comes from a generator class.

    The symbol is produced by ``symbol_<args.network>().get_symbol`` and that
    generator (not a fixed symbol) is handed to ``MutableModule``.

    Args:
        args: Parsed command-line namespace. The function reads ``network``,
            ``root_path``, ``dataset``, ``subset``, ``split``, ``no_shuffle``,
            ``work_load_list``, ``resume``, ``frequent`` and ``kvstore``.
        ctx: Sequence of device contexts; ``len(ctx)`` is used as the batch
            size (one image per context).
        pretrained: Prefix passed to ``load_param`` when not resuming.
        epoch: Epoch passed to ``load_param`` when not resuming.
        prefix: Checkpoint prefix; read on resume and handed to the
            checkpoint callback.
        begin_epoch: Epoch to start from (also the epoch loaded on resume).
        end_epoch: Passed to ``mod.fit`` as ``num_epoch``.
        lr: Base learning rate.
        lr_step: Comma-separated epochs at which the learning rate is
            multiplied by 0.1.
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym_instance = eval('symbol_' + args.network)()
    sym_gen = sym_instance.get_symbol
    # NOTE(review): 46 is a hard-coded first argument of get_symbol; its
    # meaning is not visible from this function -- confirm against the
    # symbol class.
    sym = sym_gen(46, config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    dataset = Dataset(args.root_path, args.dataset, args.subset,
                      split=args.split)
    roidb = dataset.gt_roidb()
    W = dataset.W

    # load training data
    train_data = AnchorLoader(feat_sym, roidb,
                              batch_size=input_batch_size,
                              shuffle=not args.no_shuffle,
                              ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    print(train_data.provide_data)
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if args.resume:
        print("continue training from epoch {}".format(begin_epoch))
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        if config.RNN.USE_W2V:
            arg_params['embed_weight'] = mx.nd.array(W)
        else:
            # No argument-shape table is built earlier in this function, so
            # infer the shapes here to size the randomly initialised
            # embedding (previously this branch hit an undefined name).
            arg_shapes, _, _ = sym.infer_shape(**data_shape_dict)
            arg_shape_dict = dict(zip(sym.list_arguments(), arg_shapes))
            arg_params['embed_weight'] = mx.random.uniform(
                0, 0.01, shape=arg_shape_dict['embed_weight'])
        sym_instance.init_weight(config, arg_params, aux_params)

    # no checking: parameter shapes are deliberately not verified here

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym_gen, config,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric,
                         eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(config.ENCODER_CELL, prefix,
                                                means, stds)

    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    # Steps at or before begin_epoch have already happened: fold them into
    # the starting learning rate and schedule only the remaining ones.
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch
                     if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size)
                for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # initializer for fused RNN
    # TODO: not successfully added, try ask it on github issues.
    # It is constructed but intentionally NOT passed to fit() below, which
    # uses a plain Xavier initializer instead.
    initializer = mx.initializer.FusedRNN(
        init=mx.init.Xavier(factor_type='in', magnitude=2.34),
        num_hidden=1024, num_layers=2, mode='lstm')

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            allow_missing=True,
            initializer=mx.init.Xavier(factor_type='in', magnitude=2.34),
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step=50000):
    """Train an end-to-end detector with a single-factor learning-rate schedule.

    Args:
        args: Parsed command-line namespace. The function reads ``network``,
            ``dataset``, ``image_set``, ``root_path``, ``dataset_path``,
            ``flip``, ``work_load_list``, ``resume``, ``frequent`` and
            ``kvstore``.
        ctx: Sequence of device contexts; ``len(ctx)`` scales the batch size.
        pretrained: Prefix passed to ``load_param``.
        epoch: Epoch passed to ``load_param``.
        prefix: Checkpoint prefix handed to the checkpoint callback.
        begin_epoch: Passed to ``mod.fit`` as ``begin_epoch``.
        end_epoch: Passed to ``mod.fit`` as ``num_epoch``.
        lr: Learning rate.
        lr_step: Step passed to ``mx.lr_scheduler.FactorScheduler`` (with a
            factor of 0.1).
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.HAS_RPN = True
    config.TRAIN.BATCH_SIZE = 1
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    config.TRAIN.BG_THRESH_LO = 0.0

    # load symbol
    sym = eval('get_' + args.network + '_train')()
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    if args.flip:
        roidb = imdb.append_flipped_images(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb,
                              batch_size=config.TRAIN.BATCH_SIZE,
                              shuffle=True,
                              ctx=ctx,
                              work_load_list=args.work_load_list)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_SIZE, 100, 5)))
    # A single pre-formatted string prints the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # load pretrained
    arg_params, aux_params = load_param(pretrained, epoch, convert=True)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)

    # initialize params
    if not args.resume:
        # Detection heads: weights drawn from a normal distribution, biases
        # zeroed.
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), imdb.num_classes)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), imdb.num_classes)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': mx.lr_scheduler.FactorScheduler(lr_step, 0.1),
        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def init_params(args, sym, train_data):
    """Load network parameters and initialise any newly added layers.

    On resume, parameters come from ``args.e2e_prefix`` at
    ``args.begin_epoch``. Otherwise they come from the pretrained model
    (``args.pretrained`` / ``args.pretrained_epoch``); the first convolution
    is widened to ``config.NUM_SLICES`` input channels and the RPN and
    framework-specific head layers are randomly initialised.

    Args:
        args: Parsed command-line namespace (reads ``resume``, ``e2e_prefix``,
            ``begin_epoch``, ``pretrained``, ``pretrained_epoch``).
        sym: Symbol whose arguments and auxiliary states must all be covered.
        train_data: Data iterator; its ``provide_data`` and ``provide_label``
            supply the shapes used for shape inference.

    Returns:
        Tuple ``(arg_params, aux_params)``.

    Raises:
        AssertionError: If a parameter is missing or has a shape different
            from the inferred one.
    """
    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    if args.resume:
        # load params from previously trained model
        arg_params, aux_params = load_param(args.e2e_prefix, args.begin_epoch,
                                            convert=True)
    else:
        # initialize weights from pretrained model and random numbers
        arg_params, aux_params = load_param(args.pretrained,
                                            args.pretrained_epoch,
                                            convert=True)

        # deal with multiple input CT slices, see 3DCE paper.
        # if NUM_SLICES = 3, pretrained weights won't be changed
        # if NUM_SLICES > 3, extra input channels in conv1_1 will be
        # initialized to 0
        nCh = config.NUM_SLICES
        w1 = arg_params['conv1_1_weight'].asnumpy()
        w1_new = np.zeros((64, nCh, 3, 3), dtype=float)
        # Floor division keeps the slice bounds integral on Python 3 as well
        # (true division would yield floats, which are invalid slice indices).
        first_ch = (nCh - 3) // 2
        w1_new[:, first_ch:first_ch + 3, :, :] = w1
        arg_params['conv1_1_new_weight'] = mx.nd.array(w1_new)
        arg_params['conv1_1_new_bias'] = arg_params['conv1_1_bias']
        del arg_params['conv1_1_weight']

        # (layer name, weight sigma) pairs; each layer gets a normally
        # distributed weight and a zero bias, in the order listed.
        rpn_layers = (
            ('rpn_conv_3x3', 0.01),
            ('rpn_cls_score', 0.01),
            ('rpn_bbox_pred', 0.001),
        )
        head_layers = {
            '3DCE': (
                ('conv_new_1', 0.01),
                ('fc6', 0.001),
                ('cls_score', 0.01),
                ('bbox_pred', 0.001),
            ),
            'RFCN': (
                ('conv_new_1', 0.01),
                ('rfcn_cls', 0.01),
                ('rfcn_bbox', 0.01),
            ),
            'Faster': (
                ('fc6_small', 0.001),
                ('fc7_small', 0.001),
                ('cls_score', 0.01),
                ('bbox_pred', 0.001),
            ),
        }
        # An unrecognised framework adds no head layers.
        for layer, sigma in rpn_layers + head_layers.get(config.FRAMEWORK, ()):
            weight_name = layer + '_weight'
            bias_name = layer + '_bias'
            arg_params[weight_name] = mx.random.normal(
                0, sigma, shape=arg_shape_dict[weight_name])
            arg_params[bias_name] = mx.nd.zeros(
                shape=arg_shape_dict[bias_name])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    logger.info('load param done')
    return arg_params, aux_params
def train_net(network, dataset, image_set, root_path, dataset_path, frequent,
              kvstore, work_load_list, no_flip, no_shuffle, resume, ctx,
              pretrained, epoch, prefix, begin_epoch, end_epoch, train_shared,
              lr, lr_step, proposal, maskrcnn_stage=None):
    """Train the Mask R-CNN heads on cached proposal roidbs.

    The roidb and the bbox-target means/stds are cached as pickle files under
    ``<root_path>/cache``; they are rebuilt and written only when one of the
    three cache files is missing.

    Args:
        network: Network name; ``get_<network>_train`` builds the symbol.
        dataset: Dataset name (also used in the cache file names).
        image_set: ``'+'``-separated image-set names.
        root_path: Root directory holding the ``cache`` folder.
        dataset_path: Passed to ``load_proposal_roidb``.
        frequent: Logging frequency for the ``Speedometer`` callback.
        kvstore: Passed to ``mod.fit``.
        work_load_list: Passed to the data iterator and the module.
        no_flip: When true, roidbs are loaded without flipping.
        no_shuffle: When true, the data iterator does not shuffle.
        resume: When true, load parameters from ``prefix``/``begin_epoch``
            instead of ``pretrained``/``epoch``.
        ctx: Sequence of device contexts; ``len(ctx)`` is the batch size.
        pretrained: Prefix passed to ``load_param`` when not resuming.
        epoch: Epoch passed to ``load_param`` when not resuming.
        prefix: Checkpoint prefix.
        begin_epoch: Epoch to start from.
        end_epoch: Passed to ``mod.fit`` as ``num_epoch``.
        train_shared: Selects ``config.FIXED_PARAMS_SHARED`` over
            ``config.FIXED_PARAMS`` as the fixed-parameter prefixes.
        lr: Base learning rate.
        lr_step: Comma-separated epochs at which the learning rate is
            multiplied by 0.1.
        proposal: Passed to ``load_proposal_roidb``.
        maskrcnn_stage: Optional suffix that selects stage-specific cache
            files.
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + network + '_train')(num_classes=config.NUM_CLASSES,
                                            num_anchors=config.NUM_ANCHORS)
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(
            sym.get_internals()['rpn_cls_score_stride%s_output' % stride])

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    logger.info(pprint.pformat(config))

    # Cache file names; a stage suffix is appended when maskrcnn_stage is set.
    stage_suffix = '' if maskrcnn_stage is None else '_' + maskrcnn_stage
    cache_prefix = root_path + '/cache/' + dataset
    roidb_file = cache_prefix + '_roidb_with_mask' + stage_suffix + '.pkl'
    mean_file = cache_prefix + '_roidb_mean' + stage_suffix + '.pkl'
    std_file = cache_prefix + '_roidb_std' + stage_suffix + '.pkl'

    if osp.exists(roidb_file) and osp.exists(mean_file) and osp.exists(
            std_file):
        # NOTE: pickle is only safe for trusted files; these caches are
        # expected to have been written by this function.
        # Binary mode is required for pickle data (mandatory on Python 3).
        print('Load ' + roidb_file)
        with open(roidb_file, 'rb') as f:
            roidb = pkl.load(f)
        print('Load ' + mean_file)
        with open(mean_file, 'rb') as f:
            means = pkl.load(f)
        print('Load ' + std_file)
        with open(std_file, 'rb') as f:
            stds = pkl.load(f)
    else:
        # load dataset and prepare imdb for training
        image_sets = image_set.split('+')
        roidbs = [
            load_proposal_roidb(dataset, iset, root_path, dataset_path,
                                proposal=proposal, append_gt=True,
                                flip=not no_flip)
            for iset in image_sets
        ]
        roidb = merge_roidb(roidbs)

        def filter_roidb(roidb):
            """ remove roidb entries without usable rois """

            def is_valid(entry):
                """ valid images have at least 1 fg and 1 bg roi """
                overlaps = entry['max_overlaps']
                fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
                bg_inds = np.where(
                    (overlaps < config.TRAIN.BG_THRESH_HI) &
                    (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
                valid = len(fg_inds) > 0 and len(bg_inds) > 0
                return valid

            num = len(roidb)
            filtered_roidb = [entry for entry in roidb if is_valid(entry)]
            num_after = len(filtered_roidb)
            print('filtered %d roidb entries: %d -> %d' %
                  (num - num_after, num, num_after))
            return filtered_roidb

        roidb = filter_roidb(roidb)
        means, stds = add_bbox_regression_targets(roidb)
        add_assign_targets(roidb)
        add_mask_targets(roidb)
        # Protocol -1 is a binary pickle protocol, so write in binary mode.
        for cache_path, obj in zip([roidb_file, mean_file, std_file],
                                   [roidb, means, stds]):
            with open(cache_path, 'wb') as f:
                pkl.dump(obj, f, -1)

    # load training data
    train_data = MaskROIIter(roidb,
                             batch_size=input_batch_size,
                             shuffle=not no_shuffle,
                             ctx=ctx,
                             work_load_list=work_load_list,
                             aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_label_shape = []
    for s in config.RCNN_FEAT_STRIDE:
        max_data_shape.append(
            ('rois_stride%s' % s,
             (input_batch_size, config.TRAIN.BATCH_ROIS, 5)))
        max_label_shape.append(
            ('label_stride%s' % s,
             (input_batch_size, config.TRAIN.BATCH_ROIS)))
        max_label_shape.append(
            ('bbox_target_stride%s' % s,
             (input_batch_size,
              config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(
            ('bbox_weight_stride%s' % s,
             (input_batch_size,
              config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(
            ('mask_target_stride%s' % s,
             (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES,
              28, 28)))
        max_label_shape.append(
            ('mask_weight_stride%s' % s,
             (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES,
              1, 1)))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    # Materialise the pairs so pprint shows them (a bare zip object prints
    # as an opaque iterator on Python 3); output order follows the symbol.
    out_shapes = list(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape:')
    pprint.pprint(out_shapes)

    # load and initialize params
    if resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)

    init_bbox_pred = mx.init.Normal(sigma=0.001)
    init_internal = mx.init.Normal(sigma=0.01)
    init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        if k not in arg_params:
            print('init', k)
            arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
            init_internal(k, arg_params[k])
            if k in ['rcnn_fc_bbox_weight', 'bbox_pred_weight']:
                init_bbox_pred(k, arg_params[k])
            if k.endswith('bias'):
                arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
            if 'ctx_red_weight' in k:
                # Weight = [identity | zeros] concatenated along axis 1.
                ctx_shape = np.array(arg_shape_dict[k])
                # Floor division: in-place true division is rejected for
                # integer numpy arrays on Python 3.
                ctx_shape[1] //= 2
                arg_params[k][:] = np.concatenate(
                    (np.eye(ctx_shape[1]).reshape(ctx_shape),
                     np.zeros(ctx_shape)),
                    axis=1)

    for k in sym.list_auxiliary_states():
        if k not in aux_params:
            print('init', k)
            aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k])
            init(k, aux_params[k])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    if train_shared:
        fixed_param_prefix = config.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = config.FIXED_PARAMS
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNRegLossMetric()
    mask_acc_metric = metric.MaskAccMetric()
    mask_log_metric = metric.MaskLogLossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            eval_metric, cls_metric, bbox_metric, mask_acc_metric,
            mask_log_metric
    ]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=frequent)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    # Steps at or before begin_epoch have already happened: fold them into
    # the starting learning rate and schedule only the remaining ones.
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0001,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)