def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
    """Load an SSH-style face detector checkpoint and bind it for inference.

    :param prefix: checkpoint path prefix passed to mx.model.load_checkpoint
    :param epoch: checkpoint epoch number to load
    :param ctx_id: GPU device id; a negative value selects CPU context and CPU NMS
    :param test_mode: if True, bind a MutableModule with a large (2400x2400)
        max input shape instead of a fixed 640x640 Module
    """
    self.ctx_id = ctx_id
    self.test_mode = test_mode
    # One entry per FPN level; keys look like 'stride32', 'stride16', 'stride8'.
    self.fpn_keys = []
    fpn_stride = []
    fpn_base_size = []
    self._feat_stride_fpn = [32, 16, 8]
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
        fpn_stride.append(int(s))
        fpn_base_size.append(16)
    self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
    # Anchor templates per stride key, and the anchor count per spatial cell.
    self._anchors_fpn = dict(
        zip(
            self.fpn_keys,
            generate_anchors_fpn(base_size=fpn_base_size,
                                 scales=self._scales,
                                 ratios=self._ratios)))
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000  # proposals kept per level before NMS
    # self._rpn_post_nms_top_n = rpn_post_nms_top_n
    # self.score_threshold = 0.05
    self.nms_threshold = 0.3
    self._bbox_pred = nonlinear_pred  # delta -> box decoding function
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Negative ctx_id means CPU inference and CPU NMS.
    if self.ctx_id >= 0:
        self.ctx = mx.gpu(self.ctx_id)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    else:
        self.ctx = mx.cpu()
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    # self.nms = py_nms_wrapper(self.nms_threshold)
    # self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR, VGG16
    self.pixel_means = np.array([0.0, 0.0, 0.0])  # BGR, R50
    if not self.test_mode:
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)
    else:
        # MutableModule accepts variable input sizes up to max_data_shapes.
        from rcnn.core.module import MutableModule
        image_size = (2400, 2400)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                   context=self.ctx, max_data_shapes=data_shape)
        self.model.bind(data_shape, None, for_training=False)
        self.model.set_params(arg_params, aux_params)
def __init__(self, symbol, data_names, label_names, context=mx.cpu(), max_data_shapes=None, provide_data=None, provide_label=None, arg_params=None, aux_params=None):
    """Build a MutableModule around `symbol`, bind it for inference only,
    and initialize it with the supplied parameters."""
    module = MutableModule(symbol, data_names, label_names,
                           context=context, max_data_shapes=max_data_shapes)
    module.bind(provide_data, provide_label, for_training=False)
    module.init_params(arg_params=arg_params, aux_params=aux_params)
    self._mod = module
def __init__(self, gpu=0, test_mode=False):
    """Load the bundled SSH face-detection model and bind it for inference.

    :param gpu: GPU device id, used for both the network context and NMS
    :param test_mode: if True, bind a MutableModule (variable input size up to
        640x640) instead of a fixed-shape Module
    """
    self.ctx_id = gpu
    self.ctx = mx.gpu(self.ctx_id)
    # One entry per FPN level; keys look like 'stride32', 'stride16', 'stride8'.
    self.fpn_keys = []
    fpn_stride = []
    fpn_base_size = []
    self._feat_stride_fpn = [32, 16, 8]
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
        fpn_stride.append(int(s))
        fpn_base_size.append(16)
    self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
    # Anchor templates per stride key, and the anchor count per spatial cell.
    self._anchors_fpn = dict(
        zip(
            self.fpn_keys,
            generate_anchors_fpn(base_size=fpn_base_size,
                                 scales=self._scales,
                                 ratios=self._ratios)))
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000  # proposals kept per level before NMS
    # self._rpn_post_nms_top_n = rpn_post_nms_top_n
    # self.score_threshold = 0.05
    self.nms_threshold = config.TEST.NMS
    self._bbox_pred = nonlinear_pred  # delta -> box decoding function
    # Checkpoint location is hard-coded relative to this source file.
    base_path = os.path.dirname(__file__)
    sym, arg_params, aux_params = mx.model.load_checkpoint(
        base_path + '/model/model-v2.0/e2e', 1)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR
    if not test_mode:
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)
    else:
        # MutableModule accepts variable input sizes up to max_data_shapes.
        from rcnn.core.module import MutableModule
        image_size = (640, 640)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                   context=self.ctx, max_data_shapes=data_shape)
        self.model.bind(data_shape, None, for_training=False)
        self.model.set_params(arg_params, aux_params)
    print('init ssh success')
def __init__(self, prefix, epoch, ctx_id=1, test_mode=False):
    """Load a face-detection checkpoint configured from config.RPN_* settings.

    :param prefix: checkpoint path prefix for mx.model.load_checkpoint
    :param epoch: checkpoint epoch to load
    :param ctx_id: GPU device id used for both inference and NMS
    :param test_mode: if True, bind a MutableModule with a 2400x2400 max shape
        instead of a fixed 640x640 Module
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.keys = []
    strides = []
    base_size = []
    scales = []
    # Collect per-stride anchor configuration from the global config.
    self.feat_strides = config.RPN_FEAT_STRIDE
    for s in self.feat_strides:
        self.keys.append('stride%s' % s)
        strides.append(int(s))
        base_size.append(config.RPN_ANCHOR_CFG[str(s)]['BASE_SIZE'])
        scales += config.RPN_ANCHOR_CFG[str(s)]['SCALES']
    # self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._scales = np.array(scales)
    self._ratios = np.array([1.0] * len(self.feat_strides))
    # self._anchors_fpn = dict(list(zip(self.keys, generate_anchors_fpn())))
    self._anchors_fpn = dict(
        list(
            zip(
                self.keys,
                generate_anchors_fpn(base_size=base_size,
                                     scales=self._scales,
                                     ratios=self._ratios))))
    self._num_anchors = dict(
        zip(self.keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000  # proposals kept per level before NMS
    # self._rpn_post_nms_top_n = rpn_post_nms_top_n
    # A larger threshold keeps more overlapping predictions per face after NMS.
    self.nms_threshold = config.test_nms_threshold
    self._bbox_pred = nonlinear_pred  # delta -> box decoding function
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    # Fix: the previous assignment of ImageNet BGR means (103.939, 116.779,
    # 123.68) was a dead store, immediately overwritten below; removed.
    self.pixel_means = np.array([127., 127., 127.])
    if not test_mode:
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)
    else:
        # MutableModule accepts variable input sizes up to max_data_shapes.
        from rcnn.core.module import MutableModule
        image_size = (2400, 2400)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                   context=self.ctx, max_data_shapes=data_shape)
        self.model.bind(data_shape, None, for_training=False)
        self.model.set_params(arg_params, aux_params)
def __init__(self, network, prefix, epoch, ctx_id=0, mask_nms=True):
    """Build a mask-test network from a checkpoint and bind it for inference.

    :param network: network name; resolved to a builder `get_<network>_mask_test`
    :param prefix: checkpoint path prefix for load_param
    :param epoch: checkpoint epoch to load
    :param ctx_id: GPU device id for the context and NMS
    :param mask_nms: selects which configured NMS threshold is used
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.mask_nms = mask_nms
    #self.nms_threshold = 0.3
    #self._bbox_pred = nonlinear_pred
    # Choose the NMS threshold: standard TEST.NMS, or the RPN threshold when
    # mask-aware NMS is requested.
    if not self.mask_nms:
        self.nms = gpu_nms_wrapper(config.TEST.NMS, self.ctx_id)
    else:
        self.nms = gpu_nms_wrapper(config.TEST.RPN_NMS_THRESH, self.ctx_id)
    #self.nms = py_nms_wrapper(config.TEST.NMS)
    # Resolve the symbol builder by name, e.g. get_resnet_mask_test(...).
    sym = eval('get_' + network + '_mask_test')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    #arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    arg_params, aux_params = load_param(prefix, epoch, convert=True,
                                        ctx=self.ctx, process=True)
    split = False  # NOTE(review): unused local, kept as-is
    max_image_shape = (1, 3, 1600, 1600)
    #max_image_shape = (1,3,1200,2200)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym, data_names=["data", "im_info"],
                        label_names=None, max_data_shapes=max_data_shapes,
                        context=self.ctx)
    mod.bind(data_shapes=max_data_shapes, label_shapes=None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)
    self.model = mod
    pass
class Predictor(object):
    """Thin inference wrapper around a bound, initialized MutableModule."""

    def __init__(self, symbol, data_names, label_names, context=mx.cpu(), max_data_shapes=None, provide_data=None, provide_label=None, arg_params=None, aux_params=None):
        """Construct the module, bind it for inference, and load parameters."""
        module = MutableModule(symbol, data_names, label_names,
                               context=context, max_data_shapes=max_data_shapes)
        module.bind(provide_data, provide_label, for_training=False)
        module.init_params(arg_params=arg_params, aux_params=aux_params)
        self._mod = module

    def predict(self, data_batch):
        """Run one forward pass and return outputs keyed by output name."""
        module = self._mod
        module.forward(data_batch)
        outputs = module.get_outputs()
        return {name: out for name, out in zip(module.output_names, outputs)}
def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
    """Load an SSH-style detector checkpoint on a GPU and bind it for inference.

    :param prefix: checkpoint path prefix for mx.model.load_checkpoint
    :param epoch: checkpoint epoch to load
    :param ctx_id: GPU device id for both the network context and NMS
    :param test_mode: if True, bind a MutableModule with a 2400x2400 max shape
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    # One FPN level per feature stride; keys look like 'stride32', 'stride16', 'stride8'.
    self._feat_stride_fpn = [32, 16, 8]
    self.fpn_keys = ['stride%s' % stride for stride in self._feat_stride_fpn]
    fpn_stride = [int(stride) for stride in self._feat_stride_fpn]
    fpn_base_size = [16 for _ in self._feat_stride_fpn]
    self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
    # Anchor templates per stride key and the per-cell anchor counts.
    anchors_per_level = generate_anchors_fpn(base_size=fpn_base_size,
                                             scales=self._scales,
                                             ratios=self._ratios)
    self._anchors_fpn = dict(zip(self.fpn_keys, anchors_per_level))
    self._num_anchors = {key: anchors.shape[0]
                         for key, anchors in self._anchors_fpn.items()}
    self._rpn_pre_nms_top_n = 1000  # proposals kept per level before NMS
    # self._rpn_post_nms_top_n = rpn_post_nms_top_n
    # self.score_threshold = 0.05
    self.nms_threshold = 0.3
    self._bbox_pred = nonlinear_pred  # delta -> box decoding function
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR
    if test_mode:
        # MutableModule accepts variable input sizes up to max_data_shapes.
        from rcnn.core.module import MutableModule
        image_size = (2400, 2400)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                              context=self.ctx, max_data_shapes=data_shape)
        model.bind(data_shape, None, for_training=False)
    else:
        image_size = (640, 640)
        model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
        model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))],
                   for_training=False)
    model.set_params(arg_params, aux_params)
    self.model = model
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'):
    """End-to-end Faster R-CNN training driver.

    :param args: parsed command-line arguments (network, dataset, flags)
    :param ctx: list of mx.context devices; one training image per device
    :param pretrained: prefix of the pretrained checkpoint to initialize from
    :param epoch: epoch of the pretrained checkpoint
    :param prefix: prefix used when saving checkpoints
    :param begin_epoch: epoch to start from (used when resuming)
    :param end_epoch: epoch to stop at
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which lr is multiplied by 0.1
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol (builder resolved by network name, e.g. get_resnet_train)
    if args.use_global_context or args.use_roi_align:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS,
            use_global_context=args.use_global_context,
            use_roi_align=args.use_roi_align)
    else:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu: effective batch is BATCH_IMAGES per device
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    if not args.use_ava_recordio:
        # load dataset and prepare imdb for training
        image_sets = [iset for iset in args.image_set.split('+')]
        roidbs = [
            load_gt_roidb(args.dataset, image_set, args.root_path,
                          args.dataset_path, flip=not args.no_flip)
            for image_set in image_sets
        ]
        roidb = merge_roidb(roidbs)
        roidb = filter_roidb(roidb)
        # load training data
        train_data = AnchorLoader(
            feat_sym, roidb, batch_size=input_batch_size,
            shuffle=not args.no_shuffle, ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)
    else:
        # Read the class list; fix: close the file (it was previously leaked).
        classes = ['__background__']
        with open(args.classes_names) as f:
            for line in f.readlines():
                classes.append(line.strip().split(' ')[0])
        path_imgidx = args.ava_recordio_name + '.idx'
        path_imgrec = args.ava_recordio_name + '.rec'
        record = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
        train_data = AnchorLoaderAvaRecordIO(
            feat_sym, record, classes, batch_size=input_batch_size,
            shuffle=not args.no_shuffle, ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)

    # infer max shape so the mutable module can pre-allocate
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)
    print('arg shape')
    # pprint.pprint(arg_shape_dict)

    # load and initialize params; the new RPN/RCNN heads get fresh gaussian
    # weights and zero biases when not resuming
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])
        if args.use_global_context:
            # additional params for using global context
            """
            for arg_param_name in sym.list_arguments():
                if 'stage5' in arg_param_name:
                    # print(arg_param_name, arg_param_name.replace('stage5', 'stage4'))
                    arg_params[arg_param_name] = arg_params[arg_param_name.replace('stage5', 'stage4')].copy()  # params of stage5 is initialized from stage4
            arg_params['bn2_gamma'] = arg_params['bn1_gamma'].copy()
            arg_params['bn2_beta'] = arg_params['bn1_beta'].copy()
            """
            for aux_param_name in sym.list_auxiliary_states():
                if 'stage5' in aux_param_name:
                    # print(aux_param_name, aux_param_name.replace('stage5', 'stage4'))
                    # aux params of stage5 are initialized from stage4
                    aux_params[aux_param_name] = aux_params[aux_param_name.replace(
                        'stage5', 'stage4')].copy()
            aux_params['bn2_moving_mean'] = aux_params['bn1_moving_mean'].copy()
            aux_params['bn2_moving_var'] = aux_params['bn1_moving_var'].copy()

    # check parameter shapes against the inferred symbol shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate: step decay at the epochs listed in lr_step
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    if not args.use_ava_recordio:
        lr_iters = [
            int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
        ]
    else:
        lr_iters = [
            int(epoch * train_data.provide_size() / batch_size)
            for epoch in lr_epoch_diff
        ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }
    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
class SSHDetector:
    """SSH-style multi-scale face detector backed by an MXNet checkpoint.

    Runs the network on one or more image scales, decodes the per-stride
    (32/16/8) score and bbox-delta outputs against anchor templates, then
    applies NMS and a score threshold.
    """

    def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
        """Load checkpoint `prefix`-`epoch` and bind the network.

        :param prefix: checkpoint path prefix for mx.model.load_checkpoint
        :param epoch: checkpoint epoch to load
        :param ctx_id: >= 0 selects that GPU for the network; negative -> CPU
        :param test_mode: if True, bind a MutableModule with a 2400x2400 max shape
        """
        # NOTE(review): self.ctx_id is hard-coded to -1 regardless of the
        # ctx_id argument, and that -1 is later handed to gpu_nms_wrapper.
        # Kept as-is since it may be a deliberate CPU-NMS fallback -- confirm
        # against the nms wrapper implementation.
        self.ctx_id = -1
        if ctx_id >= 0:
            self.ctx = mx.gpu(ctx_id)
        else:
            self.ctx = mx.cpu()
        # One entry per FPN level; keys look like 'stride32', 'stride16', 'stride8'.
        self.fpn_keys = []
        fpn_stride = []
        fpn_base_size = []
        self._feat_stride_fpn = [32, 16, 8]
        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
            fpn_stride.append(int(s))
            fpn_base_size.append(16)
        self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
        # self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
        self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn()))
        self._num_anchors = dict(
            zip(self.fpn_keys,
                [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000  # proposals kept per level before NMS
        # self._rpn_post_nms_top_n = rpn_post_nms_top_n
        # self.score_threshold = 0.05
        self.nms_threshold = 0.3
        self._bbox_pred = nonlinear_pred  # delta -> box decoding function
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        # Fix: the ImageNet BGR means (103.939, 116.779, 123.68) were a dead
        # store immediately overwritten by config.PIXEL_MEANS; removed.
        self.pixel_means = config.PIXEL_MEANS
        print('means', self.pixel_means)
        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym, context=self.ctx,
                                       label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))],
                            for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            # MutableModule accepts variable input sizes up to max_data_shapes.
            from rcnn.core.module import MutableModule
            image_size = (2400, 2400)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym, data_names=['data'],
                                       label_names=None, context=self.ctx,
                                       max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)

    def detect(self, img, threshold=0.05, scales=None):
        """Detect faces in a BGR image.

        :param img: HxWx3 BGR image array
        :param threshold: final score threshold; detections below it are dropped
        :param scales: list of resize factors to run (default [1.0]); when
            multi-scale, the stride-32 level is skipped on the last scale
        :return: (N, 5) float32 array of [x1, y1, x2, y2, score]
        """
        # Fix: the original used a mutable default argument (scales=[1.0]).
        if scales is None:
            scales = [1.0]
        proposals_list = []
        scores_list = []
        for im_scale in scales:
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            im_info = [im.shape[0], im.shape[1], im_scale]
            # HWC BGR -> NCHW RGB with per-channel mean subtraction.
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data, ),
                                 provide_data=[('data', data.shape)])
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n
            for s in self._feat_stride_fpn:
                # Skip the coarsest level on the last scale when multi-scale.
                if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                    continue
                stride = int(s)
                # Outputs come in (score, delta) pairs per stride: 0/1, 2/3, 4/5.
                idx = 0
                if s == 16:
                    idx = 2
                elif s == 8:
                    idx = 4
                print('getting', im_scale, stride, idx, len(net_out), data.shape,
                      file=sys.stderr)
                scores = net_out[idx].asnumpy()
                idx += 1
                # Keep only the foreground half of the score channels.
                scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
                bbox_deltas = net_out[idx].asnumpy()
                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                A = self._num_anchors['stride%s' % s]
                K = height * width
                anchors = anchors_plane(
                    height, width, stride,
                    self._anchors_fpn['stride%s' % s].astype(np.float32))
                anchors = anchors.reshape((K * A, 4))
                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                # Decode deltas against anchors and clip to the image.
                proposals = self._bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, im_info[:2])
                # Keep the top pre_nms_topN proposals for this level.
                scores_ravel = scores.ravel()
                order = scores_ravel.argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]
                proposals /= im_scale  # back to original image coordinates
                proposals_list.append(proposals)
                scores_list.append(scores)
        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        # Global descending sort across all levels and scales.
        order = scores.ravel().argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]
        det = np.hstack((proposals, scores)).astype(np.float32)
        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]
        return det

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """Return indices of boxes whose width and height are both >= min_size."""
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """Clip the padded area of a feature map.

        :param tensor: [n, c, H, W] array
        :param pad_shape: (h, w) target spatial size
        :return: [n, c, h, w] array (a copy when clipping occurred)
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape
        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()
        return tensor
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'):
    """End-to-end training driver for the RNN/bucketing network variant.

    :param args: parsed command-line arguments (network, dataset, flags)
    :param ctx: list of mx.context devices; one training image per device
    :param pretrained: prefix of the pretrained checkpoint to initialize from
    :param epoch: epoch of the pretrained checkpoint
    :param prefix: prefix used when saving checkpoints
    :param begin_epoch: epoch to start from (used when resuming)
    :param end_epoch: epoch to stop at
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which lr is multiplied by 0.1
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol; the builder class is resolved by name, e.g. symbol_resnet()
    sym_instance = eval('symbol_' + args.network)()
    sym_gen = sym_instance.get_symbol
    # 46 is the bucket key / sequence length used for shape inference here.
    sym = sym_gen(46, config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu: effective batch is BATCH_IMAGES per device
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    dataset = Dataset(args.root_path, args.dataset, args.subset, split=args.split)
    roidb = dataset.gt_roidb()
    W = dataset.W  # word-embedding matrix used when config.RNN.USE_W2V

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size,
                              shuffle=not args.no_shuffle, ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS)

    # infer max shape so the mutable module can pre-allocate
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    #get a new symbol
    bucket_key = train_data.bucket_key
    print(train_data.provide_data)
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    sym_instance.infer_shape(data_shape_dict)
    #arg_shape, out_shape, aux_shape = curr_sym.infer_shape(**data_shape_dict)
    #arg_shape_dict = dict(zip(curr_sym.list_arguments(), arg_shape))
    #out_shape_dict = dict(zip(curr_sym.list_outputs(), out_shape))
    #aux_shape_dict = dict(zip(curr_sym.list_auxiliary_states(), aux_shape))
    #del arg_shape_dict['lstm_parameters']
    #print(curr_sym.list_arguments())
    #print(aux_shape_dict)

    # load and initialize params
    if args.resume:
        print("continue training from epoch {}".format(begin_epoch))
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        if config.RNN.USE_W2V:
            arg_params['embed_weight'] = mx.nd.array(W)
        else:
            # NOTE(review): arg_shape_dict is only defined in the commented-out
            # shape-inference code above, so this branch raises NameError if
            # reached -- confirm whether it is dead or those lines need restoring.
            arg_params['embed_weight'] = mx.random.uniform(0, 0.01, shape=arg_shape_dict['embed_weight'])
        sym_instance.init_weight(config, arg_params, aux_params)

    #no checking
    #for k in arg_shape_dict.iterkeys():
    #    if k in data_shape_dict:
    #        continue
    #    assert k in arg_params, k + ' not initialized'
    #assert arg_params[k].shape == arg_shape_dict[k], \
    #    'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    #for k in sym.list_auxiliary_states():
    #    assert k in aux_params, k + ' not initialized'
    #    assert aux_params[k].shape == aux_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver; note sym_gen (a symbol factory) is passed, not a symbol
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym_gen, config, data_names=data_names,
                        label_names=label_names, logger=logger, context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric,
                         eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(config.ENCODER_CELL, prefix, means, stds)
    # decide learning rate: step decay at the epochs listed in lr_step
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}
    #initializer for fused RNN
    #TODO:not successfully added,try ask it on github issues.
    # NOTE(review): this initializer is built but never passed to mod.fit below.
    initializer = mx.initializer.FusedRNN(init=mx.init.Xavier(factor_type='in', magnitude=2.34),
                                          num_hidden=1024, num_layers=2, mode='lstm')
    # train
    mod.fit(train_data, eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            allow_missing=True,
            initializer=mx.init.Xavier(factor_type='in', magnitude=2.34),
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
class SSHDetector:
    """SSH face detector.

    Loads the bundled ``model/e2e`` MXNet checkpoint and runs FPN-style
    proposal extraction (three strides) plus GPU NMS on a border-padded,
    optionally multi-scale input image.
    """

    def __init__(self, gpu=0, test_mode=False):
        """Load the checkpoint onto the given GPU and bind the network.

        :param gpu: GPU device id used for both the model and NMS.
        :param test_mode: when True, wrap the symbol in ``MutableModule`` so
            the input shape may vary between forward passes.
        """
        self.ctx_id = gpu
        self.ctx = mx.gpu(self.ctx_id)
        self.fpn_keys = []
        fpn_stride = []
        fpn_base_size = []
        # detection heads at three feature strides
        self._feat_stride_fpn = [32, 16, 8]
        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
            fpn_stride.append(int(s))
            fpn_base_size.append(16)
        self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
        # per-stride anchor templates keyed by 'stride32'/'stride16'/'stride8'
        self._anchors_fpn = dict(
            zip(
                self.fpn_keys,
                generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
        # anchors per spatial location, per stride
        self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000  # keep at most this many proposals per stride before NMS
        # self._rpn_post_nms_top_n = rpn_post_nms_top_n
        # self.score_threshold = 0.05
        self.nms_threshold = config.TEST.NMS
        self._bbox_pred = nonlinear_pred
        # checkpoint is expected next to this file under model/e2e-0000.params
        base_path = os.path.dirname(__file__)
        sym, arg_params, aux_params = mx.model.load_checkpoint(base_path + '/model/e2e', 0)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR
        if not test_mode:
            # fixed-shape Module; NOTE(review): detect() feeds variable-sized
            # inputs through this binding — presumably the Module reshapes on
            # forward, confirm against the MXNet version in use
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (640, 640)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                       context=self.ctx, max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)
        print('init ssh success')

    def get_boxes(self, im, scales_index):
        """Choose the image-pyramid scales used by :meth:`detect`.

        ``scales_index`` 0 -> original image only; 1 -> the configured
        pyramid scales; anything else -> a single scale that shrinks the
        image so it fits within 1200x800 (or 1.0 if already smaller).
        """
        # prevent bigger axis from being more than max_size:
        if scales_index == 0:
            scales = [1.0]  # use the original image
        elif scales_index == 1:
            scales = config.TEST.PYRAMID_SCALES  # switch to the pyramidbox-style scale scheme
        else:
            im_shape = im.shape
            target_h = 800.
            target_w = 1200.
            # +20*2 presumably compensates for border padding — TODO confirm
            # (detect() actually pads with config.TEST.CONSTANT, not 20)
            src_h = im_shape[0] + 20 * 2
            src_w = im_shape[1] + 20 * 2
            im_scale = min(target_w / src_w, target_h / src_h)
            if im_shape[0] < 800 and im_shape[1] < 1200:
                scales = [1.0]
            else:
                scales = [im_scale]
        return scales

    def detect(self, img, scales_index=0):
        """Run detection on ``img`` and return boxes as an (N, 5) float32
        array of [x1, y1, x2, y2, score] in the original image's coordinates.

        The image is padded with a constant black border, run at one or more
        scales (see :meth:`get_boxes`), and the per-scale proposals are merged,
        NMS'd, score-thresholded, then shifted/clamped back into the source image.
        """
        proposals_list = []
        scores_list = []
        im_src = img.copy()
        CONSTANT = config.TEST.CONSTANT
        BLACK = [0, 0, 0]
        # pad all four sides so faces near the border are detectable
        img = cv2.copyMakeBorder(img, CONSTANT, CONSTANT, CONSTANT, CONSTANT, cv2.BORDER_CONSTANT, value=BLACK)
        scales = self.get_boxes(img, scales_index)
        for im_scale in scales:
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            # self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            im_info = [im.shape[0], im.shape[1], im_scale]
            # BGR (OpenCV) -> RGB tensor with per-channel mean subtracted
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n
            for s in self._feat_stride_fpn:
                # when pyramiding, skip the coarsest stride at the last (largest) scale
                if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                    continue
                _key = 'stride%s' % s
                stride = int(s)
                # outputs are laid out as (score, bbox) pairs per stride:
                # indices 0/1 -> stride 32, 2/3 -> stride 16, 4/5 -> stride 8
                idx = 0
                if s == 16:
                    idx = 2
                elif s == 8:
                    idx = 4
                # print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
                scores = net_out[idx].asnumpy()
                # print(scores.shape)
                idx += 1
                # print('scores',stride, scores.shape, file=sys.stderr)
                # keep only the last A channels — presumably the foreground
                # scores of a 2A-channel softmax output; confirm against the symbol
                scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
                bbox_deltas = net_out[idx].asnumpy()
                _height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                A = self._num_anchors['stride%s' % s]
                K = height * width
                # tile the per-location anchors over the feature map
                anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s' % s].astype(np.float32))
                # print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                anchors = anchors.reshape((K * A, 4))
                # print('pre', bbox_deltas.shape, height, width)
                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                # print('after', bbox_deltas.shape, height, width)
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                # print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                # decode deltas into absolute boxes and clip to the image
                proposals = self._bbox_pred(anchors, bbox_deltas)
                # proposals = anchors
                proposals = clip_boxes(proposals, im_info[:2])
                scores_ravel = scores.ravel()
                order = scores_ravel.argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]
                # back to padded-image (scale 1.0) coordinates
                proposals /= im_scale
                # #add by sai with pyramidbox to filt scale face
                # if im_scale > 1:
                #     index = np.where(
                #         np.minimum(proposals[:, 2] - proposals[:, 0] + 1,
                #                    proposals[:, 3] - proposals[:, 1] + 1) < 50)[0]
                #     proposals = proposals[index, :]
                #     scores = scores[index, :]
                # else:
                #     index = np.where(
                #         np.maximum(proposals[:, 2] - proposals[:, 0] + 1,
                #                    proposals[:, 3] - proposals[:, 1] + 1) > 20)[0]
                #     proposals = proposals[index, :]
                #     scores = scores[index, :]
                proposals_list.append(proposals)
                scores_list.append(scores)
        # merge all strides/scales and sort by score
        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]
        det = np.hstack((proposals, scores)).astype(np.float32)
        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        threshold = config.TEST.SCORE_THRESH
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]
        # add by sai
        if det.shape[0] != 0:
            # undo the border padding and clamp boxes to the source image bounds
            for i in range(det.shape[0]):
                det[i, :][0] = det[i, :][0] - CONSTANT
                det[i, :][1] = det[i, :][1] - CONSTANT
                det[i, :][2] = det[i, :][2] - CONSTANT
                det[i, :][3] = det[i, :][3] - CONSTANT
                if det[i, :][0] < 0:
                    det[i, :][0] = 0
                if det[i, :][2] > im_src.shape[1]:
                    det[i, :][2] = im_src.shape[1]
                if det[i, :][1] < 0:
                    det[i, :][1] = 0
                if det[i, :][3] > im_src.shape[0]:
                    det[i, :][3] = im_src.shape[0]
        return det

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """ Remove all boxes with any side smaller than min_size """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """
        Clip boxes of the pad area.
        :param tensor: [n, c, H, W]
        :param pad_shape: [h, w]
        :return: [n, c, h, w]
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape
        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()
        return tensor
def train_net(args):
    """End-to-end training driver (3DCE-style detector).

    Loads/samples the roidb, builds the network symbol and data loader,
    wraps everything in a ``MutableModule`` and runs ``fit``.  All knobs
    come from ``args`` and the global ``config``/``default`` objects.
    """
    # make runs reproducible when a positive seed is given
    if args.rand_seed > 0:
        np.random.seed(args.rand_seed)
        mx.random.seed(args.rand_seed)
        random.seed(args.rand_seed)
    # print config
    logger.info(pprint.pformat(config))
    logger.info(pprint.pformat(args))
    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [load_gt_roidb(args.dataset, image_set, args.dataset_path, flip=args.flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)
    # optionally train on only a percentage of the (filtered) roidb
    samplepcnt = args.begin_sample
    if samplepcnt == 100:
        sroidb = roidb
    else:
        sroidb = sample_roidb(roidb, samplepcnt)  # Sample by percentage of all images
    logger.info('Sampling %d pcnt : %d training slices' % (samplepcnt, len(sroidb)))
    # Debug to see if we can concatenate ROIDB's
    #print(sroidb)
    #dir(sroidb)
    #newroidb = sroidb + roidb
    #newroidb = append_roidb(sroidb, roidb)
    #print( "--Append test: " + str(len(sroidb)) +" " + str(len(roidb)) + " = " + str(len(newroidb)) )
    # load symbol; network name selects the get_<network>() builder
    sym = eval('get_' + args.network)(is_train=True, num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup multi-gpu
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.SAMPLES_PER_BATCH * batch_size
    # load training data
    train_data = AnchorLoader(feat_sym, sroidb, batch_size=input_batch_size, shuffle=args.shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              nThreads=default.prefetch_thread_num)
    # infer max shape
    max_data_shape = [('data', (input_batch_size * config.NUM_IMAGES_3DCE, config.NUM_SLICES,
                                config.MAX_SIZE, config.MAX_SIZE))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size * config.NUM_IMAGES_3DCE, 5, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))
    # load and initialize and check params
    arg_params, aux_params = init_params(args, sym, train_data)
    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    # decide training params
    # metric (accuracy metrics intentionally left out here)
    # rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    # eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_cls_metric, rpn_bbox_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    # checkpoint + validation at every epoch end
    epoch_end_callback = (callback.do_checkpoint(args.e2e_prefix, means, stds),
                          callback.do_validate(args.e2e_prefix))
    # only trainable (non-data, non-label) arguments go to the optimizer
    arg_names = [x for x in sym.list_arguments() if x not in data_names + label_names]
    opt = get_optimizer(args, arg_names, len(sroidb) / input_batch_size, args.iter_size)
    # train; NOTE(review): mutates the global `default` object
    default.testing = False
    mod.fit(train_data, roidb, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer=opt, iter_size=args.iter_size,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=args.begin_epoch, num_epoch=args.e2e_epoch)
def train_net(network, dataset, image_set, root_path, dataset_path, frequent, kvstore,
              work_load_list, no_flip, no_shuffle, resume, ctx, pretrained, epoch, prefix,
              begin_epoch, end_epoch, train_shared, lr, lr_step, proposal, maskrcnn_stage=None):
    """Mask R-CNN training driver.

    Builds (or loads from cache) a mask-annotated roidb, constructs the
    FPN train symbol, initializes/checks parameters, and fits a
    ``MutableModule`` with SGD + multi-factor LR schedule.

    :param maskrcnn_stage: optional stage tag appended to the cache-file names.
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # setup config (mutates the module-global config)
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    # load symbol; one RPN cls-score head per FPN stride
    sym = eval('get_' + network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' % stride])
    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size
    # print config
    logger.info(pprint.pformat(config))
    # roidb/means/stds are cached on disk to skip re-computation
    roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask.pkl'
    mean_file = root_path + '/cache/' + dataset + '_roidb_mean.pkl'
    std_file = root_path + '/cache/' + dataset + '_roidb_std.pkl'
    if maskrcnn_stage is not None:
        roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask_' + maskrcnn_stage + '.pkl'
        mean_file = root_path + '/cache/' + dataset + '_roidb_mean_' + maskrcnn_stage + '.pkl'
        std_file = root_path + '/cache/' + dataset + '_roidb_std_' + maskrcnn_stage + '.pkl'
    if osp.exists(roidb_file) and osp.exists(mean_file) and osp.exists(
            std_file):
        # NOTE(review): text-mode pickle I/O ('r'/'w') is Python-2 style;
        # Python 3 would need 'rb'/'wb'
        print('Load ' + roidb_file)
        with open(roidb_file, 'r') as f:
            roidb = pkl.load(f)
        print('Load ' + mean_file)
        with open(mean_file, 'r') as f:
            means = pkl.load(f)
        print('Load ' + std_file)
        with open(std_file, 'r') as f:
            stds = pkl.load(f)
    else:
        # load dataset and prepare imdb for training
        image_sets = [iset for iset in image_set.split('+')]
        roidbs = [
            load_proposal_roidb(dataset, image_set, root_path, dataset_path,
                                proposal=proposal, append_gt=True, flip=not no_flip)
            for image_set in image_sets
        ]
        roidb = merge_roidb(roidbs)

        def filter_roidb(roidb):
            """ remove roidb entries without usable rois """

            def is_valid(entry):
                """ valid images have at least 1 fg or bg roi """
                overlaps = entry['max_overlaps']
                fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
                bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
                valid = len(fg_inds) > 0 and len(bg_inds) > 0
                return valid

            num = len(roidb)
            filtered_roidb = [entry for entry in roidb if is_valid(entry)]
            num_after = len(filtered_roidb)
            print('filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after))
            return filtered_roidb

        roidb = filter_roidb(roidb)
        # attach regression/assignment/mask targets, then cache to disk
        means, stds = add_bbox_regression_targets(roidb)
        add_assign_targets(roidb)
        add_mask_targets(roidb)
        # NOTE(review): loop variable `file` shadows the Python-2 builtin
        for file, obj in zip([roidb_file, mean_file, std_file], [roidb, means, stds]):
            with open(file, 'w') as f:
                pkl.dump(obj, f, -1)
    # load training data
    train_data = MaskROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle,
                             ctx=ctx, work_load_list=work_load_list,
                             aspect_grouping=config.TRAIN.ASPECT_GROUPING)
    # infer max shape: data plus per-stride rois/labels/bbox/mask targets
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_label_shape = []
    for s in config.RCNN_FEAT_STRIDE:
        max_data_shape.append(('rois_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS, 5)))
        max_label_shape.append(('label_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS)))
        max_label_shape.append(
            ('bbox_target_stride%s' % s,
             (input_batch_size, config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(
            ('bbox_weight_stride%s' % s,
             (input_batch_size, config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(('mask_target_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES, 28, 28)))
        max_label_shape.append(('mask_weight_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES, 1, 1)))
    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    # NOTE(review): plain zip here (not dict) — inconsistent with the other dicts
    out_shape_dict = zip(sym.list_outputs(), out_shape)
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape:')
    pprint.pprint(out_shape_dict)
    # load and initialize params
    if resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        init_bbox_pred = mx.init.Normal(sigma=0.001)
        init_internal = mx.init.Normal(sigma=0.01)
        init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
        # initialize every argument the pretrained checkpoint did not provide
        for k in sym.list_arguments():
            if k in data_shape_dict:
                continue
            if k not in arg_params:
                print('init', k)
                arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
                init_internal(k, arg_params[k])
                if k in ['rcnn_fc_bbox_weight', 'bbox_pred_weight']:
                    # bbox regression heads get a tighter init
                    init_bbox_pred(k, arg_params[k])
                if k.endswith('bias'):
                    arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
                if 'ctx_red_weight' in k:
                    # identity-plus-zeros init: pass the first half of the
                    # input channels through unchanged
                    ctx_shape = np.array(arg_shape_dict[k])
                    ctx_shape[1] /= 2
                    arg_params[k][:] = np.concatenate(
                        (np.eye(ctx_shape[1]).reshape(ctx_shape), np.zeros(ctx_shape)), axis=1)
        for k in sym.list_auxiliary_states():
            if k not in aux_params:
                print('init', k)
                aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k])
                init(k, aux_params[k])
    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)
    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    if train_shared:
        # when sharing features with RPN, freeze the shared backbone params
        fixed_param_prefix = config.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = config.FIXED_PARAMS
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNRegLossMetric()
    mask_acc_metric = metric.MaskAccMetric()
    mask_log_metric = metric.MaskLogLossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            eval_metric, cls_metric, bbox_metric, mask_acc_metric, mask_log_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate: drop by lr_factor at each epoch listed in lr_step,
    # accounting for steps already passed when resuming at begin_epoch
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0001,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }
    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kvstore, optimizer='sgd',
            optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
class SSHDetector:
    """SSH face detector variant with a facial-keypoint branch.

    Loads a named checkpoint and, per forward pass, decodes bounding boxes
    AND 5-point landmarks from a 9-output network (score/bbox/kpoint for
    each of the configured feature strides).
    """

    def __init__(self, prefix, epoch, ctx_id=1, test_mode=False):
        """Load checkpoint ``prefix``-``epoch`` onto GPU ``ctx_id``.

        :param test_mode: when True, use ``MutableModule`` with a large
            (2400x2400) max data shape so input size may vary.
        """
        self.ctx_id = ctx_id
        self.ctx = mx.gpu(self.ctx_id)
        self.keys = []
        strides = []
        base_size = []
        scales = []
        self.feat_strides = config.RPN_FEAT_STRIDE
        # collect per-stride anchor configuration from config
        for s in self.feat_strides:
            self.keys.append('stride%s' % s)
            strides.append(int(s))
            base_size.append(config.RPN_ANCHOR_CFG[str(s)]['BASE_SIZE'])
            scales += config.RPN_ANCHOR_CFG[str(s)]['SCALES']
        # self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._scales = np.array(scales)
        self._ratios = np.array([1.0] * len(self.feat_strides))
        # self._anchors_fpn = dict(list(zip(self.keys, generate_anchors_fpn())))
        self._anchors_fpn = dict(list(zip(self.keys,
                                          generate_anchors_fpn(base_size=base_size,
                                                               scales=self._scales,
                                                               ratios=self._ratios))))
        self._num_anchors = dict(zip(self.keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000  # per-stride cap on proposals before NMS
        #self._rpn_post_nms_top_n = rpn_post_nms_top_n
        # larger threshold -> more boxes survive NMS for the same face
        self.nms_threshold = config.test_nms_threshold
        self._bbox_pred = nonlinear_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68])  #BGR
        # overridden: this model was trained with constant 127 means
        self.pixel_means = np.array([127., 127., 127.])
        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (2400, 2400)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                       context=self.ctx, max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)

    def detect(self, img, threshold=0.5, scales=[1.0]):
        """Detect faces and landmarks in ``img``.

        Returns an (N, 15) float32 array per row:
        [x1, y1, x2, y2, score, kp_x1, kp_y1, ..., kp_x5, kp_y5],
        in the original image's coordinate frame.

        :param threshold: final score threshold applied after NMS.
        :param scales: image pyramid scales to run.
        """
        proposals_list = []
        proposals_kp_list = []
        scores_list = []
        for im_scale in scales:
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            # im_shape = im.shape
            # self.model.bind(data_shapes=[('data', (1, 3, im_shape[0], im_shape[1]))], for_training=False)
            im_info = [im.shape[0], im.shape[1], im_scale]
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]  #bgr2rgb mxnet rgb opencv bgr
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
            timea = datetime.datetime.now()
            self.model.forward(db, is_train=False)
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('forward uses', diff.total_seconds(), 'seconds')
            # the network outputs a list of length 9: for each of the three
            # strides there is a (score, bbox, kpoint) triple
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n
            #post_nms_topN = self._rpn_post_nms_top_n
            #min_size_dict = self._rpn_min_size_fpn
            for s in self.feat_strides:
                _key = 'stride%s' % s
                # print(_key)
                stride = int(s)
                # outputs are grouped in triples: idx 0/3/6 start each stride's group
                if s == self.feat_strides[0]:
                    idx = 0
                if s == self.feat_strides[1]:
                    idx = 3
                elif s == self.feat_strides[2]:
                    idx = 6
                # print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
                # classification scores for this stride
                scores = net_out[idx].asnumpy()
                idx += 1
                # print('scores',stride, scores.shape, file=sys.stderr)
                # keep only the last A channels — presumably the foreground
                # scores (original author marked this with '???'); confirm
                scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
                bbox_deltas = net_out[idx].asnumpy()
                idx += 1
                _height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                # kpoint
                kpoint_deltas = net_out[idx].asnumpy()
                A = self._num_anchors['stride%s' % s]
                K = height * width
                # map anchors back to input-image coordinates
                anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s' % s].astype(np.float32))
                # print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                anchors = anchors.reshape((K * A, 4))
                # print('predict bbox_deltas', bbox_deltas.shape, height, width)
                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                # print('after clip pad', bbox_deltas.shape, height, width)
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
                # 10 values per anchor: x,y for each of 5 landmarks
                kpoint_deltas = self._clip_pad(kpoint_deltas, (height, width))
                kpoint_deltas = kpoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, 10))
                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                # print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                proposals = self._bbox_pred(anchors, bbox_deltas)
                # clip coordinates that fall outside the image
                proposals = clip_boxes(proposals, im_info[:2])
                proposals_kp = kpoint_pred(anchors, kpoint_deltas)
                proposals_kp = clip_points(proposals_kp, im_info[:2])
                # take the top-N proposals by score
                scores_ravel = scores.ravel()
                order = scores_ravel.argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                proposals_kp = proposals_kp[order, :]
                scores = scores[order]
                # back to original-image coordinates
                proposals /= im_scale
                proposals_kp /= im_scale
                proposals_list.append(proposals)
                proposals_kp_list.append(proposals_kp)
                scores_list.append(scores)
        # merge all strides/scales and sort by score
        proposals = np.vstack(proposals_list)
        proposals_kp = np.vstack(proposals_kp_list)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        #if config.TEST.SCORE_THRESH>0.0:
        #    _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)
        #    order = order[:_count]
        #if pre_nms_topN > 0:
        #    order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        proposals_kp = proposals_kp[order, :]
        scores = scores[order]
        det = np.hstack((proposals, scores, proposals_kp)).astype(np.float32)
        #if np.shape(det)[0] == 0:
        #    print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.")
        #    proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32)
        #    scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32)
        #    det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32)
        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]
        return det

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """ Remove all boxes with any side smaller than min_size """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """
        Clip boxes of the pad area.
        :param tensor: [n, c, H, W]
        :param pad_shape: [h, w]
        :return: [n, c, h, w]
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape
        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()
        return tensor
class SSHDetector:
    """SSH face detector (boxes only, no keypoint branch).

    Loads a named checkpoint and runs FPN-style proposal extraction at
    strides 32/16/8 followed by GPU NMS, over an optional image pyramid.
    """

    def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
        """Load checkpoint ``prefix``-``epoch`` onto GPU ``ctx_id``.

        :param test_mode: when True, use ``MutableModule`` with a large
            (2400x2400) max data shape so input size may vary per call.
        """
        self.ctx_id = ctx_id
        self.ctx = mx.gpu(self.ctx_id)
        self.fpn_keys = []
        fpn_stride = []
        fpn_base_size = []
        # detection heads at three feature strides
        self._feat_stride_fpn = [32, 16, 8]
        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
            fpn_stride.append(int(s))
            fpn_base_size.append(16)
        self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
        # per-stride anchor templates keyed by 'stride32'/'stride16'/'stride8'
        self._anchors_fpn = dict(zip(self.fpn_keys,
                                     generate_anchors_fpn(base_size=fpn_base_size,
                                                          scales=self._scales,
                                                          ratios=self._ratios)))
        self._num_anchors = dict(zip(self.fpn_keys,
                                     [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000  # per-stride cap on proposals before NMS
        #self._rpn_post_nms_top_n = rpn_post_nms_top_n
        #self.score_threshold = 0.05
        self.nms_threshold = 0.3
        self._bbox_pred = nonlinear_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68])  #BGR
        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (2400, 2400)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                       context=self.ctx, max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)

    def detect(self, img, threshold=0.05, scales=[1.0]):
        """Detect faces in ``img``.

        Returns an (N, 5) float32 array of [x1, y1, x2, y2, score] rows in
        the original image's coordinate frame.

        :param threshold: final score threshold applied after NMS.
        :param scales: image pyramid scales to run (read-only).
        """
        proposals_list = []
        scores_list = []
        for im_scale in scales:
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            #self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            im_info = [im.shape[0], im.shape[1], im_scale]
            # BGR (OpenCV) -> RGB tensor with per-channel mean subtracted
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n
            #post_nms_topN = self._rpn_post_nms_top_n
            #min_size_dict = self._rpn_min_size_fpn
            for s in self._feat_stride_fpn:
                # when pyramiding, skip the coarsest stride at the last (largest) scale
                if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                    continue
                _key = 'stride%s' % s
                stride = int(s)
                # outputs are laid out as (score, bbox) pairs per stride:
                # indices 0/1 -> stride 32, 2/3 -> stride 16, 4/5 -> stride 8
                idx = 0
                if s == 16:
                    idx = 2
                elif s == 8:
                    idx = 4
                # FIX: debug print disabled — it fired on every stride of every
                # scale; the sibling detector class keeps it commented out too
                # print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
                scores = net_out[idx].asnumpy()
                #print(scores.shape)
                idx += 1
                #print('scores',stride, scores.shape, file=sys.stderr)
                # keep only the last A channels — presumably the foreground
                # scores of a 2A-channel softmax output; confirm against the symbol
                scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
                bbox_deltas = net_out[idx].asnumpy()
                #if DEBUG:
                #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                #    print 'scale: {}'.format(im_info[2])
                _height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                A = self._num_anchors['stride%s' % s]
                K = height * width
                # tile the per-location anchors over the feature map
                anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s' % s].astype(np.float32))
                #print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                anchors = anchors.reshape((K * A, 4))
                #print('pre', bbox_deltas.shape, height, width)
                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                #print('after', bbox_deltas.shape, height, width)
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                #print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                # decode deltas into absolute boxes and clip to the image
                proposals = self._bbox_pred(anchors, bbox_deltas)
                #proposals = anchors
                proposals = clip_boxes(proposals, im_info[:2])
                #keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2])
                #proposals = proposals[keep, :]
                #scores = scores[keep]
                #print('333', proposals.shape)
                scores_ravel = scores.ravel()
                order = scores_ravel.argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]
                # back to original-image coordinates
                proposals /= im_scale
                proposals_list.append(proposals)
                scores_list.append(scores)
        # merge all strides/scales and sort by score
        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        #if config.TEST.SCORE_THRESH>0.0:
        #    _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)
        #    order = order[:_count]
        #if pre_nms_topN > 0:
        #    order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        det = np.hstack((proposals, scores)).astype(np.float32)
        #if np.shape(det)[0] == 0:
        #    print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.")
        #    proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32)
        #    scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32)
        #    det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32)
        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]
        return det

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """ Remove all boxes with any side smaller than min_size """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """
        Clip boxes of the pad area.
        :param tensor: [n, c, H, W]
        :param pad_shape: [h, w]
        :return: [n, c, h, w]
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape
        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()
        return tensor
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train an end-to-end RPN + RCNN detection network.

    Loads a pretrained backbone checkpoint (or resumes from ``prefix``),
    re-initializes the detection heads, and fits a ``MutableModule`` with
    SGD under a multi-factor learning-rate schedule.

    :param args: parsed command-line namespace (network, dataset, paths, ...)
    :param ctx: list of mx contexts (one image per device)
    :param pretrained: prefix of the pretrained checkpoint
    :param epoch: epoch of the pretrained checkpoint to load
    :param prefix: prefix used for resuming and for saving checkpoints
    :param begin_epoch: first training epoch
    :param end_epoch: last training epoch (exclusive bound for mod.fit)
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which lr is divided by 10
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    # load symbol for the requested backbone (resolved by name via eval)
    sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup multi-gpu: one BATCH_IMAGES per device
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size
    # print config
    pprint.pprint(config)
    # load dataset and prepare imdb for training ('+' joins multiple image sets)
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path, flip=not args.no_flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)
    # load training data
    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)
    # infer max shape: largest configured scale, plus room for 100 gt boxes
    max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)
    # infer shape of every argument / output / auxiliary state
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)
    # load and initialize params: resume keeps everything, otherwise the
    # detection heads are re-initialized on top of the pretrained backbone
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # gaussian init for head weights, zero init for their biases
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])
    # check parameter shapes (data/label entries are provided, not learned)
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)
    # create solver; FIXED_PARAMS prefixes stay frozen during training
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    # decide training params
    # metric: accuracy, log-loss and L1 for both the RPN and RCNN stages
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback: speed printout + checkpoint writer that denormalizes bbox regression
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate: divide by 10 at each lr_step epoch still ahead;
    # steps already passed (when resuming) pre-scale the base lr instead
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer: SGD with momentum; gradients averaged over devices
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}
    # train
    mod.fit(train_data, eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train an end-to-end RPN + RCNN detection network (logger-based variant).

    Loads a pretrained checkpoint (or resumes from ``prefix``), freshly
    initializes the detection heads, and fits a ``MutableModule`` with SGD
    under a multi-factor learning-rate schedule.
    """
    # NOTE(review): `logger` is not defined in this function; it is assumed
    # to exist at module scope — confirm against the file's imports.
    # configuration for end-to-end training
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # build the training symbol for the requested backbone
    sym = eval('get_' + args.network + '_train')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # one image per device; the effective batch spans all devices
    num_devices = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * num_devices

    logger.info(pprint.pformat(config))

    # assemble the (optionally flipped) ground-truth roidb; '+' joins sets
    image_sets = args.image_set.split('+')
    roidbs = [load_gt_roidb(args.dataset, image_set, args.root_path,
                            args.dataset_path, flip=not args.no_flip)
              for image_set in image_sets]
    roidb = filter_roidb(merge_roidb(roidbs))

    # anchor-target loader feeding the RPN
    train_data = AnchorLoader(feat_sym, roidb,
                              batch_size=input_batch_size,
                              shuffle=not args.no_shuffle,
                              ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # largest input the module must be able to hold, plus room for 100 boxes
    tallest = max(v[0] for v in config.SCALES)
    widest = max(v[1] for v in config.SCALES)
    max_data_shape = [('data', (input_batch_size, 3, tallest, widest))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer every argument/output/aux shape from the concrete data shapes
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # resume a previous run, or start from the pretrained backbone and
    # re-initialize the detection heads
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # gaussian init for head weights (call order kept stable so the RNG
        # stream matches), zero init for the matching biases
        for name, sigma in (('rpn_conv_3x3_weight', 0.01),
                            ('rpn_cls_score_weight', 0.01),
                            ('rpn_bbox_pred_weight', 0.01),
                            ('cls_score_weight', 0.01),
                            ('bbox_pred_weight', 0.001)):
            arg_params[name] = mx.random.normal(0, sigma,
                                                shape=arg_shape_dict[name])
        for name in ('rpn_conv_3x3_bias', 'rpn_cls_score_bias',
                     'rpn_bbox_pred_bias', 'cls_score_bias',
                     'bbox_pred_bias'):
            arg_params[name] = mx.nd.zeros(shape=arg_shape_dict[name])

    # every learned argument must exist and match its inferred shape
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # module with mutable input shapes; FIXED_PARAMS prefixes stay frozen
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # composite metric: accuracy / log-loss / L1 for both RPN and RCNN stages
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in (metric.RPNAccMetric(),
                         metric.RPNLogLossMetric(),
                         metric.RPNL1LossMetric(),
                         metric.RCNNAccMetric(),
                         metric.RCNNLogLossMetric(),
                         metric.RCNNL1LossMetric()):
        eval_metrics.add(child_metric)

    # progress printout + checkpoint writer that denormalizes bbox regression
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=args.frequent,
                                                 auto_reset=False)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # multi-factor schedule: divide lr by 10 at each lr_step epoch still
    # ahead; steps already passed (when resuming) pre-scale the base lr
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(e) for e in lr_step.split(',')]
    lr_epoch_diff = [e - begin_epoch for e in lr_epoch if e > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(e * len(roidb) / num_devices) for e in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # SGD with momentum; gradients averaged over devices
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / num_devices),
                        'clip_gradient': 5}

    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step=50000):
    """Train an end-to-end RPN + RCNN network from an imdb-style dataset.

    Loads the dataset via ``eval(args.dataset)``, initializes the detection
    heads on top of a pretrained backbone (unless resuming), and fits a
    ``MutableModule`` with SGD and a single-factor lr schedule.

    :param args: parsed command-line namespace (network, dataset, paths, ...)
    :param ctx: list of mx contexts (one image per device)
    :param pretrained: prefix of the pretrained checkpoint
    :param epoch: epoch of the pretrained checkpoint to load
    :param prefix: prefix used for saving checkpoints
    :param begin_epoch: first training epoch
    :param end_epoch: last training epoch (exclusive bound for mod.fit)
    :param lr: base learning rate
    :param lr_step: iteration count after which lr is multiplied by 0.1
    """
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # setup config
    config.TRAIN.HAS_RPN = True
    config.TRAIN.BATCH_SIZE = 1
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    config.TRAIN.BG_THRESH_LO = 0.0
    # load symbol (resolved by name via eval)
    sym = eval('get_' + args.network + '_train')()
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup multi-gpu: scale the batch by the number of devices
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)
    # print config
    pprint.pprint(config)
    # load dataset and prepare imdb for training
    imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path)
    roidb = imdb.gt_roidb()
    if args.flip:
        roidb = imdb.append_flipped_images(roidb)
    # load training data
    train_data = AnchorLoader(feat_sym, roidb,
                              batch_size=config.TRAIN.BATCH_SIZE,
                              shuffle=True, ctx=ctx,
                              work_load_list=args.work_load_list)
    # infer max shape (inputs capped at 1000x1000, up to 100 gt boxes)
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_SIZE, 100, 5)))
    # FIX: the original used Python-2-only `print` statements here, which is
    # a syntax error under Python 3 and inconsistent with the rest of the
    # file; the %-formatted print() produces identical output on both.
    print('providing maximum shape %s %s' % (max_data_shape, max_label_shape))
    # load pretrained
    arg_params, aux_params = load_param(pretrained, epoch, convert=True)
    # infer shape of every argument / output / auxiliary state
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)
    # initialize params: gaussian init for head weights, zeros for biases
    if not args.resume:
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])
    # check parameter shapes; the membership asserts (matching the sibling
    # train_net variants) turn a bare KeyError into a diagnostic message
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)
    # create solver; early conv layers stay frozen
    fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    # decide training params
    # metric: accuracy, log-loss and L1 for both the RPN and RCNN stages
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric,
                         eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback: speed printout + checkpoint writer that denormalizes bbox regression
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), imdb.num_classes)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), imdb.num_classes)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # optimizer: SGD with momentum; lr decays by 0.1 every lr_step iterations
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(lr_step, 0.1),
                        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)}
    # train
    mod.fit(train_data, eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=begin_epoch, num_epoch=end_epoch)