def alternate_train(args, ctx, pretrained, epoch, rpn_epoch, rpn_lr, rpn_lr_step):
    """
    Train the RPN once, writing checkpoints under the 'model/rpn1' prefix.
    :param args: argument namespace; dataset, path and training switches are read from it
    :param ctx: device context(s) forwarded to train_rpn
    :param pretrained: model prefix forwarded to train_rpn for initialisation
    :param epoch: epoch forwarded together with ``pretrained``
    :param rpn_epoch: end epoch forwarded to train_rpn
    :param rpn_lr: forwarded to train_rpn as ``lr``
    :param rpn_lr_step: forwarded to train_rpn as ``lr_step``
    :return: None
    """
    # basic config (module-level config is modified in place)
    config.TRAIN.BG_THRESH_LO = 0.0
    start_epoch = 0

    logger.info('########## TRAIN RPN WITH IMAGENET INIT')
    data_spec = (args.network, args.dataset, args.image_set,
                 args.root_path, args.dataset_path)
    run_spec = (args.frequent, args.kvstore, args.work_load_list,
                args.no_flip, args.no_shuffle, args.resume)
    model_spec = (ctx, pretrained, epoch, 'model/rpn1', start_epoch, rpn_epoch)
    train_rpn(*(data_spec + run_spec + model_spec),
              train_shared=False, lr=rpn_lr, lr_step=rpn_lr_step)
def main():
    """Parse arguments, create one GPU context per id in ``args.gpus`` and call alternate_train."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)

    # args.gpus is a comma-separated list of device ids
    ctx = []
    for gpu_id in args.gpus.split(','):
        ctx.append(mx.gpu(int(gpu_id)))

    rpn_schedule = (args.rpn_epoch, args.rpn_lr, args.rpn_lr_step)
    rcnn_schedule = (args.rcnn_epoch, args.rcnn_lr, args.rcnn_lr_step)
    alternate_train(args, ctx, args.pretrained, args.pretrained_epoch,
                    *(rpn_schedule + rcnn_schedule))
def main():
    """Entry point: log the parsed arguments, build the GPU context list and start alternate_train."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)

    # one context per comma-separated id in args.gpus
    device_ids = args.gpus.split(',')
    ctx = [mx.gpu(int(device_id)) for device_id in device_ids]

    alternate_train(
        args, ctx,
        args.pretrained, args.pretrained_epoch,
        args.rpn_epoch, args.rpn_lr, args.rpn_lr_step,
        args.rcnn_epoch, args.rcnn_lr, args.rcnn_lr_step)
def main():
    """Entry point: parse arguments and run test_rcnn on the GPU selected by ``args.gpu``."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    ctx = mx.gpu(args.gpu)

    # test_rcnn takes its arguments positionally: dataset fields, the context,
    # then model / evaluation fields.
    dataset_fields = ('network', 'dataset', 'image_set', 'root_path', 'dataset_path')
    model_fields = ('prefix', 'epoch', 'vis', 'shuffle', 'has_rpn', 'proposal', 'thresh')

    call_args = [getattr(args, field) for field in dataset_fields]
    call_args.append(ctx)
    call_args.extend(getattr(args, field) for field in model_fields)
    test_rcnn(*call_args)
def main():
    """
    Parse arguments into the module-level ``args``, derive the ``pyramid`` and
    ``bbox_vote`` switches from ``args.mode`` and run test.
    """
    global args
    args = parse_args()

    # mode 1 turns both options on; any other mode leaves both off
    extras_enabled = bool(args.mode == 1)
    args.pyramid = extras_enabled
    args.bbox_vote = extras_enabled

    logger.info('Called with argument: %s' % args)
    test(args)
def main():
    """Entry point: parse arguments, build the GPU context list from ``args.gpus`` and call train_net."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)

    ctx = []
    for gpu_id in args.gpus.split(','):
        ctx.append(mx.gpu(int(gpu_id)))

    schedule = {'lr': args.lr, 'lr_step': args.lr_step}
    train_net(args, ctx, args.pretrained, args.pretrained_epoch, args.prefix,
              args.begin_epoch, args.end_epoch, **schedule)
def test_net(prefix, iter_no):
    """
    Run test_rcnn on ``default.test_image_set`` and save ``default.res_dict``
    to 'proposals_<test_image_set>_<exp_name>.mat'.
    :param prefix: model prefix forwarded to test_rcnn
    :param iter_no: forwarded to test_rcnn right after ``prefix``
    :return: None
    """
    logger.info('Testing ...')
    default.testing = True
    try:
        # the first id listed in default.gpus is used for testing
        ctx = mx.gpu(int(default.gpus.split(',')[0]))
        test_rcnn(default.network, default.dataset, default.test_image_set,
                  default.dataset_path, ctx, prefix, iter_no,
                  default.val_vis, default.val_shuffle, default.val_has_rpn,
                  default.proposal, default.val_max_box, default.val_thresh)
        prop_file = 'proposals_%s_%s.mat' % (default.test_image_set,
                                             default.exp_name)
        savemat(prop_file, default.res_dict)
    finally:
        # Reset the shared flag even when testing or saving fails, so a caller
        # that survives the exception does not stay in "testing" mode.
        default.testing = False
def main():
    """
    Entry point: parse arguments, pick devices from the CUDA_VISIBLE_DEVICES
    environment variable and call train_net.

    One ``mx.gpu(i)`` context (i = 0..n-1) is created per comma-separated entry
    in CUDA_VISIBLE_DEVICES. When the variable is empty or not set at all, a
    single CPU context is used instead.
    """
    args = parse_args()
    logger.info('Called with argument: %s' % args)

    # .get() rather than indexing: an unset variable must take the CPU fallback
    # below instead of raising KeyError.
    cvd = os.environ.get('CUDA_VISIBLE_DEVICES', '').strip()
    ctx = [mx.gpu(i) for i in range(len(cvd.split(',')))] if cvd else []

    if not ctx:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))

    train_net(args, ctx, args.pretrained, args.pretrained_epoch, args.prefix,
              args.begin_epoch, args.end_epoch,
              lr=args.lr, lr_step=args.lr_step)
def demo_net(predictor, image_name, result_txt, vis=False):
    """
    generate data_batch -> im_detect -> post process -> append detections to result_txt
    :param predictor: Predictor
    :param image_name: image name
    :param result_txt: writable text stream; receives a '<parent dir>/<file name>' line
        followed by the detections as space-separated numbers
    :param vis: not used by this variant; kept so existing callers keep working
    :return: None
    """
    assert os.path.exists(image_name), image_name + ' not found'
    # print() with a single argument behaves the same on Python 2 and Python 3
    print(image_name)
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

    # Last two path components; slicing instead of [-2] so a bare file name
    # (no directory part) no longer raises IndexError.
    result_txt.write('/'.join(image_name.split('/')[-2:]) + '\n')

    all_boxes = [[] for _ in CLASSES]
    for cls_ind in range(len(CLASSES)):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # index 0 is replaced by an empty entry so it is never reported
    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]

    # log and write results
    for ind, cls_dets in enumerate(boxes_this_image):
        if len(cls_dets) > 0:
            logger.info('---%s---' % CLASSES[ind])
            logger.info('%s' % cls_dets)
            for det in cls_dets:
                # four box values and the score, each followed by a space
                result_txt.write(' '.join(str(v) for v in det[:5]) + ' ')
    # NOTE(review): the line terminator is written once per image here; the
    # source layout was ambiguous about whether it belonged inside the loop - confirm.
    result_txt.write('\n')
def demo_net(predictor, image_name, vis=False):
    """
    generate data_batch -> im_detect -> post process
    :param predictor: Predictor
    :param image_name: image name
    :param vis: show the detections when True; otherwise draw them and save the
        image as 'data/VOCdevkit/results/test/<name>_result<ext>'
    :return: None
    """
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

    all_boxes = [[] for _ in CLASSES]
    for cls_ind in range(len(CLASSES)):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        # diagnostic only; was an unconditional print on every class
        logger.debug('cls_scores shape: %s', cls_scores.shape)
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # index 0 is replaced by an empty entry so it is never reported
    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]

    # print results
    logger.info('---class---')
    logger.info('[[x1, x2, y1, y2, confidence]]')
    for ind, cls_dets in enumerate(boxes_this_image):
        if len(cls_dets) > 0:
            logger.info('---%s---' % CLASSES[ind])
            logger.info('%s' % cls_dets)

    if vis:
        vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
    else:
        # Text after the last '/'; rsplit also handles a name without any '/',
        # which previously raised IndexError.
        base_name = image_name.rsplit('/', 1)[-1]
        # Insert '_result' before the extension only; replace('.') rewrote
        # every dot in the file name.
        stem, ext = os.path.splitext(base_name)
        result_file = "data/VOCdevkit/results/test/" + stem + '_result' + ext
        logger.info('results saved to %s' % result_file)
        im = draw_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
        cv2.imwrite(result_file, im)
def get_optimizer(args, arg_names, num_iter_per_epoch, iter_size):
    """
    Build the SGD optimizer with a multi-step learning-rate schedule.
    :param args: namespace providing e2e_lr, lr_factor, e2e_lr_step
        (comma-separated epochs), begin_epoch and weight_decay
    :param arg_names: parameter names; used for param_idx2name and the
        per-parameter lr / wd multipliers
    :param num_iter_per_epoch: iterations per epoch, used to convert step epochs to iterations
    :param iter_size: divisor applied when converting step epochs to iterations
    :return: configured mx.optimizer.SGD instance
    """
    # decide learning rate
    base_lr = args.e2e_lr
    lr_factor = args.lr_factor
    lr_epoch = [float(ep) for ep in args.e2e_lr_step.split(',')]
    # only steps that lie after begin_epoch are still ahead of us
    lr_epoch_diff = [ep - args.begin_epoch for ep in lr_epoch
                     if ep > args.begin_epoch]
    # steps already passed are folded into the starting learning rate
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(ep * num_iter_per_epoch / iter_size)
                for ep in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))

    # MultiFactorScheduler rejects an empty step list, which happens when
    # begin_epoch is at or past the last step; run with a constant rate then.
    if lr_iters:
        lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    else:
        lr_scheduler = None

    # per-parameter multipliers
    lr_dict = dict()
    wd_dict = dict()
    param_idx2name = {}
    for i, arg_name in enumerate(arg_names):
        param_idx2name[i] = arg_name
        if arg_name.endswith('_bias'):
            # for biases, set the learning rate to 2, weight_decay to 0
            wd_dict[arg_name] = 0
            lr_dict[arg_name] = 2
        else:
            lr_dict[arg_name] = 1.

    optimizer_params = {
        'momentum': 0.9,
        'wd': args.weight_decay,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        # 'rescale_grad' is intentionally not set: rescale_grad is done in loss functions
        'param_idx2name': param_idx2name,
        'clip_gradient': 5
    }
    opt = mx.optimizer.SGD(**optimizer_params)
    opt.set_wd_mult(wd_dict)
    opt.set_lr_mult(lr_dict)
    return opt
def validate(prefix, iter_no):
    """
    Evaluate checkpoint ``iter_no + 1`` on ``default.val_image_set``, update the
    best-model bookkeeping on ``default`` and optionally delete the losing
    parameter file.
    :param prefix: model prefix; the parameter file is '<prefix>-<epoch:04d>.params'
    :param iter_no: zero-based iteration/epoch index; the evaluated epoch is iter_no + 1
    :return: accuracy recorded for the highest epoch number in ``default.accs``
    """
    logger.info('Validating ...')
    default.testing = True
    try:
        ctx = mx.gpu(int(default.val_gpu))
        epoch = iter_no + 1
        acc = test_rcnn(default.network, default.dataset, default.val_image_set,
                        default.dataset_path, ctx, prefix, epoch,
                        default.val_vis, default.val_shuffle, default.val_has_rpn,
                        default.proposal, default.val_max_box, default.val_thresh)

        fn = '%s-%04d.params' % (prefix, epoch)
        fn_to_del = None
        if not default.accs:
            # first validation: nothing to compare against, nothing to delete
            default.best_model = fn
            default.best_acc = acc
            default.best_epoch = epoch
        elif acc > default.best_acc:
            # new best: the previous best file becomes the deletion candidate
            fn_to_del = default.best_model
            default.best_model = fn
            default.best_acc = acc
            default.best_epoch = epoch
        else:
            fn_to_del = fn
        default.accs[str(epoch)] = acc

        # print the history in epoch order; the last one printed is returned
        latest_acc = 0
        for e in sorted(int(a) for a in default.accs):
            latest_acc = default.accs[str(e)]
            print('Iter %s: %.4f' % (e, latest_acc))
        sys.stdout.flush()

        if default.keep_best_model and fn_to_del:
            try:
                os.remove(fn_to_del)
            except OSError as err:
                # a missing or locked checkpoint must not abort the run
                logger.warning('could not delete %s: %s', fn_to_del, err)
            else:
                print('%s deleted to keep only the best model' % fn_to_del)
            sys.stdout.flush()
        return latest_acc
    finally:
        # always leave testing mode, even if evaluation raised
        default.testing = False
def demo_net(predictor, image_name, vis=False):
    """
    generate data_batch -> im_detect -> post process
    :param predictor: Predictor
    :param image_name: image name
    :param vis: show the detections when True; otherwise draw them and save the
        image next to the input as '<name>_result<ext>'
    :return: None
    """
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

    all_boxes = [[] for _ in CLASSES]
    for cls_ind in range(len(CLASSES)):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # index 0 is replaced by an empty entry so it is never reported
    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]

    # print results
    logger.info('---class---')
    logger.info('[[x1, x2, y1, y2, confidence]]')
    for ind, cls_dets in enumerate(boxes_this_image):
        if len(cls_dets) > 0:
            logger.info('---%s---' % CLASSES[ind])
            logger.info('%s' % cls_dets)

    if vis:
        vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
    else:
        # Insert '_result' before the extension only. A blanket
        # replace('.', '_result.') also rewrote dots elsewhere in the path
        # (e.g. './img/a.jpg' became '_result./img/a_result.jpg').
        stem, ext = os.path.splitext(image_name)
        result_file = stem + '_result' + ext
        logger.info('results saved to %s' % result_file)
        im = draw_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
        cv2.imwrite(result_file, im)
def main():
    """
    Entry point of the demo: build the predictor, then run demo_net either on the
    single image ``args.image`` or on every '.jpg' file directly inside ``args.dir``.
    """
    args = parse_args()
    ctx = mx.gpu(args.gpu)
    symbol = get_vgg_test(num_classes=config.NUM_CLASSES,
                          num_anchors=config.NUM_ANCHORS)
    predictor = get_net(symbol, args.prefix, args.epoch, ctx)

    # single test image
    if args.image:
        demo_net(predictor, args.image, args.vis)
        return

    # an image dir for test
    entries = os.listdir(args.dir)
    remaining = len(entries)
    for entry in entries:
        img_path = os.path.join(args.dir, entry)
        if not (os.path.isfile(img_path)
                and os.path.splitext(img_path)[-1] in ['.jpg']):
            continue
        # the counter starts at the number of directory entries and is logged
        # before each processed image
        logger.info('%s' % remaining)
        demo_net(predictor, img_path, args.vis)
        # NOTE(review): decremented per processed image; the source layout was
        # ambiguous about whether it should run for every directory entry - confirm.
        remaining = remaining - 1
def load_checkpoint(prefix, epoch):
    """
    Read '<prefix>-<epoch:04d>.params' and split its entries by key prefix.
    :param prefix: Prefix of model name.
    :param epoch: Epoch number of model we would like to load.
    :return: (arg_params, aux_params)
    arg_params : dict of str to NDArray
        entries whose saved key starts with 'arg:', keyed by the rest of the name
    aux_params : dict of str to NDArray
        entries whose saved key starts with 'aux:', keyed by the rest of the name
    """
    param_file = '%s-%04d.params' % (prefix, epoch)
    logger.info('loading parameters from %s', param_file)

    arg_params, aux_params = {}, {}
    targets = {'arg': arg_params, 'aux': aux_params}
    for key, value in mx.nd.load(param_file).items():
        # keys look like '<kind>:<name>'; a key without ':' raises ValueError here
        kind, name = key.split(':', 1)
        target = targets.get(kind)
        # entries of any other kind are ignored
        if target is not None:
            target[name] = value
    return arg_params, aux_params
def demo_net(predictor, image_name, vis=True):
    """
    generate data_batch -> im_detect -> post process -> draw and save the result image
    :param predictor: Predictor
    :param image_name: image name
    :param vis: not used by this variant; the annotated image is always saved
    :return: (CLASS, SCORE, BOOL) where CLASS and SCORE come from draw_all_detection
        and BOOL is 1 if any class has at least one detection, else 0
    """
    BOOL = 0
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

    classes = Global.CLASSES
    # the NMS wrapper depends only on the threshold, so build it once
    nms = py_nms_wrapper(Global.nms_thresh_value)

    all_boxes = [[] for _ in classes]
    for cls_ind in range(len(classes)):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where(cls_scores >= Global.conf_thresh_value)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # index 0 is replaced by an empty entry so it is never reported
    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(classes))]

    for ind, cls_dets in enumerate(boxes_this_image):
        if len(cls_dets) > 0:
            BOOL = 1
            logger.info('---%s---' % classes[ind])
            logger.info('%s' % cls_dets)

    # the output path mirrors the input path with the open dir swapped for the save dir
    result_file = image_name.replace(str(Global.open_img_dir), str(Global.save_path))
    # print() with a single argument behaves the same on Python 2 and Python 3
    print(result_file)
    logger.info('results saved to %s' % result_file)
    im, CLASS, SCORE = draw_all_detection(data_dict['data'].asnumpy(), boxes_this_image,
                                          classes, im_scale)
    cv2.imwrite(result_file, im)

    # record path, classes and scores in the shared per-picture lists
    Global.PICTURE_INFO[0].append(result_file)
    Global.PICTURE_INFO[1].append(CLASS)
    Global.PICTURE_INFO[2].append(SCORE)
    return CLASS, SCORE, BOOL
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'):
    """
    Build the training symbol, data loader, metrics and optimizer, then call
    ``Module.fit``. Training stops from inside the batch callback: when the
    global batch counter reaches the last learning-rate step, a checkpoint is
    written under ``prefix`` (epoch 0) and the process exits.
    :param args: argument namespace (dataset, paths, network name, checkpoint, ...)
    :param ctx: list of device contexts; its length scales the batch size
    :param pretrained: prefix of the checkpoint to start from; empty means no checkpoint
    :param epoch: epoch of the ``pretrained`` checkpoint
    :param prefix: prefix used when the final checkpoint is saved
    :param begin_epoch: first epoch; learning-rate steps at or before it are dropped
    :param end_epoch: NOTE(review): overwritten with 10000 inside the function, so the
        passed value has no effect
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which the learning rate changes
    :return: None
    """
    # setup config
    #init_config()
    #print(config)
    # setup multi-gpu
    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path, flip=not args.no_flip)
        for image_set in image_sets
    ]
    #roidb = merge_roidb(roidbs)
    #roidb = filter_roidb(roidb)
    # only the roidb of the first image set is used; any further sets are loaded but ignored
    roidb = roidbs[0]

    # load symbol
    #sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    #feat_sym = sym.get_internals()['rpn_cls_score_output']
    #train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
    #                          ctx=ctx, work_load_list=args.work_load_list,
    #                          feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
    #                          anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # load and initialize params
    sym = None
    if len(pretrained) == 0:
        arg_params = {}
        aux_params = {}
    else:
        logger.info('loading %s,%d' % (pretrained, epoch))
        sym, arg_params, aux_params = mx.model.load_checkpoint(pretrained, epoch)
        #arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        #for k in ['rpn_conv_3x3', 'rpn_cls_score', 'rpn_bbox_pred', 'cls_score', 'bbox_pred']:
        #  _k = k+"_weight"
        #  if _k in arg_shape_dict:
        #    v = 0.001 if _k.startswith('bbox_') else 0.01
        #    arg_params[_k] = mx.random.normal(0, v, shape=arg_shape_dict[_k])
        #    print('init %s with normal %.5f'%(_k,v))
        #  _k = k+"_bias"
        #  if _k in arg_shape_dict:
        #    arg_params[_k] = mx.nd.zeros(shape=arg_shape_dict[_k])
        #    print('init %s with zero'%(_k))

    # The builder 'get_<network>_train' is looked up by name and receives the
    # loaded symbol (or None when no pretrained checkpoint was given).
    # NOTE(review): eval() on a string built from args.network - acceptable only
    # while that value is trusted; a name->function table would be safer.
    sym = eval('get_' + args.network + '_train')(sym)
    # one classification-score output per RPN stride is handed to the loader
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['face_rpn_cls_score_stride%s_output' % stride])

    train_data = CropLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle, ctx=ctx, work_load_list=args.work_load_list)

    # infer max shape
    # NOTE(review): both spatial dimensions use v[1] of config.SCALES - confirm this is intended
    max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    #max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    # the maximum shapes are only logged here; they are not passed on below
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # every argument whose name contains 'upsampling' gets a freshly allocated
    # array filled by the initializer's bilinear routine (private MXNet method)
    for k, v in arg_shape_dict.items():
        if k.find('upsampling') >= 0:
            print('initializing upsampling_weight', k)
            arg_params[k] = mx.nd.zeros(shape=v)
            init = mx.init.Initializer()
            init._init_bilinear(k, arg_params[k])
            #print(args[k])

    # check parameter shapes
    #for k in sym.list_arguments():
    #    if k in data_shape_dict:
    #        continue
    #    assert k in arg_params, k + ' not initialized'
    #    assert arg_params[k].shape == arg_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    #for k in sym.list_auxiliary_states():
    #    assert k in aux_params, k + ' not initialized'
    #    assert aux_params[k].shape == aux_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    fixed_param_prefix = config.FIXED_PARAMS

    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, work_load_list=args.work_load_list, fixed_param_names=fixed_param_names)

    # metric
    # `mid` walks through the output list two slots at a time; each metric
    # reads the pair (mid, mid + 1).
    eval_metrics = mx.metric.CompositeEvalMetric()
    mid = 0
    for m in range(len(config.RPN_FEAT_STRIDE)):
        stride = config.RPN_FEAT_STRIDE[m]
        #mid = m*MSTEP
        _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1, name='RPNAcc_s%s' % stride)
        eval_metrics.add(_metric)
        mid += 2
        #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
        #eval_metrics.add(_metric)

        _metric = metric.RPNL1LossMetric(loss_idx=mid, weight_idx=mid + 1, name='RPNL1Loss_s%s' % stride)
        eval_metrics.add(_metric)
        mid += 2
        if config.FACE_LANDMARK:
            _metric = metric.RPNL1LossMetric(loss_idx=mid, weight_idx=mid + 1, name='RPNLandMarkL1Loss_s%s' % stride)
            eval_metrics.add(_metric)
            mid += 2
        if config.HEAD_BOX:
            _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1, name='RPNAcc_head_s%s' % stride)
            eval_metrics.add(_metric)
            mid += 2
            #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
            #eval_metrics.add(_metric)

            _metric = metric.RPNL1LossMetric(loss_idx=mid, weight_idx=mid + 1, name='RPNL1Loss_head_s%s' % stride)
            eval_metrics.add(_metric)
            mid += 2

    # callback
    #means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    #stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    #epoch_end_callback = callback.do_checkpoint(prefix)
    # not passed to fit() below, which builds its own epoch-end callback
    epoch_end_callback = None

    # decide learning rate
    #base_lr = lr
    #lr_factor = 0.1
    #lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    # steps are given in epochs and converted to global batch counts
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr_iters = [int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff]

    # (batch, factor) pairs: at `batch` the current lr is multiplied by `factor`
    lr_steps = []
    if len(lr_iters) == 5:
        factors = [0.5, 0.5, 0.4, 0.1, 0.1]
        for i in range(5):
            lr_steps.append((lr_iters[i], factors[i]))
    elif len(lr_iters) == 8:  #warmup
        # the first five steps raise the rate (factor > 1), the rest decay it
        for li in lr_iters[0:5]:
            lr_steps.append((li, 1.5849))
        for li in lr_iters[5:]:
            lr_steps.append((li, 0.1))
    else:
        for li in lr_iters:
            lr_steps.append((li, 0.1))
    #lr_steps = [ (20,0.1), (40, 0.1) ] #XXX

    # the end_epoch argument is overridden; training ends via sys.exit() in _batch_callback
    end_epoch = 10000
    logger.info('lr %f lr_epoch_diff %s lr_steps %s' % (lr, lr_epoch_diff, lr_steps))

    # optimizer
    opt = optimizer.SGD(learning_rate=lr, momentum=0.9, wd=0.0005, rescale_grad=1.0 / len(ctx), clip_gradient=None)
    initializer = mx.init.Xavier()
    #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style

    train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size, frequent=args.frequent, auto_reset=False)
    # one-element list so the nested callback can mutate the counter
    global_step = [0]

    def save_model(epoch):
        # Save a checkpoint whose symbol groups, per stride, the reshaped class
        # probabilities, the bbox predictions and (when enabled) the landmark
        # predictions, together with the module's current parameters.
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            _name = 'face_rpn_cls_score_stride%d_output' % stride
            rpn_cls_score = all_layers[_name]

            # prepare rpn data
            rpn_cls_score_reshape = mx.symbol.Reshape(data=rpn_cls_score, shape=(0, 2, -1, 0), name="face_rpn_cls_score_reshape_stride%d" % stride)
            rpn_cls_prob = mx.symbol.SoftmaxActivation(data=rpn_cls_score_reshape, mode="channel", name="face_rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='face_rpn_cls_prob_reshape_stride%d' % stride)
            _name = 'face_rpn_bbox_pred_stride%d_output' % stride
            rpn_bbox_pred = all_layers[_name]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
            if config.FACE_LANDMARK:
                _name = 'face_rpn_landmark_pred_stride%d_output' % stride
                rpn_landmark_pred = all_layers[_name]
                outs.append(rpn_landmark_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        # Runs after every batch: report speed, advance the global counter,
        # apply a scheduled lr change, and stop training at the final step.
        #global global_step
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        for step in lr_steps:
            if mbatch == step[0]:
                opt.lr *= step[1]
                print('lr change to', opt.lr, ' in batch', mbatch, file=sys.stderr)
                break

        # NOTE(review): lr_steps[-1] raises IndexError on the first batch when
        # lr_steps is empty (every lr_step epoch <= begin_epoch) - needs a guard
        # or an explicit early error.
        if mbatch == lr_steps[-1][0]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            #arg, aux = mod.get_params()
            #mx.model.save_checkpoint(prefix, 99, mod.symbol, arg, aux)
            sys.exit(0)

    # an explicit checkpoint replaces all parameters prepared above, including
    # the upsampling weights initialised earlier
    if args.checkpoint is not None:
        _, arg_params, aux_params = mx.model.load_checkpoint(args.checkpoint, 0)

    # train
    # NOTE(review): the epoch-end checkpoint prefix is hard-coded to
    # 'model/testR50' rather than derived from `prefix` - confirm intended.
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=checkpoint_callback('model/testR50'), batch_end_callback=_batch_callback, kvstore=args.kvstore, optimizer=opt, initializer=initializer, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def pred_eval(predictor, test_data, imdb, vis=False, thresh=1e-3, max_per_image=-1):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param max_per_image: keep at most this many detections per image over all
        classes; a non-positive value (the default) disables the cap
    :return: None; detections are pickled to '<imdb.cache_path>/<imdb.name>_detections.pkl'
        and passed to imdb.evaluate_detections
    """
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    nms = py_nms_wrapper(config.TEST.NMS)
    num_images = imdb.num_images
    num_classes = imdb.num_classes

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    # range() instead of xrange() so the block also runs on Python 3
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    t = time.time()
    for i, (im_info, data_batch) in enumerate(test_data):
        t1 = time.time() - t
        t = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scale)

        t2 = time.time() - t
        t = time.time()

        # class 0 is skipped; each remaining class is thresholded and NMS-ed separately
        for j in range(1, num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j, np.newaxis]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j][i] = cls_dets[keep, :]

        # limit detections to max_per_image over all classes
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1] for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                # score of the max_per_image-th best detection becomes the cut-off
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        if vis:
            boxes_this_image = [[]] + [all_boxes[j][i] for j in range(1, num_classes)]
            vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, imdb.classes, scale)

        t3 = time.time() - t
        t = time.time()
        # 1-based progress, consistent with generate_proposals
        logger.info('testing %d/%d data %.4fs net %.4fs post %.4fs' % (i + 1, imdb.num_images, t1, t2, t3))

    det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes)
def generate_proposals(predictor, test_data, imdb, vis=False, thresh=0.):
    """
    Run the RPN over test_data and pickle the proposals of every image.
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: proposals whose score is not above this value are dropped
    :return: list with one array of kept proposals (boxes followed by score) per image
    """
    assert vis or not test_data.shuffle
    data_names = [entry[0] for entry in test_data.provide_data]

    kept_per_image = []
    raw_per_image = []
    t = time.time()
    for idx, (im_info, data_batch) in enumerate(test_data):
        data_time = time.time() - t
        t = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_proposal(predictor, data_batch, data_names, scale)
        net_time = time.time() - t
        t = time.time()

        # assemble proposals: box columns first, score column(s) after
        raw_dets = np.hstack((boxes, scores))
        raw_per_image.append(raw_dets)

        # filter proposals by score
        selected = np.where(raw_dets[:, 4:] > thresh)[0]
        dets = raw_dets[selected, :]
        kept_per_image.append(dets)

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'], scale)

        progress = 'generating %d/%d ' % (idx + 1, imdb.num_images)
        count = 'proposal %d ' % (dets.shape[0])
        timing = 'data %.4fs net %.4fs' % (data_time, net_time)
        logger.info(progress + count + timing)

    assert len(kept_per_image) == imdb.num_images, 'calculations not complete'

    # save results
    rpn_folder = os.path.join(imdb.root_path, 'rpn_data')
    if not os.path.exists(rpn_folder):
        os.mkdir(rpn_folder)

    rpn_file = os.path.join(rpn_folder, imdb.name + '_rpn.pkl')
    with open(rpn_file, 'wb') as out:
        cPickle.dump(kept_per_image, out, cPickle.HIGHEST_PROTOCOL)

    # the unfiltered proposals are only worth storing when filtering was active
    if thresh > 0:
        full_rpn_file = os.path.join(rpn_folder, imdb.name + '_full_rpn.pkl')
        with open(full_rpn_file, 'wb') as out:
            cPickle.dump(raw_per_image, out, cPickle.HIGHEST_PROTOCOL)

    logger.info('wrote rpn proposals to %s' % rpn_file)
    return kept_per_image
def alternate_train(args, ctx, pretrained, epoch, rpn_epoch, rpn_lr, rpn_lr_step, rcnn_epoch, rcnn_lr, rcnn_lr_step):
    """
    Alternate training: RPN -> proposals -> RCNN -> RPN (shared) -> proposals ->
    combine -> RCNN (shared) -> combine into 'model/final'.
    :param args: argument namespace; dataset, path and training switches are read from it
    :param ctx: list of device contexts; ctx[0] is used for proposal generation
    :param pretrained: model prefix used to initialise the first RPN and RCNN stages
    :param epoch: epoch forwarded together with ``pretrained``
    :param rpn_epoch: end epoch of both RPN stages
    :param rpn_lr: learning rate of both RPN stages
    :param rpn_lr_step: lr_step of both RPN stages
    :param rcnn_epoch: end epoch of both RCNN stages
    :param rcnn_lr: learning rate of both RCNN stages
    :param rcnn_lr_step: lr_step of both RCNN stages
    :return: None
    """
    # basic config
    begin_epoch = 0
    config.TRAIN.BG_THRESH_LO = 0.0

    def shared_args():
        # leading positional arguments common to train_rpn and train_rcnn
        return (args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
                args.frequent, args.kvstore, args.work_load_list,
                args.no_flip, args.no_shuffle, args.resume, ctx)

    def run_rpn_stage(init_prefix, init_epoch, out_prefix, share):
        train_rpn(*(shared_args() + (init_prefix, init_epoch, out_prefix, begin_epoch, rpn_epoch)),
                  train_shared=share, lr=rpn_lr, lr_step=rpn_lr_step)

    def run_rcnn_stage(init_prefix, init_epoch, out_prefix, share):
        train_rcnn(*(shared_args() + (init_prefix, init_epoch, out_prefix, begin_epoch, rcnn_epoch)),
                   train_shared=share, lr=rcnn_lr, lr_step=rcnn_lr_step, proposal='rpn')

    def dump_proposals(rpn_prefix):
        # one test_rpn run per '+'-separated image set
        for image_set in args.image_set.split('+'):
            test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path,
                     ctx[0], rpn_prefix, rpn_epoch, vis=False, shuffle=False, thresh=0)

    logger.info('########## TRAIN RPN WITH IMAGENET INIT')
    run_rpn_stage(pretrained, epoch, 'model/rpn1', False)

    logger.info('########## GENERATE RPN DETECTION')
    dump_proposals('model/rpn1')

    logger.info('########## TRAIN RCNN WITH IMAGENET INIT AND RPN DETECTION')
    run_rcnn_stage(pretrained, epoch, 'model/rcnn1', False)

    logger.info('########## TRAIN RPN WITH RCNN INIT')
    run_rpn_stage('model/rcnn1', rcnn_epoch, 'model/rpn2', True)

    logger.info('########## GENERATE RPN DETECTION')
    dump_proposals('model/rpn2')

    logger.info('########## COMBINE RPN2 WITH RCNN1')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn1', rcnn_epoch, 'model/rcnn2', 0)

    logger.info('########## TRAIN RCNN WITH RPN INIT AND DETECTION')
    run_rcnn_stage('model/rcnn2', 0, 'model/rcnn2', True)

    logger.info('########## COMBINE RPN2 WITH RCNN2')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn2', rcnn_epoch, 'model/final', 0)
def alternate_train(args, ctx, pretrained, epoch, rpn_epoch, rpn_lr, rpn_lr_step, rcnn_epoch, rcnn_lr, rcnn_lr_step):
    """
    Alternate training with the first RPN stage switched off: the initial
    proposals come from the existing 'model/blouse' checkpoint (epoch 0), then
    RCNN -> RPN (shared) -> proposals -> combine -> RCNN (shared) -> combine
    into 'model/final'.
    :param args: argument namespace; dataset, path and training switches are read from it
    :param ctx: list of device contexts; ctx[0] is used for proposal generation
    :param pretrained: model prefix used to initialise the first RCNN stage
    :param epoch: epoch forwarded together with ``pretrained``
    :param rpn_epoch: end epoch of the RPN stage
    :param rpn_lr: learning rate of the RPN stage
    :param rpn_lr_step: lr_step of the RPN stage
    :param rcnn_epoch: end epoch of both RCNN stages
    :param rcnn_lr: learning rate of both RCNN stages
    :param rcnn_lr_step: lr_step of both RCNN stages
    :return: None
    """
    # basic config
    begin_epoch = 0
    config.TRAIN.BG_THRESH_LO = 0.0

    def common_args():
        # leading positional arguments shared by train_rpn and train_rcnn
        return (args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
                args.frequent, args.kvstore, args.work_load_list,
                args.no_flip, args.no_shuffle, args.resume, ctx)

    def generate_detections(rpn_prefix, rpn_ckpt_epoch, show, score_thresh):
        # one test_rpn run per '+'-separated image set
        for image_set in args.image_set.split('+'):
            test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path,
                     ctx[0], rpn_prefix, rpn_ckpt_epoch,
                     vis=show, shuffle=False, thresh=score_thresh)

    def fit_rcnn(init_prefix, init_epoch, out_prefix, share):
        train_rcnn(*(common_args() + (init_prefix, init_epoch, out_prefix, begin_epoch, rcnn_epoch)),
                   train_shared=share, lr=rcnn_lr, lr_step=rcnn_lr_step, proposal='rpn')

    # The stage banner is still logged, but the train_rpn call that used to
    # write 'model/rpn1' is disabled.
    logger.info('########## TRAIN RPN WITH IMAGENET INIT')

    logger.info('########## GENERATE RPN DETECTION')
    generate_detections('model/blouse', 0, True, 0.97)

    logger.info('########## TRAIN RCNN WITH IMAGENET INIT AND RPN DETECTION')
    fit_rcnn(pretrained, epoch, 'model/rcnn1', False)

    logger.info('########## TRAIN RPN WITH RCNN INIT')
    train_rpn(*(common_args() + ('model/rcnn1', rcnn_epoch, 'model/rpn2', begin_epoch, rpn_epoch)),
              train_shared=True, lr=rpn_lr, lr_step=rpn_lr_step)

    logger.info('########## GENERATE RPN DETECTION')
    generate_detections('model/rpn2', rpn_epoch, False, 0)

    logger.info('########## COMBINE RPN2 WITH RCNN1')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn1', rcnn_epoch, 'model/rcnn2', 0)

    logger.info('########## TRAIN RCNN WITH RPN INIT AND DETECTION')
    fit_rcnn('model/rcnn2', 0, 'model/rcnn2', True)

    logger.info('########## COMBINE RPN2 WITH RCNN2')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn2', rcnn_epoch, 'model/final', 0)
def test_proposals(predictor, test_data, imdb, roidb, vis=False):
    """
    Run the proposal network over a dataset, report recall and dump detections.

    For every batch the detections scoring above config.TEST.SCORE_THRESH are
    kept, matched against the ground-truth boxes of the corresponding roidb
    entry, and per-image as well as cumulative recall (IoU above
    config.TEST.IOU_THRESH) is printed to stderr. The kept detections are
    written to ``<imdb.root_path>/output/<image dir>/<image name>`` with 'jpg'
    replaced by 'txt': the image name, the box count, then one
    ``x y w h score`` line per box in original image coordinates.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled (unless vis is set)
    :param imdb: image database (root_path and num_images are read)
    :param roidb: roidb; entry i must describe the i-th batch of test_data
    :param vis: controls visualization
    :return: None (results are only printed and written to disk)
    """
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    output_folder = os.path.join(imdb.root_path, 'output')
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    found_total = 0  # ground-truth boxes recalled so far
    gt_total = 0     # ground-truth boxes seen so far
    t = time.time()
    for i, (im_info, data_batch) in enumerate(test_data):
        t1 = time.time() - t
        t = time.time()

        oscale = im_info[0, 2]
        scale = 1.0  # fix scale=1.0 for SSH face detector
        scores, boxes, data_dict = im_proposal(predictor, data_batch, data_names, scale)
        t2 = time.time() - t
        t = time.time()

        # assemble proposals as (x1, y1, x2, y2, score) rows and filter by score
        dets = np.hstack((boxes, scores))
        keep = np.where(dets[:, 4:] > config.TEST.SCORE_THRESH)[0]
        dets = dets[keep, :]

        logger.info('generating %d/%d ' % (i + 1, imdb.num_images) +
                    'proposal %d ' % (dets.shape[0]) +
                    'data %.4fs net %.4fs' % (t1, t2))

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'], scale)

        # as roidb is the original one, need to scale GT for SSH
        gt_boxes = roidb[i]['boxes'].copy() * oscale
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
        num_gt = gt_boxes.shape[0]

        # np.float was removed from NumPy; float64 is the type it aliased
        overlaps = bbox_overlaps(dets.astype(np.float64), gt_boxes.astype(np.float64))

        # best IoU reached for each ground-truth box (all zeros without detections)
        best_overlaps = np.zeros(num_gt)
        if dets.shape[0] > 0:
            best_overlaps = overlaps.max(axis=0)
            for j, best in enumerate(best_overlaps):
                if best > config.TEST.IOU_THRESH:
                    continue
                print(j, 'failed', gt_boxes[j], 'max_overlap:', best, file=sys.stderr)

        found = (best_overlaps > config.TEST.IOU_THRESH).sum()
        # recall is undefined (nan) for an image without ground truth
        _recall = found / float(num_gt) if num_gt > 0 else float('nan')
        print('recall', _recall, num_gt, dets.shape[0], gt_areas, file=sys.stderr)

        found_total += found
        gt_total += num_gt
        # guard against ZeroDivisionError while no ground-truth box has been seen
        _recall = float(found_total) / gt_total if gt_total > 0 else float('nan')
        print('recall_all', _recall, file=sys.stderr)

        # map detections back to original image coordinates before writing
        dets[:, 0:4] /= oscale
        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(_vec[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (dets.shape[0]))
            for box in dets:
                f.write("%d %d %d %d %g \n" %
                        (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
    return
def demo_net(predictor, image_name, vis=False):
    """
    generate data_batch -> im_detect -> post process, for every image in listfile

    For each image the top detection of every non-background class is clamped
    to the image bounds and appended to 'fasterrcnnrh.txt' (comma separated,
    with score); one summary line per image is appended to 'fasterrcnn.txt'.

    NOTE(review): the image_name argument is overwritten by the loop over the
    module-level ``listfile`` and ``vis`` is never read -- confirm whether
    callers rely on either.

    :param predictor: Predictor
    :param image_name: image name (unused, see note above)
    :param vis: will save as a new image if not visualized (unused)
    :return: None
    """
    for image_name in listfile:
        base_name = image_name.split('/')[-1]
        ll = [base_name + ',']
        assert os.path.exists(image_name), image_name + ' not found'
        im = cv2.imread(image_name)
        # an image array is indexed (rows, cols, channels): height comes first
        height, width = im.shape[:2]

        data_batch, data_names, im_scale = generate_batch(im)
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

        # per-class thresholding followed by NMS
        all_boxes = [[] for _ in CLASSES]
        for cls_ind, cls in enumerate(CLASSES):
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind, np.newaxis]
            keep = np.where(cls_scores >= CONF_THRESH)[0]
            dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep = nms(dets)
            all_boxes[cls_ind] = dets[keep, :]

        # index 0 (background) is always reported as empty
        boxes_this_image = [[]] + all_boxes[1:]
        if len(CLASSES) == 0:
            # NOTE(review): hard-coded fallback entry; only reachable with no classes
            ll.extend([2, 0, 183, 272, 517])

        # print results
        logger.info('---class---')
        # NOTE(review): boxes are used below as (x1, y1, x2, y2, confidence);
        # the message is kept unchanged
        logger.info('[[x1, x2, y1, y2, confidence]]')
        for ind, cls_dets in enumerate(boxes_this_image):
            if len(cls_dets) == 0:
                continue
            logger.info('---%s---' % CLASSES[ind])
            logger.info('%s' % cls_dets)
            print(cls_dets)

            # only the first detection of each class is exported
            item = cls_dets[0]
            xmin = max(int(round(item[0])), 0)
            ymin = max(int(round(item[1])), 0)
            xmax = min(int(round(item[2])), width)
            ymax = min(int(round(item[3])), height)
            label = bq[CLASSES[ind]]
            ll.extend([label, xmin, ymin, xmax, ymax])

            gl = [str(v) for v in [base_name, label, item[4], xmin, ymin, xmax, ymax]]
            with open('fasterrcnnrh.txt', 'a+') as f1:
                f1.write(','.join(gl))
                f1.write('\n')

        print(ll)
        ll = [str(v) for v in ll]
        with open('fasterrcnn.txt', 'a+') as f:
            f.write(' '.join(ll))
            f.write('\n')
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'):
    """
    Train the multi-stride RPN network with CropLoader and plain SGD.

    The learning rate is decayed by hand in the batch callback at the batch
    counts derived from lr_step. A checkpoint is written every 1000 batches;
    when the last decay step is reached a checkpoint with epoch 0 is written
    and the process exits through sys.exit(0).

    :param args: parsed command-line arguments
    :param ctx: list of contexts
    :param pretrained: prefix of the pretrained model
    :param epoch: epoch of the pretrained model
    :param prefix: prefix used for saved checkpoints (and for resuming)
    :param begin_epoch: first epoch; also the checkpoint epoch when resuming
    :param end_epoch: ignored, overridden to 10000 below
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which the rate is decayed
    :return: None
    """
    # setup multi-gpu
    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = args.image_set.split('+')
    roidbs = [load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path,
                            flip=not args.no_flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load symbol
    # NOTE(review): eval() on a command-line value; only safe with trusted input
    sym = eval('get_' + args.network + '_train')()
    internals = sym.get_internals()
    feat_sym = [internals['rpn_cls_score_stride%s_output' % stride]
                for stride in config.RPN_FEAT_STRIDE]

    train_data = CropLoader(feat_sym, roidb, batch_size=input_batch_size,
                            shuffle=not args.no_shuffle, ctx=ctx,
                            work_load_list=args.work_load_list)

    # infer max shape (only logged; both spatial dims use the second SCALES entry)
    max_side = max([v[1] for v in config.SCALES])
    max_data_shape = [('data', (1, 3, max_side, max_side))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # upsampling weights start as bilinear kernels
        # (.items() instead of the Python-2-only .iteritems())
        for k, v in arg_shape_dict.items():
            if k.find('upsampling') >= 0:
                print('initializing upsampling_weight', k)
                arg_params[k] = mx.nd.zeros(shape=v)
                init = mx.init.Initializer()
                init._init_bilinear(k, arg_params[k])

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym, data_names=data_names, label_names=label_names,
                 logger=logger, context=ctx, work_load_list=args.work_load_list,
                 fixed_param_names=fixed_param_names)

    # metric: one accuracy and one L1-loss metric per output group of four
    eval_metrics = mx.metric.CompositeEvalMetric()
    for mid in [0, 4, 8]:
        eval_metrics.add(metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1))
        eval_metrics.add(metric.RPNL1LossMetric(loss_idx=mid + 2, weight_idx=mid + 3))

    # checkpoints are written from the batch callback instead
    epoch_end_callback = None

    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(step) for step in lr_step.split(',')]
    lr_epoch_diff = [step - begin_epoch for step in lr_epoch if step > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(step * len(roidb) / input_batch_size) for step in lr_epoch_diff]
    # training stops from the batch callback, so the epoch limit is only an upper bound
    end_epoch = 10000
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))

    # optimizer
    opt = optimizer.SGD(learning_rate=lr, momentum=0.9, wd=0.0005,
                        rescale_grad=1.0 / len(ctx), clip_gradient=None)
    initializer = mx.init.Xavier()

    if len(ctx) > 1:
        train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size, frequent=args.frequent,
                                  auto_reset=False)
    # single-element list so the nested callback can update the counter
    global_step = [0]

    def save_model(epoch):
        """Save a checkpoint whose symbol groups the per-stride RPN outputs."""
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            rpn_cls_score = all_layers['rpn_cls_score_stride%d_output' % stride]

            # prepare rpn data
            rpn_cls_score_reshape = mx.symbol.Reshape(
                data=rpn_cls_score, shape=(0, 2, -1, 0),
                name="rpn_cls_score_reshape_stride%d" % stride)
            rpn_cls_prob = mx.symbol.SoftmaxActivation(
                data=rpn_cls_score_reshape, mode="channel",
                name="rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(
                data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0),
                name='rpn_cls_prob_reshape_stride%d' % stride)
            rpn_bbox_pred = all_layers['rpn_bbox_pred_stride%d_output' % stride]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        """Log speed, decay the learning rate and write checkpoints."""
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        if mbatch in lr_iters:
            opt.lr *= lr_factor
            print('lr change to', opt.lr, ' in batch', mbatch, file=sys.stderr)

        if mbatch % 1000 == 0:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(mbatch)

        # lr_iters is empty when every lr_step epoch is <= begin_epoch;
        # indexing it unguarded would raise IndexError on the first batch
        if lr_iters and mbatch == lr_iters[-1]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            sys.exit(0)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=_batch_callback, kvstore=args.kvstore,
            optimizer=opt, initializer=initializer, allow_missing=True,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
def pred_eval(predictor, test_data, imdb, vis=False, thresh=1e-3):
    """
    Detect on every image of test_data, store the detections and evaluate them.

    Detections are collected as all_boxes[cls][image] = N x 5 array of
    (x1, y1, x2, y2, score), pickled to
    ``<imdb.cache_path>/<imdb.name>_detections.pkl`` and passed to
    imdb.evaluate_detections.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return: None
    """
    assert vis or not test_data.shuffle
    input_names = [desc[0] for desc in test_data.provide_data]
    suppress = py_nms_wrapper(config.TEST.NMS)

    # limit detections to max_per_image over all classes (disabled when <= 0)
    max_per_image = -1

    image_count = imdb.num_images
    all_boxes = [[[] for _ in range(image_count)]
                 for _ in range(imdb.num_classes)]

    tick = time.time()
    for img_idx, (im_info, data_batch) in enumerate(test_data):
        load_time = time.time() - tick
        tick = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_detect(predictor, data_batch, input_names, scale)

        net_time = time.time() - tick
        tick = time.time()

        # class 0 is skipped; the remaining classes are thresholded and NMS-ed
        for cls in range(1, imdb.num_classes):
            selected = np.where(scores[:, cls] > thresh)[0]
            picked_scores = scores[selected, cls, np.newaxis]
            picked_boxes = boxes[selected, cls * 4:(cls + 1) * 4]
            cls_dets = np.hstack((picked_boxes, picked_scores))
            survivors = suppress(cls_dets)
            all_boxes[cls][img_idx] = cls_dets[survivors, :]

        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[cls][img_idx][:, -1]
                                      for cls in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # keep only detections at least as good as the max_per_image-th best
                cutoff = np.sort(image_scores)[-max_per_image]
                for cls in range(1, imdb.num_classes):
                    current = all_boxes[cls][img_idx]
                    strong = np.where(current[:, -1] >= cutoff)[0]
                    all_boxes[cls][img_idx] = current[strong, :]

        if vis:
            per_class = [all_boxes[cls][img_idx] for cls in range(1, imdb.num_classes)]
            boxes_this_image = [[]] + per_class
            vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image,
                              imdb.classes, scale)

        post_time = time.time() - tick
        tick = time.time()
        logger.info('testing %d/%d data %.4fs net %.4fs post %.4fs' %
                    (img_idx, imdb.num_images, load_time, net_time, post_time))

    det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl')
    with open(det_file, 'wb') as det_out:
        pickle.dump(all_boxes, det_out, protocol=pickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes)
def train_net(args):
    """
    Train the detection network end to end on (a sample of) the ground-truth roidb.

    Seeds the numpy, mxnet and python RNGs when args.rand_seed > 0, loads and
    merges the '+'-separated image sets, optionally subsamples them by
    args.begin_sample percent, builds the AnchorLoader and MutableModule and
    runs mod.fit with checkpoint and validation callbacks.

    :param args: parsed command-line arguments
    :return: None
    """
    if args.rand_seed > 0:
        np.random.seed(args.rand_seed)
        mx.random.seed(args.rand_seed)
        random.seed(args.rand_seed)

    # print config
    logger.info(pprint.pformat(config))
    logger.info(pprint.pformat(args))

    # load dataset and prepare imdb for training
    image_sets = args.image_set.split('+')
    roidbs = [load_gt_roidb(args.dataset, image_set, args.dataset_path, flip=args.flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # sample by percentage of all images; 100 keeps the full roidb
    samplepcnt = args.begin_sample
    if samplepcnt == 100:
        sroidb = roidb
    else:
        sroidb = sample_roidb(roidb, samplepcnt)
    logger.info('Sampling %d pcnt : %d training slices' % (samplepcnt, len(sroidb)))

    # load symbol
    # NOTE(review): eval() on a command-line value; only safe with trusted input
    sym = eval('get_' + args.network)(is_train=True, num_classes=config.NUM_CLASSES,
                                      num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.SAMPLES_PER_BATCH * batch_size

    # load training data
    train_data = AnchorLoader(feat_sym, sroidb, batch_size=input_batch_size,
                              shuffle=args.shuffle, ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              nThreads=default.prefetch_thread_num)

    # infer max shape
    max_images = input_batch_size * config.NUM_IMAGES_3DCE
    max_data_shape = [('data', (max_images, config.NUM_SLICES,
                                config.MAX_SIZE, config.MAX_SIZE))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (max_images, 5, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # load and initialize and check params
    arg_params, aux_params = init_params(args, sym, train_data)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # metric: log-loss and L1-loss for both the RPN and the RCNN head
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [metric.RPNLogLossMetric(), metric.RPNL1LossMetric(),
                         metric.RCNNLogLossMetric(), metric.RCNNL1LossMetric()]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = (callback.do_checkpoint(args.e2e_prefix, means, stds),
                          callback.do_validate(args.e2e_prefix))

    arg_names = [x for x in sym.list_arguments() if x not in data_names + label_names]
    # floor division keeps the Python 2 integer result on Python 3 as well
    opt = get_optimizer(args, arg_names, len(sroidb) // input_batch_size, args.iter_size)

    # train
    default.testing = False
    mod.fit(train_data, roidb, eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer=opt, iter_size=args.iter_size,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=args.begin_epoch, num_epoch=args.e2e_epoch)
def init_params(args, sym, train_data):
    """
    Load network parameters and verify them against the inferred shapes.

    When resuming, parameters come from the (args.e2e_prefix, args.begin_epoch)
    checkpoint. Otherwise the pretrained model is loaded, conv1_1 is widened
    to config.NUM_SLICES input channels and the newly added layers (RPN plus
    the head selected by config.FRAMEWORK) get Gaussian weights and zero
    biases.

    :param args: parsed command-line arguments
    :param sym: training symbol
    :param train_data: data iterator providing data and label shapes
    :return: (arg_params, aux_params)
    :raises AssertionError: if a parameter is missing or has the wrong shape
    """
    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    if args.resume:
        # load params from previously trained model
        arg_params, aux_params = load_param(args.e2e_prefix, args.begin_epoch, convert=True)
    else:
        # initialize weights from pretrained model and random numbers
        arg_params, aux_params = load_param(args.pretrained, args.pretrained_epoch,
                                            convert=True)

        # deal with multiple input CT slices, see 3DCE paper.
        # if NUM_SLICES = 3, pretrained weights won't be changed
        # if NUM_SLICES > 3, extra input channels in conv1_1 will be initialized to 0
        nCh = config.NUM_SLICES
        w1 = arg_params['conv1_1_weight'].asnumpy()
        w1_new = np.zeros((64, nCh, 3, 3), dtype=float)
        # floor division: a slice bound must be an int on Python 3 as well
        first = (nCh - 3) // 2
        w1_new[:, first:first + 3, :, :] = w1
        arg_params['conv1_1_new_weight'] = mx.nd.array(w1_new)
        arg_params['conv1_1_new_bias'] = arg_params['conv1_1_bias']
        del arg_params['conv1_1_weight']

        # (layer name, weight std); order is kept so random draws stay the same
        new_layers = [('rpn_conv_3x3', 0.01),
                      ('rpn_cls_score', 0.01),
                      ('rpn_bbox_pred', 0.001)]
        if config.FRAMEWORK == '3DCE':
            new_layers += [('conv_new_1', 0.01), ('fc6', 0.001),
                           ('cls_score', 0.01), ('bbox_pred', 0.001)]
        elif config.FRAMEWORK == 'RFCN':
            new_layers += [('conv_new_1', 0.01), ('rfcn_cls', 0.01),
                           ('rfcn_bbox', 0.01)]
        elif config.FRAMEWORK == 'Faster':
            new_layers += [('fc6_small', 0.001), ('fc7_small', 0.001),
                           ('cls_score', 0.01), ('bbox_pred', 0.001)]

        for layer, std in new_layers:
            weight_name = layer + '_weight'
            bias_name = layer + '_bias'
            arg_params[weight_name] = mx.random.normal(0, std,
                                                       shape=arg_shape_dict[weight_name])
            arg_params[bias_name] = mx.nd.zeros(shape=arg_shape_dict[bias_name])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + \
            ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + \
            ' provided ' + str(aux_params[k].shape)

    logger.info('load param done')
    return arg_params, aux_params
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'):
    """
    Train RPN and RCNN jointly with AnchorLoader and MutableModule.

    :param args: parsed command-line arguments
    :param ctx: list of contexts, one image per context
    :param pretrained: prefix of the pretrained model
    :param epoch: epoch of the pretrained model
    :param prefix: checkpoint prefix (also the source when resuming)
    :param begin_epoch: first epoch; also the checkpoint epoch when resuming
    :param end_epoch: epoch at which training stops
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which the rate is multiplied by 0.1
    :return: None
    """
    # end-to-end settings are forced on the shared config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # symbol and the feature map the anchors are laid out on
    build_symbol = eval('get_' + args.network + '_train')
    sym = build_symbol(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # one context per device
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    logger.info(pprint.pformat(config))

    # ground-truth roidb merged over all '+'-separated image sets
    roidbs = []
    for image_set in args.image_set.split('+'):
        roidbs.append(load_gt_roidb(args.dataset, image_set, args.root_path,
                                    args.dataset_path, flip=not args.no_flip))
    roidb = filter_roidb(merge_roidb(roidbs))

    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size,
                              shuffle=not args.no_shuffle, ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # largest shapes the module may have to bind
    tallest = max([v[0] for v in config.SCALES])
    widest = max([v[1] for v in config.SCALES])
    max_data_shape = [('data', (input_batch_size, 3, tallest, widest))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # shapes of every argument, output and auxiliary state
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # parameters: resume from a checkpoint, or pretrained plus freshly drawn heads
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        fresh_layers = (('rpn_conv_3x3', 0.01),
                        ('rpn_cls_score', 0.01),
                        ('rpn_bbox_pred', 0.01),
                        ('cls_score', 0.01),
                        ('bbox_pred', 0.001))
        for layer, sigma in fresh_layers:
            weight_key = layer + '_weight'
            bias_key = layer + '_bias'
            arg_params[weight_key] = mx.random.normal(0, sigma,
                                                      shape=arg_shape_dict[weight_key])
            arg_params[bias_key] = mx.nd.zeros(shape=arg_shape_dict[bias_key])

    # every non-input argument and auxiliary state must exist with the inferred shape
    for name in sym.list_arguments():
        if name in data_shape_dict:
            continue
        assert name in arg_params, name + ' not initialized'
        assert arg_params[name].shape == arg_shape_dict[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(arg_shape_dict[name]) + \
            ' provided ' + str(arg_params[name].shape)
    for name in sym.list_auxiliary_states():
        assert name in aux_params, name + ' not initialized'
        assert aux_params[name].shape == aux_shape_dict[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(aux_shape_dict[name]) + \
            ' provided ' + str(aux_params[name].shape)

    # solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [desc[0] for desc in train_data.provide_data]
    label_names = [desc[0] for desc in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # metrics: accuracy, log-loss and L1-loss for the RPN, then for the RCNN head
    eval_metrics = mx.metric.CompositeEvalMetric()
    metric_classes = (metric.RPNAccMetric, metric.RPNLogLossMetric, metric.RPNL1LossMetric,
                      metric.RCNNAccMetric, metric.RCNNLogLossMetric, metric.RCNNL1LossMetric)
    children = [metric_cls() for metric_cls in metric_classes]
    for child in children:
        eval_metrics.add(child)

    # callbacks
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # learning-rate schedule: steps already passed at begin_epoch are pre-applied
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(step) for step in lr_step.split(',')]
    lr_epoch_diff = [step - begin_epoch for step in lr_epoch if step > begin_epoch]
    steps_passed = len(lr_epoch) - len(lr_epoch_diff)
    lr = base_lr * (lr_factor ** steps_passed)
    lr_iters = [int(step * len(roidb) / batch_size) for step in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5,
    }

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
def main():
    """Command-line entry point for training.

    Parses the command-line arguments, logs them, creates one GPU context
    per comma-separated id in ``args.gpus`` and passes everything on to
    ``train_net``.
    """
    args = parse_args()
    logger.info('Called with argument: %s' % args)

    # one mx.gpu context for every id listed in --gpus
    gpu_ids = args.gpus.split(',')
    ctx = [mx.gpu(int(gpu_id)) for gpu_id in gpu_ids]

    train_net(
        args,
        ctx,
        args.pretrained,
        args.pretrained_epoch,
        args.prefix,
        args.begin_epoch,
        args.end_epoch,
        lr=args.lr,
        lr_step=args.lr_step,
    )
def main():
    """Command-line entry point for testing.

    Stores the parsed command-line arguments in the module-level ``args``
    (other code in the module may read it), logs them and runs ``test``.
    """
    global args
    args = parse_args()

    message = 'Called with argument: %s' % args
    logger.info(message)

    test(args)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """
    Build the training symbol, data loader and module, then run ``mod.fit``.

    :param args: parsed options; reads network, dataset, image_set, root_path,
        dataset_path, no_flip, no_shuffle, work_load_list, resume, frequent
        and kvstore
    :param ctx: list of device contexts; its length is the number of images
        per batch multiplier and the gradient rescale divisor
    :param pretrained: prefix of the pretrained model (used when not resuming)
    :param epoch: epoch of the pretrained model (used when not resuming)
    :param prefix: checkpoint prefix; loaded from when resuming and written to
        at the end of every epoch
    :param begin_epoch: epoch to start from
    :param end_epoch: epoch to stop at (passed to ``fit`` as ``num_epoch``)
    :param lr: base learning rate
    :param lr_step: comma separated epochs at which the learning rate is
        multiplied by 0.1
    :return: None
    """
    # setup config
    train_cfg = config.TRAIN
    train_cfg.BATCH_IMAGES = 1
    train_cfg.BATCH_ROIS = 128
    train_cfg.END2END = True
    train_cfg.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    # NOTE(review): eval() runs a string built from args.network; only safe
    # as long as that option never carries untrusted input.
    build_symbol = eval('get_' + args.network + '_train')
    sym = build_symbol(num_classes=config.NUM_CLASSES,
                       num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    num_devices = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * num_devices

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    roidbs = []
    for image_set in args.image_set.split('+'):
        roidbs.append(load_gt_roidb(args.dataset, image_set, args.root_path,
                                    args.dataset_path, flip=not args.no_flip))
    roidb = filter_roidb(merge_roidb(roidbs))

    # load training data
    train_data = AnchorLoader(
        feat_sym,
        roidb,
        batch_size=input_batch_size,
        shuffle=not args.no_shuffle,
        ctx=ctx,
        work_load_list=args.work_load_list,
        feat_stride=config.RPN_FEAT_STRIDE,
        anchor_scales=config.ANCHOR_SCALES,
        anchor_ratios=config.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_dim0 = max([v[0] for v in config.SCALES])
    max_dim1 = max([v[1] for v in config.SCALES])
    max_data_shape = [('data', (input_batch_size, 3, max_dim0, max_dim1))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # newly added layers: gaussian weights with the given sigma, zero
        # biases (weight is drawn before bias, layer by layer)
        new_layers = (
            ('rpn_conv_3x3', 0.01),
            ('rpn_cls_score', 0.01),
            ('rpn_bbox_pred', 0.01),
            ('cls_score', 0.01),
            ('bbox_pred', 0.001),
        )
        for layer, sigma in new_layers:
            weight_key = layer + '_weight'
            bias_key = layer + '_bias'
            arg_params[weight_key] = mx.random.normal(
                0, sigma, shape=arg_shape_dict[weight_key])
            arg_params[bias_key] = mx.nd.zeros(shape=arg_shape_dict[bias_key])

    # check parameter shapes
    for name in sym.list_arguments():
        if name not in data_shape_dict:
            assert name in arg_params, name + ' not initialized'
            assert arg_params[name].shape == arg_shape_dict[name], \
                'shape inconsistent for ' + name + ' inferred ' + str(arg_shape_dict[name]) + ' provided ' + str(arg_params[name].shape)
    for name in sym.list_auxiliary_states():
        assert name in aux_params, name + ' not initialized'
        assert aux_params[name].shape == aux_shape_dict[name], \
            'shape inconsistent for ' + name + ' inferred ' + str(aux_shape_dict[name]) + ' provided ' + str(aux_params[name].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [desc[0] for desc in train_data.provide_data]
    label_names = [desc[0] for desc in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    child_metrics = [
        metric.RPNAccMetric(),
        metric.RPNLogLossMetric(),
        metric.RPNL1LossMetric(),
        metric.RCNNAccMetric(),
        metric.RCNNLogLossMetric(),
        metric.RCNNL1LossMetric(),
    ]
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in child_metrics:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=args.frequent,
                                                 auto_reset=False)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)

    # decide learning rate
    lr_factor = 0.1
    step_epochs = [int(step) for step in lr_step.split(',')]
    # steps still ahead of begin_epoch, expressed relative to it
    remaining_steps = [step - begin_epoch for step in step_epochs
                       if step > begin_epoch]
    # every step already passed has decayed the base rate once
    steps_passed = len(step_epochs) - len(remaining_steps)
    start_lr = lr * (lr_factor ** steps_passed)
    lr_iters = [int(step * len(roidb) / num_devices)
                for step in remaining_steps]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (start_lr, remaining_steps, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {
        'learning_rate': start_lr,
        'lr_scheduler': lr_scheduler,
        'momentum': 0.9,
        'wd': 0.0005,
        'rescale_grad': (1.0 / num_devices),
        'clip_gradient': 5,
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def generate_proposals(predictor, test_data, imdb, vis=False, thresh=0.):
    """
    Run the RPN over every batch of ``test_data`` and pickle the proposals.

    The thresholded proposals are always written to
    ``<imdb.root_path>/rpn_data/<imdb.name>_rpn.pkl``; when ``thresh > 0`` the
    unfiltered proposals are additionally written to ``..._full_rpn.pkl``.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled (unless ``vis``)
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: score threshold a proposal must exceed to be kept
    :return: list with one array of kept proposals (boxes + score) per image
    """
    assert vis or not test_data.shuffle
    data_names = [desc[0] for desc in test_data.provide_data]

    imdb_boxes = []
    original_boxes = []
    tick = time.time()
    for idx, (im_info, data_batch) in enumerate(test_data):
        # time spent waiting for the iterator
        data_time = time.time() - tick
        tick = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_proposal(predictor, data_batch, data_names, scale)
        # time spent in the network
        net_time = time.time() - tick
        tick = time.time()

        # assemble proposals
        proposals = np.hstack((boxes, scores))
        original_boxes.append(proposals)

        # filter proposals
        selected = np.where(proposals[:, 4:] > thresh)[0]
        proposals = proposals[selected, :]
        imdb_boxes.append(proposals)

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [proposals], ['obj'], scale)

        logger.info('generating %d/%d ' % (idx + 1, imdb.num_images) +
                    'proposal %d ' % (proposals.shape[0]) +
                    'data %.4fs net %.4fs' % (data_time, net_time))

    assert len(imdb_boxes) == imdb.num_images, 'calculations not complete'

    # save results
    rpn_folder = os.path.join(imdb.root_path, 'rpn_data')
    if not os.path.exists(rpn_folder):
        os.mkdir(rpn_folder)

    rpn_file = os.path.join(rpn_folder, imdb.name + '_rpn.pkl')
    with open(rpn_file, 'wb') as out:
        pickle.dump(imdb_boxes, out, pickle.HIGHEST_PROTOCOL)

    if thresh > 0:
        # keep the unfiltered proposals as well when a threshold was applied
        full_rpn_file = os.path.join(rpn_folder, imdb.name + '_full_rpn.pkl')
        with open(full_rpn_file, 'wb') as out:
            pickle.dump(original_boxes, out, pickle.HIGHEST_PROTOCOL)

    logger.info('wrote rpn proposals to %s' % rpn_file)
    return imdb_boxes
# Script body: finish configuration, pick a GPU if none was given, train,
# then test the best checkpoint.
# NOTE(review): config_file is defined before this point; presumably loaded
# with cfg_from_file like default_file below -- confirm.
merge_a_into_b(config_file, config)
# derived setting: one anchor per (scale, ratio) pair
config.NUM_ANCHORS = len(config.ANCHOR_SCALES) * len(config.ANCHOR_RATIOS)
if config.FRAMEWORK != '3DCE':
    assert config.NUM_IMAGES_3DCE == 1, "Combining multiple images is only possible in 3DCE"

# same merge for the values read from default.yml
default_file = cfg_from_file('default.yml')
merge_a_into_b(default_file, default)
default.e2e_prefix = 'model/' + default.exp_name
# a non-zero starting epoch switches the resume flag on
if default.begin_epoch != 0:
    default.resume = True
default.accs = dict()

if default.gpus == '':  # auto select GPU
    import GPUtil
    # first try: a single GPU with at most 20% memory in use
    deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxMemory=.2)
    if len(deviceIDs) == 0:
        # fallback: relax the limits to 90% memory and full load
        deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxMemory=.9, maxLoad=1)
    # NOTE(review): deviceIDs[0] raises IndexError if the fallback also
    # finds nothing.
    GPUs = GPUtil.getGPUs()
    # NOTE(review): the id is mirrored (count - 1 - id); presumably GPUtil
    # numbers devices in the reverse order of the framework -- confirm.
    default.gpus = str(len(GPUs)-1-deviceIDs[0])
    logger.info('using gpu '+default.gpus)
# first character of the gpu string is used as the validation GPU
default.val_gpu = default.gpus[0]
# default.prefetch_thread_num = min(default.prefetch_thread_num, config.TRAIN.SAMPLES_PER_BATCH)
train_net(default)

# test the best model on the test set
from test import test_net
test_net(default.e2e_prefix, default.best_epoch)
def pred_eval(predictor, test_data, imdb, vis=False, max_box=-1, thresh=1e-3):
    """
    Run the detector over ``test_data``, collect detections and evaluate them.

    For every batch the per-class detections above ``thresh`` are mapped back
    with ``map_box_back`` and passed through NMS. Two collections are kept:

    * ``all_boxes`` -- every detection surviving NMS; used for evaluation and
      stored in ``default.res_dict``.
    * ``kept_boxes`` -- ``all_boxes`` limited to the ``max_box`` highest
      scoring detections per image (no limit when ``max_box <= 0``); used for
      visualisation and pickled to ``<imdb.cache_path>/<imdb.name>_detections.pkl``.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param max_box: maximum number of boxes detected in each image
    :param thresh: valid detection threshold
    :return: whatever ``my_evaluate_detections(all_boxes, all_gts)`` returns
    """
    data_names = [k[0] for k in test_data.provide_data]
    nms = py_nms_wrapper(config.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = max_box

    num_images = imdb.num_images
    num_classes = imdb.num_classes
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    kept_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    all_gts = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    # all_iminfos / all_crops are gathered for optional debugging output;
    # only all_imnames ends up in default.res_dict below
    all_iminfos = []
    all_imnames = []
    all_crops = []

    i = 0
    _t = {'data': Timer(), 'im_detect': Timer(), 'misc': Timer()}
    _t['data'].tic()
    num_image = config.NUM_IMAGES_3DCE
    # floor division keeps this a valid integer index even under true division
    key_idx = (num_image - 1) // 2  # adjust image for 3DCE
    for im_info, imname, crop, data_batch in test_data:
        _t['data'].toc()
        _t['im_detect'].tic()

        all_iminfos.append(im_info)
        all_imnames.append(imname)
        all_crops.append(crop)

        # we have scaled the label in get_image(), so no need to scale the pred_box
        scale = 1.
        gt_boxes = data_batch.label[0].asnumpy()[key_idx, :, :]
        data_batch.label = None
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scale)
        _t['im_detect'].toc()
        _t['misc'].tic()

        for j in range(1, num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j, np.newaxis]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
            cls_boxes = map_box_back(cls_boxes, crop[2], crop[0], im_info[0, 2])
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j][i] = cls_dets[keep, :]
            # Default to keeping everything. Previously kept_boxes stayed
            # empty unless the image exceeded max_per_image, so the saved
            # detections and the visualisation lost all boxes otherwise.
            kept_boxes[j][i] = all_boxes[j][i]
            all_gts[j][i] = map_box_back(gt_boxes, crop[2], crop[0], im_info[0, 2])

        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                # score of the max_per_image-th best detection in this image
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    kept_boxes[j][i] = all_boxes[j][i][keep, :]

        if vis:
            boxes_this_image = [[]] + [kept_boxes[j][i] for j in range(1, num_classes)]
            vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, imdb.classes, scale)
        _t['misc'].toc()

        # detailed timing for the first few reports, then just a counter
        if i % 200 == 0:
            if i <= 400:
                logger.info('im_detect: {:d}/{:d} data {:.3f}s im_detect {:.3f}s misc {:.3f}s'
                            .format(i, imdb.num_images, _t['data'].average_time,
                                    _t['im_detect'].average_time, _t['misc'].average_time))
            else:
                # sys.stdout.write instead of the print statement so the
                # block parses on both Python 2 and Python 3
                sys.stdout.write('%d ' % i)
                sys.stdout.flush()
        i += 1
        _t['data'].tic()

    # terminate the progress counter line
    sys.stdout.write('\n')
    sys.stdout.flush()

    det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(kept_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # only class index 1 is exported to the result dictionary
    default.res_dict = {'imname': all_imnames, 'boxes': all_boxes[1], 'gts': all_gts[1]}

    acc = my_evaluate_detections(all_boxes, all_gts)
    sys.stdout.flush()
    return acc