def runTest(self):
    for keep_first_b in [True, False]:
        for label_schema in ["BIO", "BIO2"]:
            settings = reader.Settings(
                vocab=Vocab().data,
                is_training=True,
                label_schema=label_schema,
                negative_sample_ratio=0.2,
                hit_ans_negative_sample_ratio=0.25,
                keep_first_b=keep_first_b)
            filename = os.path.join(topdir, "test", "trn_data.gz")
            data_stream = reader.create_reader(filename, settings)
            total, at_least_one, one = 1000, 0, 0
            for _, d in itertools.izip(xrange(total), data_stream()):
                labels = d[reader.LABELS]
                b_num = labels.count(0)
                if b_num >= 1:
                    at_least_one += 1
                if b_num == 1:
                    one += 1

            self.assertLess(at_least_one, total)
            if keep_first_b:
                self.assertEqual(one, at_least_one)
            else:
                self.assertLess(one, at_least_one)
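# Note: the assertions above assume that label id 0 encodes the "B" tag of
# the BIO/BIO2 schemas, so labels.count(0) counts answer-begin positions;
# with keep_first_b=True every positive sample should then keep exactly one B.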
def __init__(self, conf):
    self.conf = conf
    self.settings = reader.Settings(
        vocab=conf.vocab, is_training=False, label_schema=conf.label_schema)

    # init paddle
    paddle.init(use_gpu=conf.use_gpu, trainer_count=conf.trainer_count)

    # define network
    self.tags_layer = mLSTM_crf_network.inference_net(conf)
def __init__(self, conf):
    self.conf = conf
    self.settings = reader.Settings(
        vocab=conf.vocab, is_training=False, label_schema=conf.label_schema)

    # init paddle
    # TODO(lipeng17) v2 API does not support parallel_nn yet. Therefore, we
    # can only use CPU currently
    paddle.init(use_gpu=conf.use_gpu, trainer_count=conf.trainer_count)

    # define network
    self.tags_layer = network.inference_net(conf)
def runTest(self):
    settings = reader.Settings(
        vocab=Vocab().data,
        is_training=True,
        label_schema="BIO2",
        negative_sample_ratio=0.2,
        hit_ans_negative_sample_ratio=0.25,
        keep_first_b=True)
    filename = os.path.join(topdir, "test", "trn_data.gz")
    data_stream = reader.create_reader(filename, settings)
    q_uniq_ids, e_uniq_ids = set(), set()
    for _, d in itertools.izip(xrange(1000), data_stream()):
        q_uniq_ids.update(d[reader.Q_IDS])
        e_uniq_ids.update(d[reader.E_IDS])

    self.assertGreater(len(q_uniq_ids), 50)
    self.assertGreater(len(e_uniq_ids), 50)
def main():
    args = parser.parse_args()
    print_arguments(args)
    check_cuda(args.use_gpu)

    data_dir = args.data_dir
    dataset = args.dataset
    assert dataset in ['pascalvoc', 'coco2014', 'coco2017']

    # for pascalvoc
    label_file = 'label_list'
    train_file_list = 'trainval.txt'
    val_file_list = 'test.txt'

    if dataset == 'coco2014':
        train_file_list = 'annotations/instances_train2014.json'
        val_file_list = 'annotations/instances_val2014.json'
    elif dataset == 'coco2017':
        train_file_list = 'annotations/instances_train2017.json'
        val_file_list = 'annotations/instances_val2017.json'

    mean_BGR = [float(m) for m in args.mean_BGR.split(",")]
    image_shape = [int(m) for m in args.image_shape.split(",")]
    train_parameters[dataset]['image_shape'] = image_shape
    train_parameters[dataset]['batch_size'] = args.batch_size
    train_parameters[dataset]['lr'] = args.learning_rate
    train_parameters[dataset]['epoc_num'] = args.epoc_num
    train_parameters[dataset]['ap_version'] = args.ap_version

    data_args = reader.Settings(
        dataset=args.dataset,
        data_dir=data_dir,
        label_file=label_file,
        resize_h=image_shape[1],
        resize_w=image_shape[2],
        mean_value=mean_BGR,
        apply_distort=True,
        apply_expand=True,
        ap_version=args.ap_version)
    train(args, data_args, train_parameters[dataset],
          train_file_list=train_file_list, val_file_list=val_file_list)
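# Illustrative invocation; the flag names below are inferred from the args.*
# fields read above, so check the script's argparse setup for the exact list:
#
#   python train.py --dataset coco2017 --data_dir data/coco \
#       --batch_size 32 --learning_rate 0.001 --epoc_num 120 \
#       --ap_version 11point --image_shape 3,300,300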
def check_ratio(self, negative_sample_ratio):
    for keep_first_b in [True, False]:
        settings = reader.Settings(
            vocab=Vocab().data,
            is_training=True,
            label_schema="BIO2",
            negative_sample_ratio=negative_sample_ratio,
            hit_ans_negative_sample_ratio=0.25,
            keep_first_b=keep_first_b)
        filename = os.path.join(topdir, "test", "trn_data.gz")
        data_stream = reader.create_reader(filename, settings)
        total, negative_num = 5000, 0
        for _, d in itertools.izip(xrange(total), data_stream()):
            labels = d[reader.LABELS]
            if labels.count(0) == 0:
                negative_num += 1

        ratio = negative_num / float(total)
        self.assertLessEqual(math.fabs(ratio - negative_sample_ratio), 0.01)
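# A minimal driver for check_ratio, assuming it lives in the same
# unittest.TestCase subclass as the runTest methods above; the two ratios
# below are illustrative.
def runTest(self):
    for ratio in [0.05, 0.2]:
        self.check_ratio(ratio)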
def getF():
    # load the model
    args = parser.parse_args()
    print_arguments(args)
    data_dir = 'data/Mydata'
    label_file = 'label_list'

    data_args = reader.Settings(
        dataset=args.dataset,
        data_dir=data_dir,
        label_file=label_file,
        resize_h=args.resize_h,
        resize_w=args.resize_w,
        mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
        apply_distort=False,
        apply_expand=False,
        ap_version='')
    f = infer(args, data_args=data_args, model_dir=args.model_dir)
    return f
if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)

    data_dir = args.data_dir
    label_file = 'label_list'
    model_save_dir = args.model_save_dir
    train_file_list = 'trainval.txt'
    val_file_list = 'test.txt'

    data_args = reader.Settings(
        dataset=args.dataset,
        data_dir=data_dir,
        label_file=label_file,
        resize_h=args.resize_h,
        resize_w=args.resize_w,
        mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
        apply_distort=args.apply_distort,
        apply_expand=args.apply_expand,
        ap_version=args.ap_version,
        toy=args.is_toy)
    train(
        args,
        train_file_list=train_file_list,
        val_file_list=val_file_list,
        data_args=data_args,
        learning_rate=args.learning_rate,
        batch_size=args.batch_size,
        num_passes=args.num_passes,
        model_save_dir=model_save_dir,
        pretrained_model=args.pretrained_model)
        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
        num = len(cards.split(","))
        return num
    else:
        return args.num_devices


if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)

    data_dir = os.path.join(args.data_dir, 'WIDER_train/images/')
    train_file_list = os.path.join(
        args.data_dir, 'wider_face_split/wider_face_train_bbx_gt.txt')

    mean_BGR = [float(m) for m in args.mean_BGR.split(",")]
    image_shape = [3, int(args.resize_h), int(args.resize_w)]
    train_parameters["image_shape"] = image_shape
    train_parameters["use_pyramidbox"] = args.use_pyramidbox
    train_parameters["batch_size"] = args.batch_size
    train_parameters["lr"] = args.learning_rate
    train_parameters["epoc_num"] = args.epoc_num

    config = reader.Settings(
        data_dir=data_dir,
        resize_h=image_shape[1],
        resize_w=image_shape[2],
        apply_distort=True,
        apply_expand=False,
        mean_value=mean_BGR,
        ap_version='11point')
    train(args, config, train_parameters, train_file_list)
        if dev_file_list is not None:
            result = trainer.test(reader=dev_reader, feeding=feeding)
            print("Test with Pass %d, TestCost: %f, Detection mAP=%g" %
                  (event.pass_id, result.cost,
                   result.metrics["detection_evaluator"]))

    trainer.train(
        reader=train_reader,
        event_handler=event_handler,
        num_passes=cfg.TRAIN.NUM_PASS,
        feeding=feeding)


if __name__ == "__main__":
    data_dir = "data/0908"
    label_file = "label_list.txt"
    init_model_path = "vgg/vgg_model.tar.gz"
    train_file_list = "data/0908/train_list.txt"
    dev_file_list = None

    paddle.init(use_gpu=True, trainer_count=1)
    data_args = reader.Settings(
        data_dir=data_dir,
        label_file=label_file,
        resize_h=cfg.IMG_HEIGHT,
        resize_w=cfg.IMG_WIDTH,
        mean_value=[57, 56, 58])
    train(
        train_file_list=train_file_list,
        dev_file_list=dev_file_list,
        data_args=data_args,
        save_dir="checkpoints",
        init_model_path=init_model_path)
        max_shrink = max_shrink - 0.3
    elif max_shrink >= 4 and max_shrink < 5:
        max_shrink = max_shrink - 0.4
    elif max_shrink >= 5:
        max_shrink = max_shrink - 0.5
    elif max_shrink <= 0.1:
        max_shrink = 0.1

    shrink = max_shrink if max_shrink < 1 else 1
    return shrink, max_shrink


if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)

    config = reader.Settings(data_dir=args.data_dir)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    main_program = fluid.Program()
    startup_program = fluid.Program()
    image_shape = [3, 1024, 1024]
    with fluid.program_guard(main_program, startup_program):
        network = PyramidBox(
            data_shape=image_shape,
            sub_network=args.use_pyramidbox,
            is_infer=True)
        infer_program, nmsed_out = network.infer(main_program)
        fetches = [nmsed_out]

    exe.run(startup_program)
    # the load_persistables call was truncated in the excerpt; the
    # main_program keyword below is the natural completion
    fluid.io.load_persistables(
        exe, args.model_dir, main_program=infer_program)
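# Note: the helper tail above caps shrink at 1, so test images are only ever
# scaled down; max_shrink appears to bound the largest resize ratio used
# during inference.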
def train(conf):
    if not os.path.exists(conf.model_save_dir):
        os.makedirs(conf.model_save_dir, mode=0755)

    settings = reader.Settings(
        vocab=conf.vocab,
        is_training=True,
        label_schema=conf.label_schema,
        negative_sample_ratio=conf.negative_sample_ratio,
        hit_ans_negative_sample_ratio=conf.hit_ans_negative_sample_ratio,
        keep_first_b=conf.keep_first_b,
        seed=conf.seed)
    samples_per_pass = conf.batch_size * conf.batches_per_pass
    train_reader = paddle.batch(
        paddle.reader.buffered(
            reader.create_reader(conf.train_data_path, settings,
                                 samples_per_pass),
            size=samples_per_pass),
        batch_size=conf.batch_size)

    # TODO(lipeng17) v2 API does not support parallel_nn yet. Therefore, we can
    # only use CPU currently
    paddle.init(
        use_gpu=conf.use_gpu,
        trainer_count=conf.trainer_count,
        seed=conf.paddle_seed)

    # network config
    cost = network.training_net(conf)

    # create parameters
    # NOTE: parameter values are not initialized here; therefore, we need to
    # print parameter initialization info in the beginning of the first batch
    parameters = paddle.parameters.create(cost)

    # create optimizer
    rmsprop_optimizer = paddle.optimizer.RMSProp(
        learning_rate=conf.learning_rate,
        rho=conf.rho,
        epsilon=conf.epsilon,
        model_average=paddle.optimizer.ModelAverage(
            average_window=conf.average_window,
            max_average_window=conf.max_average_window))

    # create trainer
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=rmsprop_optimizer)

    # begin training network
    def _event_handler(event):
        """Define end batch and end pass event handler."""
        if isinstance(event, paddle.event.EndIteration):
            sys.stderr.write(".")
            batch_num = event.batch_id + 1
            total_batch = conf.batches_per_pass * event.pass_id + batch_num
            if batch_num % conf.log_period == 0:
                sys.stderr.write("\n")
                logger.info("Total batch=%d Batch=%d CurrentCost=%f Eval: %s" %
                            (total_batch, batch_num, event.cost, event.metrics))
            if batch_num % conf.show_parameter_status_period == 0:
                show_parameter_status(parameters)
        elif isinstance(event, paddle.event.EndPass):
            save_model(trainer, conf.model_save_dir, parameters, event.pass_id)
        elif isinstance(event, paddle.event.BeginIteration):
            if event.batch_id == 0 and event.pass_id == 0:
                show_parameter_init_info(parameters)

    ## for debugging purpose
    #with utils.open_file("config", "w") as config:
    #    print >> config, paddle.layer.parse_network(cost)

    trainer.train(
        reader=train_reader,
        event_handler=_event_handler,
        feeding=network.feeding,
        num_passes=conf.num_passes)

    logger.info("Training has finished.")
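# Note: samples_per_pass is handed both to reader.create_reader and to the
# buffered wrapper, which suggests a training "pass" here is defined as
# conf.batches_per_pass batches rather than a full sweep of the data file.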
    # profiling
    start = time.time()
    if not parallel:
        with profiler.profiler('All', 'total', '/tmp/profile_file'):
            run_time = run(num_iterations)
    else:
        run_time = run(num_iterations)
    end = time.time()
    total_time = end - start
    print("Total time: {0}, reader time: {1} s, run time: {2} s".format(
        total_time, total_time - np.sum(run_time), np.sum(run_time)))


if __name__ == '__main__':
    import paddle
    paddle.enable_static()
    args = parser.parse_args()
    print_arguments(args)

    data_dir = os.path.join(args.data_dir, 'WIDER_train/images/')
    train_file_list = os.path.join(
        args.data_dir, 'wider_face_split/wider_face_train_bbx_gt.txt')

    config = reader.Settings(
        data_dir=data_dir,
        resize_h=args.resize_h,
        resize_w=args.resize_w,
        apply_expand=False,
        mean_value=[104., 117., 123.],
        ap_version='11point')
    train(args, config, train_file_list, optimizer_method="momentum")
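# Note: "reader time" in the summary above is derived as total wall time
# minus the summed per-iteration run time, i.e. an estimate of the time
# spent outside the executor (mostly feeding data).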
    dataset = 'pascalvoc'
    mean_BGR = [float(m) for m in args.mean_BGR.split(",")]
    image_shape = [int(m) for m in args.image_shape.split(",")]
    lr_epochs = [int(m) for m in args.lr_epochs.split(",")]
    lr_rates = [float(m) for m in args.lr_decay_rates.split(",")]
    train_parameters[dataset]['image_shape'] = image_shape
    train_parameters[dataset]['batch_size'] = args.batch_size
    train_parameters[dataset]['lr'] = args.learning_rate
    train_parameters[dataset]['epoc_num'] = args.epoc_num
    train_parameters[dataset]['ap_version'] = args.ap_version
    train_parameters[dataset]['lr_epochs'] = lr_epochs
    train_parameters[dataset]['lr_decay'] = lr_rates

    data_args = reader.Settings(
        dataset=dataset,
        data_dir=args.data_dir,
        label_file=label_file,
        resize_h=image_shape[1],
        resize_w=image_shape[2],
        mean_value=mean_BGR,
        apply_distort=True,
        apply_expand=True,
        ap_version=args.ap_version)
    if args.mode == 'train':
        train(args, data_args, train_parameters[dataset], train_list, val_list)
    elif args.mode == 'test':
        eval(args, data_args, train_parameters[dataset], val_list)
    else:
        infer(args, data_args)
def compute_score(model_dir,
                  data_dir,
                  test_list='annotations/instances_val2017.json',
                  batch_size=32,
                  height=300,
                  width=300,
                  num_classes=81,
                  mean_value=[127.5, 127.5, 127.5]):
    """
    Compute score, mAP, and flops of a model.

    Args:
        model_dir (string): directory of the model.
        data_dir (string): directory of the coco dataset, e.g.
            '/your/path/to/coco' or '/work/datasets/coco'.

    Returns:
        tuple: score, mAP, flops.
    """
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    [infer_program, feeded_var_names, target_var] = \
        fluid.io.load_inference_model(dirname=model_dir, executor=exe)

    image_shape = [3, height, width]
    data_args = reader.Settings(
        dataset='coco2017',
        data_dir=data_dir,
        resize_h=height,
        resize_w=width,
        mean_value=mean_value,
        apply_distort=False,
        apply_expand=False,
        ap_version='cocoMAP')

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    gt_iscrowd = fluid.layers.data(
        name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1)
    gt_image_info = fluid.layers.data(
        name='gt_image_id', shape=[3], dtype='int32')

    test_reader = reader.test(data_args, test_list, batch_size)
    feeder = fluid.DataFeeder(
        place=place,
        feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info])

    mAP = use_coco_api_compute_mAP(data_args, test_list, num_classes,
                                   test_reader, exe, infer_program,
                                   feeded_var_names, feeder, target_var,
                                   batch_size)

    total_flops_params, is_quantize = summary(infer_program)
    MAdds = np.sum(total_flops_params['flops']) / 2000000.0
    if is_quantize:
        MAdds /= 2.0
    print('mAP:', mAP)
    print('MAdds:', MAdds)

    if MAdds < 160.0:
        MAdds = 160.0
    if MAdds > 1300.0:
        score = 0.0
    else:
        score = mAP * 100 - (5.1249 * np.log(MAdds) - 14.499)
    print('score:', score)
    return score, mAP, MAdds
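# Worked example of the scoring rule above, with illustrative numbers:
# for mAP = 0.30 and MAdds = 300, the penalty term is
#   5.1249 * ln(300) - 14.499 ≈ 5.1249 * 5.7038 - 14.499 ≈ 14.73,
# so score ≈ 0.30 * 100 - 14.73 = 15.27. MAdds below 160 is clamped up to
# 160 before the penalty is applied, and anything above 1300 scores 0.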
        cocoEval.summarize()

    test()


if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)

    assert args.dataset in ['coco2014', 'coco2017']
    data_dir = './data/coco'
    if '2014' in args.dataset:
        test_list = 'annotations/instances_val2014.json'
    elif '2017' in args.dataset:
        test_list = 'annotations/instances_val2017.json'

    data_args = reader.Settings(
        dataset=args.dataset,
        data_dir=args.data_dir if len(args.data_dir) > 0 else data_dir,
        label_file='',
        resize_h=args.resize_h,
        resize_w=args.resize_w,
        mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
        apply_distort=False,
        apply_expand=False,
        ap_version=args.ap_version)
    eval(
        args,
        data_args=data_args,
        test_list=args.test_list if len(args.test_list) > 0 else test_list,
        batch_size=args.batch_size,
        model_dir=args.model_dir)
                save_batch_res(ret_res, img_w, img_h, fname_list, fout)

                test_data = []
                fname_list = []
                img_w = []
                img_h = []

        if len(test_data) > 0:
            ret_res = _infer(inferer, test_data, threshold)
            save_batch_res(ret_res, img_w, img_h, fname_list, fout)


if __name__ == "__main__":
    model_dir = "checkpoints"
    paddle.init(use_gpu=True, trainer_count=1)

    data_args = reader.Settings(
        data_dir="data/0908",
        label_file="label_list.txt",
        resize_h=cfg.IMG_HEIGHT,
        resize_w=cfg.IMG_WIDTH,
        mean_value=[57, 56, 58])
    infer(
        eval_file_list="data/0908/test_list.txt",
        save_path="infer_output.txt",
        data_args=data_args,
        batch_size=1,
        model_path=os.path.join(model_dir, "params_pass_00259.tar.gz"),
        threshold=0.3)