def infer(model_path, image_shape, label_dict_path, infer_file_list_path):
    """Run batch inference over every image listed in a file list.

    :param model_path: path to a gzipped tar archive of trained parameters.
    :param image_shape: shape images are resized to by the reader.
    :param label_dict_path: path of the character/label dictionary file.
    :param infer_file_list_path: path of the text file listing images (and
        their labels) to run inference on.
    """
    infer_file_list = get_file_list(infer_file_list_path)
    # Load the label dictionary (char -> id).
    char_dict = load_dict(label_dict_path)
    # Load the reversed label dictionary (id -> char) for decoding output.
    reversed_char_dict = load_reverse_dict(label_dict_path)
    # Size of the dictionary.
    dict_size = len(char_dict)
    # Build the data reader.
    data_generator = DataGenerator(
        char_dict=char_dict, image_shape=image_shape)
    # Initialize PaddlePaddle.
    paddle.init(use_gpu=True, trainer_count=2)
    # Load the trained parameters.
    parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))
    # Build the network topology in inference mode.
    model = Model(dict_size, image_shape, is_infer=True)
    # Create the inferer.
    inferer = paddle.inference.Inference(
        output_layer=model.log_probs, parameters=parameters)
    # Collect the whole file list into one batch, then predict.
    test_batch = []
    labels = []
    for image, label in data_generator.infer_reader(infer_file_list)():
        test_batch.append([image])
        labels.append(label)
    infer_batch(inferer, test_batch, labels, reversed_char_dict)
def infer(img_path, model_path, image_shape, label_dict_path):
    """Predict the text contained in a single image.

    :param img_path: path of the image to recognize.
    :param model_path: path to a gzipped tar archive of trained parameters.
    :param image_shape: shape the image is resized to by the reader.
    :param label_dict_path: path of the character/label dictionary file.
    :return: whatever ``start_infer`` returns (the decoded prediction).
    """
    # Load the label dictionary (char -> id).
    char_dict = load_dict(label_dict_path)
    # Load the reversed label dictionary (id -> char) for decoding output.
    reversed_char_dict = load_reverse_dict(label_dict_path)
    # Size of the dictionary.
    dict_size = len(char_dict)
    # Build the data reader.
    my_reader = Reader(char_dict=char_dict, image_shape=image_shape)
    # Initialize PaddlePaddle.
    paddle.init(use_gpu=True, trainer_count=1)
    # Load the trained parameters.
    parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))
    # Build the network topology in inference mode.
    model = Model(dict_size, image_shape, is_infer=True)
    # Create the inferer.
    inferer = paddle.inference.Inference(
        output_layer=model.log_probs, parameters=parameters)
    # A single-image batch.
    test_batch = [[my_reader.load_image(img_path)]]
    # Run the prediction and return the decoded result.
    return start_infer(inferer, test_batch, reversed_char_dict)
def train(train_file_list_path, test_file_list_path, label_dict_path,
          model_save_dir):
    """Train the OCR model with hard-coded hyperparameters.

    :param train_file_list_path: path of the training file list.
    :param test_file_list_path: path of the testing file list.
    :param label_dict_path: path of the label dictionary; built from the
        training data if it does not exist yet.
    :param model_save_dir: directory parameters are saved into after each pass.
    """
    # Create the model save directory up front (was previously re-checked
    # inside the event handler on every pass).
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)
    # Load the training and testing file lists.
    train_file_list = get_file_list(train_file_list_path)
    test_file_list = get_file_list(test_file_list_path)
    # Build the label dictionary from the training data when missing.
    if not os.path.exists(label_dict_path):
        print(("Label dictionary is not given, the dictionary "
               "is automatically built from the training data."))
        build_label_dict(train_file_list, label_dict_path)
    # Load the label dictionary and its size.
    char_dict = load_dict(label_dict_path)
    dict_size = len(char_dict)
    # Define the network topology.
    model = Model(dict_size, IMAGE_SHAPE, is_infer=False)
    # Initialize PaddlePaddle.
    paddle.init(use_gpu=True, trainer_count=1)
    # Create the optimizer.
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128),
        learning_rate=0.001 / 128,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=128000 * 35,
        learning_rate_schedule="discexp", )
    # Create the trainable parameters.
    params = paddle.parameters.create(model.cost)
    # Define the trainer.
    trainer = paddle.trainer.SGD(cost=model.cost,
                                 parameters=params,
                                 update_equation=optimizer,
                                 extra_layers=model.eval)
    # Build the data reader.
    my_reader = Reader(char_dict=char_dict, image_shape=IMAGE_SHAPE)
    # Map data slots to reader tuple positions.
    feeding = {'image': 0, 'label': 1}

    def event_handler(event):
        # Log progress, evaluate on the test set and checkpoint each pass.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("Pass %d, batch %d, Samples %d, Cost %f" %
                      (event.pass_id, event.batch_id,
                       event.batch_id * BATCH_SIZE, event.cost))
        if isinstance(event, paddle.event.EndPass):
            # Training and testing data share the same format, so we reuse
            # reader.train_reader to read the testing data.
            test_reader = paddle.batch(
                my_reader.train_reader(test_file_list),
                batch_size=BATCH_SIZE)
            result = trainer.test(reader=test_reader, feeding=feeding)
            print("Test %d, Cost %f" % (event.pass_id, result.cost))
            # NOTE: the same file is overwritten each pass, keeping only the
            # latest checkpoint.
            with gzip.open(
                    os.path.join(model_save_dir, "params_pass.tar.gz"),
                    "w") as f:
                trainer.save_parameter_to_tar(f)

    # Build the shuffled training reader.
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            my_reader.train_reader(train_file_list), buf_size=1000),
        batch_size=BATCH_SIZE)
    # Start training.
    trainer.train(reader=train_reader,
                  feeding=feeding,
                  event_handler=event_handler,
                  num_passes=1000)
def train(train_file_list_path, test_file_list_path, label_dict_path,
          model_save_dir):
    """Train the OCR model using hyperparameters from the ``conf`` module.

    :param train_file_list_path: path of the training file list.
    :param test_file_list_path: path of the testing file list.
    :param label_dict_path: path of the label dictionary; built from the
        training data if it does not exist yet.
    :param model_save_dir: directory per-pass parameter checkpoints go into.
    """
    # Make sure the checkpoint directory exists.
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)
    # Load the training and testing file lists.
    train_file_list = get_file_list(train_file_list_path)
    test_file_list = get_file_list(test_file_list_path)
    # Build the label dictionary from the training data when missing.
    if not os.path.exists(label_dict_path):
        print(("Label dictionary is not given, the dictionary "
               "is automatically built from the training data."))
        build_label_dict(train_file_list, label_dict_path)
    # Load the label dictionary and its size.
    char_dict = load_dict(label_dict_path)
    dict_size = len(char_dict)
    # Build the data reader.
    data_generator = DataGenerator(
        char_dict=char_dict, image_shape=conf.image_shape)
    # Initialize PaddlePaddle.
    paddle.init(use_gpu=conf.use_gpu, trainer_count=conf.trainer_count)
    # Create optimizer.
    optimizer = paddle.optimizer.Momentum(momentum=conf.momentum)
    # Define network topology.
    model = Model(dict_size, conf.image_shape, is_infer=False)
    # Create all the trainable parameters.
    params = paddle.parameters.create(model.cost)
    # Define the trainer.
    trainer = paddle.trainer.SGD(cost=model.cost,
                                 parameters=params,
                                 update_equation=optimizer,
                                 extra_layers=model.eval)
    # Feeding dictionary: map data slots to reader tuple positions.
    feeding = {'image': 0, 'label': 1}

    def event_handler(event):
        # Log progress, evaluate on the test set and checkpoint each pass.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % conf.log_period == 0:
                print("Pass %d, batch %d, Samples %d, Cost %f, Eval %s" %
                      (event.pass_id, event.batch_id,
                       event.batch_id * conf.batch_size, event.cost,
                       event.metrics))
        if isinstance(event, paddle.event.EndPass):
            # Here, because training and testing data share a same format,
            # we still use the reader.train_reader to read the testing data.
            result = trainer.test(
                reader=paddle.batch(
                    data_generator.train_reader(test_file_list),
                    batch_size=conf.batch_size),
                feeding=feeding)
            print("Test %d, Cost %f, Eval %s" %
                  (event.pass_id, result.cost, result.metrics))
            # Save a distinct checkpoint per pass.
            with gzip.open(
                    os.path.join(model_save_dir,
                                 "params_pass_%05d.tar.gz" % event.pass_id),
                    "w") as f:
                trainer.save_parameter_to_tar(f)

    # Start training on the shuffled training reader.
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                data_generator.train_reader(train_file_list),
                buf_size=conf.buf_size),
            batch_size=conf.batch_size),
        feeding=feeding,
        event_handler=event_handler,
        num_passes=conf.num_passes)
def train(train_file_list_path, test_file_list_path, label_dict_path,
          model_save_dir):
    """Train the OCR model using hyperparameters from the ``conf`` module.

    :param train_file_list_path: path of the training file list.
    :param test_file_list_path: path of the testing file list.
    :param label_dict_path: path of the label dictionary; built from the
        training data if it does not exist yet.
    :param model_save_dir: directory the parameter checkpoint is saved into.
    """
    # Create the model save directory if it does not exist yet.
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)
    # Load the training file list.
    train_file_list = get_file_list(train_file_list_path)
    # Load the testing file list.
    test_file_list = get_file_list(test_file_list_path)
    # Build the label dictionary from the training data when missing.
    if not os.path.exists(label_dict_path):
        print(("Label dictionary is not given, the dictionary "
               "is automatically built from the training data."))
        build_label_dict(train_file_list, label_dict_path)
    # Load the label dictionary.
    char_dict = load_dict(label_dict_path)
    # Size of the dictionary.
    dict_size = len(char_dict)
    # Build the data reader.
    data_generator = DataGenerator(
        char_dict=char_dict, image_shape=conf.image_shape)
    # Initialize PaddlePaddle.
    paddle.init(use_gpu=conf.use_gpu, trainer_count=conf.trainer_count)
    # Create the optimizer.
    optimizer = paddle.optimizer.Momentum(momentum=conf.momentum)
    # Define the network topology.
    model = Model(dict_size, conf.image_shape, is_infer=False)
    # Create the trainable parameters.
    params = paddle.parameters.create(model.cost)
    # Define the trainer.
    trainer = paddle.trainer.SGD(cost=model.cost,
                                 parameters=params,
                                 update_equation=optimizer,
                                 extra_layers=model.eval)
    # Map data slots to reader tuple positions.
    feeding = {'image': 0, 'label': 1}

    def event_handler(event):
        # Log progress, evaluate on the test set and checkpoint each pass.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % conf.log_period == 0:
                print("Pass %d, batch %d, Samples %d, Cost %f, Eval %s" %
                      (event.pass_id, event.batch_id,
                       event.batch_id * conf.batch_size, event.cost,
                       event.metrics))
        if isinstance(event, paddle.event.EndPass):
            # Training and testing data share the same format, so we reuse
            # reader.train_reader to read the testing data.
            result = trainer.test(
                reader=paddle.batch(
                    data_generator.train_reader(test_file_list),
                    batch_size=conf.batch_size),
                feeding=feeding)
            print("Test %d, Cost %f, Eval %s" %
                  (event.pass_id, result.cost, result.metrics))
            # NOTE: the same file is overwritten each pass, keeping only the
            # latest checkpoint.
            with gzip.open(
                    os.path.join(model_save_dir, "params_pass.tar.gz"),
                    "w") as f:
                trainer.save_parameter_to_tar(f)

    # Start training on the shuffled training reader.
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                data_generator.train_reader(train_file_list),
                buf_size=conf.buf_size),
            batch_size=conf.batch_size),
        feeding=feeding,
        event_handler=event_handler,
        num_passes=conf.num_passes)