def do_save_inference_model(args):
    """Build the inference network, load trained weights and export the model.

    The network is built in the default main/startup programs, parameters are
    loaded through ``init_from_params`` or ``init_from_pretrain_model``, and
    the result is written with ``fluid.io.save_inference_model``.

    Args:
        args: Configuration object. This function reads ``random_seed``,
            ``max_seq_len``, ``use_cuda``, ``init_from_params``,
            ``init_from_pretrain_model`` and ``inference_model_dir``; the
            object is also forwarded to ``create_net`` and the ``init_from_*``
            helpers.

    Raises:
        AssertionError: If neither ``args.init_from_params`` nor
            ``args.init_from_pretrain_model`` is set.
    """
    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        test_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            # define inputs of the network
            input_slots = [
                {
                    "name": "src_ids",
                    "shape": (-1, args.max_seq_len, 1),
                    "dtype": "int64"
                },
                {
                    "name": "pos_ids",
                    "shape": (-1, args.max_seq_len, 1),
                    "dtype": "int64"
                },
                {
                    "name": "sent_ids",
                    "shape": (-1, args.max_seq_len, 1),
                    "dtype": "int64"
                },
                {
                    "name": "input_mask",
                    "shape": (-1, args.max_seq_len, 1),
                    "dtype": "float32"
                },
                {
                    "name": "input_span_mask",
                    "shape": (-1, args.max_seq_len),
                    "dtype": "float32"
                },
                {
                    "name": "unique_id",
                    "shape": (-1, 1),
                    "dtype": "int64"
                },
            ]
            input_field = InputField(input_slots)
            input_field.build(build_pyreader=True)

            # define the network
            predictions = create_net(
                is_training=False, model_input=input_field, args=args)

            # declare the outputs to be exported; they are handed to
            # save_inference_model as variables, so no separate list of
            # fetch names is needed here
            (unique_ids, top_k_start_log_probs, top_k_start_indexes,
             top_k_end_log_probs, top_k_end_indexes) = predictions

    # prepare the executor
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # one source of trained parameters is mandatory
    assert (args.init_from_params) or (args.init_from_pretrain_model)

    if args.init_from_params:
        init_from_params(args, exe, test_prog)
    elif args.init_from_pretrain_model:
        init_from_pretrain_model(args, exe, test_prog)

    # saving inference model
    fluid.io.save_inference_model(
        args.inference_model_dir,
        feeded_var_names=[
            input_field.src_ids.name,
            input_field.pos_ids.name,
            input_field.sent_ids.name,
            input_field.input_mask.name,
            input_field.input_span_mask.name,
            input_field.unique_id.name,
        ],
        target_vars=[
            unique_ids,
            top_k_start_log_probs,
            top_k_start_indexes,
            top_k_end_log_probs,
            top_k_end_indexes,
        ],
        executor=exe,
        main_program=test_prog,
        model_filename="model.pdmodel",
        params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
def do_predict(args):
    """Decode ``args.predict_file`` and write the hypotheses to a file.

    For every source sentence the candidates returned by the network are
    converted to target-vocabulary tokens and written, one per line, to
    ``args.output_file`` (opened in binary mode). Writing for a sentence stops
    once ``args.n_best`` candidates have been emitted; the check happens after
    the write, so at least one candidate is written per sentence that has any.

    Side effects: sets ``src_vocab_size``, ``trg_vocab_size``, ``bos_idx``,
    ``eos_idx`` and ``unk_idx`` on ``args`` from the data processor's vocabulary
    summary, and overwrites the position-encoding parameters in the global
    scope.

    Args:
        args: Configuration object; read by this function and forwarded to
            ``create_net`` and the ``init_from_*`` helpers.

    Raises:
        AssertionError: If neither ``args.init_from_params`` nor
            ``args.init_from_pretrain_model`` is set.
    """
    if args.use_cuda:
        dev_count = fluid.core.get_cuda_device_count()
        place = fluid.CUDAPlace(0)
    else:
        dev_count = int(os.environ.get('CPU_NUM', 1))
        place = fluid.CPUPlace()

    # define the data generator
    processor = reader.DataProcessor(fpattern=args.predict_file,
                                     src_vocab_fpath=args.src_vocab_fpath,
                                     trg_vocab_fpath=args.trg_vocab_fpath,
                                     token_delimiter=args.token_delimiter,
                                     use_token_batch=False,
                                     batch_size=args.batch_size,
                                     device_count=dev_count,
                                     pool_size=args.pool_size,
                                     sort_type=reader.SortType.NONE,
                                     shuffle=False,
                                     shuffle_batch=False,
                                     start_mark=args.special_token[0],
                                     end_mark=args.special_token[1],
                                     unk_mark=args.special_token[2],
                                     max_length=args.max_length,
                                     n_head=args.n_head)
    batch_generator = processor.data_generator(phase="predict", place=place)
    args.src_vocab_size, args.trg_vocab_size, args.bos_idx, args.eos_idx, \
        args.unk_idx = processor.get_vocab_summary()
    trg_idx2word = reader.DataProcessor.load_dict(
        dict_path=args.trg_vocab_fpath, reverse=True)

    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            # define input and reader
            input_field_names = (desc.encoder_data_input_fields +
                                 desc.fast_decoder_data_input_fields)
            input_slots = [{
                "name": name,
                "shape": desc.input_descs[name][0],
                "dtype": desc.input_descs[name][1]
            } for name in input_field_names]

            input_field = InputField(input_slots)
            input_field.build(build_pyreader=True)

            # define the network
            out_ids, out_scores, weight_matrix = create_net(
                is_training=False, model_input=input_field, args=args)
            out_ids.persistable = True
            out_scores.persistable = True
            weight_matrix.persistable = True

    # This is used here to set dropout to the test mode.
    test_prog = test_prog.clone(for_test=True)

    # prepare predicting
    ## define the executor and program for prediction
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # one source of trained parameters is mandatory
    assert (args.init_from_params) or (args.init_from_pretrain_model)

    if args.init_from_params:
        init_from_params(args, exe, test_prog)
    elif args.init_from_pretrain_model:
        init_from_pretrain_model(args, exe, test_prog)

    # to avoid a longer length than training, reset the size of position
    # encoding to max_length
    for pos_enc_param_name in desc.pos_enc_param_names:
        pos_enc_param = fluid.global_scope().find_var(
            pos_enc_param_name).get_tensor()
        pos_enc_param.set(
            position_encoding_init(args.max_length + 1, args.d_model), place)

    exe_strategy = fluid.ExecutionStrategy()
    # to clear tensor array after each iteration
    exe_strategy.num_iteration_per_drop_scope = 1
    # NOTE(review): this compiled program is built but exe.run() below is
    # given the plain `test_prog`; kept as-is to preserve behavior -- confirm
    # which program is intended.
    compiled_test_prog = fluid.CompiledProgram(test_prog).with_data_parallel(
        exec_strategy=exe_strategy, places=place)

    # The context manager guarantees the output file is closed even when
    # decoding raises part-way through.
    with open(args.output_file, "wb") as f:
        # start predicting
        ## decorate the pyreader with batch_generator
        input_field.reader.decorate_batch_generator(batch_generator)
        input_field.reader.start()
        while True:
            try:
                seq_ids, _seq_scores, _out_weight = exe.run(
                    test_prog,
                    fetch_list=[out_ids.name, out_scores.name, weight_matrix],
                    return_numpy=False)

                # How to parse the results:
                #   Suppose the lod of seq_ids is:
                #     [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
                #   then from lod[0]:
                #     there are 2 source sentences, beam width is 3.
                #   from lod[1]:
                #     the first source sentence has 3 hyps; the lengths are
                #     12, 12, 16
                #     the second source sentence has 3 hyps; the lengths are
                #     14, 13, 15
                lod = seq_ids.lod()
                sentence_offsets, candidate_offsets = lod[0], lod[1]
                # convert the fetched tensor once per batch instead of once
                # per candidate
                ids_array = np.array(seq_ids)

                for i in range(len(sentence_offsets) - 1):
                    # for each source sentence
                    start = sentence_offsets[i]
                    end = sentence_offsets[i + 1]
                    for j in range(end - start):
                        # for each candidate
                        sub_start = candidate_offsets[start + j]
                        sub_end = candidate_offsets[start + j + 1]
                        hyp = b" ".join([
                            trg_idx2word[idx]
                            for idx in post_process_seq(
                                ids_array[sub_start:sub_end], args.bos_idx,
                                args.eos_idx)
                        ])
                        f.write(hyp + b"\n")
                        # j + 1 candidates have been written for this sentence
                        if j + 1 >= args.n_best:
                            break
            except fluid.core.EOFException:
                # the reader is exhausted: all batches have been decoded
                break
def do_train(args):
    """Train the network built by ``create_net`` with Adam and noam decay.

    Runs ``args.epoch`` passes over ``args.training_file``. Every
    ``args.print_step`` steps the summed cost and token count are fetched and
    logged; every ``args.save_step`` steps (except step 0) and once at the end
    a checkpoint and/or parameters are saved, depending on
    ``args.save_checkpoint`` and ``args.save_param``.

    Side effects: sets ``src_vocab_size``, ``trg_vocab_size``, ``bos_idx``,
    ``eos_idx`` and ``unk_idx`` on ``args``, and writes the position-encoding
    parameters in the global scope.

    Args:
        args: Configuration object; read by this function and forwarded to
            ``create_net``, the ``init_from_*`` helpers and the save helpers.

    Raises:
        AssertionError: If both ``args.init_from_checkpoint`` and
            ``args.init_from_pretrain_model`` are non-empty.
    """
    if args.use_cuda:
        if num_trainers > 1:  # for multi-process gpu training
            dev_count = 1
        else:
            dev_count = fluid.core.get_cuda_device_count()
        gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
        place = fluid.CUDAPlace(gpu_id)
    else:
        dev_count = int(os.environ.get('CPU_NUM', 1))
        place = fluid.CPUPlace()

    # define the data generator
    processor = reader.DataProcessor(fpattern=args.training_file,
                                     src_vocab_fpath=args.src_vocab_fpath,
                                     trg_vocab_fpath=args.trg_vocab_fpath,
                                     token_delimiter=args.token_delimiter,
                                     use_token_batch=args.use_token_batch,
                                     batch_size=args.batch_size,
                                     device_count=dev_count,
                                     pool_size=args.pool_size,
                                     sort_type=args.sort_type,
                                     shuffle=args.shuffle,
                                     shuffle_batch=args.shuffle_batch,
                                     start_mark=args.special_token[0],
                                     end_mark=args.special_token[1],
                                     unk_mark=args.special_token[2],
                                     max_length=args.max_length,
                                     n_head=args.n_head)
    batch_generator = processor.data_generator(phase="train")
    if num_trainers > 1:  # for multi-process gpu training
        batch_generator = fluid.contrib.reader.distributed_batch_reader(
            batch_generator)
    args.src_vocab_size, args.trg_vocab_size, args.bos_idx, args.eos_idx, \
        args.unk_idx = processor.get_vocab_summary()

    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    # NOTE(review): eval() executes arbitrary expressions. It is kept because
    # the accepted spellings of `random_seed` are not visible here, but it
    # must never be fed untrusted input -- consider ast.literal_eval.
    random_seed = eval(str(args.random_seed))
    if random_seed is not None:
        train_prog.random_seed = random_seed
        startup_prog.random_seed = random_seed

    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            # define input and reader
            input_field_names = desc.encoder_data_input_fields + \
                desc.decoder_data_input_fields[:-1] + \
                desc.label_data_input_fields
            input_slots = [{
                "name": name,
                "shape": desc.input_descs[name][0],
                "dtype": desc.input_descs[name][1]
            } for name in input_field_names]

            input_field = InputField(input_slots)
            input_field.build(build_pyreader=True)

            # define the network
            sum_cost, avg_cost, token_num = create_net(is_training=True,
                                                       model_input=input_field,
                                                       args=args)
            sum_cost.persistable = True
            avg_cost.persistable = True
            token_num.persistable = True

            # define the optimizer
            with fluid.default_main_program()._lr_schedule_guard():
                learning_rate = fluid.layers.learning_rate_scheduler.noam_decay(
                    args.d_model, args.warmup_steps) * args.learning_rate

            optimizer = fluid.optimizer.Adam(learning_rate=learning_rate,
                                             beta1=args.beta1,
                                             beta2=args.beta2,
                                             epsilon=float(args.eps))
            optimizer.minimize(avg_cost)

    # prepare training
    ## decorate the pyreader with batch_generator
    input_field.reader.decorate_batch_generator(batch_generator)

    ## define the executor and program for training
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # init position_encoding
    for pos_enc_param_name in desc.pos_enc_param_names:
        pos_enc_param = fluid.global_scope().find_var(
            pos_enc_param_name).get_tensor()
        pos_enc_param.set(
            position_encoding_init(args.max_length + 1, args.d_model), place)

    # at most one initialisation source may be given
    assert (args.init_from_checkpoint == "") or (
        args.init_from_pretrain_model == "")

    ## init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        init_from_checkpoint(args, exe, train_prog)

    ## init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        init_from_pretrain_model(args, exe, train_prog)

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True
    exec_strategy = fluid.ExecutionStrategy()
    if num_trainers > 1:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # the best cross-entropy value with label smoothing
    loss_normalizer = -(
        (1. - args.label_smooth_eps) * np.log((1. - args.label_smooth_eps)) +
        args.label_smooth_eps *
        np.log(args.label_smooth_eps / (args.trg_vocab_size - 1) + 1e-20))

    # Defaults so the CE report below cannot hit an unbound name when no
    # training step was executed (zero epochs or an empty reader).
    total_avg_cost = float("nan")
    time_consumed = 0.0

    # start training
    step_idx = 0
    for pass_id in range(args.epoch):
        pass_start_time = time.time()
        input_field.reader.start()

        batch_id = 0
        while True:
            try:
                should_log = step_idx % args.print_step == 0
                # fetch only on logging steps to keep the other steps cheap
                outs = exe.run(
                    compiled_train_prog,
                    fetch_list=[sum_cost.name, token_num.name]
                    if should_log else [])

                if should_log:
                    sum_cost_val = np.array(outs[0])
                    token_num_val = np.array(outs[1])
                    # sum the cost from multi-devices
                    total_sum_cost = sum_cost_val.sum()
                    total_token_num = token_num_val.sum()
                    total_avg_cost = total_sum_cost / total_token_num

                    # exp() is taken on the scalar (capped at 100) so that a
                    # plain number, not a 1-element array, is formatted by %f
                    message = (
                        "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                        "normalized loss: %f, ppl: %f" %
                        (step_idx, pass_id, batch_id, total_avg_cost,
                         total_avg_cost - loss_normalizer,
                         np.exp(min(total_avg_cost, 100))))
                    if step_idx != 0:
                        # no previous timestamp exists on the very first step
                        message += ", speed: %.2f step/s" % (
                            args.print_step / (time.time() - avg_batch_time))
                    logging.info(message)
                    avg_batch_time = time.time()

                if step_idx % args.save_step == 0 and step_idx != 0:
                    if args.save_checkpoint:
                        save_checkpoint(args, exe, train_prog,
                                        "step_" + str(step_idx))
                    if args.save_param:
                        save_param(args, exe, train_prog,
                                   "step_" + str(step_idx))

                batch_id += 1
                step_idx += 1

            except fluid.core.EOFException:
                # end of this pass: rewind the reader for the next one
                input_field.reader.reset()
                break

        time_consumed = time.time() - pass_start_time

    if args.save_checkpoint:
        save_checkpoint(args, exe, train_prog, "step_final")

    if args.save_param:
        save_param(args, exe, train_prog, "step_final")

    if args.enable_ce:  # For CE
        print("kpis\ttrain_cost_card%d\t%f" % (dev_count, total_avg_cost))
        print("kpis\ttrain_duration_card%d\t%f" % (dev_count, time_consumed))
def do_predict(args):
    """Run the prediction network over ``args.predict_file`` and write answers.

    The fetched outputs of every batch are converted to plain Python values,
    collected as ``RawResult`` tuples and finally handed, together with the
    processor's features, to ``write_predictions``.

    Args:
        args: Configuration object; read by this function and forwarded to
            ``create_net`` and the ``init_from_*`` helpers.

    Raises:
        AssertionError: If neither ``args.init_from_params`` nor
            ``args.init_from_pretrain_model`` is set.
    """
    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        test_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            # define inputs of the network: (name, shape, dtype)
            slot_specs = (
                ("src_ids", (-1, args.max_seq_len, 1), "int64"),
                ("pos_ids", (-1, args.max_seq_len, 1), "int64"),
                ("sent_ids", (-1, args.max_seq_len, 1), "int64"),
                ("input_mask", (-1, args.max_seq_len, 1), "float32"),
                ("input_span_mask", (-1, args.max_seq_len), "float32"),
                ("unique_id", (-1, 1), "int64"),
            )
            input_slots = [{
                "name": slot_name,
                "shape": slot_shape,
                "dtype": slot_dtype
            } for slot_name, slot_shape, slot_dtype in slot_specs]

            input_field = InputField(input_slots)
            input_field.build(build_pyreader=True)

            # define the network
            predictions = create_net(is_training=False,
                                     model_input=input_field,
                                     args=args)

            # declare the outputs to be fetched
            (var_unique_ids, var_start_log_probs, var_start_indexes,
             var_end_log_probs, var_end_indexes) = predictions
            fetch_vars = (var_unique_ids, var_start_log_probs,
                          var_start_indexes, var_end_log_probs,
                          var_end_indexes)

            # make them persistable, will be removed in PaddlePaddle 1.6
            for fetch_var in fetch_vars:
                fetch_var.persistable = True

            # put all fetched outputs into fetch_list
            fetch_list = [fetch_var.name for fetch_var in fetch_vars]

    # prepare predicting
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # one source of trained parameters is mandatory
    assert (args.init_from_params) or (args.init_from_pretrain_model)

    if args.init_from_params:
        init_from_params(args, exe, test_prog)
    elif args.init_from_pretrain_model:
        init_from_pretrain_model(args, exe, test_prog)

    compiled_test_prog = fluid.CompiledProgram(test_prog)

    # start predicting
    ## define the data processor and start data reading
    processor = DataProcessor(vocab_path=args.vocab_path,
                              do_lower_case=args.do_lower_case,
                              max_seq_length=args.max_seq_len,
                              in_tokens=args.in_tokens,
                              doc_stride=args.doc_stride,
                              do_stride=args.do_stride,
                              max_query_length=args.max_query_len)

    ## define the data generator
    batch_generator = processor.data_generator(data_path=args.predict_file,
                                               batch_size=args.batch_size,
                                               phase="predict",
                                               shuffle=False,
                                               dev_count=1,
                                               epoch=1)

    ## decorate the pyreader with batch_generator
    input_field.reader.decorate_batch_generator(batch_generator)

    RawResult = collections.namedtuple("RawResult", [
        "unique_id", "top_k_start_log_probs", "top_k_start_indexes",
        "top_k_end_log_probs", "top_k_end_indexes"
    ])
    all_results = []

    input_field.reader.start()
    while True:
        try:
            (batch_ids, batch_start_log_probs, batch_start_indexes,
             batch_end_log_probs, batch_end_indexes) = exe.run(
                 compiled_test_prog, fetch_list=fetch_list)

            for row in range(batch_ids.shape[0]):
                processed = len(all_results)
                if processed % 1000 == 0:
                    print("Processing example: %d" % processed)

                # flatten each row into plain Python numbers
                all_results.append(
                    RawResult(
                        unique_id=int(batch_ids[row]),
                        top_k_start_log_probs=list(
                            map(float, batch_start_log_probs[row].flat)),
                        top_k_start_indexes=list(
                            map(int, batch_start_indexes[row].flat)),
                        top_k_end_log_probs=list(
                            map(float, batch_end_log_probs[row].flat)),
                        top_k_end_indexes=list(
                            map(int, batch_end_indexes[row].flat))))
        except fluid.core.EOFException:
            # the reader is exhausted
            break

    features = processor.get_features(processor.predict_examples,
                                      is_training=False)
    write_predictions(processor.predict_examples, features, all_results,
                      args.n_best_size, args.max_answer_length,
                      args.do_lower_case, args.output_prediction_file,
                      args.output_nbest_file, None, args.start_top_k,
                      args.end_top_k, args.verbose)
def do_save_inference_model(args):
    """Build the decoding network, load trained weights and export the model.

    The network is built in the default main/startup programs, cloned for
    test mode, initialised through ``init_from_params`` or
    ``init_from_pretrain_model`` and written with
    ``fluid.io.save_inference_model``.

    Args:
        args: Configuration object. This function reads ``use_cuda``,
            ``init_from_params``, ``init_from_pretrain_model`` and
            ``inference_model_dir``; the object is also forwarded to
            ``create_net`` and the ``init_from_*`` helpers.

    Raises:
        AssertionError: If neither ``args.init_from_params`` nor
            ``args.init_from_pretrain_model`` is set.
    """
    # Only the execution place is needed here; no device count is computed
    # because nothing in this function consumes one.
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            # define input and reader
            input_field_names = (desc.encoder_data_input_fields +
                                 desc.fast_decoder_data_input_fields)
            input_slots = [{
                "name": name,
                "shape": desc.input_descs[name][0],
                "dtype": desc.input_descs[name][1]
            } for name in input_field_names]

            input_field = InputField(input_slots)
            input_field.build(build_pyreader=True)

            # define the network
            predictions = create_net(is_training=False,
                                     model_input=input_field,
                                     args=args)
            out_ids, out_scores = predictions

    # This is used here to set dropout to the test mode.
    test_prog = test_prog.clone(for_test=True)

    # prepare the executor
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # one source of trained parameters is mandatory
    assert (args.init_from_params) or (args.init_from_pretrain_model)

    if args.init_from_params:
        init_from_params(args, exe, test_prog)
    elif args.init_from_pretrain_model:
        init_from_pretrain_model(args, exe, test_prog)

    # saving inference model
    fluid.io.save_inference_model(args.inference_model_dir,
                                  feeded_var_names=input_field_names,
                                  target_vars=[out_ids, out_scores],
                                  executor=exe,
                                  main_program=test_prog,
                                  model_filename="model.pdmodel",
                                  params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
def do_train(args):
    """Train the network built by ``create_net`` and periodically save it.

    Runs ``args.epoch`` passes over ``args.training_file``. The loss is
    fetched and printed every ``args.print_step`` steps; a checkpoint and/or
    the parameters are saved every ``args.save_step`` steps (except step 0)
    and once more after the last pass, depending on ``args.save_checkpoint``
    and ``args.save_param``.

    Args:
        args: Configuration object; read by this function and forwarded to
            ``create_net``, ``optimization``, the ``init_from_*`` helpers and
            the save helpers.

    Raises:
        AssertionError: If both ``args.init_from_checkpoint`` and
            ``args.init_from_pretrain_model`` are non-empty.
    """
    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(train_prog, startup_prog):
        train_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            # define input and reader: (name, shape, dtype)
            slot_specs = (
                ("src_ids", (-1, args.max_seq_len, 1), "int64"),
                ("pos_ids", (-1, args.max_seq_len, 1), "int64"),
                ("sent_ids", (-1, args.max_seq_len, 1), "int64"),
                ("input_mask", (-1, args.max_seq_len, 1), "float32"),
                ("input_span_mask", (-1, args.max_seq_len), "float32"),
                ("start_positions", (-1, 1), "int64"),
                ("end_positions", (-1, 1), "int64"),
                ("is_null_answer", (-1, 1), "int64"),
            )
            input_slots = [{
                "name": slot_name,
                "shape": slot_shape,
                "dtype": slot_dtype
            } for slot_name, slot_shape, slot_dtype in slot_specs]

            input_field = InputField(input_slots)
            input_field.build(build_pyreader=True)

            # define the network
            loss = create_net(is_training=True,
                              model_input=input_field,
                              args=args)
            loss.persistable = True

            # define the optimizer
            dev_count = (fluid.core.get_cuda_device_count()
                         if args.use_cuda else int(
                             os.environ.get('CPU_NUM',
                                            multiprocessing.cpu_count())))

            # The maximum number of training steps is needed for warmup, so
            # the data processor is created in advance; usually it could be
            # declared later, outside the program_guard scope.
            processor = DataProcessor(vocab_path=args.vocab_path,
                                      do_lower_case=args.do_lower_case,
                                      max_seq_length=args.max_seq_len,
                                      in_tokens=args.in_tokens,
                                      doc_stride=args.doc_stride,
                                      do_stride=args.do_stride,
                                      max_query_length=args.max_query_len)

            ## define the data generator
            batch_generator = processor.data_generator(
                data_path=args.training_file,
                batch_size=args.batch_size,
                phase="train",
                shuffle=True,
                dev_count=dev_count,
                epoch=args.epoch)

            num_train_examples = processor.get_num_examples(phase='train')
            total_examples = args.epoch * num_train_examples
            max_train_steps = total_examples // dev_count // args.batch_size
            warmup_steps = int(max_train_steps * args.warmup_proportion)
            print(max_train_steps, warmup_steps, num_train_examples)

            # called for its effect on the programs; the return value is
            # not used afterwards
            optimization(loss=loss,
                         warmup_steps=warmup_steps,
                         num_train_steps=max_train_steps,
                         learning_rate=args.learning_rate,
                         train_program=train_prog,
                         startup_prog=startup_prog,
                         weight_decay=args.weight_decay,
                         scheduler=args.lr_scheduler,
                         use_fp16=args.use_fp16,
                         loss_scaling=args.loss_scaling)

    # prepare training
    ## decorate the pyreader with batch_generator
    input_field.reader.decorate_batch_generator(batch_generator)

    ## define the executor and program for training
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # at most one initialisation source may be given
    assert (args.init_from_checkpoint == "") or (
        args.init_from_pretrain_model == "")

    ## init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        init_from_checkpoint(args, exe, train_prog)

    ## init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        init_from_pretrain_model(args, exe, train_prog)

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    # start training
    step = 0
    for _ in range(args.epoch):
        input_field.reader.start()

        while True:
            try:
                # fetch the loss only on logging steps; skipping the fetch
                # op on the other steps keeps training fast
                log_this_step = step % args.print_step == 0
                fetch_list = [loss.name] if log_this_step else []

                output = exe.run(compiled_train_prog, fetch_list=fetch_list)

                if log_this_step:
                    print("step: %d, loss: %.4f" % (step, np.sum(output[0])))

                if step % args.save_step == 0 and step != 0:
                    save_tag = "step_" + str(step)
                    if args.save_checkpoint:
                        save_checkpoint(args, exe, train_prog, save_tag)
                    if args.save_param:
                        save_param(args, exe, train_prog, save_tag)

                step += 1

            except fluid.core.EOFException:
                # end of this pass: rewind the reader for the next one
                input_field.reader.reset()
                break

    if args.save_checkpoint:
        save_checkpoint(args, exe, train_prog, "step_final")

    if args.save_param:
        save_param(args, exe, train_prog, "step_final")