def do_predict(args):
    if args.use_gpu:
        place = "gpu:0"
    else:
        place = "cpu"
    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = InferTransformerModel(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        n_layer=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    f = open(args.output_file, "w")

    with paddle.no_grad():
        for (src_word, ) in test_loader:
            # Output shape is `[batch_size, seq_len, beam_size]`; transpose
            # to `[batch_size, beam_size, seq_len]` before post-processing.
            finished_seq = transformer(src_word=src_word)
            finished_seq = finished_seq.numpy().transpose([0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
    f.close()
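# For reference, `post_process_seq` (used throughout these scripts but not
# shown) is the usual PaddleNLP example helper: it truncates a decoded id
# sequence at the first EOS and strips BOS/EOS before detokenization. A
# minimal sketch, assuming that behavior rather than the exact upstream
# implementation:
def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
                     output_eos=False):
    """Truncate at the first EOS and optionally drop BOS/EOS tokens."""
    eos_pos = len(seq) - 1
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    return [
        idx for idx in seq[:eos_pos + 1]
        if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx)
    ]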
def do_inference(args):
    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args, True)

    predictor = Predictor.create_predictor(
        args=args, profile=args.profile, model_name=args.model_name)
    sequence_outputs = predictor.predict(test_loader, to_tokens, args.n_best,
                                         args.bos_idx, args.eos_idx)

    f = open(args.output_file, "w")
    for target in sequence_outputs:
        for sequence in target:
            f.write(sequence + "\n")
    f.close()
def do_predict(args): place = "gpu" paddle.set_device(place) # Define data loader test_loader, to_tokens = reader.create_infer_loader(args) # Define model transformer = FasterTransformer( src_vocab_size=args.src_vocab_size, trg_vocab_size=args.trg_vocab_size, max_length=args.max_length + 1, n_layer=args.n_layer, n_head=args.n_head, d_model=args.d_model, d_inner_hid=args.d_inner_hid, dropout=args.dropout, weight_sharing=args.weight_sharing, bos_id=args.bos_idx, eos_id=args.eos_idx, decoding_strategy="beam_search", beam_size=args.beam_size, max_out_len=args.max_out_len, decoding_lib=args.decoding_lib, use_fp16_decoding=args.use_fp16_decoding) # Set evaluate mode transformer.eval() # Load checkpoint. transformer.load(init_from_params=os.path.join(args.init_from_params, "transformer.pdparams")) f = open(args.output_file, "w") with paddle.no_grad(): for (src_word, ) in test_loader: finished_seq = transformer(src_word=src_word) finished_seq = finished_seq.numpy().transpose([1, 2, 0]) for ins in finished_seq: for beam_idx, beam in enumerate(ins): if beam_idx >= args.n_best: break id_list = post_process_seq(beam, args.bos_idx, args.eos_idx) word_list = to_tokens(id_list) sequence = " ".join(word_list) + "\n" f.write(sequence)
def do_inference(args):
    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    predictor = Predictor.create_predictor(args)
    sequence_outputs = predictor.predict(test_loader)

    f = open(args.output_file, "w")
    for finished_sequence in sequence_outputs:
        finished_sequence = finished_sequence[0].transpose([0, 2, 1])
        for ins in finished_sequence:
            for beam_idx, beam in enumerate(ins):
                if beam_idx >= args.n_best:
                    break
                id_list = post_process_seq(beam, args.bos_idx, args.eos_idx)
                word_list = to_tokens(id_list)
                sequence = " ".join(word_list) + "\n"
                f.write(sequence)
    f.close()
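# The `Predictor` class used above is not shown in this section. A minimal
# sketch of what such a wrapper around the Paddle Inference native API might
# look like; the class layout, the `args.inference_model_dir` field, and the
# exported file names are assumptions for illustration:
import os
import numpy as np
import paddle.inference as paddle_infer

class Predictor(object):
    def __init__(self, config):
        self.predictor = paddle_infer.create_predictor(config)
        self.input_handles = [
            self.predictor.get_input_handle(name)
            for name in self.predictor.get_input_names()
        ]
        self.output_handles = [
            self.predictor.get_output_handle(name)
            for name in self.predictor.get_output_names()
        ]

    @classmethod
    def create_predictor(cls, args):
        config = paddle_infer.Config(
            os.path.join(args.inference_model_dir, "transformer.pdmodel"),
            os.path.join(args.inference_model_dir, "transformer.pdiparams"))
        if args.device == "gpu":
            # 100 MB initial GPU memory pool on device 0.
            config.enable_use_gpu(100, 0)
        return cls(config)

    def predict(self, test_loader):
        outputs = []
        for (src_word, ) in test_loader:
            self.input_handles[0].copy_from_cpu(np.asarray(src_word))
            self.predictor.run()
            outputs.append([h.copy_to_cpu() for h in self.output_handles])
        return outputs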
def do_predict(args):
    paddle.enable_static()
    if args.use_gpu:
        place = paddle.set_device("gpu:0")
    else:
        place = paddle.set_device("cpu")

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    test_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(test_program, startup_program):
        src_word = paddle.static.data(
            name="src_word", shape=[None, None], dtype="int64")

        # Define model
        transformer = InferTransformerModel(
            src_vocab_size=args.src_vocab_size,
            trg_vocab_size=args.trg_vocab_size,
            max_length=args.max_length + 1,
            n_layer=args.n_layer,
            n_head=args.n_head,
            d_model=args.d_model,
            d_inner_hid=args.d_inner_hid,
            dropout=args.dropout,
            weight_sharing=args.weight_sharing,
            bos_id=args.bos_idx,
            eos_id=args.eos_idx,
            beam_size=args.beam_size,
            max_out_len=args.max_out_len)

        finished_seq = transformer(src_word=src_word)

    test_program = test_program.clone(for_test=True)

    exe = paddle.static.Executor(place)
    exe.run(startup_program)

    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    paddle.static.load(test_program,
                       os.path.join(args.init_from_params, "transformer"),
                       exe)
    print("Finished initializing the model from parameters in %s" %
          args.init_from_params)

    # Cast weights from fp16 to fp32 after loading
    if args.use_pure_fp16:
        cast_parameters_to_fp32(place, test_program)

    f = open(args.output_file, "w")
    for data in test_loader:
        finished_sequence, = exe.run(test_program,
                                     feed={'src_word': data[0]},
                                     fetch_list=[finished_seq.name])
        finished_sequence = finished_sequence.transpose([0, 2, 1])
        for ins in finished_sequence:
            for beam_idx, beam in enumerate(ins):
                if beam_idx >= args.n_best:
                    break
                id_list = post_process_seq(beam, args.bos_idx, args.eos_idx)
                word_list = to_tokens(id_list)
                sequence = " ".join(word_list) + "\n"
                f.write(sequence)
    f.close()

    paddle.disable_static()
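# `cast_parameters_to_fp32` is assumed to rewrite float16 parameters of the
# static program as float32 in the executor scope, since the checkpoint was
# trained in pure fp16 while inference here runs in fp32. A sketch under that
# assumption (not the exact upstream helper):
import numpy as np
import paddle

def cast_parameters_to_fp32(place, program, scope=None):
    scope = scope or paddle.static.global_scope()
    for block in program.blocks:
        for param in block.all_parameters():
            tensor = scope.find_var(param.name).get_tensor()
            data = np.array(tensor)
            if data.dtype == np.float16:
                # Overwrite the tensor in-place with its fp32 copy.
                tensor.set(data.astype(np.float32), place)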
def do_predict(args):
    if args.device == 'gpu':
        place = "gpu:0"
    elif args.device == 'xpu':
        place = "xpu:0"
    else:
        place = "cpu"
    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = SimultaneousTransformer(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_model, args.d_inner_hid,
        args.dropout, args.weight_sharing, args.bos_idx, args.eos_idx,
        args.waitk)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict[
        "src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)
    model_dict[
        "trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    f = open(args.output_file, "w", encoding='utf8')
    with paddle.no_grad():
        for input_data in test_loader:
            (src_word, ) = input_data
            finished_seq, finished_scores = transformer.greedy_search(
                src_word, max_len=args.max_out_len, waitk=args.waitk)
            finished_seq = finished_seq.numpy()
            finished_scores = finished_scores.numpy()
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = ' '.join(word_list) + "\n"
                    f.write(sequence)
    f.close()
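# `position_encoding_init` builds the sinusoidal position table from
# "Attention Is All You Need", which is why the scripts above can simply
# regenerate it at a new maximum length instead of loading it from the
# checkpoint. A minimal sketch of that computation, assuming an even
# `d_model`:
import numpy as np

def position_encoding_init(n_position, d_model):
    # PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
    # PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
    position = np.arange(n_position)[:, np.newaxis]  # [n_position, 1]
    div_term = np.power(10000.0, np.arange(0, d_model, 2) / d_model)
    pe = np.zeros((n_position, d_model), dtype="float32")
    pe[:, 0::2] = np.sin(position / div_term)
    pe[:, 1::2] = np.cos(position / div_term)
    return pe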
def do_predict(args): if args.device == "gpu": place = "gpu" else: place = "cpu" paddle.set_device(place) # Define data loader test_loader, to_tokens = reader.create_infer_loader(args) # Define model # `TransformerGenerator` automatically chioces using `FasterTransformer` # (with jit building) or the slower verison `InferTransformerModel`. transformer = TransformerGenerator( src_vocab_size=args.src_vocab_size, trg_vocab_size=args.trg_vocab_size, max_length=args.max_length + 1, num_encoder_layers=args.n_layer, num_decoder_layers=args.n_layer, n_head=args.n_head, d_model=args.d_model, d_inner_hid=args.d_inner_hid, dropout=args.dropout, weight_sharing=args.weight_sharing, bos_id=args.bos_idx, eos_id=args.eos_idx, beam_size=args.beam_size, max_out_len=args.max_out_len, use_ft=not args.without_ft, beam_search_version=args.beam_search_version, rel_len=args.use_rel_len, # only works when using FT or beam search v2 alpha=args.alpha, # only works when using beam search v2 use_fp16_decoding=False) # only works when using FT # Load the trained model assert args.init_from_params, ( "Please set init_from_params to load the infer model.") transformer.load( os.path.join(args.init_from_params, "transformer.pdparams")) # Set evaluate mode transformer.eval() f = open(args.output_file, "w", encoding="utf-8") with paddle.no_grad(): for (src_word, ) in test_loader: # When `output_time_major` argument is `True` for TransformerGenerator, # the shape of finished_seq is `[seq_len, batch_size, beam_size]` # for beam search v1 or `[seq_len, batch_size, beam_size * 2]` for # beam search v2. finished_seq = transformer(src_word=src_word) finished_seq = finished_seq.numpy().transpose([1, 2, 0]) for ins in finished_seq: for beam_idx, beam in enumerate(ins): if beam_idx >= args.n_best: break id_list = post_process_seq(beam, args.bos_idx, args.eos_idx) word_list = to_tokens(id_list) sequence = " ".join(word_list) + "\n" f.write(sequence)
def do_predict(args): place = "gpu" place = paddle.set_device(place) # Define data loader # NOTE: Data yielded by DataLoader may be on CUDAPinnedPlace, # but custom op doesn't support CUDAPinnedPlace. Hence, # disable using CUDAPinnedPlace in DataLoader. paddle.fluid.reader.use_pinned_memory(False) test_loader, to_tokens = reader.create_infer_loader(args) # Define model transformer = FasterTransformer( src_vocab_size=args.src_vocab_size, trg_vocab_size=args.trg_vocab_size, max_length=args.max_length + 1, num_encoder_layers=args.n_layer, num_decoder_layers=args.n_layer, n_head=args.n_head, d_model=args.d_model, d_inner_hid=args.d_inner_hid, dropout=args.dropout, weight_sharing=args.weight_sharing, bos_id=args.bos_idx, eos_id=args.eos_idx, decoding_strategy=args.decoding_strategy, beam_size=args.beam_size, max_out_len=args.max_out_len, diversity_rate=args.diversity_rate, decoding_lib=args.decoding_lib, use_fp16_decoding=args.use_fp16_decoding, enable_faster_encoder=args.enable_faster_encoder, use_fp16_encoder=args.use_fp16_encoder) # Set evaluate mode transformer.eval() # Load checkpoint. transformer.load(init_from_params=os.path.join(args.init_from_params, "transformer.pdparams")) f = open(args.output_file, "w") with paddle.no_grad(): if args.profile: import time start = time.time() for (src_word, ) in test_loader: finished_seq = transformer(src_word=src_word) if not args.profile: if args.decoding_strategy == "beam_search" or args.decoding_strategy == "beam_search_v2": finished_seq = finished_seq.numpy().transpose([1, 2, 0]) elif args.decoding_strategy == "topk_sampling" or args.decoding_strategy == "topp_sampling": finished_seq = np.expand_dims( finished_seq.numpy().transpose([1, 0]), axis=1) for ins in finished_seq: for beam_idx, beam in enumerate(ins): if beam_idx >= args.n_best: break id_list = post_process_seq(beam, args.bos_idx, args.eos_idx) word_list = to_tokens(id_list) sequence = " ".join(word_list) + "\n" f.write(sequence) if args.profile: if args.decoding_strategy == "beam_search" or args.decoding_strategy == "beam_search_v2": logger.info( "Setting info: batch size: {}, beam size: {}, use fp16: {}. ". format(args.infer_batch_size, args.beam_size, args.use_fp16_decoding)) elif args.decoding_strategy == "topk_sampling": logger.info( "Setting info: batch size: {}, topk: {}, use fp16: {}. ". format(args.infer_batch_size, args.topk, args.use_fp16_decoding)) elif args.decoding_strategy == "topp_sampling": logger.info( "Setting info: batch size: {}, topp: {}, use fp16: {}. ". format(args.infer_batch_size, args.topp, args.use_fp16_decoding)) paddle.fluid.core._cuda_synchronize(place) logger.info("Average time latency is {} ms/batch. ".format(( time.time() - start) / len(test_loader) * 1000))