def do_predict(args): place = "gpu" place = paddle.set_device(place) reader.adapt_vocab_size(args) # Define model transformer = FasterTransformer( src_vocab_size=args.src_vocab_size, trg_vocab_size=args.trg_vocab_size, max_length=args.max_length + 1, num_encoder_layers=args.n_layer, num_decoder_layers=args.n_layer, n_head=args.n_head, d_model=args.d_model, d_inner_hid=args.d_inner_hid, dropout=args.dropout, weight_sharing=args.weight_sharing, bos_id=args.bos_idx, eos_id=args.eos_idx, decoding_strategy=args.decoding_strategy, beam_size=args.beam_size, max_out_len=args.max_out_len, decoding_lib=args.decoding_lib, use_fp16_decoding=args.use_fp16_decoding, enable_faster_encoder=args.enable_faster_encoder, use_fp16_encoder=args.use_fp16_encoder, rel_len=args.use_rel_len, alpha=args.alpha) # Set evaluate mode transformer.eval() # Load checkpoint. transformer.load(init_from_params=os.path.join(args.init_from_params, "transformer.pdparams")) # Convert dygraph model to static graph model transformer = paddle.jit.to_static( transformer, input_spec=[ # src_word paddle.static.InputSpec(shape=[None, None], dtype="int64"), # trg_word # Support exporting model which support force decoding # NOTE: Data type MUST be int32 ! # paddle.static.InputSpec( # shape=[None, None], dtype="int32") ]) # Save converted static graph model paddle.jit.save(transformer, os.path.join(args.inference_model_dir, "transformer")) logger.info("Transformer has been saved to {}".format( args.inference_model_dir))
def do_predict(args):
    paddle.enable_static()
    place = "gpu"
    place = paddle.set_device(place)
    reader.adapt_vocab_size(args)

    test_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(test_program, startup_program):
        src_word = paddle.static.data(
            name="src_word", shape=[None, None], dtype="int64")

        # Define model
        transformer = FasterTransformer(
            src_vocab_size=args.src_vocab_size,
            trg_vocab_size=args.trg_vocab_size,
            max_length=args.max_length + 1,
            num_encoder_layers=args.n_layer,
            num_decoder_layers=args.n_layer,
            n_head=args.n_head,
            d_model=args.d_model,
            d_inner_hid=args.d_inner_hid,
            dropout=args.dropout,
            weight_sharing=args.weight_sharing,
            bos_id=args.bos_idx,
            eos_id=args.eos_idx,
            decoding_strategy=args.decoding_strategy,
            beam_size=args.beam_size,
            max_out_len=args.max_out_len,
            decoding_lib=args.decoding_lib,
            use_fp16_decoding=args.use_fp16_decoding,
            rel_len=args.use_rel_len,
            alpha=args.alpha)

        finished_seq = transformer(src_word=src_word)

    test_program = test_program.clone(for_test=True)

    exe = paddle.static.Executor(place)
    exe.run(startup_program)

    # Load checkpoint.
    transformer.export_params(
        init_from_params=os.path.join(args.init_from_params,
                                      "transformer.pdparams"),
        place=place)

    # Save the static graph model for inference.
    paddle.static.save_inference_model(
        os.path.join(args.inference_model_dir, "transformer"),
        feed_vars=src_word,
        fetch_vars=finished_seq,
        executor=exe,
        program=test_program)
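
# Hedged usage sketch (not part of the original script): one way the inference
# model saved above could be loaded and run with paddle.static.load_inference_model
# and a static graph executor. The "transformer" path prefix mirrors the
# save_inference_model call above; the helper name and toy input are assumptions.
def _run_exported_static_model_example(inference_model_dir):
    import numpy as np

    paddle.enable_static()
    exe = paddle.static.Executor(paddle.set_device("gpu"))

    # load_inference_model returns the program plus feed names and fetch targets.
    inference_program, feed_target_names, fetch_targets = (
        paddle.static.load_inference_model(
            os.path.join(inference_model_dir, "transformer"), exe))

    # A fake [batch_size, seq_len] batch of source token ids.
    src_word = np.random.randint(0, 100, size=(1, 8)).astype("int64")

    # results[0] holds the decoded target token ids.
    results = exe.run(inference_program,
                      feed={feed_target_names[0]: src_word},
                      fetch_list=fetch_targets)
    return results
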
def do_export(args):
    # Adapt vocabulary size
    reader.adapt_vocab_size(args)

    # Define model
    transformer = InferTransformerModel(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    # Convert dygraph model to static graph model
    transformer = paddle.jit.to_static(
        transformer,
        input_spec=[
            # src_word
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64")
        ])

    # Save converted static graph model
    paddle.jit.save(transformer,
                    os.path.join(args.inference_model_dir, "transformer"))
    logger.info("Transformer has been saved to {}".format(
        args.inference_model_dir))
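
# Hedged usage sketch (not part of the original script): running the exported
# model through the Paddle Inference API. The "transformer.pdmodel" /
# "transformer.pdiparams" file names follow from the paddle.jit.save prefix
# above; the helper name, input shape, and toy token ids are assumptions.
def _run_paddle_inference_example(inference_model_dir):
    import numpy as np
    import paddle.inference as paddle_infer

    config = paddle_infer.Config(
        os.path.join(inference_model_dir, "transformer.pdmodel"),
        os.path.join(inference_model_dir, "transformer.pdiparams"))
    predictor = paddle_infer.create_predictor(config)

    # Feed a fake [batch_size, seq_len] batch of source token ids.
    input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
    input_handle.copy_from_cpu(
        np.random.randint(0, 100, size=(1, 8)).astype("int64"))

    predictor.run()

    # finished_seq holds the decoded target token ids.
    output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
    finished_seq = output_handle.copy_to_cpu()
    return finished_seq
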