import time

import paddle
from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import (UnifiedTransformerLMHeadModel,
                                    UnifiedTransformerTokenizer)
from termcolor import colored, cprint

# Helpers from the example's local utils module.
from utils import (set_seed, create_data_loader, select_response,
                   calc_bleu_and_distinct)


def infer(args):
    paddle.set_device(args.device)
    set_seed(args.seed)

    model = UnifiedTransformerLMHeadModel.from_pretrained(
        args.model_name_or_path)
    tokenizer = UnifiedTransformerTokenizer.from_pretrained(
        args.model_name_or_path)

    # Load the DuConv test split and build the batched data loader.
    test_ds = load_dataset('duconv', splits='test_1')
    test_ds, test_data_loader = create_data_loader(test_ds, tokenizer, args,
                                                   'test')

    model.eval()
    total_time = 0.0
    start_time = time.time()
    pred_responses = []
    for step, inputs in enumerate(test_data_loader, 1):
        input_ids, token_type_ids, position_ids, attention_mask, seq_len = inputs
        output = model.generate(
            input_ids=input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            attention_mask=attention_mask,
            seq_len=seq_len,
            max_length=args.max_dec_len,
            min_length=args.min_dec_len,
            decode_strategy=args.decode_strategy,
            temperature=args.temperature,
            top_k=args.top_k,
            top_p=args.top_p,
            num_beams=args.num_beams,
            length_penalty=args.length_penalty,
            early_stopping=args.early_stopping,
            num_return_sequences=args.num_return_sequences,
            use_fp16_decoding=args.use_fp16_decoding,
            use_faster=args.faster)

        total_time += (time.time() - start_time)
        if step % args.logging_steps == 0:
            print('step %d - %.3fs/step' %
                  (step, total_time / args.logging_steps))
            total_time = 0.0

        # generate() returns candidate token ids and their scores; keep the
        # best response per example among num_return_sequences candidates.
        ids, scores = output
        results = select_response(ids, scores, tokenizer, args.max_dec_len,
                                  args.num_return_sequences)
        pred_responses.extend(results)

        start_time = time.time()

    with open(args.output_path, 'w', encoding='utf-8') as fout:
        for response in pred_responses:
            fout.write(response + '\n')
    print('\nSaved inference results to: %s' % args.output_path)

    # Evaluate predictions against the reference responses.
    target_responses = [example['response'] for example in test_ds]
    calc_bleu_and_distinct(pred_responses, target_responses)
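
# A minimal sketch of driving infer() directly, without a CLI. The attribute
# names below are exactly the ones infer() reads above; the values are
# illustrative assumptions, and create_data_loader may require additional
# attributes (e.g. batch size or sequence-length limits) not visible in this
# file. 'unified_transformer-12L-cn-luge' is a pretrained checkpoint name
# PaddleNLP provides for this model; substitute your own fine-tuned path.
def demo_infer():
    import argparse
    args = argparse.Namespace(
        device='gpu',
        seed=2021,
        model_name_or_path='unified_transformer-12L-cn-luge',
        logging_steps=100,
        output_path='./predict.txt',
        max_dec_len=64,
        min_dec_len=1,
        decode_strategy='sampling',
        temperature=1.0,
        top_k=5,
        top_p=1.0,
        num_beams=0,
        length_penalty=1.0,
        early_stopping=False,
        num_return_sequences=20,
        use_fp16_decoding=False,
        faster=False)
    infer(args)
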
def interaction(args, model, tokenizer):
    history = []
    start_info = "Enter [EXIT] to quit the interaction, [NEXT] to start a new conversation."
    cprint(start_info, "yellow", attrs=["bold"])
    while True:
        user_utt = input(colored("[Human]: ", "red", attrs=["bold"])).strip()
        if user_utt == "[EXIT]":
            break
        elif user_utt == "[NEXT]":
            history = []
            cprint(start_info, "yellow", attrs=["bold"])
        else:
            history.append(user_utt)
            # Encode the whole dialogue history and append the start token so
            # the model decodes the next (bot) turn as the response.
            inputs = tokenizer.dialogue_encode(
                history,
                add_start_token_as_response=True,
                return_tensors=True,
                is_split_into_words=False)
            inputs['input_ids'] = inputs['input_ids'].astype('int64')
            ids, scores = model.generate(
                input_ids=inputs['input_ids'],
                token_type_ids=inputs['token_type_ids'],
                position_ids=inputs['position_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=args.max_dec_len,
                min_length=args.min_dec_len,
                decode_strategy=args.decode_strategy,
                temperature=args.temperature,
                top_k=args.top_k,
                top_p=args.top_p,
                num_beams=args.num_beams,
                length_penalty=args.length_penalty,
                early_stopping=args.early_stopping,
                num_return_sequences=args.num_return_sequences,
                use_faster=True)
            bot_response = select_response(
                ids, scores, tokenizer, args.max_dec_len,
                args.num_return_sequences, keep_space=False)[0]
            print(colored("[Bot]:", "blue", attrs=["bold"]),
                  colored(bot_response, attrs=["bold"]))
            # Keep the bot turn in the history so later turns are conditioned
            # on the full conversation.
            history.append(bot_response)
    return
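
# A minimal sketch of launching the interactive loop. interaction() only
# requires that args carry the decoding attributes it reads above; the values
# here are illustrative assumptions. 'plato-mini' is a Chinese chit-chat
# checkpoint PaddleNLP provides for UnifiedTransformer; swap in your own
# fine-tuned path as needed.
if __name__ == '__main__':
    import argparse
    args = argparse.Namespace(
        max_dec_len=64,
        min_dec_len=1,
        decode_strategy='sampling',
        temperature=1.0,
        top_k=5,
        top_p=1.0,
        num_beams=0,
        length_penalty=1.0,
        early_stopping=False,
        num_return_sequences=20)
    model = UnifiedTransformerLMHeadModel.from_pretrained('plato-mini')
    tokenizer = UnifiedTransformerTokenizer.from_pretrained('plato-mini')
    model.eval()
    interaction(args, model, tokenizer)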