def setting():
    """Load the bible-data Korean<->English NMT models and vocabularies.

    Configures paths on the module-level ``args`` namespace, loads the
    subword vocabularies, loads the proper-noun (PN) dictionary from the
    module-level ``PN_dict_name`` pickle, drops single-character Korean
    keys from it, and loads the two best checkpoints from disk.

    Returns:
        tuple: ``(k2e_model, e2k_model, k2e_trg_inv_dict,
        e2k_trg_inv_dict, PN_list)`` — the two loaded models, the
        id -> token inverse vocabularies for decoding, and the PN
        dictionary sorted by its English value in descending order.
    """
    print("Setting models")
    # BUG FIX: a bare `torch.no_grad()` call is a no-op — the context
    # manager is created and immediately discarded. Disable gradients
    # explicitly instead.
    torch.set_grad_enabled(False)

    args.kr_dict = 'bible_people/bible_data_GYGJ+NIV/PN/subword/vocab.kr.pkl'
    args.en_dict = 'bible_people/bible_data_GYGJ+NIV/PN/subword/vocab.en.pkl'
    args.save_dir = './results'
    args.k2e_model_file = 'kr2en.mylstm.150.250.250.250.bible_data_GJNIV_PN'
    args.e2k_model_file = 'en2kr.mylstm.300.500.500.500.bleu05'
    args.src_file = '/home/nmt19/RNN_model/input.txt.tok.sym.pn.sub'

    kr_dict = read_dict(args.kr_dict)
    en_dict = read_dict(args.en_dict)

    # NOTE(review): loading a pickle is only safe on trusted files;
    # PN_dict_name must be defined at module level.
    with open(PN_dict_name, 'rb') as f:
        PN_dict = pickle.load(f, encoding="utf-8")

    # Single-character Korean keys are too ambiguous to substitute; collect
    # them once instead of writing them to a file and reading it back
    # (the old round-trip also wrote with the default encoding but read
    # with utf-8 — a latent mismatch).
    one_char_keys = [kk for kk in PN_dict if len(kk) == 1]

    # Keep the original side effect: persist the removed names for
    # inspection.
    one_char_name = "bible_people/name_one_char.txt"
    with open(one_char_name, 'w') as f:
        f.write("".join(kk + "\n" for kk in one_char_keys))

    # Drop the one-character entries directly from the in-memory dict.
    for kk in one_char_keys:
        PN_dict.pop(kk)

    # Sort by the English value, descending, before substitution.
    PN_list = sorted(PN_dict.items(), key=operator.itemgetter(1), reverse=True)

    # id -> token inverse vocabularies for decoding model output.
    k2e_trg_inv_dict = {vv: kk for kk, vv in en_dict.items()}
    e2k_trg_inv_dict = {vv: kk for kk, vv in kr_dict.items()}

    k2e_model_name = args.save_dir + '/' + args.k2e_model_file + '.pth' + '.best.pth'
    e2k_model_name = args.save_dir + '/' + args.e2k_model_file + '.pth' + '.best.pth'

    k2e_model = torch.load(k2e_model_name)
    print("k2e best model loaded")
    e2k_model = torch.load(e2k_model_name)
    print("e2k best model loaded")

    return k2e_model, e2k_model, k2e_trg_inv_dict, e2k_trg_inv_dict, PN_list
# --- Interactive translation script (top-level statements) ---
# NOTE(review): `parser`, `read_dict`, and the imports used here are
# defined earlier in the file (outside this chunk), and the `for` loop at
# the bottom appears to continue past the visible portion — `text_file`
# is opened but never written to or closed here. Reformatted and
# commented only; no code changed.
parser.add_argument("--beam_width", type=int, default=1)
EOS_token = 1
args = parser.parse_args()
# NOTE(review): a bare torch.no_grad() call is a no-op — the context
# manager is created and immediately discarded, so gradients stay
# enabled; presumably torch.set_grad_enabled(False) was intended.
torch.no_grad()
args.src_dict = '/home/nmt19/data_05/bleu05/test/vocab.kr.pkl'
args.trg_dict = '/home/nmt19/data_05/bleu05/test/vocab.en.pkl'
args.save_dir = './results'
args.model_file = 'kr2en.mylstm.300.500.500.500.bleu05'
# NOTE(review): this silently overrides the parsed --beam_width value
# (default 1) with a hard-coded 3.
args.beam_width = 3
src_file = '/home/nmt19/RNN_model/input.kr.tok.sub'
trg_dict = read_dict(args.trg_dict)
# Build the id -> token inverse vocabulary for decoding.
trg_inv_dict = dict()
for kk, vv in trg_dict.items():
    trg_inv_dict[vv] = kk
file_name = args.save_dir + '/' + args.model_file + '.pth' + '.best.pth'
print("Using best model")
model = torch.load(file_name)
# Interactive loop: read a source sentence from stdin three times.
for i in range(3):
    input_sen = input("source: ")
    print(input_sen)
    # NOTE(review): opened without a context manager; the writes/close
    # presumably follow in the portion of the file not visible here.
    text_file = open("input.kr", "w", encoding="utf8")
def translate_file(args, valid=None, model=None):
    """Translate ``args.valid_src_file`` one sentence at a time.

    In validation mode (``valid`` truthy) translations are piped into the
    multi-bleu perl script and the parsed BLEU score is returned;
    otherwise translations are written to ``args.trans_file``.

    Args:
        args: namespace carrying dict paths, model/save paths,
            ``beam_width``, ``use_best``, ``bleu_script``, etc.
        valid: truthy to score against ``args.valid_trg_file`` instead of
            writing a translation file.
        model: optional pre-loaded model; loaded from disk when ``None``.

    Returns:
        float: BLEU score in validation mode, else ``-1``.
    """
    # BUG FIX: a bare `torch.no_grad()` call is a no-op (the context
    # manager is discarded immediately). Disable gradients explicitly;
    # they are re-enabled at the end, matching the original intent.
    torch.set_grad_enabled(False)

    valid_iter = TextIterator(args.valid_src_file, args.src_dict,
                              batch_size=1, maxlen=1000, ahead=1,
                              resume_num=0)
    trg_dict2 = read_dict(args.trg_dict)
    args.trg_words_n = len(trg_dict2)
    # id -> token inverse vocabulary for decoding.
    trg_inv_dict = {vv: kk for kk, vv in trg_dict2.items()}

    # Load the (best) checkpoint unless a model was handed in.
    if model is None:
        file_name = args.save_dir + '/' + args.model_file + '.pth'
        if args.use_best == 1:
            file_name = file_name + '.best.pth'
            print("Using best model")
        model = torch.load(file_name)

    # Validation pipes sentences into multi-bleu; otherwise open the
    # output translation file.
    if valid:
        multibleu_cmd = ["perl", args.bleu_script, args.valid_trg_file, "<"]
        mb_subprocess = Popen(multibleu_cmd, stdin=PIPE, stdout=PIPE,
                              universal_newlines=True)
    else:
        fp = open(args.trans_file, 'w')

    for x_data, x_mask, cur_line, iloop in valid_iter:
        # Greedy (beam 1) decoding during validation or when requested.
        if valid or args.beam_width == 1:
            samples = translate_beam_1(model, x_data, args)
        else:
            samples = translate_beam_k(model, x_data, args)
        sentence = ids2words(trg_inv_dict, samples, eos_id=EOS_token)
        sentence = unbpe(sentence)
        if valid:
            mb_subprocess.stdin.write(sentence + '\n')
            mb_subprocess.stdin.flush()
            if iloop % 500 == 0:
                print(iloop, 'is validated...')
        else:
            fp.write(sentence + '\n')
            if iloop % 500 == 0:
                print(iloop, 'is translated...')

    ret = -1
    if valid:
        # Close stdin so multi-bleu flushes its one-line score, then parse
        # e.g. "BLEU = 12.34, ..." into a float.
        mb_subprocess.stdin.close()
        stdout = mb_subprocess.stdout.readline()
        out_parse = re.match(r'BLEU = [-.0-9]+', stdout)
        mb_subprocess.terminate()
        if out_parse:
            ret = float(out_parse.group()[6:])
    else:
        fp.close()
    torch.set_grad_enabled(True)
    return ret
def setting():
    """Load the aihub Korean<->English NMT models and vocabularies.

    Same contract as the earlier bible-data ``setting()``, but pointing
    at the aihub PN-version vocabularies and checkpoints.
    NOTE(review): this definition shadows the earlier ``setting()`` in
    the same module — only this one is effective at runtime.

    Returns:
        tuple: ``(k2e_model, e2k_model, k2e_trg_inv_dict,
        e2k_trg_inv_dict, PN_list)`` — the two loaded models, the
        id -> token inverse vocabularies for decoding, and the PN
        dictionary sorted by its English value in descending order.
    """
    print("Setting models")
    # BUG FIX: a bare `torch.no_grad()` call is a no-op — the context
    # manager is created and immediately discarded. Disable gradients
    # explicitly instead.
    torch.set_grad_enabled(False)

    args.kr_dict = 'aihub/PN_version/subword/vocab.kr.pkl'
    args.en_dict = 'aihub/PN_version/subword/vocab.en.pkl'
    args.save_dir = './results'
    args.k2e_model_file = 'kr2en.mylstm.300.500.500.500.aihub.pn'
    args.e2k_model_file = 'en2kr.mylstm.300.500.500.500.bleu05'
    args.src_file = '/home/nmt19/RNN_model/input.txt.tok.pn.sub'

    kr_dict = read_dict(args.kr_dict)
    en_dict = read_dict(args.en_dict)

    # Proper-noun dictionary (Korean key -> English value).
    # NOTE(review): loading a pickle is only safe on trusted files;
    # PN_dict_name must be defined at module level.
    with open(PN_dict_name, 'rb') as f:
        PN_dict = pickle.load(f, encoding="utf-8")

    # Unlike the bible-data variant, single-character Korean keys are
    # KEPT here: the removal step had been deliberately disabled
    # (commented out), and the now-unused bookkeeping that fed it has
    # been removed.

    # Sort by the English value, descending, before substitution
    # (sorting a dict's items yields a list of (key, value) pairs).
    PN_list = sorted(PN_dict.items(), key=operator.itemgetter(1), reverse=True)

    # id -> token inverse vocabularies for decoding model output.
    k2e_trg_inv_dict = {vv: kk for kk, vv in en_dict.items()}
    e2k_trg_inv_dict = {vv: kk for kk, vv in kr_dict.items()}

    k2e_model_name = args.save_dir + '/' + args.k2e_model_file + '.pth' + '.best.pth'
    e2k_model_name = args.save_dir + '/' + args.e2k_model_file + '.pth' + '.best.pth'

    k2e_model = torch.load(k2e_model_name)
    print("k2e best model loaded")
    e2k_model = torch.load(e2k_model_name)
    print("e2k best model loaded")

    return k2e_model, e2k_model, k2e_trg_inv_dict, e2k_trg_inv_dict, PN_list