Пример #1
0
def setting():
    """Load the vocabularies, the proper-noun table and both translation models.

    Configuration is hard-coded: the paths below are written into the
    module-level ``args`` namespace, and the proper-noun pickle is read from
    the module-level ``PN_dict_name``.

    Side effects:
        Overwrites ``args.kr_dict``, ``args.en_dict``, ``args.save_dir``,
        ``args.k2e_model_file``, ``args.e2k_model_file`` and ``args.src_file``;
        writes every one-character proper-noun key (one per line, UTF-8) to
        ``bible_people/name_one_char.txt``; prints progress messages.

    Returns:
        tuple: ``(k2e_model, e2k_model, k2e_trg_inv_dict, e2k_trg_inv_dict,
        PN_list)``. ``k2e_trg_inv_dict`` / ``e2k_trg_inv_dict`` are the
        dictionaries read from ``args.en_dict`` / ``args.kr_dict`` with keys
        and values swapped. ``PN_list`` is the list of ``(key, value)``
        proper-noun pairs, without one-character keys, sorted by value in
        descending order.
    """
    print("Setting models")
    # NOTE: the original issued a bare ``torch.no_grad()`` statement here.
    # That only constructs a context manager and throws it away, so it never
    # disabled autograd. It is dropped; callers that want gradient-free
    # inference should wrap their decoding in ``with torch.no_grad():``.

    args.kr_dict = 'bible_people/bible_data_GYGJ+NIV/PN/subword/vocab.kr.pkl'
    args.en_dict = 'bible_people/bible_data_GYGJ+NIV/PN/subword/vocab.en.pkl'
    args.save_dir = './results'
    args.k2e_model_file = 'kr2en.mylstm.150.250.250.250.bible_data_GJNIV_PN'
    args.e2k_model_file = 'en2kr.mylstm.300.500.500.500.bleu05'
    args.src_file = '/home/nmt19/RNN_model/input.txt.tok.sym.pn.sub'

    kr_dict = read_dict(args.kr_dict)
    en_dict = read_dict(args.en_dict)
    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only point PN_dict_name (and the torch.load paths below) at trusted data.
    with open(PN_dict_name, 'rb') as f:
        PN_dict = pickle.load(f, encoding="utf-8")

    # Snapshot the one-character keys first so the dict is not mutated while
    # it is being iterated.
    one_char_keys = [key for key in PN_dict if len(key) == 1]

    # The list is still dumped to disk as before, but explicitly as UTF-8 so
    # the output does not depend on the platform's default encoding.
    one_char_name = "bible_people/name_one_char.txt"
    with open(one_char_name, 'w', encoding="utf-8") as f:
        f.writelines(key + "\n" for key in one_char_keys)

    # Remove the keys straight from memory instead of re-reading the file.
    for key in one_char_keys:
        del PN_dict[key]

    # Sorting the items turns the dict into a list of (key, value) pairs.
    PN_list = sorted(PN_dict.items(), key=operator.itemgetter(1), reverse=True)

    # Inverse lookups (value -> key) for the target side of each direction.
    k2e_trg_inv_dict = {value: key for key, value in en_dict.items()}
    e2k_trg_inv_dict = {value: key for key, value in kr_dict.items()}

    k2e_model_name = args.save_dir + '/' + args.k2e_model_file + '.pth' + '.best.pth'
    e2k_model_name = args.save_dir + '/' + args.e2k_model_file + '.pth' + '.best.pth'

    k2e_model = torch.load(k2e_model_name)
    print("k2e best model loaded")

    e2k_model = torch.load(e2k_model_name)
    print("e2k best model loaded")

    return k2e_model, e2k_model, k2e_trg_inv_dict, e2k_trg_inv_dict, PN_list
# Interactive translation script (fragment): `parser` is created before this
# point and the final loop continues past the visible end of the snippet.
parser.add_argument("--beam_width", type=int, default=1)

# Token id handed to decoding helpers as the end-of-sentence marker
# (presumably matches the vocabulary's EOS index -- TODO confirm).
EOS_token = 1

args = parser.parse_args()

# NOTE(review): a bare torch.no_grad() only constructs a context manager and
# discards it; it does not disable autograd. Use `with torch.no_grad():`
# around inference (or torch.set_grad_enabled(False)) if that was the intent.
torch.no_grad()
# The assignments below overwrite whatever was parsed from the command line,
# including --beam_width.
args.src_dict='/home/nmt19/data_05/bleu05/test/vocab.kr.pkl'
args.trg_dict='/home/nmt19/data_05/bleu05/test/vocab.en.pkl'
args.save_dir = './results'
args.model_file = 'kr2en.mylstm.300.500.500.500.bleu05'
args.beam_width = 3
src_file = '/home/nmt19/RNN_model/input.kr.tok.sub'


trg_dict = read_dict(args.trg_dict)

# Inverse of the target dictionary: value -> key.
trg_inv_dict = dict()
for kk, vv in trg_dict.items():
    trg_inv_dict[vv] = kk


# Checkpoint path: <save_dir>/<model_file>.pth.best.pth
file_name = args.save_dir + '/' + args.model_file + '.pth' + '.best.pth'
print("Using best model")
model = torch.load(file_name)

# Prompt for up to three source sentences and echo each one back.
for i in range(3):
    input_sen = input("source: ")
    print(input_sen)

    # Opened for writing as UTF-8; the code that writes to and closes this
    # handle is outside the visible fragment.
    text_file = open("input.kr", "w",encoding="utf8")
Пример #3
0
def translate_file(args, valid=None, model=None):
    """Translate ``args.valid_src_file`` and either score or save the output.

    Args:
        args: Namespace providing ``valid_src_file``, ``src_dict``,
            ``trg_dict``, ``beam_width`` and, when ``model`` is None,
            ``save_dir``, ``model_file`` and ``use_best``. Validation mode
            additionally reads ``bleu_script`` and ``valid_trg_file``; file
            mode reads ``trans_file``. ``args.trg_words_n`` is set to the size
            of the target dictionary as a side effect.
        valid: When truthy, every translated sentence is piped to the BLEU
            script and the parsed score is returned (decoding always uses
            ``translate_beam_1`` in this mode). When falsy, sentences are
            written to ``args.trans_file``.
        model: Already-loaded model. When None the model is loaded with
            ``torch.load`` from ``<save_dir>/<model_file>.pth`` (with
            ``.best.pth`` appended when ``args.use_best == 1``).

    Returns:
        The BLEU value parsed from the first line of the script's stdout in
        validation mode, or ``-1`` when not validating or when no score could
        be parsed.

    Note:
        Gradient tracking is switched on globally before returning, exactly
        as the original implementation did.
    """
    valid_iter = TextIterator(args.valid_src_file, args.src_dict,
                              batch_size=1, maxlen=1000,
                              ahead=1, resume_num=0)

    trg_dict2 = read_dict(args.trg_dict)
    args.trg_words_n = len(trg_dict2)

    # Inverse of the target dictionary: value -> key.
    trg_inv_dict = {vv: kk for kk, vv in trg_dict2.items()}

    # model
    if model is None:
        file_name = args.save_dir + '/' + args.model_file + '.pth'
        if args.use_best == 1:
            file_name = file_name + '.best.pth'
            print("Using best model")
        model = torch.load(file_name)

    # translate
    mb_subprocess = None
    fp = None
    if valid:
        # NOTE(review): no shell is involved, so the trailing "<" reaches perl
        # as a literal argument rather than a redirection. Kept unchanged;
        # confirm that the BLEU script ignores it.
        multibleu_cmd = ["perl", args.bleu_script, args.valid_trg_file, "<"]
        mb_subprocess = Popen(multibleu_cmd, stdin=PIPE, stdout=PIPE, universal_newlines=True)
    else:
        fp = open(args.trans_file, 'w')

    ret = -1
    try:
        # The original called a bare ``torch.no_grad()``, which is a no-op;
        # the context manager is what actually disables autograd here.
        with torch.no_grad():
            for x_data, _x_mask, _cur_line, iloop in valid_iter:
                if valid or args.beam_width == 1:
                    samples = translate_beam_1(model, x_data, args)
                else:
                    samples = translate_beam_k(model, x_data, args)
                sentence = ids2words(trg_inv_dict, samples, eos_id=EOS_token)
                sentence = unbpe(sentence)
                if valid:
                    mb_subprocess.stdin.write(sentence + '\n')
                    mb_subprocess.stdin.flush()
                    if iloop % 500 == 0:
                        print(iloop, 'is validated...')
                else:
                    fp.write(sentence + '\n')
                    if iloop % 500 == 0:
                        print(iloop, 'is translated...')

        if valid:
            # Closing stdin signals end of input; the score is then read from
            # the first line the script prints.
            mb_subprocess.stdin.close()
            stdout = mb_subprocess.stdout.readline()
            out_parse = re.match(r'BLEU = ([-.0-9]+)', stdout)
            if out_parse:
                ret = float(out_parse.group(1))
    finally:
        # Release the file / subprocess even when decoding raised.
        if fp is not None:
            fp.close()
        if mb_subprocess is not None:
            mb_subprocess.terminate()
            for pipe in (mb_subprocess.stdin, mb_subprocess.stdout):
                try:
                    pipe.close()
                except OSError:
                    pass  # best-effort cleanup; the process is being torn down
            mb_subprocess.wait()  # reap the child so it does not linger as a zombie

    torch.set_grad_enabled(True)
    return ret
Пример #4
0
def setting():
    """Load the vocabularies, the proper-noun table and both translation models.

    Configuration is hard-coded: the paths below are written into the
    module-level ``args`` namespace, and the proper-noun pickle is read from
    the module-level ``PN_dict_name``.

    Side effects:
        Overwrites ``args.kr_dict``, ``args.en_dict``, ``args.save_dir``,
        ``args.k2e_model_file``, ``args.e2k_model_file`` and ``args.src_file``;
        prints progress messages.

    Returns:
        tuple: ``(k2e_model, e2k_model, k2e_trg_inv_dict, e2k_trg_inv_dict,
        PN_list)``. ``k2e_trg_inv_dict`` / ``e2k_trg_inv_dict`` are the
        dictionaries read from ``args.en_dict`` / ``args.kr_dict`` with keys
        and values swapped. ``PN_list`` is the list of ``(key, value)``
        proper-noun pairs sorted by value in descending order.
    """
    print("Setting models")
    # NOTE: the original issued a bare ``torch.no_grad()`` statement here.
    # That only constructs a context manager and throws it away, so it never
    # disabled autograd. It is dropped; callers that want gradient-free
    # inference should wrap their decoding in ``with torch.no_grad():``.

    args.kr_dict = 'aihub/PN_version/subword/vocab.kr.pkl'
    args.en_dict = 'aihub/PN_version/subword/vocab.en.pkl'
    args.save_dir = './results'
    args.k2e_model_file = 'kr2en.mylstm.300.500.500.500.aihub.pn'
    args.e2k_model_file = 'en2kr.mylstm.300.500.500.500.bleu05'
    args.src_file = '/home/nmt19/RNN_model/input.txt.tok.pn.sub'

    kr_dict = read_dict(args.kr_dict)
    en_dict = read_dict(args.en_dict)
    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only point PN_dict_name (and the torch.load paths below) at trusted data.
    with open(PN_dict_name, 'rb') as f:
        PN_dict = pickle.load(f, encoding="utf-8")

    # NOTE: the step that removed one-character (Korean) keys was disabled in
    # the original, so every entry is kept. The leftover loop that collected
    # those keys had no consumer and has been removed.

    # Sort by the (English) value, descending. Sorting the items turns the
    # dict into a list of (key, value) pairs.
    PN_list = sorted(PN_dict.items(), key=operator.itemgetter(1), reverse=True)

    # Inverse lookups (value -> key) for the target side of each direction.
    k2e_trg_inv_dict = {value: key for key, value in en_dict.items()}
    e2k_trg_inv_dict = {value: key for key, value in kr_dict.items()}

    k2e_model_name = args.save_dir + '/' + args.k2e_model_file + '.pth' + '.best.pth'
    e2k_model_name = args.save_dir + '/' + args.e2k_model_file + '.pth' + '.best.pth'

    k2e_model = torch.load(k2e_model_name)
    print("k2e best model loaded")
    e2k_model = torch.load(e2k_model_name)
    print("e2k best model loaded")
    return k2e_model, e2k_model, k2e_trg_inv_dict, e2k_trg_inv_dict, PN_list