def only_evaluate(data,
                  codes,
                  keyword_num,
                  vocabulary,
                  batch_size,
                  embedding_dim,
                  hidden_state_size,
                  rnn_num_layer,
                  learning_rate,
                  epoches,
                  saved_name,
                  stack_size,
                  load_previous_model=False):
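    """Load a trained ScopeGrammarLanguageModel from `saved_name` and report
    test perplexity, top-k accuracy, and per-sample prediction details.
    Evaluation only: the training-related parameters are unused here."""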
    save_path = os.path.join(config.save_model_root, saved_name)
    # for d in data:
    #     def get_i(i):
    #         return d[i]
    #     show_process_map(get_i, range(len(d)))
    # for d, n in zip(data, ["train", "val", "test"]):
    #     print("There are {} parsed data in the {} dataset".format(len(d), n))
    test_dataset = data

    loss_function = nn.CrossEntropyLoss(size_average=False,
                                        ignore_index=PAD_TOKEN)
    model = ScopeGrammarLanguageModel(
        vocabulary.vocabulary_size,
        embedding_dim,
        hidden_state_size,
        rnn_num_layer,
        keyword_num,
        stack_size,
        batch_size,
    )

    torch_util.load_model(model, save_path)
    test_loss, top_k_accuracy, sample_predict, sample_target, sample_prob, steps = accuracy_evaluate(
        model, test_dataset, batch_size, loss_function, vocabulary)
    best_test_perplexity = torch.exp(test_loss).item()
    print("load the previous mode, test perplexity is :{}".format(
        best_test_perplexity))
    # print(prof)
    print("The model {} test perplexity is {}".format(saved_name,
                                                      best_test_perplexity))
    print("The top k accuracy:")
    for k, v in top_k_accuracy.items():
        print("{}:{}".format(k, v))

    for i, (pre, tar, prob, code) in enumerate(
            zip(sample_predict, sample_target, sample_prob, codes)):
        print('{} in step {} target token: {}'.format(i, steps.item(), code))
        for p, t, pr in zip(pre, tar, prob):
            print("{}:{}:{}".format(t, p, pr))
Example #2
def get_model(model_fn,
              model_params,
              path,
              load_previous=False,
              parallel=False,
              gpu_index=None,
              vocabulary=None,
              has_delimiter=False):
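    """Construct a model with model_fn(**model_params), wrap it in
    nn.DataParallel for the requested GPU(s), optionally restore saved
    weights from `path`, and unwrap to a bare CPU module when neither
    `parallel` nor `gpu_index` is given."""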
    m = model_fn(**model_params)
    # to_cuda(m)
    if parallel:
        m = nn.DataParallel(m.cuda(), device_ids=[0, 1])
    elif gpu_index is not None:
        m = nn.DataParallel(m.cuda(), device_ids=[gpu_index])
    else:
        m = nn.DataParallel(m.cuda(), device_ids=[0])

    load_config_3_model = False
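    # Hard-coded switch: set to True to warm-start from a checkpoint trained
    # with check_error_task=True, dropping its discriminator output layer and
    # any parameters that do not exist in the current model.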
    if load_config_3_model:
        model_params['check_error_task'] = True
        model_params['detect_token_model_param']['check_error_task'] = True
        pre_m = model_fn(**model_params)
        pre_m = nn.DataParallel(pre_m.cuda(), device_ids=[gpu_index])
        torch_util.load_model(pre_m, path)

        m_state_dict = m.state_dict()
        pre_state_dict = pre_m.state_dict()
        pre_state_dict.pop('module.discriminator.output.weight')
        pre_state_dict.pop('module.discriminator.output.bias')
        pre_state_dict = {
            k: v
            for k, v in pre_state_dict.items() if k in m_state_dict
        }
        m_state_dict.update(pre_state_dict)
        m.load_state_dict(m_state_dict)
        model_params['check_error_task'] = False
        model_params['detect_token_model_param']['check_error_task'] = False

    if load_previous:
        # torch_util.load_model(m, path, map_location={'cuda:1': 'cuda:0'})
        torch_util.load_model(m, path)
        print("load previous model from {}".format(path))
    else:
        print("create new model")
    if gpu_index is None and not parallel:
        m = m.module.cpu()
    return m
Example #3
def only_evaluate(data,
                  keyword_num,
                  vocabulary,
                  batch_size,
                  embedding_dim,
                  hidden_state_size,
                  rnn_num_layer,
                  learning_rate,
                  epoches,
                  saved_name,
                  stack_size,
                  load_previous_model=False):
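    """Load a trained ScopeGrammarLanguageModel from `saved_name` and report
    its test perplexity and top-k accuracy; no training is performed."""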
    save_path = os.path.join(config.save_model_root, saved_name)
    # for d in data:
    #     def get_i(i):
    #         return d[i]
    #     show_process_map(get_i, range(len(d)))
    # for d, n in zip(data, ["train", "val", "test"]):
    #     print("There are {} parsed data in the {} dataset".format(len(d), n))
    test_dataset = data

    loss_function = nn.CrossEntropyLoss(size_average=False,
                                        ignore_index=PAD_TOKEN)
    model = ScopeGrammarLanguageModel(
        vocabulary.vocabulary_size,
        embedding_dim,
        hidden_state_size,
        rnn_num_layer,
        keyword_num,
        stack_size,
        batch_size,
    )
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    torch_util.load_model(model, save_path)
    test_loss, top_k_accuracy = accuracy_evaluate(model, test_dataset,
                                                  batch_size, loss_function)
    best_test_perplexity = torch.exp(test_loss).item()
    print("load the previous mode, test perplexity is :{}".format(
        best_test_perplexity))
    # print(prof)
    print("The model {} test perplexity is {}".format(saved_name,
                                                      best_test_perplexity))
    print("The top k accuracy:")
    for k, v in top_k_accuracy.items():
        print("{}:{}".format(k, v))
Example #4
def get_model(model_fn,
              parameter,
              pre_process_module_fn,
              pre_process_module_parameter,
              path,
              load_previous=False,
              parallel=False,
              gpu_index=None):
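    """Construct a model, move it to the GPU(s) if requested, wrap it with
    pre_process_module_fn, and optionally restore previously saved weights."""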
    m = model_fn(**parameter)
    # to_cuda(m)
    if parallel:
        m = nn.DataParallel(m.cuda(), device_ids=[0, 1])
    elif gpu_index is not None:
        m = m.cuda(gpu_index)
    m = pre_process_module_fn(m, **pre_process_module_parameter)
    if load_previous:
        torch_util.load_model(m, path)
        print("load previous model")
    else:
        print("create new model")
    return m
Example #5
def get_model(model_fn,
              model_params,
              path,
              load_previous=False,
              parallel=False,
              gpu_index=None):
    m = model_fn(**model_params)
    # to_cuda(m)
    if parallel:
        m = nn.DataParallel(m.cuda(), device_ids=[0, 1])
    elif gpu_index is not None:
        m = nn.DataParallel(m.cuda(gpu_index), device_ids=[gpu_index])
    else:
        m = nn.DataParallel(m.cuda(), device_ids=[0])
    if load_previous:
        # torch_util.load_model(m, path, map_location={'cuda:0': 'cuda:1'})
        torch_util.load_model(m, path)
        print("load previous model from {}".format(path))
    else:
        print("create new model")
    if gpu_index is None and not parallel:
        m = m.module.cpu()
    return m
Example #6
def train_and_evaluate(data,
                       dataset_type,
                       batch_size,
                       embedding_dim,
                       hidden_state_size,
                       rnn_num_layer,
                       dropout_p,
                       learning_rate,
                       epoches,
                       saved_name,
                       load_name=None,
                       gcc_file_path='test.c',
                       encoder_is_bidirectional=True,
                       decoder_is_bidirectional=False):
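    """Train a Seq2SeqModel on a C error-correction dataset, evaluate on the
    validation and test splits every epoch, and save the model whenever the
    validation `correct` metric improves."""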
    save_path = os.path.join(config.save_model_root, saved_name)
    if load_name is not None:
        load_path = os.path.join(config.save_model_root, load_name)

    for d, n in zip(data, ["train", "val", "test"]):
        print("There are {} raw data in the {} dataset".format(len(d), n))
    if dataset_type == 'random':
        vocabulary = load_vocabulary(get_random_error_c99_code_token_vocabulary, get_random_error_c99_code_token_vocabulary_id_map, [BEGIN], [END], UNK)
    elif dataset_type == 'common':
        vocabulary = load_vocabulary(get_common_error_c99_code_token_vocabulary, get_common_error_c99_code_token_vocabulary_id_map, [BEGIN], [END], UNK)
    else:
        vocabulary = load_vocabulary(get_common_error_c99_code_token_vocabulary, get_common_error_c99_code_token_vocabulary_id_map, [BEGIN], [END], UNK)
    generate_dataset = lambda df: CCodeErrorDataSet(df, vocabulary)
    data = [generate_dataset(d) for d in data]
    for d, n in zip(data, ["train", "val", "test"]):
        print("There are {} parsed data in the {} dataset".format(len(d), n))
    train_dataset, valid_dataset, test_dataset = data

    begin_id = vocabulary.word_to_id(vocabulary.begin_tokens[0])
    end_id = vocabulary.word_to_id(vocabulary.end_tokens[0])
    unk_id = vocabulary.word_to_id(vocabulary.unk)

    # loss_function = nn.CrossEntropyLoss(size_average=False, ignore_index=TARGET_PAD_TOKEN)
    loss_function = nn.CrossEntropyLoss(ignore_index=TARGET_PAD_TOKEN)
    model = Seq2SeqModel(hidden_state_size, embedding_dim, vocabulary.vocabulary_size, rnn_num_layer, batch_size, dropout_p, MAX_LENGTH, begin_id, end_id, encoder_is_bidirectional=encoder_is_bidirectional, decoder_is_bidirectional=decoder_is_bidirectional)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    if load_name is not None:
        torch_util.load_model(model, load_path)
        valid_loss, valid_accuracy, valid_correct, valid_compare_correct, _ = evaluate(model, valid_dataset, loss_function, batch_size, begin_id, end_id, unk_id, vocabulary.id_to_word, file_path=gcc_file_path)
        test_loss, test_accuracy, test_correct, test_compare_correct, _ = evaluate(model, test_dataset, loss_function, batch_size, begin_id, end_id, unk_id, vocabulary.id_to_word, file_path=gcc_file_path)
        _, _, _, _, valid_loss_in_train = evaluate(model, valid_dataset, loss_function, batch_size, begin_id, end_id, unk_id, vocabulary.id_to_word, file_path=gcc_file_path, use_force_train=True)
        _, _, _, _, test_loss_in_train = evaluate(model, test_dataset, loss_function, batch_size, begin_id, end_id, unk_id, vocabulary.id_to_word, file_path=gcc_file_path, use_force_train=True)
        best_valid_accuracy = valid_accuracy
        best_test_accuracy = test_accuracy
        best_valid_loss = valid_loss
        best_test_loss = test_loss
        best_valid_correct = valid_correct
        best_test_correct = test_correct
        best_valid_compare_correct = valid_compare_correct
        best_test_compare_correct = test_compare_correct
        best_valid_loss_in_train = valid_loss_in_train
        best_test_loss_in_train = test_loss_in_train
        # best_valid_accuracy = None
        # best_test_accuracy = None
        # best_valid_loss = None
        # best_test_loss = None
        # best_valid_correct = None
        # best_test_correct = None
        # best_valid_compare_correct = None
        # best_test_compare_correct = None
        # best_valid_loss_in_train = None
        # best_test_loss_in_train = None
        print(
            "loaded the previous model, validation accuracy is {}, test accuracy is {}"
            .format(valid_accuracy, test_accuracy))
        scheduler.step(best_valid_loss)
    else:
        best_valid_accuracy = None
        best_test_accuracy = None
        best_valid_loss = None
        best_test_loss = None
        best_valid_correct = None
        best_test_correct = None
        best_valid_compare_correct = None
        best_test_compare_correct = None
        best_valid_loss_in_train = None
        best_test_loss_in_train = None

    for epoch in range(epoches):
        train_loss = train(model, train_dataset, batch_size, loss_function, optimizer)
        valid_loss, valid_accuracy, valid_correct, valid_compare_correct, _ = evaluate(
            model, valid_dataset, loss_function, batch_size, begin_id, end_id,
            unk_id, vocabulary.id_to_word, file_path=gcc_file_path)
        test_loss, test_accuracy, test_correct, test_compare_correct, _ = evaluate(
            model, test_dataset, loss_function, batch_size, begin_id, end_id,
            unk_id, vocabulary.id_to_word, file_path=gcc_file_path)
        _, _, _, _, valid_loss_in_train = evaluate(model, valid_dataset, loss_function, batch_size, begin_id, end_id,
                                                   unk_id, vocabulary.id_to_word, file_path=gcc_file_path,
                                                   use_force_train=True)
        _, _, _, _, test_loss_in_train = evaluate(model, test_dataset, loss_function, batch_size, begin_id, end_id,
                                                  unk_id, vocabulary.id_to_word, file_path=gcc_file_path,
                                                  use_force_train=True)

        # train_perplexity = torch.exp(train_loss)[0]

        scheduler.step(valid_loss)

        if best_valid_correct is None or valid_correct < best_valid_correct:
            best_valid_accuracy = valid_accuracy
            best_test_accuracy = test_accuracy
            best_valid_loss = valid_loss
            best_test_loss = test_loss
            best_valid_correct = valid_correct
            best_test_correct = test_correct
            best_valid_compare_correct = valid_compare_correct
            best_test_compare_correct = test_compare_correct
            best_valid_loss_in_train = valid_loss_in_train
            best_test_loss_in_train = test_loss_in_train
            if not is_debug:
                torch_util.save_model(model, save_path)

        print("epoch {}: train loss of {}, valid loss of {}, test loss of {},  "
              "valid accuracy of {}, test accuracy of {}, valid correct of {}, test correct of {}, "
              "valid compare correct of {}, test compare correct of {}, valid loss in train of {}, test loss in train of {}".
              format(epoch, train_loss, valid_loss, test_loss, valid_accuracy, test_accuracy, valid_correct, test_correct, valid_compare_correct, test_compare_correct, valid_loss_in_train, test_loss_in_train))
    print("The model {} best valid accuracy is {} and test accuracy is {} and "
          "best valid loss is {} and test loss is {}, best valid correct is {}, best test correct is {}"
          "best valid compare correct is {}, best test compare correct is {}, "
          "best valid loss in train is {} and best test loss in train is {}".
          format(saved_name, best_valid_accuracy, best_test_accuracy, best_valid_loss, best_test_loss, best_valid_correct, best_test_correct, best_valid_compare_correct, best_test_compare_correct, best_valid_loss_in_train, best_test_loss_in_train))
Example #7
def train_and_evaluate(data_type,
                       batch_size,
                       hidden_size,
                       num_heads,
                       encoder_stack_num,
                       decoder_stack_num,
                       structed_num_layers,
                       addition_reward_gamma,
                       baseline_min_len,
                       length_punish_scale,
                       dropout_p,
                       learning_rate,
                       epoches,
                       saved_name,
                       load_name=None,
                       gcc_file_path='test.c',
                       normalize_type='layer',
                       predict_type='start',
                       pretrain_s_model_epoch=0):
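    """Jointly train a SelectPolicy (p_model), a StructedRepresentationRNN
    (s_model), and an attention-based fix-error model via combine_train,
    alternating between p_model and s_model training every two epochs."""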
    save_path = os.path.join(config.save_model_root, saved_name)
    if load_name is not None:
        load_path = os.path.join(config.save_model_root, load_name)

    begin_tokens = ['<BEGIN>']
    end_tokens = ['<END>']
    unk_token = '<UNK>'
    addition_tokens = ['<GAP>']
    vocabulary = create_common_error_vocabulary(
        begin_tokens=begin_tokens,
        end_tokens=end_tokens,
        unk_token=unk_token,
        addition_tokens=addition_tokens)

    begin_tokens_id = [vocabulary.word_to_id(i) for i in begin_tokens]
    end_tokens_id = [vocabulary.word_to_id(i) for i in end_tokens]
    unk_token_id = vocabulary.word_to_id(unk_token)
    addition_tokens_id = [vocabulary.word_to_id(i) for i in addition_tokens]

    if is_debug:
        data_dict = load_common_error_data_sample_100()
    else:
        data_dict = load_common_error_data()
    datasets = [
        CCodeErrorDataSet(pd.DataFrame(dd), vocabulary, name)
        for dd, name in zip(data_dict, ["train", "all_valid", "all_test"])
    ]
    for d, n in zip(datasets, ["train", "val", "test"]):
        print("There are {} parsed data in the {} dataset".format(len(d), n))
    train_dataset, valid_dataset, test_dataset = datasets

    seq_model = OnlyAttentionFixErrorModelWithoutInputEmbedding(
        vocabulary_size=vocabulary.vocabulary_size,
        hidden_size=2 * hidden_size,
        sequence_max_length=MAX_LENGTH,
        num_heads=num_heads,
        start_label=vocabulary.word_to_id(vocabulary.begin_tokens[0]),
        end_label=vocabulary.word_to_id(vocabulary.end_tokens[0]),
        pad_label=0,
        encoder_stack_num=encoder_stack_num,
        decoder_stack_num=decoder_stack_num,
        dropout_p=dropout_p,
        normalize_type=normalize_type)
    p_model = SelectPolicy(input_size=2 * hidden_size + 2 * hidden_size +
                           2 * hidden_size,
                           action_num=2)
    s_model = StructedRepresentationRNN(
        vocabulary_size=vocabulary.vocabulary_size,
        hidden_size=hidden_size,
        num_layers=structed_num_layers,
        batch_size=batch_size,
        dropout_p=dropout_p)

    seq_loss = nn.CrossEntropyLoss(ignore_index=TARGET_PAD_TOKEN)
    seq_loss_no_reduce = nn.CrossEntropyLoss(ignore_index=TARGET_PAD_TOKEN,
                                             reduce=False)

    p_optimizer = torch.optim.SGD(p_model.parameters(), lr=learning_rate)
    p_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        p_optimizer, 'min')
    s_optimizer = torch.optim.SGD(itertools.chain(s_model.parameters(),
                                                  seq_model.parameters()),
                                  lr=learning_rate)
    s_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        s_optimizer, 'min')
    best_p_valid_loss = None
    best_p_test_loss = None
    best_s_valid_loss = None
    best_s_test_loss = None

    remain_length_punish = create_remain_punish(length_punish_scale)
    # delay_reward_fn = create_delayed_reward_fn(seq_loss_no_reduce, gamma=addition_reward_gamma, length_punish_fn=delete_length_punish)
    delay_reward_fn = create_delayed_reward_fn(
        seq_loss_no_reduce,
        gamma=addition_reward_gamma,
        length_punish_fn=remain_length_punish)
    # baseline_fn = create_baseline_fn(baseline_min_len)
    baseline_fn = None
    delay_loss_fn = create_delay_loss_fn(do_normalize=False)

    if load_name is not None:
        torch_util.load_model(p_model,
                              load_path + '_p',
                              map_location={'cuda:1': 'cuda:0'})
        torch_util.load_model(s_model,
                              load_path + '_s',
                              map_location={'cuda:1': 'cuda:0'})
        torch_util.load_model(seq_model,
                              load_path + '_seq',
                              map_location={'cuda:1': 'cuda:0'})
        best_s_valid_loss, best_p_valid_loss, valid_select_probs, valid_accuracy_topk = combine_train(
            p_model,
            s_model,
            seq_model,
            valid_dataset,
            batch_size,
            loss_fn=seq_loss,
            p_optimizer=p_optimizer,
            s_optimizer=s_optimizer,
            delay_reward_fn=delay_reward_fn,
            baseline_fn=baseline_fn,
            delay_loss_fn=delay_loss_fn,
            vocab=vocabulary,
            train_type='valid',
            predict_type='first',
            include_error_reward=-10000,
            pretrain=False)
        best_s_test_loss, best_p_test_loss, test_select_probs, test_accuracy_topk = combine_train(
            p_model,
            s_model,
            seq_model,
            test_dataset,
            batch_size,
            loss_fn=seq_loss,
            p_optimizer=p_optimizer,
            s_optimizer=s_optimizer,
            delay_reward_fn=delay_reward_fn,
            baseline_fn=baseline_fn,
            delay_loss_fn=delay_loss_fn,
            vocab=vocabulary,
            train_type='test',
            predict_type='first',
            include_error_reward=-10000,
            pretrain=False)

    if pretrain_s_model_epoch > 0:
        for i in range(pretrain_s_model_epoch):
            print('in epoch: {}, train type: {}'.format(i, 'pre-train'))
            pretrain_seq_loss, pretrain_p_loss, pretrain_select_probs, pretrain_accuracy_topk = combine_train(
                p_model,
                s_model,
                seq_model,
                train_dataset,
                batch_size,
                loss_fn=seq_loss,
                p_optimizer=p_optimizer,
                s_optimizer=s_optimizer,
                delay_reward_fn=delay_reward_fn,
                baseline_fn=baseline_fn,
                delay_loss_fn=delay_loss_fn,
                vocab=vocabulary,
                train_type='s_model',
                predict_type='first',
                include_error_reward=-10000,
                pretrain=True,
                random_action=[0.6, 0.4])
            print('pretrain {}: seq_loss: {}, p_loss: {}'.format(
                str(i), pretrain_seq_loss, pretrain_p_loss))

    for epoch in range(epoches):
        if (epoch // 2) % 2 == 0:
            train_type = 'p_model'
        else:
            train_type = 's_model'
            # train_type = 'p_model'
        print('in epoch: {}, train type: {}'.format(epoch, train_type))
        train_seq_loss, train_p_loss, train_select_probs, train_accuracy_topk = combine_train(
            p_model,
            s_model,
            seq_model,
            train_dataset,
            batch_size,
            loss_fn=seq_loss,
            p_optimizer=p_optimizer,
            s_optimizer=s_optimizer,
            delay_reward_fn=delay_reward_fn,
            baseline_fn=baseline_fn,
            delay_loss_fn=delay_loss_fn,
            vocab=vocabulary,
            train_type=train_type,
            predict_type='first',
            include_error_reward=-10000,
            pretrain=False)
        if not is_debug:
            valid_seq_loss, valid_p_loss, valid_select_probs, valid_accuracy_topk = combine_train(
                p_model,
                s_model,
                seq_model,
                valid_dataset,
                batch_size,
                loss_fn=seq_loss,
                p_optimizer=p_optimizer,
                s_optimizer=s_optimizer,
                delay_reward_fn=delay_reward_fn,
                baseline_fn=baseline_fn,
                delay_loss_fn=delay_loss_fn,
                vocab=vocabulary,
                train_type='valid',
                predict_type='first',
                include_error_reward=-10000,
                pretrain=False)
            test_seq_loss, test_p_loss, test_select_probs, test_accuracy_topk = combine_train(
                p_model,
                s_model,
                seq_model,
                test_dataset,
                batch_size,
                loss_fn=seq_loss,
                p_optimizer=p_optimizer,
                s_optimizer=s_optimizer,
                delay_reward_fn=delay_reward_fn,
                baseline_fn=baseline_fn,
                delay_loss_fn=delay_loss_fn,
                vocab=vocabulary,
                train_type='test',
                predict_type='first',
                include_error_reward=-10000,
                pretrain=False)
        else:
            valid_seq_loss = 0
            valid_p_loss = 0
            valid_select_probs = 0
            valid_accuracy_topk = {}
            test_seq_loss = 0
            test_p_loss = 0
            test_select_probs = 0
            test_accuracy_topk = {}
        # train_seq_loss = 0
        # train_p_loss = 0
        # valid_seq_loss = 0
        # valid_p_loss = 0
        # test_seq_loss = 0
        # test_p_loss = 0

        if train_type == 's_model':
            s_scheduler.step(valid_seq_loss)
        elif train_type == 'p_model':
            p_scheduler.step(valid_p_loss)

        if best_s_valid_loss is None or valid_seq_loss < best_s_valid_loss:
            best_p_valid_loss = valid_p_loss
            best_p_test_loss = test_p_loss
            best_s_valid_loss = valid_seq_loss
            best_s_test_loss = test_seq_loss
        if not is_debug:
            cur_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
            torch_util.save_model(
                p_model, '{}_{}_{}_{}'.format(save_path, 'p', str(epoch),
                                              cur_time))
            torch_util.save_model(
                s_model, '{}_{}_{}_{}'.format(save_path, 's', str(epoch),
                                              cur_time))
            torch_util.save_model(
                seq_model, '{}_{}_{}_{}'.format(save_path, 'seq', str(epoch),
                                                cur_time))
        print(
            'epoch {}: train_seq_loss: {}, train_p_loss: {}, train_select_probs: {}, train_accuracy: {}, '
            'valid_seq_loss: {}, valid_p_loss: {}, valid_select_probs: {}, valid_accuracy: {}, '
            'test_seq_loss: {}, test_p_loss: {}, test_select_probs: {}, test_accuracy: {}'
            .format(epoch, train_seq_loss, train_p_loss, train_select_probs,
                    train_accuracy_topk, valid_seq_loss, valid_p_loss,
                    valid_select_probs, valid_accuracy_topk, test_seq_loss,
                    test_p_loss, test_select_probs, test_accuracy_topk))
    print('the model {} best valid_seq_loss: {}, best valid_p_loss: {}, '
          'best test_seq_loss: {}, best test_p_loss: {}'.format(
              saved_name, best_s_valid_loss, best_p_valid_loss,
              best_s_test_loss, best_p_test_loss))
Example #8
def train_and_test(data_type,
                   batch_size,
                   hidden_size,
                   num_heads,
                   encoder_stack_num,
                   decoder_stack_num,
                   dropout_p,
                   learning_rate,
                   epoches,
                   saved_name,
                   load_name=None,
                   gcc_file_path='test.c',
                   normalize_type='layer',
                   predict_type='start'):
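    """Train OnlyAttentionFixErrorModel (evaluating with teacher forcing,
    use_force_train=True) and keep the checkpoint with the lowest validation
    loss."""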
    save_path = os.path.join(config.save_model_root, saved_name)
    if load_name is not None:
        load_path = os.path.join(config.save_model_root, load_name)

    begin_tokens = ['<BEGIN>']
    end_tokens = ['<END>']
    unk_token = '<UNK>'
    addition_tokens = ['<GAP>']
    vocabulary = create_common_error_vocabulary(
        begin_tokens=begin_tokens,
        end_tokens=end_tokens,
        unk_token=unk_token,
        addition_tokens=addition_tokens)

    begin_tokens_id = [vocabulary.word_to_id(i) for i in begin_tokens]
    end_tokens_id = [vocabulary.word_to_id(i) for i in end_tokens]
    unk_token_id = vocabulary.word_to_id(unk_token)
    addition_tokens_id = [vocabulary.word_to_id(i) for i in addition_tokens]

    if is_debug:
        data_dict = load_common_error_data_sample_100()
    else:
        data_dict = load_common_error_data()
    datasets = [
        CCodeErrorDataSet(pd.DataFrame(dd), vocabulary, name)
        for dd, name in zip(data_dict, ["train", "all_valid", "all_test"])
    ]
    for d, n in zip(datasets, ["train", "val", "test"]):
        print("There are {} parsed data in the {} dataset".format(len(d), n))
    train_dataset, valid_dataset, test_dataset = datasets
    print(train_dataset)
    model = OnlyAttentionFixErrorModel(
        vocabulary_size=vocabulary.vocabulary_size,
        hidden_size=hidden_size,
        sequence_max_length=MAX_LENGTH,
        num_heads=num_heads,
        start_label=vocabulary.word_to_id(vocabulary.begin_tokens[0]),
        end_label=vocabulary.word_to_id(vocabulary.end_tokens[0]),
        pad_label=0,
        encoder_stack_num=encoder_stack_num,
        decoder_stack_num=decoder_stack_num,
        dropout_p=dropout_p,
        normalize_type=normalize_type)

    loss_function = nn.CrossEntropyLoss(ignore_index=TARGET_PAD_TOKEN)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    best_valid_loss = None
    best_test_loss = None
    best_valid_compile_result = None
    best_test_compile_result = None
    if load_name is not None:
        torch_util.load_model(model, load_path)
        best_valid_loss = evaluate(model,
                                   valid_dataset,
                                   batch_size,
                                   loss_function,
                                   vocabulary.id_to_word,
                                   file_path=gcc_file_path,
                                   gap_token=addition_tokens_id[0],
                                   begin_tokens=begin_tokens_id,
                                   end_tokens=end_tokens_id,
                                   predict_type=predict_type,
                                   use_force_train=True)
        best_test_loss = evaluate(model,
                                  test_dataset,
                                  batch_size,
                                  loss_function,
                                  vocabulary.id_to_word,
                                  file_path=gcc_file_path,
                                  gap_token=addition_tokens_id[0],
                                  begin_tokens=begin_tokens_id,
                                  end_tokens=end_tokens_id,
                                  predict_type=predict_type,
                                  use_force_train=True)

    for epoch in range(epoches):
        train_loss = train(model,
                           train_dataset,
                           batch_size,
                           loss_function,
                           optimizer,
                           gap_token=addition_tokens_id[0],
                           begin_tokens=begin_tokens_id,
                           end_tokens=end_tokens_id,
                           predict_type=predict_type)
        valid_loss = evaluate(model,
                              valid_dataset,
                              batch_size,
                              loss_function,
                              vocabulary.id_to_word,
                              file_path=gcc_file_path,
                              gap_token=addition_tokens_id[0],
                              begin_tokens=begin_tokens_id,
                              end_tokens=end_tokens_id,
                              predict_type=predict_type,
                              use_force_train=True)
        # valid_compile_result = evaluate(model, valid_dataset, batch_size, loss_function, vocabulary.id_to_word, file_path=gcc_file_path, gap_token=addition_tokens_id[0], begin_tokens=begin_tokens_id, end_tokens=end_tokens_id, predict_type=predict_type)
        test_loss = evaluate(model,
                             test_dataset,
                             batch_size,
                             loss_function,
                             vocabulary.id_to_word,
                             file_path=gcc_file_path,
                             gap_token=addition_tokens_id[0],
                             begin_tokens=begin_tokens_id,
                             end_tokens=end_tokens_id,
                             predict_type=predict_type,
                             use_force_train=True)
        # test_compile_result = evaluate(model, test_dataset, batch_size, loss_function, vocabulary.id_to_word, file_path=gcc_file_path, gap_token=addition_tokens_id[0], begin_tokens=begin_tokens_id, end_tokens=end_tokens_id, predict_type=predict_type)

        valid_compile_result = 0
        test_compile_result = 0

        scheduler.step(valid_loss)

        if best_valid_loss is None or valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_valid_compile_result = valid_compile_result
            best_test_loss = test_loss
            best_test_compile_result = test_compile_result
            if not is_debug:
                torch_util.save_model(model, save_path)
        print('epoch {}: train loss of {}, valid loss of {}, test loss of {}, '
              'valid compile result of {}, test compile result of {}'.format(
                  epoch, train_loss, valid_loss, test_loss,
                  valid_compile_result, test_compile_result))
    print(
        'The model {} best valid loss of {}, best test loss of {}, best valid compile result of {}, '
        'best test compile result of {}'.format(saved_name, best_valid_loss,
                                                best_test_loss,
                                                best_valid_compile_result,
                                                best_test_compile_result))
Example #9
def train_and_evaluate(data,
                       batch_size,
                       embedding_dim,
                       hidden_state_size,
                       rnn_num_layer,
                       learning_rate,
                       epoches,
                       saved_name,
                       load_previous_model=False):
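    """Train a GrammarLanguageModel guided by the C99 SLK grammar and save
    the checkpoint with the best validation perplexity."""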
    save_path = os.path.join(config.save_model_root, saved_name)
    for d, n in zip(data, ["train", "val", "test"]):
        print("There are {} raw data in the {} dataset".format(len(d), n))
    vocabulary = load_vocabulary(get_token_vocabulary, get_vocabulary_id_map_with_keyword, [BEGIN], [END], UNK)
    print("vocab_size:{}".format(vocabulary.vocabulary_size))
    print("The max token id:{}".format(max(vocabulary.word_to_id_dict.values())))

    slk_constants = C99SLKConstants()
    terminal_token_index = set(range(slk_constants.START_SYMBOL-2)) - {63, 64}
    label_vocabulary = C99LabelVocabulary(slk_constants)
    production_vocabulary = SLKProductionVocabulary(slk_constants)
    transforms_fn = transforms.Compose([
        IsNone("original"),
        key_transform(GrammarLanguageModelTypeInputMap(production_vocabulary), "tree"),
        IsNone("after type input"),
        FlatMap(),
        IsNone("Flat Map"),
        PadMap(production_vocabulary.token_num()),
        IsNone("Pad Map"),
    ])
    generate_dataset = lambda df: CCodeDataSet(df, vocabulary, transforms_fn)
    data = [generate_dataset(d) for d in data]
    for d, n in zip(data, ["train", "val", "test"]):
        print("There are {} parsed data in the {} dataset".format(len(d), n))
    train_dataset, valid_dataset, test_dataset = data
    keyword_index = [vocabulary.word_to_id(t) for t in pre_defined_c_tokens | {"CONSTANT", "STRING_LITERAL"}]
    identifier_index = label_vocabulary.get_label_id("ID") - 1 # zero

    loss_function = nn.CrossEntropyLoss(size_average=False, ignore_index=PAD_TOKEN)
    model = GrammarLanguageModel(
        vocabulary.vocabulary_size,
        production_vocabulary.token_num(),
        embedding_dim,
        hidden_state_size,
        rnn_num_layer,
        identifier_index,
        keyword_index,
        terminal_token_index,
        batch_size
    )
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    if load_previous_model:
        torch_util.load_model(model, save_path)
        valid_loss = evaluate(model, valid_dataset, batch_size, loss_function)
        test_loss = evaluate(model, test_dataset, batch_size, loss_function)
        best_valid_perplexity = torch.exp(valid_loss)[0]
        best_test_perplexity = torch.exp(test_loss)[0]
        print(
            "loaded the previous model, validation perplexity is {}, test perplexity is: {}"
            .format(best_valid_perplexity, best_test_perplexity))
        scheduler.step(best_valid_perplexity)
    else:
        best_valid_perplexity = None
        best_test_perplexity = None
    for epoch in range(epoches):
        train_loss = train(model, train_dataset, batch_size, loss_function, optimizer)
        valid_loss = evaluate(model, valid_dataset, batch_size, loss_function)
        test_loss = evaluate(model, test_dataset, batch_size, loss_function)

        train_perplexity = torch.exp(train_loss)[0]
        valid_perplexity = torch.exp(valid_loss)[0]
        test_perplexity = torch.exp(test_loss)[0]

        scheduler.step(valid_perplexity)

        if best_valid_perplexity is None or valid_perplexity < best_valid_perplexity:
            best_valid_perplexity = valid_perplexity
            best_test_perplexity = test_perplexity
            torch_util.save_model(model, save_path)

        print("epoch {}: train perplexity of {},  valid perplexity of {}, test perplexity of {}".
              format(epoch, train_perplexity, valid_perplexity, test_perplexity))
    print("The model {} best valid perplexity is {} and test perplexity is {}".
          format(saved_name, best_valid_perplexity, best_test_perplexity))
Example #10
def train_and_evaluate(data,
                       batch_size,
                       embedding_dim,
                       hidden_state_size,
                       rnn_num_layer,
                       learning_rate,
                       epoches,
                       saved_name,
                       load_previous_model=False):
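    """Train a GrammarLanguageModel on grammar-typed parse-tree input and
    save the checkpoint with the best validation perplexity."""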
    save_path = os.path.join(config.save_model_root, saved_name)
    for d, n in zip(data, ["train", "val", "test"]):
        print("There are {} raw data in the {} dataset".format(len(d), n))
    vocabulary = load_vocabulary(get_token_vocabulary, get_vocabulary_id_map,
                                 [BEGIN], [END], UNK)
    production_vocabulary = get_all_c99_production_vocabulary()
    print("terminal num:{}".format(len(
        production_vocabulary._terminal_id_set)))
    transforms_fn = transforms.Compose([
        key_transform(GrammarLanguageModelTypeInputMap(production_vocabulary),
                      "tree"),
        FlatMap(),
        PadMap(production_vocabulary.token_num()),
    ])
    generate_dataset = lambda df: CCodeDataSet(df, vocabulary, transforms_fn)
    data = [generate_dataset(d) for d in data]
    for d, n in zip(data, ["train", "val", "test"]):
        print("There are {} parsed data in the {} dataset".format(len(d), n))
    train_dataset, valid_dataset, test_dataset = data

    loss_function = nn.CrossEntropyLoss(size_average=False,
                                        ignore_index=PAD_TOKEN)
    model = GrammarLanguageModel(vocabulary.vocabulary_size,
                                 production_vocabulary.token_num(),
                                 embedding_dim, hidden_state_size,
                                 rnn_num_layer, batch_size)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    if load_previous_model:
        torch_util.load_model(model, save_path)
        valid_loss = evaluate(model, valid_dataset, batch_size, loss_function)
        test_loss = evaluate(model, test_dataset, batch_size, loss_function)
        best_valid_perplexity = torch.exp(valid_loss)[0]
        best_test_perplexity = torch.exp(test_loss)[0]
        print(
            "loaded the previous model, validation perplexity is {}, test perplexity is: {}"
            .format(best_valid_perplexity, best_test_perplexity))
        scheduler.step(best_valid_perplexity)
    else:
        best_valid_perplexity = None
        best_test_perplexity = None
    for epoch in range(epoches):
        train_loss = train(model, train_dataset, batch_size, loss_function,
                           optimizer)
        valid_loss = evaluate(model, valid_dataset, batch_size, loss_function)
        test_loss = evaluate(model, test_dataset, batch_size, loss_function)

        train_perplexity = torch.exp(train_loss)[0]
        valid_perplexity = torch.exp(valid_loss)[0]
        test_perplexity = torch.exp(test_loss)[0]

        scheduler.step(valid_perplexity)

        if best_valid_perplexity is None or valid_perplexity < best_valid_perplexity:
            best_valid_perplexity = valid_perplexity
            best_test_perplexity = test_perplexity
            torch_util.save_model(model, save_path)

        print(
            "epoch {}: train perplexity of {},  valid perplexity of {}, test perplexity of {}"
            .format(epoch, train_perplexity, valid_perplexity,
                    test_perplexity))
    print("The model {} best valid perplexity is {} and test perplexity is {}".
          format(saved_name, best_valid_perplexity, best_test_perplexity))
Example #11
def train_and_evaluate_lstm_model(embedding_dim,
                                  hidden_size,
                                  num_layers,
                                  bidirectional,
                                  dropout,
                                  learning_rate,
                                  batch_size,
                                  epoches,
                                  saved_name,
                                  load_path=None,
                                  is_accuracy=False,
                                  is_example=False):
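    """Train an LSTM language model on C token sequences; with `is_accuracy`
    only report test accuracy, and with `is_example` only run the example
    set through evaluation."""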
    print(
        '------------------------------------- start train and evaluate ----------------------------------------'
    )
    print(
        'embedding_dim: {}, hidden_size: {}, num_layers: {}, bidirectional: {}, dropout: {}, '
        'learning_rate: {}, batch_size: {}, epoches: {}, saved_name: {}'.
        format(embedding_dim, hidden_size, num_layers, bidirectional, dropout,
               learning_rate, batch_size, epoches, saved_name))
    debug = False
    save_path = os.path.join(config.save_model_root, saved_name)

    vocabulary = load_vocabulary(get_token_vocabulary, get_vocabulary_id_map,
                                 [BEGIN], [END], UNK)
    vocabulary_size = vocabulary.vocabulary_size

    print('before read data')
    if is_example:
        example_data = read_example_code_tokens()
    elif debug:
        train_data, valid_data, test_data = [
            d[:100] for d in read_filtered_without_include_code_tokens()
        ]
        print("train data size:{}".format(len(train_data)))
    else:
        train_data, valid_data, test_data = read_filtered_without_include_code_tokens(
        )
        print("train data size:{}".format(len(train_data)))

    print('before create loss function')
    loss_function = nn.CrossEntropyLoss(ignore_index=0)
    print('before create model')
    model = LSTMModel(vocabulary_size + 1, embedding_dim, hidden_size,
                      num_layers, batch_size, bidirectional, dropout)
    if load_path is not None:
        load_path = os.path.join(config.save_model_root, load_path)
        print('load model from {}'.format(load_path))
        torch_util.load_model(model,
                              load_path,
                              map_location={'cuda:1': 'cuda:0'})
    print('after create model')
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    print('after create optimizer')
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    print('before parse xy')
    if is_example:
        example_X, example_y = parse_xy(example_data, vocabulary.word_to_id)
        evaluate(model, example_X, example_y, loss_function, batch_size,
                 is_example, vocabulary.id_to_word)
        return
    else:
        train_X, train_y = parse_xy(train_data, vocabulary.word_to_id)
        valid_X, valid_y = parse_xy(valid_data, vocabulary.word_to_id)
        test_X, test_y = parse_xy(test_data, vocabulary.word_to_id)
        print('after parse xy train data: {}, valid data: {}, test data: {}'.
              format(len(train_X), len(valid_X), len(test_X)))

    best_valid_perplexity = None
    best_test_perplexity = None

    sys.stdout.flush()
    sys.stderr.flush()

    if is_accuracy:
        test_accuracy = model_test(model, test_X, test_y, loss_function,
                                   batch_size)
        print("The model {} accuracy is {}".format(load_path, test_accuracy))
        return test_accuracy

    for i in range(epoches):
        print('in epoch {}'.format(i))
        train_loss = train(model, train_X, train_y, optimizer, loss_function,
                           batch_size)
        print('after train: {}'.format(train_loss))
        valid_loss = evaluate(model, valid_X, valid_y, loss_function,
                              batch_size)
        print('after valid: {}'.format(valid_loss))
        test_loss = evaluate(model, test_X, test_y, loss_function, batch_size)
        print('after test: {}'.format(test_loss))
        print("epoch {}: train loss of {},  valid loss of {}, test loss of {}".
              format(i, train_loss, valid_loss, test_loss))

        train_perplexity = torch.exp(train_loss)[0]
        valid_perplexity = torch.exp(valid_loss)[0]
        test_perplexity = torch.exp(test_loss)[0]

        scheduler.step(valid_perplexity)

        if (best_valid_perplexity is None or
                valid_perplexity < best_valid_perplexity) and not is_accuracy:
            best_valid_perplexity = valid_perplexity
            best_test_perplexity = test_perplexity
            torch_util.save_model(model, save_path)

        print(
            "epoch {}: train perplexity of {},  valid perplexity of {}, test perplexity of {}"
            .format(i, train_perplexity, valid_perplexity, test_perplexity))
        sys.stdout.flush()
        sys.stderr.flush()
    print("The model {} best valid perplexity is {} and test perplexity is {}".
          format(saved_name, best_valid_perplexity, best_test_perplexity))
Example #12
def train_and_evaluate(data,
                       keyword_num,
                       vocabulary,
                       batch_size,
                       embedding_dim,
                       hidden_state_size,
                       rnn_num_layer,
                       learning_rate,
                       epoches,
                       saved_name,
                       stack_size,
                       load_previous_model=False,
                       random_init_stack_vector=False):
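    """Train a ScopeGrammarLanguageModel, save the checkpoint with the best
    validation perplexity, and report the total training time."""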
    save_path = os.path.join(config.save_model_root, saved_name)
    # for d in data:
    #     def get_i(i):
    #         return d[i]
    #     show_process_map(get_i, range(len(d)))
    for d, n in zip(data, ["train", "val", "test"]):
        print("There are {} parsed data in the {} dataset".format(len(d), n))
    train_dataset, valid_dataset, test_dataset = data

    loss_function = nn.CrossEntropyLoss(size_average=False,
                                        ignore_index=PAD_TOKEN)
    model = ScopeGrammarLanguageModel(
        vocabulary.vocabulary_size,
        embedding_dim,
        hidden_state_size,
        rnn_num_layer,
        keyword_num,
        stack_size,
        batch_size,
        random_init_stack_vector=random_init_stack_vector)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    if load_previous_model:
        torch_util.load_model(model, save_path)
        valid_loss = evaluate(model, valid_dataset, batch_size, loss_function)
        test_loss = evaluate(model, test_dataset, batch_size, loss_function)
        best_valid_perplexity = torch.exp(valid_loss).item()
        best_test_perplexity = torch.exp(test_loss).item()
        print(
            "loaded the previous model, validation perplexity is {}, test perplexity is: {}"
            .format(best_valid_perplexity, best_test_perplexity))
        scheduler.step(best_valid_perplexity)
    else:
        best_valid_perplexity = None
        best_test_perplexity = None
    begin_time = time.time()
    # with torch.autograd.profiler.profile() as prof:
    for epoch in range(epoches):
        train_loss = train(model, train_dataset, batch_size, loss_function,
                           optimizer)
        valid_loss = evaluate(model, valid_dataset, batch_size, loss_function)
        test_loss = evaluate(model, test_dataset, batch_size, loss_function)

        train_perplexity = torch.exp(train_loss).item()
        valid_perplexity = torch.exp(valid_loss).item()
        test_perplexity = torch.exp(test_loss).item()

        scheduler.step(valid_perplexity)

        if best_valid_perplexity is None or valid_perplexity < best_valid_perplexity:
            best_valid_perplexity = valid_perplexity
            best_test_perplexity = test_perplexity
            torch_util.save_model(model, save_path)

        print(
            "epoch {}: train perplexity of {},  valid perplexity of {}, test perplexity of {}"
            .format(epoch, train_perplexity, valid_perplexity,
                    test_perplexity))
    # print(prof)
    print("The model {} best valid perplexity is {} and test perplexity is {}".
          format(saved_name, best_valid_perplexity, best_test_perplexity))
    print("use time {} seconds".format(time.time() - begin_time))
Example #13
def train_and_evaluate_n_gram_language_model(embedding_dim,
                                             context_size,
                                             layer_num,
                                             hidden_size,
                                             learning_rate,
                                             batch_size,
                                             epoches,
                                             saved_name,
                                             is_load=False,
                                             is_accuracy=False,
                                             is_example=False):
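    """Train a fixed-context-window NGramLanguageModeler; `is_accuracy` and
    `is_example` switch to evaluation-only modes."""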
    debug = False
    save_path = os.path.join(config.save_model_root, saved_name)

    begin_tokens = [BEGIN + str(i) for i in range(context_size)]
    vocabulary = load_vocabulary(get_token_vocabulary, get_vocabulary_id_map,
                                 begin_tokens, [END], UNK)
    vocabulary_size = vocabulary.vocabulary_size
    print("The vocabulary_size:{}".format(vocabulary_size))

    if is_example:
        example_data = read_example_code_tokens()
    elif debug:
        train_data, valid_data, test_data = [
            d[:100] for d in read_filtered_without_include_code_tokens()
        ]
        print("train data size:{}".format(len(train_data)))
    else:
        train_data, valid_data, test_data = read_filtered_without_include_code_tokens(
        )
        print("train data size:{}".format(len(train_data)))
    sys.stdout.flush()
    sys.stderr.flush()

    loss_function = nn.CrossEntropyLoss()
    model = NGramLanguageModeler(vocabulary_size, embedding_dim, context_size,
                                 batch_size, layer_num, hidden_size)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    if is_example:
        example_X, example_y = parse_xy(example_data, context_size,
                                        vocabulary.word_to_id)
    elif not is_accuracy:
        train_X, train_y = parse_xy(train_data, context_size,
                                    vocabulary.word_to_id)
    if not is_example:
        valid_X, valid_y = parse_xy(valid_data, context_size,
                                    vocabulary.word_to_id)
        test_X, test_y = parse_xy(test_data, context_size,
                                  vocabulary.word_to_id)

    if is_load:
        print('load model from {}'.format(save_path))
        torch_util.load_model(model, save_path)
        if not is_example:
            valid_loss = evaluate(model, valid_X, valid_y, batch_size,
                                  loss_function)
            test_loss = evaluate(model, test_X, test_y, batch_size,
                                 loss_function)
            best_valid_perplexity = torch.exp(valid_loss)[0]
            best_test_perplexity = torch.exp(test_loss)[0]
            print(
                'best_valid_perplexity: {}, best_test_perplexity: {}'.format(
                    best_valid_perplexity, best_test_perplexity))
    else:
        best_valid_perplexity = None
        best_test_perplexity = None

    if is_example:
        evaluate(model,
                 example_X,
                 example_y,
                 batch_size,
                 loss_function,
                 is_accuracy=is_accuracy,
                 is_example=is_example,
                 id_to_word_fn=vocabulary.id_to_word)
        print("end model {} example")
        return

    if is_accuracy:
        test_accuracy = evaluate(model,
                                 test_X,
                                 test_y,
                                 batch_size,
                                 loss_function,
                                 is_accuracy=is_accuracy)
        print("The model {} accuracy is {}".format(saved_name, test_accuracy))
        return test_accuracy

    for epoch in range(epoches):
        train_loss = train(train_X, batch_size, loss_function, model,
                           optimizer, train_y)
        valid_loss = evaluate(model, valid_X, valid_y, batch_size,
                              loss_function)
        test_loss = evaluate(model, test_X, test_y, batch_size, loss_function)

        train_perplexity = torch.exp(train_loss)[0]
        # train_perplexity = 0
        valid_perplexity = torch.exp(valid_loss)[0]
        test_perplexity = torch.exp(test_loss)[0]

        # print("valid_perplexity:{}".format(valid_perplexity))
        scheduler.step(valid_perplexity)

        if best_valid_perplexity is None or valid_perplexity < best_valid_perplexity:
            best_valid_perplexity = valid_perplexity
            best_test_perplexity = test_perplexity
            print('save model in epoch {}'.format(epoch))
            torch_util.save_model(model, save_path)

        print(
            "epoch {}: train perplexity of {},  valid perplexity of {}, test perplexity of {}"
            .format(epoch, train_perplexity, valid_perplexity,
                    test_perplexity))
    print("The model {} best valid perplexity is {} and test perplexity is {}".
          format(saved_name, best_valid_perplexity, best_test_perplexity))