Example #1
                    log('\n')
                    log('*' * 80)
                    eval_train_batch(epoch, batch_i, loss.item(),
                                     flat_argument, pred, argument2idx)

                    log('dev:')
                    srl_model.eval()
                    score, dev_output = eval_data(srl_model,
                                                  elmo,
                                                  dev_dataset,
                                                  batch_size,
                                                  word2idx,
                                                  fr_word2idx,
                                                  lemma2idx,
                                                  pos2idx,
                                                  pretrain2idx,
                                                  fr_pretrain2idx,
                                                  deprel2idx,
                                                  argument2idx,
                                                  idx2argument,
                                                  idx2word,
                                                  False,
                                                  dev_predicate_correct,
                                                  dev_predicate_sum,
                                                  isPretrain=True)
                    if dev_best_score is None or score[5] > dev_best_score[5]:
                        dev_best_score = score
                        output_predict(
                            os.path.join(
                                result_path, 'dev_argument_{:.2f}.pred'.format(
                                    dev_best_score[2] * 100)), dev_output)
                        torch.save(srl_model.EN_Labeler.state_dict(),
Example #2
def parallel_train_1_epoc(srl_model,
                          criterion,
                          optimizer,
                          train_dataset,
                          labeled_dataset_fr,
                          batch_size,
                          word2idx,
                          fr_word2idx,
                          lemma2idx,
                          pos2idx,
                          pretrain2idx,
                          fr_pretrain2idx,
                          deprel2idx,
                          argument2idx,
                          idx2word,
                          shuffle=False,
                          lang='En',
                          dev_best_score=None,
                          test_best_score=None,
                          test_ood_best_score=None):
    unlabeled_dataset_en, unlabeled_dataset_fr = train_dataset
    unlabeled_Generator_En = inter_utils.get_batch(unlabeled_dataset_en,
                                                   batch_size,
                                                   word2idx,
                                                   fr_word2idx,
                                                   lemma2idx,
                                                   pos2idx,
                                                   pretrain2idx,
                                                   fr_pretrain2idx,
                                                   deprel2idx,
                                                   argument2idx,
                                                   idx2word,
                                                   shuffle=False,
                                                   lang="En")

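    # Advance the English generator by hand inside the French loop so each
    # French batch is paired with one English batch; the two unlabeled
    # corpora are assumed to yield equally many batches.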
    for batch_i, unlabeled_data_fr in enumerate(
            inter_utils.get_batch(unlabeled_dataset_fr,
                                  batch_size,
                                  word2idx,
                                  fr_word2idx,
                                  lemma2idx,
                                  pos2idx,
                                  pretrain2idx,
                                  fr_pretrain2idx,
                                  deprel2idx,
                                  argument2idx,
                                  idx2word,
                                  shuffle=False,
                                  lang='Fr')):
        srl_model.train()
        unlabeled_data_en = next(unlabeled_Generator_En)

        predicates_1D = unlabeled_data_en['predicates_idx']
        predicates_1D_fr = unlabeled_data_fr['predicates_idx']
        #log(predicates_1D, predicates_1D_fr)
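        # Forward pass over the paired En/Fr batch; with the unlabeled flag
        # set, the model returns an unsupervised loss (no gold arguments).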
        u_loss = srl_model((unlabeled_data_en, unlabeled_data_fr),
                           lang='En',
                           unlabeled='True')
        optimizer.zero_grad()
        u_loss.backward()
        optimizer.step()

        if batch_i % 50 == 0:
            log(batch_i, u_loss)

        if batch_i % 500 == 0:
            log('\n')
            log('*' * 80)
            srl_model.eval()
            # eval_train_batch(epoch, batch_i, loss.item(), flat_argument, pred, argument2idx)

            log('FR test:')
            score, dev_output = eval_data(srl_model,
                                          elmo,
                                          labeled_dataset_fr,
                                          30,
                                          word2idx,
                                          fr_word2idx,
                                          lemma2idx,
                                          pos2idx,
                                          pretrain2idx,
                                          fr_pretrain2idx,
                                          deprel2idx,
                                          argument2idx,
                                          idx2argument,
                                          idx2word,
                                          False,
                                          dev_predicate_correct,
                                          dev_predicate_sum,
                                          lang='Fr')

            if dev_best_score is None or score[5] > dev_best_score[5]:
                dev_best_score = score
                output_predict(
                    os.path.join(
                        result_path, 'dev_argument_{:.2f}.pred'.format(
                            dev_best_score[2] * 100)), dev_output)
                # torch.save(srl_model, os.path.join(os.path.dirname(__file__),'model/best_{:.2f}.pkl'.format(dev_best_score[2]*100)))
            log('\tdev best P:{:.2f} R:{:.2f} F1:{:.2f} NP:{:.2f} NR:{:.2f} NF1:{:.2f}'
                .format(dev_best_score[0] * 100, dev_best_score[1] * 100,
                        dev_best_score[2] * 100, dev_best_score[3] * 100,
                        dev_best_score[4] * 100, dev_best_score[5] * 100))
    return dev_best_score
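
A minimal driver sketch for the routine above, assuming an outer epoch loop; max_epoch and the dataset variables here are placeholders rather than names taken from the original:

dev_best_score = None
for epoch in range(max_epoch):
    # Thread the running best dev score through the epochs so the
    # save-on-improvement logic inside the routine fires only on
    # genuine improvement.
    dev_best_score = parallel_train_1_epoc(
        srl_model, criterion, optimizer,
        (unlabeled_dataset_en, unlabeled_dataset_fr),
        labeled_dataset_fr, batch_size,
        word2idx, fr_word2idx, lemma2idx, pos2idx,
        pretrain2idx, fr_pretrain2idx, deprel2idx,
        argument2idx, idx2word,
        dev_best_score=dev_best_score)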
Example #3
def train_1_epoc(srl_model,
                 criterion,
                 optimizer,
                 train_dataset,
                 labeled_dataset_fr,
                 batch_size,
                 word2idx,
                 fr_word2idx,
                 lemma2idx,
                 pos2idx,
                 pretrain2idx,
                 fr_pretrain2idx,
                 deprel2idx,
                 argument2idx,
                 idx2word,
                 shuffle=False,
                 lang='En',
                 dev_best_score=None,
                 test_best_score=None,
                 test_ood_best_score=None):
    for batch_i, train_input_data in enumerate(
            inter_utils.get_batch(train_dataset,
                                  batch_size,
                                  word2idx,
                                  fr_word2idx,
                                  lemma2idx,
                                  pos2idx,
                                  pretrain2idx,
                                  fr_pretrain2idx,
                                  deprel2idx,
                                  argument2idx,
                                  idx2word,
                                  shuffle=shuffle,
                                  lang=lang)):

        flat_argument = train_input_data['flat_argument']
        target_batch_variable = get_torch_variable_from_np(flat_argument)

        out, out_word = srl_model(train_input_data, lang='En')
        loss = criterion(out, target_batch_variable)
        loss_word = criterion(out_word, target_batch_variable)
        if batch_i % 50 == 0:
            log(batch_i, loss, loss_word)

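        # Back-propagate through both heads jointly by summing their losses.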
        optimizer.zero_grad()
        (loss + loss_word).backward()
        optimizer.step()

        if batch_i > 0 and batch_i % show_steps == 0:

            _, pred = torch.max(out, 1)

            pred = get_data(pred)

            # pred = pred.reshape([bs, sl])

            log('\n')
            log('*' * 80)

            eval_train_batch(epoch, batch_i, loss.item(), flat_argument, pred,
                             argument2idx)

            log('FR test:')
            score, dev_output = eval_data(srl_model,
                                          elmo,
                                          labeled_dataset_fr,
                                          batch_size,
                                          word2idx,
                                          fr_word2idx,
                                          lemma2idx,
                                          pos2idx,
                                          pretrain2idx,
                                          fr_pretrain2idx,
                                          deprel2idx,
                                          argument2idx,
                                          idx2argument,
                                          idx2word,
                                          False,
                                          dev_predicate_correct,
                                          dev_predicate_sum,
                                          lang='Fr')

            if dev_best_score is None or score[5] > dev_best_score[5]:
                dev_best_score = score
                output_predict(
                    os.path.join(
                        result_path, 'dev_argument_{:.2f}.pred'.format(
                            dev_best_score[2] * 100)), dev_output)
                # torch.save(srl_model, os.path.join(os.path.dirname(__file__),'model/best_{:.2f}.pkl'.format(dev_best_score[2]*100)))
            log('\tdev best P:{:.2f} R:{:.2f} F1:{:.2f} NP:{:.2f} NR:{:.2f} NF1:{:.2f}'
                .format(dev_best_score[0] * 100, dev_best_score[1] * 100,
                        dev_best_score[2] * 100, dev_best_score[3] * 100,
                        dev_best_score[4] * 100, dev_best_score[5] * 100))
    return dev_best_score
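
The score tuple compared above is indexed positionally; judging from the log format string, its order appears to be (P, R, F1, NP, NR, NF1). A small sketch, assuming that order, makes the selection rule explicit:

from collections import namedtuple

# Field order inferred from the 'dev best P R F1 NP NR NF1' log line above;
# an assumption, not something the original code declares.
SrlScore = namedtuple('SrlScore', ['p', 'r', 'f1', 'np', 'nr', 'nf1'])

def is_improvement(score, best):
    # Mirrors the loop's condition: select on NF1 (score[5]).
    return best is None or SrlScore(*score).nf1 > SrlScore(*best).nf1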
Example #4
                    log('\n')
                    log('*' * 80)

                    eval_train_batch(epoch, batch_i, loss.item(),
                                     flat_argument, pred, argument2idx)

                    log('FR test:')
                    score, dev_output = eval_data(srl_model,
                                                  elmo,
                                                  labeled_dataset_fr,
                                                  batch_size,
                                                  word2idx,
                                                  fr_word2idx,
                                                  lemma2idx,
                                                  pos2idx,
                                                  pretrain2idx,
                                                  fr_pretrain2idx,
                                                  deprel2idx,
                                                  argument2idx,
                                                  idx2argument,
                                                  idx2word,
                                                  False,
                                                  dev_predicate_correct,
                                                  dev_predicate_sum,
                                                  lang='Fr')
                    log('En test:')
                    eval_data(srl_model,
                              elmo,
                              dev_dataset,
                              batch_size,
                              word2idx,
                              fr_word2idx,
Example #5
                    _, pred = torch.max(out, 1)

                    pred = get_data(pred)

                    # pred = pred.reshape([bs, sl])

                    print('\n')
                    print('*' * 80)

                    eval_train_batch(epoch, batch_i, loss.item(),
                                     flat_argument, pred, argument2idx)

                    print('dev:')
                    score, dev_output = eval_data(
                        srl_model, elmo, dev_dataset, batch_size, word2idx,
                        lemma2idx, pos2idx, pretrain2idx, deprel2idx,
                        argument2idx, idx2argument, False,
                        dev_predicate_correct, dev_predicate_sum)
                    if dev_best_score is None or score[2] > dev_best_score[2]:
                        dev_best_score = score
                        output_predict(
                            os.path.join(
                                result_path, 'dev_argument_{:.2f}.pred'.format(
                                    dev_best_score[2] * 100)), dev_output)
                        # torch.save(srl_model, os.path.join(os.path.dirname(__file__),'model/best_{:.2f}.pkl'.format(dev_best_score[2]*100)))
                    print(
                        '\tdev best P:{:.2f} R:{:.2f} F1:{:.2f} NP:{:.2f} NR:{:.2f} NF1:{:.2f}'
                        .format(dev_best_score[0] * 100,
                                dev_best_score[1] * 100,
                                dev_best_score[2] * 100,
                                dev_best_score[3] * 100,
Example #6
                    srl_model.eval()
                    _, pred = torch.max(out, 1)
                    pred = get_data(pred)

                    print('\n')
                    print('*' * 80)

                    #eval_train_batch(epoch, batch_i, loss.item(), flat_argument, pred, argument2idx)

                    print('Zh test:')
                    score, dev_output = eval_data(srl_model,
                                                  elmo,
                                                  dev_dataset_fr,
                                                  batch_size,
                                                  argument2idx,
                                                  idx2argument,
                                                  False,
                                                  dev_predicate_correct,
                                                  dev_predicate_sum,
                                                  lang='Fr',
                                                  use_bert=use_bert)
                    print('En test:')
                    eval_data(srl_model,
                              elmo,
                              dev_dataset_en,
                              batch_size,
                              argument2idx,
                              idx2argument,
                              False,
                              dev_predicate_correct,
                              dev_predicate_sum,