Example #1
import torch

# The snippet begins mid-call in the source: the opening line below is
# reconstructed from the keyword arguments that follow (BiLSTM_CRF and
# vocab_size=len(word_to_id) are assumptions inferred from usage).
model = BiLSTM_CRF(vocab_size=len(word_to_id),
                   tag_to_ix=tag_to_id,
                   embedding_dim=parameters['word_dim'],
                   hidden_dim=parameters['word_lstm_dim'],
                   use_gpu=use_gpu,
                   char_to_ix=char_to_id,
                   pre_word_embeds=word_embeds,
                   use_crf=parameters['crf'],
                   char_mode=parameters['char_mode'],
                   char_embedding_dim=parameters['char_dim'],
                   char_lstm_dim=parameters['char_lstm_dim'],
                   alpha=parameters['alpha'])
# n_cap=4,
# cap_embedding_dim=10)

if use_gpu:
    model.cuda()

# Plain SGD with momentum over all model parameters.
learning_rate = 0.015
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# Bookkeeping for the training loop: loss curve, best F1 per split, and
# counters controlling how often to plot the loss and evaluate the model.
losses = []
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
all_F = [[0, 0, 0]]
plot_every = 10
eval_every = 20
sample_count = 0

best_idx = 0

if parameters['reload']:
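    # Truncated here in the source. A plausible body for this branch, assuming
    # a hypothetical checkpoint-path variable `model_name`, would be:
    #     model.load_state_dict(torch.load(model_name))
    pass  # placeholder so the truncated snippet still parses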
Example #2
import argparse
import logging

import torch

# Project-local modules assumed from usage below (not shown in the source):
# `dataset` utilities, the BiLSTM_CRF model, and the train_model / eval_model
# loops, e.g.:
#     import dataset
#     from model import BiLSTM_CRF
#     from train import train_model, eval_model

logger = logging.getLogger(__name__)


def main():
    parser = argparse.ArgumentParser()
    # parameters
    parser.add_argument("--epoch",
                        default=100,
                        type=int,
                        help="the number of epoches needed to train")
    parser.add_argument("--lr",
                        default=1e-3,
                        type=float,
                        help="the learning rate")
    parser.add_argument("--train_data_path",
                        default='data/train.tsv',
                        type=str,
                        help="train dataset path")
    parser.add_argument("--dev_data_path",
                        default=None,
                        type=str,
                        help="dev dataset path")
    parser.add_argument("--test_data_path",
                        default='data/test.tsv',
                        type=str,
                        help="test dataset path")
    parser.add_argument("--train_batch_size",
                        default=128,
                        type=int,
                        help="the batch size")
    parser.add_argument("--dev_batch_size",
                        default=64,
                        type=int,
                        help="the batch size")
    parser.add_argument("--test_batch_size",
                        default=64,
                        type=int,
                        help="the batch size")
    parser.add_argument("--embedding_path",
                        default='data/sgns.renmin.bigram-char',
                        type=str,
                        help="pre-trained word embeddings path")
    parser.add_argument("--embedding_size",
                        default=300,
                        type=int,
                        help="the word embedding size")
    parser.add_argument("--hidden_size",
                        default=512,
                        type=int,
                        help="the hidden size")
    parser.add_argument("--fine_tuning",
                        default=True,
                        type=bool,
                        help="whether fine-tune word embeddings")
    parser.add_argument("--early_stopping",
                        default=15,
                        type=int,
                        help="Tolerance for early stopping (# of epochs).")
    parser.add_argument("--load_model",
                        default='results/20_Model_best.pt',
                        help="load pretrained model for testing")
    args = parser.parse_args()

    if not args.train_data_path:
        logger.info("please provide a training dataset path")
        exit()
    if not (args.dev_data_path or args.test_data_path):
        logger.info("please provide a dev or test dataset path")
        exit()

    TEXT, LABEL, vocab_size, word_embeddings, train_iter, dev_iter, test_iter, tag_dict = \
        dataset.load_dataset(args.train_data_path, args.dev_data_path,
                             args.test_data_path, args.embedding_path,
                             args.train_batch_size, args.dev_batch_size,
                             args.test_batch_size)

    # Invert the tag->index mapping so predictions can be decoded back to tags.
    idx_tag = {idx: tag for tag, idx in tag_dict.items()}

    model = BiLSTM_CRF(args.embedding_size, args.hidden_size, vocab_size,
                       tag_dict, word_embeddings)
    if torch.cuda.is_available():
        model = model.cuda()

    # Split the training iterator 90/10 into train and dev portions.
    train_data, dev_data = dataset.train_dev_split(train_iter, 0.9)
    if args.load_model:
        # Evaluate a saved checkpoint on the test set instead of training.
        model.load_state_dict(torch.load(args.load_model, map_location='cpu'))
        p, r, f1, eval_loss, all_assess = eval_model(model, test_iter, idx_tag)
        logger.info('LOC Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    all_assess['LOC']['P'], all_assess['LOC']['R'], all_assess['LOC']['F'])
        logger.info('PER Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    all_assess['PER']['P'], all_assess['PER']['R'], all_assess['PER']['F'])
        logger.info('ORG Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    all_assess['ORG']['P'], all_assess['ORG']['R'], all_assess['ORG']['F'])
        logger.info('Micro_AVG Test P:%.4f, Test R:%.4f, Test F1:%.4f', p, r, f1)
        return

    best_score = 0.0
    for epoch in range(args.epoch):
        train_loss, p, r, f1, eval_loss = train_model(model, train_data,
                                                      dev_data, epoch, args.lr,
                                                      idx_tag)

        logger.info('Epoch:%d, Training Loss:%.4f', epoch, train_loss)
        logger.info('Epoch:%d, Eval Loss:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f',
                    epoch, eval_loss, p, r, f1)
        if f1 > best_score:
            best_score = f1
            # Save a checkpoint whenever the dev F1 improves.
            torch.save(
                model.state_dict(),
                'results/%d_%s_%s.pt' % (epoch, 'Model', str(best_score)))
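
A minimal usage sketch (not from the source; `main.py` and the entry-point guard are assumptions):

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()

# Example invocation, relying on the default data paths declared above:
#     python main.py --epoch 50 --lr 1e-3 --train_batch_size 128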