Example #1
def train_and_val():
    embedding_dim = 100
    hidden_dim = 100
    model_load_path = None
    best_model_save_path = 'model/model_100_best_0223.pth'
    max_score = 0
    stop_epoch = 30
    unimprove_time = 0
    val_json_path = '/home/agwave/Data/resume/val_0222.json'
    val_pdf_dir = '/home/agwave/Data/resume/val_0222/'

    training_data = get_data_from_data_txt(TRAIN_WORD_TO_TAG_PATH)
    with open('supporting_document/train_word_to_tag_0223.json', 'r') as j:
        word_to_ix = json.load(j)
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4, 'i-gend': 5,
                 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9, 'b-nati': 10, 'i-nati': 11,
                 'b-live': 12, 'i-live': 13, 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21, 'b-post': 22, 'i-post': 23,
                 'b-proj': 24, 'i-proj': 25, 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33, 'b-prti': 34, 'i-prti': 35,
                 'o': 36, '<start>': 37, '<stop>': 38}
    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    start_epoch = 0
    if model_load_path is not None:
        print('load model...')
        checkpoint = torch.load(model_load_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
    preliminary_score = get_score_by_model(model, val_json_path, val_pdf_dir)
    print('preliminary score:', preliminary_score)

    for epoch in range(start_epoch, stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, val_json_path, val_pdf_dir)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        if cur_epoch_score > max_score:
            unimprove_time = 0
            max_score = cur_epoch_score
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch
            }, best_model_save_path)
            print('save best model successfully.')
        else:
            break
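Example #1 defines an unimprove_time counter but stops on the first epoch without improvement; a minimal sketch of a patience-based variant (assuming a hypothetical patience constant, not the original author's code) could look like this:

# Sketch only: patience-based early stopping with an assumed `patience` constant.
patience = 3
for epoch in range(start_epoch, stop_epoch):
    ...  # run the training loop over training_data as above
    cur_epoch_score = get_score_by_model(model, val_json_path, val_pdf_dir)
    if cur_epoch_score > max_score:
        unimprove_time = 0
        max_score = cur_epoch_score
        torch.save({'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch}, best_model_save_path)
    else:
        unimprove_time += 1
        if unimprove_time >= patience:
            break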
Example #2
def _main():
    data_manager = DataManager()
    vocab_size = len(data_manager.word2ix)
    model = BiLSTM_CRF(device, vocab_size, data_manager.tag2ix, EMBEDDING_DIM, HIDDEN_DIM)
    model = model.to(device)

    train_set = NerDataset(data_manager.train_sents, data_manager.train_tags)
    dev_set = NerDataset(data_manager.dev_sents, data_manager.dev_tags)
    train_loader = DataLoader(train_set, batch_size=BATCH_SZ, shuffle=True)
    dev_loader = DataLoader(dev_set, batch_size=BATCH_SZ, shuffle=True)

    optimizer = optim.Adam(model.parameters(), lr=0.01)
    epoch_loss = []

    '''with torch.no_grad():
        precheck_sent = to_tensor(train_loader[0])
        precheck_tag = to_tensor(dataset.train_tags[0])
        print(precheck_tag)
        print(model(precheck_sent))'''

    for epoch in range(EPOCH_NUM):
        epoch_loss = []  # reset each epoch so the printed average is per-epoch
        for sents, tags, lengths in tqdm(train_loader):
            sents = sents.to(device)
            tags = tags.to(device)
            lengths = lengths.to(device)
            # print(lengths, sents.size(), tags.size())
            loss = model.neg_log_likelihood(sents, tags, lengths)

            epoch_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(epoch, ' epoch loss: ', sum(epoch_loss)/len(epoch_loss))
        save_model(model, epoch)
        eval(model, dev_loader)
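The save_model and eval helpers called above are not shown; a minimal sketch of the evaluation step, assuming the same neg_log_likelihood(sents, tags, lengths) interface and reporting only the average dev loss, might be:

def eval(model, dev_loader):
    # Sketch of an assumed dev-set evaluation step: reports average loss only.
    device = next(model.parameters()).device
    model.eval()
    losses = []
    with torch.no_grad():
        for sents, tags, lengths in dev_loader:
            sents, tags, lengths = sents.to(device), tags.to(device), lengths.to(device)
            losses.append(model.neg_log_likelihood(sents, tags, lengths).item())
    model.train()
    print('dev loss:', sum(losses) / len(losses))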
Example #3
def train_all_data():
    embedding_dim = 100
    hidden_dim = 100
    stop_epoch = 1
    model_1_epoch = 'model/model_1_epoch_lr0001.pth'

    training_data = get_data_from_data_txt(DATA_PERFECT_PATH)
    word_to_ix = get_word_to_ix(training_data, min_word_freq=1)
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4, 'i-gend': 5,
                 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9, 'b-nati': 10, 'i-nati': 11,
                 'b-live': 12, 'i-live': 13, 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21, 'b-post': 22, 'i-post': 23,
                 'b-proj': 24, 'i-proj': 25, 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33, 'b-prti': 34, 'i-prti': 35,
                 'o': 36, '<start>': 37, '<stop>': 38, 'c-live': 39, 'c-proj': 40, 'c-woti': 41,
                 'c-post': 42, 'c-unv': 43, 'c-nati': 44, 'c-poli': 45, 'c-prti':46, 'c-comp': 47}

    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Make sure prepare_sequence from earlier in the LSTM section is loaded
    for epoch in range(stop_epoch):  # again, normally you would NOT do 300 epochs, it is toy data
        print("---------------------")
        print("running epon : ", epoch + 1)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 15)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, TRAIN_JSON_PATH, TRAIN_PDF_DIR)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        print()
        if epoch == stop_epoch - 1:  # save the model after the final epoch
            torch.save({
                'model_state_dict': model.state_dict()
            }, model_1_epoch)
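Example #3 saves only the model weights (unlike Example #1, which also stores the optimizer state and epoch), so reloading that checkpoint later needs just:

# Reload the weights-only checkpoint saved above.
checkpoint = torch.load('model/model_1_epoch_lr0001.pth')
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()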
Example #4
def train():
    """
    模型训练
    """
    train_writer = SummaryWriter(log_dir='./log/train')
    test_writer = SummaryWriter(log_dir='./log/test')

    # Step 1: model
    bilstm_crf = BiLSTM_CRF(opt.vocab_size, opt.emb_dim, opt.emb_dim//2, opt.tag_num, dropout=opt.dropout)
    if opt.load_model_path:     # optionally load a checkpoint
        bilstm_crf.load(opt.load_model_path)

    # Step 2: data
    rmrb_train_dataset = RmrbDataset(train=True)
    rmrb_test_dataset = RmrbDataset(train=False)
    rmrb_train_dataloader = DataLoader(rmrb_train_dataset, batch_size=64, shuffle=True)
    rmrb_test_dataloader = DataLoader(rmrb_test_dataset, batch_size=len(rmrb_test_dataset), shuffle=True)

    # Step 3: loss function and optimizer
    # loss_fn = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=bilstm_crf.parameters(), lr=lr, weight_decay=opt.weight_decay)

    previous_loss = 1e9
    iteration = 0
    for epoch in range(opt.max_epoch):
        print('epoch {}'.format(epoch))
        for ii, (x_batch, y_batch) in enumerate(rmrb_train_dataloader):
            # compute the loss
            loss = bilstm_crf.log_likelihood(x_batch, y_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            if ii % 20 == 0:
                # print('loss:{}'.format(loss.item()))
                train_writer.add_scalar('Loss', loss.item(), iteration)
                iteration += 1
                if loss > previous_loss:
                    lr = lr * opt.lr_decay
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr
                else:
                    previous_loss = loss.item()
        # save a model checkpoint
        bilstm_crf.save()

        # evaluation metrics
        with t.no_grad():
            bilstm_crf.eval()   # switch the model to evaluation mode
            for x_test, y_test in rmrb_test_dataloader:
                test_loss = bilstm_crf.log_likelihood(x_test, y_test)
                test_writer.add_scalar('Loss', test_loss.item(), iteration)
                y_pre = bilstm_crf(x_test)
                print(classification_report(t.flatten(y_test), t.flatten(y_pre)))
            bilstm_crf.train()  # switch the model back to training mode
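Example #4 decays the learning rate by hand whenever the loss stops improving; a sketch of the same idea with PyTorch's built-in ReduceLROnPlateau scheduler (the patience value is assumed) would be:

# Sketch: have a scheduler apply the decay instead of editing param_groups by hand.
scheduler = t.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=opt.lr_decay, patience=2)  # patience is an assumed value
...
scheduler.step(test_loss.item())  # step once per evaluation with the monitored loss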
Example #5
def run(word_train,
        label_train,
        word_dev,
        label_dev,
        vocab,
        device,
        kf_index=0):
    # build dataset
    train_dataset = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_dataset = SegDataset(word_dev, label_dev, vocab, config.label2id)
    # build data_loader
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              collate_fn=train_dataset.collate_fn)
    dev_loader = DataLoader(dev_dataset,
                            batch_size=config.batch_size,
                            shuffle=True,
                            collate_fn=dev_dataset.collate_fn)
    # model
    model = BiLSTM_CRF(embedding_size=config.embedding_size,
                       hidden_size=config.hidden_size,
                       vocab_size=vocab.vocab_size(),
                       target_size=vocab.label_size(),
                       num_layers=config.lstm_layers,
                       lstm_drop_out=config.lstm_drop_out,
                       nn_drop_out=config.nn_drop_out)
    model.to(device)
    # optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=config.lr,
                           betas=config.betas)
    scheduler = StepLR(optimizer,
                       step_size=config.lr_step,
                       gamma=config.lr_gamma)
    # TODO: find a more elegant way to initialize these parameters
    for p in model.crf.parameters():
        _ = torch.nn.init.uniform_(p, -1, 1)
    # train and test
    # train(train_loader, dev_loader, vocab, model, optimizer, scheduler, device, kf_index)
    with torch.no_grad():
        # test on the final test set
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
    return test_loss, f1
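The uniform initialization of the CRF parameters above is one option; a hedged alternative sketch (not the project's actual scheme) is Xavier initialization for matrix-shaped parameters and zeros for vector-shaped ones:

# Sketch of an alternative initialization.
for name, p in model.named_parameters():
    if p.dim() > 1:
        torch.nn.init.xavier_uniform_(p)
    else:
        torch.nn.init.zeros_(p)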
Example #6
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


# build the word-to-id mapping
word_to_ix = {}
for sentence, tags in training_data:
    for word in sentence:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

tag_to_ix = {"B": 0, "I": 1, "O": 2, START_TAG: 3, STOP_TAG: 4}

model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM)
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)

# Check predictions before training
with torch.no_grad():
    # id sequence for the first training sentence
    precheck_sent = prepare_sequence(training_data[0][0], word_to_ix)
    # tag-id sequence for that training sample
    precheck_tags = torch.tensor([tag_to_ix[t] for t in training_data[0][1]],
                                 dtype=torch.long)
    print('Score and tag-id sequence before training:')
    print(model(precheck_sent))

# Make sure prepare_sequence from earlier in the LSTM section is loaded
for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
        # Step 1. Clear accumulated gradients
        model.zero_grad()

        # Step 2. Turn the inputs into tensors of word and tag indices
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)

        # Step 3. Run the forward pass, compute the loss, backpropagate, and update
        loss = model.neg_log_likelihood(sentence_in, targets)
        loss.backward()
        optimizer.step()
Example #7
                   tag_to_ix=tag_to_id,
                   embedding_dim=parameters['word_dim'],
                   hidden_dim=parameters['word_lstm_dim'],
                   use_gpu=use_gpu,
                   char_to_ix=char_to_id,
                   pre_word_embeds=word_embeds,
                   use_crf=parameters['crf'],
                   char_mode=parameters['char_mode'])
# n_cap=4,
# cap_embedding_dim=10)

file_name = './evaluation/saved_checkpoint_wfeats.txt'
if use_gpu:
    model.cuda()
learning_rate = 0.015
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
#print(restore_checkpoint(file_name, model, optimizer))
model, optimizer, current_epoch, other_info = restore_checkpoint(
    file_name, model, optimizer)
print("model loaded")
sys.stdout.flush()

print("begin labeling")


def label(model, datas):
    prediction = []
    for data in datas:
        words = data['str_words']
        chars2 = data['chars']
        caps = data['caps']
Example #8
def train(conf):
    train_sentences = load_sentences(conf.train_file, conf.zeros)
    dev_sentences = load_sentences(conf.dev_file, conf.zeros)
    test_sentences = load_sentences(conf.test_file, conf.zeros)

    dico_chars_train = char_mapping(train_sentences, conf.lower)[0]
    dico_chars, char_to_id, id_to_char = augment_with_pretrained(
        dico_chars_train.copy(), conf.emb_file,
        list(
            itertools.chain.from_iterable([[w[0] for w in s]
                                           for s in test_sentences])))
    _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 conf.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               conf.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                conf.lower)

    #loading word embeddings
    all_word_embeds = {}
    for i, line in enumerate(codecs.open(conf.emb_file, 'r', 'utf-8')):
        s = line.strip().split()
        if len(s) == conf.embedding_dim + 1:
            all_word_embeds[s[0]] = np.array([float(i) for i in s[1:]])
    word_embeds_dict = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06),
                                         (len(char_to_id), conf.embedding_dim))
    for w in char_to_id:
        if w in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w]
        elif w.lower() in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w.lower()]
    print('Loaded %i pretrained embeddings.' % len(all_word_embeds))

    train_manager = BatchManager(train_data, conf.batch_size)

    model = BiLSTM_CRF(conf, tag_to_id, char_to_id, word_embeds_dict)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=conf.learning_rate,
                                weight_decay=1e-4)
    epochs = conf.epochs
    dev_f1_ = 0
    for epoch in range(1, epochs + 1):
        print(f'train on epoch {epoch}')
        j = 1
        for batch in train_manager.iter_batch(shuffle=True):
            batch_loss = 0.0
            sentences = batch[1]
            tags = batch[-1]
            for i, index in enumerate(np.random.permutation(len(sentences))):
                model.zero_grad()
                sentence_in = sentences[index]
                tags_in = tags[index]
                loss = model.neg_log_likelihood(sentence_in, tags_in)
                loss.backward()
                optimizer.step()
                batch_loss += loss.data
            print(
                f'[batch {j},batch size:{conf.batch_size}] On this batch loss: {batch_loss}'
            )
            j = j + 1
        print(f'Begin validating result on [epoch {epoch}] valid dataset ...')
        dev_results = get_predictions(model, dev_data, id_to_tag)
        dev_f1 = evaluate_ner(dev_results, conf)
        if dev_f1 > dev_f1_:
            dev_f1_ = dev_f1
            torch.save(model, conf.model_file)
            print('save model successfully.')
        test_results = get_predictions(model, test_data, id_to_tag)
        test_f1 = evaluate_ner(test_results, conf)
        print(f'[epoch {epoch}] On test dataset f1: {test_f1:.3f}')
Example #9
    embedding_dim=parameters['word_dim'],
    hidden_dim=parameters['word_lstm_dim'],
    use_gpu=use_gpu,
    pre_word_embeds=word_embeds,
    use_crf=True,  #parameters['crf'],
    semroles_embedding_dim=2000,
)

if parameters['reload']:
    model = torch.load(model_name)
if use_gpu:
    model.cuda()
learning_rate = 0.015

parameters_alg = filter(lambda p: p.requires_grad, model.parameters())

optimizer = torch.optim.SGD(parameters_alg, lr=learning_rate, momentum=0.9)

losses = []
loss = 0.0
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0

best_dev_Acc = -1.0
best_test_Acc = -1.0
best_train_Acc = -1.0
best_dev_Acc_post = -1.0
best_test_Acc_post = -1.0
best_train_Acc_post = -1.0
Example #10
                   use_gpu=use_gpu,
                   char_to_ix=char_to_id,
                   pre_word_embeds=word_embeds,
                   use_crf=parameters['crf'],
                   char_mode=parameters['char_mode'],
                   char_embedding_dim=parameters['char_dim'],
                   char_lstm_dim=parameters['char_lstm_dim'],
                   alpha=parameters['alpha'])
# n_cap=4,
# cap_embedding_dim=10)

if use_gpu:
    model.cuda()

learning_rate = 0.015
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
losses = []
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
all_F = [[0, 0, 0]]
plot_every = 10
eval_every = 20
sample_count = 0

best_idx = 0

if parameters['reload']:
    print('loading model:', parameters['reload'])
    checkpoint = torch.load(models_path + parameters['reload'])
    #model.load_state_dict(checkpoint)
Example #11
    char_mode=char_mode,
    char_embedding_dim=char_dim,
    char_lstm_dim=char_lstm_dim,
    char_lstm_bidirect=char_lstm_bidirect,
    char_cnn_win=char_cnn_win,
    char_cnn_output=char_cnn_dim,
    char_to_id=char_to_id,
    use_gpu=use_gpu,
    dropout=dropout,
    use_crf=use_crf,
)

print(model)

p_count = 0
for parameter in model.parameters():
    if parameter.requires_grad:
        p_count += 1

name_count = 0
param_list = []
for param_name, param in model.named_parameters():
    if param.requires_grad:
        name_count += 1
        print(param_name, "  ", param.size())
        param_list.append(param_name)

print("p_count:{0},name_count:{1}".format(p_count, name_count))

log = str(model)
with open(os.path.join(logs_path, "{0}.important.log".format(name)),
Example #12
                                 "rb"))
    print('word vocab', len(word_vocab))
    print('char vocab', len(char_vocab))
    print('pos vocab', len(pos_vocab))
    print('tag vocab', len(tag_vocab))

    schema = get_schemas(source_path)

    # model
    train_device = torch.device(device if torch.cuda.is_available() else "cpu")
    model = BiLSTM_CRF(char_init_embed=(len(char_vocab), char_embed_dim),
                       word_init_embed=(len(word_vocab), word_embed_dim),
                       pos_init_embed=(len(pos_vocab), pos_embed_dim),
                       spo_embed_dim=len(schema),
                       sentence_length=seq_len,
                       hidden_size=hidden_dim,
                       num_classes=len(tag_vocab),
                       dropout=dropout,
                       id2words=tag_vocab.idx2word,
                       encoding_type=encoding_type,
                       weight=weight)
    model.to(train_device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)

    # train
    writer = SummaryWriter(log_dir=log_path)
    train()
    writer.close()
Example #13
def my_train():
    os.makedirs(f"model_result", exist_ok=True)
    torch.manual_seed(1)
    device = torch.device('cuda')

    data_dir = f"data/{DATASET}/processed"

    # load the datasets
    train_data = NERDataset(os.path.join(data_dir, "train.pkl"))
    test_data = NERDataset(os.path.join(data_dir, "test.pkl"))
    dev_data = NERDataset(os.path.join(data_dir, "dev.pkl"))

    word_to_idx = load_obj(os.path.join(data_dir, "word_to_idx.pkl"))
    tag_to_idx = load_obj(os.path.join(data_dir, "tag_to_idx.pkl"))

    idx_to_tag = {n: m for m, n in tag_to_idx.items()}

    train_loader = DataLoader(
        train_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    dev_loader = DataLoader(
        dev_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )

    # build the model
    model = BiLSTM_CRF(len(word_to_idx), len(tag_to_idx), EMBEDDING_DIM,
                       HIDDEN_DIM, DROPOUT).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)

    print("\n开始训练")
    f1_max = 0
    cur_patience = 0  # 用于避免过拟合
    for epoch in range(EPOCHS):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device),
                              masks.to(device))
            loss.backward()
            optimizer.step()
            if i % LOG_INTERVAL == 0:
                print("epoch {}: {:.0f}%\t\tLoss: {:.6f}".format(
                    epoch, 100.0 * i / len(train_loader), loss.item()))
        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader,
                                                     idx_to_tag)
        test_precision, test_recall, test_f1 = evaluate(
            model, test_loader, idx_to_tag)
        print(
            f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}"
        )
        print(
            f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n"
        )

        torch.save(model.state_dict(), f"model_result/{epoch}.pt")

        if dev_f1 > f1_max:  # track the best dev f1 to detect overfitting
            f1_max = dev_f1
            cur_patience = 0
            if dev_f1 > 0.9 and test_f1 > 0.9:
                break
        else:
            cur_patience += 1
            if cur_patience >= PATIENCE:  # stop after several epochs below the best f1
                break
    print("Best dev F1: ", f1_max)
Example #14
    )
    test_loader = DataLoader(
        test_data,
        batch_size=args.batch_size,
        collate_fn=BatchPadding(),
        shuffle=False,
        num_workers=2,
        pin_memory=True,
    )

    # Model
    model = BiLSTM_CRF(
        len(word_to_ix), len(tag_to_ix), args.embed_dim, args.hidden_dim, args.dropout
    ).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    print("Training...")
    best_dev_f1 = 0
    bad_count = 0
    for epoch in range(args.epochs):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device), masks.to(device))
            loss.backward()
            optimizer.step()
            if i % args.log_interval == 0:
                print(
                    "Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                        epoch + 1,
Example #15
#                    hidden_size=parameters['word_lstm_size'],
#                    use_gpu=use_gpu,
#                    char_to_ix=char_to_id,
#                    pre_word_embeds=word_embeds,
#                    crf=parameters['crf'],
#                    char_mode=parameters['char_mode'])
# n_cap=4,
# cap_embedding_size=10)
if parameters['reload']:
    model.load_state_dict(torch.load(model_name))
if use_gpu:
    model.cuda()
learning_rate = args.lr
#args.lr_method = "adadelta"#"momentum"#"adadelta"#"adagrad"#"sgd"
if args.lr_method == "sgd":
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=0.9)
elif args.lr_method == "adadelta":
    optimizer = torch.optim.Adadelta(model.parameters(),
                                     learning_rate,
                                     rho=0.95,
                                     eps=1e-06)
losses = []
loss = 0.0
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
all_F = [[0, 0, 0]]
plot_every = 500
eval_every = 5000
Example #16
                   tag_to_ix=tag_to_id,
                   embedding_dim=parameters['word_dim'],
                   hidden_dim=parameters['word_lstm_dim'],
                   use_gpu=use_gpu,
                   pre_word_embeds=word_embeds,
                   use_crf=True,     #parameters['crf'],
                   semroles_embedding_dim=2000,
)

if parameters['reload']:
    model = torch.load(model_name)
if use_gpu:
    model.cuda()
learning_rate = 0.015

parameters_alg = filter(lambda p: p.requires_grad, model.parameters())

optimizer = torch.optim.SGD(parameters_alg, lr=learning_rate, momentum=0.9)


losses = []
loss = 0.0
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0

best_dev_Acc = -1.0
best_test_Acc = -1.0
best_train_Acc = -1.0
best_dev_Acc_post = -1.0
best_test_Acc_post = -1.0
Example #17
train_dataset, eval_dataset = torch.utils.data.random_split(
    dataset, (80000, 10000))

train_dataloder = DataLoader(train_dataset,
                             batch_size=Config.batch_size,
                             shuffle=True,
                             num_workers=1,
                             drop_last=False)

model = BiLSTM_CRF(len(char2idx), len(Config.tagert2idx), Config.embedding_dim,
                   Config.hidden_dim)

# use the GPU by default
if Config.use_gpu:
    model = model.to('cuda')
optimizer = optim.Adam(model.parameters(),
                       lr=0.001,
                       betas=(0.9, 0.999),
                       eps=1e-08,
                       weight_decay=0)

best_score = 0
for epoch in range(Config.epochs):
    model.train()
    total_loss = 0
    for batch_sentence, batch_label, batch_length in train_dataloder:

        model.zero_grad()

        batch_sentence, batch_label, batch_length, _ = sort_batch_data(
            batch_sentence, batch_label, batch_length)
Example #18
# with open('./data/wvmodel.pkl', 'rb') as inp:
#     wvmodel = pickle.load(inp)
# print('wvmodel loaded!')
#
# weight = torch.zeros(args.vocab_size, args.embedding_size)
# for i in range(len(wvmodel.index2word)):
#     try:
#         index = word_to_idx[wvmodel.index2word[i]]
#     except:
#         continue
#     weight[index,:] = torch.from_numpy(wvmodel.get_vector(
#         idx_to_word[word_to_idx[wvmodel.index2word[i]]]))


model = BiLSTM_CRF(args, label2idx, weight, device).to(device)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.weight_decay)

best_f1 = 0.0
print('training on', device)
for epoch in range(1):
    model.train()
    train_loss_sum = 0.0
    steps = 0
    for batch in train_iter:
        X, y = batch.TEXT, batch.LABEL
        X, y = X.to(device).long(), y.to(device).long()

        loss = model.neg_log_likelihood(X, y)

        optimizer.zero_grad()
        loss.backward()
Example #19
        cPickle.dump(mappings, f)

#Model Load
model = BiLSTM_CRF(word_to_ix=word_to_id, ix_to_word=id_to_word, tag_to_ix=tag_to_id, char_to_ix = char_to_id, mor_to_ix = mor_to_id,
    embedding_dim=parameters['word_dim'], hidden_dim=parameters['word_lstm_dim'], char_lstm_dim=parameters['char_lstm_dim'],
    char_dim = parameters['char_dim'], pre_word_embeds=word_embeds,
    pre_char_embeds = char_embeds, use_gpu=parameters['use_gpu'], use_crf=parameters['crf'], use_elmo=parameters['use_elmo'],
    elmo_option = parameters['elmo_option'], elmo_weight = parameters['elmo_weight'])


if parameters['reload']:
    model.load_state_dict(torch.load(model_name))
if use_gpu:
    model.cuda()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
losses = []
loss = 0.0
best_test = -np.inf
best_dev = -np.inf
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
best_epoch = 0
best_dev_epoch = 0
all_F = [[0, 0, 0]]
plot_every = 50
eval_every = 350
count = 0
test_list = []
dev_list = []
Example #20
        batch_size=args.batch_size,collate_fn=collate_fn, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_data, batch_size=args.batch_size,
        collate_fn=collate_fn, shuffle=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=args.batch_size,
                                               collate_fn=collate_fn, shuffle=False)

    weight_matrix = get_weight(wvmodel,len(word2id),args.embedding_size)
    print('weight_matrix',weight_matrix.size())
    model = BiLSTM_CRF(len(word2id),label2id, args.embedding_size, weight_matrix, args.hidden_size).cuda()
    
    if os.path.exists(args.param_path):
        print('loading params')
        # pdb.set_trace()
        model.load_state_dict(torch.load(args.param_path))

    optim = torch.optim.Adam(model.parameters(), args.learning_rate)
    criterion = torch.nn.CrossEntropyLoss()

    train(args, train_loader,valid_loader, model, optim, criterion)
    end_loss, end_f1 = evaluate_accuracy(model, test_loader)
    print("====================>test loss: %.4f, test f1 : %.4f"%(end_loss, end_f1))
else:
    print('test begin')
    with open(args.test_path, 'r', encoding='utf-8') as ftest_text:
        test_textlines = [line.strip().lower().split(' ') for line in ftest_text.readlines()]

        test_textlines = [[word2id[word] if word in word2id else unk for word in line] for line in test_textlines]

        test_textlines = [torch.Tensor(line).long() for line in test_textlines]
        
        weight_matrix = get_weight(wvmodel,len(word2id),args.embedding_size)
Example #21
import torch
import torch.optim as optim
from dataset import Dataset
from model import BiLSTM_CRF

# torch.set_default_tensor_type('torch.cuda.FloatTensor')

epochs = 100
dataset = Dataset()
train_loader = dataset.get_train_loader(1)
model = BiLSTM_CRF(dataset.get_vocab_size(), dataset.get_label_index_dict(),
                   128, 128)

optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

model.train()
for epoch in range(epochs):
    for iter, batch in enumerate(train_loader):
        sentence_in, targets = batch.line, batch.label

        sentence_in = sentence_in.permute([1, 0]).reshape(-1).contiguous()
        targets = targets.permute([1, 0]).reshape(-1).contiguous()

        model.zero_grad()
        loss = model.neg_log_likelihood(sentence_in.squeeze(-1),
                                        targets.squeeze(-1)) / len(sentence_in)

        loss.backward()
        optimizer.step()

        print("{}-{}: {:.5f}".format(epoch, iter, loss.item()))
Example #22
        use_crf=parameters['crf'],
        char_mode=parameters['char_mode'],
        # n_cap=4,
        # cap_embedding_dim=10
    )
    if parameters['reload']:
        model.load_state_dict(torch.load(model_name))

    if use_gpu:
        GPU_id = gpu_id
        print("GPU ID = ", GPU_id)
        torch.cuda.set_device(GPU_id)
        model.cuda()

    learning_rate = parameters["LR"]
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=0.9)
    step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

    t = time.time()

    train_model(model, step_lr_scheduler, optimizer, train_data, dev_data,
                test_data)

    print("total time in training: ", time.time() - t)

    try:
        os.remove(parameters["sorted_entity_list_file_name"])
    except Exception:
        pass