Example #1
def _main():
    data_manager = DataManager()
    vocab_size = len(data_manager.word2ix)
    model = BiLSTM_CRF(device, vocab_size, data_manager.tag2ix, EMBEDDING_DIM, HIDDEN_DIM)
    model = model.to(device)

    train_set = NerDataset(data_manager.train_sents, data_manager.train_tags)
    dev_set = NerDataset(data_manager.dev_sents, data_manager.dev_tags)
    train_loader = DataLoader(train_set, batch_size=BATCH_SZ, shuffle=True)
    dev_loader = DataLoader(dev_set, batch_size=BATCH_SZ, shuffle=True)

    optimizer = optim.Adam(model.parameters(), lr=0.01)
    epoch_loss = []

    '''with torch.no_grad():
        precheck_sent = to_tensor(data_manager.train_sents[0])
        precheck_tag = to_tensor(data_manager.train_tags[0])
        print(precheck_tag)
        print(model(precheck_sent))'''

    for epoch in range(EPOCH_NUM):
        epoch_loss.clear()  # reset each epoch so the printed mean is per-epoch, not cumulative
        for sents, tags, lengths in tqdm(train_loader):
            sents = sents.to(device)
            tags = tags.to(device)
            lengths = lengths.to(device)
            # print(lengths, sents.size(), tags.size())
            loss = model.neg_log_likelihood(sents, tags, lengths)

            epoch_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(epoch, ' epoch loss: ', sum(epoch_loss)/len(epoch_loss))
        save_model(model, epoch)
        eval(model, dev_loader)
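The loop above calls save_model and eval, which the snippet does not show. A minimal sketch of what they might look like, assuming model(sents, lengths) returns one predicted tag sequence per sentence; the names, signatures, and checkpoint directory are assumptions, not part of the original code:

import os
import torch

def save_model(model, epoch, out_dir='checkpoints'):
    # hypothetical helper: persist one checkpoint per epoch
    os.makedirs(out_dir, exist_ok=True)
    torch.save(model.state_dict(),
               os.path.join(out_dir, 'bilstm_crf_epoch{}.pth'.format(epoch)))

def eval(model, dev_loader):
    # hypothetical helper: rough token-level accuracy on the dev set
    device = next(model.parameters()).device
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for sents, tags, lengths in dev_loader:
            sents, tags = sents.to(device), tags.to(device)
            preds = model(sents, lengths.to(device))  # assumed to return tag index sequences
            for pred, gold, length in zip(preds, tags, lengths):
                L = int(length)
                pred = torch.as_tensor(pred, device=gold.device)
                correct += (pred[:L] == gold[:L]).sum().item()
                total += L
    model.train()
    print('dev accuracy:', correct / max(total, 1))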
Example #2
def run(word_train,
        label_train,
        word_dev,
        label_dev,
        vocab,
        device,
        kf_index=0):
    # build dataset
    train_dataset = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_dataset = SegDataset(word_dev, label_dev, vocab, config.label2id)
    # build data_loader
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              collate_fn=train_dataset.collate_fn)
    dev_loader = DataLoader(dev_dataset,
                            batch_size=config.batch_size,
                            shuffle=True,
                            collate_fn=dev_dataset.collate_fn)
    # model
    model = BiLSTM_CRF(embedding_size=config.embedding_size,
                       hidden_size=config.hidden_size,
                       vocab_size=vocab.vocab_size(),
                       target_size=vocab.label_size(),
                       num_layers=config.lstm_layers,
                       lstm_drop_out=config.lstm_drop_out,
                       nn_drop_out=config.nn_drop_out)
    model.to(device)
    # optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=config.lr,
                           betas=config.betas)
    scheduler = StepLR(optimizer,
                       step_size=config.lr_step,
                       gamma=config.lr_gamma)
    # how to initialize these parameters elegantly
    for p in model.crf.parameters():
        _ = torch.nn.init.uniform_(p, -1, 1)
    # train and test
    # train(train_loader, dev_loader, vocab, model, optimizer, scheduler, device, kf_index)
    with torch.no_grad():
        # test on the final test set
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
    return test_loss, f1
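Both DataLoaders rely on SegDataset.collate_fn, which is not shown here. A typical implementation pads the variable-length word and label sequences in a batch to the batch maximum; a sketch, assuming each dataset item is a (word_ids, label_ids) pair and index 0 is reserved for padding:

import torch

def collate_fn(batch):
    # batch: list of (word_ids, label_ids) pairs of varying length
    lengths = [len(words) for words, _ in batch]
    max_len = max(lengths)
    pad_idx = 0  # assumed padding index
    word_tensor = torch.full((len(batch), max_len), pad_idx, dtype=torch.long)
    label_tensor = torch.full((len(batch), max_len), pad_idx, dtype=torch.long)
    for i, (words, labels) in enumerate(batch):
        word_tensor[i, :len(words)] = torch.as_tensor(words, dtype=torch.long)
        label_tensor[i, :len(labels)] = torch.as_tensor(labels, dtype=torch.long)
    return word_tensor, label_tensor, torch.as_tensor(lengths)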
Example #3
    return sentences_sort, lengths_sort, idx_unsort


char2idx = pickle.load(open('char2idx.pkl', 'rb'))
data = pickle.load(open('predict_data.pkl', 'rb'))

predict_data = PredData(data, char2idx)
dataloader = DataLoader(predict_data, batch_size=32, drop_last=False)

model = BiLSTM_CRF(len(char2idx), len(Config.tagert2idx), Config.embedding_dim,
                   Config.hidden_dim)

model.load_state_dict(torch.load('model_best.pth'))
if Config.use_gpu:
    model.to('cuda')
model.eval()

predict_result = []
with torch.no_grad():
    for batch_sentences, batch_lengths in dataloader:
        sentences, lengths, idx_unsort = sort_batch_data(
            batch_sentences, batch_lengths)
        if Config.use_gpu:
            sentences = sentences.cuda()
        pred = model(sentences, lengths)
        pred = pred[idx_unsort]
        pred = pred.cpu().numpy()

        ls = batch_lengths.numpy()
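Only the last line of sort_batch_data is visible at the top of this example. A plausible reconstruction, based on how it is called above: sort the padded batch by descending length (the order pack_padded_sequence traditionally expects) and also return the permutation that restores the original order, which is applied to pred via idx_unsort:

import torch

def sort_batch_data(sentences, lengths):
    # sort by descending length and remember how to undo the sort
    lengths_sort, idx_sort = torch.sort(lengths, descending=True)
    sentences_sort = sentences[idx_sort]
    _, idx_unsort = torch.sort(idx_sort)
    return sentences_sort, lengths_sort, idx_unsort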
Example #4
parser.add_argument('--word_embed_size', type=int, default=200, help='word embedding dim')
parser.add_argument('--input_embed_size', type=int, default=250, help='lstm input embedding dim')
parser.add_argument('--hidden_size', type=int, default=250, help='decoder lstm hidden dim')
parser.add_argument('--add_dropout', type=int, default=1, help='whether to apply dropout to input_embed')
parser.add_argument('--device', type=str, default='cuda:2', help='train device')
args = parser.parse_args(args=[])

idx_to_tag = ['B-ORG','O','B-MISC','B-PER', 'I-PER', 'B-LOC', 'I-ORG', 'I-MISC', 'I-LOC', 'STOP', 'START']
# get the data iterators
seq, char_, train_iter, test_iter, val_iter = get_data_iter()
START ='START'
STOP = 'STOP'
device = tc.device('cuda:2')
net = BiLSTM_CRF(tag_to_idx, seq.vocab, char_.vocab, args)
net.load_state_dict(tc.load(args.save_path))
net = net.to(device)

# test
def test_(net, data_iter, device, idx_to_tag):
    loss_sum, acc_sum, n = 0.0, 0.0, 0
    seq_pred = []
    net.eval()  # switch to evaluation mode
    for batch_data in data_iter:  
        sentence = (batch_data.Seq).to(device)
        char_ = (batch_data.Char_).to(device)
        char_len = (batch_data.Char_len).to(device)
        tag_seq = net(sentence, char_, char_len)
        seq_pred.append(tag_seq)
        n += sentence.shape[1]
        if n % 200 == 0:
            print(f'test__ n = {n}')
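The snippet stops before the index sequences collected in seq_pred are mapped back to label strings. A small hypothetical helper for that step, reusing the idx_to_tag list and the START/STOP markers defined above:

def decode_tags(tag_seq, idx_to_tag):
    # map predicted indices back to labels, dropping the CRF's START/STOP markers
    return [idx_to_tag[i] for i in tag_seq
            if idx_to_tag[i] not in (START, STOP)]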
Example #5
log = str(model)
with open(os.path.join(logs_path, "{0}.important.log".format(name)),
          "a") as fout:
    fout.write(log)
    fout.write('\n')
    for param in param_list:
        fout.write(param)
        fout.write('\n')
    fout.flush()

if reload:
    last_saved_model = torch.load(model_name, map_location=device_name)
    model.load_state_dict(last_saved_model.state_dict())
    model.use_gpu = use_gpu
if use_gpu:
    model = model.to(device)

# Perf: Adam < AdaDelta < SGD
if optimizer_choice == OptimizationMethod.SGDWithDecreasingLR:
    learning_rate = 0.02
    learning_momentum = 0.9
    print("learning_rate:{0}".format(learning_rate))
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=learning_rate,
                                momentum=learning_momentum)

elif optimizer_choice == OptimizationMethod.Adam:
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))
elif optimizer_choice == OptimizationMethod.AdaDelta:
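The example is cut off at the AdaDelta branch. By analogy with the Adam branch above it would presumably build the optimizer the same way; wrapped in a helper here so the sketch stands on its own (note that torch.optim spells the class Adadelta):

import torch

def build_adadelta_optimizer(model):
    # mirrors the Adam branch: optimize only the trainable parameters
    return torch.optim.Adadelta(
        filter(lambda p: p.requires_grad, model.parameters()))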
Example #6
train_dataset, eval_dataset = torch.utils.data.random_split(
    dataset, (80000, 10000))

train_dataloader = DataLoader(train_dataset,
                              batch_size=Config.batch_size,
                              shuffle=True,
                              num_workers=1,
                              drop_last=False)

model = BiLSTM_CRF(len(char2idx), len(Config.tagert2idx), Config.embedding_dim,
                   Config.hidden_dim)

# use GPU by default
if Config.use_gpu:
    model = model.to('cuda')
optimizer = optim.Adam(model.parameters(),
                       lr=0.001,
                       betas=(0.9, 0.999),
                       eps=1e-08,
                       weight_decay=0)

best_score = 0
for epoch in range(Config.epochs):
    model.train()
    total_loss = 0
    for batch_sentence, batch_label, batch_length in train_dataloader:

        model.zero_grad()

        batch_sentence, batch_label, batch_length, _ = sort_batch_data(
Example #7
        mappings = {
            "word_to_id": word_to_id,
            "tag_to_id": tag_to_id,
            "char_to_id": char_to_id,
            "parameters": parameters,
            "word_embeds": word_embeds,
        }
        pickle.dump(mappings, f)

    print("word_to_id: ", len(word_to_id))

    model = BiLSTM_CRF(
        vocab_size=len(word_to_id),
        tag_to_ix=tag_to_id,
        embedding_dim=parameters["word_dim"],
        hidden_dim=parameters["word_lstm_dim"],
        use_gpu=use_gpu,
        char_to_ix=char_to_id,
        pre_word_embeds=word_embeds,
        use_crf=parameters["crf"],
        char_mode=parameters["char_mode"],
    )
    # n_cap=4,
    # cap_embedding_dim=10)

    if parameters["reload"]:
        model = torch.load(model_name)

    model.to(device)
    train()
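The mappings pickled above are exactly what a separate prediction script needs to rebuild an identical model. A sketch of the reloading side, with a hypothetical file path since the snippet does not show where the pickle is written:

import pickle

with open('models/mapping.pkl', 'rb') as f:  # hypothetical path
    mappings = pickle.load(f)
word_to_id = mappings['word_to_id']
tag_to_id = mappings['tag_to_id']
char_to_id = mappings['char_to_id']
parameters = mappings['parameters']
word_embeds = mappings['word_embeds']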
Example #8
    print("Not using pre-trained embeddings")
    embeddings = None

model = BiLSTM_CRF(vectorizer.token_vocab,
                   vectorizer.tag_vocab,
                   args.batch_size,
                   dropout=args.dropout,
                   embedding_dim=args.embedding_dim,
                   hidden_dim=args.hidden_dim)
if args.reload_from_files and os.path.exists(args.model_state_file):
    model.load_state_dict(torch.load(args.model_state_file))
    print("Reloaded model")
else:
    print("New model")
    # initialize weights only when starting from scratch so that a
    # reloaded checkpoint is not overwritten by the init below
    for name, param in model.named_parameters():
        if 'weight' in name:
            nn.init.xavier_normal_(param.data)
        else:
            nn.init.constant_(param.data, 0)

model = model.to(args.device)
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode='min',
                                                 factor=0.5,
                                                 patience=1)
train_state = make_train_state(args)

epoch_bar = tqdm(desc='training routine', total=args.num_epochs, position=0)

dataset.set_split('train')
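Unlike the StepLR in Example #2, ReduceLROnPlateau only lowers the learning rate when the metric passed to it stops improving, so it has to be stepped once per epoch with the validation loss. A minimal, self-contained illustration of that pattern (the stand-in model and the constant placeholder loss are for demonstration only):

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)                       # stand-in model for illustration
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                 factor=0.5, patience=1)

for epoch in range(5):
    val_loss = 1.0                            # placeholder: compute the real dev loss here
    scheduler.step(val_loss)                  # step on the monitored metric, not every batch
    print(epoch, optimizer.param_groups[0]['lr'])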
Example #9
                                 "rb"))
    print('word vocab', len(word_vocab))
    print('char vocab', len(char_vocab))
    print('pos vocab', len(pos_vocab))
    print('tag vocab', len(tag_vocab))

    schema = get_schemas(source_path)

    # model
    train_device = torch.device(device if torch.cuda.is_available() else "cpu")
    model = BiLSTM_CRF(char_init_embed=(len(char_vocab), char_embed_dim),
                       word_init_embed=(len(word_vocab), word_embed_dim),
                       pos_init_embed=(len(pos_vocab), pos_embed_dim),
                       spo_embed_dim=len(schema),
                       sentence_length=seq_len,
                       hidden_size=hidden_dim,
                       num_classes=len(tag_vocab),
                       dropout=dropout,
                       id2words=tag_vocab.idx2word,
                       encoding_type=encoding_type,
                       weight=weight)
    model.to(train_device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)

    # train
    writer = SummaryWriter(log_dir=log_path)
    train()
    writer.close()
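train() and writer are only referenced here; inside the training loop the SummaryWriter is typically fed per-epoch scalars that TensorBoard then plots. A hypothetical, self-contained sketch of that logging pattern (the log directory and the decaying placeholder loss are made up for illustration):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/bilstm_crf')   # assumed log directory
for epoch in range(3):
    train_loss = 1.0 / (epoch + 1)                  # placeholder value, not real data
    writer.add_scalar('train/loss', train_loss, epoch)
writer.close()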