def _main():
    data_manager = DataManager()
    vocab_size = len(data_manager.word2ix)
    model = BiLSTM_CRF(device, vocab_size, data_manager.tag2ix, EMBEDDING_DIM, HIDDEN_DIM)
    model = model.to(device)
    train_set = NerDataset(data_manager.train_sents, data_manager.train_tags)
    dev_set = NerDataset(data_manager.dev_sents, data_manager.dev_tags)
    train_loader = DataLoader(train_set, batch_size=BATCH_SZ, shuffle=True)
    dev_loader = DataLoader(dev_set, batch_size=BATCH_SZ, shuffle=True)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    '''with torch.no_grad():
        precheck_sent = to_tensor(train_loader[0])
        precheck_tag = to_tensor(dataset.train_tags[0])
        print(precheck_tag)
        print(model(precheck_sent))'''

    for epoch in range(EPOCH_NUM):
        # reset per epoch so the printed mean covers only this epoch
        epoch_loss = []
        for sents, tags, lengths in tqdm(train_loader):
            sents = sents.to(device)
            tags = tags.to(device)
            lengths = lengths.to(device)
            # print(lengths, sents.size(), tags.size())
            loss = model.neg_log_likelihood(sents, tags, lengths)
            epoch_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(epoch, ' epoch loss: ', sum(epoch_loss) / len(epoch_loss))
        save_model(model, epoch)
        eval(model, dev_loader)
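# Hedged sketch (not from the original file): one way NerDataset could be
# implemented so the loader above yields (sents, tags, lengths) triples.
# MAX_LEN and pad_ix are assumptions; sentences and tags are index lists.
import torch
from torch.utils.data import Dataset

MAX_LEN = 128  # assumed fixed padding length

class NerDatasetSketch(Dataset):
    def __init__(self, sents, tags, pad_ix=0):
        self.sents, self.tags, self.pad_ix = sents, tags, pad_ix

    def __len__(self):
        return len(self.sents)

    def __getitem__(self, i):
        sent, tag = self.sents[i][:MAX_LEN], self.tags[i][:MAX_LEN]
        length = len(sent)
        pad = [self.pad_ix] * (MAX_LEN - length)
        return (torch.tensor(sent + pad),
                torch.tensor(tag + pad),
                torch.tensor(length))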
def run(word_train, label_train, word_dev, label_dev, vocab, device, kf_index=0):
    # build dataset
    train_dataset = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_dataset = SegDataset(word_dev, label_dev, vocab, config.label2id)
    # build data_loader
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True, collate_fn=train_dataset.collate_fn)
    dev_loader = DataLoader(dev_dataset, batch_size=config.batch_size,
                            shuffle=True, collate_fn=dev_dataset.collate_fn)
    # model
    model = BiLSTM_CRF(embedding_size=config.embedding_size,
                       hidden_size=config.hidden_size,
                       vocab_size=vocab.vocab_size(),
                       target_size=vocab.label_size(),
                       num_layers=config.lstm_layers,
                       lstm_drop_out=config.lstm_drop_out,
                       nn_drop_out=config.nn_drop_out)
    model.to(device)
    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=config.lr, betas=config.betas)
    scheduler = StepLR(optimizer, step_size=config.lr_step, gamma=config.lr_gamma)
    # TODO: how to initialize these parameters elegantly
    for p in model.crf.parameters():
        torch.nn.init.uniform_(p, -1, 1)
    # train and test
    train(train_loader, dev_loader, vocab, model, optimizer, scheduler, device, kf_index)
    with torch.no_grad():
        # test on the final test set
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
    return test_loss, f1
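# Hedged usage sketch: run() takes a kf_index, so it is presumably invoked once
# per fold. The names words, labels, vocab, device and the 5-fold split below
# are assumptions, not from the original file.
from sklearn.model_selection import KFold
import numpy as np

kf = KFold(n_splits=5, shuffle=True)
f1_scores = []
for k, (tr, dv) in enumerate(kf.split(words)):
    _, f1 = run([words[i] for i in tr], [labels[i] for i in tr],
                [words[i] for i in dv], [labels[i] for i in dv],
                vocab, device, kf_index=k)
    f1_scores.append(f1)
print('mean test f1:', np.mean(f1_scores))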
    return sentences_sort, lengths_sort, idx_unsort


char2idx = pickle.load(open('char2idx.pkl', 'rb'))
data = pickle.load(open('predict_data.pkl', 'rb'))
predict_data = PredData(data, char2idx)
dataloader = DataLoader(predict_data, batch_size=32, drop_last=False)
model = BiLSTM_CRF(len(char2idx), len(Config.tagert2idx),
                   Config.embedding_dim, Config.hidden_dim)
model.load_state_dict(torch.load('model_best.pth'))
if Config.use_gpu:
    model.to('cuda')
model.eval()

predict_result = []
with torch.no_grad():
    for batch_sentences, batch_lengths in dataloader:
        sentences, lengths, idx_unsort = sort_batch_data(batch_sentences, batch_lengths)
        if Config.use_gpu:
            sentences = sentences.cuda()
        pred = model(sentences, lengths)
        pred = pred[idx_unsort]
        pred = pred.cpu().numpy()
        ls = batch_lengths.numpy()
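# Hedged sketch of the sort_batch_data helper whose tail opens this snippet:
# sort the padded batch by descending length (as pack_padded_sequence requires)
# and also return the permutation that restores the original batch order.
def sort_batch_data(sentences, lengths):
    lengths_sort, idx_sort = torch.sort(lengths, descending=True)
    sentences_sort = sentences[idx_sort]
    _, idx_unsort = torch.sort(idx_sort)  # inverse permutation
    return sentences_sort, lengths_sort, idx_unsort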
parser.add_argument('--word_embed_size', type=int, default=200, help='word embedding dim')
parser.add_argument('--input_embed_size', type=int, default=250, help='LSTM input embedding dim')
parser.add_argument('--hidden_size', type=int, default=250, help='decoder LSTM hidden dim')
parser.add_argument('--add_dropout', type=int, default=1, help='whether to apply dropout to input_embed')
parser.add_argument('--device', type=str, default='cuda:2', help='train device')
args = parser.parse_args(args=[])

idx_to_tag = ['B-ORG', 'O', 'B-MISC', 'B-PER', 'I-PER', 'B-LOC', 'I-ORG',
              'I-MISC', 'I-LOC', 'STOP', 'START']

# get the data iterators
seq, char_, train_iter, test_iter, val_iter = get_data_iter()
START = 'START'
STOP = 'STOP'
device = tc.device('cuda:2')
net = BiLSTM_CRF(tag_to_idx, seq.vocab, char_.vocab, args)
net.load_state_dict(tc.load(args.save_path))
net = net.to(device)


# testing
def test_(net, data_iter, device, idx_to_tag):
    loss_sum, acc_sum, n = 0.0, 0.0, 0
    seq_pred = []
    net.eval()  # switch to evaluation mode
    for batch_data in data_iter:
        sentence = batch_data.Seq.to(device)
        char_ = batch_data.Char_.to(device)
        char_len = batch_data.Char_len.to(device)
        tag_seq = net(sentence, char_, char_len)
        seq_pred.append(tag_seq)
        n += sentence.shape[1]
        if n % 200 == 0:
            print(f'test__ n = {n}')
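# Hedged usage sketch (assumed, not in the original file): mapping one predicted
# index sequence collected in seq_pred back to tag strings via idx_to_tag.
def decode_tags(tag_seq, idx_to_tag):
    # tag_seq: an iterable of tag indices for one sentence
    return [idx_to_tag[int(i)] for i in tag_seq]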
log = str(model)
with open(os.path.join(logs_path, "{0}.important.log".format(name)), "a") as fout:
    fout.write(log)
    fout.write('\n')
    for param in param_list:
        fout.write(param)
        fout.write('\n')
    fout.flush()

if reload:
    last_saved_model = torch.load(model_name, map_location=device_name)
    model.load_state_dict(last_saved_model.state_dict())
    model.use_gpu = use_gpu
if use_gpu:
    model = model.to(device)

# Perf: Adam < AdaDelta < SGD
if optimizer_choice == OptimizationMethod.SGDWithDecreasingLR:
    learning_rate = 0.02
    learning_momentum = 0.9
    print("learning_rate:{0}".format(learning_rate))
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=learning_rate, momentum=learning_momentum)
elif optimizer_choice == OptimizationMethod.Adam:
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))
elif optimizer_choice == OptimizationMethod.AdaDelta:
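    # the snippet breaks off here; a plausible branch body (an assumption,
    # mirroring the Adam branch above):
    optimizer = torch.optim.Adadelta(
        filter(lambda p: p.requires_grad, model.parameters()))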
train_dataset, eval_dataset = torch.utils.data.random_split(dataset, (80000, 10000))
train_dataloader = DataLoader(train_dataset, batch_size=Config.batch_size,
                              shuffle=True, num_workers=1, drop_last=False)
model = BiLSTM_CRF(len(char2idx), len(Config.tagert2idx),
                   Config.embedding_dim, Config.hidden_dim)
# use the GPU by default
if Config.use_gpu:
    model = model.to('cuda')
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999),
                       eps=1e-08, weight_decay=0)

best_score = 0
for epoch in range(Config.epochs):
    model.train()
    total_loss = 0
    for batch_sentence, batch_label, batch_length in train_dataloader:
        model.zero_grad()
        batch_sentence, batch_label, batch_length, _ = sort_batch_data(
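            # the original snippet is truncated mid-call; these arguments and
            # the rest of the training step are assumptions, mirroring the
            # neg_log_likelihood loop in the first snippet
            batch_sentence, batch_label, batch_length)
        if Config.use_gpu:
            batch_sentence = batch_sentence.cuda()
            batch_label = batch_label.cuda()
        loss = model.neg_log_likelihood(batch_sentence, batch_label, batch_length)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()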
    mappings = {
        "word_to_id": word_to_id,
        "tag_to_id": tag_to_id,
        "char_to_id": char_to_id,
        "parameters": parameters,
        "word_embeds": word_embeds,
    }
    pickle.dump(mappings, f)

print("word_to_id: ", len(word_to_id))
model = BiLSTM_CRF(
    vocab_size=len(word_to_id),
    tag_to_ix=tag_to_id,
    embedding_dim=parameters["word_dim"],
    hidden_dim=parameters["word_lstm_dim"],
    use_gpu=use_gpu,
    char_to_ix=char_to_id,
    pre_word_embeds=word_embeds,
    use_crf=parameters["crf"],
    char_mode=parameters["char_mode"],
)
# n_cap=4,
# cap_embedding_dim=10)

if parameters["reload"]:
    model = torch.load(model_name)
model.to(device)

train()
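# Hedged companion sketch: reading the pickled mappings back at load time.
# The filename is an assumption (`f` above comes from an enclosing
# `with open(...)` block that this snippet starts inside of).
import pickle

with open('models/mappings.pkl', 'rb') as fin:
    mappings = pickle.load(fin)
word_to_id = mappings['word_to_id']
tag_to_id = mappings['tag_to_id']
char_to_id = mappings['char_to_id']
parameters = mappings['parameters']
word_embeds = mappings['word_embeds']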
print("Not using pre-trained embeddings") embeddings = None model = BiLSTM_CRF(vectorizer.token_vocab, vectorizer.tag_vocab, args.batch_size, dropout=args.dropout, embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim) if args.reload_from_files and os.path.exists(args.model_state_file): model.load_state_dict(torch.load(args.model_state_file)) print("Reloaded model") else: print("New model") model = model.to(args.device) for name, param in model.named_parameters(): if 'weight' in name: nn.init.xavier_normal_(param.data) else: nn.init.constant_(param.data, 0) optimizer = optim.Adam(model.parameters(), lr=args.learning_rate) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.5, patience=1) train_state = make_train_state(args) epoch_bar = tqdm(desc='training routine', total=args.num_epochs, position=0) dataset.set_split('train')
"rb")) print('word vocab', len(word_vocab)) print('char vocab', len(char_vocab)) print('pos vocab', len(pos_vocab)) print('tag vocab', len(tag_vocab)) schema = get_schemas(source_path) # model train_device = torch.device(device if torch.cuda.is_available() else "cpu") model = BiLSTM_CRF(char_init_embed=(len(char_vocab), char_embed_dim), word_init_embed=(len(word_vocab), word_embed_dim), pos_init_embed=(len(pos_vocab), pos_embed_dim), spo_embed_dim=len(schema), sentence_length=seq_len, hidden_size=hidden_dim, num_classes=len(tag_vocab), dropout=dropout, id2words=tag_vocab.idx2word, encoding_type=encoding_type, weight=weight) model.to(train_device) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) # train writer = SummaryWriter(log_dir=log_path) train() writer.close()