def evaluate_model(config: Config, model: NNCRF, batch_insts_ids, name: str, insts: List[Instance]):
    ## evaluation
    # metrics accumulates [num_correct, num_predicted_entities, num_gold_entities]
    metrics = np.asarray([0, 0, 0], dtype=int)
    batch_id = 0
    batch_size = config.batch_size
    for batch in batch_insts_ids:
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        # decoding assumes the instances are sorted by sentence length, longest first
        sorted_batch_insts = sorted(one_batch_insts, key=lambda inst: len(inst.input.words), reverse=True)
        batch_max_scores, batch_max_ids = model.decode(batch)
        metrics += eval.evaluate_num(sorted_batch_insts, batch_max_ids, batch[-1], batch[1], config.idx2labels)
        batch_id += 1
    p, total_predict, total_entity = metrics[0], metrics[1], metrics[2]
    precision = p * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall = p * 1.0 / total_entity * 100 if total_entity != 0 else 0
    fscore = 2.0 * precision * recall / (precision + recall) if precision != 0 or recall != 0 else 0
    print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision, recall, fscore), flush=True)
    return [precision, recall, fscore]
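
# A minimal worked example of the P/R/F1 arithmetic above (the counts are
# illustrative, not from a real run): metrics holds
# [num_correct, num_predicted_entities, num_gold_entities].
import numpy as np

def _prf_demo():
    metrics = np.asarray([80, 100, 90], dtype=int)
    p, total_predict, total_entity = metrics[0], metrics[1], metrics[2]
    precision = p * 1.0 / total_predict * 100  # 80.00
    recall = p * 1.0 / total_entity * 100      # 88.89
    fscore = 2.0 * precision * recall / (precision + recall)  # 84.21
    return precision, recall, fscore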
def test_model(config: Config, test_insts):
    dep_model_name = config.dep_model.name
    if config.dep_model == DepModelType.dggcn:
        dep_model_name += '(' + str(config.num_gcn_layers) + "," + str(config.gcn_dropout) + "," + str(config.gcn_mlp_layers) + ")"
    model_name = "model_files/lstm_{}_{}_crf_{}_{}_{}_dep_{}_elmo_{}_{}_gate_{}_base_{}_epoch_{}_lr_{}_doubledep_{}_comb_{}.m".format(
        config.num_lstm_layer, config.hidden_dim, config.dataset, config.affix, config.train_num, dep_model_name,
        config.context_emb.name, config.optimizer.lower(), config.edge_gate, config.num_base, config.num_epochs,
        config.learning_rate, config.double_dep_label, config.interaction_func)
    res_name = "results/lstm_{}_{}_crf_{}_{}_{}_dep_{}_elmo_{}_{}_gate_{}_base_{}_epoch_{}_lr_{}_doubledep_{}_comb_{}.results".format(
        config.num_lstm_layer, config.hidden_dim, config.dataset, config.affix, config.train_num, dep_model_name,
        config.context_emb.name, config.optimizer.lower(), config.edge_gate, config.num_base, config.num_epochs,
        config.learning_rate, config.double_dep_label, config.interaction_func)
    model = NNCRF(config)
    model.load_state_dict(torch.load(model_name))
    model.eval()
    test_batches = batching_list_instances(config, test_insts)
    evaluate(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
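
# write_results is called by each test harness but defined elsewhere; a
# plausible sketch (an assumption, including the inst.prediction field name):
# dump one token per line with its gold and predicted labels, CoNLL-style.
def write_results_sketch(filename: str, insts):
    with open(filename, 'w', encoding='utf-8') as f:
        for inst in insts:
            for word, gold, pred in zip(inst.input.words, inst.output, inst.prediction):
                f.write("{}\t{}\t{}\n".format(word, gold, pred))
            f.write("\n")  # blank line between sentences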
def test_model(config: Config, test_insts):
    model_name = "model_files/lstm_{}_crf_{}_{}_dep_{}_elmo_{}_lr_{}.m".format(
        config.hidden_dim, config.dataset, config.train_num,
        config.context_emb.name, config.optimizer.lower(), config.learning_rate)
    res_name = "results/lstm_{}_crf_{}_{}_dep_{}_elmo_{}_lr_{}.results".format(
        config.hidden_dim, config.dataset, config.train_num,
        config.context_emb.name, config.optimizer.lower(), config.learning_rate)
    model = NNCRF(config)
    model.load_state_dict(torch.load(model_name))
    model.eval()
    test_batches = batching_list_instances(config, test_insts)
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
def test_model(config: Config, test_insts):
    # dep_model_name is built for parity with the LSTM variant but is unused
    # in the GCN file names below
    dep_model_name = config.dep_model.name
    if config.dep_model == DepModelType.dggcn:
        dep_model_name += '(' + str(config.num_gcn_layers) + "," + str(config.gcn_dropout) + "," + str(config.gcn_mlp_layers) + ")"
    model_name = "model_files/gcn_{}_hidden_{}_dataset_{}_{}_context_{}.m".format(
        config.num_gcn_layers, config.hidden_dim, config.dataset, config.affix, config.context_emb.name)
    res_name = "results/gcn_{}_hidden_{}_dataset_{}_{}_context_{}.results".format(
        config.num_gcn_layers, config.hidden_dim, config.dataset, config.affix, config.context_emb.name)
    model = NNCRF(config)
    model.load_state_dict(torch.load(model_name))
    model.eval()
    test_batches = batching_list_instances(config, test_insts)
    evaluate(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
class MT_LSTMCRF(nn.Module):

    def __init__(self, config_base, config_conll, config_ontonotes):
        super(MT_LSTMCRF, self).__init__()
        self.config_base = config_base
        self.config_conll = config_conll
        self.config_ontonotes = config_ontonotes
        # one LSTM-CRF per task: a shared base plus CoNLL and OntoNotes heads
        self.lstmcrf_base = NNCRF(config_base)
        self.lstmcrf_conll = NNCRF(config_conll)
        self.lstmcrf_ontonotes = NNCRF(config_ontonotes)

    def neg_log_obj_total(self, words, word_seq_lens, batch_context_emb, chars, char_seq_lens,
                          prefix_label, conll_label, notes_label,
                          mask_base, mask_conll, mask_ontonotes):
        # the base model's hidden states are shared with both task models
        loss_base, hiddens_base = self.lstmcrf_base.neg_log_obj(
            words, word_seq_lens, batch_context_emb, chars, char_seq_lens, prefix_label, mask_base)
        # hidden_base = w1 * h1
        loss_conll, _ = self.lstmcrf_conll.neg_log_obj(
            words, word_seq_lens, batch_context_emb, chars, char_seq_lens, conll_label, mask_conll, hiddens_base)
        loss_ontonotes, _ = self.lstmcrf_ontonotes.neg_log_obj(
            words, word_seq_lens, batch_context_emb, chars, char_seq_lens, notes_label, mask_ontonotes, hiddens_base)
        loss_total = loss_base + loss_conll + loss_ontonotes
        # loss_total = loss_ontonotes
        return loss_total

    def decode(self, batchinput):
        (words, word_seq_lens, batch_context_emb, chars, char_seq_lens, prefix_label,
         conll_label, notes_label, mask_base, mask_conll, mask_ontonotes) = batchinput
        _, hiddens_base = self.lstmcrf_base.neg_log_obj(
            words, word_seq_lens, batch_context_emb, chars, char_seq_lens, prefix_label, mask_base)
        bestScores_conll, decodeIdx_conll = self.lstmcrf_conll.decode(batchinput, hiddens_base)
        bestScores_notes, decodeIdx_notes = self.lstmcrf_ontonotes.decode(batchinput, hiddens_base)
        return bestScores_conll, decodeIdx_conll, bestScores_notes, decodeIdx_notes, mask_conll, mask_ontonotes
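
# A minimal usage sketch for the multi-task decoder (an assumption, not part
# of the original file): batchinput is the 11-tuple unpacked in decode above.
import torch

def decode_multitask(model, batchinput):
    model.eval()
    with torch.no_grad():  # no gradients needed at decode time
        (scores_conll, ids_conll, scores_notes, ids_notes,
         mask_conll, mask_notes) = model.decode(batchinput)
    return ids_conll, ids_notes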
def learn_from_insts(config: Config, epoch: int, train_insts, dev_insts, test_insts):
    # train_insts: List[Instance], dev_insts: List[Instance], test_insts: List[Instance], batch_size: int = 1
    model = NNCRF(config)
    optimizer = get_optimizer(config, model)
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)

    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)

    best_dev = [-1, 0]
    best_test = [-1, 0]
    model_folder = "model_files"
    res_folder = "results"
    model_name = model_folder + "/lstm_{}_crf_{}_{}_dep_{}_elmo_{}_lr_{}.m".format(
        config.hidden_dim, config.dataset, config.train_num,
        config.context_emb.name, config.optimizer.lower(), config.learning_rate)
    res_name = res_folder + "/lstm_{}_crf_{}_{}_dep_{}_elmo_{}_lr_{}.results".format(
        config.hidden_dim, config.dataset, config.train_num,
        config.context_emb.name, config.optimizer.lower(), config.learning_rate)
    print("[Info] The model will be saved to: %s" % (model_name))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    if not os.path.exists(res_folder):
        os.makedirs(res_folder)

    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        for index in np.random.permutation(len(batched_data)):
            # for index in range(len(batched_data)):
            model.train()
            batch_word, batch_wordlen, batch_context_emb, batch_char, batch_charlen, batch_label = batched_data[index]
            loss = model.neg_log_obj(batch_word, batch_wordlen, batch_context_emb, batch_char, batch_charlen, batch_label)
            epoch_loss += loss.item()
            loss.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)  ## clipping the gradient
            optimizer.step()
            model.zero_grad()
        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)

        model.eval()
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        if dev_metrics[2] > best_dev[0]:
            print("saving the best model...")
            best_dev[0] = dev_metrics[2]
            best_dev[1] = i
            best_test[0] = test_metrics[2]
            best_test[1] = i
            torch.save(model.state_dict(), model_name)
            write_results(res_name, test_insts)
        model.zero_grad()

    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    model.load_state_dict(torch.load(model_name))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
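
# lr_decay is called in the SGD branch above but defined elsewhere in the
# repo; a plausible sketch (an assumption, including the config.lr_decay
# hyperparameter), following the common 1 / (1 + decay * epoch) schedule:
def lr_decay_sketch(config, optimizer, epoch):
    lr = config.learning_rate / (1 + config.lr_decay * (epoch - 1))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr  # update the rate in place for every group
    return optimizer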
def learn_from_insts(config: Config, epoch: int, train_insts, dev_insts, test_insts):
    # train_insts: List[Instance], dev_insts: List[Instance], test_insts: List[Instance], batch_size: int = 1
    model = NNCRF(config)
    optimizer = get_optimizer(config, model)
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)

    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)

    best_dev = [-1, 0]
    best_test = [-1, 0]
    dep_model_name = config.dep_model.name
    if config.dep_model == DepModelType.dggcn:
        dep_model_name += '(' + str(config.num_gcn_layers) + "," + str(config.gcn_dropout) + "," + str(config.gcn_mlp_layers) + ")"
    model_name = "model_files/gcn_{}_hidden_{}_dataset_{}_{}_context_{}.m".format(
        config.num_gcn_layers, config.hidden_dim, config.dataset, config.affix, config.context_emb.name)
    res_name = "results/gcn_{}_hidden_{}_dataset_{}_{}_context_{}.results".format(
        config.num_gcn_layers, config.hidden_dim, config.dataset, config.affix, config.context_emb.name)
    print("[Info] The model will be saved to: %s, please ensure the models folder exists" % (model_name))
    if not os.path.exists("model_files"):
        os.makedirs("model_files")
    if not os.path.exists("results"):
        os.makedirs("results")

    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        for index in np.random.permutation(len(batched_data)):
            # for index in range(len(batched_data)):
            model.train()
            # optimizer.zero_grad()
            (batch_word, batch_wordlen, batch_context_emb, batch_char, batch_charlen,
             adj_matrixs, adjs_in, adjs_out, graphs, dep_label_adj, batch_dep_heads,
             trees, batch_label, batch_dep_label, batch_poslabel) = batched_data[index]
            loss = model.neg_log_obj(batch_word, batch_wordlen, batch_context_emb, batch_char, batch_charlen,
                                     adj_matrixs, adjs_in, adjs_out, graphs, dep_label_adj, batch_dep_heads,
                                     batch_label, batch_dep_label, batch_poslabel, trees)
            epoch_loss += loss.item()
            loss.backward()
            if config.dep_model == DepModelType.dggcn:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)  ## clipping the gradient
            optimizer.step()
            model.zero_grad()
        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)

        if i + 1 >= config.eval_epoch:
            model.eval()
            dev_metrics = evaluate(config, model, dev_batches, "dev", dev_insts)
            if dev_metrics[2] > best_dev[0]:
                test_metrics = evaluate(config, model, test_batches, "test", test_insts)
                print("saving the best model...")
                best_dev[0] = dev_metrics[2]
                best_dev[1] = i
                best_test[0] = test_metrics[2]
                best_test[1] = i
                torch.save(model.state_dict(), model_name)
                write_results(res_name, test_insts)
        model.zero_grad()

    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    model.load_state_dict(torch.load(model_name))
    model.eval()
    evaluate(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
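
# get_optimizer is likewise defined elsewhere; a minimal sketch (an
# assumption) mapping config.optimizer to the corresponding torch.optim class:
import torch.optim as optim

def get_optimizer_sketch(config, model):
    params = filter(lambda p: p.requires_grad, model.parameters())
    if config.optimizer.lower() == "sgd":
        return optim.SGD(params, lr=config.learning_rate)
    elif config.optimizer.lower() == "adam":
        return optim.Adam(params, lr=config.learning_rate)
    raise ValueError("Unsupported optimizer: " + config.optimizer)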
def learn_from_insts(config: Config, epoch: int, train_insts, dev_insts, test_insts):
    # train_insts: List[Instance], dev_insts: List[Instance], test_insts: List[Instance], batch_size: int = 1
    if config.pretrain_dep:
        # load a pretrained dependency model and pass it into the NER model
        model_path = f"model_files/{config.pdep_model}/{config.pdep_model}.tar.gz"
        predictor = Predictor(model_path)
        model = NNCRF(config, pretrained_dep_model=predictor.model)
    else:
        model = NNCRF(config)
    optimizer = get_optimizer(config, model)
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)

    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)

    best_dev = [-1, 0]
    best_test = [-1, 0]
    dep_model_name = config.dep_model.name
    if config.dep_model == DepModelType.dggcn:
        dep_model_name += '(' + str(config.num_gcn_layers) + "," + str(config.gcn_dropout) + "," + str(config.gcn_mlp_layers) + ")"
    model_folder = config.model_folder
    res_folder = "results"
    model_path = f"model_files/{model_folder}/gnn.pt"
    config_path = f"model_files/{model_folder}/config.conf"
    res_path = f"{res_folder}/{model_folder}.res"
    os.makedirs(f"model_files/{model_folder}", exist_ok=True)  ## create the model folder; does not raise an error if it already exists
    os.makedirs(res_folder, exist_ok=True)
    print(f"[Info] The model will be saved to the directory: model_files/{model_folder}")

    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        for index in np.random.permutation(len(batched_data)):
            # for index in range(len(batched_data)):
            model.train()
            (batch_word, batch_wordlen, batch_context_emb, batch_char, batch_charlen,
             adj_matrixs, adjs_in, adjs_out, graphs, dep_label_adj, batch_dep_heads,
             trees, batch_label, batch_dep_label) = batched_data[index]
            loss = model.neg_log_obj(batch_word, batch_wordlen, batch_context_emb, batch_char, batch_charlen,
                                     adj_matrixs, adjs_in, adjs_out, graphs, dep_label_adj, batch_dep_heads,
                                     batch_label, batch_dep_label, trees)
            epoch_loss += loss.item()
            loss.backward()
            if config.dep_model == DepModelType.dggcn:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)  ## clipping the gradient
            optimizer.step()
            model.zero_grad()
        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)

        if i + 1 >= config.eval_epoch:
            model.eval()
            dev_metrics = evaluate(config, model, dev_batches, "dev", dev_insts)
            test_metrics = evaluate(config, model, test_batches, "test", test_insts)
            if dev_metrics[2] > best_dev[0]:
                print("saving the best model...")
                best_dev[0] = dev_metrics[2]
                best_dev[1] = i
                best_test[0] = test_metrics[2]
                best_test[1] = i
                torch.save(model.state_dict(), model_path)
                write_results(res_path, test_insts)
        model.zero_grad()

    print("Archiving the best Model...")
    with tarfile.open(f"model_files/{model_folder}/{model_folder}.tar.gz", "w:gz") as tar:
        tar.add(f"model_files/{model_folder}", arcname=os.path.basename(model_folder))
    print("Finished archiving the models")

    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    model.load_state_dict(torch.load(model_path))
    model.eval()
    evaluate(config, model, test_batches, "test", test_insts)
    write_results(res_path, test_insts)
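
# A hedged sketch (an assumption, not part of the original file) of restoring
# a model archived by the loop above: unpack the tar.gz, then reload gnn.pt.
# Assumes config.model_folder is a plain name, so the arcname equals it.
import tarfile
import torch

def restore_archived_model(config, model_folder: str, dest: str = "model_files/restored"):
    with tarfile.open(f"model_files/{model_folder}/{model_folder}.tar.gz", "r:gz") as tar:
        tar.extractall(path=dest)  # recreates {dest}/{model_folder}/...
    model = NNCRF(config)
    model.load_state_dict(torch.load(f"{dest}/{model_folder}/gnn.pt"))
    model.eval()
    return model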