Example #1
    pairs_batch_dev = DataLoader(dataset=data_dev,
                    batch_size=batch_size,
                    shuffle=True,
                    collate_fn=prepare_data.collate,
                    pin_memory=True)


    # initialize the model
    model = NERModel(word_embedding_dim, char_embedding_dim, morph_embedding_dim, word_hidden_size, char_hidden_size, morph_hidden_size, 
                len(char2idx), len(morph2idx), len(tag2idx)+1, word_num_layers, char_num_layers, morph_num_layers, dropout_prob).to(device)
    model.train()

    criterion = nn.NLLLoss()

    optimizer = radam.RAdam(model.parameters(), lr=learning_rate) 
    print(model)
    
    total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('The number of trainable parameters is: %d' % (total_trainable_params))



    # train the model
    if not skip_training:
        train(model, word_num_layers, char_num_layers, morph_num_layers, num_epochs, pairs_batch_train, pairs_batch_dev, word_hidden_size, 
            char_hidden_size, morph_hidden_size, batch_size, criterion, optimizer, patience, device)

    # load the trained weights (the best checkpoint saved by train, or a previously trained one)
    model.load_state_dict(torch.load('weights/model_lower.pt'))
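The DataLoader above relies on a `prepare_data.collate` helper whose definition is not part of this example. A minimal, hypothetical sketch of such a collate function for variable-length sequences, using `torch.nn.utils.rnn.pad_sequence` (the `(word_ids, tag_ids)` item layout and padding value 0 are assumptions; the real helper presumably also pads the character and morph sequences):

import torch
from torch.nn.utils.rnn import pad_sequence

def collate(batch):
    # Each dataset item is assumed to be a (word_ids, tag_ids) pair of 1-D LongTensors.
    words = [item[0] for item in batch]
    tags = [item[1] for item in batch]
    lengths = torch.tensor([len(w) for w in words])
    # Pad every sequence in the batch to the length of the longest one.
    padded_words = pad_sequence(words, batch_first=True, padding_value=0)
    padded_tags = pad_sequence(tags, batch_first=True, padding_value=0)
    return padded_words, padded_tags, lengths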
Example #2
def main():
    hp = parse_args()

    # Setup model directories
    model_name = get_model_name(hp)
    model_path = path.join(hp.model_dir, model_name)
    best_model_path = path.join(model_path, 'best_models')
    if not path.exists(model_path):
        os.makedirs(model_path)
    if not path.exists(best_model_path):
        os.makedirs(best_model_path)

    # Set random seed
    torch.manual_seed(hp.seed)

    # Hacky way of assigning the number of labels.
    encoder = Encoder(
        model=hp.model,
        model_size=hp.model_size,
        fine_tune=hp.fine_tune,
        # CASE-PRESERVED!!
        cased=True)
    # Load data
    logging.info("Loading data")
    train_iter, val_iter, test_iter, num_labels = NERDataset.iters(
        hp.data_dir,
        encoder,
        batch_size=hp.batch_size,
        eval_batch_size=hp.eval_batch_size,
        train_frac=hp.train_frac)
    logging.info("Data loaded")

    # Initialize the model
    model = NERModel(encoder, num_labels=num_labels, **vars(hp)).cuda()
    sys.stdout.flush()

    if not hp.fine_tune:
        optimizer = torch.optim.Adam(model.get_other_params(), lr=hp.lr)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=hp.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='max',
                                                           patience=5,
                                                           factor=0.5,
                                                           verbose=True)
    steps_done = 0
    max_f1 = 0
    init_num_stuck_evals = 0
    num_steps = (hp.n_epochs * len(train_iter.data())) // hp.real_batch_size
    # Quantize the number of training steps to eval steps
    num_steps = (num_steps // hp.eval_steps) * hp.eval_steps
    logging.info("Total training steps: %d" % num_steps)

    location = path.join(model_path, "model.pt")
    if path.exists(location):
        logging.info("Loading previous checkpoint")
        checkpoint = torch.load(location)
        model.encoder.weighing_params = checkpoint['weighing_params']
        if hp.fine_tune:
            model.encoder.model.load_state_dict(checkpoint['encoder'])
        model.span_net.load_state_dict(checkpoint['span_net'])
        model.label_net.load_state_dict(checkpoint['label_net'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        steps_done = checkpoint['steps_done']
        init_num_stuck_evals = checkpoint['num_stuck_evals']
        max_f1 = checkpoint['max_f1']
        torch.set_rng_state(checkpoint['rng_state'])
        logging.info("Steps done: %d, Max F1: %.3f" % (steps_done, max_f1))

    if not hp.eval:
        train(hp,
              model,
              train_iter,
              val_iter,
              optimizer,
              scheduler,
              model_path,
              best_model_path,
              init_steps=steps_done,
              max_f1=max_f1,
              eval_steps=hp.eval_steps,
              num_steps=num_steps,
              init_num_stuck_evals=init_num_stuck_evals)

    val_f1, test_f1 = final_eval(hp, model, best_model_path, val_iter,
                                 test_iter)
    perf_dir = path.join(hp.model_dir, "perf")
    if not path.exists(perf_dir):
        os.makedirs(perf_dir)
    if hp.slurm_job_id and hp.slurm_array_id:
        perf_file = path.join(
            perf_dir, hp.slurm_job_id + "_" + hp.slurm_array_id + ".txt")
    else:
        perf_file = path.join(model_path, "perf.txt")
    with open(perf_file, "w") as f:
        f.write("%s\n" % (model_path))
        f.write("%s\t%.4f\n" % ("Valid", val_f1))
        f.write("%s\t%.4f\n" % ("Test", test_f1))
Example #3
corpus = read_lines('/eng.txt')
datax, datay, tag_to_int = read_corpus(corpus)

corpus_test = read_lines('/eng_test.txt')
testx, testy, _ = read_corpus(corpus_test)

corpus_validate = read_lines('/eng_validate.txt')
validatex, validatey, _ = read_corpus(corpus_validate)

embed_size = 50
scrf_size = 100
allowed_span_length = 6
epochs = 100
validate_epochs = len(validatex)
test_epochs = len(testx)

model = NERModel(embed_size, scrf_size, tag_to_int, tag_to_int['<STOP>'],
                 tag_to_int['<START>'], allowed_span_length)

optimizer = optim.Adagrad(model.parameters(), lr=0.009)

word_dict = gs.Word2Vec(datax + validatex + testx,
                        min_count=1,
                        size=embed_size)

data_loader = DataLoader(word_dict, datax, datay, testx, testy, validatex,
                         validatey)

train(model, data_loader, optimizer, epochs, validate_epochs)

test(model, data_loader, test_epochs)
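This example depends on `read_lines` and `read_corpus`, which are defined elsewhere. A hypothetical sketch of a `read_corpus` that turns CoNLL-style lines (token and tag per line, blank line between sentences) into the token lists, tag lists, and `tag_to_int` map with the `<START>`/`<STOP>` entries the model constructor expects:

def read_corpus(lines):
    # Assumed input: one "token ... tag" line per word, blank lines separate sentences.
    sentences, labels = [], []
    tokens, tags = [], []
    tag_to_int = {'<START>': 0, '<STOP>': 1}
    for line in lines:
        line = line.strip()
        if not line:
            if tokens:
                sentences.append(tokens)
                labels.append(tags)
                tokens, tags = [], []
            continue
        parts = line.split()
        token, tag = parts[0], parts[-1]
        if tag not in tag_to_int:
            tag_to_int[tag] = len(tag_to_int)
        tokens.append(token)
        tags.append(tag)
    if tokens:
        sentences.append(tokens)
        labels.append(tags)
    return sentences, labels, tag_to_int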
Example #4
    if tokenizer is not None:
        print("Tokenizer loaded successfully")

    train_dataset = NerDataset(hp.trainset, tokenizer=tokenizer)
    eval_dataset = NerDataset(hp.validset, tokenizer=tokenizer)

    train_iter = data.DataLoader(dataset=train_dataset,
                                 batch_size=hp.batch_size,
                                 shuffle=True,
                                 num_workers=4,
                                 collate_fn=pad)
    eval_iter = data.DataLoader(dataset=eval_dataset,
                                batch_size=hp.batch_size,
                                shuffle=False,
                                num_workers=4,
                                collate_fn=pad)

    optimizer = optim.Adam(model.parameters(), lr=hp.lr)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    for epoch in range(1, hp.n_epochs + 1):
        train(model, train_iter, optimizer, criterion)

        print(f"=========eval at epoch={epoch}=========")
        os.makedirs(hp.logdir, exist_ok=True)
        fname = os.path.join(hp.logdir, str(epoch))
        precision, recall, f1 = eval(model, eval_iter, fname)

        torch.save(model.state_dict(), f"{fname}.pt")
        print(f"weights were saved to {fname}.pt")
Example #5
    parser.add_argument('--decay_rate', type=float, default=0.05, help='decay rate')
    parser.add_argument('--plot_interval', type=int, default=2000, help='plot every # steps')

    args = parser.parse_args()
    torch.manual_seed(args.seed)

    # =============== Load device ===============
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    device = torch.device("cuda" if args.cuda else "cpu")

    # =============== Load data ===============
    cleaner = data.Cleaner(args)
    raw_train_data, raw_dev_data, raw_test_data = cleaner.clean()
    dataset = data.Dataset(raw_train_data, raw_dev_data, raw_test_data, args)
    word2idx, tag2idx, char2idx = dataset.word_to_id, dataset.tag_to_id, dataset.char_to_id
    train_data, dev_data, test_data = dataset.train_data, dataset.dev_data, dataset.test_data
    print("{} / {} / {} sentences in train / dev / test.".format(len(train_data), len(dev_data), len(test_data)))

    # =============== Build the model ===============
    model = NERModel(word2idx, tag2idx, char2idx, args)
    if args.cuda:
        model.to(device)
    print('Model initialized! n_params = {}'.format(sum(p.numel() for p in model.parameters() if p.requires_grad)))

    # =============== Train the model ===============
    all_f1, all_acc = create_and_train_model(model, train_data, dev_data, test_data, tag2idx, args)
    print('f1 = {}'.format(all_f1))
    print('acc = {}'.format(all_acc))
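This fragment begins in the middle of argument parsing; the parser itself and the flags read later (`args.seed`, `args.cuda`) are defined above the shown lines. A sketch of what that omitted setup might look like (flag names are taken from the attributes the fragment reads; the defaults are assumptions):

import argparse

parser = argparse.ArgumentParser(description='NER training')
# Flags referenced later in the fragment.
parser.add_argument('--seed', type=int, default=1, help='random seed')
parser.add_argument('--cuda', action='store_true', help='use CUDA if available')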