pairs_batch_dev = DataLoader(dataset=data_dev,
                             batch_size=batch_size,
                             shuffle=True,
                             collate_fn=prepare_data.collate,
                             pin_memory=True)

# initialize the model
model = NERModel(word_embedding_dim, char_embedding_dim, morph_embedding_dim,
                 word_hidden_size, char_hidden_size, morph_hidden_size,
                 len(char2idx), len(morph2idx), len(tag2idx) + 1,
                 word_num_layers, char_num_layers, morph_num_layers,
                 dropout_prob).to(device)
model.train()

criterion = nn.NLLLoss()
optimizer = radam.RAdam(model.parameters(), lr=learning_rate)

print(model)
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('The number of trainable parameters is: %d' % total_trainable_params)

# train the model unless training is skipped
if not skip_training:
    train(model, word_num_layers, char_num_layers, morph_num_layers, num_epochs,
          pairs_batch_train, pairs_batch_dev, word_hidden_size, char_hidden_size,
          morph_hidden_size, batch_size, criterion, optimizer, patience, device)

# in either case, load the saved weights from disk
model.load_state_dict(torch.load('weights/model_lower.pt'))
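# --- Hypothetical sketch (not part of the script above): prepare_data.collate is
# referenced but not shown. A collate_fn in this style could look like the following,
# assuming each sample is a tuple of (word_ids, char_ids, morph_ids, tag_ids) tensors;
# the field layout and padding index 0 are assumptions, not the repo's actual code.
import torch
from torch.nn.utils.rnn import pad_sequence

def collate(batch):
    # sort by sentence length (longest first) so packed RNN inputs can be built later
    batch.sort(key=lambda sample: len(sample[0]), reverse=True)
    words, chars, morphs, tags = zip(*batch)
    lengths = torch.tensor([len(w) for w in words])
    # pad word and tag sequences to the longest sentence in the batch
    words = pad_sequence(words, batch_first=True, padding_value=0)
    tags = pad_sequence(tags, batch_first=True, padding_value=0)
    # char and morph sequences are nested (per token) and would need their own padding
    return words, chars, morphs, tags, lengths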
def main():
    hp = parse_args()

    # Set up model directories
    model_name = get_model_name(hp)
    model_path = path.join(hp.model_dir, model_name)
    best_model_path = path.join(model_path, 'best_models')
    if not path.exists(model_path):
        os.makedirs(model_path)
    if not path.exists(best_model_path):
        os.makedirs(best_model_path)

    # Set random seed
    torch.manual_seed(hp.seed)

    # Case-preserved encoder
    encoder = Encoder(model=hp.model, model_size=hp.model_size,
                      fine_tune=hp.fine_tune, cased=True)

    # Load data; the number of labels is read off the dataset here
    logging.info("Loading data")
    train_iter, val_iter, test_iter, num_labels = NERDataset.iters(
        hp.data_dir, encoder, batch_size=hp.batch_size,
        eval_batch_size=hp.eval_batch_size, train_frac=hp.train_frac)
    logging.info("Data loaded")

    # Initialize the model
    model = NERModel(encoder, num_labels=num_labels, **vars(hp)).cuda()
    sys.stdout.flush()

    # Only optimize the task-specific parameters unless the encoder is being fine-tuned
    if not hp.fine_tune:
        optimizer = torch.optim.Adam(model.get_other_params(), lr=hp.lr)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=hp.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', patience=5, factor=0.5, verbose=True)

    steps_done = 0
    max_f1 = 0
    init_num_stuck_evals = 0
    num_steps = (hp.n_epochs * len(train_iter.data())) // hp.real_batch_size
    # Quantize the number of training steps to a multiple of the eval interval
    num_steps = (num_steps // hp.eval_steps) * hp.eval_steps
    logging.info("Total training steps: %d" % num_steps)

    # Resume from a previous checkpoint if one exists
    location = path.join(model_path, "model.pt")
    if path.exists(location):
        logging.info("Loading previous checkpoint")
        checkpoint = torch.load(location)
        model.encoder.weighing_params = checkpoint['weighing_params']
        if hp.fine_tune:
            model.encoder.model.load_state_dict(checkpoint['encoder'])
        model.span_net.load_state_dict(checkpoint['span_net'])
        model.label_net.load_state_dict(checkpoint['label_net'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        steps_done = checkpoint['steps_done']
        init_num_stuck_evals = checkpoint['num_stuck_evals']
        max_f1 = checkpoint['max_f1']
        torch.set_rng_state(checkpoint['rng_state'])
        logging.info("Steps done: %d, Max F1: %.3f" % (steps_done, max_f1))

    if not hp.eval:
        train(hp, model, train_iter, val_iter, optimizer, scheduler,
              model_path, best_model_path, init_steps=steps_done, max_f1=max_f1,
              eval_steps=hp.eval_steps, num_steps=num_steps,
              init_num_stuck_evals=init_num_stuck_evals)

    val_f1, test_f1 = final_eval(hp, model, best_model_path, val_iter, test_iter)

    # Write validation/test F1 to a performance file
    perf_dir = path.join(hp.model_dir, "perf")
    if not path.exists(perf_dir):
        os.makedirs(perf_dir)

    if hp.slurm_job_id and hp.slurm_array_id:
        perf_file = path.join(perf_dir, hp.slurm_job_id + "_" + hp.slurm_array_id + ".txt")
    else:
        perf_file = path.join(model_path, "perf.txt")

    with open(perf_file, "w") as f:
        f.write("%s\n" % model_path)
        f.write("%s\t%.4f\n" % ("Valid", val_f1))
        f.write("%s\t%.4f\n" % ("Test", test_f1))
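# --- Hypothetical sketch (not part of the script above): train() must write the
# checkpoint that the resume block in main() reads back. A minimal save step with
# exactly those keys could look like this; the function name and argument list are
# assumptions, only the checkpoint keys come from the loading code above.
def save_checkpoint(model, optimizer, scheduler, steps_done, num_stuck_evals,
                    max_f1, fine_tune, location):
    checkpoint = {
        'weighing_params': model.encoder.weighing_params,
        'span_net': model.span_net.state_dict(),
        'label_net': model.label_net.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'steps_done': steps_done,
        'num_stuck_evals': num_stuck_evals,
        'max_f1': max_f1,
        'rng_state': torch.get_rng_state(),
    }
    if fine_tune:
        # encoder weights are only checkpointed when they are being fine-tuned
        checkpoint['encoder'] = model.encoder.model.state_dict()
    torch.save(checkpoint, location)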
# read the train / test / validation splits
corpus = read_lines('/eng.txt')
datax, datay, tag_to_int = read_corpus(corpus)
corpus_test = read_lines('/eng_test.txt')
testx, testy, _ = read_corpus(corpus_test)
corpus_validate = read_lines('/eng_validate.txt')
validatex, validatey, _ = read_corpus(corpus_validate)

# hyperparameters
embed_size = 50
scrf_size = 100
allowed_span_length = 6
epochs = 100
validate_epochs = len(validatex)
test_epochs = len(testx)

# build the span-based (SCRF) NER model; candidate spans are capped at allowed_span_length tokens
model = NERModel(embed_size, scrf_size, tag_to_int,
                 tag_to_int['<STOP>'], tag_to_int['<START>'],
                 allowed_span_length)
optimizer = optim.Adagrad(model.parameters(), lr=0.009)

# Word2Vec embeddings trained on all splits
# (gensim < 4.0 uses `size=`; gensim >= 4.0 renamed it to `vector_size=`)
word_dict = gs.Word2Vec(datax + validatex + testx, min_count=1, size=embed_size)

data_loader = DataLoader(word_dict, datax, datay, testx, testy, validatex, validatey)

train(model, data_loader, optimizer, epochs, validate_epochs)
test(model, data_loader, test_epochs)
if tokenizer is not None:
    print("Tokenizer loaded successfully")

train_dataset = NerDataset(hp.trainset, tokenizer=tokenizer)
eval_dataset = NerDataset(hp.validset, tokenizer=tokenizer)

train_iter = data.DataLoader(dataset=train_dataset,
                             batch_size=hp.batch_size,
                             shuffle=True,
                             num_workers=4,
                             collate_fn=pad)
eval_iter = data.DataLoader(dataset=eval_dataset,
                            batch_size=hp.batch_size,
                            shuffle=False,
                            num_workers=4,
                            collate_fn=pad)

optimizer = optim.Adam(model.parameters(), lr=hp.lr)
# index 0 is the padding label, so it is excluded from the loss
criterion = nn.CrossEntropyLoss(ignore_index=0)

for epoch in range(1, hp.n_epochs + 1):
    train(model, train_iter, optimizer, criterion)

    print(f"=========eval at epoch={epoch}=========")
    if not os.path.exists(hp.logdir):
        os.makedirs(hp.logdir)
    fname = os.path.join(hp.logdir, str(epoch))
    precision, recall, f1 = eval(model, eval_iter, fname)

    # checkpoint the model after every evaluation
    torch.save(model.state_dict(), f"{fname}.pt")
    print(f"weights were saved to {fname}.pt")
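# --- Hypothetical sketch (not part of the script above): the `pad` collate_fn is
# referenced but not shown. A minimal version, assuming each NerDataset item yields
# (token_ids, label_ids) lists of equal length; padding with 0 matches the
# ignore_index=0 passed to CrossEntropyLoss above. The field layout is an assumption.
import torch

def pad(batch):
    maxlen = max(len(tokens) for tokens, _ in batch)
    x = [tokens + [0] * (maxlen - len(tokens)) for tokens, _ in batch]
    y = [labels + [0] * (maxlen - len(labels)) for _, labels in batch]
    return torch.LongTensor(x), torch.LongTensor(y)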
parser.add_argument('--decay_rate', type=float, default=0.05, help='decay rate')
parser.add_argument('--plot_interval', type=int, default=2000, help='plot every # steps')
args = parser.parse_args()

torch.manual_seed(args.seed)

# =============== Select device ===============
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
device = torch.device("cuda" if args.cuda else "cpu")

# =============== Load data ===============
cleaner = data.Cleaner(args)
raw_train_data, raw_dev_data, raw_test_data = cleaner.clean()
dataset = data.Dataset(raw_train_data, raw_dev_data, raw_test_data, args)
word2idx, tag2idx, char2idx = dataset.word_to_id, dataset.tag_to_id, dataset.char_to_id
train_data, dev_data, test_data = dataset.train_data, dataset.dev_data, dataset.test_data
print("{} / {} / {} sentences in train / dev / test.".format(
    len(train_data), len(dev_data), len(test_data)))

# =============== Build the model ===============
model = NERModel(word2idx, tag2idx, char2idx, args)
if args.cuda:
    model.to(device)
print('Model initialized, n_params = {}'.format(
    sum(p.numel() for p in model.parameters() if p.requires_grad)))

# =============== Train the model ===============
all_f1, all_acc = create_and_train_model(model, train_data, dev_data, test_data, tag2idx, args)
print('f1 = {}'.format(all_f1))
print('acc = {}'.format(all_acc))