def train_one(config: Config, train_insts: List[Instance], dev_insts: List[Instance], model_name: str,
              test_insts: List[Instance] = None, config_name: str = None,
              result_filename: str = None) -> NNCRF:
    """Train a fresh NNCRF and checkpoint it whenever the dev score improves.

    Runs ``config.num_epochs`` epochs over randomly permuted training batches.
    After each epoch the model is evaluated on ``dev_insts`` (and on
    ``test_insts`` when given); if ``dev_metrics[2]`` beats the best value seen
    so far, the model's ``state_dict`` is saved to ``model_name``.

    Args:
        config: Experiment configuration; ``num_epochs`` and ``optimizer`` are read here.
        train_insts: Training instances, batched once before the first epoch.
        dev_insts: Development instances used for model selection.
        model_name: Path the best ``state_dict`` is written to.
        test_insts: Optional test instances evaluated alongside dev.
        config_name: If truthy, ``config`` is pickled to this path on every new best.
        result_filename: If truthy, ``write_results`` is called with this path
            and ``test_insts`` on every new best.

    Returns:
        The model object as it stands after the final epoch (the best weights
        are the ones stored at ``model_name``, not necessarily the returned ones).
    """
    train_batches = batching_list_instances(config, train_insts)
    # NOTE(review): dev_batches / test_batches are never read below
    # (evaluate_model is handed the raw instances). They are still built here
    # because the batching helpers are opaque from this function.
    dev_batches = batching_list_instances(config, dev_insts)
    if test_insts:
        test_batches = simple_batching(config, test_insts)
    else:
        test_batches = None

    model = NNCRF(config)
    model.train()
    optimizer = get_optimizer(config, model)
    epoch = config.num_epochs
    best_dev_f1 = -1
    saved_test_metrics = None

    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)

        # Visit the pre-built batches in a fresh random order every epoch.
        for index in np.random.permutation(len(train_batches)):
            model.train()
            loss = model(*train_batches[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()

        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)

        model.eval()
        # metric is [precision, recall, f_score]
        dev_metrics = evaluate_model(config, model, "dev", dev_insts)
        if test_insts is not None:
            test_metrics = evaluate_model(config, model, "test", test_insts)

        if dev_metrics[2] > best_dev_f1:
            print("saving the best model...")
            best_dev_f1 = dev_metrics[2]
            if test_insts is not None:
                saved_test_metrics = test_metrics
            torch.save(model.state_dict(), model_name)
            # Save the corresponding config as well. The context manager
            # closes the file even if pickling fails part-way through.
            if config_name:
                with open(config_name, 'wb') as f:
                    pickle.dump(config, f)
            # NOTE(review): test_insts may still be None here when only
            # result_filename was supplied; confirm write_results copes with that.
            if result_filename:
                write_results(result_filename, test_insts)
        model.zero_grad()

    if test_insts is not None:
        print(f"The best dev F1: {best_dev_f1}")
        print(f"The corresponding test: {saved_test_metrics}")
    return model
# Sorted by scores, desc task_ids, scores = get_task_ids_and_scores(connect_str_or_path, USE_SQLITE, population_id) print_with_time("Exploiting interval %s. Best score: %.2f" % (intervals_trained_col[0]-1, max(scores))) seed_for_shuffling = np.random.randint(10**5) fraction = 0.20 cutoff = int(np.ceil(fraction * len(task_ids))) top_ids = task_ids[:cutoff] bottom_ids = task_ids[len(task_ids)-cutoff:] nonbottom_ids = task_ids[:len(task_ids)-cutoff] for bottom_id in bottom_ids: top_id = np.random.choice(top_ids) model = MODEL_CLASS() optimizer = get_optimizer(model) top_trainer = Trainer(model=model, optimizer=optimizer) top_checkpoint_path = (checkpoint_str % (population_id, top_id)) top_trainer.load_checkpoint(top_checkpoint_path) model = MODEL_CLASS() optimizer = get_optimizer(model) bot_trainer = Trainer(model=model, optimizer=optimizer) bot_checkpoint_path = (checkpoint_str % (population_id, bottom_id)) bot_trainer.exploit_and_explore(top_trainer, HYPERPARAM_NAMES) bot_trainer.save_checkpoint(bot_checkpoint_path) key_value_pairs = dict(
def train_model(config: Config, epoch: int, train_insts: List[Instance], dev_insts: List[Instance],
                test_insts: List[Instance]):
    """Train an NNCRF or TransformersCRF with early stopping, then archive and re-test the best model.

    The model class is chosen by ``config.embedder_type`` ("normal" selects
    NNCRF with the project optimizer and no scheduler; anything else selects
    TransformersCRF with the huggingface optimizer/scheduler pair). After
    every epoch the model is evaluated on dev and test; when ``dev_metrics[2]``
    improves, the weights, the pickled config and the test results are written
    out. Training stops early once ``config.max_no_incre`` consecutive epochs
    bring no dev improvement.

    Args:
        config: Experiment configuration (``embedder_type``, ``device``,
            ``model_folder``, ``optimizer``, ``max_grad_norm`` and
            ``max_no_incre`` are read here).
        epoch: Maximum number of training epochs.
        train_insts: Training instances; shuffled in place before batching.
        dev_insts: Development instances used for model selection.
        test_insts: Test instances evaluated every epoch and at the end.

    Raises:
        FileExistsError: If ``model_files/<config.model_folder>`` already exists.
    """
    ### Data Processing Info
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)

    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)

    if config.embedder_type == "normal":
        model = NNCRF(config)
        optimizer = get_optimizer(config, model)
        scheduler = None
    else:
        print(colored(
            f"[Model Info]: Working with transformers package from huggingface with {config.embedder_type}",
            'red'))
        print(colored(
            "[Optimizer Info]: You should be aware that you are using the optimizer from huggingface.",
            'red'))
        print(colored(
            "[Optimizer Info]: Change the optimier in transformers_util.py if you want to make some modifications.",
            'red'))
        model = TransformersCRF(config)
        # The scheduler is sized for one step per training batch over all epochs.
        optimizer, scheduler = get_huggingface_optimizer_and_scheduler(
            config, model,
            num_training_steps=len(batched_data) * epoch,
            weight_decay=0.0,
            eps=1e-8,
            warmup_step=0)
        print(colored("[Optimizer Info] Modify the optimizer info as you need.", 'red'))
        print(optimizer)

    model.to(config.device)

    # Each pair is [best score, epoch at which it was reached].
    best_dev = [-1, 0]
    best_test = [-1, 0]

    model_folder = config.model_folder
    res_folder = "results"
    if os.path.exists("model_files/" + model_folder):
        raise FileExistsError(
            f"The folder model_files/{model_folder} exists. Please either delete it or create a new one "
            f"to avoid override.")
    model_path = f"model_files/{model_folder}/lstm_crf.m"
    config_path = f"model_files/{model_folder}/config.conf"
    res_path = f"{res_folder}/{model_folder}.results"
    print("[Info] The model will be saved to: %s.tar.gz" % (model_folder))
    os.makedirs(f"model_files/{model_folder}", exist_ok=True)  ## create model files. not raise error if exist
    os.makedirs(res_folder, exist_ok=True)

    no_incre_dev = 0
    print(colored(
        f"[Train Info] Start training, you have set to stop if performace not increase for {config.max_no_incre} epochs",
        'red'))
    for i in tqdm(range(1, epoch + 1), desc="Epoch"):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)

        # Visit the pre-built batches in a fresh random order every epoch.
        for index in tqdm(np.random.permutation(len(batched_data)), desc="--training batch",
                          total=len(batched_data)):
            model.train()
            loss = model(**batched_data[index])
            epoch_loss += loss.item()
            loss.backward()
            if config.max_grad_norm > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)
            optimizer.step()
            optimizer.zero_grad()
            model.zero_grad()
            # Stepped once per batch, matching num_training_steps above.
            if scheduler is not None:
                scheduler.step()

        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)

        model.eval()
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        # Index 2 is the selection score (presumably the F-score; confirm against evaluate_model).
        if dev_metrics[2] > best_dev[0]:
            print("saving the best model...")
            no_incre_dev = 0
            best_dev[0] = dev_metrics[2]
            best_dev[1] = i
            best_test[0] = test_metrics[2]
            best_test[1] = i
            torch.save(model.state_dict(), model_path)
            # Save the corresponding config as well. The context manager
            # closes the file even if pickling fails part-way through.
            with open(config_path, 'wb') as f:
                pickle.dump(config, f)
            write_results(res_path, test_insts)
        else:
            no_incre_dev += 1
        model.zero_grad()
        if no_incre_dev >= config.max_no_incre:
            print("early stop because there are %d epochs not increasing f1 on dev" % no_incre_dev)
            break

    print("Archiving the best Model...")
    with tarfile.open(f"model_files/{model_folder}/{model_folder}.tar.gz", "w:gz") as tar:
        tar.add(f"model_files/{model_folder}", arcname=os.path.basename(model_folder))

    print("Finished archiving the models")

    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    # Reload the best checkpoint so the final results come from it, not from the last epoch.
    model.load_state_dict(torch.load(model_path))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_path, test_insts)
def train_model(config: Config, epoch: int, train_insts: List[Instance], dev_insts: List[Instance],
                test_insts: List[Instance]):
    """Train an NNCRF, keeping and archiving the checkpoint with the best test score.

    After every epoch the model is evaluated on dev and test. When
    ``test_metrics[1][2]`` exceeds the best value so far, the weights and the
    pickled config are written into ``config.model_folder``, the last two
    entries of ``test_metrics`` are printed as the "Exact" and "Overlap"
    reports, the test results are written out and the model folder is
    re-archived. After training the best checkpoint is reloaded and evaluated
    on test once more.

    Args:
        config: Experiment configuration (``model_folder`` and ``optimizer`` are read here).
        epoch: Number of training epochs.
        train_insts: Training instances; shuffled in place before batching.
        dev_insts: Development instances, evaluated every epoch.
        test_insts: Test instances, evaluated every epoch and at the end.

    Raises:
        FileExistsError: If ``config.model_folder`` already exists.
    """
    model = NNCRF(config)
    optimizer = get_optimizer(config, model)
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)

    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)

    # Each pair is [best score, epoch at which it was reached].
    best_dev = [-1, 0]
    best_test = [-1, 0]

    model_folder = config.model_folder
    res_folder = "results"
    if os.path.exists(model_folder):
        raise FileExistsError(
            f"The folder {model_folder} exists. Please either delete it or create a new one "
            f"to avoid override.")
    model_name = model_folder + "/lstm_crf.m"
    config_name = model_folder + "/config.conf"
    res_name = res_folder + "/lstm_crf.results"
    print("[Info] The model will be saved to: %s.tar.gz" % (model_folder))
    os.makedirs(model_folder, exist_ok=True)
    os.makedirs(res_folder, exist_ok=True)

    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)

        # Visit the pre-built batches in a fresh random order every epoch.
        for index in np.random.permutation(len(batched_data)):
            model.train()
            loss = model(*batched_data[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()

        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)

        model.eval()
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        # NOTE(review): the checkpoint is selected on the *test* score, not dev.
        # Kept as found; confirm this is intended, since it leaks test data
        # into model selection.
        if test_metrics[1][2] > best_test[0]:
            print("saving the best model...")
            best_dev[0] = dev_metrics[1][2]
            best_dev[1] = i
            best_test[0] = test_metrics[1][2]
            best_test[1] = i
            torch.save(model.state_dict(), model_name)
            # Save the corresponding config as well. The context manager
            # closes the file even if pickling fails part-way through.
            with open(config_name, 'wb') as f:
                pickle.dump(config, f)
            print('Exact\n')
            print_report(test_metrics[-2])
            print('Overlap\n')
            print_report(test_metrics[-1])
            write_results(res_name, test_insts)
            # NOTE(review): statement order places the archive step before the
            # per-epoch zero_grad, so it is kept inside the "new best" branch
            # and the archive is refreshed on every improvement. Confirm
            # against the original layout.
            print("Archiving the best Model...")
            with tarfile.open(model_folder + "/" + model_folder + ".tar.gz", "w:gz") as tar:
                tar.add(model_folder, arcname=os.path.basename(model_folder))
        model.zero_grad()

    print("Finished archiving the models")

    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    # Reload the best checkpoint so the final results come from it, not from the last epoch.
    model.load_state_dict(torch.load(model_name))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
def train_model(config: Config, epoch: int, train_insts: List[Instance], dev_insts: List[Instance],
                test_insts: List[Instance]):
    """Train an NNCRF with early stopping, then archive and re-test the best model.

    After every epoch the model is evaluated on dev and test; when
    ``dev_metrics[2]`` improves, the weights, the pickled config and the test
    results are written out. Training stops early once
    ``config.max_no_incre`` consecutive epochs bring no dev improvement.
    Finally the model folder is archived as a ``.tar.gz`` and the best
    checkpoint is reloaded for one last test evaluation.

    Args:
        config: Experiment configuration (``model_folder``, ``optimizer`` and
            ``max_no_incre`` are read here).
        epoch: Maximum number of training epochs.
        train_insts: Training instances; shuffled in place before batching.
        dev_insts: Development instances used for model selection.
        test_insts: Test instances evaluated every epoch and at the end.

    Raises:
        FileExistsError: If ``model_files/<config.model_folder>`` already exists.
    """
    model = NNCRF(config)
    optimizer = get_optimizer(config, model)
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)

    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)

    # Each pair is [best score, epoch at which it was reached].
    best_dev = [-1, 0]
    best_test = [-1, 0]

    model_folder = config.model_folder
    res_folder = "results"
    if os.path.exists("model_files/" + model_folder):
        raise FileExistsError(
            f"The folder model_files/{model_folder} exists. Please either delete it or create a new one "
            f"to avoid override.")
    model_path = f"model_files/{model_folder}/lstm_crf.m"
    config_path = f"model_files/{model_folder}/config.conf"
    res_path = f"{res_folder}/{model_folder}.results"
    print("[Info] The model will be saved to: %s.tar.gz" % (model_folder))
    os.makedirs(f"model_files/{model_folder}", exist_ok=True)  ## create model files. not raise error if exist
    os.makedirs(res_folder, exist_ok=True)

    no_incre_dev = 0
    for i in tqdm(range(1, epoch + 1), desc="Epoch"):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)

        # Visit the pre-built batches in a fresh random order every epoch.
        for index in tqdm(np.random.permutation(len(batched_data)), desc="--training batch",
                          total=len(batched_data)):
            model.train()
            loss = model(*batched_data[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()

        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)

        model.eval()
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        # Index 2 is the selection score (presumably the F-score; confirm against evaluate_model).
        if dev_metrics[2] > best_dev[0]:
            print("saving the best model...")
            no_incre_dev = 0
            best_dev[0] = dev_metrics[2]
            best_dev[1] = i
            best_test[0] = test_metrics[2]
            best_test[1] = i
            torch.save(model.state_dict(), model_path)
            # Save the corresponding config as well. The context manager
            # closes the file even if pickling fails part-way through.
            with open(config_path, 'wb') as f:
                pickle.dump(config, f)
            write_results(res_path, test_insts)
        else:
            no_incre_dev += 1
        model.zero_grad()
        if no_incre_dev >= config.max_no_incre:
            print("early stop because there are %d epochs not increasing f1 on dev" % no_incre_dev)
            break

    print("Archiving the best Model...")
    with tarfile.open(f"model_files/{model_folder}/{model_folder}.tar.gz", "w:gz") as tar:
        tar.add(f"model_files/{model_folder}", arcname=os.path.basename(model_folder))

    print("Finished archiving the models")

    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    # Reload the best checkpoint so the final results come from it, not from the last epoch.
    model.load_state_dict(torch.load(model_path))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_path, test_insts)
def train_one(config: Config, train_batches: List[Tuple], dev_insts: List[Instance], dev_batches: List[Tuple],
              model_name: str, test_insts: List[Instance] = None, test_batches: List[Tuple] = None,
              config_name: str = None, result_filename: str = None,
              rate_schedule_neg=None, rate_schedule_pos=None) -> NNCRF_sl:
    """Train a fresh NNCRF_sl with per-epoch forget rates, checkpointing on dev F1.

    For epoch ``i`` (1-based) the forget rates ``rate_schedule_neg[i - 1]`` and
    ``rate_schedule_pos[i - 1]`` are appended, together with
    ``config.is_constrain``, to every batch's arguments before the forward
    pass. After each epoch the model is evaluated on dev (and on test when
    ``test_insts`` is given); if ``dev_metrics[2]`` improves, the model's
    ``state_dict`` is saved to ``model_name``.

    Args:
        config: Experiment configuration (``num_epochs``, ``optimizer`` and
            ``is_constrain`` are read here).
        train_batches: Pre-built training batches; each is unpacked as the
            leading positional arguments of the model call.
        dev_insts: Development instances used for model selection.
        dev_batches: Pre-built batches for ``dev_insts``.
        model_name: Path the best ``state_dict`` is written to.
        test_insts: Optional test instances evaluated alongside dev.
        test_batches: Pre-built batches for ``test_insts``.
        config_name: If truthy, ``config`` is pickled to this path on every new best.
        result_filename: If truthy, ``write_results`` is called with this path
            and ``test_insts`` on every new best.
        rate_schedule_neg: Indexable schedule of forget rates, one entry per
            epoch. Despite the ``None`` default it is indexed every epoch, so
            it must be supplied whenever ``config.num_epochs > 0``.
        rate_schedule_pos: Same as ``rate_schedule_neg``, passed as the second
            forget rate.

    Returns:
        The model object as it stands after the final epoch (the best weights
        are the ones stored at ``model_name``, not necessarily the returned ones).
    """
    model = NNCRF_sl(config)
    model.train()
    optimizer = get_optimizer(config, model)
    epoch = config.num_epochs
    best_dev_f1 = -1
    saved_test_metrics = None
    # Denominator for the logged per-batch averages; guarded so an empty
    # batch list does not divide by zero.
    num_batches = max(len(train_batches), 1)

    for i in range(1, epoch + 1):
        forget_rate_neg = rate_schedule_neg[i - 1]
        forget_rate_pos = rate_schedule_pos[i - 1]
        epoch_loss = 0
        epoch_loss_neg = 0
        epoch_loss_pos = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        is_constrain = config.is_constrain

        # Visit the pre-built batches in a fresh random order every epoch.
        for index in np.random.permutation(len(train_batches)):
            model.train()
            # The model also returns per-batch ratios; they are not used here.
            loss, _ratios, loss_neg, loss_pos = model(
                *train_batches[index], forget_rate_neg, forget_rate_pos, is_constrain)
            epoch_loss += loss.item()
            epoch_loss_neg += loss_neg.item()
            epoch_loss_pos += loss_pos.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()

        end_time = time.time()
        # Averages are taken over batches (the sums above run over batches).
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss / num_batches, end_time - start_time),
              flush=True)
        print('avg neg NLL: ' + str(epoch_loss_neg / num_batches) +
              ' avg pos NLL: ' + str(epoch_loss_pos / num_batches))

        model.eval()
        # metric is [precision, recall, f_score]
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        if test_insts is not None:
            test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)

        if dev_metrics[2] > best_dev_f1:
            print("saving the best model..." + ' epoch' + str(i))
            best_dev_f1 = dev_metrics[2]
            if test_insts is not None:
                saved_test_metrics = test_metrics
            torch.save(model.state_dict(), model_name)
            # Save the corresponding config as well. The context manager
            # closes the file even if pickling fails part-way through.
            if config_name:
                with open(config_name, 'wb') as f:
                    pickle.dump(config, f)
            if result_filename:
                write_results(result_filename, test_insts)
        model.zero_grad()

    if test_insts is not None:
        print(f"The best dev F1: {best_dev_f1}")
        print(f"The corresponding test: {saved_test_metrics}")
    return model
def train_one(
    config: Config,
    train_batches: List[Tuple],
    dev_insts: List[Instance],
    dev_batches: List[Tuple],
    model_name: str,
    test_insts: List[Instance] = None,
    test_batches: List[Tuple] = None,
    config_name: str = None,
    result_filename: str = None,
) -> NNCRF:
    """Train a fresh NNCRF under a SlantedTriangular schedule, checkpointing on dev F1.

    The learning-rate scheduler is stepped once at the start of every epoch
    and once after every batch. After each epoch the model is evaluated on dev
    (and on test when ``test_insts`` is given); if ``dev_metrics[2]``
    improves, the model's ``state_dict`` is saved to ``model_name``.

    Args:
        config: Experiment configuration; ``num_epochs`` is read here.
        train_batches: Pre-built training batches; each is unpacked as the
            positional arguments of the model call.
        dev_insts: Development instances used for model selection.
        dev_batches: Pre-built batches for ``dev_insts``.
        model_name: Path the best ``state_dict`` is written to.
        test_insts: Optional test instances evaluated alongside dev.
        test_batches: Pre-built batches for ``test_insts``.
        config_name: If truthy, ``config`` is pickled to this path on every new best.
        result_filename: If truthy, ``write_results`` is called with this path
            and ``test_insts`` on every new best.

    Returns:
        The model object as it stands after the final epoch (the best weights
        are the ones stored at ``model_name``, not necessarily the returned ones).
    """
    model = NNCRF(config)
    model.train()
    optimizer = get_optimizer(config, model)
    lr_scheduler = SlantedTriangular(optimizer, config.num_epochs,
                                     num_steps_per_epoch=len(train_batches), ratio=16)
    epoch = config.num_epochs
    best_dev_f1 = -1
    saved_test_metrics = None

    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        lr_scheduler.step(epoch=i)

        # Visit the pre-built batches in a fresh random order every epoch.
        for index in tqdm(np.random.permutation(len(train_batches)),
                          desc=f"Training epoch {i}", total=len(train_batches)):
            model.train()
            loss = model(*train_batches[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
            lr_scheduler.step_batch()

        end_time = time.time()
        # The logged loss is the mean over batches.
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss / len(train_batches), end_time - start_time),
              flush=True)

        model.eval()
        # metric is [precision, recall, f_score]
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        if test_insts is not None:
            test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)

        if dev_metrics[2] > best_dev_f1:
            print("saving the best model...")
            best_dev_f1 = dev_metrics[2]
            if test_insts is not None:
                saved_test_metrics = test_metrics
            torch.save(model.state_dict(), model_name)
            # Save the corresponding config as well. The context manager
            # closes the file even if pickling fails part-way through.
            if config_name:
                with open(config_name, "wb") as f:
                    pickle.dump(config, f)
            if result_filename:
                write_results(result_filename, test_insts)
        model.zero_grad()

    if test_insts is not None:
        print(f"The best dev F1: {best_dev_f1}")
        print(f"The corresponding test: {saved_test_metrics}")
    return model