def __init__(self, model: str = "attacut-sc"):
    # resolve model's path
    model_path = artifacts.get_path(model)
    params = utils.load_training_params(model_path)

    model_name = params.name
    log.info("loading model %s" % model_name)

    model_cls: models.BaseModel = models.get_model(model_name)

    # instantiate dataset
    dataset: dataloaders.SequenceDataset = model_cls.dataset(
        # dir=model_path,
        dict_dir=model_path
    )

    # load necessary dicts into memory
    data_config: Dict = dataset.setup_featurizer()

    # instantiate model
    self.model = model_cls.load(
        model_path,
        data_config,
        params.params
    )

    self.dataset = dataset
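# Usage sketch (illustrative, not taken from this file): assuming the class that
# owns the __init__ above is the library's tokenizer and exposes a tokenize()
# method (both names are assumptions), a caller would do something like:
#
#     tokenizer = Tokenizer(model="attacut-sc")
#     words = tokenizer.tokenize("...")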
def main(model_name, data_dir, epoch=10, lr=0.001, batch_size=64,
         weight_decay=0.0, checkpoint=5, model_params="", output_dir="",
         no_workers=4, lr_schedule="", prev_model=""):
    model_cls = models.get_model(model_name)
    dataset_cls = model_cls.dataset()

    training_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "training.txt")
    validation_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "val.txt")

    # featurizer setup is only required on the training set; it produces the shared data_config
    data_config = training_set.setup_featurizer("%s/dictionary" % data_dir)

    device = get_device()
    print("Using device: %s" % device)

    params = {}
    if model_params:
        params['model_config'] = model_params
        print(">> model configuration: %s" % model_params)

    if prev_model:
        print("Initializing model from %s" % prev_model)
        model = models.get_model(model_name).load(prev_model, data_config, **params)
    else:
        model = models.get_model(model_name)(data_config, **params)

    model = model.to(device)

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    if prev_model:
        print("Loading previous optimizer's state")
        optimizer.load_state_dict(torch.load("%s/optimizer.pth" % prev_model))
        print("Previous learning rate", get_lr(optimizer))

        # force torch to use the given lr, not the previous one
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
            param_group['initial_lr'] = lr

        print("Current learning rate", get_lr(optimizer))

    if lr_schedule:
        schedule_params = utils.parse_model_params(lr_schedule)
        scheduler = optim.lr_scheduler.StepLR(
            optimizer,
            step_size=schedule_params['step'],
            gamma=schedule_params['gamma'],
        )

    dataloader_params = dict(batch_size=batch_size,
                             num_workers=no_workers,
                             collate_fn=dataset_cls.collate_fn)

    print("Using dataset: %s" % type(dataset_cls).__name__)

    training_generator = data.DataLoader(training_set, shuffle=True, **dataloader_params)
    validation_generator = data.DataLoader(validation_set, shuffle=False, **dataloader_params)

    total_train_size = len(training_set)
    total_test_size = len(validation_set)

    print("We have %d train samples and %d test samples" % (total_train_size, total_test_size))

    # for FloydHub
    print('{"metric": "%s:%s", "value": %s}' % ("model", model_name, model.total_trainable_params()))

    utils.maybe_create_dir(output_dir)
    copy_files("%s/dictionary/*.json" % data_dir, output_dir)
    utils.save_training_params(
        output_dir,
        utils.ModelParams(name=model_name, params=model.model_params))

    for e in range(1, epoch + 1):
        print("===EPOCH %d ===" % (e))

        if lr_schedule:
            curr_lr = get_lr(optimizer)
            print_floydhub_metrics(dict(lr=curr_lr), step=e, prefix="global")
            print("lr: ", curr_lr)

        with utils.Timer("epoch-training") as timer:
            do_iterate(
                model,
                training_generator,
                prefix="training",
                step=e,
                device=device,
                optimizer=optimizer,
                criterion=criterion,
            )

        with utils.Timer("epoch-validation") as timer, \
                torch.no_grad():
            do_iterate(
                model,
                validation_generator,
                prefix="validation",
                step=e,
                device=device,
                criterion=criterion,
            )

        if lr_schedule:
            scheduler.step()

        if checkpoint and e % checkpoint == 0:
            model_path = "%s/model-e-%d.pth" % (output_dir, e)
            print("Saving model to %s" % model_path)
            torch.save(model.state_dict(), model_path)

    model_path = "%s/model.pth" % output_dir
    opt_path = "%s/optimizer.pth" % output_dir

    print("Saving model to %s" % model_path)
    torch.save(model.state_dict(), model_path)
    torch.save(optimizer.state_dict(), opt_path)
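# The helpers below (get_device, get_lr, print_floydhub_metrics) are called by
# the training script above but are not defined in this excerpt. These are
# minimal sketches written as assumptions about what they do; the actual
# implementations in the repository may differ.

import torch
from torch import optim


def get_device() -> torch.device:
    # Sketch: prefer CUDA when available, otherwise fall back to CPU.
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def get_lr(optimizer: optim.Optimizer) -> float:
    # Sketch: report the learning rate of the first parameter group, which is
    # what the "Previous/Current learning rate" prints above appear to expect.
    return optimizer.param_groups[0]["lr"]


def print_floydhub_metrics(metrics: dict, step: int = 0, prefix: str = ""):
    # Sketch: FloydHub picks up metrics printed to stdout as one JSON object per
    # line, mirroring the '{"metric": ..., "value": ...}' format used above.
    for name, value in metrics.items():
        print('{"metric": "%s:%s", "value": %s, "step": %d}' % (prefix, name, value, step))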
def main(
        model_name,
        data_dir,
        epoch=10,
        lr=0.001,
        batch_size=64,
        weight_decay=0.0,
        checkpoint=0,
        model_params="",
        output_dir="",
        no_workers=4,
        prev_model="",
):
    model_cls = models.get_model(model_name)

    output_scheme = output_tags.get_scheme(
        utils.parse_model_params(model_params)["oc"])

    dataset_cls = model_cls.dataset

    training_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "training.txt", output_scheme)
    validation_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "val.txt", output_scheme)

    # featurizer setup is only required on the training set; it produces the shared data_config
    data_config = training_set.setup_featurizer()

    device = models.get_device()
    print("Using device: %s" % device)

    params = {}
    if model_params:
        params['model_config'] = model_params
        print(">> model configuration: %s" % model_params)

    if prev_model:
        print("Initializing model from %s" % prev_model)
        model = models.get_model(model_name).load(prev_model, data_config, **params)
    else:
        model = models.get_model(model_name)(data_config, **params)

    model = model.to(device)

    if hasattr(model, "crf"):
        criterion = loss.crf
    else:
        criterion = loss.cross_ent

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    if prev_model:
        print("Loading previous optimizer's state")
        optimizer.load_state_dict(torch.load("%s/optimizer.pth" % prev_model))
        print("Previous learning rate", get_lr(optimizer))

        # force torch to use the given lr, not the previous one
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
            param_group['initial_lr'] = lr

        print("Current learning rate", get_lr(optimizer))

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", patience=0, verbose=True)

    dataloader_params = dict(batch_size=batch_size,
                             num_workers=no_workers,
                             collate_fn=dataset_cls.collate_fn)

    print("Using dataset: %s" % dataset_cls.__name__)

    training_generator = data.DataLoader(training_set, shuffle=True, **dataloader_params)
    validation_generator = data.DataLoader(validation_set, shuffle=False, **dataloader_params)

    total_train_size = len(training_set)
    total_test_size = len(validation_set)

    print("We have %d train samples and %d test samples" % (total_train_size, total_test_size))

    # for FloydHub
    print('{"metric": "%s:%s", "value": %s}' % ("model", model_name, model.total_trainable_params()))

    os.makedirs(output_dir, exist_ok=True)
    copy_files("%s/dictionary/*.json" % data_dir, output_dir)

    start_training_time = time.time()
    best_val_loss = np.inf

    for e in range(1, epoch + 1):
        print("===EPOCH %d ===" % (e))

        st_time = time.time()
        curr_lr = get_lr(optimizer)
        print(f"lr={curr_lr}")

        with utils.Timer("epoch-training") as timer:
            model.train()
            _ = do_iterate(
                model,
                training_generator,
                prefix="training",
                step=e,
                device=device,
                optimizer=optimizer,
                criterion=criterion,
            )

        with utils.Timer("epoch-validation") as timer, \
                torch.no_grad():
            model.eval()
            val_loss = do_iterate(
                model,
                validation_generator,
                prefix="validation",
                step=e,
                device=device,
                criterion=criterion,
            )

        elapsed_time = (time.time() - st_time) / 60.
        print(f"Time taken: {elapsed_time:.4f} mins")

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            model_path = "%s/model.pth" % output_dir
            opt_path = "%s/optimizer.pth" % output_dir

            print("Saving model to %s" % model_path)
            torch.save(model.state_dict(), model_path)
            torch.save(optimizer.state_dict(), opt_path)

            best_val_loss = val_loss

    training_took = time.time() - start_training_time
    print(f"[training] total time: {training_took}")

    config = utils.parse_model_params(model_params)
    if "embs" in config and isinstance(config["embs"], str):
        emb = config["embs"]
        copy_files(f"{data_dir}/dictionary/sy-emb-{emb}.npy", output_dir)

    utils.save_training_params(
        output_dir,
        utils.ModelParams(
            name=model_name,
            params=model.model_params,
            training_took=training_took,
            num_trainable_params=model.total_trainable_params(),
            lr=lr,
            weight_decay=weight_decay,
            epoch=epoch,
        ))