def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--seed', type=int, default=666)
    arg('--distributed_backend', type=str, default='ddp')
    arg('--num_workers', type=int, default=16)
    arg('--gpus', type=int, default=8)
    arg('--num_nodes', type=int, default=1)
    parser = Model.add_model_specific_args(parser)
    args = parser.parse_args()

    if args.model_name is None or args.transform_name is None:
        raise ValueError('Specify model name and transformation rule')
    if args.optimizer_name not in ('sgd', 'adam', 'rmsprop'):
        raise ValueError('Please choose optimizer from sgd|adam|rmsprop')

    # init_seed(seed=args.seed)
    experiment_name = md5(bytes(str(args), encoding='utf8')).hexdigest()
    logger.info(str(args))
    logger.info(f'experiment_name={experiment_name}')

    tb_logger = TensorBoardLogger(save_dir=RESULT_DIR,
                                  name=experiment_name,
                                  version=int(time.time()))
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        filepath=tb_logger.log_dir + "/{epoch:02d}-{auc:.5f}",
        monitor='auc', mode='max', save_top_k=3, verbose=True)
    # set min_delta negative because early stopping sometimes runs twice in v0.7.6
    early_stop_callback = pl.callbacks.EarlyStopping(
        monitor='auc', mode='max', patience=9,
        # min_delta=-0.000001,
        verbose=True)

    model = Model(**vars(args))
    trainer = pl.Trainer.from_argparse_args(
        args,
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=early_stop_callback,
        logger=tb_logger,
        # train_percent_check=0.1,
        # num_sanity_val_steps=args.gpus,
        use_amp=True,
    )
    trainer.fit(model)
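# Nearly every entry point in this collection calls an `init_seed` helper
# (sometimes with a plain integer, sometimes with an options dict or an args
# namespace). Its implementation is not shown; the sketch below is a minimal
# assumption of the integer-seed variant, seeding Python, NumPy and PyTorch.
import random

import numpy as np
import torch


def init_seed(seed: int = 0) -> None:
    """Hypothetical reproducibility helper: seed all relevant RNGs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)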
def main():
    # load default parser values from the 'parser' module
    parser = get_parser()
    # get args from the parser as an object
    args = parser.parse_args()
    args.device = 'cuda' if args.cuda else 'cpu'

    # initialize seeds
    utils.init_seed(args.seed)

    # print('loader stuff', args)
    loader = Loader.IncrementalLoader(args, seed=args.seed)
    # print('loader stuff after after', args)
    n_inputs, n_outputs, n_tasks = loader.get_dataset_info()

    # set up logging (logging helpers live in utils/misc_utils.py);
    # this call is redundant because log_dir already handles the timestamp
    timestamp = utils.get_date_time()
    # log_dir also stores args into "training_parameters.json"
    args.log_dir, args.tf_dir = utils.log_dir(args, timestamp)

    # create the neural-net model
    model = Model.Net(n_inputs, n_outputs, n_tasks, args,
                      innerlr=args.opt_lr, outerlr=args.alpha_init)
    # move the model to GPU if possible
    model.net.to(args.device)

    # run all the continual-learning baselines
    result_val_t, result_val_a, result_test_t, result_test_a, spent_time = life_experience(
        model, loader, args)

    # save results to files or print to the terminal
    save_results(args, result_val_t, result_val_a, result_test_t,
                 result_test_a, model, spent_time)
def main():
    parser = argparse.ArgumentParser(description='Semantic Segmentation')
    parser.add_argument('--train_cfg', type=str,
                        default='./configs/train_config.yaml',
                        help='train config path')
    args = parser.parse_args()
    config_folder = Path(args.train_cfg.strip("/"))
    config = load_yaml(config_folder)
    init_seed(config['SEED'])

    df, train_ids, valid_ids = split_dataset(config['DATA_TRAIN'])
    train_dataset = getattribute(config=config, name_package='TRAIN_DATASET', df=df, img_ids=train_ids)
    valid_dataset = getattribute(config=config, name_package='VALID_DATASET', df=df, img_ids=valid_ids)
    train_dataloader = getattribute(config=config, name_package='TRAIN_DATALOADER', dataset=train_dataset)
    valid_dataloader = getattribute(config=config, name_package='VALID_DATALOADER', dataset=valid_dataset)

    model = getattribute(config=config, name_package='MODEL')
    criterion = getattribute(config=config, name_package='CRITERION')
    optimizer = getattribute(config=config, name_package='OPTIMIZER', params=model.parameters())
    scheduler = getattribute(config=config, name_package='SCHEDULER', optimizer=optimizer)

    device = config['DEVICE']
    metric_ftns = [accuracy_dice_score]
    num_epoch = config['NUM_EPOCH']
    gradient_clipping = config['GRADIENT_CLIPPING']
    gradient_accumulation_steps = config['GRADIENT_ACCUMULATION_STEPS']
    early_stopping = config['EARLY_STOPPING']
    validation_frequency = config['VALIDATION_FREQUENCY']
    saved_period = config['SAVED_PERIOD']
    checkpoint_dir = Path(config['CHECKPOINT_DIR'], type(model).__name__)
    checkpoint_dir.mkdir(exist_ok=True, parents=True)
    resume_path = config['RESUME_PATH']

    learning = Learning(model=model,
                        optimizer=optimizer,
                        criterion=criterion,
                        device=device,
                        metric_ftns=metric_ftns,
                        num_epoch=num_epoch,
                        scheduler=scheduler,
                        grad_clipping=gradient_clipping,
                        grad_accumulation_steps=gradient_accumulation_steps,
                        early_stopping=early_stopping,
                        validation_frequency=validation_frequency,
                        save_period=saved_period,
                        checkpoint_dir=checkpoint_dir,
                        resume_path=resume_path)
    learning.train(tqdm(train_dataloader), tqdm(valid_dataloader))
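# The `getattribute` factory used throughout these config-driven scripts is
# not shown. The sketch below is a hypothetical implementation, assuming each
# config section stores an import path under 'PY' and keyword arguments under
# 'ARGS' (the 'ARGS' key is consistent with targets such as 'OPTIMIZER,ARGS,lr'
# used elsewhere in this collection; the 'PY' key is purely an assumption).
import importlib


def getattribute(config, name_package, **kwargs):
    """Instantiate the object described by config[name_package]."""
    section = config[name_package]
    module_name, class_name = section['PY'].rsplit('.', 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    # explicit kwargs (e.g. params=model.parameters()) override config ARGS
    return cls(**{**section.get('ARGS', {}), **kwargs})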
def main():
    parser = argparse.ArgumentParser(description='Semantic Segmentation')
    parser.add_argument('--train_cfg', type=str,
                        default='./configs/train_config.yaml',
                        help='train config path')
    parser.add_argument('--resume_path', type=str, default='./saved/',
                        help='resume path')
    args = parser.parse_args()
    config_folder = Path(args.train_cfg.strip("/"))
    config = load_yaml(config_folder)
    init_seed(config['SEED'])

    sub, test_ids = read_data('./data/sample_submission.csv')
    test_dataset = getattribute(config=config, name_package='TEST_DATASET', df=sub, img_ids=test_ids)
    test_dataloader = getattribute(config=config, name_package='TEST_DATALOADER', dataset=test_dataset)
    model = getattribute(config=config, name_package='MODEL')

    print("Loading checkpoint: {} ...".format(args.resume_path))
    checkpoint = torch.load(args.resume_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()

    encoded_pixels = []
    image_id = 0
    for idx, (data, _) in enumerate(tqdm(test_dataloader)):
        data = data.cuda()
        outputs = model(data)
        for batch in outputs:
            for probability in batch:
                probability = probability.cpu().detach().numpy()
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability, dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                predict, num_predict = post_process(
                    sigmoid(probability),
                    class_params[image_id % 4][0],
                    class_params[image_id % 4][1])
                if num_predict == 0:
                    encoded_pixels.append('')
                else:
                    r = mask2rle(predict)
                    encoded_pixels.append(r)
                image_id += 1

    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv('submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
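# `mask2rle` and `post_process` are competition utilities that are not shown
# here. The sketches below are assumptions: a conventional run-length encoder
# (column-major order, matching the 350x525 masks resized above) and a
# post-processing step that thresholds the probability map and drops small
# connected components. The project's own versions may differ.
import cv2
import numpy as np


def mask2rle(img):
    """Encode a binary mask as a run-length string (column-major order)."""
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


def post_process(probability, threshold, min_size):
    """Binarize `probability` and keep only components larger than `min_size`."""
    mask = (probability > threshold).astype(np.uint8)
    num_components, components = cv2.connectedComponents(mask)
    predictions = np.zeros((350, 525), np.float32)
    num_predict = 0
    for c in range(1, num_components):
        component_mask = components == c
        if component_mask.sum() > min_size:
            predictions[component_mask] = 1
            num_predict += 1
    return predictions, num_predict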
def test_trainer_8(self):
    init_seed(options={"seed": 0})

    # learning rate scheduler step
    lr_scheduler_step = 15
    num_support_tr = 6
    num_query_tr = 12
    num_samples = num_support_tr + num_query_tr

    # number of random classes per episode for training;
    # this should be equal to or less than the number of
    # unique classes in the dataset
    classes_per_it = 3
    iterations = 10

    proto_net = ProtoNetTUF(encoder=linear(in_features=2, out_features=3))
    train_engine = TrainEngine(model=proto_net)

    # optimizer to be used for learning
    optimizer = optim.Adam(params=proto_net.parameters(), lr=0.1, weight_decay=0.001)

    # how to reduce the learning rate
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer=optimizer, gamma=0.01, step_size=lr_scheduler_step, verbose=True)

    train_loader = TUFDataset(filename=Path("./test_data/train_data.csv"), dataset_type="train")
    sampler = BatchSampler(labels=train_loader.labels,
                           classes_per_it=classes_per_it,
                           num_samples=num_samples,
                           iterations=iterations,
                           mode="train")
    dataloader = torch.utils.data.DataLoader(train_loader, batch_sampler=sampler)

    options = {
        "optimizer": optimizer,
        "lr_scheduler": lr_scheduler,
        "max_epochs": 1,
        "device": "cpu",
        "sample_loader": dataloader,
        "iterations": iterations,
        "num_support_tr": num_support_tr
    }

    train_engine.train(options=options)
def test(configuration: dict) -> None:
    device = configuration['device']
    if device == 'gpu' and not torch.cuda.is_available():
        print("{0} You specified CUDA as device but the PyTorch installation does not support CUDA".format(WARNING))
        print("{0} Setting device to cpu".format(WARNING))
        configuration['device'] = 'cpu'

    # initialize seed for random generation utilities
    init_seed(options=configuration)

    test_model_path = Path(configuration["save_model_path"] + "/" +
                           configuration["model_name"] + "/" +
                           configuration["test_model"])

    model = ProtoNetTUF.build_network(
        encoder=linear_with_softmax(
            in_features=configuration["in_features"],
            out_features=len(configuration["classes"])),
        options=configuration)
    model.load_state_dict(torch.load(test_model_path))

    train_dataset = TUFDataset(filename=Path(configuration["test_dataset"]),
                               dataset_type="test",
                               classes=configuration["classes"])
    print(f"{INFO} Test dataset size {len(train_dataset)}")

    # number of samples per episode:
    # num_support_tr is the number of support points per class,
    # num_query_tr is the number of query points per class
    num_samples = configuration["num_support_tr"] + configuration["num_query_tr"]

    sampler = BatchSampler(labels=train_dataset.labels,
                           classes_per_it=len(configuration["classes"]),
                           num_samples=num_samples,
                           iterations=configuration["iterations"],
                           mode="train")
    dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=sampler)
    '''
def train(configuration: dict) -> None:
    dirs = os.listdir(configuration["save_model_path"])
    if configuration["model_name"] in dirs:
        raise ValueError(f"Directory {configuration['model_name']} exists")

    # create the output directory if it doesn't exist
    output_path = Path(configuration["save_model_path"] + "/" + configuration["model_name"])
    os.mkdir(path=output_path)
    configuration["save_model_path"] = str(output_path)

    # save the configuration in the output directory
    with open(output_path / "config.json", 'w', newline="\n") as fh:
        json.dump(configuration, fh)

    device = configuration['device']
    if device == 'gpu' and not torch.cuda.is_available():
        print("{0} You specified CUDA as device but the PyTorch installation does not support CUDA".format(WARNING))
        print("{0} Setting device to cpu".format(WARNING))
        configuration['device'] = 'cpu'

    # initialize seed for random generation utilities
    init_seed(options=configuration)

    # the model to train
    model = ProtoNetTUF.build_network(
        encoder=convolution_with_linear_softmax(
            in_channels=2, out_channels=1, kernel_size=1,
            in_features=configuration["in_features"],
            out_features=len(configuration["classes"])),
        options=configuration)

    # initialize the optimizer
    optim = torch.optim.Adam(
        params=model.parameters(),
        lr=configuration["optimizer"]["lr"],
        weight_decay=configuration["optimizer"]["weight_decay"])

    # initialize the scheduler for learning rate decay:
    # StepLR decays the learning rate of each parameter group by gamma
    # every step_size epochs. Such decay can happen simultaneously with
    # other changes to the learning rate from outside this scheduler.
    # When last_epoch=-1, the initial lr is set to lr.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer=optim,
        gamma=configuration["lr_scheduler"]["gamma"],
        step_size=configuration["lr_scheduler"]["step_size"])

    train_dataset = TUFDataset(filename=Path(configuration["train_dataset"]),
                               dataset_type="train",
                               classes=configuration["classes"])
    print(f"{INFO} Training dataset size {len(train_dataset)}")

    # number of samples per episode:
    # num_support_tr is the number of support points per class,
    # num_query_tr is the number of query points per class
    num_samples = configuration["num_support_tr"] + configuration["num_query_tr"]

    sampler = BatchSampler(labels=train_dataset.labels,
                           classes_per_it=len(configuration["classes"]),
                           num_samples=num_samples,
                           iterations=configuration["iterations"],
                           mode="train")
    dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=sampler)

    # options for the training engine
    options = TrainEngine.build_options(
        optimizer=optim,
        lr_scheduler=lr_scheduler,
        max_epochs=configuration["max_epochs"],
        iterations=configuration["iterations"],
        device=configuration["device"],
        sample_loader=dataloader,
        num_support_tr=configuration["num_support_tr"])
    options = extend_options_from_config(configuration=configuration, options=options)

    if configuration["validate"]:
        num_support_validation = configuration["num_support_validation"]
        num_query_validation = configuration["num_query_validation"]
        num_samples_validation = num_query_validation + num_support_validation
        print(f"{INFO} Number of samples validation {num_samples_validation}")

        validation_dataset = TUFDataset(filename=Path(configuration["validate_dataset"]),
                                        dataset_type="validate",
                                        classes=configuration["classes"])
        print(f"{INFO} Validation dataset size {len(validation_dataset)}")

        val_sampler = BatchSampler(labels=validation_dataset.labels,
                                   classes_per_it=len(configuration["classes"]),
                                   num_samples=num_samples_validation,
                                   iterations=configuration["iterations"],
                                   mode="validate")
        validation_dataloader = torch.utils.data.DataLoader(
            validation_dataset, batch_sampler=val_sampler)
        options["validation_dataloader"] = validation_dataloader
        options["num_support_validation"] = configuration["num_support_validation"]

    # train the model
    engine = TrainEngine(model=model)
    engine.train(options=options)

    engine_state = engine.state
    x = [epoch for epoch in range(configuration["max_epochs"])]

    train_loss = engine_state["average_train_loss"]
    validation_loss = engine_state["average_validation_loss"]
    plt.plot(x, train_loss, 'r*', label="Train loss")
    plt.plot(x, validation_loss, 'bo', label="Validation loss")
    plt.xlabel("Epoch")
    plt.ylabel("Average Loss")
    plt.legend(loc="upper right")
    plt.title(r"Train vs Validation loss. $\eta=${0}, Iterations/epoch {1}".format(
        configuration["optimizer"]["lr"], configuration["iterations"]))
    plt.savefig(Path(configuration["save_model_path"] + "/" + "train_validation_loss.png"))
    plt.close()

    train_acc = engine_state["average_train_acc"]
    validation_acc = engine_state["average_validation_acc"]
    plt.plot(x, train_acc, 'r*', label="Train accuracy")
    plt.plot(x, validation_acc, 'bo', label="Validation accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Average Accuracy")
    plt.legend(loc="upper right")
    plt.title(r"Train vs Validation accuracy. $\eta=${0}, Iterations/epoch {1}".format(
        configuration["optimizer"]["lr"], configuration["iterations"]))
    plt.savefig(Path(configuration["save_model_path"] + "/" + "train_validation_accuracy.png"))
                        'caching feature extractors and '
                        'language index mapping')
    parser.add_argument('-vsizes', default=[2000, 2000, 12000, 12000, 12000],
                        type=int, nargs='+',
                        help="Vocabulary sizes, "
                             "for 1-gram, 2-gram, 3-gram, 4-gram and word features, "
                             "respectively. Default setting is named as 'sm'.")
    # parser.add_argument('-vsizes', default=[10000, 10000, 50000, 50000, 50000],
    #                     type=int, nargs='+',
    #                     help="Vocabulary sizes, "
    #                          "for 1-gram, 2-gram, 3-gram, 4-gram and word features, "
    #                          "respectively. Default setting is named as 'lg'.")
    args = parser.parse_args()

    utils.init_seed(args.seed)
    utils.log(f'Params: {str(args)}')

    if os.path.exists(args.mdir):
        utils.log(f'{args.mdir} already exists. '
                  f'Input \'yes\' to overwrite, '
                  f'or \'no\' to load and train:')
        key = input()
        if key == 'yes':
            os.system(f'rm -rf {args.mdir}')
            os.makedirs(args.mdir)
        elif key == 'no':
            pass
        else:
            exit()
    else:
def main():
    parser = argparse.ArgumentParser(description='Semantic Segmentation')
    parser.add_argument('--train_cfg', type=str, default='./configs/train.yaml',
                        help='train config path')
    args = parser.parse_args()
    config_folder = Path(args.train_cfg.strip("/"))
    config = load_yaml(config_folder)
    init_seed(config['SEED'])

    image_datasets = {
        x: vinDataset(root_dir=config['ROOT_DIR'],
                      file_name=config['FILE_NAME'],
                      num_triplet=config['NUM_TRIPLET'],
                      phase=x)
        for x in ['train', 'valid']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=config['BATCH_SIZE'],
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=True)
        for x in ['train', 'valid']
    }

    model = getattribute(config=config, name_package='MODEL')
    criterion = getattribute(config=config, name_package='CRITERION')
    metric_ftns = [accuracy_score]
    optimizer = getattribute(config=config, name_package='OPTIMIZER', params=model.parameters())
    scheduler = getattribute(config=config, name_package='SCHEDULER', optimizer=optimizer)

    device = config['DEVICE']
    num_epoch = config['NUM_EPOCH']
    gradient_clipping = config['GRADIENT_CLIPPING']
    gradient_accumulation_steps = config['GRADIENT_ACCUMULATION_STEPS']
    early_stopping = config['EARLY_STOPPING']
    validation_frequency = config['VALIDATION_FREQUENCY']
    saved_period = config['SAVED_PERIOD']
    checkpoint_dir = Path(config['CHECKPOINT_DIR'], type(model).__name__)
    checkpoint_dir.mkdir(exist_ok=True, parents=True)
    resume_path = config['RESUME_PATH']

    learning = Learning(model=model,
                        criterion=criterion,
                        metric_ftns=metric_ftns,
                        optimizer=optimizer,
                        device=device,
                        num_epoch=num_epoch,
                        scheduler=scheduler,
                        grad_clipping=gradient_clipping,
                        grad_accumulation_steps=gradient_accumulation_steps,
                        early_stopping=early_stopping,
                        validation_frequency=validation_frequency,
                        save_period=saved_period,
                        checkpoint_dir=checkpoint_dir,
                        resume_path=resume_path)
    learning.train(tqdm(dataloaders['train']), tqdm(dataloaders['valid']))
def calc_criterions(y_pred, y_true):
    metric_names = ['log_loss', 'recall', 'far']
    results = {key: 0 for key in metric_names}

    results['log_loss'] = log_loss(y_true, y_pred)
    results['recall'] = recall_rate(y_pred, y_true).item()
    results['far'] = false_detection_rate(y_pred, y_true).item()

    return results


if __name__ == '__main__':
    args = baseline_args().parse_args()
    init_seed(args)
    eeg_conf = set_eeg_conf(args)
    dataloaders = {
        phase: set_dataloader(args, eeg_conf, phase, label_func, device='cpu')
        for phase in ['train', 'val', 'test']
    }

    models = None
    for phase in ['train', 'val']:
        for i, (inputs, labels) in tqdm(enumerate(dataloaders[phase]),
                                        total=len(dataloaders[phase])):
            models, preds = model_baseline(models, inputs, labels)

    if args.test:
        # test phase
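# `recall_rate` and `false_detection_rate` are project utilities whose
# definitions are not shown. A hypothetical sketch, assuming binary 0/1
# tensors and that "far" denotes the false-positive rate FP / (FP + TN);
# the project's real definition (e.g. false alarms per hour for EEG) may differ.
import torch


def recall_rate(preds: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    tp = ((preds == 1) & (labels == 1)).sum().float()
    fn = ((preds == 0) & (labels == 1)).sum().float()
    return tp / (tp + fn + 1e-8)


def false_detection_rate(preds: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    fp = ((preds == 1) & (labels == 0)).sum().float()
    tn = ((preds == 0) & (labels == 0)).sum().float()
    return fp / (fp + tn + 1e-8)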
model = nets.GNIMLP(idim=opt.idim,
                    nclasses=nclasses,
                    capacity=opt.capacity,
                    criterion=nn.CrossEntropyLoss(),
                    add_per=opt.add_per,
                    retain_ratio=opt.retain_ratio,
                    device=device).to(device)
if opt.net == 'gsmlp':
    model = nets.GSMLP(idim=opt.idim,
                       nclasses=nclasses,
                       capacity=opt.capacity,
                       criterion=nn.CrossEntropyLoss(),
                       bsz_sampling=opt.bsz_sampling).to(device)

utils.init_seed(opt.seed)
utils.init_model(model)

optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                       lr=opt.lr)
criterion = nn.CrossEntropyLoss()

if opt.dataset in TEXT_DATASETS:
    folder_pwd = os.path.join(DATA, CHEN)
    info = json.loads(open(os.path.join(folder_pwd, INFO), "rt").read())
    training_text.train_ll(model,
                           {'train': utrain_iter, 'valid': uvalid_iter},
                           info, opt, optimizer)
def main():
    parser = argparse.ArgumentParser(description='Pytorch parser')
    parser.add_argument('--train_cfg', type=str,
                        default='./configs/efficientdet-d0.yaml',
                        help='train config path')
    parser.add_argument('-d', '--device', default=None, type=str,
                        help='indices of GPUs to enable (default: all)')
    parser.add_argument('-r', '--resume', default=None, type=str,
                        help='path to latest checkpoint (default: None)')

    CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')
    options = [
        CustomArgs(['-lr', '--learning_rate'], type=float,
                   target='OPTIMIZER,ARGS,lr'),
        CustomArgs(['-bs', '--batch_size'], type=int,
                   target='TRAIN_DATALOADER,ARGS,batch_size;VALID_DATALOADER,ARGS,batch_size')
    ]
    config = config_parser(parser, options)
    init_seed(config['SEED'])

    train_dataset = VOCDetection(root=VOC_ROOT,
                                 transform=SSDAugmentation(voc['min_dim'], MEANS))
    train_dataloader = getattribute(config=config,
                                    name_package='TRAIN_DATALOADER',
                                    dataset=train_dataset,
                                    collate_fn=detection_collate)
    # valid_dataloader = getattribute(config=config, name_package='VALID_DATALOADER', dataset=valid_dataset)

    model = getattribute(config=config, name_package='MODEL')
    criterion = getattribute(config=config, name_package='CRITERION')
    optimizer = getattribute(config=config, name_package='OPTIMIZER', params=model.parameters())
    scheduler = getattribute(config=config, name_package='SCHEDULER', optimizer=optimizer)

    device = config['DEVICE']
    metric_ftns = []
    num_epoch = config['NUM_EPOCH']
    gradient_clipping = config['GRADIENT_CLIPPING']
    gradient_accumulation_steps = config['GRADIENT_ACCUMULATION_STEPS']
    early_stopping = config['EARLY_STOPPING']
    validation_frequency = config['VALIDATION_FREQUENCY']
    tensorboard = config['TENSORBOARD']
    checkpoint_dir = Path(config['CHECKPOINT_DIR'], type(model).__name__)
    checkpoint_dir.mkdir(exist_ok=True, parents=True)
    resume_path = config['RESUME_PATH']

    learning = Learning(model=model,
                        criterion=criterion,
                        optimizer=optimizer,
                        scheduler=scheduler,
                        metric_ftns=metric_ftns,
                        device=device,
                        num_epoch=num_epoch,
                        grad_clipping=gradient_clipping,
                        grad_accumulation_steps=gradient_accumulation_steps,
                        early_stopping=early_stopping,
                        validation_frequency=validation_frequency,
                        tensorboard=tensorboard,
                        checkpoint_dir=checkpoint_dir,
                        resume_path=resume_path)
    learning.train(tqdm(train_dataloader))
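# `detection_collate` is needed because each image in a detection batch has a
# variable number of ground-truth boxes, so targets cannot be stacked into a
# single tensor. A sketch in the style of the common ssd.pytorch helper
# (an assumption; the project's own version may differ):
import torch


def detection_collate(batch):
    """Stack images into one tensor; keep per-image target tensors in a list."""
    images, targets = [], []
    for image, annotation in batch:
        images.append(image)
        targets.append(torch.FloatTensor(annotation))
    return torch.stack(images, dim=0), targets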