def train():
    args = cli_options()
    fr = FlowerRecognizor(args.arch, args.hidden_units, args.learning_rate, args.gpu)
    train_loader, valid_loader, test_loader, class_to_idx = create_data_loaders(
        args.data_directory)
    fr.train(args.save_directory, train_loader, valid_loader, class_to_idx, args.epochs)
    fr.test(test_loader)
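# A minimal sketch of what cli_options() might parse, inferred from the attributes used
# above (arch, hidden_units, learning_rate, gpu, data_directory, save_directory, epochs).
# Flag names and defaults here are assumptions, not the project's actual CLI.
import argparse

def cli_options_sketch():
    parser = argparse.ArgumentParser(description="Train a flower classifier")
    parser.add_argument("data_directory", help="folder with train/valid/test image subfolders")
    parser.add_argument("--save_directory", default=".", help="where checkpoints are written")
    parser.add_argument("--arch", default="vgg16", help="torchvision backbone name")
    parser.add_argument("--hidden_units", type=int, default=512)
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--epochs", type=int, default=5)
    parser.add_argument("--gpu", action="store_true", help="use the GPU if available")
    return parser.parse_args()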
import torch
import numpy as np
import utils
from args import get_args
from data.transforms import complex_abs
import matplotlib.pyplot as plt

args = get_args()
train_loader, dev_loader = utils.create_data_loaders(args, if_shuffle=False)
prev_file = ""
tensor_dict = {'blurred_images': [], 'target_images': []}
print("All the outputs will be stored in the folder: ", args.out_dir)
for i, data in enumerate(train_loader):
    original_kspace, masked_kspace, mask, target, fname, slice_index = data
    fname = fname[0]
    blurred_image = utils.kspaceto2dimage(masked_kspace, cropping=True,
                                          resolution=args.resolution)
    if prev_file != fname and prev_file != "":
        # a new volume starts: flush the finished one and begin collecting the next
        utils.save_tensors(tensor_dict, args.out_dir, prev_file)
        tensor_dict['blurred_images'] = [blurred_image.squeeze()]
        tensor_dict['target_images'] = [target.squeeze()]
        prev_file = fname
    elif prev_file == fname:
        tensor_dict['blurred_images'].append(blurred_image.squeeze())
        tensor_dict['target_images'].append(target.squeeze())
    elif prev_file == "":
        # assumed handling of the very first file: start collecting without saving
        # (the original snippet ends here)
        tensor_dict['blurred_images'] = [blurred_image.squeeze()]
        tensor_dict['target_images'] = [target.squeeze()]
        prev_file = fname
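# A possible shape for utils.save_tensors, shown only to illustrate how the per-volume
# lists above could be persisted; the real helper may stack tensors or name files
# differently.
import os
import torch

def save_tensors_sketch(tensor_dict, out_dir, fname):
    os.makedirs(out_dir, exist_ok=True)
    # stack each list of 2D slices into a single 3D tensor per key
    volume = {key: torch.stack(slices) for key, slices in tensor_dict.items()}
    torch.save(volume, os.path.join(out_dir, f"{fname}.pt"))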
def main():
    """
    Main call function for the script
    :return:
    """
    import os
    source_path = os.path.abspath(__file__)
    base_path = os.path.dirname(os.path.dirname(source_path))
    args = get_args()
    if "params/" not in args.config:
        args.config = "../params/" + args.config
    with open(os.path.join(base_path, args.config), 'r', encoding='utf-8') as config_file:
        json_string = config_file.read()
    if args.split.lower() == 'cluster':
        random_split = False
        print("Using cluster split for train and validation")
    else:
        random_split = True
        print("Using random split for train and validation")
    os.chdir(os.path.dirname(source_path))
    params = json.loads(json_string)
    print('Neuraldecipher training with param settings:')
    print(params)
    if params['neuraldecipher'].get('norm_before') is None:
        params['neuraldecipher']['norm_before'] = True
    # instantiate neuraldecipher model
    neuraldecipher = Neuraldecipher(**params['neuraldecipher'])
    print("Neuraldecipher model:")
    print(neuraldecipher)
    # instantiate trainer object
    trainer = Trainer(model=neuraldecipher, trainparams=params['training'])
    earlystopping = EarlyStopping(mode='min', patience=params['training']['patience'])
    optimizer = torch.optim.Adam(
        params=neuraldecipher.parameters(),
        betas=(params['training']['b1'], params['training']['b2']),
        lr=params['training']['lr'],
        weight_decay=params['training']['weight_decay'])
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.70, patience=10, verbose=True)
    if params['training']['loss'] == 'mse':
        criterion = torch.nn.MSELoss()
    elif params['training']['loss'] == 'x-sigmoid':
        criterion = XSigmoidLoss()
    elif params['training']['loss'] == 'x-tanh':
        criterion = XTanhLoss()
    elif params['training']['loss'] == 'log-cosh':
        criterion = LogCoshLoss()
    else:
        criterion = torch.nn.MSELoss()
    if str_to_bool(args.cosineloss):
        criterion_2 = CosineSimLoss()
        criteria = [criterion, criterion_2]
        print("Using {} and cosine difference loss.".format(params['training']['loss']))
    else:
        criteria = [criterion]
        print("Using {} loss.".format(params['training']['loss']))
    seed = params['training']['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    if 'cuda' in params['training']['device']:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    # obtain datasets for training and validation
    train_data, test_data = create_train_and_test_set(
        ecfp_path=params['training']['data_dir'],
        test_group=7,
        random_split=random_split)
    # create dataloaders
    train_loader, test_loader = create_data_loaders(
        train_data, test_data,
        batch_size=params['training']['batch_size'],
        num_workers=args.num_workers)
    trainer._train(criteria, earlystopping, scheduler, optimizer,
                   train_loader, test_loader, verbose=True)
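# XSigmoidLoss, XTanhLoss and LogCoshLoss are custom losses defined elsewhere in the
# repo. For illustration only, a log-cosh regression loss usually looks like the sketch
# below; the project's actual class may differ in reduction or numerical details.
import torch

class LogCoshLossSketch(torch.nn.Module):
    def forward(self, prediction, target):
        diff = prediction - target
        # log(cosh(x)) behaves like x^2/2 near zero and like |x| for large errors,
        # making it a smooth, outlier-robust alternative to MSE
        return torch.mean(torch.log(torch.cosh(diff)))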
import torch
import torch.cuda as cuda
import torch.nn as nn
from skimage.measure import compare_ssim as ssim
from tensorboardX import SummaryWriter
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import Dataset

import utils
from data import transforms
from anet_model import AnetModel
from args import get_args

args = get_args()
train_loader, dev_loader = utils.create_data_loaders(args)


# ### Custom dataset class
def build_model(args):
    model = AnetModel(in_chans=2,
                      out_chans=2,
                      chans=args.num_chans,
                      num_pool_layers=args.num_pools,
                      drop_prob=args.drop_prob).to(args.device)
    return model


# def build_optim(args, params):
#     optimizer = torch.optim.RMSprop(params, args.learning_rate, weight_decay=args.weight_decay)
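# The commented-out build_optim above hints at an RMSprop optimizer. A runnable version
# under that assumption could look like this; the argument names follow the hint and are
# not confirmed elsewhere in the file.
def build_optim_sketch(args, params):
    optimizer = torch.optim.RMSprop(params, args.learning_rate, weight_decay=args.weight_decay)
    return optimizer

# example usage: optimizer = build_optim_sketch(args, build_model(args).parameters())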
def train():
    # initiate command line arguments, configuration file and logging block
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        print(f"log folder {args.name} already exists.")
    init_logging(log_path=f"./logs/{args.name}")

    # determine whether to train the model on cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset
    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create model
    logger.info(f'creating BiSeNetV2 and optimizer with initial lr of {config["learning_rate"]}')
    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(model, device_ids=list(range(args.main_cuda, 4))).to(device)

    # initiate loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])

    logger.info('creating trainer and evaluator engines')
    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )
    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            'loss': metrics.Loss(nn.CrossEntropyLoss()),
            # metric key reconstructed from the threshold below; the original name was
            # obfuscated in the source
            'accuracy@0.3': metrics.Accuracy(thresholded_transform(0.3)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listeners to do post-processing after each iteration and epoch
    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module}, args.name)
    attach_training_logger(trainer, writer=writer)
    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader, writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])
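# thresholded_transform is referenced above but defined elsewhere. A plausible sketch,
# assuming it binarises the already sigmoid-activated predictions before ignite's
# Accuracy metric consumes them:
def thresholded_transform_sketch(threshold=0.5):
    def transform(output):
        y_pred, y = output
        return (y_pred > threshold).long(), y
    return transform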
def main(exp_name="cifar_for_images", load=False):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    seed_everything()
    print(torch.cuda.get_device_name(0))
    torch.seed()
    if str(my_computer) == "False":
        n_clusters = None
        if os.environ["dataset_name"] == "imagenet":
            n_clusters = 512
            os.environ["batch_size"] = str(512)
        elif os.environ["dataset_name"] == "cifar10":
            n_clusters = 20
        elif os.environ["dataset_name"] == "tiny_imagenet":
            n_clusters = 200
        os.environ["n_cluster"] = str(n_clusters)
    else:
        os.environ["n_cluster"] = "10"
    print(f"n clusters is {os.environ['n_cluster']}")
    print(f"batch size is {os.environ['batch_size']}")
    n_classes = None
    if os.environ["dataset_name"] == "imagenet":
        n_classes = 1000
    elif os.environ["dataset_name"] == "cifar10":
        n_classes = 10
    elif os.environ["dataset_name"] == "tiny_imagenet":
        n_classes = 200
    if network_to_use == "DenseNet":
        models = [
            DenseNet(200,
                     clustering_algorithm=clustering_algorithms.KmeanSklearnByBatch(
                         n_clusters=int(os.environ['n_cluster']))),
            DenseNet(200)
        ]
        # models = [DenseNet(200), resnet50(num_classes=200, pretrained=False)]
        base_lrs = [0.0001, 0.00001, 0.00001, 0.000001]
        max_lrs = [0.0006, 0.00006, 0.00006, 0.000006]
        step_sizes_up = [4686, 4686, 3128, 1564]
        ths = [0.52, 0.61, 0.62, 0.99]
        optimizer1 = torch.optim.RMSprop(models[0].parameters(), lr=0.0001, eps=1e-08,
                                         weight_decay=2e-4)
        scheduler1 = chainedCyclicLr(optimizer=optimizer1, base_lrs=base_lrs, max_lrs=max_lrs,
                                     step_sizes_up=step_sizes_up, ths=ths)
        optimizer2 = torch.optim.RMSprop(models[1].parameters(), lr=0.0001, eps=1e-08,
                                         weight_decay=2e-4)
        scheduler2 = chainedCyclicLr(optimizer=optimizer2, base_lrs=base_lrs, max_lrs=max_lrs,
                                     step_sizes_up=step_sizes_up, ths=ths)
        loss_func = nn.NLLLoss
    elif network_to_use == "ResNet50":
        models = [
            resnet50(num_classes=n_classes,
                     clustering_algorithm=clustering_algorithms.KmeanSklearnByBatch(
                         n_clusters=int(os.environ['n_cluster'])),
                     pretrained=False),
            resnet50(num_classes=n_classes, pretrained=False)
        ]
        fake = torch.optim.SGD(models[0].parameters(), lr=0.001, momentum=0.9,
                               nesterov=True, weight_decay=5e-4)
        optimizer1 = torch.optim.Adam(models[0].parameters(), lr=0.001, betas=(0.9, 0.999),
                                      eps=1e-08, weight_decay=0, amsgrad=False)
        scheduler1 = torch.optim.lr_scheduler.CyclicLR(fake, base_lr=0.00001, max_lr=0.01,
                                                       step_size_up=5000, mode="triangular2")
        optimizer2 = torch.optim.Adam(models[1].parameters(), lr=0.001, betas=(0.9, 0.999),
                                      eps=1e-08, weight_decay=0, amsgrad=False)
        scheduler2 = torch.optim.lr_scheduler.CyclicLR(fake, base_lr=0.00001, max_lr=0.01,
                                                       step_size_up=5000, mode="triangular2")
        loss_func = nn.CrossEntropyLoss
    else:
        models = []
    for model_idx, model in enumerate(models):
        print(f"copy model {model_idx} to device")
        # print(model)
        # print(f"model {model_idx} total param is {sum(p.numel() for p in model.parameters())}")
        # print(f"model {model_idx} trainable param is {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
        model.to(device=device)
        # print(os.popen('nvidia-smi').read())
    train_dls, eval_dls, test_dls = [], [], []
    # create cluster resnet data
    if os.environ["dataset_name"] == "imagenet":
        data_root = "/home/ML_courses/datasets/imagenet/"
        train_set_normal, test_set = utils.ImageNetDs(
            data_root=data_root, max_index=500, do_aug=True), utils.ImageNetDs(
                data_root=data_root, is_train=False, is_eval=False, do_aug=False)
        train_set_clustered, eval_set = utils.ImageNetDs(
            data_root=data_root, max_index=400, do_aug=False), utils.ImageNetDs(
                data_root=data_root, is_eval=True, is_train=False,
                max_index=400, do_aug=False)
        # train_set_normal, test_set = utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"),
        #     max_index=10), utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"),
        #     is_train=False, is_eval=False)
        # train_set_clustered, eval_set = utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"),
        #     max_index=9), utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"),
        #     is_eval=True, is_train=False)
    elif os.environ["dataset_name"] == "cifar10":
        train_set_normal, test_set = utils.Cifar10Ds(
            data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering"),
            max_index=5), utils.Cifar10Ds(
                data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering"),
                is_train=False, is_eval=False)
        train_set_clustered, eval_set = utils.Cifar10Ds(
            data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering"),
            max_index=4), utils.Cifar10Ds(
                data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering"),
                is_eval=True, is_train=False)
    elif os.environ["dataset_name"] == "tiny_imagenet":
        data_root = "/content/tiny-imagenet-200" if my_computer == "False" else os.path.join(
            os.path.dirname(os.getcwd()), "data", "data_clustering", "tiny-imagenet-200")
        train_set_normal, test_set = utils.TinyInDs(
            data_root=data_root, max_index=500, do_aug=True), utils.TinyInDs(
                data_root=data_root, is_train=False, is_eval=False, do_aug=False)
        train_set_clustered, eval_set = utils.TinyInDs(
            data_root=data_root, max_index=400, do_aug=False), utils.TinyInDs(
                data_root=data_root, is_eval=True, is_train=False, max_index=400, do_aug=False)
    else:
        raise Exception(f"unsupported dataset_name: {os.environ['dataset_name']}")
    tb = utils.Tb(exp_name=exp_name)
    print("clustering")
    if str(my_computer) == "True":
        start_clustering = 6
    else:
        if os.environ["dataset_name"] == "imagenet":
            start_clustering = 20000
        elif os.environ["dataset_name"] == "cifar10":
            start_clustering = 200
        elif os.environ["dataset_name"] == "tiny_imagenet":
            start_clustering = 3000
    clustered_sampler = ClusteredSampler(train_set_normal, tb=tb)
    train_dl, eval_dl, test_dl = utils.create_data_loaders(
        [train_set_clustered, eval_set, test_set],
        [RegularSampler(train_set_clustered),
         RegularSampler(eval_set),
         RegularSampler(test_set)])
    train_dls.append(train_dl)
    eval_dls.append(eval_dl)
    test_dls.append(test_dl)
    # normal resnet data
    train_dl, eval_dl, test_dl = utils.create_data_loaders(
        [train_set_normal, [], test_set],
        [RegularSampler(train_set_normal), None, RegularSampler(test_set)])
    train_dls.append(train_dl)
    eval_dls.append(eval_dl)
    test_dls.append(test_dl)
    trainer = Trainer(models=models,
                      train_dls=train_dls,
                      eval_dls=eval_dls,
                      test_dls=test_dls,
                      loss_fn=loss_func(),
                      loss_fn_eval=loss_func(reduction="none"),
                      optimizers=[optimizer1, optimizer2],
                      schedulers=[scheduler1, scheduler2],
                      num_steps=300000,
                      tb=tb,
                      load=load,
                      clustered_sampler=clustered_sampler,
                      start_clustering=start_clustering)
    trainer.train_models()
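# utils.create_data_loaders is called above with a list of datasets and a matching list
# of samplers. A minimal sketch under that assumption (batch size read from the
# environment variable set earlier); the real helper may treat the empty-dataset/None-
# sampler slot differently.
import os
from torch.utils.data import DataLoader

def create_data_loaders_sketch(datasets, samplers, num_workers=2):
    loaders = []
    for dataset, sampler in zip(datasets, samplers):
        if sampler is None or len(dataset) == 0:
            # no loader for the empty eval slot used by the "normal resnet" branch
            loaders.append(None)
            continue
        loaders.append(DataLoader(dataset,
                                  batch_size=int(os.environ["batch_size"]),
                                  sampler=sampler,
                                  num_workers=num_workers))
    return loaders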