Example #1
def train():
    args = cli_options()

    fr = FlowerRecognizor(args.arch, args.hidden_units, args.learning_rate,
                          args.gpu)
    train_loader, valid_loader, test_loader, class_to_idx = create_data_loaders(
        args.data_directory)

    fr.train(args.save_directory, train_loader, valid_loader, class_to_idx,
             args.epochs)
    fr.test(test_loader)
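
The snippet above calls create_data_loaders(args.data_directory) but does not show its body. A minimal sketch of what it might look like, assuming a torchvision-style image-folder layout with train/valid/test subdirectories; the transforms, image size, and batch size are illustrative placeholders, not values from the original project:

import os
import torch
from torchvision import datasets, transforms


def create_data_loaders(data_directory, batch_size=64):
    # simple deterministic preprocessing; the real project likely adds augmentation
    transform = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    # one ImageFolder per split: <data_directory>/{train,valid,test}
    splits = {
        split: datasets.ImageFolder(os.path.join(data_directory, split),
                                    transform=transform)
        for split in ("train", "valid", "test")
    }
    train_loader = torch.utils.data.DataLoader(splits["train"],
                                               batch_size=batch_size,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(splits["valid"],
                                               batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(splits["test"],
                                              batch_size=batch_size)
    # class_to_idx maps class-folder names to integer labels
    return train_loader, valid_loader, test_loader, splits["train"].class_to_idx
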
Example #2
import torch
import numpy as np
import utils
from args import get_args
from data.transforms import complex_abs
import matplotlib.pyplot as plt

args = get_args()

train_loader, dev_loader = utils.create_data_loaders(args, if_shuffle=False)

prev_file = ""
tensor_dict = {'blurred_images': [], 'target_images': []}
print("All the outputs will be stored in the folder: ", args.out_dir)

for i, data in enumerate(train_loader):
    original_kspace, masked_kspace, mask, target, fname, slice_index = data
    fname = fname[0]
    blurred_image = utils.kspaceto2dimage(masked_kspace,
                                          cropping=True,
                                          resolution=args.resolution)

    if prev_file != fname and prev_file != "":
        utils.save_tensors(tensor_dict, args.out_dir, prev_file)
        tensor_dict['blurred_images'] = [blurred_image.squeeze()]
        tensor_dict['target_images'] = [target.squeeze()]
        prev_file = fname
    elif prev_file == fname:
        tensor_dict['blurred_images'].append(blurred_image.squeeze())
        tensor_dict['target_images'].append(target.squeeze())
    elif prev_file == "":
        # first slice seen: start accumulating tensors for this file
        tensor_dict['blurred_images'].append(blurred_image.squeeze())
        tensor_dict['target_images'].append(target.squeeze())
        prev_file = fname
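
utils.save_tensors is not shown in this listing; a plausible sketch, assuming it simply stacks the accumulated per-slice tensors into volumes and persists one file per scan with torch.save (the .pt file naming is an assumption, not taken from the project):

import os
import torch


def save_tensors(tensor_dict, out_dir, fname):
    # stack each list of (H, W) slices into a (num_slices, H, W) volume
    stacked = {key: torch.stack(slices) for key, slices in tensor_dict.items()}
    os.makedirs(out_dir, exist_ok=True)
    torch.save(stacked, os.path.join(out_dir, f"{fname}.pt"))
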
Example #3
def main():
    """
    Main call function for the script
    :return:
    """

    import os
    source_path = os.path.abspath(__file__)
    base_path = os.path.dirname(os.path.dirname(source_path))

    args = get_args()
    if "params/" not in args.config:
        args.config = "../params/" + args.config

    with open(os.path.join(base_path, args.config), 'r',
              encoding='utf-8') as config_file:
        json_string = config_file.read()

    if args.split.lower() == 'cluster':
        random_split = False
        print("Using cluster split for train and validation")
    else:
        random_split = True
        print("Using random split for train and validation")

    # change the working directory to this script's location
    os.chdir(os.path.dirname(source_path))

    params = json.loads(json_string)
    print('Neuraldecipher training with param settings:')
    print(params)

    if params['neuraldecipher'].get('norm_before') is None:
        params['neuraldecipher']['norm_before'] = True

    # instantiate neuraldecipher model
    neuraldecipher = Neuraldecipher(**params['neuraldecipher'])
    print("Neuraldecipher model:")
    print(neuraldecipher)

    # instantiate trainer object
    trainer = Trainer(model=neuraldecipher, trainparams=params['training'])
    earlystopping = EarlyStopping(mode='min',
                                  patience=params['training']['patience'])
    optimizer = torch.optim.Adam(
        params=neuraldecipher.parameters(),
        betas=(params['training']['b1'], params['training']['b2']),
        lr=params['training']['lr'],
        weight_decay=params['training']['weight_decay'])

    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.70,
                                  patience=10,
                                  verbose=True)

    if params['training']['loss'] == 'mse':
        criterion = torch.nn.MSELoss()
    elif params['training']['loss'] == 'x-sigmoid':
        criterion = XSigmoidLoss()
    elif params['training']['loss'] == 'x-tanh':
        criterion = XTanhLoss()
    elif params['training']['loss'] == 'log-cosh':
        criterion = LogCoshLoss()
    else:
        criterion = torch.nn.MSELoss()

    if str_to_bool(args.cosineloss):
        criterion_2 = CosineSimLoss()
        criteria = [criterion, criterion_2]
        print("Using {} and cosine difference loss.".format(
            params['training']['loss']))
    else:
        criteria = [criterion]
        print("Using {} loss.".format(params['training']['loss']))

    seed = params['training']['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    if 'cuda' in params['training']['device']:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # obtain datasets for training and validation
    train_data, test_data = create_train_and_test_set(
        ecfp_path=params['training']['data_dir'],
        test_group=7,
        random_split=random_split)
    # create dataloaders
    train_loader, test_loader = create_data_loaders(
        train_data,
        test_data,
        batch_size=params['training']['batch_size'],
        num_workers=args.num_workers)

    trainer._train(criteria,
                   earlystopping,
                   scheduler,
                   optimizer,
                   train_loader,
                   test_loader,
                   verbose=True)
Example #4
import torch
import torch.cuda as cuda
import torch.nn as nn
from skimage.measure import compare_ssim as ssim
from tensorboardX import SummaryWriter
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import Dataset

import utils
from data import transforms
from anet_model import AnetModel
from args import get_args

args = get_args()
train_loader, dev_loader = utils.create_data_loaders(args)

# ### Custom dataset class


def build_model(args):
    model = AnetModel(in_chans=2,
                      out_chans=2,
                      chans=args.num_chans,
                      num_pool_layers=args.num_pools,
                      drop_prob=args.drop_prob).to(args.device)
    return model


# def build_optim(args, params):
#     optimizer = torch.optim.RMSprop(params, args.learning_rate, weight_decay=args.weight_decay)
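
The commented-out build_optim above is cut off; a minimal completion, assuming it simply wraps the RMSprop constructor shown in the comment:

import torch


def build_optim(args, params):
    # RMSprop with the learning rate and weight decay taken from the command-line args
    optimizer = torch.optim.RMSprop(params, args.learning_rate,
                                    weight_decay=args.weight_decay)
    return optimizer
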
Example #5
def train():
    # initialize command-line arguments, configuration file, and logging
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        print(f"log folder {args.name} already exists.")

    init_logging(log_path=f"./logs/{args.name}")

    # determine which device to train the model on: cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset

    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create model
    logger.info(
        f'creating BiseNetv2 and optimizer with initial lr of {config["learning_rate"]}'
    )

    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(model,
                            device_ids=list(range(args.main_cuda, 4))).to(device)

    # initiate loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])

    logger.info('creating trainer and evaluator engines')

    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )

    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            'loss': metrics.Loss(nn.CrossEntropyLoss()),
            "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
            "accuracy@0.5": metrics.Accuracy(thresholded_transform(0.5)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listener to do post process after each iteration and epoch

    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module},
                            args.name)
    attach_training_logger(trainer, writer=writer)

    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader,
                         writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])
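
thresholded_transform, init_optimizer, init_loss, and the attach_* helpers are defined elsewhere in that project. A rough sketch of what a thresholded output transform for ignite's Accuracy metric could look like, assuming the evaluator's output_transform above already yields (sigmoid probabilities, target masks):

import torch


def thresholded_transform(threshold=0.5):
    def transform(output):
        y_pred, y = output
        # binarize the probabilities at the given threshold before computing accuracy
        return (y_pred > threshold).long(), y.long()
    return transform
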
Example #6
def main(exp_name="cifar_for_images", load=False):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    seed_everything()
    print(torch.cuda.get_device_name(0))
    torch.seed()
    if str(my_computer) == "False":
        n_clusters = None
        if os.environ["dataset_name"] == "imagenet":
            n_clusters = 512
            os.environ["batch_size"] = str(512)
        elif os.environ["dataset_name"] == "cifar10":
            n_clusters = 20
        elif os.environ["dataset_name"] == "tiny_imagenet":
            n_clusters = 200
        os.environ["n_cluster"] = str(n_clusters)
    else:
        os.environ["n_cluster"] = "10"
    print(f"n clustrs is {os.environ['n_cluster']}")
    print(f"batch size is {os.environ['batch_size']}")
    n_classes = None
    if os.environ["dataset_name"] == "imagenet":
        n_classes = 1000
    elif os.environ["dataset_name"] == "cifar10":
        n_classes = 10
    elif os.environ["dataset_name"] == "tiny_imagenet":
        n_classes = 200

    if network_to_use == "DenseNet":
        models = [
            DenseNet(
                200,
                clustering_algorithm=clustering_algorithms.KmeanSklearnByBatch(
                    n_clusters=int(os.environ['n_cluster']))),
            DenseNet(200)
        ]
        # models = [DenseNet(200),resnet50(num_classes=200,pretrained=False)]
        base_lrs = [0.0001, 0.00001, 0.00001, 0.000001]
        max_lrs = [0.0006, 0.00006, 0.00006, 0.000006]
        step_sizes_up = [4686, 4686, 3128, 1564]
        ths = [0.52, 0.61, 0.62, 0.99]
        optimizer1 = torch.optim.RMSprop(models[0].parameters(),
                                         lr=0.0001,
                                         eps=1e-08,
                                         weight_decay=2e-4)
        scheduler1 = chainedCyclicLr(optimizer=optimizer1,
                                     base_lrs=base_lrs,
                                     max_lrs=max_lrs,
                                     step_sizes_up=step_sizes_up,
                                     ths=ths)
        optimizer2 = torch.optim.RMSprop(models[1].parameters(),
                                         lr=0.0001,
                                         eps=1e-08,
                                         weight_decay=2e-4)
        scheduler2 = chainedCyclicLr(optimizer=optimizer2,
                                     base_lrs=base_lrs,
                                     max_lrs=max_lrs,
                                     step_sizes_up=step_sizes_up,
                                     ths=ths)
        loss_func = nn.NLLLoss
    elif network_to_use == "ResNet50":
        models = [
            resnet50(
                num_classes=n_classes,
                clustering_algorithm=clustering_algorithms.KmeanSklearnByBatch(
                    n_clusters=int(os.environ['n_cluster'])),
                pretrained=False),
            resnet50(num_classes=n_classes, pretrained=False)
        ]
        # placeholder SGD optimizer; the CyclicLR schedulers below are attached to it,
        # not to the Adam optimizers that actually update the models
        fake = torch.optim.SGD(models[0].parameters(),
                               lr=0.001,
                               momentum=0.9,
                               nesterov=True,
                               weight_decay=5e-4)
        optimizer1 = torch.optim.Adam(models[0].parameters(),
                                      lr=0.001,
                                      betas=(0.9, 0.999),
                                      eps=1e-08,
                                      weight_decay=0,
                                      amsgrad=False)
        scheduler1 = torch.optim.lr_scheduler.CyclicLR(fake,
                                                       base_lr=0.00001,
                                                       max_lr=0.01,
                                                       step_size_up=5000,
                                                       mode="triangular2")
        optimizer2 = torch.optim.Adam(models[1].parameters(),
                                      lr=0.001,
                                      betas=(0.9, 0.999),
                                      eps=1e-08,
                                      weight_decay=0,
                                      amsgrad=False)
        scheduler2 = torch.optim.lr_scheduler.CyclicLR(fake,
                                                       base_lr=0.00001,
                                                       max_lr=0.01,
                                                       step_size_up=5000,
                                                       mode="triangular2")
        loss_func = nn.CrossEntropyLoss
    else:
        models = []

    for model_idx, model in enumerate(models):
        print(f"copy model {model_idx} to device")
        # print(model)
        # print(f"model {model_idx} total param is {sum(p.numel() for p in model.parameters())}")
        # print(f"model {model_idx} traineble param is {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
        model.to(device=device)
        # print(os.popen('nvidia-smi').read())
    train_dls, eval_dls, test_dls = [], [], []
    # create cluster resnet data

    if os.environ["dataset_name"] == "imagenet":

        data_root = "/home/ML_courses/datasets/imagenet/"
        train_set_normal = utils.ImageNetDs(data_root=data_root,
                                            max_index=500,
                                            do_aug=True)
        test_set = utils.ImageNetDs(data_root=data_root,
                                    is_train=False,
                                    is_eval=False,
                                    do_aug=False)
        train_set_clustered = utils.ImageNetDs(data_root=data_root,
                                               max_index=400,
                                               do_aug=False)
        eval_set = utils.ImageNetDs(data_root=data_root,
                                    is_eval=True,
                                    is_train=False,
                                    max_index=400,
                                    do_aug=False)
        # train_set_normal, test_set = utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"),
        #     max_index=10), utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"), is_train=False,
        #     is_eval=False)
        # train_set_clustered, eval_set = utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"),
        #     max_index=9), utils.DS_by_batch(
        #     data_root=os.path.join(os.path.dirname(os.getcwd()), "data", "data_clustering", "imagenet"), is_eval=True,
        #     is_train=False)

    elif os.environ["dataset_name"] == "cifar10":
        train_set_normal, test_set = utils.Cifar10Ds(
            data_root=os.path.join(os.path.dirname(os.getcwd()), "data",
                                   "data_clustering"),
            max_index=5), utils.Cifar10Ds(data_root=os.path.join(
                os.path.dirname(os.getcwd()), "data", "data_clustering"),
                                          is_train=False,
                                          is_eval=False)
        train_set_clustered, eval_set = utils.Cifar10Ds(
            data_root=os.path.join(os.path.dirname(os.getcwd()), "data",
                                   "data_clustering"),
            max_index=4), utils.Cifar10Ds(data_root=os.path.join(
                os.path.dirname(os.getcwd()), "data", "data_clustering"),
                                          is_eval=True,
                                          is_train=False)
    elif os.environ["dataset_name"] == "tiny_imagenet":
        data_root = "/content/tiny-imagenet-200" if my_computer == "False" else os.path.join(
            os.path.dirname(os.getcwd()), "data", "data_clustering",
            "tiny-imagenet-200")
        train_set_normal, test_set = utils.TinyInDs(
            data_root=data_root, max_index=500,
            do_aug=True), utils.TinyInDs(data_root=data_root,
                                         is_train=False,
                                         is_eval=False,
                                         do_aug=False)
        train_set_clustered, eval_set = utils.TinyInDs(
            data_root=data_root, max_index=400,
            do_aug=False), utils.TinyInDs(data_root=data_root,
                                          is_eval=True,
                                          is_train=False,
                                          max_index=400,
                                          do_aug=False)
    else:
        raise Exception("1")
    tb = utils.Tb(exp_name=exp_name)
    print("clustreee")
    if str(my_computer) == "True":
        start_clustering = 6
    else:
        if os.environ["dataset_name"] == "imagenet":
            start_clustering = 20000
        elif os.environ["dataset_name"] == "cifar10":
            start_clustering = 200
        elif os.environ["dataset_name"] == "tiny_imagenet":
            start_clustering = 3000

    clustered_sampler = ClusteredSampler(train_set_normal, tb=tb)
    train_dl, eval_dl, test_dl = utils.create_data_loaders(
        [train_set_clustered, eval_set, test_set], [
            RegularSampler(train_set_clustered),
            RegularSampler(eval_set),
            RegularSampler(test_set)
        ])
    train_dls.append(train_dl)
    eval_dls.append(eval_dl)
    test_dls.append(test_dl)
    # normal resnet data
    train_dl, eval_dl, test_dl = utils.create_data_loaders(
        [train_set_normal, [], test_set],
        [RegularSampler(train_set_normal), None,
         RegularSampler(test_set)])
    train_dls.append(train_dl)
    eval_dls.append(eval_dl)
    test_dls.append(test_dl)

    trainer = Trainer(models=models,
                      train_dls=train_dls,
                      eval_dls=eval_dls,
                      test_dls=test_dls,
                      loss_fn=loss_func(),
                      loss_fn_eval=loss_func(reduction="none"),
                      optimizers=[optimizer1, optimizer2],
                      schedulers=[scheduler1, scheduler2],
                      num_steps=300000,
                      tb=tb,
                      load=load,
                      clustered_sampler=clustered_sampler,
                      start_clustering=start_clustering)
    trainer.train_models()
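
Here utils.create_data_loaders receives a list of datasets and a matching list of samplers. A minimal sketch of that signature, assuming it zips the two lists into torch DataLoaders and passes through a None placeholder when a split is empty (the batch size read from the environment mirrors its use earlier in the example; num_workers is an arbitrary choice):

import os
import torch


def create_data_loaders(datasets, samplers):
    batch_size = int(os.environ.get("batch_size", 128))
    loaders = []
    for dataset, sampler in zip(datasets, samplers):
        if sampler is None or len(dataset) == 0:
            # the normal-model branch above passes an empty eval set and a None sampler
            loaders.append(None)
            continue
        loaders.append(torch.utils.data.DataLoader(dataset,
                                                   batch_size=batch_size,
                                                   sampler=sampler,
                                                   num_workers=2))
    return loaders
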