# Example 1
def main():
    """Train or evaluate a MobileNet2 model according to command-line args.

    Seeds all RNGs, selects device and dtype, builds the model and data
    loaders, optionally resumes from a checkpoint or runs a cyclical-LR
    bounds search, then delegates the training loop to ``train_network``.
    """
    args = parser.parse_args()

    # Pick a random seed when none was given; print it so the run can be
    # reproduced by re-supplying the same value.
    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    # Fix: compare strings with `==`, not `is` — identity comparison with a
    # literal is implementation-dependent and a SyntaxWarning on Python 3.8+.
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # The first listed GPU hosts the master replica for DataParallel.
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = MobileNet2(input_size=args.input_size, scale=args.scaling)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    # FLOPs are measured with the per-GPU batch size when running multi-GPU.
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(MobileNet2,
                                    args.batch_size // len(args.gpus) if args.gpus is not None else args.batch_size,
                                    device, dtype, args.input_size, 3, args.scaling)))

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size, args.batch_size, args.input_size,
                                           args.workers)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.decay,
                                nesterov=True)
    # One-off mode: search for cyclical-LR bounds, save results, and exit.
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=args.min_lr,
                        max_lr=args.max_lr, step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint: either a checkpoint file, or a
    # results directory containing 'checkpoint.pth.tar' and 'results.csv'.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            # Reload the previous CSV log so the logger continues seamlessly.
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    # Look up the paper's claimed accuracies for this configuration, if known.
    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            claimed_acc5 = claimed_acc_top5[args.input_size][args.scaling]
            csv_logger.write_text(
                'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100., claimed_acc5 * 100.))
    train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader, optimizer, criterion,
                  device, dtype, args.batch_size, args.log_interval, csv_logger, save_path, claimed_acc1, claimed_acc5,
                  best_test)
# Example 2
def main():
    """Train, evaluate, or extract features from a RECNN regression model.

    Seeds all RNGs, selects device and dtype, builds the RECNN model plus a
    masked twin (``ex_model``) used for feature extraction, optionally
    resumes from a checkpoint, and either extracts features from HDF5 data,
    evaluates, or runs the training loop via ``train_network``.
    """
    args = parser.parse_args()

    # Pick a random seed when none was given; print it so the run can be
    # reproduced by re-supplying the same value.
    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    # Fix: compare strings with `==`, not `is` — identity comparison with a
    # literal is implementation-dependent and a SyntaxWarning on Python 3.8+.
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # The first listed GPU hosts the master replica for DataParallel.
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    if (args.model == "recnn"):
        print("Training RECNN")
        model = RECNN()
        ex_model = RECNN_Mask()
    else:
        # Fix: fail fast. The original only printed here and then crashed
        # later with UnboundLocalError when `model` was first used.
        raise ValueError("Error: no model matched!")
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss()

    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
        ex_model = torch.nn.DataParallel(ex_model, args.gpus)

    model.to(device=device, dtype=dtype)
    ex_model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    # One-off mode: search for cyclical-LR bounds, save results, and exit.
    # NOTE(review): `train_loader` is not defined before this point in this
    # function — running with --find-clr would raise NameError; confirm
    # against the original repository.
    if args.find_clr:
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint: either a checkpoint file, or a
    # results directory containing 'model_best.pth.tar'.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'model_best.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_test = checkpoint['best_prec1']
            # Both the training model and the masked extraction model share
            # the same weights.
            model.load_state_dict(checkpoint['state_dict'])
            ex_model.load_state_dict(checkpoint['state_dict'])

            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))

        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.extract_features:
        # Concatenate every test-set HDF5 shard into one tensor pair.
        test_hdf5_list = [
            x for x in glob.glob(os.path.join(args.h5dir, 'test', '*.h5'))
        ]
        test_hdf5_list.sort()
        print(test_hdf5_list)
        tcnt = 0
        for f in test_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                testx = torch.from_numpy(np.array(h5_file['data']))
                testy = torch.from_numpy(np.array(h5_file['label']))
            else:
                testcx = torch.from_numpy(np.array(h5_file['data']))
                testcy = torch.from_numpy(np.array(h5_file['label']))
                testx = torch.cat((testx, testcx), 0)
                testy = torch.cat((testy, testcy), 0)

        # Insert a singleton channel dimension: (N, D, H, W) -> (N, 1, D, H, W).
        tex_shape = testx.shape
        testx = testx.view(tex_shape[0], 1, tex_shape[1], tex_shape[2],
                           tex_shape[3])
        testxy = torch.utils.data.TensorDataset(testx, testy)
        val_loader = torch.utils.data.DataLoader(testxy,
                                                 batch_size=args.batch_size,
                                                 shuffle=False)
        (test_features, test_preds,
         test_target) = extract_features(model, ex_model, val_loader,
                                         criterion, device, dtype)

        test_features_numpy = test_features.cpu().numpy()
        test_preds_numpy = test_preds.cpu().numpy()
        test_target_numpy = test_target.cpu().numpy()

        # Save extracted test features/predictions/targets as a .mat file.
        test_data = {
            'test_features': test_features_numpy,
            'test_preds': test_preds_numpy,
            'test_target': test_target_numpy
        }
        test_mat_filename = 'test' + args.setting
        scipy.io.savemat(test_mat_filename, test_data)
        # Repeat for the training-set shards.
        train_hdf5_list = [
            x for x in glob.glob(os.path.join(args.h5dir, 'train', '*.h5'))
        ]
        train_hdf5_list.sort()
        tcnt = 0
        for f in train_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                trainx = torch.from_numpy(np.array(h5_file['data']))
                trainy = torch.from_numpy(np.array(h5_file['label']))
            else:
                traincx = torch.from_numpy(np.array(h5_file['data']))
                traincy = torch.from_numpy(np.array(h5_file['label']))
                trainx = torch.cat((trainx, traincx), 0)
                trainy = torch.cat((trainy, traincy), 0)

        trx_shape = trainx.shape
        trainx = trainx.view(trx_shape[0], 1, trx_shape[1], trx_shape[2],
                             trx_shape[3])
        trainxy = torch.utils.data.TensorDataset(trainx, trainy)
        train_loader = torch.utils.data.DataLoader(trainxy,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

        (train_features, train_preds,
         train_target) = extract_features(model, ex_model, train_loader,
                                          criterion, device, dtype)

        train_features_numpy = train_features.cpu().numpy()
        train_preds_numpy = train_preds.cpu().numpy()
        train_target_numpy = train_target.cpu().numpy()
        train_data = {
            'train_features': train_features_numpy,
            'train_preds': train_preds_numpy,
            'train_target': train_target_numpy
        }
        train_mat_filename = 'train' + args.setting
        scipy.io.savemat(train_mat_filename, train_data)
        return

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device,
                                dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    # Regression task (MSE): lower is better, so start "best" at a large value.
    best_test = 10000000
    train_network(args.start_epoch, args.epochs, scheduler, model,
                  train_loader, val_loader, optimizer, criterion, device,
                  dtype, args.batch_size, args.log_interval, csv_logger,
                  save_path, claimed_acc1, claimed_acc5, best_test)
# Example 3
def main():
    """Train or evaluate an MnasNet model as configured by ``get_args()``.

    Supports distributed training, float16 with float32 batch-norm,
    several LR schedulers via ``OptimizerWrapper``, checkpoint resume,
    and a one-off cyclical-LR bounds search mode.
    """
    args = get_args()
    device, dtype = args.device, args.dtype

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size,
                                           args.batch_size, args.input_size,
                                           args.workers, args.world_size,
                                           args.local_rank)

    # num_steps gives the model the total number of optimizer steps in the
    # whole run (len(train_loader) batches per epoch).
    model = MnasNet(n_class=args.num_classes,
                    width_mult=args.scaling,
                    drop_prob=0.0,
                    num_steps=len(train_loader) * args.epochs)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    # FLOPs are counted with batch size 1.
    flops = flops_benchmark.count_flops(MnasNet,
                                        1,
                                        device,
                                        dtype,
                                        args.input_size,
                                        3,
                                        width_mult=args.scaling)
    # Printing is suppressed when args.child is set (presumably worker
    # processes in a multi-process launch — confirm against get_args()).
    if not args.child:
        print(model)
        print('number of parameters: {}'.format(num_parameters))
        print('FLOPs: {}'.format(flops))

    # define loss function (criterion) and optimizer
    criterion = CrossEntropyLoss()
    mixup = Mixup(args.num_classes, args.mixup, args.smooth_eps)

    model, criterion = model.to(device=device,
                                dtype=dtype), criterion.to(device=device,
                                                           dtype=dtype)
    # In float16 mode, keep batch-norm layers in float32 for numerical
    # stability.
    if args.dtype == torch.float16:
        for module in model.modules():  # FP batchnorm
            if is_bn(module):
                module.to(dtype=torch.float32)

    if args.distributed:
        args.device_ids = [args.local_rank]
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_init,
                                world_size=args.world_size,
                                rank=args.local_rank)
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
        print('Node #{}'.format(args.local_rank))
    else:
        model = torch.nn.parallel.DataParallel(model,
                                               device_ids=[args.local_rank],
                                               output_device=args.local_rank)

    # Optimizer is constructed lazily by OptimizerWrapper from these specs.
    optimizer_class = torch.optim.SGD
    optimizer_params = {
        "lr": args.learning_rate,
        "momentum": args.momentum,
        "weight_decay": args.decay,
        "nesterov": True
    }
    # One-off mode: search for cyclical-LR bounds, save results, and exit.
    if args.find_clr:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.decay,
                                    nesterov=True)
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=args.save_path)
        return

    # Select the LR scheduler spec; instantiated by OptimizerWrapper below.
    if args.sched == 'clr':
        scheduler_class = CyclicLR
        scheduler_params = {
            "base_lr": args.min_lr,
            "max_lr": args.max_lr,
            "step_size": args.epochs_per_step * len(train_loader),
            "mode": args.mode
        }
    elif args.sched == 'multistep':
        scheduler_class = MultiStepLR
        scheduler_params = {"milestones": args.schedule, "gamma": args.gamma}
    elif args.sched == 'cosine':
        scheduler_class = CosineLR
        scheduler_params = {
            "max_epochs": args.epochs,
            "warmup_epochs": args.warmup,
            "iter_in_epoch": len(train_loader)
        }
    elif args.sched == 'gamma':
        scheduler_class = StepLR
        scheduler_params = {"step_size": 30, "gamma": args.gamma}
    else:
        raise ValueError('Wrong scheduler!')

    # Shadow weights keep a float32 master copy when training in float16.
    optim = OptimizerWrapper(model,
                             optimizer_class=optimizer_class,
                             optimizer_params=optimizer_params,
                             scheduler_class=scheduler_class,
                             scheduler_params=scheduler_params,
                             use_shadow_weights=args.dtype == torch.float16)
    best_test = 0

    # optionally resume from a checkpoint: either a checkpoint file, or a
    # results directory with per-rank checkpoint/CSV files.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(
                args.resume, 'checkpoint{}.pth.tar'.format(args.local_rank))
            csv_path = os.path.join(args.resume,
                                    'results{}.csv'.format(args.local_rank))
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            # Reload the previous CSV log so the logger continues seamlessly.
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype,
                                args.child)  # TODO
        return

    csv_logger = CsvLogger(filepath=args.save_path,
                           data=data,
                           local_rank=args.local_rank)
    csv_logger.save_params(sys.argv, args)

    # Look up the paper's claimed top-1 accuracy for this configuration.
    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            if not args.child:
                csv_logger.write_text(
                    'Claimed accuracy is {:.2f}% top-1'.format(claimed_acc1 *
                                                               100.))
    train_network(args.start_epoch, args.epochs, optim, model, train_loader,
                  val_loader, criterion, mixup, device, dtype, args.batch_size,
                  args.log_interval, csv_logger, args.save_path, claimed_acc1,
                  claimed_acc5, best_test, args.local_rank, args.child)
# Example 4 (fragment: the start of this function was lost in extraction)
                                    device, dtype, input_size, 3, scaling)))

    train_loader, val_loader = get_loaders(dataroot, batch_size, batch_size, input_size,
                                           workers)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if gpus is not None:
        model = torch.nn.DataParallel(model, gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum, weight_decay=decay,
                                nesterov=True)
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=min_lr,
                        max_lr=max_lr, step_size=epochs_per_step * len(train_loader), mode=mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0

 
    if evaluate == 'true':
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return
# Example 5
def main():
    """Train or evaluate a ShuffleNetV2 model according to command-line args.

    Seeds all RNGs, selects device and dtype, builds the model and data
    loaders, optionally resumes from a checkpoint or runs a cyclical-LR
    bounds search, then delegates the training loop to ``train_network``.
    """
    args = parser.parse_args()

    # Pick a random seed when none was given; print it so the run can be
    # reproduced by re-supplying the same value.
    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    # Fix: compare strings with `==`, not `is` — identity comparison with a
    # literal is implementation-dependent and a SyntaxWarning on Python 3.8+.
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # The first listed GPU hosts the master replica for DataParallel.
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = ShuffleNetV2(scale=args.scaling,
                         c_tag=args.c_tag,
                         SE=args.SE,
                         residual=args.residual,
                         groups=args.groups)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    # FLOPs are measured with the per-GPU batch size when running multi-GPU.
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(
            ShuffleNetV2, args.batch_size //
            len(args.gpus) if args.gpus is not None else args.batch_size,
            device, dtype, args.input_size, 3, args.scaling, 3, args.c_tag,
            1000, torch.nn.ReLU, args.SE, args.residual, args.groups)))

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size,
                                           args.batch_size, args.input_size,
                                           args.workers)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    # One-off mode: search for cyclical-LR bounds, save results, and exit.
    if args.find_clr:
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint: either a checkpoint file, or a
    # results directory containing 'checkpoint.pth.tar' and 'results.csv'.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            # Reload the previous CSV log so the logger continues seamlessly.
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device,
                                dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    # Look up the paper's claimed top-1 accuracy for this configuration
    # (the table is keyed by SE flag and stores error rates, hence 1 - x).
    claimed_acc1 = None
    claimed_acc5 = None
    if args.SE in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.SE]:
            claimed_acc1 = 1 - claimed_acc_top1[args.SE][args.scaling]
            csv_logger.write_text('Claimed accuracy is {:.2f}% top-1'.format(
                claimed_acc1 * 100.))
    train_network(args.start_epoch, args.epochs, scheduler, model,
                  train_loader, val_loader, optimizer, criterion, device,
                  dtype, args.batch_size, args.log_interval, csv_logger,
                  save_path, claimed_acc1, claimed_acc5, best_test)
# Example 6
def main():
    """Train or evaluate a MobileNetV3 model as configured by ``get_args()``.

    Supports distributed training with optional SyncBatchNorm, float16 with
    float32 batch-norm, checkpoint resume, SWA evaluation, and a one-off
    cyclical-LR bounds search mode.
    """
    import warnings

    # filter out corrupted images warnings
    warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)

    args = get_args()
    device, dtype = args.device, args.dtype

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size, args.batch_size, args.input_size,
                                           args.workers, args.world_size, args.local_rank)
    # Total / already-completed optimizer steps, used by the model's internal
    # schedules (e.g. drop probability) and by the optimizer setup.
    args.num_batches = len(train_loader) * args.epochs
    args.start_step = len(train_loader) * args.start_epoch
    model = MobileNetV3(num_classes=args.num_classes, scale=args.scaling, in_channels=3, drop_prob=args.dp,
                        num_steps=args.num_batches, start_step=args.start_step, small=args.small)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    flops = flops_benchmark.count_flops(MobileNetV3, 2, device, dtype, args.input_size, 3, num_classes=args.num_classes,
                                        scale=args.scaling, drop_prob=args.dp, num_steps=args.num_batches,
                                        start_step=args.start_step, small=args.small)
    if not args.child:
        print(model)
        print('number of parameters: {}'.format(num_parameters))
        print('FLOPs: {}'.format(flops))
        # Fix: corrected typo in the message ('Resuts' -> 'Results').
        print('Results saved to {}'.format(args.save_path))

    # define loss function (criterion) and optimizer
    criterion = CrossEntropyLoss()

    model, criterion = model.to(device=device, dtype=dtype), criterion.to(device=device, dtype=dtype)
    # In float16 mode, keep batch-norm layers in float32 for numerical
    # stability.
    if args.dtype == torch.float16:
        for module in model.modules():  # FP batchnorm
            if is_bn(module):
                module.to(dtype=torch.float32)  # github.com/pytorch/pytorch/issues/20634

    if args.distributed:
        args.device_ids = [args.local_rank]
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_init, world_size=args.world_size,
                                rank=args.local_rank)
        if args.sync_bn:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                          output_device=args.local_rank)
        print('Node #{}'.format(args.local_rank))
    else:
        model = torch.nn.parallel.DataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank)

    # One-off mode: search for cyclical-LR bounds, save results, and exit.
    if args.find_clr:
        optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                    weight_decay=args.decay, nesterov=True)
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=args.min_lr,
                        max_lr=args.max_lr, step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=args.save_path)
        return

    best_test = 0

    # optionally resume from a checkpoint: either a checkpoint file, or a
    # results directory with per-rank checkpoint/CSV files. The optimizer
    # and mixup are (re)built from the checkpoint's optimizer state.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            args.start_step = len(train_loader) * args.start_epoch
            optim, mixup = init_optimizer_and_mixup(args, train_loader, model, checkpoint['optimizer'])
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint{}.pth.tar'.format(args.local_rank))
            csv_path = os.path.join(args.resume, 'results{}.csv'.format(args.local_rank))
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch']
            args.start_step = len(train_loader) * args.start_epoch
            optim, mixup = init_optimizer_and_mixup(args, train_loader, model, checkpoint['optimizer'])
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            # Reload the previous CSV log so the logger continues seamlessly.
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        optim, mixup = init_optimizer_and_mixup(args, train_loader, model)

    if args.evaluate:
        # Optionally average checkpoints (SWA) before evaluating.
        if args.swa:
            sd = swa_clr(args.swa, device)
            model.load_state_dict(sd)
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype, args.child)  # TODO
        return

    csv_logger = CsvLogger(filepath=args.save_path, data=data, local_rank=args.local_rank)
    csv_logger.save_params(sys.argv, args)

    # Look up the paper's claimed top-1 accuracy for this network variant.
    claimed_acc1 = None
    claimed_acc5 = None
    ntype = 'small' if args.small else 'large'
    if ntype in claimed_acc_top1:
        if args.input_size in claimed_acc_top1[ntype]:
            if args.scaling in claimed_acc_top1[ntype][args.input_size]:
                claimed_acc1 = claimed_acc_top1[ntype][args.input_size][args.scaling]
                if not args.child:
                    csv_logger.write_text('Claimed accuracy is {:.2f}% top-1'.format(claimed_acc1 * 100.))
    train_network(args.start_epoch, args.epochs, optim, model, train_loader, val_loader, criterion, mixup,
                  device, dtype, args.batch_size, args.log_interval, csv_logger, args.save_path, claimed_acc1,
                  claimed_acc5, best_test, args.local_rank, args.child)
# Example 7 (truncated: this function continues beyond the end of this excerpt)
def main():
    """Train or evaluate an STN_MobileNet2 model.

    Parses command-line arguments, seeds all RNGs, builds the model, data
    loaders, optimizer and LR scheduler, optionally resumes from a checkpoint
    (file or results directory), then either runs a single evaluation pass or
    hands off to train_network().
    """
    args = parser.parse_args()

    # Seed every RNG so runs are reproducible; draw (and print) a random seed
    # when the user did not supply one, so the run can be repeated.
    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'  # evaluation runs don't need persistent results
    # FIX: was `args.save is ''` — identity comparison against a string literal
    # is implementation-defined (relies on CPython interning) and emits a
    # SyntaxWarning on Python 3.8+. Equality is the correct test.
    if args.save == '':
        args.save = 'pretrain_aug_' + time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Resolve the compute device: first GPU of the requested list, else CPU.
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True  # autotune conv algorithms for fixed-size inputs
    else:
        device = 'cpu'

    # Map the --type flag to a torch dtype.
    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = STN_MobileNet2(input_size=args.input_size, scale=args.scaling, shearing=args.shearing)
    # Generator expression avoids materializing a throwaway list of counts.
    num_parameters = sum(p.nelement() for p in model.parameters())
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(STN_MobileNet2,
                                    args.batch_size // len(args.gpus) if args.gpus is not None else args.batch_size,
                                    device, dtype, args.input_size, 3, args.scaling)))

    train_loader, val_loader, test_loader = get_loaders(args.dataroot, args.batch_size, args.batch_size,
                                                        args.input_size,
                                                        args.workers, args.b_weights)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)

    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                weight_decay=args.decay, nesterov=True)

    # LR-range-test mode: sweep the learning rate, save the plot, and exit.
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=args.min_lr,
                        max_lr=args.max_lr, step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        print('Use CLR')
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        print('Use scheduler')
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    # Lower-is-better validation metric (test() below returns an MAE), so the
    # initial "best" is a large sentinel. NOTE(review): confirm 500 exceeds any
    # reachable value of the metric.
    best_val = 500

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            # Resume from a bare checkpoint file (e.g. ImageNet pre-training):
            # load compatible weights only and restart training from epoch 0.
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = 0
            best_val = 500
            state_dict = checkpoint['state_dict']

            # The source checkpoint's final fc layer has a different class
            # count, so its weight/bias are replaced with dummies of the
            # target shape — hard-coded 101 classes × 1280 features
            # (presumably Food-101 on a MobileNetV2 1280-dim head; TODO
            # confirm). They are effectively re-initialized for fine-tuning,
            # and strict=False tolerates any remaining key mismatches.
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                name = k
                if k == 'module.fc.bias':
                    new_state_dict[name] = torch.zeros(101)
                    continue
                elif k == 'module.fc.weight':
                    new_state_dict[name] = torch.ones(101, 1280)
                    continue
                else:
                    print('else:', name)
                    new_state_dict[name] = v

            model.load_state_dict(new_state_dict, strict=False)
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            # Resume from a results directory: restore full training state
            # (epoch, best metric, model, optimizer) plus the CSV log history.
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_val = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            with open(csv_path) as csvfile:
                data = list(csv.DictReader(csvfile))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, test_mae = test(model, test_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader, test_loader, optimizer,
                  criterion,
                  device, dtype, args.batch_size, args.log_interval, csv_logger, save_path, best_val)
Exemplo n.º 8
0
def main():
    """Run a MobileNet2 CLR learning-rate-range experiment, then train.

    Unlike the argparse-driven variants elsewhere in this file, every setting
    here is a hard-coded local constant. Results are written to a timestamped
    directory under /tmp.
    """
    # Seed all RNGs with a fresh random seed. NOTE(review): the seed is
    # neither fixed nor printed, so runs are not reproducible — confirm
    # that is intentional.
    seed = random.randint(1, 10000)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    results_dir = '/tmp'
    save = time_stamp
    save_path = os.path.join(results_dir, save)

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    device = 'cuda:0'
    cudnn.benchmark = True  # autotune conv algorithms for fixed-size inputs
    # NOTE(review): float64 is an unusual (and slow) training dtype on GPU —
    # confirm double precision is intentional.
    dtype = torch.float64

    # Hard-coded hyper-parameters.
    input_size = 224
    scaling = 1.0
    batch_size = 20
    workers = 4
    learning_rate = 0.02
    momentum = 0.9
    decay = 0.00004
    max_lr = 1
    min_lr = 0.00001
    start_epoch = 0
    epochs = 400
    epochs_per_step = 20  # CLR half-cycle length, in epochs
    log_interval = 100
    mode = 'triangular2'
    # FIX: was the string 'false' compared against 'true' below — a
    # stringly-typed flag that silently breaks on any other spelling.
    # A real boolean preserves the original (falsy) behavior.
    evaluate = False
    dataroot = "data"

    model = MobileNet2(input_size=input_size, scale=scaling)
    # Generator expression avoids materializing a throwaway list of counts.
    num_parameters = sum(p.nelement() for p in model.parameters())
    print('number of parameters: {}'.format(num_parameters))

    train_loader, val_loader = get_loaders(dataroot, batch_size, batch_size, input_size, workers)

    # define loss function (criterion) and optimizer; DataParallel without an
    # explicit device list uses all visible GPUs.
    criterion = torch.nn.CrossEntropyLoss()
    model = torch.nn.DataParallel(model)

    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum,
                                weight_decay=decay, nesterov=True)

    # LR range test. NOTE(review): the sibling mains `return` right after this
    # call; this variant continues on to full training — confirm intentional.
    find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=min_lr,
                    max_lr=max_lr, step_size=epochs_per_step * len(train_loader), mode=mode,
                    save_path=save_path)
    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr,
                         step_size=epochs_per_step * len(train_loader), mode=mode)

    best_test = 0  # higher-is-better accuracy, so start from zero

    if evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=[])

    # Look up the paper's claimed accuracies for this configuration, if known.
    claimed_acc1 = None
    claimed_acc5 = None
    if input_size in claimed_acc_top1:
        if scaling in claimed_acc_top1[input_size]:
            claimed_acc1 = claimed_acc_top1[input_size][scaling]
            claimed_acc5 = claimed_acc_top5[input_size][scaling]
            csv_logger.write_text(
                'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100., claimed_acc5 * 100.))

    train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer, criterion,
                  device, dtype, batch_size, log_interval, csv_logger, './data', claimed_acc1, claimed_acc5,
                  best_test)

    return 1