Example #1
    def __init__(self, args):
        super(Dataloader, self).__init__()
        self.args = args

        self.dataset_test_name = args.dataset_test
        self.dataset_train_name = args.dataset_train
        self.dataroot = args.dataroot
        self.batch_size = args.batch_size

        if self.dataset_train_name == "CELEBA":
            self.dataset_train, self.dataset_train_len = datasets.ImageFolder(
                root=self.dataroot + "/train")

        elif self.dataset_train_name == "MNIST":
            self.dataset_train, self.dataset_train_len = datasets.MNIST(
                self.dataroot).train()

        else:
            raise Exception("Unknown Dataset")

        if self.dataset_test_name == "CELEBA":
            self.dataset_test, self.dataset_test_len = datasets.ImageFolder(
                root=self.dataroot + "/test")

        elif self.dataset_test_name == "MNIST":
            self.dataset_test, self.dataset_test_len = datasets.MNIST(
                self.dataroot).test()

        else:
            raise Exception("Unknown Dataset")
Example #2
def fetch_dataset(data_name, subset):
    dataset = {}
    print('fetching data {}...'.format(data_name))
    root = './data/{}'.format(data_name)
    if data_name == 'MNIST':
        dataset['train'] = datasets.MNIST(root=root,
                                          split='train',
                                          subset=subset,
                                          transform=datasets.Compose([
                                              transforms.ToTensor(),
                                              transforms.Normalize((0.1307, ),
                                                                   (0.3081, ))
                                          ]))
        dataset['test'] = datasets.MNIST(root=root,
                                         split='test',
                                         subset=subset,
                                         transform=datasets.Compose([
                                             transforms.ToTensor(),
                                             transforms.Normalize((0.1307, ),
                                                                  (0.3081, ))
                                         ]))
    elif data_name == 'CIFAR10':
        dataset['train'] = datasets.CIFAR10(
            root=root,
            split='train',
            subset=subset,
            transform=datasets.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))
            ]))
        dataset['test'] = datasets.CIFAR10(root=root,
                                           split='test',
                                           subset=subset,
                                           transform=datasets.Compose([
                                               transforms.ToTensor(),
                                               transforms.Normalize(
                                                   (0.4914, 0.4822, 0.4465),
                                                   (0.2023, 0.1994, 0.2010))
                                           ]))
    elif data_name in ['PennTreebank', 'WikiText2', 'WikiText103']:
        dataset_cls = getattr(datasets, data_name)
        dataset['train'] = dataset_cls(root=root, split='train')
        dataset['test'] = dataset_cls(root=root, split='test')
    else:
        raise ValueError('Not a valid dataset name')
    print('data ready')
    return dataset
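A minimal usage sketch for the fetch_dataset helper above; it assumes the project-local datasets module returns standard PyTorch-style datasets, and the subset value and batch size below are only placeholders:

import torch

dataset = fetch_dataset('MNIST', subset=None)  # subset is project-specific; None is a placeholder
train_loader = torch.utils.data.DataLoader(dataset['train'], batch_size=128,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset['test'], batch_size=128,
                                          shuffle=False)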
Example #3
def test_module_import():
    import datasets

    m = datasets.MNIST()
    # m2 = torchvision.datasets.MNIST()
    # assert len(m) == len(m2)
    assert len(m) == 60000
Example #4
def _get_maf_original(data_name):
    warnings.warn(
        "This function should generally not be called because it "
        "requires special setup but is kept here in order to reproduce functions if "
        "needed.")
    if sys.version_info < (3, ):
        # Load MNIST from MAF code
        maf_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "..", "..", "maf")
        sys.path.append(maf_path)
        # noinspection PyPackageRequirements
        import datasets  # maf/datasets/*

        # Reset datasets root directory relative to this file
        datasets.root = os.path.join(maf_path, "data") + "/"

        # Copied from maf/experiments.py
        if data_name == "mnist":
            data = datasets.MNIST(logit=True, dequantize=True)
        elif data_name == "bsds300":
            data = datasets.BSDS300()
        elif data_name == "cifar10":
            data = datasets.CIFAR10(logit=True, flip=True, dequantize=True)
        elif data_name == "power":
            data = datasets.POWER()
        elif data_name == "gas":
            data = datasets.GAS()
        elif data_name == "hepmass":
            data = datasets.HEPMASS()
        elif data_name == "miniboone":
            data = datasets.MINIBOONE()
        else:
            raise ValueError("Unknown dataset")

        # Make a dictionary instead of pickled object for better compatibility
        if hasattr(data.trn, "labels"):
            data_dict = dict(
                X_train=data.trn.x,
                y_train=data.trn.labels,
                X_validation=data.val.x,
                y_validation=data.val.labels,
                X_test=data.tst.x,
                y_test=data.tst.labels,
                data_name=data_name,
            )
        else:
            data_dict = dict(
                X_train=data.trn.x,
                X_validation=data.val.x,
                X_test=data.tst.x,
                data_name=data_name,
            )
    else:
        raise RuntimeError(
            "Must create data using Python 2 to load data since MAF is written for "
            "Python 2")
    return data_dict
Example #5
def _get_maf_original(data_name):
    warnings.warn(
        'This function should generally not be called because it '
        'requires special setup but is kept here in order to reproduce functions if '
        'needed.')
    if sys.version_info < (3, ):
        # Load MNIST from MAF code
        maf_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                '..', '..', 'maf')
        sys.path.append(maf_path)
        # noinspection PyPackageRequirements
        import datasets  # maf/datasets/*

        # Reset datasets root directory relative to this file
        datasets.root = os.path.join(maf_path, 'data') + '/'

        # Copied from maf/experiments.py
        if data_name == 'mnist':
            data = datasets.MNIST(logit=True, dequantize=True)
        elif data_name == 'bsds300':
            data = datasets.BSDS300()
        elif data_name == 'cifar10':
            data = datasets.CIFAR10(logit=True, flip=True, dequantize=True)
        elif data_name == 'power':
            data = datasets.POWER()
        elif data_name == 'gas':
            data = datasets.GAS()
        elif data_name == 'hepmass':
            data = datasets.HEPMASS()
        elif data_name == 'miniboone':
            data = datasets.MINIBOONE()
        else:
            raise ValueError('Unknown dataset')

        # Make a dictionary instead of pickled object for better compatibility
        if hasattr(data.trn, 'labels'):
            data_dict = dict(
                X_train=data.trn.x,
                y_train=data.trn.labels,
                X_validation=data.val.x,
                y_validation=data.val.labels,
                X_test=data.tst.x,
                y_test=data.tst.labels,
                data_name=data_name,
            )
        else:
            data_dict = dict(
                X_train=data.trn.x,
                X_validation=data.val.x,
                X_test=data.tst.x,
                data_name=data_name,
            )
    else:
        raise RuntimeError(
            'Must create data using Python 2 to load data since MAF is written for '
            'Python 2')
    return data_dict
Example #6
    def __init__(self):
        self.config = Config()
        self.gstep = tf.Variable(0,
                                 dtype=tf.int32,
                                 trainable=False,
                                 name='global_step')
        self.num_epochs = self.config.num_epochs
        self.batch_size = self.config.batch_size
        self.isTraining = self.config.isTraining
        self.isVisualize = self.config.isVisualize
        self.isAnimate = self.config.isAnimate
        self.dataset = datasets.MNIST(self.config)
Example #7
def load_data(name):
    """
    Loads the dataset. Has to be called before anything else.
    :param name: string, the dataset's name
    """

    assert isinstance(name, str), 'Name must be a string'
    datasets.root = root_data
    global data, data_name

    if data_name == name:
        return

    if name == 'mnist':
        data = datasets.MNIST(logit=True, dequantize=True)
        data_name = name

    elif name == 'bsds300':
        data = datasets.BSDS300()
        data_name = name

    elif name == 'cifar10':
        data = datasets.CIFAR10(logit=True, flip=True, dequantize=True)
        data_name = name

    elif name == 'power':
        data = datasets.POWER()
        data_name = name

    elif name == 'gas':
        data = datasets.GAS()
        data_name = name

    elif name == 'hepmass':
        data = datasets.HEPMASS()
        data_name = name

    elif name == 'miniboone':
        data = datasets.MINIBOONE()
        data_name = name

    else:
        raise ValueError('Unknown dataset')
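A minimal call sketch for load_data above; it assumes the surrounding module defines root_data and initializes the globals data and data_name (for example to None) before the first call:

data, data_name = None, None   # module-level state read and written by load_data
root_data = './data/'          # illustrative root; the real value is project-specific

load_data('mnist')             # fills the module-level `data` with MAF-style splits
X_train = data.trn.x           # train/val/test arrays live on data.trn / data.val / data.tst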
Example #8
def load_data(name, logit=False, dequantize=False, flip=False):
    """
    Loads the dataset. Has to be called before anything else.
    :param name: string, the dataset's name
    """
    
    assert isinstance(name, str), 'Name must be a string'
    # global data
    
    
    if name == 'mnist':
        data = datasets.MNIST(logit=logit, dequantize=dequantize)
    elif name == 'bsds300':
        data = datasets.BSDS300()
    elif name == 'cifar10':
        data = datasets.CIFAR10(logit=logit, flip=flip, dequantize=dequantize)
    elif name == 'power':
        data = datasets.POWER()
    elif name == 'gas':
        data = datasets.GAS()
    elif name == 'hepmass':
        data = datasets.HEPMASS()
    elif name == 'miniboone':
        data = datasets.MINIBOONE()
    else:
        raise Exception('Unknown dataset')

    # get data splits
    X_train = data.trn.x
    X_val = data.val.x
    X_test = data.tst.x
    
    # Convert to float32
    X_train = X_train.astype(np.float32)
    X_val = X_val.astype(np.float32)
    X_test = X_test.astype(np.float32)
    
    return data, X_train, X_val, X_test
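A minimal call sketch for this load_data variant; it returns the dataset object plus float32 train/validation/test arrays, and 'power' is just an illustrative name:

data, X_train, X_val, X_test = load_data('power')
print(X_train.shape, X_val.shape, X_test.shape)  # float32 arrays; shapes depend on the dataset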
Example #9
elif arguments.descriptor == 'BRISK':
    globals.descriptor = features.BRISK()

elif arguments.descriptor == 'AKAZE':
    globals.descriptor = features.AKAZE()

elif arguments.descriptor == 'FREAK':
    globals.descriptor = features.FREAK()

# Print descriptor
features.printDescriptor()

# Path of Dataset
if arguments.dataset == 'MNIST':
    globals.data_train_images, globals.data_test_images = datasets.MNIST()

elif arguments.dataset == 'JAFFE':
    globals.data_train_images, globals.data_test_images = datasets.JAFFE()

elif arguments.dataset == 'Extended-CK+':
    globals.data_train_images, globals.data_test_images = datasets.extendedCK()

elif arguments.dataset == 'FEI':
    globals.data_train_images, globals.data_test_images = datasets.FEI()

elif arguments.dataset == 'CIFAR-10':
    globals.data_train_images, globals.data_test_images = datasets.CIFAR10()

elif arguments.dataset == 'FER-2013':
    globals.data_train_images, globals.data_test_images = datasets.FER2013()
Example #10
import argparse

import torch
import torchvision
import datasets

batch_size = 4

if (__name__ == '__main__'):
    parser = argparse.ArgumentParser(description='Taming VAEs experiments')
    parser.add_argument('--data',
                        dest='dataset',
                        default=None,
                        help='Dataset to be used')
    args = parser.parse_args()

    if (args.dataset.lower() == 'mnist'):
        data_set = datasets.MNIST('./data/mnist/',
                                  download=True,
                                  transform=torchvision.transforms.ToTensor())
    elif (args.dataset.lower() == 'cifar10'):
        data_set = datasets.CIFAR10(
            './data/cifar10/',
            download=True,
            transform=torchvision.transforms.ToTensor())
    elif (args.dataset.lower() == 'celeba'):
        data_set = datasets.CELEBA('./data/celeba/',
                                   transform=torchvision.transforms.ToTensor())

    loader = torch.utils.data.DataLoader(data_set,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)
    for batch in loader:
Example #11
def main(args):
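    # NOTE: legacy Python 2 / TensorFlow 1.x style code (print statements,
    # tf.placeholder, tf.initialize_all_variables).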
    args.update_exclusive(default_args)
    directory = os.path.join(config.RESULTSDIR, 'ws', args.directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    if not args.restore:
        map(os.remove, (os.path.join(directory, f)
                        for f in os.listdir(directory)
                        if f.endswith(".txt") or f.endswith(".png")))
        with open(os.path.join(directory, 'params.txt'), 'w') as f:
            f.write(repr(args))
    print directory

    dataset = datasets.MNIST(binary=True)

    x = tf.placeholder(np.float32, shape=(None, dataset.get_data_dim()))

    model = Model(x,
                  args.latent_units,
                  q_units=args.q_units,
                  p_units=args.p_units,
                  sleep=args.sleep_type,
                  batch_norm=args.bn)
    examples_per_epoch = dataset.data['train'][0].shape[0]
    num_updates = args.n_epochs * examples_per_epoch / args.mb_size

    step = tf.Variable(0, trainable=False)
    lr = tf.placeholder(tf.float32)
    train_op = args.optimizer(lr).minimize(model.loss, global_step=step)
    with tf.control_dependencies([train_op]):
        # linearly anneal alpha from 0 to 1 over the course of N/2 epochs, then train for additional N/2 epochs with alpha=1
        train_op = tf.assign(
            model.alpha,
            tf.minimum(
                1., tf.maximum(0.,
                               tf.cast(step, tf.float32) / num_updates * 2)))

    init_op = tf.initialize_all_variables()

    saver = tf.train.Saver(max_to_keep=1)

    with tf.Session() as sess:
        if not args.restore:
            sess.run(init_op)
        else:
            saver.restore(sess,
                          tf.train.latest_checkpoint(os.path.join(directory)))
            print "restored"
            measure_test_log_likelihood(sess, model, dataset, directory)
            import ipdb
            ipdb.set_trace()
        for x_np, _ in dataset.random_minibatches('train', args.mb_size,
                                                  num_updates):
            i, _ = sess.run([step, train_op], feed_dict={x: x_np, lr: args.lr})
            if i % 1000 == 1 or i == num_updates - 1:
                visualize(sess, model, dataset, directory,
                          float(i) * args.mb_size / examples_per_epoch)
            if i % 10000 == 1 or i == num_updates - 1:
                saver.save(sess,
                           os.path.join(directory, 'model.chk'),
                           global_step=step)
            if i % 10000 == 1:
                print directory
Example #12
def main(cf):
    print(
        f"\nStarting divisize normalization experiment {cf.logdir}: --seed {cf.seed} --device {utils.DEVICE}"
    )
    pprint.pprint(cf)
    os.makedirs(cf.logdir, exist_ok=True)
    utils.seed(cf.seed)
    utils.save_json({k: str(v)
                     for (k, v) in cf.items()}, cf.logdir + "config.json")

    train_dataset = datasets.MNIST(train=True,
                                   scale=cf.label_scale,
                                   size=cf.train_size,
                                   normalize=cf.normalize)
    test_dataset = datasets.MNIST(train=False,
                                  scale=cf.label_scale,
                                  size=cf.test_size,
                                  normalize=cf.normalize)
    train_loader = datasets.get_dataloader(train_dataset, cf.batch_size)
    test_loader = datasets.get_dataloader(test_dataset, cf.batch_size)
    print(
        f"Loaded data [train batches: {len(train_loader)} test batches: {len(test_loader)}]"
    )

    model = PCModel(nodes=cf.nodes,
                    mu_dt=cf.mu_dt,
                    act_fn=cf.act_fn,
                    use_bias=cf.use_bias,
                    kaiming_init=cf.kaiming_init,
                    pe_fn=cf.pe_fn,
                    pe_fn_inverse=cf.pe_fn_inverse)
    optimizer = optim.get_optim(
        model.params,
        cf.optim,
        cf.lr,
        batch_scale=cf.batch_scale,
        grad_clip=cf.grad_clip,
        weight_decay=cf.weight_decay,
    )

    with torch.no_grad():
        metrics = {"acc": []}
        for epoch in range(1, cf.n_epochs + 1):

            print(f"\nTrain @ epoch {epoch} ({len(train_loader)} batches)")
            for batch_id, (img_batch, label_batch) in enumerate(train_loader):
                model.train_batch_supervised(img_batch,
                                             label_batch,
                                             cf.n_train_iters,
                                             fixed_preds=cf.fixed_preds_train)
                optimizer.step(
                    curr_epoch=epoch,
                    curr_batch=batch_id,
                    n_batches=len(train_loader),
                    batch_size=img_batch.size(0),
                )

            if epoch % cf.test_every == 0:
                acc = 0
                for _, (img_batch, label_batch) in enumerate(test_loader):
                    label_preds = model.test_batch_supervised(img_batch)
                    acc += datasets.accuracy(label_preds, label_batch)
                metrics["acc"].append(acc / len(test_loader))
                print("\nTest @ epoch {} / Accuracy: {:.4f}".format(
                    epoch, acc / len(test_loader)))

            utils.save_json(metrics, cf.logdir + "metrics.json")
Example #13
if Args.cuda:
    print('Using GPU.')
# %% DataSet ID
datasets_id = {1: 'MNIST'}
try:
    datasets_name = datasets_id[Args.dataset]
except KeyError:
    print('Unknown dataset id; falling back to the default MNIST dataset.')
    datasets_name = 'MNIST'
# %% Main Function
if __name__ == '__main__':
    if datasets_name == 'MNIST':
        print('Using the MNIST dataset.')
        data_train, data_test = datasets.MNIST(True)
        data_loader_train = torch.utils.data.DataLoader(
            dataset=data_train, batch_size=Args.batch_size, shuffle=True)
        data_loader_test = torch.utils.data.DataLoader(
            dataset=data_test, batch_size=Args.batch_size, shuffle=True)

        cnn = models.CNN_MNIST1().cuda() if Args.cuda else models.CNN_MNIST1()
        loss_func = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(cnn.parameters(), lr=Args.learn_rate)
        if Args.show:
            plt.ion()
            figure = plt.figure(1)
            Accuracy = []
        print('==>Start Training.')
        for epoch in range(Args.epochs):
            for step, (x, y) in enumerate(data_loader_train):
Example #14
def get_data(args, train_flag=True, transform=None):
    if train_flag:
        # p = 0.5  # Invert 50% only randomly
        p = 1.0  # Invert all
    else:
        p = -1.0  # disable random transformations for testset

    if not transform:
        # if args.nc == 1:
        #     transform = transforms.Compose([
        #         transforms.Resize(args.image_size),
        #         transforms.Grayscale(),
        #         transforms.CenterCrop(args.image_size),
        #         transforms.ToTensor(),
        #         transforms.Normalize(
        #             (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        #     ])
        # else:
        if args.dataset in {'usps', 'mnist'} and args.exp == 'mnist_svhn':
            transform = transforms.Compose([
                transforms.Grayscale(num_output_channels=args.nc),
                transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                Normalize_RandomInvert_pixels(p=p, nc=args.nc),
                RandomClampTensors(min_margin=0, max_margin=0.3),
            ])
        elif args.dataset in {'usps', 'mnist'} and args.exp == 'svhn_mnist':
            transform = transforms.Compose([
                transforms.Grayscale(num_output_channels=args.nc),
                # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                # RandomClampTensors(min_margin=0, max_margin=0.0),
            ])
        elif args.dataset in {'usps', 'mnist'}:
            transform = transforms.Compose([
                transforms.Grayscale(num_output_channels=args.nc),
                # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
            ])
        elif args.dataset in {'svhn', 'syndigits'
                              } and args.exp == 'mnist_svhn':
            if args.nc == 1:
                transform = transforms.Compose([
                    transforms.Resize(args.image_size),
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.CenterCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                    # RandomClampTensors(min_margin=0, max_margin=0.3),
                ])
            else:
                transform = transforms.Compose([
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.Resize(args.image_size),
                    transforms.CenterCrop(args.image_size),
                    # transforms.RandomResizedCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                ])
        elif args.dataset in {'svhn', 'syndigits'
                              } and args.exp == 'svhn_mnist':
            if args.nc == 1:
                transform = transforms.Compose([
                    transforms.Resize(args.image_size),
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.CenterCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                    # RandomClampTensors(min_margin=0, max_margin=0.2),
                ])
            else:
                transform = transforms.Compose([
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.Resize(args.image_size),
                    transforms.CenterCrop(args.image_size),
                    # transforms.RandomResizedCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                ])
        elif args.dataset in {'svhn', 'syndigits'}:
            if args.nc == 1:
                transform = transforms.Compose([
                    transforms.Resize(args.image_size),
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.CenterCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                    # RandomClampTensors(min_margin=0, max_margin=0.2),
                ])
            else:
                transform = transforms.Compose([
                    # transforms.Grayscale(num_output_channels=args.nc),
                    transforms.Resize(args.image_size),
                    transforms.CenterCrop(args.image_size),
                    # transforms.RandomResizedCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                ])
        elif args.dataset in {'cifar9', 'stl9'}:
            transform = transforms.Compose([
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                # transforms.RandomHorizontalFlip(p=p),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])
        elif args.dataset in {'amazon', 'dslr', 'webcam'}:
            transform = transforms.Compose([
                transforms.RandomResizedCrop(args.image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])

    if args.dataset == 'usps':
        train_dataset = mydset.USPS(root=args.dataroot,
                                    download=False,
                                    train=True,
                                    transform=transform)
        test_dataset = mydset.USPS(root=args.dataroot,
                                   download=False,
                                   train=False,
                                   transform=transform)

    elif args.dataset == 'mnist':
        train_dataset = mydset.MNIST(root=args.dataroot,
                                     download=False,
                                     train=True,
                                     transform=transform)
        test_dataset = mydset.MNIST(root=args.dataroot,
                                    download=False,
                                    train=False,
                                    transform=transform)

    elif args.dataset == 'svhn':

        train_dataset = mydset.SVHN(root=args.dataroot,
                                    download=False,
                                    train=True,
                                    transform=transform)
        test_dataset = mydset.SVHN(root=args.dataroot,
                                   download=False,
                                   train=False,
                                   transform=transform)

    elif args.dataset == 'syndigits':
        train_dataset = mydset.SYNDIGITS(root=args.dataroot,
                                         download=False,
                                         train=True,
                                         transform=transform)
        test_dataset = mydset.SYNDIGITS(root=args.dataroot,
                                        download=False,
                                        train=False,
                                        transform=transform)

    elif args.dataset == 'cifar9':
        train_dataset = mydset.CIFAR9(root=args.dataroot,
                                      download=False,
                                      train=True,
                                      transform=transform)
        test_dataset = mydset.CIFAR9(root=args.dataroot,
                                     download=False,
                                     train=False,
                                     transform=transform)

    elif args.dataset == 'stl9':
        train_dataset = mydset.STL9(root=args.dataroot,
                                    download=False,
                                    train=True,
                                    transform=transform)
        test_dataset = mydset.STL9(root=args.dataroot,
                                   download=False,
                                   train=False,
                                   transform=transform)

    elif args.dataset == 'gtsrb':
        train_dataset = mydset.GTSRB(root=args.dataroot,
                                     train=True,
                                     transform=transform)
        test_dataset = mydset.GTSRB(root=args.dataroot,
                                    train=False,
                                    transform=transform)

    elif args.dataset == 'amazon':
        # dataset = dset.ImageFolder(root=root_path + dir, transform=transform_dict[phase])
        # train_size = int(0.8 * len(data))
        # test_size = len(data) - train_size
        # data_train, data_val = torch.utils.data.random_split(data, [train_size, test_size])
        dataset_path = os.path.join(args.dataroot, 'office', 'amazon',
                                    'images')
        train_dataset = mydset.OFFICE(root=dataset_path,
                                      train=True,
                                      transform=transform)
        test_dataset = mydset.OFFICE(root=dataset_path,
                                     train=False,
                                     transform=transform)

    elif args.dataset == 'dslr':
        dataset_path = os.path.join(args.dataroot, 'office', 'dslr', 'images')
        train_dataset = mydset.OFFICE(root=dataset_path,
                                      train=True,
                                      transform=transform)
        test_dataset = mydset.OFFICE(root=dataset_path,
                                     train=False,
                                     transform=transform)

    elif args.dataset == 'webcam':
        dataset_path = os.path.join(args.dataroot, 'office', 'webcam',
                                    'images')
        train_dataset = mydset.OFFICE(root=dataset_path,
                                      train=True,
                                      transform=transform)
        test_dataset = mydset.OFFICE(root=dataset_path,
                                     train=False,
                                     transform=transform)

    elif args.dataset == 'synsigns':
        train_dataset = mydset.SYNSIGNS(root=args.dataroot,
                                        train=True,
                                        transform=transform)
        test_dataset = mydset.SYNSIGNS(root=args.dataroot,
                                       train=False,
                                       transform=transform)

    # elif args.dataset == 'celeba':
    #     imdir = 'train' if train_flag else 'val'
    #     dataroot = os.path.join(args.dataroot, imdir)
    #     if args.image_size != 64:
    #         raise ValueError('the image size for CelebA args.dataset need to be 64!')
    #
    #     args.dataset = FolderWithImages(root=dataroot,
    #                                input_transform=transforms.Compose([
    #                                    ALICropAndScale(),
    #                                    transforms.ToTensor(),
    #                                    transforms.Normalize(
    #                                        (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    #                                ]),
    #                                target_transform=transforms.ToTensor()
    #                                )
    #
    # elif args.dataset in ['imagenet', 'folder', 'lfw']:
    #     dataset = dset.ImageFolder(root=args.dataroot,
    #                                transform=transform)
    #
    # elif args.dataset == 'lsun':
    #     dataset = dset.LSUN(db_path=args.dataroot,
    #                         classes=['bedroom_train'],
    #                         transform=transform)
    #
    # elif args.dataset == 'cifar10':
    #     dataset = dset.CIFAR10(root=args.dataroot,
    #                            download=False,
    #                            train=train_flag,
    #                            transform=transform)
    #
    # elif args.dataset == 'cifar100':
    #     dataset = dset.CIFAR100(root=args.dataroot,
    #                             download=False,
    #                             train=train_flag,
    #                             transform=transform)
    #
    # elif args.dataset == 'celeba':
    #     imdir = 'train' if train_flag else 'val'
    #     dataroot = os.path.join(args.dataroot, imdir)
    #     if args.image_size != 64:
    #         raise ValueError('the image size for CelebA dataset need to be 64!')
    #
    #     dataset = FolderWithImages(root=dataroot,
    #                                input_transform=transforms.Compose([
    #                                    ALICropAndScale(),
    #                                    transforms.ToTensor(),
    #                                    transforms.Normalize(
    #                                        (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    #                                ]),
    #                                target_transform=transforms.ToTensor()
    #                                )
    else:
        raise ValueError("Unknown dataset %s" % (args.dataset))

    print(
        '{}: train: count={}, X.shape={}, X.min={}, X.max={}, test: count={}, X.shape={}, X.min={}, X.max={}'
        .format(args.dataset.upper(), train_dataset.__len__(),
                train_dataset[0][0].shape,
                train_dataset[0][0].min(), train_dataset[0][0].max(),
                test_dataset.__len__(), test_dataset[0][0].shape,
                test_dataset[0][0].min(), test_dataset[0][0].max()))

    return train_dataset, test_dataset
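A hedged usage sketch for get_data above; args is assumed to expose the fields the function reads (dataset, exp, nc, image_size, dataroot), filled here with illustrative values, and the custom mydset datasets must already be on disk since download=False:

import argparse
import torch

args = argparse.Namespace(dataset='mnist', exp='svhn_mnist', nc=1,
                          image_size=32, dataroot='./data')
train_dataset, test_dataset = get_data(args, train_flag=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64,
                                           shuffle=True)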
Example #15
def fetch_dataset(data_name):
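    # NOTE: `normalize` and `branch` are not defined in this snippet; in the
    # original project they are presumably module-level configuration values.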
    print('fetching data {}...'.format(data_name))
    if (data_name == 'MNIST'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        train_dataset = datasets.MNIST(root=train_dir,
                                       train=True,
                                       download=True,
                                       transform=transforms.ToTensor())
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.MNIST(root=test_dir,
                                      train=False,
                                      download=True,
                                      transform=test_transform)

    elif (data_name == 'EMNIST' or data_name == 'EMNIST_byclass'
          or data_name == 'EMNIST_bymerge' or data_name == 'EMNIST_balanced'
          or data_name == 'EMNIST_letters' or data_name == 'EMNIST_digits'
          or data_name == 'EMNIST_mnist'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/test'.format(data_name.split('_')[0])
        transform = transforms.Compose([transforms.ToTensor()])
        split = 'balanced' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.EMNIST(root=train_dir,
                                        split=split,
                                        branch=branch,
                                        train=True,
                                        download=True,
                                        transform=transform)
        test_dataset = datasets.EMNIST(root=test_dir,
                                       split=split,
                                       branch=branch,
                                       train=False,
                                       download=True,
                                       transform=transform)

    elif (data_name == 'FashionMNIST'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        transform = transforms.Compose([transforms.ToTensor()])
        train_dataset = datasets.FashionMNIST(root=train_dir,
                                              train=True,
                                              download=True,
                                              transform=transform)
        test_dataset = datasets.FashionMNIST(root=test_dir,
                                             train=False,
                                             download=True,
                                             transform=transform)

    elif (data_name == 'CIFAR10'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR10(train_dir,
                                         train=True,
                                         transform=transforms.ToTensor(),
                                         download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR10(test_dir,
                                        train=False,
                                        transform=test_transform,
                                        download=True)

    elif (data_name == 'CIFAR100'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR100(train_dir,
                                          branch=branch,
                                          train=True,
                                          transform=transforms.ToTensor(),
                                          download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR100(test_dir,
                                         branch=branch,
                                         train=False,
                                         transform=test_transform,
                                         download=True)

    elif (data_name == 'SVHN'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.SVHN(train_dir,
                                      split='train',
                                      transform=transforms.ToTensor(),
                                      download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.SVHN(test_dir,
                                     split='test',
                                     transform=test_transform,
                                     download=True)

    elif (data_name == 'ImageNet'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.ImageFolder(train_dir,
                                             transform=transforms.ToTensor())
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)

    elif (data_name == 'CUB2011'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        train_dataset = datasets.CUB2011(train_dir,
                                         transform=transforms.Compose([
                                             transforms.Resize((224, 224)),
                                             transforms.ToTensor()
                                         ]),
                                         download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CUB2011(test_dir,
                                        transform=test_transform,
                                        download=True)

    elif (data_name == 'WheatImage' or data_name == 'WheatImage_binary'
          or data_name == 'WheatImage_six'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        label_mode = 'six' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.WheatImage(train_dir,
                                            label_mode=label_mode,
                                            transform=transforms.Compose([
                                                transforms.Resize((224, 288)),
                                                transforms.ToTensor()
                                            ]))
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 288)),
                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.WheatImage(test_dir,
                                           label_mode=label_mode,
                                           transform=test_transform)

    elif (data_name == 'CocoDetection'):
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/instances_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/instances_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.CocoDetection(train_dir,
                                               train_ann,
                                               transform=transform)
        test_dataset = datasets.CocoDetection(test_dir,
                                              test_ann,
                                              transform=transform)

    elif (data_name == 'CocoCaptions'):
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/captions_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/captions_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.CocoCaptions(train_dir,
                                              train_ann,
                                              transform=transform)
        test_dataset = datasets.CocoCaptions(test_dir,
                                             test_ann,
                                             transform=transform)

    elif (data_name == 'VOCDetection'):
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.VOCDetection(train_dir,
                                              'trainval',
                                              transform=transform)
        test_dataset = datasets.VOCDetection(test_dir,
                                             'test',
                                             transform=transform)

    elif (data_name == 'VOCSegmentation'):
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.VOCSegmentation(train_dir,
                                                 'trainval',
                                                 transform=transform)
        test_dataset = datasets.VOCSegmentation(test_dir,
                                                'test',
                                                transform=transform)

    elif (data_name == 'MOSI' or data_name == 'MOSI_binary'
          or data_name == 'MOSI_five' or data_name == 'MOSI_seven'
          or data_name == 'MOSI_regression'):
        train_dir = './data/{}'.format(data_name.split('_')[0])
        test_dir = './data/{}'.format(data_name.split('_')[0])
        label_mode = 'five' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.MOSI(train_dir,
                                      split='trainval',
                                      label_mode=label_mode,
                                      download=True)
        stats = make_stats(train_dataset, batch_size=1)
        train_transform = transforms.Compose([transforms.Normalize(stats)])
        test_transform = transforms.Compose([transforms.Normalize(stats)])
        train_dataset.transform = train_transform
        test_dataset = datasets.MOSI(test_dir,
                                     split='test',
                                     label_mode=label_mode,
                                     download=True,
                                     transform=test_transform)

    elif (data_name == 'Kodak'):
        train_dataset = None
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)

    elif (data_name == 'UCID'):
        train_dataset = None
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)
    else:
        raise ValueError('Not a valid dataset name')
    print('data ready')
    return train_dataset, test_dataset
Example #16
def load_datasets(name, root, batch_size):
  if name == "mnist":
    train_dataset = datasets.MNIST(root=root,
                                   download=True,
                                   train=True,
                                   transform=transforms.Compose([
                                     transforms.Resize(28),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5], [0.5]),
                                   ]))

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=8)
    test_dataset = datasets.MNIST(root=root,
                                  download=True,
                                  train=False,
                                  transform=transforms.Compose([
                                    transforms.Resize(28),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.5], [0.5]),
                                  ]))

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)
    return train_dataloader, test_dataloader

  elif name == "fmnist":
    train_dataset = datasets.FashionMNIST(root=root,
                                          download=True,
                                          train=True,
                                          transform=transforms.Compose([
                                            transforms.Resize(28),
                                            transforms.RandomHorizontalFlip(),
                                            transforms.ToTensor(),
                                            transforms.Normalize([0.5], [0.5]),
                                          ]))

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=8)
    test_dataset = datasets.FashionMNIST(root=root,
                                         download=True,
                                         train=False,
                                         transform=transforms.Compose([
                                           transforms.Resize(28),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.5], [0.5]),
                                         ]))

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)
    return train_dataloader, test_dataloader

  elif name == "kmnist":
    train_dataset = datasets.KMNIST(root=root,
                                    download=True,
                                    train=True,
                                    transform=transforms.Compose([
                                      transforms.Resize(28),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5], [0.5]),
                                    ]))

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=8)
    test_dataset = datasets.KMNIST(root=root,
                                   download=True,
                                   train=False,
                                   transform=transforms.Compose([
                                     transforms.Resize(28),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5], [0.5]),
                                   ]))

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)
    return train_dataloader, test_dataloader

  elif name == "qmnist":
    train_dataset = datasets.QMNIST(root=root,
                                    download=True,
                                    train=True,
                                    transform=transforms.Compose([
                                      transforms.Resize(28),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5], [0.5]),
                                    ]))

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=8)
    test_dataset = datasets.QMNIST(root=root,
                                   download=True,
                                   what="test50k",
                                   train=False,
                                   transform=transforms.Compose([
                                     transforms.Resize(28),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.5], [0.5]),
                                   ]))

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)
    return train_dataloader, test_dataloader

  elif name == "cifar10":
    train_dataset = datasets.CIFAR10(root=root,
                                     download=True,
                                     train=True,
                                     transform=transforms.Compose([
                                       transforms.Resize(32),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                     ]))

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=8)
    test_dataset = datasets.CIFAR10(root=root,
                                    download=True,
                                    train=False,
                                    transform=transforms.Compose([
                                      transforms.Resize(32),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                    ]))

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)
    return train_dataloader, test_dataloader

  elif name == "cifar100":
    train_dataset = datasets.CIFAR100(root=root,
                                      download=True,
                                      train=True,
                                      transform=transforms.Compose([
                                        transforms.Resize(32),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                      ]))

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=8)
    test_dataset = datasets.CIFAR100(root=root,
                                     download=True,
                                     train=False,
                                     transform=transforms.Compose([
                                       transforms.Resize(32),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                     ]))

    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)
    return train_dataloader, test_dataloader
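A minimal driver sketch for load_datasets above; the dataset name, root directory and batch size are illustrative:

train_loader, test_loader = load_datasets('cifar10', root='./data', batch_size=64)
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # e.g. torch.Size([64, 3, 32, 32]) for CIFAR10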