def celeb_faces_model():
    model = CelebFacesNeuralNetwork()

    training_data = datasets.USPS(root="data",
                                  train=True,
                                  download=True,
                                  transform=ToTensor())

    test_data = datasets.USPS(root="data",
                              train=False,
                              download=True,
                              transform=ToTensor())

    img, tar = training_data[1]  # indexing is the idiomatic form of __getitem__
    print(img.size())

    learning_rate = 1e-3  # step size for each parameter update
    batch_size = 64  # number of samples per gradient update
    epochs = 10  # number of full passes over the training data

    train_dataloader = DataLoader(training_data,
                                  batch_size=batch_size,
                                  shuffle=True)
    test_dataloader = DataLoader(test_data,
                                 batch_size=batch_size,
                                 shuffle=True)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    for t in range(epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        train_loop(train_dataloader, model, loss_fn, optimizer)
        test_loop(test_dataloader, model, loss_fn)
    print("Done!")
Example #2
def get_usps(train, download=True, drop_last=True, get_pseudo=False):
    """Get USPS dataset loader."""
    # Image pre-processing
    pre_process = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((32, 32)),
        transforms.Lambda(gray2rgb),
        transforms.Normalize(mean=(params.dataset_mean, ),
                             std=(params.dataset_std, )),
    ])

    # Dataset and data loader
    if get_pseudo:
        path = 'data/usps_train_pseudo.mat' if train else 'data/usps_eval_pseudo.mat'
        usps_dataset = CustomDataset(path, transforms.ToTensor())
    else:
        usps_dataset = datasets.USPS(root=params.data_root,
                                     train=train,
                                     transform=pre_process,
                                     download=download)

    usps_data_loader = torch.utils.data.DataLoader(
        dataset=usps_dataset,
        batch_size=params.batch_size,
        shuffle=False,
        num_workers=params.num_worker,
        drop_last=drop_last,
        pin_memory=True)

    return usps_data_loader
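`gray2rgb` is referenced but not defined in this snippet. A plausible minimal implementation (an assumption, consistent with converting the single-channel USPS tensor to three channels after ToTensor):

def gray2rgb(x):
    # repeat a 1xHxW grayscale tensor along the channel dim to get 3xHxW
    return x.repeat(3, 1, 1)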
Example #3
    def __init__(self, labeled_data, unlabeled_data, test_data, batch_size):
        super().__init__()
        transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(32),
            transforms.ToTensor()
        ])
        self.dataset_by_name = {
            "MNIST":
            lambda train: datasets.MNIST("./data/mnist",
                                         train=train,
                                         transform=transform,
                                         download=True),
            "USPS":
            lambda train: datasets.USPS(
                "./data/usps", train=train, transform=transform, download=True
            ),
            "SVHN":
            lambda train: datasets.SVHN("./data/svhn",
                                        transform=transform,
                                        download=True,
                                        split="train" if train else "test")
        }
        self.labeled_data = labeled_data
        self.unlabeled_data = unlabeled_data
        self.test_data = test_data
        self.batch_size = batch_size
Example #4
def get_data(data_name, transform, is_train=True):
    if data_name == "mnist":
        dataset = datasets.MNIST(root=paramsTrans.dataset_root, train=is_train, download=True, transform=transform)
    elif data_name == "usps":
        dataset = datasets.USPS(root=paramsTrans.dataset_root, train=is_train, download=True, transform=transform)
    else:
        raise ValueError(f"Unknown dataset name: {data_name}")
    data = DataLoader(dataset=dataset, batch_size=paramsTrans.batch_size, shuffle=True, num_workers=paramsTrans.num_workers)
    return data
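A hedged usage sketch; the `paramsTrans` config object is assumed by the snippet and not shown here:

from torchvision import transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
usps_loader = get_data("usps", transform, is_train=True)  # yields (images, labels) batches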
Example #5
def usps():
    return itertools.chain(*[
        collect_download_configs(
            lambda: datasets.USPS(ROOT, train=train, download=True),
            name=f"USPS, {'train' if train else 'test'}",
            file="usps",
        ) for train in (True, False)
    ])
Example #6
def get_vision_dset(dset_name, device, x_only=False):
    dirpath = f'~/unsupervised_object_learning/{dset_name}/data'
    if dset_name in ['MNISTfull', 'MNISTtest']:
        dtest = tdatasets.MNIST(root=dirpath, train=False, download=True)
        x, y = dtest.data, dtest.targets
        if dset_name == 'MNISTfull':
            dtrain = tdatasets.MNIST(root=dirpath, train=True, download=True)
            x = torch.cat([dtrain.data, x])
            y = torch.cat([dtrain.targets, y])
        data = x if x_only else (x, y)
    elif dset_name == 'MNISTtrain':
        dtrain = tdatasets.MNIST(root=dirpath, train=True, download=True)
        data = dtrain.data if x_only else (dtrain.data, dtrain.targets)
    elif dset_name == 'FashionMNIST':
        dtrain = tdatasets.FashionMNIST(root=dirpath, train=True, download=True)
        dtest = tdatasets.FashionMNIST(root=dirpath, train=False, download=True)
        x = torch.cat([dtrain.data, dtest.data])
        y = torch.cat([dtrain.targets, dtest.targets])
        data = x if x_only else (x, y)
    elif dset_name == 'USPS':
        dtrain = tdatasets.USPS(root=dirpath, train=True, download=True)
        dtest = tdatasets.USPS(root=dirpath, train=False, download=True)
        train_data = torch.tensor(dtrain.data, device=device)
        test_data = torch.tensor(dtest.data, device=device)
        train_targets = torch.tensor(dtrain.targets, device=device)
        test_targets = torch.tensor(dtest.targets, device=device)
        x = torch.cat([train_data, test_data])
        y = torch.cat([train_targets, test_targets])
        data = x if x_only else (x, y)
    elif dset_name == 'CIFAR10':
        d = tdatasets.CIFAR10(root=dirpath, train=True, download=True)
        data = torch.tensor(d.data, device=device) if x_only else (
            torch.tensor(d.data, device=device), torch.tensor(d.targets, device=device))
    elif dset_name == 'coil-100':
        data = load_coil100(x_only)
    elif dset_name == 'letterAJ':
        data = load_letterAJ(x_only)
        return TransformDataset(data, [add_colour_dimension], x_only, device=device)
    return TransformDataset(data, [to_float_tensor, add_colour_dimension], x_only, device=device)
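`to_float_tensor` and `add_colour_dimension` are not shown in this snippet; the following are minimal sketches consistent with how they are used above (the names are real, the bodies are assumptions):

def to_float_tensor(x):
    # scale uint8 image stacks to floats in [0, 1]
    return x.float() / 255.

def add_colour_dimension(x):
    # give grayscale stacks an explicit channel dim: (N, H, W) -> (N, 1, H, W)
    return x.unsqueeze(1) if x.ndim == 3 else x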
Example #7
def get_loader(dataset, opts):

    transform = transforms.Compose([
        transforms.Resize(opts.image_size),  # transforms.Scale is deprecated; Resize is the current API
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))])

    if dataset == 'USPS':
        usps_train = datasets.USPS(root='/content/CycleGan/data/USPS', train=True, download=True, transform=transform)
        usps_test = datasets.USPS(root='/content/CycleGan/data/USPS', train=False, download=True, transform=transform)
        train_dloader = DataLoader(dataset=usps_train,
                                   batch_size=opts.batch_size,
                                   shuffle=True,
                                   num_workers=opts.num_workers)

        # DataLoader takes no train=/Train= keyword; that flag belongs to the dataset
        test_dloader = DataLoader(dataset=usps_test,
                                  batch_size=opts.batch_size,
                                  shuffle=True,
                                  num_workers=opts.num_workers)

    if dataset == 'MNIST':
        mnist_train = datasets.MNIST(root='/content/CycleGan/data/MNIST', train=True, download=True, transform=transform)
        mnist_test = datasets.MNIST(root='/content/CycleGan/data/MNIST', train=False, download=True, transform=transform)
        train_dloader = DataLoader(dataset=mnist_train,
                                   batch_size=opts.batch_size,
                                   shuffle=True,
                                   num_workers=opts.num_workers)

        test_dloader = DataLoader(dataset=mnist_test,
                                  batch_size=opts.batch_size,
                                  shuffle=True,
                                  num_workers=opts.num_workers)

    return train_dloader, test_dloader
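`opts` can be any object with the attributes read above; a minimal hypothetical sketch using `argparse.Namespace` (field values assumed):

from argparse import Namespace

opts = Namespace(image_size=32, batch_size=64, num_workers=2)
train_dloader, test_dloader = get_loader('USPS', opts)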
Example #8
    def __init__(self, path):
        transform = get_transform()

        self.MNIST_dataset = datasets.MNIST(root=os.path.join(path, "MNIST"),
                                            transform=transform,
                                            train=True,
                                            download=True)

        self.SVHN_dataset = datasets.SVHN(root=os.path.join(path, "SVHN"),
                                          transform=transform,
                                          split='train',
                                          download=True)

        self.USPS_dataset = datasets.USPS(root=os.path.join(path, "USPS"),
                                          transform=transform,
                                          train=True,
                                          download=True)
Example #9
def get_usps(train, batch_size=1024):
    """Get USPS dataset loader."""
    # image pre-processing
    pre_process = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])

    # dataset and data loader
    usps_dataset = datasets.USPS(root='cache/data/',
                                 train=train,
                                 transform=pre_process,
                                 download=True)

    usps_data_loader = torch.utils.data.DataLoader(dataset=usps_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=8)

    return usps_data_loader
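To sanity-check the loader, pull one batch; with the Resize(32) above, the 16x16 USPS digits come out as 1-channel 32x32 tensors:

loader = get_usps(train=True, batch_size=8)
images, labels = next(iter(loader))
print(images.shape)  # torch.Size([8, 1, 32, 32])
print(labels)        # tensor of 8 digit labels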
Example #10
def get_usps(train, adp=False):
    """Get USPS dataset loader."""
    # image pre-processing
    pre_process = transforms.Compose([
        transforms.Resize(params.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # replicate grayscale to 3 channels
    ])

    # dataset and data loader
    usps_dataset = datasets.USPS(root=params.usps_dataset_root,
                                 train=train,
                                 transform=pre_process,
                                 download=True)

    usps_data_loader = torch.utils.data.DataLoader(
        dataset=usps_dataset,
        batch_size=params.adp_batch_size if adp else params.batch_size,
        shuffle=True,
        drop_last=True)
    return usps_data_loader
Example #11
    transforms.Normalize([0.5], [0.5])
])

if DB == 'MNIST':
    dataset = datasets.MNIST(db_path,
                             train=True,
                             download=True,
                             transform=transform)
elif DB == 'FashionMNIST':
    dataset = datasets.FashionMNIST(db_path,
                                    train=True,
                                    download=True,
                                    transform=transform)
elif DB == 'USPS':
    dataset = datasets.USPS(db_path,
                            train=True,
                            download=True,
                            transform=transform)
elif DB == 'SVHN':
    dataset = datasets.SVHN(db_path,
                            split='train',
                            download=True,
                            transform=transform)
elif DB == 'CIFAR10':
    dataset = datasets.CIFAR10(db_path,
                               train=True,
                               download=True,
                               transform=transform)

data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True)
Example #12
    def __init__(self, data_path, dataset_name="mnist", valid_size=0.2,
                 transformations=None, shuffle=False, **kwargs):
        """
        Initializer of the classification dataset object
        """

        # checking valid values for the parameters
        assert dataset_name in ["mnist", "fashion_mnist", "usps", "svhn", "mnist-test", "coil-100"], \
                f"Dataset name: {dataset_name} is not a correct value. " \
                "Choose one from ['mnist', 'fashion_mnist', 'usps', 'svhn', 'mnist-test', 'coil-100']"
        assert 0 <= valid_size < 1, "Valid size must be in range [0,1)"

        self.data_path = data_path
        self.dataset_name = dataset_name
        self.valid_size = valid_size
        self.shuffle = shuffle

        # enforcing ToTensor and Normalize in the transforms; a membership test on
        # ToTensor() instances always fails, so check by type instead
        if transformations is None:
            transformations = []
        if not any(isinstance(t, transforms.ToTensor) for t in transformations):
            transformations.append(transforms.ToTensor())
        transformations.append(transforms.Normalize((0.5,), (0.5,)))
        transformations = transforms.Compose(transformations)

        # loading the corresponding data
        if(dataset_name == "mnist"):
            train_set = datasets.MNIST(self.data_path, train=True, download=True,
                                            transform=transformations)
            test_set = datasets.MNIST(self.data_path, train=False, download=True,
                                        transform=transformations)

        elif(dataset_name == "mnist-test"):
            train_set = None
            test_set = datasets.MNIST(self.data_path, train=False, download=True,
                                        transform=transformations)

        elif(dataset_name == "svhn"):
            train_set = datasets.SVHN(self.data_path, split='train',download=True,
                                      transform=transformations)
            test_set = datasets.SVHN(self.data_path, split='test',download=True,
                                      transform=transformations)
            train_set.targets, test_set.targets = train_set.labels, test_set.labels

        elif(dataset_name == "fashion_mnist"):
            train_set = datasets.FashionMNIST(self.data_path, train=True, download=True,
                                              transform=transformations)
            test_set = datasets.FashionMNIST(self.data_path, train=False, download=True,
                                             transform=transformations)

        elif(dataset_name == "usps"):
            train_set = datasets.USPS(self.data_path, train=True, download=True,
                                      transform=transformations)
            test_set = datasets.USPS(self.data_path, train=False, download=True,
                                     transform=transformations)

        elif(dataset_name == "coil-100"):
            data_path = os.path.join(self.data_path, "coil-100", "coil-100")
            get_lbl = lambda name: int(name.split("_")[0][3:])
            train_set = None
            test_set = CustomDataset(root=data_path,
                                     transform=transformations,
                                     get_lbl=get_lbl,
                                     downscale=(32,32),
                                     **kwargs)

        if(train_set is not None):
            self.train_data, self.train_labels = train_set.data, train_set.targets
        self.test_data, self.test_labels = test_set.data, test_set.targets
        self.train_set = train_set
        self.test_set = test_set
        if(self.valid_size > 0 and self.train_set is not None):
            self._get_train_validation_split()

        return
Example #13
                                  target_transform=None,
                                  download=True)
svhn_test_image = osp.join(outdir, 'svhn_test_image')
os.system("mkdir -p " + svhn_test_image)
svhn_labels = svhn_dataset_test.labels.flatten()
with open(osp.join(outdir, 'svhn_test.txt'), 'w') as label_file:
    for i in range(len(svhn_dataset_test)):
        img = Image.fromarray(svhn_dataset_test.data[i].transpose(1, 2, 0))
        img.save(osp.join(svhn_test_image, '{:d}.png'.format(i)))
        label_file.write(svhn_test_image +
                         '/{:d}.png {:d}\n'.format(i, svhn_labels[i]))

outdir = '/data/usps'
usps_dataset_train = datasets.USPS('/data/usps',
                                   train=True,
                                   transform=None,
                                   target_transform=None,
                                   download=True)
usps_train_image = osp.join(outdir, 'usps_train_image')
os.system("mkdir -p " + usps_train_image)
usps_labels = usps_dataset_train.targets
with open(osp.join(outdir, 'usps_train.txt'), 'w') as label_file:
    for i in range(len(usps_dataset_train)):
        img = Image.fromarray(usps_dataset_train.data[i])
        img.save(osp.join(usps_train_image, '{:d}.png'.format(i)))
        label_file.write(usps_train_image +
                         '/{:d}.png {:d}\n'.format(i, usps_labels[i]))

usps_dataset_test = datasets.USPS('/data/usps',
                                  train=False,
                                  transform=None,
Example #14
                                   download=True)

# CIFAR 10
train3_dataset = dsets.CIFAR10(root='./data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test3_dataset = dsets.CIFAR10(root='./data',
                              train=False,
                              transform=transforms.ToTensor(),
                              download=True)

# USPS
train4_dataset = dsets.USPS(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test4_dataset = dsets.USPS(root='./data',
                           train=False,
                           transform=transforms.ToTensor(),
                           download=True)

train_dataset = train1_dataset
test_dataset = test1_dataset

batch_size = 100
n_iters = 2000
num_epochs = int(n_iters / (len(train_dataset) / batch_size))
Example #15
def get_digit_five_train_loader(d_name,
                                batch_size=16,
                                num_workers=1,
                                pin_memory=False,
                                drop_last=False,
                                resize=28):

    d_transforms = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    if d_name == "MNIST":
        d_transforms = transforms.Compose([
            transforms.Resize((resize, resize)),
            transforms.Grayscale(3),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        trainset = datasets.MNIST('./digits_data/',
                                  download=True,
                                  train=True,
                                  transform=d_transforms)
        valset = datasets.MNIST('./digits_data/',
                                download=True,
                                train=False,
                                transform=d_transforms)
    elif d_name == "MNIST-M":
        d_transforms = transforms.Compose([
            transforms.Resize((resize, resize)),
            transforms.Grayscale(3),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        trainset = MNISTM.MNISTM_dataset('./digits_data/',
                                         download=True,
                                         train=True,
                                         transform=d_transforms)
        valset = MNISTM.MNISTM_dataset('./digits_data/',
                                       download=True,
                                       train=False,
                                       transform=d_transforms)
    elif d_name == 'SVHN':
        trainset = datasets.SVHN('./digits_data/',
                                 download=True,
                                 split="train",
                                 transform=d_transforms)
        valset = datasets.SVHN('./digits_data/',
                               download=True,
                               split="test",
                               transform=d_transforms)
    elif d_name == 'USPS':
        d_transforms = transforms.Compose([
            transforms.Resize((resize, resize)),
            transforms.Grayscale(3),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        trainset = datasets.USPS('./digits_data/',
                                 download=True,
                                 train=True,
                                 transform=d_transforms)
        valset = datasets.USPS('./digits_data/',
                               download=True,
                               train=False,
                               transform=d_transforms)
    elif d_name == 'SY':
        trainset = datasets.ImageFolder(
            './digits_data/synthetic_digits/imgs_train',
            transform=d_transforms)
        valset = datasets.ImageFolder(
            './digits_data/synthetic_digits/imgs_valid',
            transform=d_transforms)
    else:
        raise ValueError(f"Unknown dataset: {d_name}")

    trainset.num_classes = 10
    valset.num_classes = 10

    train_loader = data.DataLoader(trainset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=num_workers,
                                   pin_memory=pin_memory,
                                   drop_last=drop_last)

    test_loader = data.DataLoader(valset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  pin_memory=pin_memory,
                                  drop_last=drop_last)

    return train_loader, test_loader
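A usage sketch: after `Grayscale(3)` the USPS images come out with three channels at the default 28x28 size:

train_loader, test_loader = get_digit_five_train_loader("USPS", batch_size=32)
x, y = next(iter(train_loader))
print(x.shape)  # torch.Size([32, 3, 28, 28]) with the default resize=28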
Example #16
    def __init__(self,
                 dataset,
                 root_folder="raw-datasets/",
                 flatten=False,
                 coil20_unprocessed=False,
                 debug=False,
                 n_samples=100):
        super(Datasets, self).__init__()

        transform_train_list = []
        transform_test_list = []

        if dataset == "mnist":
            transform_train_list.append(transforms.ToTensor())
            transform_train_list.append(
                transforms.Normalize((0.1307, ), (0.3081, )))

            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transform_train

            self.train_data = datasets.MNIST(root=root_folder,
                                             train=True,
                                             download=True,
                                             transform=transform_train)
            self.test_data = datasets.MNIST(root=root_folder,
                                            train=False,
                                            download=True,
                                            transform=transform_test)

            if debug:
                self.train_data.data = self.train_data.data[:n_samples]
                self.test_data.data = self.test_data.data[:n_samples]

            self.dim_flatten = self.train_data.data.size(
                1) * self.train_data.data.size(2)

            self.d_in = 1
            self.hw_in = 28

        elif dataset == "fashion":
            transform_train_list.append(transforms.ToTensor())
            transform_train_list.append(transforms.Normalize((0.5, ), (0.5, )))

            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transform_train

            self.train_data = datasets.FashionMNIST(root=root_folder,
                                                    train=True,
                                                    download=True,
                                                    transform=transform_train)
            self.test_data = datasets.FashionMNIST(root=root_folder,
                                                   train=False,
                                                   download=True,
                                                   transform=transform_test)

            if debug:
                self.train_data.data = self.train_data.data[:n_samples]
                self.test_data.data = self.test_data.data[:n_samples]

            self.dim_flatten = self.train_data.data.size(
                1) * self.train_data.data.size(2)

            self.d_in = 1
            self.hw_in = 28

        elif dataset == "cifar10":

            transform_train_list.append(transforms.RandomCrop(32, padding=4))
            transform_train_list.append(transforms.RandomHorizontalFlip())
            transform_train_list.append(transforms.ToTensor())
            transform_train_list.append(
                transforms.Normalize(mean=[0.491, 0.482, 0.447],
                                     std=[0.247, 0.243, 0.262]))

            transform_test_list.append(transforms.ToTensor())
            transform_test_list.append(
                transforms.Normalize(mean=[0.491, 0.482, 0.447],
                                     std=[0.247, 0.243, 0.262]))

            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))
                transform_test_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transforms.Compose(transform_test_list)

            self.train_data = datasets.CIFAR10(root=root_folder,
                                               train=True,
                                               download=True,
                                               transform=transform_train)
            self.test_data = datasets.CIFAR10(root=root_folder,
                                              train=False,
                                              download=True,
                                              transform=transform_test)

            if debug:
                self.train_data.data = self.train_data.data[:n_samples]
                self.test_data.data = self.test_data.data[:n_samples]

            data_shape = self.train_data.data.shape
            self.dim_flatten = data_shape[1] * data_shape[2] * data_shape[3]
            self.d_in = 3
            self.hw_in = 32

        elif dataset == "cifar100":
            transform_train_list.append(transforms.RandomCrop(32, padding=4))
            transform_train_list.append(transforms.RandomHorizontalFlip())
            transform_train_list.append(transforms.ToTensor())
            transform_train_list.append(
                transforms.Normalize(mean=[0.507, 0.487, 0.441],
                                     std=[0.267, 0.256, 0.276]))

            transform_test_list.append(transforms.ToTensor())
            transform_test_list.append(
                transforms.Normalize(mean=[0.507, 0.487, 0.441],
                                     std=[0.267, 0.256, 0.276]))

            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))
                transform_test_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transforms.Compose(transform_test_list)

            self.train_data = datasets.CIFAR100(root=root_folder,
                                                train=True,
                                                download=True,
                                                transform=transform_train)
            self.test_data = datasets.CIFAR100(root=root_folder,
                                               train=False,
                                               download=True,
                                               transform=transform_test)

            if debug:
                self.train_data.data = self.train_data.data[:n_samples]
                self.test_data.data = self.test_data.data[:n_samples]

            data_shape = self.train_data.data.shape
            self.dim_flatten = data_shape[1] * data_shape[2] * data_shape[3]
            self.d_in = 3
            self.hw_in = 32

        elif dataset == "svhn":
            transform_train_list.append(transforms.ToTensor())
            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transform_train

            self.train_data = datasets.SVHN(root=root_folder,
                                            split='train',
                                            download=True,
                                            transform=transform_train)
            self.test_data = datasets.SVHN(root=root_folder,
                                           split='test',
                                           download=True,
                                           transform=transform_test)

            if debug:
                self.train_data.data = self.train_data.data[:n_samples]
                self.test_data.data = self.test_data.data[:n_samples]

            data_shape = self.train_data.data.shape
            self.dim_flatten = data_shape[1] * data_shape[2] * data_shape[3]
            self.d_in = 3
            self.hw_in = 32

        elif dataset == "usps":

            transform_train_list.append(transforms.ToTensor())
            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transform_train

            self.train_data = datasets.USPS(root=root_folder,
                                            train=True,
                                            download=True,
                                            transform=transform_train)
            self.test_data = datasets.USPS(root=root_folder,
                                           train=False,
                                           download=True,
                                           transform=transform_test)

            if debug:
                self.train_data.data = self.train_data.data[:n_samples]
                self.test_data.data = self.test_data.data[:n_samples]

            data_shape = self.train_data.data.shape
            self.dim_flatten = data_shape[1] * data_shape[2]
            self.d_in = 1
            self.hw_in = 16

        elif dataset == "coil20":
            transform_train_list.append(transforms.ToTensor())

            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transform_train

            self.train_data = COIL20(root=root_folder,
                                     processed=not coil20_unprocessed,
                                     download=True,
                                     transform=transform_train)
            self.test_data = COIL20(root=root_folder,
                                    processed=not coil20_unprocessed,
                                    download=True,
                                    transform=transform_test)

            if debug:
                self.train_data.data = self.train_data.data[:n_samples]
                self.test_data.data = self.test_data.data[:n_samples]

            data_shape = self.train_data.data.shape
            self.dim_flatten = data_shape[1] * data_shape[2]
            self.d_in = 1
            self.hw_in = 32

        elif dataset == "colors":
            transform_train_list.append(transforms.ToTensor())

            if flatten:
                transform_train_list.append(ReshapeTransform((-1, )))

            transform_train = transforms.Compose(transform_train_list)
            transform_test = transform_train

            self.train_data = COLORS(root=root_folder,
                                     transform=transform_train)
            self.test_data = COLORS(root=root_folder, transform=transform_test)

            data_shape = self.train_data.data.shape
            self.dim_flatten = data_shape[1]
            self.d_in = 3
            self.hw_in = 1
        else:
            self.train_data = CustomDataset(load_path=join(
                root_folder, dataset),
                                            norm="minmax")
            self.test_data = self.train_data
            self.dim_flatten = self.train_data.data.shape[1]
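A hypothetical usage sketch for the `Datasets` wrapper above (the class name comes from the `super()` call; for the 16x16 USPS images the flattened dimension is 16 * 16 = 256):

ds = Datasets("usps")
print(ds.dim_flatten, ds.d_in, ds.hw_in)      # 256 1 16
print(len(ds.train_data), len(ds.test_data))  # 7291 2007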
Example #17
def get_digits_loaders_concat(targets,
                              batch_size=16,
                              num_workers=1,
                              pin_memory=False,
                              drop_last=False,
                              resize=28):

    d_transforms = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    vals = []
    trains = []
    for d_name in targets:
        if d_name == "MNIST":
            d_transforms = transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.Grayscale(3),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])
            trainset = datasets.MNIST('./digits_data/',
                                      download=True,
                                      train=True,
                                      transform=d_transforms)
            valset = datasets.MNIST('./digits_data/',
                                    download=True,
                                    train=False,
                                    transform=d_transforms)
        elif d_name == "MNIST-M":
            d_transforms = transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.Grayscale(3),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])
            trainset = MNISTM.MNISTM_dataset('./digits_data/',
                                             download=True,
                                             train=True,
                                             transform=d_transforms)
            valset = MNISTM.MNISTM_dataset('./digits_data/',
                                           download=True,
                                           train=False,
                                           transform=d_transforms)
        elif d_name == 'SVHN':
            trainset = datasets.SVHN('./digits_data/',
                                     download=True,
                                     split="train",
                                     transform=d_transforms)
            valset = datasets.SVHN('./digits_data/',
                                   download=True,
                                   split="test",
                                   transform=d_transforms)
        elif d_name == 'USPS':
            d_transforms = transforms.Compose([
                transforms.Resize((resize, resize)),
                transforms.Grayscale(3),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])
            trainset = datasets.USPS('./digits_data/',
                                     download=True,
                                     train=True,
                                     transform=d_transforms)
            valset = datasets.USPS('./digits_data/',
                                   download=True,
                                   train=False,
                                   transform=d_transforms)
        elif d_name == 'SY':
            trainset = datasets.ImageFolder(
                './digits_data/synthetic_digits/imgs_train',
                transform=d_transforms)
            valset = datasets.ImageFolder(
                './digits_data/synthetic_digits/imgs_valid',
                transform=d_transforms)
        else:
            raise ValueError(f"Unknown dataset: {d_name}")
        trains.append(trainset)
        vals.append(valset)

    train_sets = torch.utils.data.ConcatDataset(trains)
    val_sets = torch.utils.data.ConcatDataset(vals)
    train_sets.num_classes = 10
    val_sets.num_classes = 10

    trainloader = data.DataLoader(train_sets,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  pin_memory=pin_memory,
                                  drop_last=drop_last)

    valloader = data.DataLoader(val_sets,
                                batch_size=batch_size,
                                shuffle=True,
                                num_workers=num_workers,
                                pin_memory=pin_memory,
                                drop_last=drop_last)

    return trainloader, valloader
Example #18
def load_usps(args, **kwargs):
    # set args
    args.input_size = [1, 16, 16]
    args.input_type = 'gray'
    args.dynamic_binarization = False

    # start processing
    from torchvision import datasets, transforms
    train_loader = torch.utils.data.DataLoader(datasets.USPS(
        os.path.join('datasets', "usps"),
        train=True,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()])),
                                               batch_size=args.batch_size,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(datasets.USPS(
        os.path.join('datasets', "usps"),
        train=False,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()])),
                                              batch_size=args.batch_size,
                                              shuffle=True)

    # preparing data
    x_train = train_loader.dataset.data.astype('float32')
    x_train = x_train / 255.
    x_train = np.reshape(
        x_train, (x_train.shape[0], x_train.shape[1] * x_train.shape[2]))

    y_train = np.array(train_loader.dataset.targets, dtype=int)

    x_test = test_loader.dataset.data.astype('float32')
    x_test = x_test / 255.
    x_test = np.reshape(x_test,
                        (x_test.shape[0], x_test.shape[1] * x_test.shape[2]))

    y_test = np.array(test_loader.dataset.targets, dtype=int)

    # validation set; 7291 data points in training dataset; 2007 in test dataset
    x_val = x_train[6000:]
    y_val = np.array(y_train[6000:], dtype=int)
    x_train = x_train[0:6000]
    y_train = np.array(y_train[0:6000], dtype=int)

    # pytorch data loader
    train = data_utils.TensorDataset(torch.from_numpy(x_train),
                                     torch.from_numpy(y_train))
    train_loader = data_utils.DataLoader(train,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         **kwargs)

    validation = data_utils.TensorDataset(
        torch.from_numpy(x_val).float(), torch.from_numpy(y_val))
    val_loader = data_utils.DataLoader(validation,
                                       batch_size=args.test_batch_size,
                                       shuffle=False,
                                       **kwargs)

    test = data_utils.TensorDataset(
        torch.from_numpy(x_test).float(), torch.from_numpy(y_test))
    test_loader = data_utils.DataLoader(test,
                                        batch_size=args.test_batch_size,
                                        shuffle=False,
                                        **kwargs)

    # setting pseudo-inputs inits
    if args.use_training_data_init == 1:
        args.pseudoinputs_std = 0.01
        init = x_train[0:args.number_components_input].T
        args.pseudoinputs_mean = torch.from_numpy(
            init + args.pseudoinputs_std *
            np.random.randn(np.prod(args.input_size),
                            args.number_components_input)).float()
    else:
        args.pseudoinputs_mean = 0.05
        args.pseudoinputs_std = 0.01

    return train_loader, val_loader, test_loader, args
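A hedged sketch of the `args` object this loader expects; only the fields read before being set inside the function are included, and the values are assumptions:

from argparse import Namespace

args = Namespace(batch_size=128, test_batch_size=256,
                 use_training_data_init=0, number_components_input=500)
train_loader, val_loader, test_loader, args = load_usps(args)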