示例#1
0
    def __init__(self, dim=20, n_points=2000, dtype='float32', device="cpu"):
        """Build a synthetic data set made of uniform noise.

        The first ``n_points // 2`` rows form the training split and are drawn
        uniformly from [-1, 1); the remaining rows form the test split and are
        drawn uniformly from [1, 2).  This holds for inputs and targets alike.
        Both an unnormalized (``self.unnorm``) and a normalized (``self.norm``)
        ``Dataset`` are stored; normalization statistics come from the
        training split only.

        Args:
            dim: number of input features per point.
            n_points: total number of points (train and test together).
            dtype: numpy dtype name the sampled arrays are cast to.
            device: forwarded unchanged to ``Dataset``.
        """
        n_train = n_points // 2
        n_test = n_points - n_train

        def draw(low, high, rows, cols):
            # one uniform sample block, converted to a torch tensor
            block = np.random.uniform(low=low, high=high, size=[rows, cols])
            return t.from_numpy(block.astype(dtype))

        # The draw order (train X, train y, test X, test y) determines how the
        # global numpy random stream is consumed, so it must not be shuffled.
        train_inputs = draw(-1., 1., n_train, dim)
        train_targets = draw(-1., 1., n_train, 1)
        test_inputs = draw(1., 2., n_test, dim)
        test_targets = draw(1., 2., n_test, 1)

        all_inputs = t.cat([train_inputs, test_inputs])
        all_targets = t.cat([train_targets, test_targets])

        # training rows come first, test rows follow
        train_idx = np.arange(n_train)
        test_idx = np.arange(n_train, n_points)

        # raw (unnormalized) data
        self.unnorm = Dataset(all_inputs, all_targets, train_idx, test_idx, device)

        # normalization constants are estimated on the training split only
        feature_std = t.std(self.unnorm.train_X, 0)
        feature_std[feature_std == 0] = 1.  # constant features: avoid dividing by zero
        self.X_std = feature_std
        self.X_mean = t.mean(self.unnorm.train_X, 0)

        self.y_mean = t.mean(self.unnorm.train_y)
        self.y_std = t.std(self.unnorm.train_y)

        inputs_normed = (self.unnorm.X - self.X_mean) / self.X_std
        targets_normed = (self.unnorm.y - self.y_mean) / self.y_std
        self.norm = Dataset(inputs_normed, targets_normed, train_idx, test_idx, device)

        # shape bookkeeping; num_train_set is the row count of self.unnorm.X
        raw_inputs = self.unnorm.X
        self.num_train_set = raw_inputs.shape[0]
        self.in_shape = raw_inputs.shape[1:]
        self.out_shape = self.unnorm.y.shape[1:]
示例#2
0
    def __init__(self, dataset, model, batch_size=None, dtype='float32', device="cpu"):
        """Create a data set whose targets are sampled from ``model``.

        The inputs are taken from ``dataset``; the targets are replaced by
        ``model(x).sample()`` evaluated on the normalized inputs.

        Args:
            dataset: object exposing ``norm`` and ``unnorm`` data sets, each
                with an ``X`` tensor (``norm`` also needs ``train_X``).
            model: callable mapping a batch of inputs to an object with a
                ``sample()`` method.
            batch_size: if ``None`` the model is evaluated on all inputs at
                once; otherwise on consecutive chunks of at most this many
                rows.
            dtype: accepted for signature compatibility; not used here.
            device: forwarded unchanged to ``Dataset``.
        """
        inputs = dataset.norm.X

        if batch_size is None:
            new_y = model(inputs).sample()
        else:
            # Batch over the inputs in their stored row order so that
            # new_y[i] always belongs to X[i], exactly as in the unbatched
            # branch.  Predicting on the train and test subsets separately and
            # concatenating them only lines up with X when the training rows
            # happen to be the leading rows of X.
            # (assumes train/test subsets are rows of X picked by index —
            # TODO confirm against Dataset)
            new_y = t.cat([model(batch_x).sample()
                           for batch_x in t.split(inputs, batch_size)])

        # split into train and test: the first len(train_X) rows are train
        n_train = len(dataset.norm.train_X)
        index_train = np.arange(n_train)
        index_test = np.arange(n_train, len(inputs))

        # Both views share the sampled targets; only the inputs differ.
        self.unnorm = Dataset(dataset.unnorm.X, new_y, index_train, index_test, device)
        self.norm = Dataset(inputs, new_y, index_train, index_test, device)

        # shape bookkeeping; num_train_set is the row count of self.unnorm.X
        self.num_train_set = self.unnorm.X.shape[0]
        self.in_shape = self.unnorm.X.shape[1:]
        self.out_shape = self.unnorm.y.shape[1:]
示例#3
0
    def __init__(self, dtype='float32', device="cpu", download=False):
        """Load MNIST and expose it as flattened train/test data sets.

        The torchvision train and test sets are concatenated (train first),
        each image is flattened to 784 values, and two ``Dataset`` views are
        stored: ``self.unnorm`` with the raw pixel values and ``self.norm``
        with the pixel values divided by 255.

        Args:
            dtype: numpy dtype name the pixel data is cast to.
            device: forwarded unchanged to ``Dataset``.
            download: forwarded to ``torchvision.datasets.MNIST``.
        """
        module_dir = os.path.abspath(os.path.dirname(__file__))
        mnist_root = f'{module_dir}/mnist/'

        # fetch the training split first, then the test split
        mnist_train, mnist_test = [
            torchvision.datasets.MNIST(mnist_root, download=download, train=is_train)
            for is_train in (True, False)
        ]
        n_train = len(mnist_train)
        n_test = len(mnist_test)

        # stack both splits, cast, and flatten every image to a 784-vector
        pixels = np.concatenate([mnist_train.data, mnist_test.data])
        labels = np.concatenate([mnist_train.targets, mnist_test.targets])
        flat_pixels = t.from_numpy(pixels.astype(dtype)).reshape([-1, 784])
        label_tensor = t.from_numpy(labels.astype('int'))

        # training rows come first, test rows follow
        train_idx = np.arange(n_train)
        test_idx = np.arange(n_train, n_train + n_test)

        # raw pixel values
        self.unnorm = Dataset(flat_pixels, label_tensor, train_idx, test_idx, device)

        # pixel values scaled by 1/255
        scaled_pixels = self.unnorm.X / 255.
        self.norm = Dataset(scaled_pixels, label_tensor, train_idx, test_idx, device)

        # shape bookkeeping; num_train_set is the row count of self.unnorm.X
        raw_inputs = self.unnorm.X
        self.num_train_set = raw_inputs.shape[0]
        self.in_shape = raw_inputs.shape[1:]
        self.out_shape = self.unnorm.y.shape[1:]
示例#4
0
    def __init__(self, dataset, split, dtype='float32', device="cpu"):
        """Load a tabular data set and one of its train/test splits from disk.

        Files are read with ``np.loadtxt`` from the ``<dataset>`` directory
        next to this module: ``data.txt``, ``index_features.txt``,
        ``index_target.txt`` and the split files ``index_train_<split>.txt``
        and ``index_test_<split>.txt``.  Both an unnormalized
        (``self.unnorm``) and a normalized (``self.norm``) ``Dataset`` are
        stored; normalization statistics come from the training split only.

        Args:
            dataset: name of the directory holding the data files.
            split: identifier inserted into the split file names.
            dtype: name of the numpy dtype the data matrix is cast to.
            device: forwarded unchanged to ``Dataset``.
        """
        module_dir = os.path.abspath(os.path.dirname(__file__))
        dataset_dir = f'{module_dir}/{dataset}/'

        def read_txt(file_name):
            # every file of the data set lives directly in dataset_dir
            return np.loadtxt(f'{dataset_dir}/{file_name}')

        table = read_txt('data.txt').astype(getattr(np, dtype))
        feature_cols = read_txt('index_features.txt').astype(int)
        target_col = read_txt('index_target.txt').astype(int)

        inputs_raw = t.from_numpy(table[:, feature_cols])
        # slice (rather than index) so the target keeps a trailing axis of size 1
        targets_raw = t.from_numpy(table[:, target_col:target_col + 1])

        # row indices of the requested train / test split
        train_idx = read_txt(f'index_train_{split}.txt').astype(int)
        test_idx = read_txt(f'index_test_{split}.txt').astype(int)

        # raw (unnormalized) data
        self.unnorm = Dataset(inputs_raw, targets_raw, train_idx, test_idx,
                              device)

        # normalization constants are estimated on the training split only
        feature_std = t.std(self.unnorm.train_X, 0)
        feature_std[feature_std == 0] = 1.  # constant features: avoid dividing by zero
        self.X_std = feature_std
        self.X_mean = t.mean(self.unnorm.train_X, 0)

        self.y_mean = t.mean(self.unnorm.train_y)
        self.y_std = t.std(self.unnorm.train_y)

        inputs_normed = (self.unnorm.X - self.X_mean) / self.X_std
        targets_normed = (self.unnorm.y - self.y_mean) / self.y_std
        self.norm = Dataset(inputs_normed, targets_normed, train_idx, test_idx,
                            device)

        # shape bookkeeping; num_train_set is the row count of self.unnorm.X
        raw_inputs = self.unnorm.X
        self.num_train_set = raw_inputs.shape[0]
        self.in_shape = raw_inputs.shape[1:]
        self.out_shape = self.unnorm.y.shape[1:]
示例#5
0
    def __init__(self, dtype='float32', device="cpu", download=False):
        """Load MNIST and extend the test split with rotated test images.

        The stored data consists of, in this order: the MNIST training set,
        the MNIST test set, a copy of the test set with every image rotated
        by a random integer angle from [-45, 45), and a second copy rotated
        by a random integer angle from [-90, 90).  The training split is the
        MNIST training set; the test split is the three test-set variants.
        ``self.unnorm`` holds raw pixel values, ``self.norm`` holds the pixel
        values divided by 255.

        Args:
            dtype: numpy dtype name the pixel data is cast to.
            device: forwarded unchanged to ``Dataset``.
            download: forwarded to ``torchvision.datasets.MNIST``.
        """
        _ROOT = os.path.abspath(os.path.dirname(__file__))
        dataset_dir = f'{_ROOT}/mnist/'

        # load data
        data_train = torchvision.datasets.MNIST(dataset_dir,
                                                download=download,
                                                train=True)
        data_test = torchvision.datasets.MNIST(dataset_dir,
                                               download=download,
                                               train=False)

        # Use a private generator instead of np.random.seed(): it yields the
        # same angle sequence for seed 1337 but leaves the process-wide numpy
        # random state untouched, so building this data set no longer reseeds
        # everything else that draws from np.random.
        rng = np.random.RandomState(1337)

        test_images = np.asarray(data_test.data)
        test_labels = np.asarray(data_test.targets)

        def rotate_test_images(max_angle):
            # One integer angle per image, drawn from [-max_angle, max_angle);
            # reshape=False keeps every rotated image at its original size.
            rotated = np.zeros_like(test_images)
            for i, img in enumerate(test_images):
                angle = rng.randint(low=-max_angle, high=max_angle)
                rotated[i] = ndimage.rotate(img, angle, reshape=False)
            return rotated

        # The small rotations must be drawn before the large ones to keep the
        # angle sequence reproducible.
        data_test_rot_small = rotate_test_images(45)
        data_test_rot_large = rotate_test_images(90)

        # get data into right shape and type; rotation does not change the
        # label, so the test labels are simply repeated for each variant
        X_unnorm = t.from_numpy(
            np.concatenate([
                data_train.data, test_images, data_test_rot_small,
                data_test_rot_large
            ]).astype(dtype)).reshape([-1, 784])
        y = t.from_numpy(
            np.concatenate([
                data_train.targets, test_labels, test_labels, test_labels
            ]).astype('int'))

        # train / test split: the test split spans all three test variants
        index_train = np.arange(len(data_train))
        index_test = np.arange(len(data_train),
                               len(data_train) + 3 * len(data_test))

        # create unnormalized data set
        self.unnorm = Dataset(X_unnorm, y, index_train, index_test, device)

        # create normalized data set (pixel values divided by 255)
        X_norm = self.unnorm.X / 255.
        self.norm = Dataset(X_norm, y, index_train, index_test, device)

        # save some data shapes; num_train_set is the row count of
        # self.unnorm.X
        self.num_train_set = self.unnorm.X.shape[0]
        self.in_shape = self.unnorm.X.shape[1:]
        self.out_shape = self.unnorm.y.shape[1:]