예제 #1
0
 def test_dataset_with_transform(self):
     """Check ``RandomDataset`` items (default transform) against the raw data.

     For every combination of ``self.rs``, ``self.ks``, ``self.ds`` and
     ``self.ns``, samples are generated and wrapped in a ``RandomDataset``.
     The dataset length must equal ``n`` and, for a random selection of
     indexes, each item ``(x, y)`` must be a pair of float32 tensors with
     ``x`` of shape ``(d,)``, ``y`` equal to -1 or 1, and both matching
     ``data.data_points`` / ``data.data_points_labels`` at that index.
     """
     # Local import so the method does not rely on a module-level import.
     from itertools import product

     for r, k, d, n in product(self.rs, self.ks, self.ds, self.ns):
         # subTest names the failing parameter combination in the report.
         with self.subTest(r=r, k=k, d=d, n=n):
             data = random.RandomData(k=k, r=r, d=d)
             data.generate_samples(n)
             ds = dataset.RandomDataset(data)
             self.assertEqual(len(ds), n)
             self.assertEqual(len(data.data_points_labels), n)

             # Spot-check at least 10 indexes (drawn with replacement)
             # instead of walking the whole dataset.
             indexes = np.random.choice(np.arange(len(ds)),
                                        size=max(10, len(ds) // 10))
             for index in indexes:
                 x, y = ds[index]
                 self.assertEqual(x.dtype, torch.float32)
                 self.assertEqual(y.dtype, torch.float32)
                 self.assertSequenceEqual(x.shape, (d, ))

                 label = y.detach().item()
                 self.assertIn(label, [-1., 1.])
                 np.testing.assert_allclose(x.detach().cpu().numpy(),
                                            data.data_points[index, :],
                                            rtol=1e-6,
                                            atol=1e-6)
                 self.assertEqual(label, data.data_points_labels[index])
예제 #2
0
    def test_dataloader_without_shuffle(self):
        """Check that an unshuffled ``DataLoader`` yields the data in order.

        For every combination of ``self.rs``, ``self.ks``, ``self.ds`` and
        ``self.ns``, a ``RandomDataset`` is batched with ``shuffle=False``.
        Each batch ``(x, y)`` must be 2-D float32 with ``x.shape[1] == d`` and
        ``y.shape[1] == 1``, contain only labels -1 or 1, and match the
        corresponding slice of ``data.data_points`` /
        ``data.data_points_labels``. The number of batches must equal
        ``ceil(len(dataset) / BATCH_SIZE)``, with every batch but the last
        holding exactly ``BATCH_SIZE`` rows.
        """
        # Local import so the method does not rely on a module-level import.
        from itertools import product

        for r, k, d, n in product(self.rs, self.ks, self.ds, self.ns):
            # subTest names the failing parameter combination in the report.
            with self.subTest(r=r, k=k, d=d, n=n):
                data = random.RandomData(k=k, r=r, d=d)
                data.generate_samples(n)
                ds = dataset.RandomDataset(data)
                dataloader = DataLoader(ds,
                                        shuffle=False,
                                        batch_size=BATCH_SIZE)

                batch_sizes = []
                for index, (x, y) in enumerate(dataloader):
                    # Without shuffling, batch `index` covers this slice of
                    # the underlying arrays.
                    start = index * BATCH_SIZE
                    stop = start + BATCH_SIZE

                    self.assertEqual(len(x.shape), 2)
                    self.assertEqual(len(y.shape), 2)
                    self.assertEqual(x.shape[1], d)
                    self.assertEqual(y.shape[1], 1)
                    self.assertEqual(x.shape[0], y.shape[0])
                    self.assertEqual(x.dtype, torch.float32)
                    self.assertEqual(y.dtype, torch.float32)

                    # Every label in the batch is either +1 or -1.
                    n_signed = int((y == 1).sum() + (y == -1).sum())
                    self.assertEqual(n_signed, len(y))

                    np.testing.assert_allclose(
                        x.detach().cpu().numpy(),
                        data.data_points[start:stop, :],
                        rtol=1e-6,
                        atol=1e-6)
                    np.testing.assert_array_equal(
                        y.detach().cpu().numpy()[:, 0],
                        data.data_points_labels[start:stop])

                    batch_sizes.append(x.shape[0])

                self.assertEqual(
                    len(batch_sizes),
                    math.ceil(len(dataloader.dataset) / BATCH_SIZE))
                # Only the final batch may be short.
                self.assertLessEqual(batch_sizes[-1], BATCH_SIZE)
                self.assertEqual(batch_sizes[:-1],
                                 [BATCH_SIZE] * (len(batch_sizes) - 1))
예제 #3
0
    def test_train_val_test_split(self):
        """Check that train/val/test ``Subset`` splits keep their sizes.

        A dataset of 10000 samples is split 6000/2000/2000 using a shuffled
        index array. Each ``Subset`` must report the expected length, and
        iterating a shuffling ``DataLoader`` over it must yield exactly that
        many samples in total.
        """
        r, k, d, n = 1., 4, 20, 10000
        n_train = 6000
        n_val = 2000
        n_test = 2000

        shuffled_indexes = np.arange(n)
        np.random.shuffle(shuffled_indexes)

        data = random.RandomData(k=k, r=r, d=d)
        data.generate_samples(n)
        ds = dataset.RandomDataset(data)

        # Split name -> (indexes into `ds`, expected number of samples).
        splits = {
            'train': (shuffled_indexes[:n_train], n_train),
            'val': (shuffled_indexes[n_train:n_train + n_val], n_val),
            'test': (shuffled_indexes[n_train + n_val:], n_test),
        }

        for split_name, (indexes, expected_size) in splits.items():
            with self.subTest(split=split_name):
                subset = Subset(ds, indexes)
                self.assertEqual(len(subset), expected_size)

                loader = DataLoader(subset,
                                    shuffle=True,
                                    batch_size=BATCH_SIZE)
                # Total samples seen across all batches of this split.
                n_seen = sum(len(x) for x, _ in loader)
                self.assertEqual(n_seen, expected_size)
    def setUp(self) -> None:
        """Load the model config and build the network and data loader.

        Reads ``wide_two_layer_net.yaml``, builds ``self.config`` and
        ``self.two_layer_net`` from it, then creates a ``RandomDataset``
        whose ``d`` is the config's ``architecture.input_size`` and exposes
        it as ``self.ds`` together with a shuffling ``self.data_loader``.

        Raises:
            Exception: if the YAML file cannot be parsed; the original
                ``yaml.YAMLError`` is attached as the cause.
        """
        # NOTE(review): this relative path is resolved against the current
        # working directory, so the tests only find the config when run from
        # the expected directory -- confirm this is intended.
        config_file = os.path.join('../../pytorch/configs', 'wide_two_layer_net.yaml')
        with open(config_file, 'r', encoding='utf-8') as stream:
            try:
                config_dict = yaml.safe_load(stream)
            except yaml.YAMLError as e:
                # Chain explicitly so the parser error stays in the traceback.
                raise Exception(
                    "Exception while reading yaml file {} : {}".format(config_file, e)
                ) from e
        self.config = ModelConfig(config_dict=config_dict)
        self.two_layer_net = TwoLayerNet(self.config, train_hidden=True)

        r, k, n = 1., 4, 5000
        d = config_dict['architecture']['input_size']

        data = random.RandomData(k=k, r=r, d=d, n=n)
        data.generate_samples()
        self.ds = dataset.RandomDataset(data)
        self.data_loader = DataLoader(self.ds, shuffle=True, batch_size=BATCH_SIZE)
예제 #5
0
 def test_dataset_without_transform(self):
     """Check ``RandomDataset`` items when built with ``transform=None``.

     For every combination of ``self.rs``, ``self.ks``, ``self.ds`` and
     ``self.ns``, samples are generated and wrapped in a ``RandomDataset``
     with no transform. The dataset length must equal ``n`` and, for a
     random selection of indexes, each item ``(x, y)`` must have ``x`` of
     shape ``(d,)``, ``y`` equal to -1 or 1, and both matching
     ``data.data_points`` / ``data.data_points_labels`` at that index.
     """
     # Local import so the method does not rely on a module-level import.
     from itertools import product

     for r, k, d, n in product(self.rs, self.ks, self.ds, self.ns):
         # subTest names the failing parameter combination in the report.
         with self.subTest(r=r, k=k, d=d, n=n):
             data = random.RandomData(k=k, r=r, d=d)
             data.generate_samples(n)
             ds = dataset.RandomDataset(data, transform=None)
             self.assertEqual(len(ds), n)
             self.assertEqual(len(data.data_points_labels), n)

             # Spot-check at least 10 indexes (drawn with replacement)
             # instead of walking the whole dataset.
             indexes = np.random.choice(np.arange(len(ds)),
                                        size=max(10, len(ds) // 10))
             for index in indexes:
                 x, y = ds[index]
                 self.assertSequenceEqual(x.shape, (d, ))
                 self.assertIn(y, [-1, 1])
                 np.testing.assert_allclose(x,
                                            data.data_points[index, :],
                                            rtol=1e-9,
                                            atol=1e-9)
                 self.assertEqual(y, data.data_points_labels[index])
 def _generate_data(k, r, d, n):
     """Build a ``RandomDataset`` backed by newly generated random data.

     The four arguments are passed by keyword to ``random.RandomData``;
     ``generate_samples()`` is then called on it (its return value is
     ignored) before the object is wrapped in ``dataset.RandomDataset``.
     """
     generator_kwargs = dict(k=k, r=r, d=d, n=n)
     samples = random.RandomData(**generator_kwargs)
     samples.generate_samples()
     wrapped = dataset.RandomDataset(samples)
     return wrapped
예제 #7
0
 def _generate_data(k, r, d, n) -> torch.utils.data.Dataset:
     """Return a ``RandomDataset`` wrapping a fresh ``RandomData`` source.

     ``k``, ``r``, ``d`` and ``n`` are handed to ``random.RandomData`` as
     keyword arguments. Samples are generated before wrapping; the return
     value of ``generate_samples()`` is not used.
     """
     source = random.RandomData(k=k, r=r, d=d, n=n)
     # Presumably fills `source` in place -- confirm against RandomData.
     source.generate_samples()
     return dataset.RandomDataset(source)