def test_shuffle(self):
     bg = BatchGenerator(
         self.df,
         x_structure=('var1', self.lb),
         y_structure=('label', self.le),
         batch_size=8,
         shuffle=True,
     )
     batch1 = bg[0]
     bg.on_epoch_end()
     bg.on_epoch_end()
     batch2 = bg[0]
     assert not np.array_equal(batch1[0], batch2[0])
 def test_inverse_transform(self):
     # batch size equals to dataset size
     bg = BatchGenerator(
         self.df,
         x_structure=('label', self.le),
         y_structure=('var1', self.lb),
         batch_size=self.df.shape[0],
         shuffle=False,
     )
     batch = bg[0]
     inverse = bg.inverse_transform(batch[1])
     assert type(inverse) is pd.DataFrame
     assert inverse.shape == (self.df.shape[0], 1)
     b = self.df[['var1']]
     assert inverse.equals(self.df[['var1']])
 def test_y_structure_none(self):
     bg = BatchGenerator(
         self.df,
         x_structure=('label', self.le),
         batch_size=self.df.shape[0],
         shuffle=False,
     )
     batch = bg[0]
     assert type(batch) == np.ndarray
 def test_encoder_adaptor(self):
     """
     This test only makes sure the adaptor parameter is passed correctly
     :return:
     """
     bg = BatchGenerator(self.df,
                         x_structure=('var1', self.lb),
                         y_structure=('label', self.le),
                         shuffle=False,
                         encoder_adaptor='numpy')
     assert isinstance(bg.batch_shaper.x_structure._encoder_adaptor,
                       NumpyEncoderAdaptor)
     bg = BatchGenerator(self.df,
                         x_structure=('var1', self.lb),
                         y_structure=('label', self.le),
                         shuffle=False,
                         encoder_adaptor='pandas')
     assert isinstance(bg.batch_shaper.x_structure._encoder_adaptor,
                       PandasEncoderAdaptor)
 def test_transform(self):
     le = LabelEncoder().fit(self.df['var2'])
     bg = BatchGenerator(
         self.df,
         x_structure=[('var1', self.lb), ('var2', le)],
         y_structure=('label', self.le),
         shuffle=False,
         batch_size=3,
     )
     trf = bg.transform(self.df)
     assert type(trf) == tuple
     assert len(trf) == 2
     assert type(trf[0]) == list
     assert len(trf[0]) == 2
     assert type(trf[1]) == np.ndarray
     assert trf[1].shape == (8, 1)
     trf = bg.transform(self.df, return_y=False)
     assert type(trf) == list
     assert len(trf) == 2
 def test_batch_sizes(self):
     # batch size equals to dataset size
     bg = BatchGenerator(
         self.df,
         x_structure=('var1', self.lb),
         y_structure=('label', self.le),
         batch_size=self.df.shape[0],
         shuffle=True,
     )
     batch = bg[0]
     assert batch[0].shape[0] == self.df.shape[0]
     # batch size bigger than dataset size
     bg = BatchGenerator(
         self.df,
         x_structure=('var1', self.lb),
         y_structure=('label', self.le),
         batch_size=self.df.shape[0] + 10,
         shuffle=True,
     )
     batch = bg[0]
     assert batch[0].shape[0] == self.df.shape[0]
 def test_metadata(self):
     bg = BatchGenerator(
         self.df,
         x_structure=('var1', self.lb),
         y_structure=('label', self.le),
         batch_size=8,
         shuffle=False,
     )
     md = bg.metadata
     assert type(md) == tuple
     assert len(md) == 2
     assert type(md[0]) == dict
     assert type(md[1]) == dict
 def test_shapes(self):
     le = LabelEncoder().fit(self.df['var2'])
     bg = BatchGenerator(
         self.df,
         x_structure=[('var1', self.lb), ('var2', le)],
         y_structure=('label', self.le),
         shuffle=False,
     )
     sh = bg.shapes
     assert type(sh) == tuple
     assert len(sh) == 2
     assert type(sh[0]) == list
     assert len(sh[0]) == 2
     assert sh[0][0] == (3, )
     assert sh[0][1] == (1, )
     assert sh[1] == (1, )
    def test_batch_transformer_integration(self):
        """
        This test only checks that declaring and using BatchGenerator with batch_transform parameter
        does not cause errors.
        TODO: need to add actual encoders here
        """
        class TestTransform(BatchTransformer):
            def __init__(self, col_name):
                self.col_name = col_name
                super().__init__()

            def transform(self, batch):
                # red is the most rare label in the dataset. Here I will blindly replace all values in
                # col_name column with this value
                batch[self.col_name] = 'Red'
                return batch

            def inverse_transform(self, batch):
                return batch

        class TransparentTransform(BatchTransformer):
            def transform(self, batch):
                return batch

            def inverse_transform(self, batch):
                return batch

        bt1 = TransparentTransform()
        bt2 = TestTransform('label')
        bg = BatchGenerator(
            self.df,
            x_structure=('var1', self.lb),
            y_structure=('label', self.le),
            batch_transforms=[bt1, bt2],
            batch_size=8,
            shuffle=False,
        )
        batch = bg[0]
        assert type(batch) == tuple
        assert len(batch) == 2
        assert (self.le.inverse_transform(batch[1]) == 'Red').all()
Exemplo n.º 10
0
 def test_basic(self):
     bg = BatchGenerator(
         self.df,
         x_structure=('var1', self.lb),
         y_structure=('label', self.le),
         batch_size=3,
         shuffle=True,
     )
     assert len(bg) == 3
     batch = bg[0]
     assert type(batch) == tuple
     assert type(batch[0]) == np.ndarray
     assert type(batch[1]) == np.ndarray
     assert batch[0].shape == (3, 3)
     assert batch[1].shape == (3, 1)
     batch = bg[2]
     assert type(batch) == tuple
     assert type(batch[0]) == np.ndarray
     assert type(batch[1]) == np.ndarray
     assert batch[0].shape == (2, 3)
     assert batch[1].shape == (2, 1)
     with pytest.raises(IndexError):
         batch = bg[3]