def test_transform_batch_inconsistent_mode(self, mode):
    """A batch transform whose output mode changes between calls must raise."""
    dataset = dummy_dataset.DummyDataset()
    self.mode = mode
    batched = dataset.transform_batch(('a',), self._transform)
    # First fetch succeeds and pins the output mode; the second fetch
    # (self._transform presumably emits a different mode on its next
    # call — defined elsewhere in this test class) must be rejected.
    batched.get_examples(None, None)
    with pytest.raises(ValueError):
        batched.get_examples(None, None)
def test_fetch(self, mode, return_array):
    """fetch() pulls the whole table: no indices, no key selection."""
    def callback(indices, key_indices):
        # fetch() always asks the backend for everything.
        assert indices is None
        assert key_indices is None

    dataset = dummy_dataset.DummyDataset(
        mode=mode, return_array=return_array, callback=callback
    )
    fetched = dataset.fetch()

    if mode is tuple:
        expected = tuple(dataset.data)
    elif mode is dict:
        expected = dict(zip(('a', 'b', 'c'), dataset.data))
    elif mode is None:
        expected = dataset.data[0]
    np.testing.assert_equal(fetched, expected)

    # Normalize the result to an iterable of columns for the type check.
    if mode is dict:
        columns = fetched.values()
    elif mode is None:
        columns = (fetched,)
    else:
        columns = fetched
    column_type = np.ndarray if return_array else list
    for column in columns:
        assert isinstance(column, column_type)
def test_with_converter(mode):
    """with_converter() swaps convert() and leaves everything else alone."""
    dataset = dummy_dataset.DummyDataset(mode=mode)

    def converter(*args, **kwargs):
        # dict mode passes columns as keywords; tuple mode and mode=None
        # both pass them positionally (the two original branches were
        # identical, so they are merged here).
        if mode is dict:
            assert args == ()
            np.testing.assert_equal(
                kwargs, dict(zip(('a', 'b', 'c'), dataset.data)))
        else:
            np.testing.assert_equal(args, tuple(dataset.data))
            assert kwargs == {}
        return 'converted'

    view = dataset.with_converter(converter)
    assert isinstance(view, ppe.dataset.TabularDataset)
    assert len(view) == len(dataset)
    assert view.keys == dataset.keys
    assert view.mode == dataset.mode
    assert (view.get_examples(None, None)
            == dataset.get_examples(None, None))
    assert view.convert(view.fetch()) == 'converted'
def test_join(mode_a, mode_b, return_array, key_indices):
    """join() appends dataset_b's columns after dataset_a's.

    Verifies that each side only receives the key indices that belong to
    it, and that the joined view's keys/mode/data are correct.
    """
    if key_indices is None:
        # Full fetch: neither side gets an explicit key selection.
        # (Review fix: an early ``return`` here used to abort the test
        # for this parameterization, leaving these two assignments as
        # dead stores; the non-None computation now sits in ``else``.)
        expected_key_indices_a = None
        expected_key_indices_b = None
    else:
        key_size_a = 3 if mode_a else 1
        key_indices_a = tuple(
            key_index for key_index in key_indices
            if key_index < key_size_a)
        key_indices_b = tuple(
            key_index - key_size_a for key_index in key_indices
            if key_size_a <= key_index)
        # A side with an empty selection is expected never to be queried,
        # so its expected value is deliberately left unbound.
        if key_indices_a:
            expected_key_indices_a = key_indices_a
        if key_indices_b:
            expected_key_indices_b = key_indices_b

    def callback_a(indices, key_indices):
        assert indices is None
        assert key_indices == expected_key_indices_a

    dataset_a = dummy_dataset.DummyDataset(
        mode=mode_a, return_array=return_array,
        callback=callback_a, convert=True)

    def callback_b(indices, key_indices):
        assert indices is None
        assert key_indices == expected_key_indices_b

    dataset_b = dummy_dataset.DummyDataset(
        keys=('d', 'e'), mode=mode_b,
        return_array=return_array, callback=callback_b)

    view = dataset_a.join(dataset_b)
    assert isinstance(view, ppe.dataset.TabularDataset)
    assert len(view) == len(dataset_a)
    assert view.keys == dataset_a.keys + dataset_b.keys
    # Review fix: parenthesized. Without parentheses this parsed as
    # ``(view.mode == dataset_a.mode) or dataset_b.mode or tuple`` and
    # was vacuously true because ``tuple`` is truthy.
    assert view.mode == (dataset_a.mode or dataset_b.mode or tuple)

    output = view.get_examples(None, key_indices)
    data = np.vstack((dataset_a.data, dataset_b.data))
    if key_indices is not None:
        data = data[list(key_indices)]
    for out, d in itertools.zip_longest(output, data):
        np.testing.assert_equal(out, d)
        if return_array:
            assert isinstance(out, np.ndarray)
        else:
            assert isinstance(out, list)
    assert view.convert(output) == 'converted'
def test_delegate_dataset(mode):
    """DelegateDataset transparently forwards to the wrapped dataset."""
    delegate = tabular.DelegateDataset(dummy_dataset.DummyDataset(mode=mode))
    inner = delegate.dataset
    assert isinstance(delegate, ppe.dataset.TabularDataset)
    assert len(delegate) == len(inner)
    assert delegate.keys == inner.keys
    assert delegate.mode == inner.mode
    assert delegate.get_example(3) == inner.get_example(3)
def test_transform_inconsistent_mode(self, mode):
    """A per-example transform whose output mode changes must raise."""
    dataset = dummy_dataset.DummyDataset()
    self.mode = mode
    transformed = dataset.transform(
        ('a',), [((('a', 'b', 'c'), ('a',)), self._transform)])
    # First example pins the output mode; the second request
    # (self._transform presumably switches modes — defined elsewhere
    # in this test class) must be rejected.
    transformed.get_examples([0], None)
    with pytest.raises(ValueError):
        transformed.get_examples([0], None)
def test_concat(mode_a, mode_b, return_array, parameter_set):
    """concat() stacks two same-keyed datasets along the example axis."""
    def callback_a(indices, key_indices):
        assert indices == parameter_set['expected_indices_a']
        assert key_indices is None

    # NOTE(review): each dataset's key set is chosen from the *other*
    # side's mode — presumably so both sides collapse to a single key
    # whenever either mode is None; confirm against DummyDataset.
    dataset_a = dummy_dataset.DummyDataset(
        keys=('a', 'b', 'c') if mode_b else ('a',),
        mode=mode_a, return_array=return_array,
        callback=callback_a, convert=True)

    def callback_b(indices, key_indices):
        assert indices == parameter_set['expected_indices_b']
        assert key_indices is None

    dataset_b = dummy_dataset.DummyDataset(
        size=5, keys=('a', 'b', 'c') if mode_a else ('a',),
        mode=mode_b, return_array=return_array, callback=callback_b)

    view = dataset_a.concat(dataset_b)
    assert isinstance(view, ppe.dataset.TabularDataset)
    assert len(view) == len(dataset_a) + len(dataset_b)
    assert view.keys == dataset_a.keys
    assert view.mode == dataset_a.mode

    output = view.get_examples(parameter_set['indices'], None)
    data = np.hstack((dataset_a.data, dataset_b.data))
    if parameter_set['indices'] is not None:
        data = data[:, parameter_set['indices']]
    # Hoisted out of the loop: arrays only survive when exactly one of
    # the two source datasets was touched.
    single_side = operator.xor(
        'expected_indices_a' in parameter_set,
        'expected_indices_b' in parameter_set)
    for out, d in itertools.zip_longest(output, data):
        np.testing.assert_equal(out, d)
        if return_array and single_side:
            assert isinstance(out, np.ndarray)
        else:
            assert isinstance(out, list)
    assert view.convert(output) == 'converted'
def test_astuple(mode):
    """astuple() forces tuple mode without touching data or conversion."""
    dataset = dummy_dataset.DummyDataset(mode=mode, convert=True)
    as_tuple = dataset.astuple()
    assert isinstance(as_tuple, ppe.dataset.TabularDataset)
    assert len(as_tuple) == len(dataset)
    assert as_tuple.keys == dataset.keys
    assert as_tuple.mode == tuple
    assert (as_tuple.get_examples(None, None)
            == dataset.get_examples(None, None))
    assert as_tuple.convert(as_tuple.fetch()) == 'converted'
def test_transform_batch_length_changed(self, mode):
    """A batch transform that changes the batch length must be rejected."""
    dataset = dummy_dataset.DummyDataset()
    self.mode = mode

    def transform_batch(a, b, c):
        # Appends one extra element, so the output batch is longer than
        # the input batch whatever the output mode is.
        if self.mode is tuple:
            return a + [0],
        if self.mode is dict:
            return {'a': a + [0]}
        if self.mode is None:
            return a + [0]

    lengthened = dataset.transform_batch(('a',), transform_batch)
    with pytest.raises(ValueError):
        lengthened.get_examples(None, None)
def test_iter(self, mode, return_array):
    """Iterating yields examples in order and then raises StopIteration."""
    dataset = dummy_dataset.DummyDataset(
        mode=mode, return_array=return_array)

    def expected_at(i):
        # Build the expected example for row i in the dataset's mode.
        if mode is tuple:
            return tuple(dataset.data[:, i])
        elif mode is dict:
            return dict(zip(('a', 'b', 'c'), dataset.data[:, i]))
        elif mode is None:
            return dataset.data[0, i]

    it = iter(dataset)
    for i in range(10):
        assert next(it) == expected_at(i)
    # After the 10 examples the iterator must be exhausted.
    with pytest.raises(StopIteration):
        next(it)
def test_get_example(self, mode, return_array):
    """get_example(i) requests exactly row [i] and unpacks per the mode."""
    def callback(indices, key_indices):
        # A single-example lookup asks the backend for just that row.
        assert indices == [3]
        assert key_indices is None

    dataset = dummy_dataset.DummyDataset(
        mode=mode, return_array=return_array, callback=callback
    )
    if mode is tuple:
        expected = tuple(dataset.data[:, 3])
    elif mode is dict:
        expected = dict(zip(('a', 'b', 'c'), dataset.data[:, 3]))
    elif mode is None:
        expected = dataset.data[0, 3]
    assert dataset.get_example(3) == expected
def test_with_dataloader(batch_size, mode):
    """The dataset must be consumable by a torch DataLoader with batching.

    Builds the per-key, per-batch expected tensors by hand and compares
    them against what the DataLoader emits.
    """
    size = 10
    keys = ('a', 'b', 'c')
    dataset = dummy_dataset.DummyDataset(size=size, keys=keys, mode=mode)
    expected = torch.tensor(dataset.data).type(torch.float64)
    # Ceiling division: the last batch may be short.
    n_batches = (size + batch_size - 1) // batch_size
    expected_per_key = [
        [
            expected[i, j * batch_size:(j + 1) * batch_size]
            for j in range(n_batches)
        ]
        for i in range(len(keys))
    ]
    # Review fix: removed a leftover debug ``print(expected_per_key)``.
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    for i, example in enumerate(dataloader):
        for j, key in enumerate(keys):
            # dict mode collates to a mapping keyed by column name;
            # other modes collate to a sequence indexed by position.
            assert torch.allclose(
                expected_per_key[j][i],
                example[key if mode == dict else j])
def test_convert(self, mode, return_array):
    """Default convert() turns every column into a numpy array."""
    dataset = dummy_dataset.DummyDataset(
        mode=mode, return_array=return_array)
    converted = dataset.convert(dataset.fetch())

    if mode is tuple:
        expected = tuple(dataset.data)
    elif mode is dict:
        expected = dict(zip(('a', 'b', 'c'), dataset.data))
    elif mode is None:
        expected = dataset.data[0]
    np.testing.assert_equal(converted, expected)

    # Normalize to an iterable of columns before the type check.
    if mode is dict:
        columns = converted.values()
    elif mode is None:
        columns = (converted,)
    else:
        columns = converted
    # Unlike fetch(), convert() must yield ndarrays regardless of
    # return_array.
    for column in columns:
        assert isinstance(column, np.ndarray)
def test_slice(test_args):
    """Exercise ``dataset.slice`` over the parameterized index/key cases."""
    exception = (test_args.get('index_exception', None)
                 or test_args.get('key_exception', None))

    indices = test_args['indices']
    keys = test_args['keys']
    mode = test_args['mode']
    return_array = test_args['return_array']
    get_examples_indices = test_args['get_examples_indices']
    get_examples_key_indices = test_args['get_examples_key_indices']

    if isinstance(indices, list):
        # Booleans stay booleans; other entries are cast to the
        # parameterized integer type.
        indices = [
            index if isinstance(index, bool) else test_args['integer'](index)
            for index in indices
        ]

    def callback(indices, key_indices):
        # NOTE: this parameter deliberately shadows the outer ``indices``;
        # it is the index object that actually reaches the backend.
        if isinstance(indices, list) \
                or isinstance(get_examples_indices, list):
            assert isinstance(indices, list)
        elif isinstance(indices, slice) \
                or isinstance(get_examples_indices, slice):
            assert isinstance(indices, slice)
        else:
            assert indices is None
        if keys is None and get_examples_key_indices is None:
            assert key_indices is None
        else:
            assert isinstance(key_indices, tuple)

    dataset = dummy_dataset.DummyDataset(
        mode=mode, return_array=return_array,
        callback=callback, convert=True)

    if exception is not None:
        with pytest.raises(exception):
            if keys is None:
                dataset.slice[indices]
            else:
                dataset.slice[indices, keys]
        return

    if keys is None:
        view = dataset.slice[indices]
        data = dataset.data[:, _indices_for_numpy(indices)]
    else:
        view = dataset.slice[indices, keys]
        # Review fix: remember whether the caller passed a bare key
        # BEFORE normalizing to a 1-tuple — the previous version tested
        # ``isinstance(keys, tuple)`` after normalization, which made the
        # bare-key mode assertion below unreachable.
        single_key = not isinstance(keys, tuple)
        if single_key:
            keys = keys,
        key_indices = [
            {'a': 0, 'b': 1, 'c': 2}.get(key, key) for key in keys]
        data = dataset.data[key_indices][:, _indices_for_numpy(indices)]

    assert isinstance(view, ppe.dataset.TabularDataset)
    assert len(view) == test_args['expected_len']
    assert view.keys == test_args['expected_keys']
    if keys is None:
        assert view.mode == mode
    elif not single_key:
        # Review fix: parenthesized — ``view.mode == mode or tuple``
        # parsed as ``(view.mode == mode) or tuple`` and always passed.
        # A tuple of keys keeps the mode, with tuple as the fallback the
        # original expression spelled out for mode-None datasets.
        assert view.mode == (mode or tuple)
    else:
        # A bare (non-tuple) key yields a single-column, mode-None view.
        assert view.mode is None

    output = view.get_examples(
        get_examples_indices, get_examples_key_indices)
    if get_examples_indices is not None:
        data = data[:, _indices_for_numpy(get_examples_indices)]
    if get_examples_key_indices is not None:
        data = data[list(get_examples_key_indices)]
    for out, d in itertools.zip_longest(output, data):
        np.testing.assert_equal(out, d)
        if return_array:
            assert isinstance(out, np.ndarray)
        else:
            assert isinstance(out, list)
    assert view.convert(output) == 'converted'
def test_concat_key_length():
    """concat() must refuse datasets whose key sets differ."""
    left = dummy_dataset.DummyDataset()
    right = dummy_dataset.DummyDataset(keys=('a', 'b'))
    with pytest.raises(ValueError):
        left.concat(right)
def test_join_conflict_key():
    """join() must reject datasets that share a key name."""
    left = dummy_dataset.DummyDataset()
    right = dummy_dataset.DummyDataset(keys=('a', 'd'))
    with pytest.raises(ValueError):
        left.join(right)
def test_transform(in_mode, out_mode, indices, key_indices, with_batch):
    """Exercise transform()/transform_batch() across in/out mode combos.

    Review fix: the three transform closures each repeated an identical
    ~20-line argument-unpack-and-check preamble; it is factored into the
    shared ``_unpack`` helper below.
    """
    dataset = dummy_dataset.DummyDataset(
        mode=in_mode, return_array=True, convert=True)

    def _unpack(args, kwargs):
        # Check the call convention implied by in_mode and return (a, b, c).
        if in_mode is tuple:
            assert len(args) == 3
            assert len(kwargs) == 0
            a, b, c = args
        elif in_mode is dict:
            assert len(args) == 0
            assert len(kwargs) == 3
            a, b, c = kwargs['a'], kwargs['b'], kwargs['c']
        elif in_mode is None:
            assert len(args) == 1
            assert len(kwargs) == 0
            a, = args
            b, c = a, a
        # Batch transforms see whole columns; per-example ones see scalars.
        expected_type = np.ndarray if with_batch else float
        assert isinstance(a, expected_type)
        assert isinstance(b, expected_type)
        assert isinstance(c, expected_type)
        return a, b, c

    def transform(*args, **kwargs):
        a, b, c = _unpack(args, kwargs)
        if out_mode is tuple:
            return a + b, b + c
        elif out_mode is dict:
            return {'alpha': a + b, 'beta': b + c}
        elif out_mode is None:
            return a + b + c

    def transform_alpha(*args, **kwargs):
        a, b, c = _unpack(args, kwargs)
        if out_mode is tuple:
            return a + b,
        elif out_mode is dict:
            return {'alpha': a + b}
        elif out_mode is None:
            return a + b + c

    def transform_beta(*args, **kwargs):
        a, b, c = _unpack(args, kwargs)
        if out_mode is tuple:
            return b + c,
        elif out_mode is dict:
            return {'beta': b + c}
        elif out_mode is None:
            return a + b + c

    if in_mode is not None:
        a, b, c = dataset.data
    else:
        a, = dataset.data
        b, c = a, a

    if out_mode is not None:
        if in_mode is not None:
            d_transform = [((('a', 'b', 'c'), ('alpha', 'beta')), transform)]
        else:
            d_transform = [((('a', ), ('alpha', )), transform_alpha),
                           ((('a', ), ('beta', )), transform_beta)]
        if with_batch:
            view = dataset.transform_batch(('alpha', 'beta'), d_transform)
        else:
            view = dataset.transform(('alpha', 'beta'), d_transform)
        data = np.vstack((a + b, b + c))
    else:
        if in_mode is not None:
            d_transform = [((('a', 'b', 'c'), ('alpha', )), transform_alpha)]
        else:
            d_transform = [((('a', ), ('alpha', )), transform_alpha)]
        if with_batch:
            view = dataset.transform_batch(('alpha', ), d_transform)
        else:
            view = dataset.transform(('alpha', ), d_transform)
        data = (a + b + c)[None]

    assert isinstance(view, ppe.dataset.TabularDataset)
    assert len(view) == len(dataset)
    if out_mode is not None:
        assert view.keys == ('alpha', 'beta')
        assert view.mode == out_mode
    else:
        assert view.keys == ('alpha', )
        assert view.mode == out_mode

    output = view.get_examples(indices, key_indices)
    if indices is not None:
        data = data[:, indices]
    if key_indices is not None:
        data = data[list(key_indices)]
    for out, d in itertools.zip_longest(output, data):
        np.testing.assert_equal(out, d)
        if with_batch:
            assert isinstance(out, np.ndarray)
        else:
            assert isinstance(out, list)
    assert view.convert(view.fetch()) == 'converted'
def test_join_length():
    """join() must reject datasets of different lengths."""
    left = dummy_dataset.DummyDataset()
    right = dummy_dataset.DummyDataset(size=5, keys=('d', 'e'))
    with pytest.raises(ValueError):
        left.join(right)