class DatasetTestCase(TestCase):
    """Exercise ``Dataset`` built on top of ``range(100)``.

    The tests cover sequence access, the transformation helpers (``map``,
    ``filter``, ``flat_map``, ``window``), the eager accessors (``all``,
    ``first``, ``take``) and ``save`` with ``Path`` and ``pickle`` patched
    inside ``lineflow.core``.
    """

    def setUp(self):
        # Every test starts from a fresh Dataset wrapping the same range.
        self.base = range(100)
        self.data = Dataset(self.base)

    @staticmethod
    def _install_path_mocks(Path_mock, open_mock):
        """Wire the patched ``Path`` and ``Path.open`` mocks together.

        Returns a ``(path, fp)`` pair: the mock returned by calling the
        patched ``Path`` and the mock yielded when the opened file is entered
        as a context manager.
        """
        fake_path = Mock()
        fake_file = Mock()
        Path_mock.return_value = fake_path
        open_mock.return_value.__enter__.return_value = fake_file
        fake_path.open = open_mock
        return fake_path, fake_file

    def test_dunder_getitem(self):
        # The dataset must compare equal to the base range as a sequence.
        self.assertSequenceEqual(self.data, self.base)

    def test_supports_slicing(self):
        # Slicing the dataset yields a list equal to the sliced base.
        for window in (slice(10, 20), slice(0, 99)):
            self.assertListEqual(self.data[window], list(self.base[window]))

    def test_dunder_len(self):
        self.assertEqual(len(self.data), len(self.base))

    def test_dunder_add(self):
        # Adding datasets concatenates them into a ConcatDataset.
        tripled = self.data + self.data + self.data
        self.assertSequenceEqual(tripled, list(self.base) * 3)
        self.assertIsInstance(tripled, ConcatDataset)

    def test_map(self):
        def square(x):
            return x ** 2

        expected = [square(value) for value in self.base]
        self.assertSequenceEqual(self.data.map(square), expected)

    def test_filter(self):
        def is_even(x):
            return x % 2 == 0

        expected = [value for value in self.base if is_even(value)]
        self.assertSequenceEqual(self.data.filter(is_even), expected)

    def test_flat_map(self):
        def wrap(x):
            return [x]

        expected = [item for value in self.base for item in wrap(value)]
        self.assertSequenceEqual(self.data.flat_map(wrap), expected)

    def test_window(self):
        # Flattening the windows of size 3 must give back the base items.
        windows = self.data.window(3)
        flattened = list(itertools.chain.from_iterable(windows))
        self.assertSequenceEqual(flattened, self.base)

    def test_supports_multiple_maps(self):
        def increment(x):
            return x + 1

        current = self.data
        for _ in range(100):
            mapped = current.map(increment)
            # Each map wraps the previous dataset and keeps the function.
            self.assertEqual(mapped._dataset, current)
            self.assertIs(mapped._map_func, increment)
            current = mapped

        self.assertSequenceEqual(
            current, [value + 100 for value in self.base])

    def test_all(self):
        self.assertListEqual(self.data.all(), list(self.base))

    def test_first(self):
        self.assertEqual(self.data.first(), self.base[0])

    def test_take(self):
        count = 50
        self.assertListEqual(self.data.take(count), list(self.base[:count]))

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.dump')
    def test_saves_yourself(self,
                            pickle_dump_mock,
                            Path_mock,
                            open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # The cache file is missing while its parent directory is present.
        path.exists.return_value = False
        path.parent.exists.return_value = True

        saved = self.data.save('/path/to/cache')

        path.exists.assert_called_once()
        path.parent.exists.assert_called_once()
        path.open.assert_called_once_with('wb')
        pickle_dump_mock.assert_called_once_with(self.data.all(), fp)
        self.assertIsInstance(saved, lineflow.core.CacheDataset)

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.dump')
    def test_makes_a_directory_and_saves_yourself(self,
                                                  pickle_dump_mock,
                                                  Path_mock,
                                                  open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # Neither the cache file nor its parent directory is present.
        path.exists.return_value = False
        path.parent.exists.return_value = False

        saved = self.data.save('/path/to/cache')

        path.exists.assert_called_once()
        path.parent.exists.assert_called_once()
        path.parent.mkdir.assert_called_once_with(parents=True)
        path.open.assert_called_once_with('wb')
        pickle_dump_mock.assert_called_once_with(self.data.all(), fp)
        self.assertIsInstance(saved, lineflow.core.CacheDataset)

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.dump')
    def test_maps_func_and_saves_yourself(self,
                                          pickle_dump_mock,
                                          Path_mock,
                                          open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # The cache file is missing while its parent directory is present.
        path.exists.return_value = False
        path.parent.exists.return_value = True

        saved = self.data.map(lambda x: x ** 2).save('/path/to/cache')

        path.exists.assert_called_once()
        path.parent.exists.assert_called_once()
        path.open.assert_called_once_with('wb')
        pickle_dump_mock.assert_called_once_with(saved.all(), fp)
        self.assertIsInstance(saved, lineflow.core.CacheDataset)
        self.assertListEqual(
            saved._dataset, [value ** 2 for value in self.base])

        # Iteration and indexing on the saved dataset must both give the
        # squared values.
        for index, cached in enumerate(saved):
            expected = self.data[index] ** 2
            self.assertEqual(cached, expected)
            self.assertEqual(saved[index], expected)

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.load')
    def test_loads_existed_cache_implicitly(self,
                                            pickle_load_mock,
                                            Path_mock,
                                            open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # The cache file is already present, so save() should read it back.
        path.exists.return_value = True
        pickle_load_mock.return_value = list(self.base)

        loaded = self.data.save('/path/to/cache')

        path.exists.assert_called_once()
        path.open.assert_called_once_with('rb')
        pickle_load_mock.assert_called_once_with(fp)
        self.assertIsInstance(loaded, lineflow.core.CacheDataset)
class DatasetTestCase(TestCase):
    """Exercise ``Dataset`` built on top of ``range(100)``.

    The tests cover sequence access, chained ``map`` calls, the eager
    accessors (``all``, ``first``, ``take``), ``save`` with ``Path`` and
    ``pickle`` patched inside ``lineflow.core``, and ``lineflow.load``.
    """

    def setUp(self):
        # Every test starts from a fresh Dataset wrapping the same range.
        self.base = range(100)
        self.data = Dataset(self.base)

    @staticmethod
    def _install_path_mocks(Path_mock, open_mock):
        """Wire the patched ``Path`` and ``Path.open`` mocks together.

        Returns a ``(path, fp)`` pair: the mock returned by calling the
        patched ``Path`` and the mock yielded when the opened file is entered
        as a context manager.
        """
        fake_path = Mock()
        fake_file = Mock()
        Path_mock.return_value = fake_path
        open_mock.return_value.__enter__.return_value = fake_file
        fake_path.open = open_mock
        return fake_path, fake_file

    def test_getitem(self):
        # The dataset must compare equal to the base range as a sequence.
        self.assertSequenceEqual(self.data, self.base)

    def test_supports_slicing(self):
        # Slicing the dataset yields a list equal to the sliced base.
        for window in (slice(10, 20), slice(0, 99)):
            self.assertListEqual(self.data[window], list(self.base[window]))

    def test_len(self):
        self.assertEqual(len(self.data), len(self.base))

    def test_add(self):
        tripled = self.data + self.data + self.data
        self.assertSequenceEqual(tripled, list(self.base) * 3)

    def test_map(self):
        def square(x):
            return x**2

        expected = [square(value) for value in self.base]
        self.assertSequenceEqual(self.data.map(square), expected)

    def test_keeps_original_dataset_after_multiple_maps(self):
        def identity(x):
            return x

        chained = self.data
        for depth in range(1, 101):
            chained = chained.map(identity)
            # The underlying dataset stays the base; only the function
            # list grows by one per map call.
            self.assertEqual(chained._dataset, self.base)
            self.assertEqual(len(chained._funcs), depth)

    def test_supports_method_chain(self):
        squared = self.data.map(lambda x: x**2)
        halved = squared.map(lambda x: x / 2)
        self.assertSequenceEqual(
            halved, [value**2 / 2 for value in self.base])

    def test_all(self):
        self.assertListEqual(self.data.all(), list(self.base))

    def test_first(self):
        self.assertEqual(self.data.first(), self.base[0])

    def test_take(self):
        count = 50
        self.assertListEqual(self.data.take(count), list(self.base[:count]))

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.dump')
    def test_saves_yourself(self,
                            pickle_dump_mock,
                            Path_mock,
                            open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # The cache file is missing while its parent directory is present.
        path.exists.return_value = False
        path.parent.exists.return_value = True

        saved = self.data.save('/path/to/cache')

        path.exists.assert_called_once()
        path.parent.exists.assert_called_once()
        path.open.assert_called_once_with('wb')
        pickle_dump_mock.assert_called_once_with(self.data.all(), fp)
        self.assertIsInstance(saved, lineflow.core.CacheDataset)

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.dump')
    def test_makes_a_directory_and_saves_yourself(self,
                                                  pickle_dump_mock,
                                                  Path_mock,
                                                  open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # Neither the cache file nor its parent directory is present.
        path.exists.return_value = False
        path.parent.exists.return_value = False

        saved = self.data.save('/path/to/cache')

        path.exists.assert_called_once()
        path.parent.exists.assert_called_once()
        path.parent.mkdir.assert_called_once_with(parents=True)
        path.open.assert_called_once_with('wb')
        pickle_dump_mock.assert_called_once_with(self.data.all(), fp)
        self.assertIsInstance(saved, lineflow.core.CacheDataset)

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.dump')
    def test_maps_func_and_saves_yourself(self,
                                          pickle_dump_mock,
                                          Path_mock,
                                          open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # The cache file is missing while its parent directory is present.
        path.exists.return_value = False
        path.parent.exists.return_value = True

        saved = self.data.map(lambda x: x**2).save('/path/to/cache')

        path.exists.assert_called_once()
        path.parent.exists.assert_called_once()
        path.open.assert_called_once_with('wb')
        pickle_dump_mock.assert_called_once_with(saved.all(), fp)
        self.assertIsInstance(saved, lineflow.core.CacheDataset)
        self.assertListEqual(
            saved._dataset, [value**2 for value in self.base])

        # Iteration and indexing on the saved dataset must both give the
        # squared values.
        for index, cached in enumerate(saved):
            expected = self.data[index]**2
            self.assertEqual(cached, expected)
            self.assertEqual(saved[index], expected)

    @patch('lineflow.core.Path.open')
    @patch('lineflow.core.Path')
    @patch('lineflow.core.pickle.load')
    def test_loads_existed_cache_implicitly(self,
                                            pickle_load_mock,
                                            Path_mock,
                                            open_mock):
        path, fp = self._install_path_mocks(Path_mock, open_mock)
        # The cache file is already present, so save() should read it back.
        path.exists.return_value = True
        pickle_load_mock.return_value = list(self.base)

        loaded = self.data.save('/path/to/cache')

        path.exists.assert_called_once()
        path.open.assert_called_once_with('rb')
        pickle_load_mock.assert_called_once_with(fp)
        self.assertIsInstance(loaded, lineflow.core.CacheDataset)

    @patch('lineflow.core.open')
    @patch('lineflow.core.pickle.load')
    def test_load(self, pickle_load_mock, open_mock):
        expected_items = list(self.base)
        pickle_load_mock.return_value = expected_items
        # Object yielded when the patched open() result is entered.
        opened_file = Mock()
        open_mock.return_value.__enter__.return_value = opened_file

        filepath = '/path/to/dataset'
        loaded = lineflow.load(filepath)

        open_mock.assert_called_once_with(filepath, 'rb')
        pickle_load_mock.assert_called_once_with(opened_file)
        self.assertListEqual(loaded.all(), list(self.base))
        self.assertEqual(loaded._dataset, list(self.base))

        # The classmethod-style entry point is expected to warn.
        with self.assertWarns(DeprecationWarning):
            lineflow.Dataset.load(filepath)