def test_list_lengths_null_array(self):
    """list_lengths yields 0 for each row of a null-typed column."""
    null_column = pa.array([None, None, None], type=pa.null())
    table = pa.Table.from_arrays([null_column], ['f1'])
    batch = input_batch.InputBatch(table)

    lengths = batch.list_lengths(types.FeaturePath(['f1']))

    np.testing.assert_array_equal(lengths, [0, 0, 0])
def test_null_mask_null_array(self):
    """null_mask flags the single row of a null-typed column as null."""
    null_column = pa.array([None], type=pa.null())
    batch = input_batch.InputBatch(
        pa.Table.from_arrays([null_column], ['feature']))
    feature_path = types.FeaturePath(['feature'])
    want = np.array([True])

    got = batch.null_mask(feature_path)

    np.testing.assert_array_equal(got, want)
def test_list_lengths_path_missing(self):
    """list_lengths yields all zeros when the path is not in the batch."""
    # Only 'f1' exists; the lookup below targets the absent 'f2'.
    table = pa.Table.from_arrays([pa.array([1, None, 1])], ['f1'])
    batch = input_batch.InputBatch(table)

    lengths = batch.list_lengths(types.FeaturePath(['f2']))

    np.testing.assert_array_equal(lengths, [0, 0, 0])
def test_list_lengths(self):
    """list_lengths returns per-row list sizes, with 0 for a null row."""
    list_column = pa.array([[1], None, [1, 2]])
    batch = input_batch.InputBatch(
        pa.Table.from_arrays([list_column], ['f1']))

    lengths = batch.list_lengths(types.FeaturePath(['f1']))

    np.testing.assert_array_equal(lengths, [1, 0, 2])
def test_all_null_mask_one_missing(self):
    """all_null_mask over an absent path and a present one.

    'f1' is not in the batch, so the expected mask mirrors the nulls
    of 'f2' alone.
    """
    table = pa.Table.from_arrays([pa.array([None, [1]])], ['f2'])
    batch = input_batch.InputBatch(table)
    absent_path = types.FeaturePath(['f1'])
    present_path = types.FeaturePath(['f2'])
    want = np.array([True, False])

    got = batch.all_null_mask(absent_path, present_path)

    np.testing.assert_array_equal(got, want)
def test_count_missing_generator_single_batch(self):
    """CountMissingGenerator reports 1 missing value for one batch.

    The column holds a populated list, a null and an empty list; the
    expected count of 1 corresponds to the single null row.
    """
    column = pa.array([[1], None, []])
    batch = input_batch.InputBatch(
        pa.Table.from_arrays([column], ['feature']))
    generator = count_missing_generator.CountMissingGenerator(
        types.FeaturePath(['feature']))

    state = generator.add_input(generator.create_accumulator(), batch)

    self.assertEqual(1, generator.extract_output(state))
def test_list_lengths_non_list(self):
    """list_lengths raises ValueError for a column that is not a list array."""
    scalar_column = pa.array([1, None, 1])
    record_batch = pa.RecordBatch.from_arrays([scalar_column], ['f1'])
    batch = input_batch.InputBatch(record_batch)

    expected_error = r'Can only compute list lengths on list arrays, found.*'
    with self.assertRaisesRegex(ValueError, expected_error):
        batch.list_lengths(types.FeaturePath(['f1']))
def add_input(
    self,
    accumulator: List[CONSTITUENT_ACCTYPE],
    input_record_batch: pa.RecordBatch,
) -> List[CONSTITUENT_ACCTYPE]:
    """Feeds one record batch to every constituent.

    The record batch is wrapped in a single InputBatch that is shared by
    all constituents.

    Args:
      accumulator: Per-constituent accumulators, paired positionally with
        self._constituents.
      input_record_batch: The record batch to add.

    Returns:
      A new list holding what each constituent's add_input returned, in
      constituent order.
    """
    shared_batch = input_batch.InputBatch(input_record_batch)
    updated = []
    for constituent, constituent_acc in zip(self._constituents, accumulator):
        updated.append(constituent.add_input(constituent_acc, shared_batch))
    return updated
def test_all_null_mask_all_missing(self):
    """all_null_mask is True for every row when no queried path exists."""
    # The batch only has 'f3'; both queried paths ('f1', 'f2') are absent.
    null_column = pa.array([None, None], type=pa.null())
    record_batch = pa.RecordBatch.from_arrays([null_column], ['f3'])
    batch = input_batch.InputBatch(record_batch)
    first_path = types.FeaturePath(['f1'])
    second_path = types.FeaturePath(['f2'])
    want = np.array([True, True])

    got = batch.all_null_mask(first_path, second_path)

    np.testing.assert_array_equal(got, want)
def add_input(
    self,
    accumulator: Iterable[CONSTITUENT_ACCTYPE],
    input_record_batch: pa.RecordBatch,
) -> Iterable[CONSTITUENT_ACCTYPE]:  # pytype: disable=invalid-annotation
    """Feeds one record batch to every constituent.

    The record batch is wrapped in a single InputBatch that is shared by
    all constituents.

    Args:
      accumulator: Per-constituent accumulators, paired positionally with
        self._constituents.
      input_record_batch: The record batch to add.

    Returns:
      A new list holding what each constituent's add_input returned, in
      constituent order.
    """
    shared_batch = input_batch.InputBatch(input_record_batch)
    updated = []
    for constituent, constituent_acc in zip(self._constituents, accumulator):
        updated.append(constituent.add_input(constituent_acc, shared_batch))
    return updated
def test_length_diff_generator_both_missing(self):
    """LengthDiffGenerator yields (0, 0) when both compared paths are absent.

    The batch only contains the 'required' column; 'f1' and 'f2' do not
    exist in it.
    """
    batch = input_batch.InputBatch(
        pa.Table.from_arrays([pa.array([[1], [1], [1]])], ['required']))
    path1 = types.FeaturePath(['f1'])
    path2 = types.FeaturePath(['f2'])
    # Wrap the step name in a list, matching how path1/path2 are built.
    # A bare string is itself an iterable of characters, so it would
    # presumably become per-character steps and never match the 'required'
    # column (confirm against types.FeaturePath).
    required_path = types.FeaturePath(['required'])
    generator = length_diff_generator.LengthDiffGenerator(
        path1, path2, required_paths=[required_path])
    accumulator = generator.create_accumulator()
    accumulator = generator.add_input(accumulator, batch)
    self.assertEqual((0, 0), generator.extract_output(accumulator))
def test_count_missing_generator_required_path(self):
    """CountMissingGenerator reports 0 when a required path is supplied.

    'index' and 'value' carry identical data, so the only null row of
    'index' is also null in the required 'value' column.
    """
    index_column = pa.array([[1], None, []])
    value_column = pa.array([[1], None, []])
    record_batch = pa.RecordBatch.from_arrays(
        [index_column, value_column], ['index', 'value'])
    batch = input_batch.InputBatch(record_batch)
    counted_path = types.FeaturePath(['index'])
    required_path = types.FeaturePath(['value'])
    generator = count_missing_generator.CountMissingGenerator(
        counted_path, [required_path])

    state = generator.add_input(generator.create_accumulator(), batch)

    self.assertEqual(0, generator.extract_output(state))
def test_all_null_mask(self):
    """all_null_mask is True only for rows null in every queried column.

    Row 1 is null in all three columns; row 2 is null in 'f2' and 'f3'
    but holds an empty list in 'f1', so it is not masked.
    """
    columns = [
        pa.array([[1], None, []]),
        pa.array([[1], None, None]),
        pa.array([[1], None, None]),
    ]
    batch = input_batch.InputBatch(
        pa.Table.from_arrays(columns, ['f1', 'f2', 'f3']))
    first_path = types.FeaturePath(['f1'])
    second_path = types.FeaturePath(['f2'])
    third_path = types.FeaturePath(['f3'])
    want = np.array([False, True, False])

    got = batch.all_null_mask(first_path, second_path, third_path)

    np.testing.assert_array_equal(got, want)
def test_all_null_mask_unequal_lengths(self):
    """all_null_mask raises ValueError when the per-path masks differ in size.

    'f1' has one top-level value, while the nested path f2.sf1 has two
    struct entries; the expected message reports the 1 != 2 mismatch.
    """
    flat_column = pa.array([[1]])
    nested_column = pa.array([[{'sf1': [[1]]}, {'sf1': [[1]]}]])
    table = pa.Table.from_arrays([flat_column, nested_column], ['f1', 'f2'])
    batch = input_batch.InputBatch(table)

    expected_error = r'.*null_mask\(f2.sf1\).size.*\(1 != 2\).*'
    with self.assertRaisesRegex(ValueError, expected_error):
        batch.all_null_mask(
            types.FeaturePath(['f1']), types.FeaturePath(['f2', 'sf1']))
def test_length_diff_generator_negative_min_max(self):
    """LengthDiffGenerator reports negative min and max length differences.

    The generator is built with (path2, path1), so the compared pair is
    'f2' against 'f1'; the expected output is (-2, -1).
    """
    batch = input_batch.InputBatch(
        pa.Table.from_arrays([
            pa.array([[1, 2, 3], None, [1]]),
            pa.array([[1], None, []]),
            pa.array([[1], None, [1]]),
        ], ['f1', 'f2', 'required']))
    path1 = types.FeaturePath(['f1'])
    path2 = types.FeaturePath(['f2'])
    # Wrap the step name in a list, matching how path1/path2 are built.
    # A bare string is itself an iterable of characters, so it would
    # presumably become per-character steps and never match the 'required'
    # column (confirm against types.FeaturePath).
    required_path = types.FeaturePath(['required'])
    generator = length_diff_generator.LengthDiffGenerator(
        path2, path1, required_paths=[path1, path2, required_path])
    accumulator = generator.create_accumulator()
    accumulator = generator.add_input(accumulator, batch)
    self.assertEqual((-2, -1), generator.extract_output(accumulator))
def test_null_mask(self):
    """null_mask flags the null row but not the empty-list row."""
    column = pa.array([[1], None, []])
    record_batch = pa.RecordBatch.from_arrays([column], ['feature'])
    batch = input_batch.InputBatch(record_batch)
    feature_path = types.FeaturePath(['feature'])
    want = np.array([False, True, False])

    got = batch.null_mask(feature_path)

    np.testing.assert_array_equal(got, want)
def test_all_null_mask_no_paths(self):
    """all_null_mask raises ValueError when called without any path."""
    null_column = pa.array([None, None], type=pa.null())
    table = pa.Table.from_arrays([null_column], ['f3'])
    batch = input_batch.InputBatch(table)

    expected_error = r'Paths cannot be empty.*'
    with self.assertRaisesRegex(ValueError, expected_error):
        batch.all_null_mask()
def test_list_lengths_empty_array(self):
    """list_lengths returns an empty result for a zero-row column."""
    empty_column = pa.array([])
    table = pa.Table.from_arrays([empty_column], ['f1'])
    batch = input_batch.InputBatch(table)

    lengths = batch.list_lengths(types.FeaturePath(['f1']))

    np.testing.assert_array_equal(lengths, [])
def test_null_mask_path_missing(self):
    """null_mask is True for every row when the path is not in the batch."""
    # The batch has 'feature'; the lookup targets the absent 'feature2'.
    column = pa.array([[1], None, []])
    batch = input_batch.InputBatch(
        pa.Table.from_arrays([column], ['feature']))
    absent_path = types.FeaturePath(['feature2'])
    want = np.array([True, True, True])

    got = batch.null_mask(absent_path)

    np.testing.assert_array_equal(got, want)
def test_null_mask_empty_array(self):
    """null_mask returns an empty mask for a zero-row column."""
    empty_column = pa.array([])
    table = pa.Table.from_arrays([empty_column], ['feature'])
    batch = input_batch.InputBatch(table)
    feature_path = types.FeaturePath(['feature'])
    want = np.array([], dtype=bool)

    got = batch.null_mask(feature_path)

    np.testing.assert_array_equal(got, want)