def test_list_lengths_null_array(self):
     """list_lengths reports 0 for every row of a null-typed column."""
     null_column = pa.array([None, None, None], type=pa.null())
     table = pa.Table.from_arrays([null_column], ['f1'])
     batch = input_batch.InputBatch(table)
     lengths = batch.list_lengths(types.FeaturePath(['f1']))
     np.testing.assert_array_equal(lengths, [0, 0, 0])
 def test_null_mask_null_array(self):
     """null_mask flags the single row of a null-typed column as null."""
     table = pa.Table.from_arrays(
         [pa.array([None], type=pa.null())], ['feature'])
     batch = input_batch.InputBatch(table)
     mask = batch.null_mask(types.FeaturePath(['feature']))
     np.testing.assert_array_equal(mask, np.array([True]))
 def test_list_lengths_path_missing(self):
     """list_lengths returns 0 per row for a path absent from the table."""
     table = pa.Table.from_arrays([pa.array([1, None, 1])], ['f1'])
     batch = input_batch.InputBatch(table)
     # 'f2' is not a column of the table; only 'f1' exists.
     absent_path = types.FeaturePath(['f2'])
     lengths = batch.list_lengths(absent_path)
     np.testing.assert_array_equal(lengths, [0, 0, 0])
 def test_list_lengths(self):
     """list_lengths returns each row's list size, with 0 for a null row."""
     list_column = pa.array([[1], None, [1, 2]])
     table = pa.Table.from_arrays([list_column], ['f1'])
     batch = input_batch.InputBatch(table)
     lengths = batch.list_lengths(types.FeaturePath(['f1']))
     np.testing.assert_array_equal(lengths, [1, 0, 2])
 def test_all_null_mask_one_missing(self):
     """all_null_mask with one absent path ('f1') and one present path ('f2').

     The expected mask equals the null pattern of the present column.
     """
     table = pa.Table.from_arrays([pa.array([None, [1]])], ['f2'])
     batch = input_batch.InputBatch(table)
     absent_path = types.FeaturePath(['f1'])
     present_path = types.FeaturePath(['f2'])
     mask = batch.all_null_mask(absent_path, present_path)
     np.testing.assert_array_equal(mask, np.array([True, False]))
 def test_count_missing_generator_single_batch(self):
   """CountMissingGenerator reports 1 for a list, a null and an empty list."""
   table = pa.Table.from_arrays([pa.array([[1], None, []])], ['feature'])
   batch = input_batch.InputBatch(table)
   generator = count_missing_generator.CountMissingGenerator(
       types.FeaturePath(['feature']))
   # Run a single batch through the create / add_input / extract cycle.
   accumulator = generator.add_input(generator.create_accumulator(), batch)
   result = generator.extract_output(accumulator)
   self.assertEqual(1, result)
Exemplo n.º 7
0
 def test_list_lengths_non_list(self):
   """list_lengths raises ValueError for a column that is not a list array."""
   int_column = pa.array([1, None, 1])
   record_batch = pa.RecordBatch.from_arrays([int_column], ['f1'])
   batch = input_batch.InputBatch(record_batch)
   expected_error = r'Can only compute list lengths on list arrays, found.*'
   with self.assertRaisesRegex(ValueError, expected_error):
     batch.list_lengths(types.FeaturePath(['f1']))
Exemplo n.º 8
0
 def add_input(
         self, accumulator: List[CONSTITUENT_ACCTYPE],
         input_record_batch: pa.RecordBatch) -> List[CONSTITUENT_ACCTYPE]:
     """Feeds one record batch to every constituent.

     The record batch is wrapped in a single InputBatch that is shared by all
     constituents. Each constituent is paired positionally with one entry of
     `accumulator`.

     Args:
       accumulator: Per-constituent accumulators, in constituent order.
       input_record_batch: The record batch to add.

     Returns:
       A new list with the value returned by each constituent's add_input.
     """
     shared_batch = input_batch.InputBatch(input_record_batch)
     updated = []
     for constituent, acc in zip(self._constituents, accumulator):
         updated.append(constituent.add_input(acc, shared_batch))
     return updated
Exemplo n.º 9
0
 def test_all_null_mask_all_missing(self):
   """all_null_mask is True in every row when no requested path exists."""
   null_column = pa.array([None, None], type=pa.null())
   record_batch = pa.RecordBatch.from_arrays([null_column], ['f3'])
   batch = input_batch.InputBatch(record_batch)
   # Neither 'f1' nor 'f2' is a column of the batch; only 'f3' exists.
   absent_paths = (types.FeaturePath(['f1']), types.FeaturePath(['f2']))
   mask = batch.all_null_mask(*absent_paths)
   np.testing.assert_array_equal(mask, np.array([True, True]))
Exemplo n.º 10
0
 def add_input(
     self, accumulator: Iterable[CONSTITUENT_ACCTYPE],
     input_record_batch: pa.RecordBatch
 ) -> Iterable[CONSTITUENT_ACCTYPE]:  # pytype: disable=invalid-annotation
     """Feeds one record batch to every constituent.

     The record batch is wrapped in a single InputBatch that is shared by all
     constituents. Each constituent is paired positionally with one entry of
     `accumulator`.

     Args:
       accumulator: Per-constituent accumulators, in constituent order.
       input_record_batch: The record batch to add.

     Returns:
       A new list with the value returned by each constituent's add_input.
     """
     shared_batch = input_batch.InputBatch(input_record_batch)
     updated = []
     for constituent, acc in zip(self._constituents, accumulator):
         updated.append(constituent.add_input(acc, shared_batch))
     return updated
Exemplo n.º 11
0
 def test_length_diff_generator_both_missing(self):
     """LengthDiffGenerator yields (0, 0) when both compared paths are absent.

     The table only contains the 'required' column; 'f1' and 'f2' do not
     exist in it.
     """
     batch = input_batch.InputBatch(
         pa.Table.from_arrays([pa.array([[1], [1], [1]])], ['required']))
     path1 = types.FeaturePath(['f1'])
     path2 = types.FeaturePath(['f2'])
     # Steps are given as a list, like every other path in these tests. A bare
     # string is itself an iterable of single characters, so
     # FeaturePath('required') would presumably not name the 'required'
     # column -- NOTE(review): confirm against the FeaturePath constructor.
     required_path = types.FeaturePath(['required'])
     generator = length_diff_generator.LengthDiffGenerator(
         path1, path2, required_paths=[required_path])
     accumulator = generator.create_accumulator()
     accumulator = generator.add_input(accumulator, batch)
     self.assertEqual((0, 0), generator.extract_output(accumulator))
Exemplo n.º 12
0
 def test_count_missing_generator_required_path(self):
     """CountMissingGenerator reports 0 when a required path is supplied.

     'index' and 'value' hold identical data, so every row where 'index' is
     null also has a null 'value'.
     """
     index_column = pa.array([[1], None, []])
     value_column = pa.array([[1], None, []])
     record_batch = pa.RecordBatch.from_arrays(
         [index_column, value_column], ['index', 'value'])
     batch = input_batch.InputBatch(record_batch)
     counted_path = types.FeaturePath(['index'])
     required_paths = [types.FeaturePath(['value'])]
     generator = count_missing_generator.CountMissingGenerator(
         counted_path, required_paths)
     accumulator = generator.add_input(generator.create_accumulator(), batch)
     result = generator.extract_output(accumulator)
     self.assertEqual(0, result)
Exemplo n.º 13
0
 def test_all_null_mask(self):
     """all_null_mask is True only for the row that is null in all paths.

     The last row is null in 'f2' and 'f3' but holds an empty list in 'f1',
     and the expected mask is False there.
     """
     columns = [
         pa.array([[1], None, []]),
         pa.array([[1], None, None]),
         pa.array([[1], None, None]),
     ]
     names = ['f1', 'f2', 'f3']
     batch = input_batch.InputBatch(pa.Table.from_arrays(columns, names))
     paths = [types.FeaturePath([name]) for name in names]
     mask = batch.all_null_mask(*paths)
     np.testing.assert_array_equal(mask, np.array([False, True, False]))
Exemplo n.º 14
0
 def test_all_null_mask_unequal_lengths(self):
     """all_null_mask raises ValueError when the paths' masks differ in size."""
     flat_column = pa.array([[1]])
     # One row holding a list of two structs, each with an 'sf1' field.
     nested_column = pa.array([[{'sf1': [[1]]}, {'sf1': [[1]]}]])
     table = pa.Table.from_arrays(
         [flat_column, nested_column], ['f1', 'f2'])
     batch = input_batch.InputBatch(table)
     size_mismatch = r'.*null_mask\(f2.sf1\).size.*\(1 != 2\).*'
     with self.assertRaisesRegex(ValueError, size_mismatch):
         batch.all_null_mask(types.FeaturePath(['f1']),
                             types.FeaturePath(['f2', 'sf1']))
Exemplo n.º 15
0
 def test_length_diff_generator_negative_min_max(self):
     """LengthDiffGenerator yields a negative (min, max) pair.

     The generator is built with the paths in (f2, f1) order. The expected
     (-2, -1) is consistent with len(f2) - len(f1) on the two rows that are
     not null: 1 - 3 and 0 - 1.
     """
     batch = input_batch.InputBatch(
         pa.Table.from_arrays([
             pa.array([[1, 2, 3], None, [1]]),
             pa.array([[1], None, []]),
             pa.array([[1], None, [1]])
         ], ['f1', 'f2', 'required']))
     path1 = types.FeaturePath(['f1'])
     path2 = types.FeaturePath(['f2'])
     # Steps are given as a list, like every other path in these tests. A bare
     # string is itself an iterable of single characters, so
     # FeaturePath('required') would presumably not name the 'required'
     # column -- NOTE(review): confirm against the FeaturePath constructor.
     required_path = types.FeaturePath(['required'])
     generator = length_diff_generator.LengthDiffGenerator(
         path2, path1, required_paths=[path1, path2, required_path])
     accumulator = generator.create_accumulator()
     accumulator = generator.add_input(accumulator, batch)
     self.assertEqual((-2, -1), generator.extract_output(accumulator))
Exemplo n.º 16
0
 def test_null_mask(self):
   """null_mask is True for the null row and False for an empty list."""
   list_column = pa.array([[1], None, []])
   record_batch = pa.RecordBatch.from_arrays([list_column], ['feature'])
   batch = input_batch.InputBatch(record_batch)
   mask = batch.null_mask(types.FeaturePath(['feature']))
   np.testing.assert_array_equal(mask, np.array([False, True, False]))
Exemplo n.º 17
0
 def test_all_null_mask_no_paths(self):
     """all_null_mask raises ValueError when called without any path."""
     null_column = pa.array([None, None], type=pa.null())
     table = pa.Table.from_arrays([null_column], ['f3'])
     batch = input_batch.InputBatch(table)
     expected_error = r'Paths cannot be empty.*'
     with self.assertRaisesRegex(ValueError, expected_error):
         batch.all_null_mask()
Exemplo n.º 18
0
 def test_list_lengths_empty_array(self):
     """list_lengths returns an empty result for a zero-row column."""
     empty_column = pa.array([])
     table = pa.Table.from_arrays([empty_column], ['f1'])
     batch = input_batch.InputBatch(table)
     lengths = batch.list_lengths(types.FeaturePath(['f1']))
     np.testing.assert_array_equal(lengths, [])
Exemplo n.º 19
0
 def test_null_mask_path_missing(self):
     """null_mask is True in every row for a path absent from the table."""
     table = pa.Table.from_arrays([pa.array([[1], None, []])], ['feature'])
     batch = input_batch.InputBatch(table)
     # 'feature2' is not a column of the table; only 'feature' exists.
     absent_path = types.FeaturePath(['feature2'])
     mask = batch.null_mask(absent_path)
     np.testing.assert_array_equal(mask, np.array([True, True, True]))
Exemplo n.º 20
0
 def test_null_mask_empty_array(self):
     """null_mask returns an empty boolean mask for a zero-row column."""
     empty_column = pa.array([])
     table = pa.Table.from_arrays([empty_column], ['feature'])
     batch = input_batch.InputBatch(table)
     mask = batch.null_mask(types.FeaturePath(['feature']))
     np.testing.assert_array_equal(mask, np.array([], dtype=bool))