示例#1
0
 def test_element_lengths_list_array(self, list_type_factory):
   """Asserts GetElementLengths yields int64 lengths for list-typed arrays.

   Three inputs are exercised: an empty array, an array containing an empty
   list, and an array containing a null entry. The null entry is expected to
   be reported with length 0.

   Args:
     list_type_factory: callable that builds a list type from a value type;
       presumably supplied by the test's parameterization (not visible here).
   """
   # Empty input -> empty int64 result.
   no_rows = pa.array([], type=list_type_factory(pa.int64()))
   lengths_of_no_rows = array_util.GetElementLengths(no_rows)
   expected_empty = pa.array([], type=pa.int64())
   self.assertTrue(lengths_of_no_rows.equals(expected_empty))

   # An empty inner list has length 0.
   rows_with_empty_list = pa.array(
       [[1., 2.], [], [3.]], list_type_factory(pa.float32()))
   lengths_with_empty_list = array_util.GetElementLengths(rows_with_empty_list)
   self.assertTrue(
       lengths_with_empty_list.equals(pa.array([2, 0, 1], type=pa.int64())))

   # A null entry is expected to be reported with length 0 as well.
   rows_with_null = pa.array(
       [[1., 2.], None, [3.]], list_type_factory(pa.float64()))
   lengths_with_null = array_util.GetElementLengths(rows_with_null)
   self.assertTrue(
       lengths_with_null.equals(pa.array([2, 0, 1], type=pa.int64())))
示例#2
0
    def test_element_lengths(self):
        """Asserts GetElementLengths output for list, binary and string arrays.

        Every expected result is an int32 array; null entries are expected to
        be reported with length 0. An int32 input array is expected to raise
        a RuntimeError whose message matches "NotImplemented".
        """

        def check_lengths(input_array, expected_lengths):
            # Compare against an int32 array built from the expected values.
            actual = array_util.GetElementLengths(input_array)
            expected = pa.array(expected_lengths, type=pa.int32())
            self.assertTrue(actual.equals(expected))

        # List arrays: empty, with an empty list, with a null entry.
        check_lengths(pa.array([], type=pa.list_(pa.int64())), [])
        check_lengths(pa.array([[1., 2.], [], [3.]]), [2, 0, 1])
        check_lengths(pa.array([[1., 2.], None, [3.]]), [2, 0, 1])

        # Binary array, including a null and an empty value.
        check_lengths(
            pa.array([b"a", b"bb", None, b"", b"ccc"], type=pa.binary()),
            [1, 2, 0, 0, 3])

        # String array, including a null and an empty value.
        check_lengths(
            pa.array([u"a", u"bb", None, u"", u"ccc"], type=pa.string()),
            [1, 2, 0, 0, 3])

        # Types other than list/binary/string are rejected.
        with self.assertRaisesRegex(RuntimeError, "NotImplemented"):
            array_util.GetElementLengths(pa.array([1, 2, 3], type=pa.int32()))
 def update(self, feature_array: pa.Array) -> None:
   """Folds the element lengths of `feature_array` into the running stats.

   Updates `self.min_num_bytes`, `self.max_num_bytes` and
   `self.total_num_bytes` from the lengths returned by
   `array_util.GetElementLengths` on the flattened values.

   Args:
     feature_array: Arrow array holding the feature values. Arrays of null
       type are ignored.

   Raises:
     ValueError: if the flattened values have an integer or floating type.
   """
   if pa.types.is_null(feature_array.type):
     return

   # Only the flattened values are needed; the second result is unused.
   flat_values, _ = arrow_util.flatten_nested(feature_array)
   flat_type = flat_values.type
   if pa.types.is_floating(flat_type) or pa.types.is_integer(flat_type):
     raise ValueError('Bytes stats cannot be computed on INT/FLOAT features.')

   # Nothing to aggregate; also keeps np.min/np.max off an empty array.
   if not flat_values:
     return

   lengths = array_util.GetElementLengths(flat_values).to_numpy()
   self.min_num_bytes = min(self.min_num_bytes, np.min(lengths))
   self.max_num_bytes = max(self.max_num_bytes, np.max(lengths))
   self.total_num_bytes += np.sum(lengths)
示例#4
0
 def test_element_lengths_unsupported_type(self):
   """Asserts that an int32 array makes GetElementLengths raise RuntimeError.

   The error message is expected to match "Unimplemented".
   """
   int32_values = pa.array([1, 2, 3], type=pa.int32())
   # Only the call under test sits inside the assertion context.
   with self.assertRaisesRegex(RuntimeError, "Unimplemented"):
     array_util.GetElementLengths(int32_values)
示例#5
0
  def test_element_lengths_binary_like(self, binary_like_type):
    """Asserts GetElementLengths yields int64 lengths for binary-like arrays.

    The null entry and the empty value are both expected to be reported with
    length 0.

    Args:
      binary_like_type: Arrow type used to build the input array; presumably
        supplied by the test's parameterization (not visible here).
    """
    binary_values = pa.array(
        [b"a", b"bb", None, b"", b"ccc"], type=binary_like_type)
    actual_lengths = array_util.GetElementLengths(binary_values)
    expected_lengths = pa.array([1, 2, 0, 0, 3], type=pa.int64())
    self.assertTrue(actual_lengths.equals(expected_lengths))