def _is_zero_copy_only(pa_type: pa.DataType) -> bool:
    """
    Decide the ``zero_copy_only`` argument for a pyarrow-to-numpy conversion.

    The returned flag is meant to be passed to ``.to_numpy()`` on a pyarrow
    array whose type is ``pa_type``.

    Zero copy is available for all primitive types except booleans.
    Primitive types are types for which the physical representation is the
    same in arrow and in numpy.

    References:
        https://github.com/wesm/arrow/blob/c07b9b48cf3e0bbbab493992a492ae47e5b04cad/python/pyarrow/types.pxi#L821
        https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array.to_numpy
        https://issues.apache.org/jira/browse/ARROW-2871?jql=text%20~%20%22boolean%20to_numpy%22

    Args:
        pa_type: The pyarrow data type of the array about to be converted.

    Returns:
        ``True`` when ``pa_type`` is primitive and not boolean, ``False`` otherwise.
    """
    # Non-primitive types are never requested as zero-copy.
    if not is_primitive(pa_type):
        return False
    # Booleans are the one primitive type excluded (see ARROW-2871 above).
    return not is_boolean(pa_type)
def to_numpy(self):
    """
    Convert the nested list storage of this array into one multi-dimensional numpy array.

    The storage is flattened once per dimension declared by ``self.type.ndims``,
    converted to a flat numpy array, and finally reshaped to
    ``(len(self), *self.type.shape)``.

    Returns:
        The flattened storage as a numpy array, reshaped to
        ``(len(self), *self.type.shape)``.
    """
    storage: pa.ListArray = self.storage
    # Remove one level of list nesting per declared dimension so that only the
    # innermost values remain.
    for _ in range(self.type.ndims):
        storage = storage.flatten()

    # zero copy is available for all primitive types except booleans
    # primitive types are types for which the physical representation is the same in arrow and in numpy
    # https://github.com/wesm/arrow/blob/c07b9b48cf3e0bbbab493992a492ae47e5b04cad/python/pyarrow/types.pxi#L821
    # see https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array.to_numpy
    # and https://issues.apache.org/jira/browse/ARROW-2871?jql=text%20~%20%22boolean%20to_numpy%22
    zero_copy_only = is_primitive(storage.type) and not is_boolean(storage.type)
    flat_values = storage.to_numpy(zero_copy_only=zero_copy_only)

    # Restore the per-item shape: one leading axis for the items, then the declared shape.
    return flat_values.reshape(len(self), *self.type.shape)
def test_is_primitive():
    """Check that ``types.is_primitive`` accepts int32 and rejects a list of int32."""
    int32_type = pa.int32()
    assert types.is_primitive(int32_type)

    list_of_int32 = pa.list_(pa.int32())
    assert not types.is_primitive(list_of_int32)