def _read_pylist(column: pyarrow.ChunkedArray) -> List[Any]:
    """
    Convert `column` to a plain Python list of values.

    Nanosecond-resolution timestamp columns have each non-null value
    converted with `.to_pydatetime()`; floating-point columns have each null
    replaced by NaN. Every other column is returned exactly as
    `column.to_pylist()` produces it.
    """
    arrow_type = column.type
    values = column.to_pylist()

    if pyarrow.types.is_timestamp(arrow_type) and arrow_type.unit == "ns":
        # For nanosecond timestamps pyarrow hands back pandas.Timestamp
        # values (they have higher resolution than datetime.datetime). Our
        # callers want datetime.datetime, so we truncate to microseconds.
        #
        # Should that ever become a problem, the API ought to pass int64
        # values around instead of datetime.datetime.
        converted = []
        for value in values:
            if value is None:
                converted.append(None)
            else:
                converted.append(value.to_pydatetime())
        return converted

    if pyarrow.types.is_floating(arrow_type):
        # Pandas treats NaN and None as the same thing, so we do too:
        # numeric columns may hold NaN but never None; timestamp and String
        # columns may hold None but never NaT; int columns hold neither.
        not_a_number = float("nan")
        return [
            value if value is not None else not_a_number for value in values
        ]

    return values
def _arrow_array_to_json_list(array: pyarrow.ChunkedArray) -> List[Any]:
    """
    Convert `array` to a JSON-encodable List.

    Strings become Strings; Numbers become int/float; Datetimes become
    ISO8601-encoded Strings (UTC, suffixed with "Z"); nulls in a timestamp
    column become None.

    Timestamps are converted with exact integer arithmetic relative to the
    Unix epoch. Sub-microsecond precision (nanosecond columns) is truncated,
    because datetime.datetime cannot represent it.
    """
    if not isinstance(array.type, pyarrow.TimestampType):
        return array.to_pylist()

    # TimestampUnits maps a unit name to the number of ticks per second (it
    # was previously used as `1.0 / TimestampUnits[unit]` to obtain seconds).
    # NOTE(review): int() assumes those values are whole numbers -- confirm.
    ticks_per_second = int(TimestampUnits[array.type.unit])
    microseconds_per_second = 1000000
    epoch = datetime.datetime(1970, 1, 1)

    def to_iso8601(scalar: Any) -> Any:
        if scalar is pyarrow.NULL:
            return None
        ticks = scalar.value
        # NOTE(review): some pyarrow versions appear to yield typed scalars
        # whose `.value` is None for nulls instead of the `pyarrow.NULL`
        # singleton -- confirm against the pinned pyarrow version.
        if ticks is None:
            return None
        # Stay in integers: a float64 cannot hold a present-day nanosecond
        # count exactly, so going through float seconds can shift the result
        # by a microsecond. Adding a timedelta to the epoch also avoids the
        # deprecated datetime.utcfromtimestamp() and its platform-dependent
        # range limits.
        microseconds = ticks * microseconds_per_second // ticks_per_second
        moment = epoch + datetime.timedelta(microseconds=microseconds)
        return moment.isoformat() + "Z"

    return [to_iso8601(v) for v in array]