示例#1
0
def _read_pylist(column: pyarrow.ChunkedArray) -> List[Any]:
    """
    Convert `column` to a plain Python list.

    Nanosecond-resolution timestamp values are converted with
    `.to_pydatetime()`; nulls in floating-point columns are replaced with
    NaN. Every other column is returned exactly as `to_pylist()` gave it.
    """
    column_type = column.type
    values = column.to_pylist()

    if pyarrow.types.is_timestamp(column_type) and column_type.unit == "ns":
        # For nanosecond timestamps pyarrow hands back pandas.Timestamp
        # values (they carry more resolution than datetime.datetime). Our
        # API speaks datetime.datetime, so truncate to microseconds.
        #
        # Should the lost precision ever matter, the API ought to pass
        # int64 instead of datetime.datetime.
        return [
            value.to_pydatetime() if value is not None else None
            for value in values
        ]

    if pyarrow.types.is_floating(column_type):
        # Pandas treats NaN and None as the same thing, so in effect we do
        # too. Numeric tables can hold NaN but never None; timestamp and
        # String columns can hold None but never NaT; int columns can hold
        # neither NaN nor None.
        missing = float("nan")
        return [missing if value is None else value for value in values]

    return values
示例#2
0
def _arrow_array_to_json_list(array: pyarrow.ChunkedArray) -> List[Any]:
    """
    Convert `array` to a JSON-encodable List.

    Strings become Strings; Numbers become int/float; Datetimes become
    ISO8601-encoded Strings with a trailing "Z". Null timestamps become
    None.
    """
    if not isinstance(array.type, pyarrow.TimestampType):
        return array.to_pylist()

    # Factor that scales a raw timestamp value to seconds.
    # NOTE(review): assumes TimestampUnits maps a unit name to the number of
    # ticks per second -- confirm against its definition.
    multiplier = 1.0 / TimestampUnits[array.type.unit]
    utc = datetime.timezone.utc

    # Null detection covers two pyarrow APIs: older releases yield the
    # `pyarrow.NULL` singleton for a null element, while newer releases yield
    # a typed scalar whose `.value` is None. The singleton check runs first
    # so `.value` is never read from it.
    #
    # `fromtimestamp(..., tz=utc).replace(tzinfo=None)` produces the same
    # naive UTC datetime as the deprecated `utcfromtimestamp()`, so the
    # ISO8601 text (without a "+00:00" offset) is unchanged.
    return [
        (
            None
            if v is pyarrow.NULL or v.value is None
            else (
                datetime.datetime.fromtimestamp(v.value * multiplier, tz=utc)
                .replace(tzinfo=None)
                .isoformat()
                + "Z"
            )
        )
        for v in array
    ]