Пример #1
0
    def test_timestamp_with_tz_to_pandas_type(self):
        """Check the pandas dtype reported by a tz-aware 'ns' timestamp type.

        The Arrow type built with ``tz='America/Los_Angeles'`` must report a
        pandas dtype equal to ``DatetimeTZDtype('ns', tz=...)`` for that zone.
        """
        from pyarrow.compat import DatetimeTZDtype

        zone = 'America/Los_Angeles'
        arrow_type = pa.timestamp('ns', tz=zone)

        actual = arrow_type.to_pandas_dtype()
        expected = DatetimeTZDtype('ns', tz=zone)
        assert actual == expected
Пример #2
0
def _make_datetimetz(tz):
    """Return a ``DatetimeTZDtype`` with unit ``'ns'`` for the given ``tz``.

    ``tz`` is handed to ``DatetimeTZDtype`` unchanged.
    """
    # Imported lazily, at call time rather than at module import time.
    from pyarrow.compat import DatetimeTZDtype

    dtype = DatetimeTZDtype('ns', tz=tz)
    return dtype
Пример #3
0
def _make_datetimetz(tz):
    """Return a ``DatetimeTZDtype`` with unit ``'ns'`` for the given ``tz``.

    ``tz`` is first passed through ``pa.lib.string_to_tzinfo`` (presumably
    turning a timezone name into a tzinfo object -- confirm against pyarrow),
    and the converted value is what the dtype is built with.
    """
    tzinfo = pa.lib.string_to_tzinfo(tz)
    dtype = DatetimeTZDtype('ns', tz=tzinfo)
    return dtype
def table_to_blockmanager(table, nthreads=1):
    """Build a pandas ``BlockManager`` from ``table``.

    If the schema metadata carries a ``b'pandas'`` entry, its JSON payload's
    ``'index_columns'`` list names the columns to use as the row index. Each
    listed column that is present in the schema is converted to index values
    and removed from the table before the remaining columns are turned into
    blocks by ``lib.table_to_blocks``.

    Parameters
    ----------
    table
        Source table; must expose ``schema``, ``num_rows`` and ``column``.
    nthreads : int, default 1
        Forwarded unchanged to ``lib.table_to_blocks``.

    Returns
    -------
    The ``BlockManager`` built from the blocks, the remaining column names
    and the row index.
    """
    import pandas.core.internals as _int
    from pyarrow.compat import DatetimeTZDtype
    import pyarrow.lib as lib

    schema = table.schema
    row_count = table.num_rows
    metadata = schema.metadata

    # Index column names come from the pandas metadata, when it is present.
    index_columns = []
    if metadata is not None and b'pandas' in metadata:
        pandas_metadata = json.loads(metadata[b'pandas'].decode('utf8'))
        index_columns = pandas_metadata['index_columns']

    # Table holding only the data columns once index columns are removed.
    block_table = table
    index_arrays = []
    index_names = []
    for name in index_columns:
        position = schema.get_field_index(name)
        if position == -1:
            # Listed in the metadata but absent from the schema: skip it.
            continue

        column = table.column(position)
        level_name = None if is_unnamed_index_level(name) else name
        values = column.to_pandas().values
        if not values.flags.writeable:
            # ARROW-1054: in pandas 0.19.2, factorize will reject
            # non-writeable arrays when calling MultiIndex.from_arrays
            values = values.copy()

        index_arrays.append(values)
        index_names.append(level_name)
        # Look the position up again on block_table: earlier removals may
        # have shifted it relative to the original schema.
        block_table = block_table.remove_column(
            block_table.schema.get_field_index(name))

    def _build_block(item):
        # Choose the pandas block class from the keys present in the item.
        block_arr = item['block']
        placement = item['placement']
        if 'dictionary' in item:
            cat = pd.Categorical(block_arr,
                                 categories=item['dictionary'],
                                 ordered=False,
                                 fastpath=True)
            return _int.make_block(cat,
                                   placement=placement,
                                   klass=_int.CategoricalBlock,
                                   fastpath=True)
        if 'timezone' in item:
            dtype = DatetimeTZDtype('ns', tz=item['timezone'])
            return _int.make_block(block_arr,
                                   placement=placement,
                                   klass=_int.DatetimeTZBlock,
                                   dtype=dtype,
                                   fastpath=True)
        return _int.make_block(block_arr, placement=placement)

    blocks = [_build_block(item)
              for item in lib.table_to_blocks(block_table, nthreads)]

    level_count = len(index_arrays)
    if level_count == 0:
        # No index columns were recovered: fall back to a positional index.
        index = pd.RangeIndex(row_count)
    elif level_count == 1:
        index = pd.Index(index_arrays[0], name=index_names[0])
    else:
        index = pd.MultiIndex.from_arrays(index_arrays, names=index_names)

    column_names = [column.name for column in block_table.itercolumns()]

    return _int.BlockManager(blocks, [column_names, index])
Пример #5
0
def _make_datetimetz(tz):
    """Return a ``DatetimeTZDtype`` with unit ``'ns'`` for the given ``tz``.

    ``tz`` is first passed through ``pa.lib.string_to_tzinfo`` (presumably
    turning a timezone name into a tzinfo object -- confirm against pyarrow),
    and the converted value is what the dtype is built with.
    """
    # Imported lazily, at call time rather than at module import time.
    from pyarrow.compat import DatetimeTZDtype

    tzinfo = pa.lib.string_to_tzinfo(tz)
    dtype = DatetimeTZDtype('ns', tz=tzinfo)
    return dtype