Пример #1
0
def _reconstruct_block(item, columns=None, extension_columns=None):
    """
    Build a pandas Block out of one serialized `item` dictionary.

    The `item` comes from pyarrow's serialization or from
    arrow::python::ConvertTableToPandas. Which kind of block gets built is
    decided by the marker keys found in `item`, checked in this order:

    * ``'dictionary'`` -> categorical built from the codes in ``'block'``
      plus the ``'dictionary'`` categories and the ``'ordered'`` flag
    * ``'timezone'``   -> DatetimeTZBlock with a timezone-aware dtype
    * ``'object'``     -> the ``'block'`` payload is unpickled first
    * ``'py_array'``   -> ExtensionBlock created through the pandas dtype's
      ``__from_arrow__`` hook
    * none of these    -> plain block wrapping ``'block'`` unchanged

    Parameters
    ----------
    item : dict
        Must contain ``'placement'`` (pandas block placement). Basic types
        also carry ``'block'`` (np.ndarray of values); the other types add
        the marker keys listed above.
    columns :
        Column names of the table being constructed. Only consulted for
        extension types, where it is indexed by the block placement.
    extension_columns : dict
        Mapping of {column_name: pandas_dtype} for the columns that are
        converted to a pandas ExtensionBlock.

    Returns
    -------
    pandas Block

    Raises
    ------
    ValueError
        If the pandas dtype registered for an extension column does not
        provide ``__from_arrow__``.

    """
    import pandas.core.internals as _int

    values = item.get('block')
    placement = item['placement']

    # Dictionary-encoded data becomes a pandas categorical.
    if 'dictionary' in item:
        categorical = _pandas_api.categorical_type.from_codes(
            values, categories=item['dictionary'],
            ordered=item['ordered'])
        return _int.make_block(categorical, placement=placement)

    # Timezone-aware timestamps need the dedicated block class and dtype.
    if 'timezone' in item:
        tz_dtype = make_datetimetz(item['timezone'])
        return _int.make_block(values, placement=placement,
                               klass=_int.DatetimeTZBlock,
                               dtype=tz_dtype)

    # Object payloads arrive pickled and have to be restored first.
    if 'object' in item:
        return _int.make_block(builtin_pickle.loads(values),
                               placement=placement)

    # Extension types: exactly one column per block, converted by its dtype.
    if 'py_array' in item:
        arrow_array = item['py_array']
        assert len(placement) == 1
        column_name = columns[placement[0]]
        ext_dtype = extension_columns[column_name]
        if hasattr(ext_dtype, '__from_arrow__'):
            extension_array = ext_dtype.__from_arrow__(arrow_array)
            return _int.make_block(extension_array, placement=placement)
        raise ValueError("This column does not support to be converted "
                         "to a pandas ExtensionArray")

    # Basic types: wrap the values as they are.
    return _int.make_block(values, placement=placement)
Пример #2
0
def _load_pickle_from_buffer(data):
    """
    Unpickle an object from a buffer-like `data`.

    `data` is wrapped in a ``memoryview`` and that view is handed to
    ``builtin_pickle.loads``; the unpickled object is returned.

    NOTE(review): assuming ``builtin_pickle`` is the standard pickle module,
    loading can execute arbitrary code, so `data` should come from a trusted
    source -- confirm against callers.
    """
    return builtin_pickle.loads(memoryview(data))