Пример #1
0
def dataframe_to_serialized_dict(frame):
    """Flatten a frame's internal blocks into a dict of per-block records.

    Parameters
    ----------
    frame : object exposing ``_data`` (presumably a pandas DataFrame --
        TODO confirm against callers)
        Its ``_data`` attribute must provide ``axes`` and ``blocks``.

    Returns
    -------
    dict
        ``{'blocks': [...], 'axes': [...]}``. Each block record holds
        ``'placement'`` and ``'block'``, plus ``'timezone'`` for tz-aware
        datetime blocks, ``'dictionary'``/``'ordered'`` for categorical
        blocks, and an ``'object'`` marker when the values were pickled.
    """
    # NOTE(review): `_data` is a private attribute of the frame -- confirm it
    # is available on every pandas version this code is expected to support.
    manager = frame._data

    axes = list(manager.axes)
    serialized_blocks = []

    for blk in manager.blocks:
        record = {}
        data = blk.values

        if _pandas_api.is_datetimetz(data.dtype):
            # Remember the timezone as a string, then unwrap to the
            # underlying values when the container exposes them.
            record['timezone'] = pa.lib.tzinfo_to_string(data.tz)
            data = getattr(data, 'values', data)
        elif _pandas_api.is_categorical(data):
            # Categoricals are stored as their codes plus the categories.
            record['dictionary'] = data.categories
            record['ordered'] = data.ordered
            data = data.codes

        record['placement'] = blk.mgr_locs.as_array
        record['block'] = data

        # Object-dtype arrays are replaced by their pickled bytes; the
        # 'object' key (value None) marks that this substitution happened.
        if data.dtype == np.dtype(object):
            record['object'] = None
            record['block'] = builtin_pickle.dumps(
                data, protocol=builtin_pickle.HIGHEST_PROTOCOL)

        serialized_blocks.append(record)

    return {'blocks': serialized_blocks, 'axes': axes}
Пример #2
0
def dataframe_to_types(df, preserve_index, columns=None):
    """Determine an Arrow type for each column selected for conversion.

    Parameters
    ----------
    df : object
        Passed through to ``_get_columns_to_convert`` and
        ``construct_metadata`` (presumably a pandas DataFrame -- TODO
        confirm).
    preserve_index : object
        Forwarded unchanged to the same two helpers.
    columns : optional
        Forwarded to ``_get_columns_to_convert``; defaults to None.

    Returns
    -------
    tuple
        ``(all_names, types, metadata)`` where ``types`` has one entry per
        column returned in ``columns_to_convert``.
    """
    (all_names, column_names, _, index_descriptors, index_columns,
     columns_to_convert, _) = _get_columns_to_convert(
        df, None, preserve_index, columns)

    def _resolve_type(col):
        # Work out the Arrow type of a single column.
        data = col.values
        if _pandas_api.is_categorical(data):
            return pa.array(col, from_pandas=True).type
        if _pandas_api.is_extension_array_dtype(data):
            # Only a zero-row slice is converted here; the type is all
            # that is read from the result.
            return pa.array(col.head(0), from_pandas=True).type

        # Try to derive the type from the ndarray/dtype alone first.
        data, hint = get_datetimetz_type(data, col.dtype, None)
        resolved = pa.lib._ndarray_to_arrow_type(data, hint)
        if resolved is None:
            # No type could be derived: convert the column and read it off.
            resolved = pa.array(col, from_pandas=True).type
        return resolved

    types = [_resolve_type(col) for col in columns_to_convert]

    metadata = construct_metadata(
        columns_to_convert, df, column_names, index_columns,
        index_descriptors, preserve_index, types)

    return all_names, types, metadata
Пример #3
0
def dataframe_to_types(df, preserve_index, columns=None):
    """Determine an Arrow type for each column selected for conversion.

    Parameters
    ----------
    df : object
        Passed through to ``_get_columns_to_convert`` and
        ``construct_metadata`` (presumably a pandas DataFrame -- TODO
        confirm).
    preserve_index : object
        Forwarded unchanged to the same two helpers.
    columns : optional
        Forwarded to ``_get_columns_to_convert``; defaults to None.

    Returns
    -------
    tuple
        ``(all_names, types, metadata)`` where ``types`` has one entry per
        column returned in ``columns_to_convert``.
    """
    selection = _get_columns_to_convert(df, None, preserve_index, columns)
    (all_names, column_names, index_descriptors, index_columns,
     columns_to_convert, _) = selection

    types = []
    for col in columns_to_convert:
        raw = col.values
        if not _pandas_api.is_categorical(raw):
            # Non-categorical: try to derive the type from the ndarray/dtype
            # alone, and keep it if that succeeds.
            raw, hint = get_datetimetz_type(raw, col.dtype, None)
            derived = pa.lib._ndarray_to_arrow_type(raw, hint)
            if derived is not None:
                types.append(derived)
                continue
        # Categorical columns, and columns whose type could not be derived
        # above, are converted so the type can be read from the result.
        types.append(pa.array(col, from_pandas=True).type)

    metadata = construct_metadata(
        df, column_names, index_columns, index_descriptors,
        preserve_index, types)

    return all_names, types, metadata