def dataframe_to_serialized_dict(frame):
    """
    Break a DataFrame's internal block manager into a plain dict.

    Parameters
    ----------
    frame : object
        Object exposing a block manager as ``frame._data``; the manager
        must provide ``axes`` and ``blocks``.

    Returns
    -------
    dict
        ``{'blocks': [...], 'axes': [...]}``. Every entry of ``'blocks'``
        is a dict holding ``'placement'`` and ``'block'``, plus:

        * ``'timezone'`` for tz-aware datetime blocks,
        * ``'dictionary'`` and ``'ordered'`` for categorical blocks (in
          which case ``'block'`` holds the codes),
        * ``'object'`` (always ``None``) for object-dtype blocks, whose
          ``'block'`` entry is the pickled array instead of the array.
    """
    manager = frame._data
    axes = list(manager.axes)
    serialized = []

    for blk in manager.blocks:
        payload = blk.values
        entry = {}

        if _pandas_api.is_datetimetz(payload.dtype):
            # Record the timezone before unwrapping to the underlying data.
            entry['timezone'] = pa.lib.tzinfo_to_string(payload.tz)
            payload = getattr(payload, 'values', payload)
        elif _pandas_api.is_categorical(payload):
            categories, ordered = payload.categories, payload.ordered
            entry['dictionary'] = categories
            entry['ordered'] = ordered
            payload = payload.codes

        entry['placement'] = blk.mgr_locs.as_array
        entry['block'] = payload

        # Object arrays are stored pickled; the 'object' key marks them.
        if payload.dtype == np.dtype(object):
            entry['object'] = None
            entry['block'] = builtin_pickle.dumps(
                payload, protocol=builtin_pickle.HIGHEST_PROTOCOL)

        serialized.append(entry)

    return {'blocks': serialized, 'axes': axes}
def _extract_index_level(table, result_table, descr, field_name_to_metadata):
    """
    Extract one serialized index column from ``table`` as a pandas Series.

    Parameters
    ----------
    table : object
        Table whose schema is searched for the index column and from which
        the column data is read.
    result_table : object
        Table from which the index column is removed once it has been
        extracted.
    descr : mapping
        Index descriptor; only its ``'field_name'`` entry is read.
    field_name_to_metadata : mapping
        Maps a field name to a metadata mapping whose ``'name'`` entry is
        the logical index name.

    Returns
    -------
    tuple
        ``(result_table, index_level, index_name)``. If the column is not
        present in ``table``, ``result_table`` is returned unchanged and
        both ``index_level`` and ``index_name`` are ``None``.
    """
    field_name = descr['field_name']
    logical_name = field_name_to_metadata[field_name]['name']
    index_name = _backwards_compatible_index_name(field_name, logical_name)
    i = table.schema.get_field_index(field_name)

    if i == -1:
        # The serialized index column was removed by the user.
        # Return result_table (not table) so that columns already removed
        # from result_table are not reintroduced.
        return result_table, None, None

    col = table.column(i)
    col_pandas = col.to_pandas()
    values = col_pandas.values

    if hasattr(values, 'flags') and not values.flags.writeable:
        # ARROW-1054: in pandas 0.19.2, factorize will reject
        # non-writeable arrays when calling MultiIndex.from_arrays
        values = values.copy()

    pd = _pandas_api.pd

    if _pandas_api.is_datetimetz(col_pandas.dtype):
        # Rebuild the tz-aware level: localize the raw values as UTC, then
        # convert to the timezone carried by the column's pandas dtype.
        index_level = (pd.Series(values).dt.tz_localize('utc')
                       .dt.tz_convert(col_pandas.dtype.tz))
    else:
        index_level = pd.Series(values, dtype=col_pandas.dtype)

    result_table = result_table.remove_column(
        result_table.schema.get_field_index(field_name)
    )
    return result_table, index_level, index_name
def _extract_index_level(table, result_table, field_name,
                         field_name_to_metadata):
    """
    Extract one serialized index column from ``table`` as a pandas Series.

    Parameters
    ----------
    table : object
        Table whose schema is searched for ``field_name`` and from which
        the column data is read.
    result_table : object
        Table from which the index column is removed once it has been
        extracted.
    field_name : str
        Name of the serialized index column.
    field_name_to_metadata : mapping
        Maps a field name to a metadata mapping whose ``'name'`` entry is
        the logical index name.

    Returns
    -------
    tuple
        ``(result_table, index_level, index_name)``. If the column is not
        present in ``table``, ``result_table`` is returned unchanged and
        both ``index_level`` and ``index_name`` are ``None``.
    """
    logical_name = field_name_to_metadata[field_name]['name']
    index_name = _backwards_compatible_index_name(field_name, logical_name)
    i = table.schema.get_field_index(field_name)

    if i == -1:
        # The serialized index column was removed by the user.
        # Return result_table (not table) so that columns already removed
        # from result_table are not reintroduced.
        return result_table, None, None

    col = table.column(i)
    col_pandas = col.to_pandas()
    values = col_pandas.values

    if hasattr(values, 'flags') and not values.flags.writeable:
        # ARROW-1054: in pandas 0.19.2, factorize will reject
        # non-writeable arrays when calling MultiIndex.from_arrays
        values = values.copy()

    pd = _pandas_api.pd

    if _pandas_api.is_datetimetz(col_pandas.dtype):
        # Rebuild the tz-aware level: localize the raw values as UTC, then
        # convert to the timezone carried by the column's pandas dtype.
        index_level = (pd.Series(values).dt.tz_localize('utc').dt.tz_convert(
            col_pandas.dtype.tz))
    else:
        index_level = pd.Series(values, dtype=col_pandas.dtype)

    result_table = result_table.remove_column(
        result_table.schema.get_field_index(field_name))
    return result_table, index_level, index_name
def get_datetimetz_type(values, dtype, type_):
    """
    Choose the Arrow type to use for datetime64 values.

    Parameters
    ----------
    values : array-like
        Data whose ``dtype.type`` is compared against ``np.datetime64``.
    dtype : object
        Dtype checked for timezone awareness; its ``tz`` and ``unit``
        attributes are read when it is tz-aware.
    type_ : object or None
        Type supplied by the caller; ``None`` requests inference.

    Returns
    -------
    tuple
        ``(values, type_)``. ``values`` is passed through untouched.
        ``type_`` is returned as given when the values are not datetime64
        or when it was not ``None``; otherwise it is a tz-aware timestamp
        type for tz-aware dtypes, or the type derived from the NumPy dtype.
    """
    is_datetime64 = values.dtype.type == np.datetime64
    if not is_datetime64:
        return values, type_

    tz_aware = _pandas_api.is_datetimetz(dtype)
    if type_ is not None:
        # An explicitly supplied type always wins.
        return values, type_

    if tz_aware:
        # No user type passed: build a timestamp type carrying the timezone.
        timezone = dtype.tz
        resolution = dtype.unit
        inferred = pa.timestamp(resolution, timezone)
    else:
        # Fall back on what the NumPy dtype says.
        inferred = pa.from_numpy_dtype(values.dtype)

    return values, inferred