Пример #1
0
def dshape_from_pandas_helper(col):
    """Translate the dtype of one dataframe column into a datashape measure.

    The column's dtype is checked in this order:

    * categorical dtypes (pandas, or cudf when the ``cudf`` module-level name
      is truthy) become a ``datashape.Categorical`` built from the column's
      categories and its ``ordered`` flag;
    * dtypes whose ``kind`` is ``'M'`` become an optional
      ``datashape.DateTime`` carrying the timezone, if any, as a string;
    * ``RaggedDtype`` instances are returned unchanged;
    * anything else goes through ``datashape.CType.from_numpy_dtype``, with
      object mapped to string, and string/datetime results wrapped in
      ``datashape.Option``.
    """
    dtype = col.dtype

    is_categorical = (
        isinstance(dtype, type(pd.Categorical.dtype))
        or isinstance(dtype, pd.api.types.CategoricalDtype)
        or (cudf and isinstance(dtype, cudf.core.dtypes.CategoricalDtype))
    )
    if is_categorical:
        # Work on a plain array of the categories; unicode ('U') arrays are
        # re-typed as object before their dtype is written into the dshape.
        cat_values = np.array(col.cat.categories)
        if cat_values.dtype.kind == 'U':
            cat_values = cat_values.astype('object')

        shape_spec = '{} * {}'.format(len(col.cat.categories),
                                      cat_values.dtype)
        return datashape.Categorical(
            cat_values,
            type=datashape.dshape(shape_spec),
            ordered=col.cat.ordered,
        )

    if dtype.kind == 'M':
        tz = getattr(dtype, 'tz', None)
        # A tz object, when present, is handed to DataShape as its string form.
        tz_name = None if tz is None else str(tz)
        return datashape.Option(datashape.DateTime(tz=tz_name))

    if isinstance(dtype, RaggedDtype):
        return dtype

    measure = datashape.CType.from_numpy_dtype(dtype)
    if measure == datashape.object_:
        measure = datashape.string
    if measure in (datashape.string, datashape.datetime_):
        return datashape.Option(measure)
    return measure
Пример #2
0
def dshape_from_pandas_helper(col):
    """Return the datashape measure describing one pandas column.

    Parameters
    ----------
    col : pandas.Series
        Column whose ``dtype`` is translated.

    Returns
    -------
    A ``datashape.Categorical`` for categorical columns, an optional
    ``datashape.DateTime`` for columns whose dtype ``kind`` is ``'M'``, and
    otherwise the result of ``datashape.CType.from_numpy_dtype`` (object is
    mapped to string; string and datetime results are wrapped in
    ``datashape.Option``).
    """
    dtype = col.dtype

    # ``type(pd.Categorical.dtype)`` only names the categorical dtype class
    # when ``Categorical.dtype`` is a class-level dtype instance. Where it is
    # a property the check can never match, so the public
    # ``CategoricalDtype`` class is tested as well.
    if (isinstance(dtype, type(pd.Categorical.dtype)) or
            isinstance(dtype, pd.api.types.CategoricalDtype)):
        categories = col.cat.categories
        cat_dshape = datashape.dshape('{} * {}'.format(
            len(categories),
            categories.dtype,
        ))
        # Read the flag from ``Series.cat.ordered``; the older
        # ``Series.cat.categorical`` accessor is not available on current
        # pandas releases.
        return datashape.Categorical(categories.values,
                                     type=cat_dshape,
                                     ordered=col.cat.ordered)
    elif dtype.kind == 'M':
        tz = getattr(dtype, 'tz', None)
        if tz is not None:
            # Pandas stores this as a pytz.tzinfo, but DataShape wants a string
            tz = str(tz)
        return datashape.Option(datashape.DateTime(tz=tz))

    dshape = datashape.CType.from_numpy_dtype(dtype)
    # Object columns are treated as strings.
    dshape = datashape.string if dshape == datashape.object_ else dshape
    if dshape in (datashape.string, datashape.datetime_):
        return datashape.Option(dshape)
    return dshape