Exemplo n.º 1
0
    def na_op(x, y):
        try:
            result = op(x, y)
        except TypeError:
            if isinstance(y, list):
                y = lib.list_to_object_array(y)

            if isinstance(y, (np.ndarray, pd.Series)):
                if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)):
                    result = op(x, y)  # when would this be hit?
                else:
                    x = com._ensure_object(x)
                    y = com._ensure_object(y)
                    result = lib.vec_binop(x, y, op)
            else:
                try:

                    # let null fall thru
                    if not isnull(y):
                        y = bool(y)
                    result = lib.scalar_binop(x, y, op)
                except:
                    raise TypeError("cannot compare a dtyped [{0}] array with "
                                    "a scalar of type [{1}]".format(
                                        x.dtype, type(y).__name__))

        return result
Exemplo n.º 2
0
    def na_op(x, y):
        try:
            result = op(x, y)
        except TypeError:
            if isinstance(y, list):
                y = lib.list_to_object_array(y)

            if isinstance(y, (np.ndarray, pd.Series)):
                if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)):
                    result = op(x, y)  # when would this be hit?
                else:
                    x = com._ensure_object(x)
                    y = com._ensure_object(y)
                    result = lib.vec_binop(x, y, op)
            else:
                try:

                    # let null fall thru
                    if not isnull(y):
                        y = bool(y)
                    result = lib.scalar_binop(x, y, op)
                except:
                    raise TypeError("cannot compare a dtyped [{0}] array with "
                                    "a scalar of type [{1}]".format(
                                        x.dtype, type(y).__name__))

        return result
Exemplo n.º 3
0
def get_dtype_kinds(l):
    """
    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    typs = set()
    for arr in l:

        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            typ = "category"
        elif com.is_sparse(arr):
            typ = "sparse"
        elif com.is_datetimetz(arr):
            typ = "datetimetz"
        elif com.is_datetime64_dtype(dtype):
            typ = "datetime"
        elif com.is_timedelta64_dtype(dtype):
            typ = "timedelta"
        elif com.is_object_dtype(dtype):
            typ = "object"
        elif com.is_bool_dtype(dtype):
            typ = "bool"
        else:
            typ = dtype.kind
        typs.add(typ)
    return typs
Exemplo n.º 4
0
    def _wrap_result_expand(self, result, expand=False):
        from pandas.core.index import Index
        if not hasattr(result, 'ndim'):
            return result

        if isinstance(self.series, Index):
            name = getattr(result, 'name', None)
            # if result is a boolean np.array, return the np.array
            # instead of wrapping it into a boolean Index (GH 8875)
            if hasattr(result, 'dtype') and is_bool_dtype(result):
                return result

            if expand:
                result = list(result)
            return Index(result, name=name)
        else:
            index = self.series.index
            if expand:
                cons_row = self.series._constructor
                cons = self.series._constructor_expanddim
                data = [cons_row(x) for x in result]
                return cons(data, index=index)
            else:
                name = getattr(result, 'name', None)
                cons = self.series._constructor
                return cons(result, name=name, index=index)
Exemplo n.º 5
0
Arquivo: sql.py Projeto: yazici/pandas
    def _sqlalchemy_type(self, arr_or_dtype):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Interval)

        if arr_or_dtype is date:
            return Date
        if com.is_datetime64_dtype(arr_or_dtype):
            try:
                tz = arr_or_dtype.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(arr_or_dtype):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(arr_or_dtype):
            return Float
        elif com.is_integer_dtype(arr_or_dtype):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(arr_or_dtype):
            return Boolean
        return Text
Exemplo n.º 6
0
    def restore_type(self, dtype, sample=None):
        """Restore type from Pandas
        """

        # Pandas types
        if pdc.is_bool_dtype(dtype):
            return 'boolean'
        elif pdc.is_datetime64_any_dtype(dtype):
            return 'datetime'
        elif pdc.is_integer_dtype(dtype):
            return 'integer'
        elif pdc.is_numeric_dtype(dtype):
            return 'number'

        # Python types
        if sample is not None:
            if isinstance(sample, (list, tuple)):
                return 'array'
            elif isinstance(sample, datetime.date):
                return 'date'
            elif isinstance(sample, isodate.Duration):
                return 'duration'
            elif isinstance(sample, dict):
                return 'object'
            elif isinstance(sample, six.string_types):
                return 'string'
            elif isinstance(sample, datetime.time):
                return 'time'

        return 'string'
Exemplo n.º 7
0
    def _wrap_result(self, result, use_codes=True, name=None):

        # for category, we do the stuff on the categories, so blow it up
        # to the full series again
        # But for some operations, we have to do the stuff on the full values,
        # so make it possible to skip this step as the method already did this before
        # the transformation...
        if use_codes and self._is_categorical:
            result = take_1d(result, self._orig.cat.codes)

        # leave as it is to keep extract and get_dummies results
        # can be merged to _wrap_result_expand in v0.17
        from pandas.core.series import Series
        from pandas.core.frame import DataFrame
        from pandas.core.index import Index

        if not hasattr(result, 'ndim'):
            return result
        name = name or getattr(result, 'name', None) or self._orig.name

        if result.ndim == 1:
            if isinstance(self._orig, Index):
                # if result is a boolean np.array, return the np.array
                # instead of wrapping it into a boolean Index (GH 8875)
                if is_bool_dtype(result):
                    return result
                return Index(result, name=name)
            return Series(result, index=self._orig.index, name=name)
        else:
            assert result.ndim < 3
            return DataFrame(result, index=self._orig.index)
Exemplo n.º 8
0
    def _evaluate_compare(self, other, op):
        """
        We have been called because a comparison between
        8 aware arrays. numpy >= 1.11 will
        now warn about NaT comparisons
        """

        # coerce to a similar object
        if not isinstance(other, type(self)):
            if not com.is_list_like(other):
                # scalar
                other = [other]
            elif lib.isscalar(lib.item_from_zerodim(other)):
                # ndarray scalar
                other = [other.item()]
            other = type(self)(other)

        # compare
        result = getattr(self.asi8, op)(other.asi8)

        # technically we could support bool dtyped Index
        # for now just return the indexing array directly
        mask = (self._isnan) | (other._isnan)
        if is_bool_dtype(result):
            result[mask] = False
            return result
        try:
            result[mask] = tslib.iNaT
            return Index(result)
        except TypeError:
            return result
Exemplo n.º 9
0
    def wrapper(self, other):
        func = getattr(super(TimedeltaIndex, self), opname)
        if _is_convertible_to_td(other):
            other = _to_m8(other)
            result = func(other)
            if com.isnull(other):
                result.fill(nat_result)
        else:
            if not com.is_list_like(other):
                raise TypeError("cannot compare a TimedeltaIndex with type "
                                "{0}".format(type(other)))

            other = TimedeltaIndex(other).values
            result = func(other)
            result = _values_from_object(result)

            if isinstance(other, Index):
                o_mask = other.values.view('i8') == tslib.iNaT
            else:
                o_mask = other.view('i8') == tslib.iNaT

            if o_mask.any():
                result[o_mask] = nat_result

        if self.hasnans:
            result[self._isnan] = nat_result

        # support of bool dtype indexers
        if com.is_bool_dtype(result):
            return result
        return Index(result)
Exemplo n.º 10
0
    def _evaluate_compare(self, other, op):
        """
        We have been called because a comparison between
        8 aware arrays. numpy >= 1.11 will
        now warn about NaT comparisons
        """

        # coerce to a similar object
        if not isinstance(other, type(self)):
            if not com.is_list_like(other):
                # scalar
                other = [other]
            elif lib.isscalar(lib.item_from_zerodim(other)):
                # ndarray scalar
                other = [other.item()]
            other = type(self)(other)

        # compare
        result = getattr(self.asi8, op)(other.asi8)

        # technically we could support bool dtyped Index
        # for now just return the indexing array directly
        mask = (self._isnan) | (other._isnan)
        if is_bool_dtype(result):
            result[mask] = False
            return result
        try:
            result[mask] = tslib.iNaT
            return Index(result)
        except TypeError:
            return result
Exemplo n.º 11
0
def na_value_for_dtype(dtype):
    """
    Return a dtype compat na value

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    dtype compat na value
    """

    from pandas.core import common as com
    from pandas import NaT
    dtype = pandas_dtype(dtype)

    if (com.is_datetime64_dtype(dtype) or
        com.is_datetime64tz_dtype(dtype) or
        com.is_timedelta64_dtype(dtype)):
        return NaT
    elif com.is_float_dtype(dtype):
        return np.nan
    elif com.is_integer_dtype(dtype):
        return 0
    elif com.is_bool_dtype(dtype):
        return False
    return np.nan
Exemplo n.º 12
0
def get_dtype_kinds(l):
    """
    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    typs = set()
    for arr in l:

        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            typ = 'category'
        elif com.is_sparse(arr):
            typ = 'sparse'
        elif com.is_datetimetz(arr):
            typ = 'datetimetz'
        elif com.is_datetime64_dtype(dtype):
            typ = 'datetime'
        elif com.is_timedelta64_dtype(dtype):
            typ = 'timedelta'
        elif com.is_object_dtype(dtype):
            typ = 'object'
        elif com.is_bool_dtype(dtype):
            typ = 'bool'
        else:
            typ = dtype.kind
        typs.add(typ)
    return typs
Exemplo n.º 13
0
    def _sqlalchemy_type(self, arr_or_dtype):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
            DateTime, Date, Interval)

        if arr_or_dtype is date:
            return Date
        if com.is_datetime64_dtype(arr_or_dtype):
            try:
                tz = arr_or_dtype.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(arr_or_dtype):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(arr_or_dtype):
            return Float
        elif com.is_integer_dtype(arr_or_dtype):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(arr_or_dtype):
            return Boolean
        return Text
Exemplo n.º 14
0
    def _wrap_result_expand(self, result, expand=False):
        if not isinstance(expand, bool):
            raise ValueError("expand must be True or False")

        from pandas.core.index import Index, MultiIndex
        if not hasattr(result, 'ndim'):
            return result

        if isinstance(self.series, Index):
            name = getattr(result, 'name', None)
            # if result is a boolean np.array, return the np.array
            # instead of wrapping it into a boolean Index (GH 8875)
            if hasattr(result, 'dtype') and is_bool_dtype(result):
                return result

            if expand:
                result = list(result)
                return MultiIndex.from_tuples(result, names=name)
            else:
                return Index(result, name=name)
        else:
            index = self.series.index
            if expand:
                def cons_row(x):
                    if is_list_like(x):
                        return x
                    else:
                        return [ x ]
                cons = self.series._constructor_expanddim
                data = [cons_row(x) for x in result]
                return cons(data, index=index)
            else:
                name = getattr(result, 'name', None)
                cons = self.series._constructor
                return cons(result, name=name, index=index)
Exemplo n.º 15
0
    def _wrap_result(self, result, use_codes=True, name=None):

        # for category, we do the stuff on the categories, so blow it up
        # to the full series again
        # But for some operations, we have to do the stuff on the full values,
        # so make it possible to skip this step as the method already did this before
        # the transformation...
        if use_codes and self._is_categorical:
            result = take_1d(result, self._orig.cat.codes)

        # leave as it is to keep extract and get_dummies results
        # can be merged to _wrap_result_expand in v0.17
        from pandas.core.series import Series
        from pandas.core.frame import DataFrame
        from pandas.core.index import Index

        if not hasattr(result, 'ndim'):
            return result
        name = name or getattr(result, 'name', None) or self._orig.name

        if result.ndim == 1:
            if isinstance(self._orig, Index):
                # if result is a boolean np.array, return the np.array
                # instead of wrapping it into a boolean Index (GH 8875)
                if is_bool_dtype(result):
                    return result
                return Index(result, name=name)
            return Series(result, index=self._orig.index, name=name)
        else:
            assert result.ndim < 3
            return DataFrame(result, index=self._orig.index)
Exemplo n.º 16
0
    def _wrap_result_expand(self, result, expand=False):
        from pandas.core.index import Index
        if not hasattr(result, 'ndim'):
            return result

        if isinstance(self.series, Index):
            name = getattr(result, 'name', None)
            # if result is a boolean np.array, return the np.array
            # instead of wrapping it into a boolean Index (GH 8875)
            if hasattr(result, 'dtype') and is_bool_dtype(result):
                return result

            if expand:
                result = list(result)
            return Index(result, name=name)
        else:
            index = self.series.index
            if expand:
                cons_row = self.series._constructor
                cons = self.series._constructor_expanddim
                data = [cons_row(x) for x in result]
                return cons(data, index=index)
            else:
                name = getattr(result, 'name', None)
                cons = self.series._constructor
                return cons(result, name=name, index=index)
Exemplo n.º 17
0
def get_dtype_kinds(l):
    """
    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    typs = set()
    for arr in l:

        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            typ = 'category'
        elif com.is_sparse(arr):
            typ = 'sparse'
        elif com.is_datetimetz(arr):
            typ = 'datetimetz'
        elif com.is_datetime64_dtype(dtype):
            typ = 'datetime'
        elif com.is_timedelta64_dtype(dtype):
            typ = 'timedelta'
        elif com.is_object_dtype(dtype):
            typ = 'object'
        elif com.is_bool_dtype(dtype):
            typ = 'bool'
        else:
            typ = dtype.kind
        typs.add(typ)
    return typs
Exemplo n.º 18
0
    def wrapper(self, other):
        func = getattr(super(TimedeltaIndex, self), opname)
        if _is_convertible_to_td(other):
            other = _to_m8(other)
            result = func(other)
            if com.isnull(other):
                result.fill(nat_result)
        else:
            if not com.is_list_like(other):
                raise TypeError("cannot compare a TimedeltaIndex with type "
                                "{0}".format(type(other)))

            other = TimedeltaIndex(other).values
            result = func(other)
            result = _values_from_object(result)

            if isinstance(other, Index):
                o_mask = other.values.view('i8') == tslib.iNaT
            else:
                o_mask = other.view('i8') == tslib.iNaT

            if o_mask.any():
                result[o_mask] = nat_result

        if self.hasnans:
            result[self._isnan] = nat_result

        # support of bool dtype indexers
        if com.is_bool_dtype(result):
            return result
        return Index(result)
Exemplo n.º 19
0
Arquivo: sql.py Projeto: Jemash/pandas
    def _sqlalchemy_type(self, col):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
            DateTime, Date, Time, Interval)

        if com.is_datetime64_dtype(col):
            try:
                tz = col.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Exemplo n.º 20
0
def _isfinite(values):
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values)
            or is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
Exemplo n.º 21
0
    def _wrap_result_expand(self, result, expand=False):
        if not isinstance(expand, bool):
            raise ValueError("expand must be True or False")

        from pandas.core.index import Index, MultiIndex
        if not hasattr(result, 'ndim'):
            return result

        if isinstance(self.series, Index):
            name = getattr(result, 'name', None)
            # if result is a boolean np.array, return the np.array
            # instead of wrapping it into a boolean Index (GH 8875)
            if hasattr(result, 'dtype') and is_bool_dtype(result):
                return result

            if expand:
                result = list(result)
                return MultiIndex.from_tuples(result, names=name)
            else:
                return Index(result, name=name)
        else:
            index = self.series.index
            if expand:
                def cons_row(x):
                    if is_list_like(x):
                        return x
                    else:
                        return [ x ]
                cons = self.series._constructor_expanddim
                data = [cons_row(x) for x in result]
                return cons(data, index=index)
            else:
                name = getattr(result, 'name', None)
                cons = self.series._constructor
                return cons(result, name=name, index=index)
Exemplo n.º 22
0
    def _sqlalchemy_type(self, col):
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Time)

        if com.is_datetime64_dtype(col):
            try:
                tz = col.tzinfo
                return DateTime(timezone=True)
            except:
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Exemplo n.º 23
0
def _isfinite(values):
    if _is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values) or
            is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
Exemplo n.º 24
0
def pandas_col_to_ibis_type(col):
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np
    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if pdcom.is_datetime64_ns_dtype(dty):
            return 'timestamp'
        else:
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units"
                                    .format(col.name, dty))
    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types
    if issubclass(dty.type, np.int8):
        return 'int8'
    if issubclass(dty.type, np.int16):
        return 'int16'
    if issubclass(dty.type, np.int32):
        return 'int32'
    if issubclass(dty.type, np.int64):
        return 'int64'
    if issubclass(dty.type, np.float32):
        return 'float'
    if issubclass(dty.type, np.float64):
        return 'double'
    if issubclass(dty.type, np.uint8):
        return 'int16'
    if issubclass(dty.type, np.uint16):
        return 'int32'
    if issubclass(dty.type, np.uint32):
        return 'int64'
    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64"
                                .format(col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}"
                            .format(col.name, dty))
Exemplo n.º 25
0
def dtype_to_jtstype(dtype):
    # Convert
    if pdc.is_bool_dtype(dtype):
        return 'boolean'
    elif pdc.is_integer_dtype(dtype):
        return 'integer'
    elif pdc.is_numeric_dtype(dtype):
        return 'number'
    elif pdc.is_datetime64_any_dtype(dtype):
        return 'datetime'
    else:
        return 'string'
Exemplo n.º 26
0
def pandas_col_to_ibis_type(col):
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np
    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if pdcom.is_datetime64_ns_dtype(dty):
            return 'timestamp'
        else:
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units".format(
                                        col.name, dty))
    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types
    if issubclass(dty.type, np.int8):
        return 'int8'
    if issubclass(dty.type, np.int16):
        return 'int16'
    if issubclass(dty.type, np.int32):
        return 'int32'
    if issubclass(dty.type, np.int64):
        return 'int64'
    if issubclass(dty.type, np.float32):
        return 'float'
    if issubclass(dty.type, np.float64):
        return 'double'
    if issubclass(dty.type, np.uint8):
        return 'int16'
    if issubclass(dty.type, np.uint16):
        return 'int32'
    if issubclass(dty.type, np.uint32):
        return 'int64'
    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64".format(
            col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}".format(col.name, dty))
Exemplo n.º 27
0
    def f(x, y):
        xmask = isnull(x)
        ymask = isnull(y)
        mask = xmask | ymask

        result = op(x, y)

        if mask.any():
            if is_bool_dtype(result):
                result = result.astype('O')
            np.putmask(result, mask, np.nan)

        return result
Exemplo n.º 28
0
    def f(x, y):
        xmask = isnull(x)
        ymask = isnull(y)
        mask = xmask | ymask

        result = op(x, y)

        if mask.any():
            if is_bool_dtype(result):
                result = result.astype('O')
            np.putmask(result, mask, np.nan)

        return result
Exemplo n.º 29
0
def _get_values(values,
                skipna,
                fill_value=None,
                fill_value_typ=None,
                isfinite=False,
                copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype,
                                 fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = _maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max
Exemplo n.º 30
0
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data types

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes,
    and Table Schema dtypes.

    ==============  =================
    Pandas type     Table Schema type
    ==============  =================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  datetime
    timedelta64[ns] duration
    object          str
    categorical     any
    =============== =================
    """
    if is_integer_dtype(x):
        return 'integer'
    elif is_bool_dtype(x):
        return 'boolean'
    elif is_numeric_dtype(x):
        return 'number'
    elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x)):
        return 'datetime'
    elif is_timedelta64_dtype(x):
        return 'duration'
    elif is_categorical_dtype(x):
        return 'any'
    elif is_string_dtype(x):
        return 'string'
    else:
        return 'any'
Exemplo n.º 31
0
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data types

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes,
    and Table Schema dtypes.

    ==============  =================
    Pandas type     Table Schema type
    ==============  =================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  datetime
    timedelta64[ns] duration
    object          str
    categorical     any
    =============== =================
    """
    if is_integer_dtype(x):
        return 'integer'
    elif is_bool_dtype(x):
        return 'boolean'
    elif is_numeric_dtype(x):
        return 'number'
    elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x)):
        return 'datetime'
    elif is_timedelta64_dtype(x):
        return 'duration'
    elif is_categorical_dtype(x):
        return 'any'
    elif is_string_dtype(x):
        return 'string'
    else:
        return 'any'
Exemplo n.º 32
0
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = _maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max
Exemplo n.º 33
0
    def _wrap_result_expand(self, result, expand=False):
        if not isinstance(expand, bool):
            raise ValueError("expand must be True or False")

        # for category, we do the stuff on the categories, so blow it up
        # to the full series again
        if self._is_categorical:
            result = take_1d(result, self._orig.cat.codes)

        from pandas.core.index import Index, MultiIndex
        if not hasattr(result, 'ndim'):
            return result

        if isinstance(self._orig, Index):
            name = getattr(result, 'name', None)
            # if result is a boolean np.array, return the np.array
            # instead of wrapping it into a boolean Index (GH 8875)
            if hasattr(result, 'dtype') and is_bool_dtype(result):
                return result

            if expand:
                result = list(result)
                return MultiIndex.from_tuples(result, names=name)
            else:
                return Index(result, name=name)
        else:
            index = self._orig.index
            if expand:

                def cons_row(x):
                    if is_list_like(x):
                        return x
                    else:
                        return [x]

                cons = self._orig._constructor_expanddim
                data = [cons_row(x) for x in result]
                return cons(data, index=index)
            else:
                name = getattr(result, 'name', None)
                cons = self._orig._constructor
                return cons(result, name=name, index=index)
Exemplo n.º 34
0
    def _wrap_result_expand(self, result, expand=False):
        if not isinstance(expand, bool):
            raise ValueError("expand must be True or False")

        # for category, we do the stuff on the categories, so blow it up
        # to the full series again
        if self._is_categorical:
            result = take_1d(result, self._orig.cat.codes)

        from pandas.core.index import Index, MultiIndex
        if not hasattr(result, 'ndim'):
            return result

        if isinstance(self._orig, Index):
            name = getattr(result, 'name', None)
            # if result is a boolean np.array, return the np.array
            # instead of wrapping it into a boolean Index (GH 8875)
            if hasattr(result, 'dtype') and is_bool_dtype(result):
                return result

            if expand:
                result = list(result)
                return MultiIndex.from_tuples(result, names=name)
            else:
                return Index(result, name=name)
        else:
            index = self._orig.index
            if expand:

                def cons_row(x):
                    if is_list_like(x):
                        return x
                    else:
                        return [x]

                cons = self._orig._constructor_expanddim
                data = [cons_row(x) for x in result]
                return cons(data, index=index)
            else:
                name = getattr(result, 'name', None)
                cons = self._orig._constructor
                return cons(result, name=name, index=index)
Exemplo n.º 35
0
    def _wrap_result(self, result):
        from pandas.core.series import Series
        from pandas.core.frame import DataFrame
        from pandas.core.index import Index

        if not hasattr(result, 'ndim'):
            return result
        elif result.ndim == 1:
            name = getattr(result, 'name', None)
            if isinstance(self.series, Index):
                # if result is a boolean np.array, return the np.array
                # instead of wrapping it into a boolean Index (GH 8875)
                if is_bool_dtype(result):
                    return result
                return Index(result, name=name or self.series.name)
            return Series(result, index=self.series.index,
                          name=name or self.series.name)
        else:
            assert result.ndim < 3
            return DataFrame(result, index=self.series.index)
Exemplo n.º 36
0
    def _wrap_result(self, result):
        from pandas.core.series import Series
        from pandas.core.frame import DataFrame
        from pandas.core.index import Index

        if not hasattr(result, 'ndim'):
            return result
        elif result.ndim == 1:
            name = getattr(result, 'name', None)
            if isinstance(self.series, Index):
                # if result is a boolean np.array, return the np.array
                # instead of wrapping it into a boolean Index (GH 8875)
                if is_bool_dtype(result):
                    return result
                return Index(result, name=name or self.series.name)
            return Series(result, index=self.series.index,
                          name=name or self.series.name)
        else:
            assert result.ndim < 3
            return DataFrame(result, index=self.series.index)
Exemplo n.º 37
0
def _ensure_numeric(x):
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            x = x.astype(np.float64)
        elif is_object_dtype(x):
            try:
                x = x.astype(np.complex128)
            except:
                x = x.astype(np.float64)
            else:
                if not np.any(x.imag):
                    x = x.real
    elif not (is_float(x) or is_integer(x) or is_complex(x)):
        try:
            x = float(x)
        except Exception:
            try:
                x = complex(x)
            except Exception:
                raise TypeError('Could not convert %s to numeric' % str(x))
    return x
Exemplo n.º 38
0
def _ensure_numeric(x):
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            x = x.astype(np.float64)
        elif is_object_dtype(x):
            try:
                x = x.astype(np.complex128)
            except:
                x = x.astype(np.float64)
            else:
                if not np.any(x.imag):
                    x = x.real
    elif not (is_float(x) or is_integer(x) or is_complex(x)):
        try:
            x = float(x)
        except Exception:
            try:
                x = complex(x)
            except Exception:
                raise TypeError('Could not convert %s to numeric' % str(x))
    return x
Exemplo n.º 39
0
    def _wrap_result(self, result, **kwargs):
        # leave as it is to keep extract and get_dummies results
        # can be merged to _wrap_result_expand in v0.17
        from pandas.core.series import Series
        from pandas.core.frame import DataFrame
        from pandas.core.index import Index

        if not hasattr(result, 'ndim'):
            return result
        name = kwargs.get('name') or getattr(result, 'name', None) or self.series.name

        if result.ndim == 1:
            if isinstance(self.series, Index):
                # if result is a boolean np.array, return the np.array
                # instead of wrapping it into a boolean Index (GH 8875)
                if is_bool_dtype(result):
                    return result
                return Index(result, name=name)
            return Series(result, index=self.series.index, name=name)
        else:
            assert result.ndim < 3
            return DataFrame(result, index=self.series.index)
Exemplo n.º 40
0
    def _wrap_result(self, result, **kwargs):

        # leave as it is to keep extract and get_dummies results
        # can be merged to _wrap_result_expand in v0.17
        from pandas.core.series import Series
        from pandas.core.frame import DataFrame
        from pandas.core.index import Index

        if not hasattr(result, 'ndim'):
            return result
        name = kwargs.get('name') or getattr(result, 'name', None) or self.series.name

        if result.ndim == 1:
            if isinstance(self.series, Index):
                # if result is a boolean np.array, return the np.array
                # instead of wrapping it into a boolean Index (GH 8875)
                if is_bool_dtype(result):
                    return result
                return Index(result, name=name)
            return Series(result, index=self.series.index, name=name)
        else:
            assert result.ndim < 3
            return DataFrame(result, index=self.series.index)
Exemplo n.º 41
0
    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc. Needs additional handling as
        PeriodIndex stores internal data as int dtype

        Replace this to __numpy_ufunc__ in future version
        """
        if isinstance(context, tuple) and len(context) > 0:
            func = context[0]
            if (func is np.add):
                return self._add_delta(context[1][1])
            elif (func is np.subtract):
                return self._add_delta(-context[1][1])
            elif isinstance(func, np.ufunc):
                if 'M->M' not in func.types:
                    msg = "ufunc '{0}' not supported for the PeriodIndex"
                    # This should be TypeError, but TypeError cannot be raised
                    # from here because numpy catches.
                    raise ValueError(msg.format(func.__name__))

        if com.is_bool_dtype(result):
            return result
        return PeriodIndex(result, freq=self.freq, name=self.name)
Exemplo n.º 42
0
    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc. Needs additional handling as
        PeriodIndex stores internal data as int dtype

        Replace this to __numpy_ufunc__ in future version
        """
        if isinstance(context, tuple) and len(context) > 0:
            func = context[0]
            if (func is np.add):
                return self._add_delta(context[1][1])
            elif (func is np.subtract):
                return self._add_delta(-context[1][1])
            elif isinstance(func, np.ufunc):
                if 'M->M' not in func.types:
                    msg = "ufunc '{0}' not supported for the PeriodIndex"
                    # This should be TypeError, but TypeError cannot be raised
                    # from here because numpy catches.
                    raise ValueError(msg.format(func.__name__))

        if com.is_bool_dtype(result):
            return result
        return PeriodIndex(result, freq=self.freq, name=self.name)