Exemplo n.º 1
0
    def test_infer_dtype_bytes(self):
        """Byte strings must be inferred the same way for 'S1' and object arrays."""
        # the expected label depends on the PY2 flag
        expected = 'bytes' if not PY2 else 'string'

        # fixed-width bytes dtype
        raw = np.array(list('abc'), dtype='S1')
        self.assertEqual(lib.infer_dtype(raw), expected)

        # same values boxed in an object array
        boxed = raw.astype(object)
        self.assertEqual(lib.infer_dtype(boxed), expected)
Exemplo n.º 2
0
    def test_infer_dtype_bytes(self):
        """Check dtype inference on bytes held in 'S1' and in object arrays."""
        if PY2:
            label = 'string'
        else:
            label = 'bytes'

        # array with a native bytes dtype
        s1_values = np.array(list('abc'), dtype='S1')
        inferred = lib.infer_dtype(s1_values)
        self.assertEqual(inferred, label)

        # identical content, object dtype
        object_values = s1_values.astype(object)
        inferred = lib.infer_dtype(object_values)
        self.assertEqual(inferred, label)
Exemplo n.º 3
0
    def test_integers(self):
        """Integer inference on object arrays, mixed arrays and native ints."""
        # python ints together with numpy integer scalars
        boxed_ints = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O')
        self.assertEqual(lib.infer_dtype(boxed_ints), 'integer')

        # adding one string is expected to yield 'mixed-integer'
        with_text = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'],
                             dtype='O')
        self.assertEqual(lib.infer_dtype(with_text), 'mixed-integer')

        # native 32-bit integer array
        native = np.array([1, 2, 3, 4, 5], dtype='i4')
        self.assertEqual(lib.infer_dtype(native), 'integer')
Exemplo n.º 4
0
    def test_integers(self):
        """Exercise ``lib.infer_dtype`` on three integer-flavoured arrays."""
        infer = lib.infer_dtype

        # object array of python and numpy integers
        obj_ints = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O')
        self.assertEqual(infer(obj_ints), 'integer')

        # same integers plus a trailing string
        obj_mixed = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'],
                             dtype='O')
        self.assertEqual(infer(obj_mixed), 'mixed-integer')

        # plain int32 array
        int32_values = np.array([1, 2, 3, 4, 5], dtype='i4')
        self.assertEqual(infer(int32_values), 'integer')
Exemplo n.º 5
0
    def test_integers(self):
        """Integers in object/native arrays infer as 'integer'; a stray string
        changes the result to 'mixed-integer'."""
        ints_as_objects = np.array(
            [1, 2, 3, np.int64(4), np.int32(5)], dtype="O")
        self.assertEqual(lib.infer_dtype(ints_as_objects), "integer")

        ints_and_string = np.array(
            [1, 2, 3, np.int64(4), np.int32(5), "foo"], dtype="O")
        self.assertEqual(lib.infer_dtype(ints_and_string), "mixed-integer")

        ints_native = np.array([1, 2, 3, 4, 5], dtype="i4")
        self.assertEqual(lib.infer_dtype(ints_native), "integer")
Exemplo n.º 6
0
    def _convert_to_array(self, values, name=None, other=None):
        """
        Convert ``values`` for use in a datetime/timedelta operation.

        The type reported by ``lib.infer_dtype`` selects the conversion.

        Parameters
        ----------
        values : scalar or list-like
            A non-list-like input is wrapped in a one-element ndarray.
        name : str, optional
            Operation name (e.g. ``'__mul__'``); decides whether plain
            integers are accepted and appears in the error message.
        other : optional
            The other operand; its ``dtype`` is used when ``values`` holds
            only nulls.

        Returns
        -------
        The converted values; the concrete type depends on the branch taken.

        Raises
        ------
        TypeError
            If ``values`` cannot take part in a datetime/timedelta operation.
        """
        from pandas.tseries.timedeltas import to_timedelta

        coerce = True
        if not is_list_like(values):
            # wrap scalars so the code below can treat everything as an array
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if `other` is a timedelta but only nulls (e.g. pd.NaT) were
            # passed, we are on the wrong path: use the dtype of `other`
            # and fill with tslib.iNaT
            if (other is not None and other.dtype == 'timedelta64[ns]'
                    and all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # anything that is not already a datetime64 ndarray/Series is
            # converted; datetime64 ndarray/Series pass through unchanged
            elif not (isinstance(values, (np.ndarray, pd.Series))
                      and com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers are only accepted for division/multiplication,
            # in which case they are returned unchanged
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets: collect each offset's `delta` (None when
            # the attribute is missing)
            os = np.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError(
                    "cannot use a non-absolute DateOffset in "
                    "datetime/timedelta operations [{0}]".format(', '.join(
                        [com.pprint_thing(v) for v in values[mask]])))
            values = to_timedelta(os, coerce=coerce)
        elif inferred_type == 'floating':

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            # NOTE(review): `other` is dereferenced without a None check here
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
Exemplo n.º 7
0
    def _convert_to_array(self, values, name=None, other=None):
        """
        Convert ``values`` for use in a datetime/timedelta operation.

        The type reported by ``lib.infer_dtype`` selects the conversion.

        Parameters
        ----------
        values : scalar or list-like
            A non-list-like input is wrapped in a one-element ndarray.
        name : str, optional
            Operation name (e.g. ``'__mul__'``); decides whether plain
            integers are accepted and appears in the error message.
        other : optional
            The other operand; its ``dtype`` is used when ``values`` holds
            only nulls.

        Returns
        -------
        The converted values; the concrete type depends on the branch taken.

        Raises
        ------
        TypeError
            If ``values`` cannot take part in a datetime/timedelta operation.
        """
        from pandas.tseries.timedeltas import to_timedelta

        coerce = True
        if not is_list_like(values):
            # wrap scalars so the code below can treat everything as an array
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if `other` is a timedelta but only nulls (e.g. pd.NaT) were
            # passed, we are on the wrong path: use the dtype of `other`
            # and fill with iNaT
            if (other is not None and other.dtype == 'timedelta64[ns]' and
                    all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # anything that is not already a datetime64 ndarray/Series is
            # converted; datetime64 ndarray/Series pass through unchanged
            elif not (isinstance(values, (np.ndarray, pd.Series)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers are only accepted for division/multiplication,
            # in which case they are returned unchanged
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets: collect each offset's `delta` (None when
            # the attribute is missing)
            os = np.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError("cannot use a non-absolute DateOffset in "
                                "datetime/timedelta operations [{0}]".format(
                                    ', '.join([com.pprint_thing(v)
                                               for v in values[mask]])))
            values = to_timedelta(os, coerce=coerce)
        elif inferred_type == 'floating':

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            # NOTE(review): `other` is dereferenced without a None check here
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
Exemplo n.º 8
0
def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series in its properties accessor.

    A Series with a datetime64 dtype yields ``DatetimeProperties``; a Series
    whose values are inferred as periods yields ``PeriodProperties``.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series or is not datetimelike.
    """
    template = "cannot convert an object of type {0} to a datetimelike index"

    if not isinstance(data, Series):
        raise TypeError(template.format(type(data)))

    index = data.index

    # datetime64 dtype: wrap directly
    if issubclass(data.dtype.type, np.datetime64):
        return DatetimeProperties(DatetimeIndex(data, copy=copy), index)

    if isinstance(data, PeriodIndex):
        return PeriodProperties(PeriodIndex(data, copy=copy), index)

    # fall back to inspecting the underlying values
    data = com._values_from_object(data)
    if lib.infer_dtype(data) == 'period':
        return PeriodProperties(PeriodIndex(data), index)

    # `data` now refers to the extracted values, so that type is reported
    raise TypeError(template.format(type(data)))
Exemplo n.º 9
0
    def _from_arraylike(cls, data, freq, tz):
        """
        Normalize array-like ``data`` (and ``freq``) for period construction.

        Parameters
        ----------
        data : ndarray, PeriodIndex, DatetimeIndex, Int64Index or iterable
            Scalars and single ``Period`` objects are rejected.
        freq : frequency or None
            Passed through ``Period._maybe_convert_freq`` when given;
            otherwise extracted from ``data`` where possible.
        tz : passed to ``dt64arr_to_periodarr`` for datetime64 input

        Returns
        -------
        tuple of (data, freq)
            ``data`` is presumably an array of int64 period ordinals --
            TODO confirm against the ``period`` helpers.

        Raises
        ------
        ValueError
            If ``data`` is a scalar or a ``Period``, or if array input is
            given and ``freq`` is still None after the extraction attempt.
        """
        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        if not isinstance(
                data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
            if is_scalar(data) or isinstance(data, Period):
                raise ValueError('PeriodIndex() must be called with a '
                                 'collection of some kind, %s was passed' %
                                 repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            # first try to treat the input as integers; any TypeError or
            # ValueError (including the explicit 'freq not specified' below)
            # is caught and sends the data down the object path instead
            try:
                data = _ensure_int64(data)
                if freq is None:
                    raise ValueError('freq not specified')
                data = np.array([Period(x, freq=freq) for x in data],
                                dtype=np.int64)
            except (TypeError, ValueError):
                data = _ensure_object(data)

                if freq is None:
                    freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)
        else:
            if isinstance(data, PeriodIndex):
                if freq is None or freq == data.freq:
                    # same (or unspecified) frequency: reuse the values as-is
                    freq = data.freq
                    data = data._values
                else:
                    # different frequency requested: convert the values
                    base1, _ = _gfc(data.freq)
                    base2, _ = _gfc(freq)
                    data = period.period_asfreq_arr(data._values, base1, base2,
                                                    1)
            else:
                # object arrays holding only integers are cast to int64 first
                if is_object_dtype(data):
                    inferred = infer_dtype(data)
                    if inferred == 'integer':
                        data = data.astype(np.int64)

                if freq is None and is_object_dtype(data):
                    # must contain Period instance and thus extract ordinals
                    freq = period.extract_freq(data)
                    data = period.extract_ordinals(data, freq)

                if freq is None:
                    msg = 'freq not specified and cannot be inferred'
                    raise ValueError(msg)

                # int64 data is kept as-is; everything else is converted
                if data.dtype != np.int64:
                    if np.issubdtype(data.dtype, np.datetime64):
                        data = dt64arr_to_periodarr(data, freq, tz)
                    else:
                        data = _ensure_object(data)
                        data = period.extract_ordinals(data, freq)

        return data, freq
Exemplo n.º 10
0
    def test_bools(self):
        """Boolean inference on object arrays, mixed arrays and native bools."""
        # python bools in an object array
        py_bools = np.array([True, False, True, True, True], dtype="O")
        self.assertEqual(lib.infer_dtype(py_bools), "boolean")

        # numpy bool scalars in an object array
        np_bools = np.array([np.bool_(True), np.bool_(False)], dtype="O")
        self.assertEqual(lib.infer_dtype(np_bools), "boolean")

        # one string among the booleans is expected to give 'mixed'
        with_text = np.array([True, False, True, "foo"], dtype="O")
        self.assertEqual(lib.infer_dtype(with_text), "mixed")

        # native bool dtype
        native = np.array([True, False, True], dtype=bool)
        self.assertEqual(lib.infer_dtype(native), "boolean")
Exemplo n.º 11
0
    def _sqlalchemy_type(self, col):
        """
        Choose the SQLAlchemy column type used to store ``col``.

        Parameters
        ----------
        col : column whose dtype (and, for object data, inferred content)
            decides the SQL type

        Returns
        -------
        A SQLAlchemy type. All branches return the type class, except the
        timezone-aware datetime branch, which returns a
        ``DateTime(timezone=True)`` instance.

        Notes
        -----
        Timedelta columns trigger a ``UserWarning`` and map to ``BigInteger``.
        Anything not recognised falls back to ``Text``.
        """
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Time)

        if com.is_datetime64_dtype(col):
            try:
                # probe only: accessing the attribute raises when it is
                # missing, which selects the plain DateTime fallback
                col.tzinfo
                return DateTime(timezone=True)
            except Exception:
                # best-effort fallback, but let KeyboardInterrupt/SystemExit
                # propagate (a bare ``except:`` would swallow them)
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean

        # remaining dtypes: look at the values themselves
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Exemplo n.º 12
0
Arquivo: sql.py Projeto: Jemash/pandas
    def _sql_type_name(self, col):
        """
        Return the flavor-specific SQL type name for ``col``.

        The dtype of ``col`` selects a key of ``_SQL_TYPES`` (``"text"`` when
        nothing more specific matches); the entry for ``self.pd_sql.flavor``
        under that key is returned.

        Timedelta columns trigger a ``UserWarning`` and are mapped to
        ``"int"``.
        """
        pytype = col.dtype.type
        pytype_name = "text"
        if issubclass(pytype, np.floating):
            pytype_name = "float"
        elif com.is_timedelta64_dtype(pytype):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            pytype_name = "int"
        elif issubclass(pytype, np.integer):
            pytype_name = "int"
        elif issubclass(pytype, np.datetime64) or pytype is datetime:
            # Caution: np.datetime64 is also a subclass of np.number.
            pytype_name = "datetime"
        elif issubclass(pytype, np.bool_):
            pytype_name = "bool"
        elif issubclass(pytype, object):
            # ``np.object`` was only an alias of the builtin ``object`` and
            # has been removed from NumPy; using the builtin keeps the same
            # check without the AttributeError on current NumPy.
            inferred = lib.infer_dtype(com._ensure_object(col))
            if inferred == "date":
                pytype_name = "date"
            elif inferred == "time":
                pytype_name = "time"

        return _SQL_TYPES[pytype_name][self.pd_sql.flavor]
Exemplo n.º 13
0
Arquivo: sql.py Projeto: Jemash/pandas
    def _sqlalchemy_type(self, col):
        """
        Choose the SQLAlchemy column type used to store ``col``.

        Parameters
        ----------
        col : column whose dtype (and, for object data, inferred content)
            decides the SQL type

        Returns
        -------
        A SQLAlchemy type. All branches return the type class, except the
        timezone-aware datetime branch, which returns a
        ``DateTime(timezone=True)`` instance.

        Notes
        -----
        Timedelta columns trigger a ``UserWarning`` and map to ``BigInteger``.
        Anything not recognised falls back to ``Text``.
        """
        # ``Interval`` was imported here but never used, so it is dropped
        from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                      DateTime, Date, Time)

        if com.is_datetime64_dtype(col):
            try:
                # probe only: accessing the attribute raises when it is
                # missing, which selects the plain DateTime fallback
                col.tzinfo
                return DateTime(timezone=True)
            except Exception:
                # best-effort fallback, but let KeyboardInterrupt/SystemExit
                # propagate (a bare ``except:`` would swallow them)
                return DateTime
        if com.is_timedelta64_dtype(col):
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning)
            return BigInteger
        elif com.is_float_dtype(col):
            return Float
        elif com.is_integer_dtype(col):
            # TODO: Refine integer size.
            return BigInteger
        elif com.is_bool_dtype(col):
            return Boolean

        # remaining dtypes: look at the values themselves
        inferred = lib.infer_dtype(com._ensure_object(col))
        if inferred == 'date':
            return Date
        if inferred == 'time':
            return Time
        return Text
Exemplo n.º 14
0
def is_datetime_arraylike(arr):
    """ return whether `arr` is a DatetimeIndex or an array-like of datetimes """
    if isinstance(arr, ABCDatetimeIndex):
        return True

    if not isinstance(arr, (np.ndarray, ABCSeries)):
        # other containers: rely on their `inferred_type`, if they have one
        return getattr(arr, 'inferred_type', None) == 'datetime'

    # ndarray / Series: only object dtype is inspected
    return arr.dtype == object and lib.infer_dtype(arr) == 'datetime'
Exemplo n.º 15
0
def is_period_arraylike(arr):
    """ return whether `arr` is a PeriodIndex or an array-like of periods """
    if isinstance(arr, ABCPeriodIndex):
        return True

    if not isinstance(arr, (np.ndarray, ABCSeries)):
        # other containers: rely on their `inferred_type`, if they have one
        return getattr(arr, 'inferred_type', None) == 'period'

    # ndarray / Series: only object dtype is inspected
    return arr.dtype == object and lib.infer_dtype(arr) == 'period'
Exemplo n.º 16
0
def _hashtable_algo(f, values, return_dtype=None):
    """
    Call `f` with the hash table class and caster matching `values.dtype`.

    f(HashTable, type_caster) -> result

    For datetime64/timedelta64 input the result of `f` is viewed as
    `return_dtype` (default 'M8[ns]' / 'm8[ns]').
    """
    dtype = values.dtype

    if is_float_dtype(dtype):
        return f(htable.Float64HashTable, _ensure_float64)
    if is_signed_integer_dtype(dtype):
        return f(htable.Int64HashTable, _ensure_int64)
    if is_unsigned_integer_dtype(dtype):
        return f(htable.UInt64HashTable, _ensure_uint64)

    # datetimelike values go through the int64 table and are viewed back
    if is_datetime64_dtype(dtype):
        view_as = return_dtype or 'M8[ns]'
        return f(htable.Int64HashTable, _ensure_int64).view(view_as)
    if is_timedelta64_dtype(dtype):
        view_as = return_dtype or 'm8[ns]'
        return f(htable.Int64HashTable, _ensure_int64).view(view_as)

    # its cheaper to use a String Hash Table than Object
    if lib.infer_dtype(values) == 'string':
        table_cls = htable.StringHashTable
    else:
        table_cls = htable.PyObjectHashTable
    return f(table_cls, _ensure_object)
Exemplo n.º 17
0
def make_block(values, items, ref_items):
    """
    Build the Block subclass matching the dtype of ``values``.

    Parameters
    ----------
    values : ndarray
        Data for the block. Integer data that is not int64 is cast to
        ``"i8"``; object data inferred as datetimes is converted when
        possible.
    items, ref_items
        Passed unchanged to the block constructor.

    Returns
    -------
    An instance of FloatBlock, ComplexBlock, DatetimeBlock, IntBlock,
    BoolBlock or ObjectBlock (the fallback).
    """
    dtype = values.dtype
    vtype = dtype.type
    klass = None

    if issubclass(vtype, np.floating):
        klass = FloatBlock
    elif issubclass(vtype, np.complexfloating):
        klass = ComplexBlock
    elif issubclass(vtype, np.datetime64):
        klass = DatetimeBlock
    elif issubclass(vtype, np.integer):
        if vtype != np.int64:
            values = values.astype("i8")
        klass = IntBlock
    elif dtype == np.bool_:
        klass = BoolBlock

    # try to infer a datetimeblock (skipped for empty arrays)
    if klass is None and np.prod(values.shape):
        flat = values.flatten()
        inferred_type = lib.infer_dtype(flat)
        if inferred_type == "datetime":

            # we have an object array that has been inferred as datetime,
            # so convert it
            try:
                values = tslib.array_to_datetime(flat).reshape(values.shape)
                klass = DatetimeBlock
            except Exception:
                # conversion failed: it is already object, so leave it.
                # ``Exception`` rather than a bare ``except:`` so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                pass

    if klass is None:
        klass = ObjectBlock

    return klass(values, items, ref_items, ndim=values.ndim)
Exemplo n.º 18
0
    def test_floats(self):
        """Float inference on object arrays, mixed arrays and native floats."""
        # python floats together with numpy float scalars
        boxed = np.array(
            [1.0, 2.0, 3.0, np.float64(4), np.float32(5)], dtype="O")
        self.assertEqual(lib.infer_dtype(boxed), "floating")

        # ints, numpy floats and a string: expected to be 'mixed-integer'
        with_text = np.array(
            [1, 2, 3, np.float64(4), np.float32(5), "foo"], dtype="O")
        self.assertEqual(lib.infer_dtype(with_text), "mixed-integer")

        # native float32
        single = np.array([1, 2, 3, 4, 5], dtype="f4")
        self.assertEqual(lib.infer_dtype(single), "floating")

        # native float64
        double = np.array([1, 2, 3, 4, 5], dtype="f8")
        self.assertEqual(lib.infer_dtype(double), "floating")
Exemplo n.º 19
0
def _hashtable_algo(f, values, return_dtype=None):
    """
    Pick a hash table class and caster from the dtype of `values` and
    hand both to `f`.

    f(HashTable, type_caster) -> result

    datetime64 and timedelta64 results are viewed as `return_dtype`,
    defaulting to 'M8[ns]' and 'm8[ns]' respectively.
    """
    values_dtype = values.dtype

    if is_float_dtype(values_dtype):
        result = f(htable.Float64HashTable, _ensure_float64)
    elif is_signed_integer_dtype(values_dtype):
        result = f(htable.Int64HashTable, _ensure_int64)
    elif is_unsigned_integer_dtype(values_dtype):
        result = f(htable.UInt64HashTable, _ensure_uint64)
    elif is_datetime64_dtype(values_dtype):
        return_dtype = return_dtype or 'M8[ns]'
        result = f(htable.Int64HashTable, _ensure_int64).view(return_dtype)
    elif is_timedelta64_dtype(values_dtype):
        return_dtype = return_dtype or 'm8[ns]'
        result = f(htable.Int64HashTable, _ensure_int64).view(return_dtype)
    elif lib.infer_dtype(values) == 'string':
        # its cheaper to use a String Hash Table than Object
        result = f(htable.StringHashTable, _ensure_object)
    else:
        # use Object
        result = f(htable.PyObjectHashTable, _ensure_object)

    return result
Exemplo n.º 20
0
    def _sql_type_name(self, col):
        """
        Return the flavor-specific SQL type name for ``col``.

        The dtype of ``col`` selects a key of ``_SQL_TYPES`` (``"text"`` when
        nothing more specific matches); the entry for ``self.pd_sql.flavor``
        under that key is returned.

        Timedelta columns trigger a ``UserWarning`` and are mapped to
        ``"int"``.
        """
        pytype = col.dtype.type
        pytype_name = "text"
        if issubclass(pytype, np.floating):
            pytype_name = "float"
        elif com.is_timedelta64_dtype(pytype):
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the "
                "database.", UserWarning)
            pytype_name = "int"
        elif issubclass(pytype, np.integer):
            pytype_name = "int"
        elif issubclass(pytype, np.datetime64) or pytype is datetime:
            # Caution: np.datetime64 is also a subclass of np.number.
            pytype_name = "datetime"
        elif issubclass(pytype, np.bool_):
            pytype_name = "bool"
        elif issubclass(pytype, object):
            # ``np.object`` was only an alias of the builtin ``object`` and
            # has been removed from NumPy; using the builtin keeps the same
            # check without the AttributeError on current NumPy.
            inferred = lib.infer_dtype(com._ensure_object(col))
            if inferred == "date":
                pytype_name = "date"
            elif inferred == "time":
                pytype_name = "time"

        return _SQL_TYPES[pytype_name][self.pd_sql.flavor]
Exemplo n.º 21
0
def is_datetime_arraylike(arr):
    """ report whether `arr` is datetime arraylike / a DatetimeIndex """
    if isinstance(arr, ABCDatetimeIndex):
        result = True
    elif isinstance(arr, (np.ndarray, ABCSeries)):
        # object dtype is required before the values are inspected
        result = arr.dtype == object and lib.infer_dtype(arr) == 'datetime'
    else:
        # anything else: use its `inferred_type` attribute when available
        result = getattr(arr, 'inferred_type', None) == 'datetime'
    return result
Exemplo n.º 22
0
    def _from_arraylike(cls, data, freq, tz):
        """
        Normalize array-like ``data`` (and ``freq``) for period construction.

        Parameters
        ----------
        data : ndarray, PeriodIndex, DatetimeIndex, Int64Index or iterable
            Scalars and single ``Period`` objects are rejected.
        freq : frequency or None
            Passed through ``Period._maybe_convert_freq`` when given;
            otherwise extracted from ``data`` where possible.
        tz : passed to ``dt64arr_to_periodarr`` for datetime64 input

        Returns
        -------
        tuple of (data, freq)
            ``data`` is presumably an array of int64 period ordinals --
            TODO confirm against the ``period`` helpers.

        Raises
        ------
        ValueError
            If ``data`` is a scalar or a ``Period``, or if array input is
            given and ``freq`` is still None after the extraction attempt.
        """
        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        if not isinstance(data, (np.ndarray, PeriodIndex,
                                 DatetimeIndex, Int64Index)):
            if is_scalar(data) or isinstance(data, Period):
                raise ValueError('PeriodIndex() must be called with a '
                                 'collection of some kind, %s was passed'
                                 % repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            # first try to treat the input as integers; any TypeError or
            # ValueError (including the explicit 'freq not specified' below)
            # is caught and sends the data down the object path instead
            try:
                data = _ensure_int64(data)
                if freq is None:
                    raise ValueError('freq not specified')
                data = np.array([Period(x, freq=freq) for x in data],
                                dtype=np.int64)
            except (TypeError, ValueError):
                data = _ensure_object(data)

                if freq is None:
                    freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)
        else:
            if isinstance(data, PeriodIndex):
                if freq is None or freq == data.freq:
                    # same (or unspecified) frequency: reuse the values as-is
                    freq = data.freq
                    data = data._values
                else:
                    # different frequency requested: convert the values
                    base1, _ = _gfc(data.freq)
                    base2, _ = _gfc(freq)
                    data = period.period_asfreq_arr(data._values,
                                                    base1, base2, 1)
            else:
                # object arrays holding only integers are cast to int64 first
                if is_object_dtype(data):
                    inferred = infer_dtype(data)
                    if inferred == 'integer':
                        data = data.astype(np.int64)

                if freq is None and is_object_dtype(data):
                    # must contain Period instance and thus extract ordinals
                    freq = period.extract_freq(data)
                    data = period.extract_ordinals(data, freq)

                if freq is None:
                    msg = 'freq not specified and cannot be inferred'
                    raise ValueError(msg)

                # int64 data is kept as-is; everything else is converted
                if data.dtype != np.int64:
                    if np.issubdtype(data.dtype, np.datetime64):
                        data = dt64arr_to_periodarr(data, freq, tz)
                    else:
                        data = _ensure_object(data)
                        data = period.extract_ordinals(data, freq)

        return data, freq
Exemplo n.º 23
0
def is_period_arraylike(arr):
    """ report whether `arr` is period arraylike / a PeriodIndex """
    if isinstance(arr, ABCPeriodIndex):
        result = True
    elif isinstance(arr, (np.ndarray, ABCSeries)):
        # object dtype is required before the values are inspected
        result = arr.dtype == object and lib.infer_dtype(arr) == 'period'
    else:
        # anything else: use its `inferred_type` attribute when available
        result = getattr(arr, 'inferred_type', None) == 'period'
    return result
Exemplo n.º 24
0
    def test_bools(self):
        """Boolean inference on object arrays, mixed arrays and native bools."""
        # python bools in an object array
        py_bools = np.array([True, False, True, True, True], dtype='O')
        self.assertEqual(lib.infer_dtype(py_bools), 'boolean')

        # numpy bool scalars in an object array
        np_bools = np.array([np.bool_(True), np.bool_(False)], dtype='O')
        self.assertEqual(lib.infer_dtype(np_bools), 'boolean')

        # one string among the booleans is expected to give 'mixed'
        with_text = np.array([True, False, True, 'foo'], dtype='O')
        self.assertEqual(lib.infer_dtype(with_text), 'mixed')

        # native bool dtype
        native = np.array([True, False, True], dtype=bool)
        self.assertEqual(lib.infer_dtype(native), 'boolean')
Exemplo n.º 25
0
    def test_object(self):
        """A lone ``None`` in an object array is expected to infer as 'mixed'."""
        # GH 7431
        # with a single element nothing more specific can be inferred
        only_none = np.array([None], dtype='O')
        self.assertEqual(lib.infer_dtype(only_none), 'mixed')
Exemplo n.º 26
0
def _get_data_algo(values, func_map):
    """
    Select the entry of `func_map` matching `values` and convert `values`
    to the representation that entry expects.

    Returns a ``(func, values)`` tuple. When no specific entry was chosen,
    ``func_map['object']`` is used.
    """
    chosen = None

    if is_float_dtype(values):
        chosen, values = func_map['float64'], _ensure_float64(values)
    elif needs_i8_conversion(values):
        chosen, values = func_map['int64'], values.view('i8')
    elif is_signed_integer_dtype(values):
        chosen, values = func_map['int64'], _ensure_int64(values)
    elif is_unsigned_integer_dtype(values):
        chosen, values = func_map['uint64'], _ensure_uint64(values)
    else:
        values = _ensure_object(values)

        # its cheaper to use a String Hash Table than Object
        if lib.infer_dtype(values) == 'string':
            try:
                chosen = func_map['string']
            except KeyError:
                # no string-specific entry: the object fallback applies
                pass

    if chosen is None:
        chosen = func_map['object']
    return chosen, values
Exemplo n.º 27
0
def unique1d(values):
    """
    Hash table-based unique

    The hash table class is chosen from ``values.dtype``; datetime64 and
    timedelta64 input goes through the int64 table and is viewed back as
    'M8[ns]' / 'm8[ns]'.
    """
    dtype = values.dtype

    if np.issubdtype(dtype, np.floating):
        hasher = htable.Float64HashTable(len(values))
        return np.array(hasher.unique(_ensure_float64(values)),
                        dtype=np.float64)

    if np.issubdtype(dtype, np.datetime64):
        hasher = htable.Int64HashTable(len(values))
        return hasher.unique(_ensure_int64(values)).view('M8[ns]')

    if np.issubdtype(dtype, np.timedelta64):
        hasher = htable.Int64HashTable(len(values))
        return hasher.unique(_ensure_int64(values)).view('m8[ns]')

    if np.issubdtype(dtype, np.signedinteger):
        hasher = htable.Int64HashTable(len(values))
        return hasher.unique(_ensure_int64(values))

    if np.issubdtype(dtype, np.unsignedinteger):
        hasher = htable.UInt64HashTable(len(values))
        return hasher.unique(_ensure_uint64(values))

    # its cheaper to use a String Hash Table than Object
    if lib.infer_dtype(values) == 'string':
        hasher = htable.StringHashTable(len(values))
    else:
        hasher = htable.PyObjectHashTable(len(values))
    return hasher.unique(_ensure_object(values))
Exemplo n.º 28
0
    def test_bools(self):
        """Exercise ``lib.infer_dtype`` on four boolean-flavoured arrays."""
        infer = lib.infer_dtype

        # object array of python bools
        obj_bools = np.array([True, False, True, True, True], dtype='O')
        self.assertEqual(infer(obj_bools), 'boolean')

        # object array of numpy bool scalars
        obj_np_bools = np.array([np.bool_(True), np.bool_(False)], dtype='O')
        self.assertEqual(infer(obj_np_bools), 'boolean')

        # booleans followed by a string
        obj_mixed = np.array([True, False, True, 'foo'], dtype='O')
        self.assertEqual(infer(obj_mixed), 'mixed')

        # array with a native bool dtype
        bool_values = np.array([True, False, True], dtype=bool)
        self.assertEqual(infer(bool_values), 'boolean')
Exemplo n.º 29
0
def unique1d(values):
    """
    Return the distinct elements of ``values`` using a hash table.

    The hash table implementation is picked from ``values.dtype``: floats,
    datetime64, timedelta64, signed and unsigned integers each use a typed
    table; everything else is hashed as strings (when inferred as 'string')
    or as generic Python objects.
    """
    dtype = values.dtype
    size_hint = len(values)

    if np.issubdtype(dtype, np.floating):
        hasher = htable.Float64HashTable(size_hint)
        return np.array(hasher.unique(_ensure_float64(values)),
                        dtype=np.float64)

    if np.issubdtype(dtype, np.datetime64):
        # hash the int64 representation, then view the result as datetimes
        hasher = htable.Int64HashTable(size_hint)
        return hasher.unique(_ensure_int64(values)).view('M8[ns]')

    if np.issubdtype(dtype, np.timedelta64):
        # same trick as datetimes, viewed back as timedeltas
        hasher = htable.Int64HashTable(size_hint)
        return hasher.unique(_ensure_int64(values)).view('m8[ns]')

    if np.issubdtype(dtype, np.signedinteger):
        hasher = htable.Int64HashTable(size_hint)
        return hasher.unique(_ensure_int64(values))

    if np.issubdtype(dtype, np.unsignedinteger):
        hasher = htable.UInt64HashTable(size_hint)
        return hasher.unique(_ensure_uint64(values))

    # it's cheaper to use a String Hash Table than an Object one
    if lib.infer_dtype(values) == 'string':
        hasher = htable.StringHashTable(size_hint)
    else:
        hasher = htable.PyObjectHashTable(size_hint)
    return hasher.unique(_ensure_object(values))
Exemplo n.º 30
0
    def test_object(self):
        """A single None in an object array is inferred as 'mixed'."""
        # GH 7431
        # with only one element nothing more specific can be inferred
        lone_none = np.array([None], dtype='O')
        self.assertEqual(lib.infer_dtype(lone_none), 'mixed')
Exemplo n.º 31
0
Arquivo: ops.py Projeto: ghl3/pandas
    def _convert_to_array(self, values, name=None, other=None):
        """
        Coerce ``values`` into a form usable in a datetime/timedelta operation.

        Parameters
        ----------
        values : scalar or list-like
            Operand to convert. A non-list-like value is wrapped in a
            one-element ndarray before its type is inferred.
        name : str, optional
            Operator method name (e.g. ``'__mul__'``). Only consulted for
            integer input, where it decides whether the operation is allowed.
        other : object, optional
            The opposite operand. Its ``dtype`` is used to build an all-NaT
            array when ``values`` contains only nulls.

        Returns
        -------
        The converted values; the concrete type depends on the branch taken.
        ``values`` is returned untouched when ``self._is_offset(values)`` is
        true.

        Raises
        ------
        TypeError
            For integer input with an operator other than
            ``__truediv__``/``__div__``/``__mul__``, for floating input that
            is not all-null, and for any other unsupported inferred type.
        """
        from pandas.tseries.timedeltas import to_timedelta

        # keep the untouched input: the tz check below needs the original
        # scalar, not the array it gets wrapped into
        ovalues = values
        if not is_list_like(values):
            values = np.array([values])

        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if the other operand is a timedelta but we were given pd.NaT,
            # we are on the wrong path: build an all-NaT array of other's dtype
            if (other is not None and other.dtype == 'timedelta64[ns]' and
                    all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # datetime with tz
            elif isinstance(ovalues, datetime.datetime) and hasattr(ovalues,'tz'):
                values = pd.DatetimeIndex(values)
            # datetime array with tz
            elif com.is_datetimetz(values):
                if isinstance(values, pd.Series):
                    values = values._values
            # anything that is not already a datetime64 ndarray/Series is
            # converted element-wise
            elif not (isinstance(values, (np.ndarray, pd.Series)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = to_timedelta(values, errors='coerce')
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers are only accepted for these operators
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif inferred_type == 'floating':
            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            # NOTE(review): ``other`` defaults to None and is dereferenced
            # here without a check -- confirm callers always supply it
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        elif self._is_offset(values):
            return values
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
Exemplo n.º 32
0
    def test_floats(self):
        """Check infer_dtype on float data, including one mixed case."""
        cases = [
            # object array of Python and numpy floats
            (np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O'),
             'floating'),
            # ints, floats and a string together
            (np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
                      dtype='O'),
             'mixed-integer'),
            # native float dtypes
            (np.array([1, 2, 3, 4, 5], dtype='f4'), 'floating'),
            (np.array([1, 2, 3, 4, 5], dtype='f8'), 'floating'),
        ]
        for arr, expected in cases:
            self.assertEqual(lib.infer_dtype(arr), expected)
Exemplo n.º 33
0
    def test_categorical(self):
        """Categoricals, bare or wrapped in a Series, infer as 'categorical'."""
        # GH 8974
        from pandas import Categorical, Series

        categoricals = (
            Categorical(list('abc')),
            Categorical(list('abc'), categories=['cegfab'], ordered=True),
        )
        for cat in categoricals:
            self.assertEqual(lib.infer_dtype(cat), 'categorical')
            self.assertEqual(lib.infer_dtype(Series(cat)), 'categorical')
Exemplo n.º 34
0
    def test_floats(self):
        """Check infer_dtype on float data, including one mixed case."""
        cases = [
            # object array of Python and numpy floats
            (np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O'),
             'floating'),
            # ints, floats and a string together
            (np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
                      dtype='O'),
             'mixed-integer'),
            # native float dtypes
            (np.array([1, 2, 3, 4, 5], dtype='f4'), 'floating'),
            (np.array([1, 2, 3, 4, 5], dtype='f8'), 'floating'),
        ]
        for arr, expected in cases:
            self.assertEqual(lib.infer_dtype(arr), expected)
Exemplo n.º 35
0
    def test_categorical(self):
        """Categoricals, bare or wrapped in a Series, infer as 'categorical'."""
        # GH 8974
        from pandas import Categorical, Series

        categoricals = (
            Categorical(list('abc')),
            Categorical(list('abc'), categories=['cegfab'], ordered=True),
        )
        for cat in categoricals:
            self.assertEqual(lib.infer_dtype(cat), 'categorical')
            self.assertEqual(lib.infer_dtype(Series(cat)), 'categorical')
Exemplo n.º 36
0
    def _convert_to_array(self, values, name=None, other=None):
        """
        Coerce ``values`` into an array usable in a datetime/timedelta operation.

        Parameters
        ----------
        values : scalar or list-like
            Operand to convert. A non-list-like value is wrapped in a
            one-element ndarray before its type is inferred.
        name : str, optional
            Operator method name (e.g. ``'__mul__'``). Only consulted for
            integer input, where it decides whether the operation is allowed.
        other : object, optional
            The opposite operand. Its ``dtype`` is used to build an all-NaT
            array when ``values`` contains only nulls.

        Returns
        -------
        The converted values; the concrete type depends on the branch taken.

        Raises
        ------
        TypeError
            For integer input with an operator other than
            ``__truediv__``/``__div__``/``__mul__``, for DateOffsets lacking a
            ``delta``, for floating input that is not all-null, and for any
            other unsupported inferred type.
        """
        from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

        # coercion mode handed to _possibly_cast_to_timedelta: "compat" when
        # pd._np_version_under1p7 is set, otherwise True
        coerce = "compat" if pd._np_version_under1p7 else True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)

        if inferred_type in ("datetime64", "datetime", "date", "time"):
            # if the other operand is a timedelta but we were given pd.NaT,
            # we are on the wrong path: build an all-NaT array of other's dtype
            if other is not None and other.dtype == "timedelta64[ns]" and all(isnull(v) for v in values):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT

            # a datetimelike that is not already a datetime64 array/Series
            elif not (isinstance(values, (pa.Array, pd.Series)) and com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
        elif inferred_type in ("timedelta", "timedelta64"):
            # have a timedelta, convert to ns here
            values = _possibly_cast_to_timedelta(values, coerce=coerce)
        elif inferred_type == "integer":
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == "m":
                values = values.astype("timedelta64[ns]")
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers are only accepted for these operators
            elif name not in ("__truediv__", "__div__", "__mul__"):
                raise TypeError("incompatible type for a datetime/timedelta " "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets: use each offset's ``delta`` attribute
            # (None when the attribute is missing)
            os = pa.array([getattr(v, "delta", None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError(
                    "cannot use a non-absolute DateOffset in "
                    "datetime/timedelta operations [{0}]".format(", ".join([com.pprint_thing(v) for v in values[mask]]))
                )
            values = _possibly_cast_to_timedelta(os, coerce=coerce)
        elif inferred_type == "floating":

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            # NOTE(review): ``other`` defaults to None and is dereferenced
            # here without a check -- confirm callers always supply it
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT
            else:
                raise TypeError(
                    "incompatible type [{0}] for a datetime/timedelta " "operation".format(pa.array(values).dtype)
                )
        else:
            raise TypeError(
                "incompatible type [{0}] for a datetime/timedelta" " operation".format(pa.array(values).dtype)
            )

        return values
Exemplo n.º 37
0
    def test_categorical(self):
        """Categoricals, bare or wrapped in a Series, infer as "categorical"."""
        # GH 8974
        from pandas import Categorical, Series

        categoricals = (
            Categorical(list("abc")),
            Categorical(list("abc"), categories=["cegfab"], ordered=True),
        )
        for cat in categoricals:
            self.assertEqual(lib.infer_dtype(cat), "categorical")
            self.assertEqual(lib.infer_dtype(Series(cat)), "categorical")
Exemplo n.º 38
0
def is_datetime_arraylike(arr):
    """
    Tell whether ``arr`` holds datetime-like data.

    For a DataFrame the check is applied to every column and the results are
    combined with ``all()``. Otherwise ``pd_is_datetime_arraylike`` is used
    when it is available (not None); when it is not, a DatetimeIndex counts as
    datetime-like and anything else is judged by whether the dtype inferred
    by ``lib.infer_dtype`` contains the word 'datetime'.
    """
    if isinstance(arr, pd.DataFrame):
        # Recurse per column instead of calling pd_is_datetime_arraylike
        # directly: that helper may be None (see the check below), in which
        # case each column must go through the fallback logic.
        return arr.apply(is_datetime_arraylike).all()
    elif pd_is_datetime_arraylike is not None:
        return pd_is_datetime_arraylike(arr)
    elif isinstance(arr, pd.DatetimeIndex):
        return True
    else:
        inferred = lib.infer_dtype(arr)
        return 'datetime' in inferred
Exemplo n.º 39
0
def is_datetime_arraylike(arr):
    """
    Tell whether ``arr`` holds datetime-like data.

    For a DataFrame the check is applied to every column and the results are
    combined with ``all()``. Otherwise ``pd_is_datetime_arraylike`` is used
    when it is available (not None); when it is not, a DatetimeIndex counts as
    datetime-like and anything else is judged by whether the dtype inferred
    by ``lib.infer_dtype`` contains the word 'datetime'.
    """
    if isinstance(arr, pd.DataFrame):
        # Recurse per column instead of calling pd_is_datetime_arraylike
        # directly: that helper may be None (see the check below), in which
        # case each column must go through the fallback logic.
        return arr.apply(is_datetime_arraylike).all()
    elif pd_is_datetime_arraylike is not None:
        return pd_is_datetime_arraylike(arr)
    elif isinstance(arr, pd.DatetimeIndex):
        return True
    else:
        inferred = lib.infer_dtype(arr)
        return 'datetime' in inferred
Exemplo n.º 40
0
def _convert_bin_to_numeric_type(x):
    """
    if the passed bin is of datetime/timedelta type,
    this method converts it to integer
    """
    dtype = infer_dtype(x)
    if dtype == 'timedelta' or dtype == 'timedelta64':
        x = to_timedelta(x).view(np.int64)
    elif dtype == 'datetime' or dtype == 'datetime64':
        x = to_datetime(x).view(np.int64)
    return x
Exemplo n.º 41
0
def _convert_obj(obj):
    """
    Convert a series to pytables values and Atom.

    Returns a ``(converted, kind, atom)`` tuple: the values to store, a
    string naming how they were interpreted, and the PyTables atom
    describing the stored type.

    Raises
    ------
    Exception
        If the inferred type is 'unicode' or any type not handled below.
    """
    if isinstance(obj, pd.DatetimeIndex):
        converted = obj.asi8
        return converted, 'datetime64', tb.Int64Atom()
    elif isinstance(obj, pd.PeriodIndex):
        converted = obj.values
        return converted, 'periodindex', tb.Int64Atom()
    # NOTE(review): a second, unreachable ``isinstance(obj, pd.PeriodIndex)``
    # branch returning kind 'int64' was removed here. If it was meant to
    # match a different index class, restore it with the intended type.

    inferred_type = lib.infer_dtype(obj)
    values = np.asarray(obj)

    if inferred_type == 'datetime64':
        converted = values.view('i8')
        return converted, inferred_type, tb.Int64Atom()
    if inferred_type == 'string':
        # TODO, am I doing this right?
        converted = np.array(list(values), dtype=np.bytes_)
        itemsize = converted.dtype.itemsize
        # for OBT, can't assume value will be right for future
        # frame keys
        if itemsize < MIN_ITEMSIZE:
            itemsize = MIN_ITEMSIZE
            converted = converted.astype("S{0}".format(itemsize))

        return converted, inferred_type, tb.StringAtom(itemsize)
    elif inferred_type == 'unicode':
        # tables don't seem to support objects, so unicode is rejected
        # (the object-atom conversion that followed this raise could never
        # run and was dropped)
        raise Exception("Unsupported inferred_type {0}".format(inferred_type))
    elif inferred_type == 'datetime':
        # seconds since the epoch (local time via mktime) plus the
        # fractional microseconds
        converted = np.array([(time.mktime(v.timetuple()) +
                            v.microsecond / 1E6) for v in values],
                            dtype=np.float64)
        return converted, inferred_type, tb.Time64Atom()
    elif inferred_type == 'integer':
        converted = np.asarray(values, dtype=np.int64)
        return converted, inferred_type, tb.Int64Atom()
    elif inferred_type == 'floating':
        converted = np.asarray(values, dtype=np.float64)
        return converted, inferred_type, tb.Float64Atom()
    # include the last few values in the message to help diagnose the input
    raise Exception("Unsupported inferred_type {0} {1}".format(inferred_type, str(values[-5:])))
Exemplo n.º 42
0
def _convert_index(index):
    """
    Convert an index to ``(values, kind, column)`` for PyTables storage.

    ``values`` is the array to store, ``kind`` a string naming how it was
    interpreted and ``column`` the matching PyTables column/atom object.

    Raises
    ------
    Exception
        If ``index`` is a MultiIndex.
    """
    if isinstance(index, DatetimeIndex):
        as_int = index.asi8
        return as_int, 'datetime64', _tables().Int64Col()
    if isinstance(index, (Int64Index, PeriodIndex)):
        column = _tables().Int64Col()
        return index.values, 'integer', column
    if isinstance(index, MultiIndex):
        raise Exception('MultiIndex not supported here!')

    kind = lib.infer_dtype(index)
    values = np.asarray(index)

    if kind == 'datetime64':
        as_int = values.view('i8')
        return as_int, 'datetime64', _tables().Int64Col()

    if kind == 'datetime':
        # epoch seconds (via mktime) plus fractional microseconds
        stamps = [(time.mktime(v.timetuple()) + v.microsecond / 1E6)
                  for v in values]
        seconds = np.array(stamps, dtype=np.float64)
        return seconds, 'datetime', _tables().Time64Col()

    if kind == 'date':
        stamps = [time.mktime(v.timetuple()) for v in values]
        seconds = np.array(stamps, dtype=np.int32)
        return seconds, 'date', _tables().Time32Col()

    if kind == 'string':
        as_str = np.array(list(values), dtype=np.str_)
        itemsize = as_str.dtype.itemsize
        return as_str, 'string', _tables().StringCol(itemsize)

    if kind == 'integer':
        # take a guess for now, hope the values fit
        column = _tables().Int64Col()
        return np.asarray(values, dtype=np.int64), 'integer', column

    if kind == 'floating':
        column = _tables().Float64Col()
        return np.asarray(values, dtype=np.float64), 'float', column

    # 'unicode' and every remaining kind are stored as Python objects
    column = _tables().ObjectAtom()
    return np.asarray(values, dtype='O'), 'object', column
Exemplo n.º 43
0
    def _convert_to_array(self, values, name=None):
        """
        Coerce ``values`` into an array usable in a datetime/timedelta operation.

        Parameters
        ----------
        values : scalar or list-like
            Operand to convert. A non-list-like value is wrapped in a
            one-element ndarray before its type is inferred.
        name : str, optional
            Operator method name (e.g. ``'__mul__'``). Only consulted for
            integer input, where it decides whether the operation is allowed.

        Returns
        -------
        The converted values; the concrete type depends on the branch taken.

        Raises
        ------
        TypeError
            For integer input with an operator other than
            ``__truediv__``/``__div__``/``__mul__``, for DateOffsets lacking a
            ``delta``, and for any other unsupported inferred type.
        """
        from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

        # coercion mode handed to _possibly_cast_to_timedelta: 'compat' when
        # pd._np_version_under1p7 is set, otherwise True
        coerce = 'compat' if pd._np_version_under1p7 else True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)
        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # a datetimelike that is not already a datetime64 array/Series
            if not (isinstance(values, (pa.Array, pd.Series))
                    and com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = _possibly_cast_to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers are only accepted for these operators
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets: use each offset's ``delta`` attribute
            # (None when the attribute is missing)
            os = pa.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError(
                    "cannot use a non-absolute DateOffset in "
                    "datetime/timedelta operations [{0}]".format(','.join(
                        [com.pprint_thing(v) for v in values[mask]])))
            values = _possibly_cast_to_timedelta(os, coerce=coerce)
        else:
            raise TypeError(
                "incompatible type [{0}] for a datetime/timedelta operation".
                format(pa.array(values).dtype))

        return values
Exemplo n.º 44
0
def _infer_fill_value(val):
    """
    Infer the fill value (nan or NaT) matching the provided
    scalar/ndarray/list-like.

    Returns a zero-dimensional 'NaT' array of the appropriate dtype when
    ``val`` is datetime-like or is an object array inferred as
    datetime/timedelta, so that blocks are constructed with the right dtype;
    otherwise returns ``np.nan``.
    """

    if not is_list_like(val):
        val = [val]
    # np.asarray copies only when it has to. The former
    # np.array(val, copy=False) raises ValueError under NumPy >= 2 whenever a
    # copy is unavoidable (e.g. for the list built just above).
    val = np.asarray(val)
    if is_datetimelike(val):
        return np.array('NaT', dtype=val.dtype)
    elif is_object_dtype(val.dtype):
        dtype = lib.infer_dtype(_ensure_object(val))
        if dtype in ['datetime', 'datetime64']:
            return np.array('NaT', dtype=_NS_DTYPE)
        elif dtype in ['timedelta', 'timedelta64']:
            return np.array('NaT', dtype=_TD_DTYPE)
    return np.nan
Exemplo n.º 45
0
def _infer_fill_value(val):
    """
    Infer the fill value (nan or NaT) matching the provided
    scalar/ndarray/list-like.

    Returns a zero-dimensional 'NaT' array of the appropriate dtype when
    ``val`` is datetime-like or is an object array inferred as
    datetime/timedelta, so that blocks are constructed with the right dtype;
    otherwise returns ``np.nan``.
    """

    if not is_list_like(val):
        val = [val]
    # np.asarray copies only when it has to. The former
    # np.array(val, copy=False) raises ValueError under NumPy >= 2 whenever a
    # copy is unavoidable (e.g. for the list built just above).
    val = np.asarray(val)
    if is_datetimelike(val):
        return np.array('NaT', dtype=val.dtype)
    elif is_object_dtype(val.dtype):
        dtype = lib.infer_dtype(_ensure_object(val))
        if dtype in ['datetime', 'datetime64']:
            return np.array('NaT', dtype=_NS_DTYPE)
        elif dtype in ['timedelta', 'timedelta64']:
            return np.array('NaT', dtype=_TD_DTYPE)
    return np.nan
Exemplo n.º 46
0
    def _convert_to_array(self, values, name=None):
        """
        Coerce ``values`` into an array usable in a datetime/timedelta operation.

        Parameters
        ----------
        values : scalar or list-like
            Operand to convert. A non-list-like value is wrapped in a
            one-element ndarray before its type is inferred.
        name : str, optional
            Operator method name (e.g. ``'__mul__'``). Only consulted for
            integer input, where it decides whether the operation is allowed.

        Returns
        -------
        The converted values; the concrete type depends on the branch taken.

        Raises
        ------
        TypeError
            For integer input with an operator other than
            ``__truediv__``/``__div__``/``__mul__``, for DateOffsets lacking a
            ``delta``, and for any other unsupported inferred type.
        """
        from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

        # coercion mode handed to _possibly_cast_to_timedelta: 'compat' when
        # pd._np_version_under1p7 is set, otherwise True
        coerce = 'compat' if pd._np_version_under1p7 else True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)
        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # a datetimelike that is not already a datetime64 array/Series
            if not (isinstance(values, (pa.Array, pd.Series)) and
                    com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = _possibly_cast_to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers are only accepted for these operators
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets: use each offset's ``delta`` attribute
            # (None when the attribute is missing)
            os = pa.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError("cannot use a non-absolute DateOffset in "
                                "datetime/timedelta operations [{0}]".format(
                                    ', '.join([com.pprint_thing(v)
                                               for v in values[mask]])))
            values = _possibly_cast_to_timedelta(os, coerce=coerce)
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(pa.array(values).dtype))

        return values
Exemplo n.º 47
0
    def fill_hdf(self, table = None, dataframe = None):
        """
        Write ``dataframe`` to the HDF file at ``self.hdf5_file_path`` under
        the key ``table``.

        ``table`` is registered in ``self.tables`` (with an empty dict) when
        it is not there yet. The frame is first written in 'table' format;
        if that raises TypeError, the columns inferred as 'unicode' are cast
        with ``astype(str)`` and the write is retried with default options.
        Both arguments are mandatory despite their ``None`` defaults.
        """
        assert table is not None, u"The mandatory keyword argument 'table' is not provided"
        assert dataframe is not None, u"The mandatory keyword argument 'dataframe' is not provided"

        if table not in self.tables:
            self.tables[table] = {}

        message = "Inserting table {} in HDF file {}".format(table, self.hdf5_file_path)
        log.info(message)

        try:
            dataframe.to_hdf(self.hdf5_file_path, table, format = 'table', append = False)
        except TypeError:
            # find the columns holding unicode values and stringify them
            inferred = dataframe.apply(lambda column: infer_dtype(column.values))
            unicode_columns = inferred[inferred == 'unicode']
            log.info("The following types are converted to strings \n {}".format(unicode_columns))
            for column_name in unicode_columns.index:
                dataframe[column_name] = dataframe[column_name].astype(str)
            dataframe.to_hdf(self.hdf5_file_path, table)
Exemplo n.º 48
0
def _possibly_cast_to_datetime(value, dtype, coerce = False):
    """
    Try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT.

    Parameters
    ----------
    value : scalar or array-like
        The data to convert.
    dtype : str, numpy dtype or None
        Target dtype; a string is turned into ``np.dtype``. With ``None``
        the conversion is only attempted when ``value`` is inferred as
        'datetime'.
    coerce : boolean, default False
        Forwarded to ``tslib.array_to_datetime`` when ``dtype`` is a
        datetime64 dtype.

    Returns
    -------
    The converted value, or ``value`` as it stood before the attempt when
    the conversion fails (conversion is best effort).
    """

    if isinstance(dtype, basestring):
        dtype = np.dtype(dtype)

    if dtype is not None and is_datetime64_dtype(dtype):
        if np.isscalar(value):
            if value == tslib.iNaT or isnull(value):
                value = tslib.iNaT
        else:
            value = np.array(value)

            # have a scalar array-like (e.g. NaT)
            if value.ndim == 0:
                value = tslib.iNaT

            # we have an array of datetime & nulls
            elif np.prod(value.shape):
                try:
                    value = tslib.array_to_datetime(value, coerce = coerce)
                except Exception:
                    # best effort: keep the unconverted array. Exception
                    # (not a bare except) lets KeyboardInterrupt/SystemExit
                    # propagate.
                    pass

    elif dtype is None:
        # we might have an array (or single object) that is datetime like, and no dtype is passed
        # don't change the value unless we find a datetime set
        v = value
        if not (is_list_like(v) or hasattr(v,'len')):
            v = [ v ]
        if len(v):
            inferred_type = lib.infer_dtype(v)
            if inferred_type == 'datetime':
                try:
                    value = tslib.array_to_datetime(np.array(v))
                except Exception:
                    # best effort: leave value untouched
                    pass

    return value
Exemplo n.º 49
0
def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series (e.g. datetime64[ns] dtype or a Series of
    Periods) in the matching DelegatedClass.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series or is not datetimelike.

    """
    if not isinstance(data, Series):
        not_series_msg = (
            "cannot convert an object of type {0} to a datetimelike index"
        )
        raise TypeError(not_series_msg.format(type(data)))

    index = data.index

    # datetime64 data maps straight onto a DatetimeIndex
    if issubclass(data.dtype.type, np.datetime64):
        return DatetimeProperties(DatetimeIndex(data, copy=copy), index)

    if isinstance(data, PeriodIndex):
        return PeriodProperties(PeriodIndex(data, copy=copy), index)

    # fall back to inspecting the underlying values for Periods
    data = com._values_from_object(data)
    if lib.infer_dtype(data) == 'period':
        return PeriodProperties(PeriodIndex(data), index)

    # note: ``data`` now refers to the extracted values, so the message
    # reports their type
    not_datetimelike_msg = (
        "cannot convert an object of type {0} to a datetimelike index"
    )
    raise TypeError(not_datetimelike_msg.format(type(data)))
Exemplo n.º 50
0
def make_block(values, items, ref_items):
    """
    Build the Block subclass instance that matches ``values.dtype``.

    Float, complex, datetime64, integer and bool dtypes map to their
    dedicated block classes; integer values that are not int64 are first
    cast to 'i8'. For any other dtype a non-empty array is checked for
    datetimes and, when the conversion succeeds, stored in a DatetimeBlock;
    everything else ends up in an ObjectBlock.

    Returns the constructed block, created with ``ndim=values.ndim``.
    """
    dtype = values.dtype
    vtype = dtype.type
    klass = None

    if issubclass(vtype, np.floating):
        klass = FloatBlock
    elif issubclass(vtype, np.complexfloating):
        klass = ComplexBlock
    elif issubclass(vtype, np.datetime64):
        klass = DatetimeBlock
    elif issubclass(vtype, np.integer):
        if vtype != np.int64:
            values = values.astype('i8')
        klass = IntBlock
    elif dtype == np.bool_:
        klass = BoolBlock

    # try to infer a datetimeblock
    if klass is None and np.prod(values.shape):
        flat = values.flatten()
        inferred_type = lib.infer_dtype(flat)
        if inferred_type == 'datetime':

            # we have an object array that has been inferred as datetime, so
            # convert it
            try:
                values = tslib.array_to_datetime(flat).reshape(values.shape)
                klass = DatetimeBlock
            except Exception:
                # it is already object, so leave it. Exception (not a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                pass

    if klass is None:
        klass = ObjectBlock

    return klass(values, items, ref_items, ndim=values.ndim)
Exemplo n.º 51
0
    def test_constructor_with_datetime_tz(self):
        """
        Exercise a Series built from tz-aware datetimes: dtype reporting,
        export via ``.values``, indexing, concat, astype round trips, string
        formatting, dtype inference and an all-NaT construction.
        """

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
        self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
        self.assertTrue(is_datetime64tz_dtype(s.dtype))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # export: .values gives a tz-naive datetime64[ns] ndarray
        result = s.values
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == 'datetime64[ns]')

        # localizing the exported values as UTC and converting back to the
        # series tz recovers the original range
        exp = pd.DatetimeIndex(result)
        exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz)
        self.assert_index_equal(dr, exp)

        # indexing
        result = s.iloc[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))
        result = s[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))

        # boolean mask indexing
        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # astype
        result = s.astype(object)
        expected = Series(DatetimeIndex(s._values).asobject)
        assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
        assert_series_equal(result, s)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype('datetime64[ns, US/Eastern]')
        assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        assert_series_equal(result, s)

        # converting to another tz
        result = s.astype('datetime64[ns, CET]')
        expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
        assert_series_equal(result, expected)

        # short str
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # formatting with NaT
        result = s.shift()
        self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
        self.assertTrue('NaT' in str(result))

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference: identical timezones give a tz-aware dtype
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')
        ])
        self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
        self.assertTrue(lib.infer_dtype(s) == 'datetime64')

        # differing timezones stay as object dtype
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')
        ])
        self.assertTrue(s.dtype == 'object')
        self.assertTrue(lib.infer_dtype(s) == 'datetime')

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)
Exemplo n.º 52
0
def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
    """
    Sort ``values`` and reorder corresponding ``labels``.
    ``values`` should be unique if ``labels`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``labels`` is not None.
    labels : list-like
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``na_sentinel``.
    na_sentinel : int, default -1
        Value in ``labels`` to mark "not found".
        Ignored when ``labels`` is None.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``labels`` is None.

    Returns
    -------
    ordered : ndarray
        Sorted ``values``
    new_labels : ndarray
        Reordered ``labels``; returned when ``labels`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``labels`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``labels`` is not None and ``values`` contain duplicates.
    """
    if not is_list_like(values):
        # note the trailing space: adjacent literals are joined verbatim
        raise TypeError("Only list-like objects are allowed to be passed to "
                        "safe_sort as values")
    # np.asarray copies only when required; np.array(..., copy=False) raises
    # ValueError under NumPy >= 2 when a copy cannot be avoided (e.g. lists)
    values = np.asarray(values)

    def sort_mixed(values):
        # order ints before strings, safe in py3
        str_pos = np.array([isinstance(x, string_types) for x in values],
                           dtype=bool)
        nums = np.sort(values[~str_pos])
        strs = np.sort(values[str_pos])
        return _ensure_object(np.concatenate([nums, strs]))

    sorter = None
    if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer':
        # unorderable in py3 if mixed str/int
        ordered = sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except TypeError:
            # try this anyway
            ordered = sort_mixed(values)

    # labels:

    if labels is None:
        return ordered

    if not is_list_like(labels):
        raise TypeError("Only list-like objects or None are allowed to be "
                        "passed to safe_sort as labels")
    labels = _ensure_platform_int(np.asarray(labels))

    from pandas import Index
    if not assume_unique and not Index(values).is_unique:
        raise ValueError("values should be unique if labels is not None")

    if sorter is None:
        # mixed types: no argsort result is available, so recover the
        # permutation by looking up each sorted value in a hash table
        (hash_klass, _), values = _get_data_algo(values, _hashtables)
        t = hash_klass(len(values))
        t.map_locations(values)
        sorter = _ensure_platform_int(t.lookup(ordered))

    # reverse_indexer[old_position] == new_position
    reverse_indexer = np.empty(len(sorter), dtype=np.int_)
    reverse_indexer.put(sorter, np.arange(len(sorter)))

    mask = (labels < -len(values)) | (labels >= len(values)) | \
        (labels == na_sentinel)

    # (Out of bound indices will be masked with `na_sentinel` next, so we may
    # deal with them here without performance loss using `mode='wrap'`.)
    new_labels = reverse_indexer.take(labels, mode='wrap')
    np.putmask(new_labels, mask, na_sentinel)

    return ordered, _ensure_platform_int(new_labels)
Exemplo n.º 53
0
def _possibly_infer_to_datetimelike(value, convert_dates=False):
    """
    Try to convert an untyped array (or single object) that looks
    datetime-like into datetime64 / timedelta64 values.

    The value is only changed when a datetime/timedelta set is found.
    This is pretty strict in that a datetime/timedelta is REQUIRED
    in addition to possible nulls/string likes.

    ONLY strings are NOT datetimelike.

    Parameters
    ----------
    value : np.array / Series / Index / list-like
    convert_dates : boolean, default False
       if True try really hard to convert dates (such as datetime.date),
       otherwise leave inferred dtype 'date' alone

    Returns
    -------
    ``value`` itself when it is already a DatetimeIndex / PeriodIndex, is
    empty, or no datetime-like inference applies; the underlying
    DatetimeIndex for a Series backed by one; otherwise the outcome of the
    attempted conversion, which falls back to the input as an ndarray of
    its original shape when the conversion fails.
    """

    if isinstance(value, (ABCDatetimeIndex, ABCPeriodIndex)):
        return value
    elif isinstance(value, ABCSeries):
        if isinstance(value._values, ABCDatetimeIndex):
            return value._values

    v = value

    if not is_list_like(v):
        v = [v]
    # np.asarray is the copy-if-needed spelling of np.array(v, copy=False)
    v = np.asarray(v)
    shape = v.shape
    if v.ndim != 1:
        # the converters below work on flat data; `shape` is restored after
        v = v.ravel()

    if not len(v):
        return value

    def _try_datetime(v):
        # safe coerce to datetime64
        try:
            v = tslib.array_to_datetime(v, errors='raise')
        except ValueError:

            # we might have a sequence of the same-datetimes with tz's
            # if so coerce to a DatetimeIndex; if they are not the same,
            # then these stay as object dtype
            try:
                from pandas import to_datetime
                return to_datetime(v)
            except Exception:
                pass

        except Exception:
            # best effort: any other failure leaves the values untouched
            pass

        return v.reshape(shape)

    def _try_timedelta(v):
        # safe coerce to timedelta64

        # will try first with a string & object conversion
        from pandas import to_timedelta
        try:
            return to_timedelta(v)._values.reshape(shape)
        except Exception:
            # restore the caller's shape on failure too, so an N-d input
            # is never handed back flattened
            return v.reshape(shape)

    # do a quick inference for perf
    sample = v[:min(3, len(v))]
    inferred_type = lib.infer_dtype(sample)

    if (inferred_type in ['datetime', 'datetime64'] or
            (convert_dates and inferred_type in ['date'])):
        value = _try_datetime(v)
    elif inferred_type in ['timedelta', 'timedelta64']:
        value = _try_timedelta(v)

    # It's possible to have nulls intermixed within the datetime or
    # timedelta.  These will in general have an inferred_type of 'mixed',
    # so have to try both datetime and timedelta.

    # try timedelta first to avoid spurious datetime conversions
    # e.g. '00:00:01' is a timedelta but technically is also a datetime
    elif inferred_type in ['mixed']:

        if lib.is_possible_datetimelike_array(_ensure_object(v)):
            value = _try_timedelta(v)
            # inspect the flattened result so the check sees the same 1-d
            # data regardless of the restored shape
            if lib.infer_dtype(value.ravel()) in ['mixed']:
                value = _try_datetime(v)

    return value
Exemplo n.º 54
0
def _possibly_downcast_to_dtype(result, dtype):
    """ try to cast to the specified dtype (e.g. convert back to bool/int
    or could be an astype of float64->float32

    Parameters
    ----------
    result : scalar or ndarray
        Scalars are returned untouched.
    dtype : string or dtype
        Target dtype. The string 'infer' selects the target from the
        inferred type of the values (boolean -> bool, integer -> int64,
        datetime64 / timedelta64 -> the matching [ns] dtype, floating ->
        int64, anything else -> object).

    Returns
    -------
    The converted values when the cast applies, otherwise ``result``
    unchanged. A bool/integer cast is only kept when the converted values
    compare equal (or close) to the originals; any error raised while
    casting is suppressed and ``result`` is returned as is.
    """

    if is_scalar(result):
        return result

    def trans(x):
        return x

    if isinstance(dtype, string_types):
        if dtype == 'infer':
            inferred_type = lib.infer_dtype(_ensure_object(result.ravel()))
            if inferred_type == 'boolean':
                dtype = 'bool'
            elif inferred_type == 'integer':
                dtype = 'int64'
            elif inferred_type == 'datetime64':
                dtype = 'datetime64[ns]'
            elif inferred_type == 'timedelta64':
                dtype = 'timedelta64[ns]'

            # floats: attempt an integer downcast, rounding numeric data
            # first; the equality checks below reject a lossy cast
            elif inferred_type == 'floating':
                dtype = 'int64'
                if issubclass(result.dtype.type, np.number):

                    def trans(x):  # noqa
                        return x.round()
            else:
                dtype = 'object'

        dtype = np.dtype(dtype)

    try:

        # don't allow upcasts here (except if empty)
        if dtype.kind == result.dtype.kind:
            if (result.dtype.itemsize <= dtype.itemsize and
                    np.prod(result.shape)):
                return result

        if issubclass(dtype.type, np.floating):
            return result.astype(dtype)
        elif is_bool_dtype(dtype) or is_integer_dtype(dtype):

            # if we don't have any elements, just astype it
            if not np.prod(result.shape):
                return trans(result).astype(dtype)

            # do a test on the first element, if it fails then we are done
            r = result.ravel()
            arr = np.array([r[0]])

            # if we have any nulls, then we are done
            if isnull(arr).any() or not np.allclose(arr,
                                                    trans(arr).astype(dtype)):
                return result

            # a comparable, e.g. a Decimal may slip in here
            # (``np.bool`` is intentionally not listed: it was only an alias
            # of the builtin ``bool`` and is missing from some NumPy
            # releases, where touching it raised inside this try block and
            # silently disabled the downcast)
            elif not isinstance(r[0], (np.integer, np.floating,
                                       int, float, bool)):
                return result

            if (issubclass(result.dtype.type, (np.object_, np.number)) and
                    notnull(result).all()):
                new_result = trans(result).astype(dtype)
                try:
                    if np.allclose(new_result, result):
                        return new_result
                except Exception:

                    # comparison of an object dtype with a number type could
                    # hit here
                    if (new_result == result).all():
                        return new_result

        # a datetimelike
        # GH12821, iNaT is casted to float
        elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']:
            try:
                result = result.astype(dtype)
            except Exception:
                # only tz-aware dtypes carry ``tz``; plain numpy dtypes
                # simply keep ``result`` as it was
                tz = getattr(dtype, 'tz', None)
                if tz:
                    # convert to datetime and change timezone
                    from pandas import to_datetime
                    result = to_datetime(result).tz_localize(tz)

    except Exception:
        # downcasting is best effort: on any failure hand back ``result``
        pass

    return result
Exemplo n.º 55
0
    def test_infer_dtype_datetime(self):
        # lib.infer_dtype on datetime-like input: homogeneous arrays,
        # arrays padded with nulls, and mixes with other kinds of values.

        def check(items, expected, **array_kwargs):
            inferred = lib.infer_dtype(np.array(items, **array_kwargs))
            self.assertEqual(inferred, expected)

        check([Timestamp('2011-01-01'), Timestamp('2011-01-02')], 'datetime')
        check([np.datetime64('2011-01-01'), np.datetime64('2011-01-01')],
              'datetime64', dtype=object)
        check([datetime(2011, 1, 1), datetime(2012, 2, 1)], 'datetime')

        # starts with nan (first without, then with a trailing null)
        for null in [pd.NaT, np.nan]:
            for tail in ([], [null]):
                check([null, pd.Timestamp('2011-01-02')] + tail, 'datetime')
                check([null, np.datetime64('2011-01-02')] + tail,
                      'datetime64')
                check([null, datetime(2011, 1, 1)] + tail, 'datetime')

        # different type of nat
        td_nat = np.timedelta64('nat')
        check([td_nat, np.datetime64('2011-01-02')], 'mixed', dtype=object)
        check([np.datetime64('2011-01-02'), td_nat], 'mixed', dtype=object)

        # mixed datetime
        check([datetime(2011, 1, 1), pd.Timestamp('2011-01-02')], 'datetime')

        # should be datetime?
        check([np.datetime64('2011-01-01'), pd.Timestamp('2011-01-02')],
              'mixed')
        check([pd.Timestamp('2011-01-02'), np.datetime64('2011-01-01')],
              'mixed')

        # nulls and timestamps combined with numbers / strings
        check([np.nan, pd.Timestamp('2011-01-02'), 1], 'mixed-integer')
        check([np.nan, pd.Timestamp('2011-01-02'), 1.1], 'mixed')
        check([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')], 'mixed')
Exemplo n.º 56
0
    def test_infer_dtype_timedelta(self):
        # lib.infer_dtype on timedelta-like input: homogeneous arrays,
        # arrays padded with nulls, and a datetime NaT mixed in.

        def check(items, expected, **array_kwargs):
            inferred = lib.infer_dtype(np.array(items, **array_kwargs))
            self.assertEqual(inferred, expected)

        one_day = np.timedelta64(1, 'D')

        check([pd.Timedelta('1 days'), pd.Timedelta('2 days')], 'timedelta')
        check([one_day, np.timedelta64(2, 'D')], 'timedelta', dtype=object)
        check([timedelta(1), timedelta(2)], 'timedelta')

        # starts with nan (first without, then with a trailing null)
        for null in [pd.NaT, np.nan]:
            for tail in ([], [null]):
                check([null, pd.Timedelta('1 days')] + tail, 'timedelta')
                check([null, one_day] + tail, 'timedelta')
                check([null, timedelta(1)] + tail, 'timedelta')

        # different type of nat
        dt_nat = np.datetime64('nat')
        check([dt_nat, one_day], 'mixed', dtype=object)
        check([one_day, dt_nat], 'mixed', dtype=object)
Exemplo n.º 57
0
    def test_infer_dtype_all_nan_nat_like(self):
        # lib.infer_dtype on arrays made up only of null-like values
        # (nan, None, pd.NaT, datetime64/timedelta64 NaT).

        def check(items, expected, **array_kwargs):
            inferred = lib.infer_dtype(np.array(items, **array_kwargs))
            self.assertEqual(inferred, expected)

        nulls = [np.nan, pd.NaT, None]
        dt_nat = np.datetime64('nat')
        td_nat = np.timedelta64('nat')

        check([np.nan, np.nan], 'floating')

        # nan and None mix are result in mixed
        check([np.nan, np.nan, None], 'mixed')
        check([None, np.nan, np.nan], 'mixed')

        # pd.NaT
        nat_cases = [
            [pd.NaT],
            [pd.NaT, np.nan],
            [np.nan, pd.NaT],
            [np.nan, pd.NaT, np.nan],
            [None, pd.NaT, None],
        ]
        for items in nat_cases:
            check(items, 'datetime')

        # np.datetime64(nat)
        check([dt_nat], 'datetime64')
        for null in nulls:
            check([null, dt_nat, null], 'datetime64')
            check([pd.NaT, null, dt_nat, null], 'datetime64')

        # np.timedelta64(nat)
        check([td_nat], 'timedelta', dtype=object)
        for null in nulls:
            check([null, td_nat, null], 'timedelta')
            check([pd.NaT, null, td_nat, null], 'timedelta')

        # datetime / timedelta mixed
        check([pd.NaT, dt_nat, td_nat, np.nan], 'mixed')
        check([td_nat, dt_nat], 'mixed', dtype=object)