Exemplo n.º 1
0
 def test_pandas_dtypes(self):
     """Pin the TypeError currently expected for pandas dtype inputs."""
     # TODO: not implemented yet
     # Both cases are expected to raise TypeError somewhere inside the
     # ``with`` block.
     for dtype_cls in (CategoricalDtype, DatetimeTZDtype):
         with self.assertRaises(TypeError):
             common = _find_common_type([dtype_cls()])
             self.assertEqual(common, dtype_cls)
Exemplo n.º 2
0
    def test_period_dtype(self):
        """Check the common type computed for period dtypes.

        Two identical ``period[D]`` dtypes are expected to keep that dtype;
        pairing it with any of the other listed dtypes is expected to give
        ``object``, whichever argument comes first.
        """
        dtype = PeriodDtype(freq='D')
        self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]')

        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
                       PeriodDtype(freq='2D'), PeriodDtype(freq='H'),
                       np.dtype('datetime64[ns]'), object, np.int64]:
            self.assertEqual(_find_common_type([dtype, dtype2]), object)
            self.assertEqual(_find_common_type([dtype2, dtype]), object)
Exemplo n.º 3
0
    def test_datetimetz_dtype(self):
        """Check the common type computed for tz-aware datetime dtypes.

        Two identical ``datetime64[ns, US/Eastern]`` dtypes are expected to
        keep that dtype; pairing it with any of the other listed dtypes is
        expected to give ``object``, whichever argument comes first.
        """
        dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
        self.assertEqual(_find_common_type([dtype, dtype]),
                         'datetime64[ns, US/Eastern]')

        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
                       np.dtype('datetime64[ns]'), object, np.int64]:
            self.assertEqual(_find_common_type([dtype, dtype2]), object)
            self.assertEqual(_find_common_type([dtype2, dtype]), object)
Exemplo n.º 4
0
    def test_period_dtype(self):
        """Check the common type computed for period dtypes.

        Two identical ``period[D]`` dtypes are expected to keep that dtype;
        pairing it with any of the other listed dtypes is expected to give
        ``object``, whichever argument comes first.
        """
        dtype = PeriodDtype(freq='D')
        self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]')

        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
                       PeriodDtype(freq='2D'), PeriodDtype(freq='H'),
                       np.dtype('datetime64[ns]'), object, np.int64]:
            self.assertEqual(_find_common_type([dtype, dtype2]), object)
            self.assertEqual(_find_common_type([dtype2, dtype]), object)
Exemplo n.º 5
0
    def test_datetimetz_dtype(self):
        """Check the common type computed for tz-aware datetime dtypes.

        Two identical ``datetime64[ns, US/Eastern]`` dtypes are expected to
        keep that dtype; pairing it with any of the other listed dtypes is
        expected to give ``object``, whichever argument comes first.
        """
        dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
        self.assertEqual(_find_common_type([dtype, dtype]),
                         'datetime64[ns, US/Eastern]')

        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
                       np.dtype('datetime64[ns]'), object, np.int64]:
            self.assertEqual(_find_common_type([dtype, dtype2]), object)
            self.assertEqual(_find_common_type([dtype2, dtype]), object)
Exemplo n.º 6
0
    def test_numpy_dtypes(self):
        """Check ``_find_common_type`` against a table of NumPy type tuples.

        Each table entry pairs a tuple of source types with the common type
        the function is expected to return for it.
        """
        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        # (source_types, destination_type)
        testcases = (
            # identity
            ((np.int64,), np.int64),
            ((np.uint64,), np.uint64),
            ((np.float32,), np.float32),
            ((object,), object),

            # into ints
            ((np.int16, np.int64), np.int64),
            ((np.int32, np.uint32), np.int64),
            ((np.uint16, np.uint64), np.uint64),

            # into floats
            ((np.float16, np.float32), np.float32),
            ((np.float16, np.int16), np.float32),
            ((np.float32, np.int16), np.float32),
            ((np.uint64, np.int64), np.float64),
            ((np.int16, np.float64), np.float64),
            ((np.float16, np.int64), np.float64),

            # into others
            ((np.complex128, np.int32), np.complex128),
            ((object, np.float32), object),
            ((object, np.int16), object),
        )
        for src, common in testcases:
            self.assertEqual(_find_common_type(src), common)
Exemplo n.º 7
0
    def test_numpy_dtypes(self):
        """Check ``_find_common_type`` against a table of NumPy type tuples.

        Each table entry pairs a tuple of source types with the common type
        the function is expected to return for it. An empty input is
        expected to raise ``ValueError``.
        """
        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        # (source_types, destination_type)
        testcases = (
            # identity
            ((np.int64,), np.int64),
            ((np.uint64,), np.uint64),
            ((np.float32,), np.float32),
            ((object,), object),

            # into ints
            ((np.int16, np.int64), np.int64),
            ((np.int32, np.uint32), np.int64),
            ((np.uint16, np.uint64), np.uint64),

            # into floats
            ((np.float16, np.float32), np.float32),
            ((np.float16, np.int16), np.float32),
            ((np.float32, np.int16), np.float32),
            ((np.uint64, np.int64), np.float64),
            ((np.int16, np.float64), np.float64),
            ((np.float16, np.int64), np.float64),

            # into others
            ((np.complex128, np.int32), np.complex128),
            ((object, np.float32), object),
            ((object, np.int16), object),

            # same datetime-like dtype on both sides
            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')),
             np.dtype('datetime64[ns]')),
            ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')),
             np.dtype('timedelta64[ns]')),

            # same kind, different units
            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')),
             np.dtype('datetime64[ns]')),
            ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')),
             np.dtype('timedelta64[ns]')),

            # datetime mixed with another kind
            ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')),
             object),
            ((np.dtype('datetime64[ns]'), np.int64), object)
        )
        for src, common in testcases:
            self.assertEqual(_find_common_type(src), common)

        with tm.assertRaises(ValueError):
            # empty
            _find_common_type([])
Exemplo n.º 8
0
    def test_numpy_dtypes(self):
        """Check ``_find_common_type`` against a table of NumPy type tuples.

        Each table entry pairs a tuple of source types with the common type
        the function is expected to return for it. An empty input is
        expected to raise ``ValueError``.
        """
        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        # (source_types, destination_type)
        testcases = (
            # identity
            ((np.int64, ), np.int64),
            ((np.uint64, ), np.uint64),
            ((np.float32, ), np.float32),
            ((object, ), object),

            # into ints
            ((np.int16, np.int64), np.int64),
            ((np.int32, np.uint32), np.int64),
            ((np.uint16, np.uint64), np.uint64),

            # into floats
            ((np.float16, np.float32), np.float32),
            ((np.float16, np.int16), np.float32),
            ((np.float32, np.int16), np.float32),
            ((np.uint64, np.int64), np.float64),
            ((np.int16, np.float64), np.float64),
            ((np.float16, np.int64), np.float64),

            # into others
            ((np.complex128, np.int32), np.complex128),
            ((object, np.float32), object),
            ((object, np.int16), object),
            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')),
             np.dtype('datetime64[ns]')),
            ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')),
             np.dtype('timedelta64[ns]')),
            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')),
             np.dtype('datetime64[ns]')),
            ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')),
             np.dtype('timedelta64[ns]')),
            ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')),
             object),
            ((np.dtype('datetime64[ns]'), np.int64), object))
        for src, common in testcases:
            self.assertEqual(_find_common_type(src), common)

        with tm.assertRaises(ValueError):
            # empty
            _find_common_type([])
Exemplo n.º 9
0
    def test_pandas_dtypes(self):
        """Check the common type for categorical and tz-aware dtypes.

        A categorical dtype alone or paired with itself is expected to stay
        ``'category'``; two identical tz-aware dtypes are expected to keep
        their dtype. Every mixed pairing listed is expected to give
        ``object``.
        """
        # The builtin ``object`` is used instead of the ``np.object`` alias:
        # the alias was the builtin itself and is gone from recent NumPy.
        dtype = CategoricalDtype()
        self.assertEqual(_find_common_type([dtype]), 'category')
        self.assertEqual(_find_common_type([dtype, dtype]), 'category')
        self.assertEqual(_find_common_type([object, dtype]), object)

        dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
        self.assertEqual(_find_common_type([dtype, dtype]),
                         'datetime64[ns, US/Eastern]')

        # Mixed pairings are checked in both argument orders.
        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
                       np.dtype('datetime64[ns]'), object, np.int64]:
            self.assertEqual(_find_common_type([dtype, dtype2]), object)
            self.assertEqual(_find_common_type([dtype2, dtype]), object)
Exemplo n.º 10
0
    def to_coo(self):
        """
        Convert the frame to a SciPy sparse matrix in COO format.

        .. versionadded:: 0.20.0

        Returns
        -------
        coo_matrix : scipy.sparse.spmatrix
            If the caller is heterogeneous and contains booleans or objects,
            the result will be of dtype=object. See Notes.

        Notes
        -----
        The result dtype is the common type of all column dtypes (implicit
        upcasting): when the column dtypes differ, even among numeric
        types, the one that can hold all of them is used.

        For example, float16 and float32 columns are upcast to float32.
        Following the numpy.find_common_type convention, mixing int64 and
        uint64 results in a float64 dtype.
        """
        try:
            from scipy.sparse import coo_matrix
        except ImportError:
            raise ImportError('Scipy is not installed')

        common_dtype = _find_common_type(self.dtypes)

        # Collect, per column, the column position repeated once per stored
        # value, the row positions of those values, and the values cast to
        # the common dtype.
        col_chunks, row_chunks, value_chunks = [], [], []
        for position, label in enumerate(self):
            column = self[label]
            row_idx = column.sp_index.to_int_index().indices
            col_chunks.append(np.repeat(position, len(row_idx)))
            row_chunks.append(row_idx)
            value_chunks.append(
                column.sp_values.astype(common_dtype, copy=False))

        col_coords = np.concatenate(col_chunks)
        row_coords = np.concatenate(row_chunks)
        values = np.concatenate(value_chunks)
        return coo_matrix((values, (row_coords, col_coords)),
                          shape=self.shape)
Exemplo n.º 11
0
Arquivo: ops.py Projeto: DGrady/pandas
    def na_op(x, y):
        """Evaluate ``op`` on ``x`` and ``y`` with a null-aware fallback.

        The fast path goes through ``expressions.evaluate``. If that raises
        ``TypeError``, ``op`` is applied only to the positions reported as
        non-null and the remaining positions are filled through
        ``_maybe_upcast_putmask`` with NaN. The result is finally passed
        through ``missing.fill_zeros``.
        """
        try:
            result = expressions.evaluate(op, str_rep, x, y,
                                          raise_on_error=True, **eval_kwargs)
        except TypeError:
            y_is_arraylike = isinstance(y, (np.ndarray, ABCSeries, pd.Index))

            # Neither operand can be handled element-wise: give up.
            if not y_is_arraylike and not isinstance(x, np.ndarray):
                raise TypeError("{typ} cannot perform the operation "
                                "{op}".format(typ=type(x).__name__,
                                              op=str_rep))

            if y_is_arraylike:
                # Both sides carry a dtype; operate where both are non-null.
                common = _find_common_type([x.dtype, y.dtype])
                result = np.empty(x.size, dtype=common)
                valid = notnull(x) & notnull(y)
                result[valid] = op(x[valid], _values_from_object(y[valid]))
            else:
                # ``x`` is an ndarray and ``y`` is passed to ``op`` as is.
                result = np.empty(len(x), dtype=x.dtype)
                valid = notnull(x)
                result[valid] = op(x[valid], y)

            result, _ = _maybe_upcast_putmask(result, ~valid, np.nan)

        result = missing.fill_zeros(result, x, y, name, fill_zeros)
        return result
Exemplo n.º 12
0
def _sparse_array_op(left, right, op, name, series=False):
    """
    Apply the binary operation ``op`` to two sparse arrays.

    Parameters
    ----------
    left, right : sparse array-like
        Operands. The code reads their ``dtype``, ``sp_index``,
        ``sp_values``, ``fill_value`` and ``values`` attributes and calls
        ``astype`` and ``get_values`` on them.
    op : callable
        Binary function applied to the values and to the fill values when
        either operand has no gaps or both share the same sparse index.
    name : str
        Operation name such as ``'floordiv'`` or ``'rmod'``. It selects the
        ``splib`` kernel when the sparse indexes differ; on that path a
        leading ``'r'`` swaps the operands and is stripped from the name.
    series : bool, default False
        When True and both operands are integer typed, floordiv/mod style
        operations whose divisor contains a zero are computed in float64.

    Returns
    -------
    The value returned by ``_wrap_result`` for the computed values, index
    and fill value.
    """

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = _find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        # At least one operand has no gaps: operate on the full values.
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        # Same sparse index: operate directly on the stored values.
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        # Different sparse indexes: dispatch to the kernel in ``splib``.
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            # Builtin ``bool`` instead of the ``np.bool`` alias, which was
            # the builtin itself and is gone from recent NumPy.
            result_dtype = bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
Exemplo n.º 13
0
def _sparse_array_op(left, right, op, name, series=False):
    """
    Apply the binary operation ``op`` to two sparse arrays.

    Parameters
    ----------
    left, right : sparse array-like
        Operands. The code reads their ``dtype``, ``sp_index``,
        ``sp_values``, ``fill_value`` and ``values`` attributes and calls
        ``astype`` and ``get_values`` on them.
    op : callable
        Binary function applied to the values and to the fill values when
        either operand has no gaps or both share the same sparse index.
    name : str
        Operation name such as ``'floordiv'`` or ``'rmod'``. It selects the
        ``splib`` kernel when the sparse indexes differ; on that path a
        leading ``'r'`` swaps the operands and is stripped from the name.
    series : bool, default False
        When True and both operands are integer typed, floordiv/mod style
        operations whose divisor contains a zero are computed in float64.

    Returns
    -------
    The value returned by ``_wrap_result`` for the computed values, index
    and fill value.
    """

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = _find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        # At least one operand has no gaps: operate on the full values.
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        # Same sparse index: operate directly on the stored values.
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        # Different sparse indexes: dispatch to the kernel in ``splib``.
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            # Builtin ``bool`` instead of the ``np.bool`` alias, which was
            # the builtin itself and is gone from recent NumPy.
            result_dtype = bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
Exemplo n.º 14
0
 def test_categorical_dtype(self):
     """Check the common type computed for categorical dtypes.

     A categorical dtype alone or paired with itself is expected to stay
     ``'category'``; paired with ``object`` it is expected to give
     ``object``.
     """
     dtype = CategoricalDtype()
     self.assertEqual(_find_common_type([dtype]), 'category')
     self.assertEqual(_find_common_type([dtype, dtype]), 'category')
     # The builtin ``object`` is used instead of the ``np.object`` alias:
     # the alias was the builtin itself and is gone from recent NumPy.
     self.assertEqual(_find_common_type([object, dtype]), object)
Exemplo n.º 15
0
 def test_categorical_dtype(self):
     """Check the common type computed for categorical dtypes.

     A categorical dtype alone or paired with itself is expected to stay
     ``'category'``; paired with ``object`` it is expected to give
     ``object``.
     """
     dtype = CategoricalDtype()
     self.assertEqual(_find_common_type([dtype]), 'category')
     self.assertEqual(_find_common_type([dtype, dtype]), 'category')
     # The builtin ``object`` is used instead of the ``np.object`` alias:
     # the alias was the builtin itself and is gone from recent NumPy.
     self.assertEqual(_find_common_type([object, dtype]), object)