示例#1
0
    def test_isnull_datetime(self):
        self.assertFalse(isnull(datetime.now()))
        self.assertTrue(notnull(datetime.now()))

        idx = date_range('1/1/1990', periods=20)
        exp = np.ones(len(idx), dtype=bool)
        tm.assert_numpy_array_equal(notnull(idx), exp)

        idx = np.asarray(idx)
        idx[0] = iNaT
        idx = DatetimeIndex(idx)
        mask = isnull(idx)
        self.assertTrue(mask[0])
        exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)

        # GH 9129
        pidx = idx.to_period(freq='M')
        mask = isnull(pidx)
        self.assertTrue(mask[0])
        exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)

        mask = isnull(pidx[1:])
        exp = np.zeros(len(mask), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)
示例#2
0
    def test_datetime_other_units(self):
        idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02'])
        exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(idx), exp)
        tm.assert_numpy_array_equal(notnull(idx), ~exp)
        tm.assert_numpy_array_equal(isnull(idx.values), exp)
        tm.assert_numpy_array_equal(notnull(idx.values), ~exp)

        for dtype in [
                'datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                'datetime64[ns]'
        ]:
            values = idx.values.astype(dtype)

            exp = np.array([False, True, False])
            tm.assert_numpy_array_equal(isnull(values), exp)
            tm.assert_numpy_array_equal(notnull(values), ~exp)

            exp = pd.Series([False, True, False])
            s = pd.Series(values)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
            s = pd.Series(values, dtype=object)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
示例#3
0
    def _combine_match_index(self, other, func, level=None, fill_value=None):
        new_data = {}

        if fill_value is not None:
            raise NotImplementedError("'fill_value' argument is not supported")
        if level is not None:
            raise NotImplementedError("'level' argument is not supported")

        new_index = self.index.union(other.index)
        this = self
        if self.index is not new_index:
            this = self.reindex(new_index)

        if other.index is not new_index:
            other = other.reindex(new_index)

        for col, series in compat.iteritems(this):
            new_data[col] = func(series.values, other.values)

        # fill_value is a function of our operator
        if isnull(other.fill_value) or isnull(self.default_fill_value):
            fill_value = np.nan
        else:
            fill_value = func(np.float64(self.default_fill_value),
                              np.float64(other.fill_value))

        return self._constructor(
            new_data, index=new_index, columns=self.columns,
            default_fill_value=fill_value).__finalize__(self)
示例#4
0
    def test_timedelta_other_units(self):
        idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
        exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(idx), exp)
        tm.assert_numpy_array_equal(notnull(idx), ~exp)
        tm.assert_numpy_array_equal(isnull(idx.values), exp)
        tm.assert_numpy_array_equal(notnull(idx.values), ~exp)

        for dtype in [
                'timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
                'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
                'timedelta64[ns]'
        ]:
            values = idx.values.astype(dtype)

            exp = np.array([False, True, False])
            tm.assert_numpy_array_equal(isnull(values), exp)
            tm.assert_numpy_array_equal(notnull(values), ~exp)

            exp = pd.Series([False, True, False])
            s = pd.Series(values)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
            s = pd.Series(values, dtype=object)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
示例#5
0
    def test_isnull_datetime(self):
        self.assertFalse(isnull(datetime.now()))
        self.assertTrue(notnull(datetime.now()))

        idx = date_range('1/1/1990', periods=20)
        exp = np.ones(len(idx), dtype=bool)
        tm.assert_numpy_array_equal(notnull(idx), exp)

        idx = np.asarray(idx)
        idx[0] = iNaT
        idx = DatetimeIndex(idx)
        mask = isnull(idx)
        self.assertTrue(mask[0])
        exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)

        # GH 9129
        pidx = idx.to_period(freq='M')
        mask = isnull(pidx)
        self.assertTrue(mask[0])
        exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)

        mask = isnull(pidx[1:])
        exp = np.zeros(len(mask), dtype=bool)
        tm.assert_numpy_array_equal(mask, exp)
示例#6
0
    def test_isnull_nat(self):
        result = isnull([NaT])
        exp = np.array([True])
        tm.assert_numpy_array_equal(result, exp)

        result = isnull(np.array([NaT], dtype=object))
        exp = np.array([True])
        tm.assert_numpy_array_equal(result, exp)
示例#7
0
    def test_isnull_nat(self):
        result = isnull([NaT])
        exp = np.array([True])
        tm.assert_numpy_array_equal(result, exp)

        result = isnull(np.array([NaT], dtype=object))
        exp = np.array([True])
        tm.assert_numpy_array_equal(result, exp)
示例#8
0
    def test_isnull(self):
        self.assertFalse(isnull(1.))
        self.assertTrue(isnull(None))
        self.assertTrue(isnull(np.NaN))
        self.assertTrue(float('nan'))
        self.assertFalse(isnull(np.inf))
        self.assertFalse(isnull(-np.inf))

        # series
        for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]:
            assert isinstance(isnull(s), Series)

        # frame
        for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
                   tm.makeMixedDataFrame()]:
            result = isnull(df)
            expected = df.apply(isnull)
            tm.assert_frame_equal(result, expected)

        # panel
        with catch_warnings(record=True):
            for p in [tm.makePanel(), tm.makePeriodPanel(),
                      tm.add_nans(tm.makePanel())]:
                result = isnull(p)
                expected = p.apply(isnull)
                tm.assert_panel_equal(result, expected)

        # panel 4d
        with catch_warnings(record=True):
            for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
                result = isnull(p)
                expected = p.apply(isnull)
                tm.assert_panel4d_equal(result, expected)
    def test_isnull(self):
        assert not isnull(1.)
        assert isnull(None)
        assert isnull(np.NaN)
        assert float('nan')
        assert not isnull(np.inf)
        assert not isnull(-np.inf)

        # series
        for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]:
            assert isinstance(isnull(s), Series)

        # frame
        for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
                   tm.makeMixedDataFrame()]:
            result = isnull(df)
            expected = df.apply(isnull)
            tm.assert_frame_equal(result, expected)

        # panel
        with catch_warnings(record=True):
            for p in [tm.makePanel(), tm.makePeriodPanel(),
                      tm.add_nans(tm.makePanel())]:
                result = isnull(p)
                expected = p.apply(isnull)
                tm.assert_panel_equal(result, expected)

        # panel 4d
        with catch_warnings(record=True):
            for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
                result = isnull(p)
                expected = p.apply(isnull)
                tm.assert_panel4d_equal(result, expected)
示例#10
0
    def test_period(self):
        idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M')
        exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(idx), exp)
        tm.assert_numpy_array_equal(notnull(idx), ~exp)

        exp = pd.Series([False, True, False])
        s = pd.Series(idx)
        tm.assert_series_equal(isnull(s), exp)
        tm.assert_series_equal(notnull(s), ~exp)
        s = pd.Series(idx, dtype=object)
        tm.assert_series_equal(isnull(s), exp)
        tm.assert_series_equal(notnull(s), ~exp)
示例#11
0
    def test_period(self):
        idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M')
        exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(idx), exp)
        tm.assert_numpy_array_equal(notnull(idx), ~exp)

        exp = pd.Series([False, True, False])
        s = pd.Series(idx)
        tm.assert_series_equal(isnull(s), exp)
        tm.assert_series_equal(notnull(s), ~exp)
        s = pd.Series(idx, dtype=object)
        tm.assert_series_equal(isnull(s), exp)
        tm.assert_series_equal(notnull(s), ~exp)
示例#12
0
    def f(x, y):
        xmask = isnull(x)
        ymask = isnull(y)
        mask = xmask | ymask

        with np.errstate(all='ignore'):
            result = op(x, y)

        if mask.any():
            if is_bool_dtype(result):
                result = result.astype('O')
            np.putmask(result, mask, np.nan)

        return result
示例#13
0
    def f(x, y):
        xmask = isnull(x)
        ymask = isnull(y)
        mask = xmask | ymask

        with np.errstate(all='ignore'):
            result = op(x, y)

        if mask.any():
            if is_bool_dtype(result):
                result = result.astype('O')
            np.putmask(result, mask, np.nan)

        return result
示例#14
0
    def test_empty_object(self):

        for shape in [(4, 0), (4, )]:
            arr = np.empty(shape=shape, dtype=object)
            result = isnull(arr)
            expected = np.ones(shape=shape, dtype=bool)
            tm.assert_numpy_array_equal(result, expected)
示例#15
0
def _hash_scalar(val, encoding='utf8', hash_key=None):
    """
    Hash scalar value

    Returns
    -------
    1d uint64 numpy array of hash value, of length 1
    """

    if isnull(val):
        # this is to be consistent with the _hash_categorical implementation
        return np.array([np.iinfo(np.uint64).max], dtype='u8')

    if getattr(val, 'tzinfo', None) is not None:
        # for tz-aware datetimes, we need the underlying naive UTC value and
        # not the tz aware object or pd extension type (as
        # infer_dtype_from_scalar would do)
        if not isinstance(val, tslib.Timestamp):
            val = tslib.Timestamp(val)
        val = val.tz_convert(None)

    dtype, val = infer_dtype_from_scalar(val)
    vals = np.array([val], dtype=dtype)

    return hash_array(vals, hash_key=hash_key, encoding=encoding,
                      categorize=False)
示例#16
0
def backfill_2d(values, limit=None, mask=None, dtype=None):
    if dtype is None:
        dtype = values.dtype
    _method = None
    if is_float_dtype(values):
        _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        _method = _backfill_2d_datetime
    elif is_integer_dtype(values):
        values = _ensure_float64(values)
        _method = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_2d_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = isnull(values)
    mask = mask.view(np.uint8)

    if np.all(values.shape):
        _method(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values
示例#17
0
def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
    """
    This is intended to be a drop-in replacement for np.argsort which
    handles NaNs. It adds ascending and na_position parameters.
    GH #6399, #5231
    """

    # specially handle Categorical
    if is_categorical_dtype(items):
        return items.argsort(ascending=ascending)

    items = np.asanyarray(items)
    idx = np.arange(len(items))
    mask = isnull(items)
    non_nans = items[~mask]
    non_nan_idx = idx[~mask]
    nan_idx = np.nonzero(mask)[0]
    if not ascending:
        non_nans = non_nans[::-1]
        non_nan_idx = non_nan_idx[::-1]
    indexer = non_nan_idx[non_nans.argsort(kind=kind)]
    if not ascending:
        indexer = indexer[::-1]
    # Finally, place the NaNs at the end or the beginning according to
    # na_position
    if na_position == 'last':
        indexer = np.concatenate([indexer, nan_idx])
    elif na_position == 'first':
        indexer = np.concatenate([nan_idx, indexer])
    else:
        raise ValueError('invalid na_position: {!r}'.format(na_position))
    return indexer
示例#18
0
def test_notnull():
    assert notnull(1.)
    assert not notnull(None)
    assert not notnull(np.NaN)

    with cf.option_context("mode.use_inf_as_null", False):
        assert notnull(np.inf)
        assert notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.all()

    with cf.option_context("mode.use_inf_as_null", True):
        assert not notnull(np.inf)
        assert not notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.sum() == 2

    with cf.option_context("mode.use_inf_as_null", False):
        for s in [
                tm.makeFloatSeries(),
                tm.makeStringSeries(),
                tm.makeObjectSeries(),
                tm.makeTimeSeries(),
                tm.makePeriodSeries()
        ]:
            assert (isinstance(isnull(s), Series))
示例#19
0
def _isfinite(values):
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values) or
            is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
示例#20
0
def _hash_scalar(val, encoding='utf8', hash_key=None):
    """
    Hash scalar value

    Returns
    -------
    1d uint64 numpy array of hash value, of length 1
    """

    if isnull(val):
        # this is to be consistent with the _hash_categorical implementation
        return np.array([np.iinfo(np.uint64).max], dtype='u8')

    if getattr(val, 'tzinfo', None) is not None:
        # for tz-aware datetimes, we need the underlying naive UTC value and
        # not the tz aware object or pd extension type (as
        # infer_dtype_from_scalar would do)
        if not isinstance(val, tslib.Timestamp):
            val = tslib.Timestamp(val)
        val = val.tz_convert(None)

    dtype, val = infer_dtype_from_scalar(val)
    vals = np.array([val], dtype=dtype)

    return hash_array(vals,
                      hash_key=hash_key,
                      encoding=encoding,
                      categorize=False)
示例#21
0
    def wrapper(self, other):
        msg = "cannot compare a TimedeltaIndex with type {0}"
        func = getattr(super(TimedeltaIndex, self), opname)
        if _is_convertible_to_td(other) or other is NaT:
            try:
                other = _to_m8(other)
            except ValueError:
                # failed to parse as timedelta
                raise TypeError(msg.format(type(other)))
            result = func(other)
            if isnull(other):
                result.fill(nat_result)
        else:
            if not is_list_like(other):
                raise TypeError(msg.format(type(other)))

            other = TimedeltaIndex(other).values
            result = func(other)
            result = _values_from_object(result)

            if isinstance(other, Index):
                o_mask = other.values.view('i8') == iNaT
            else:
                o_mask = other.view('i8') == iNaT

            if o_mask.any():
                result[o_mask] = nat_result

        if self.hasnans:
            result[self._isnan] = nat_result

        # support of bool dtype indexers
        if is_bool_dtype(result):
            return result
        return Index(result)
示例#22
0
def test_notnull():
    assert notnull(1.)
    assert not notnull(None)
    assert not notnull(np.NaN)

    with cf.option_context("mode.use_inf_as_null", False):
        assert notnull(np.inf)
        assert notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.all()

    with cf.option_context("mode.use_inf_as_null", True):
        assert not notnull(np.inf)
        assert not notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.sum() == 2

    with cf.option_context("mode.use_inf_as_null", False):
        for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]:
            assert (isinstance(isnull(s), Series))
示例#23
0
def _isfinite(values):
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)
    if (is_complex_dtype(values) or is_float_dtype(values) or
            is_integer_dtype(values) or is_bool_dtype(values)):
        return ~np.isfinite(values)
    return ~np.isfinite(values.astype('float64'))
示例#24
0
def backfill_2d(values, limit=None, mask=None, dtype=None):
    if dtype is None:
        dtype = values.dtype
    _method = None
    if is_float_dtype(values):
        _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        _method = _backfill_2d_datetime
    elif is_integer_dtype(values):
        values = _ensure_float64(values)
        _method = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_2d_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = isnull(values)
    mask = mask.view(np.uint8)

    if np.all(values.shape):
        _method(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values
示例#25
0
def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
    """
    This is intended to be a drop-in replacement for np.argsort which
    handles NaNs. It adds ascending and na_position parameters.
    GH #6399, #5231
    """

    # specially handle Categorical
    if is_categorical_dtype(items):
        return items.argsort(ascending=ascending, kind=kind)

    items = np.asanyarray(items)
    idx = np.arange(len(items))
    mask = isnull(items)
    non_nans = items[~mask]
    non_nan_idx = idx[~mask]
    nan_idx = np.nonzero(mask)[0]
    if not ascending:
        non_nans = non_nans[::-1]
        non_nan_idx = non_nan_idx[::-1]
    indexer = non_nan_idx[non_nans.argsort(kind=kind)]
    if not ascending:
        indexer = indexer[::-1]
    # Finally, place the NaNs at the end or the beginning according to
    # na_position
    if na_position == 'last':
        indexer = np.concatenate([indexer, nan_idx])
    elif na_position == 'first':
        indexer = np.concatenate([nan_idx, indexer])
    else:
        raise ValueError('invalid na_position: {!r}'.format(na_position))
    return indexer
示例#26
0
def nanprod(values, axis=None, skipna=True):
    mask = isnull(values)
    if skipna and not is_any_int_dtype(values):
        values = values.copy()
        values[mask] = 1
    result = values.prod(axis)
    return _maybe_null_out(result, axis, mask)
示例#27
0
文件: tdi.py 项目: shwedosh/pandas
    def wrapper(self, other):
        msg = "cannot compare a TimedeltaIndex with type {0}"
        func = getattr(super(TimedeltaIndex, self), opname)
        if _is_convertible_to_td(other) or other is NaT:
            try:
                other = _to_m8(other)
            except ValueError:
                # failed to parse as timedelta
                raise TypeError(msg.format(type(other)))
            result = func(other)
            if isnull(other):
                result.fill(nat_result)
        else:
            if not is_list_like(other):
                raise TypeError(msg.format(type(other)))

            other = TimedeltaIndex(other).values
            result = func(other)
            result = _values_from_object(result)

            if isinstance(other, Index):
                o_mask = other.values.view('i8') == iNaT
            else:
                o_mask = other.view('i8') == iNaT

            if o_mask.any():
                result[o_mask] = nat_result

        if self.hasnans:
            result[self._isnan] = nat_result

        # support of bool dtype indexers
        if is_bool_dtype(result):
            return result
        return Index(result)
示例#28
0
def nanprod(values, axis=None, skipna=True):
    mask = isnull(values)
    if skipna and not is_any_int_dtype(values):
        values = values.copy()
        values[mask] = 1
    result = values.prod(axis)
    return _maybe_null_out(result, axis, mask)
示例#29
0
    def test_empty_object(self):

        for shape in [(4, 0), (4,)]:
            arr = np.empty(shape=shape, dtype=object)
            result = isnull(arr)
            expected = np.ones(shape=shape, dtype=bool)
            tm.assert_numpy_array_equal(result, expected)
示例#30
0
    def shift(self, periods, freq=None, axis=0):
        if periods == 0:
            return self.copy()

        # no special handling of fill values yet
        if not isnull(self.fill_value):
            shifted = self.to_dense().shift(periods, freq=freq,
                                            axis=axis)
            return shifted.to_sparse(fill_value=self.fill_value,
                                     kind=self.kind)

        if freq is not None:
            return self._constructor(
                self.sp_values, sparse_index=self.sp_index,
                index=self.index.shift(periods, freq),
                fill_value=self.fill_value).__finalize__(self)

        int_index = self.sp_index.to_int_index()
        new_indices = int_index.indices + periods
        start, end = new_indices.searchsorted([0, int_index.length])

        new_indices = new_indices[start:end]
        new_sp_index = _make_index(len(self), new_indices, self.sp_index)

        arr = self.values._simple_new(self.sp_values[start:end].copy(),
                                      new_sp_index, fill_value=np.nan)
        return self._constructor(arr, index=self.index).__finalize__(self)
示例#31
0
    def test_isnull_lists(self):
        result = isnull([[False]])
        exp = np.array([[False]])
        tm.assert_numpy_array_equal(result, exp)

        result = isnull([[1], [2]])
        exp = np.array([[False], [False]])
        tm.assert_numpy_array_equal(result, exp)

        # list of strings / unicode
        result = isnull(['foo', 'bar'])
        exp = np.array([False, False])
        tm.assert_numpy_array_equal(result, exp)

        result = isnull([u('foo'), u('bar')])
        exp = np.array([False, False])
        tm.assert_numpy_array_equal(result, exp)
示例#32
0
    def test_isnull_lists(self):
        result = isnull([[False]])
        exp = np.array([[False]])
        tm.assert_numpy_array_equal(result, exp)

        result = isnull([[1], [2]])
        exp = np.array([[False], [False]])
        tm.assert_numpy_array_equal(result, exp)

        # list of strings / unicode
        result = isnull(['foo', 'bar'])
        exp = np.array([False, False])
        tm.assert_numpy_array_equal(result, exp)

        result = isnull([u('foo'), u('bar')])
        exp = np.array([False, False])
        tm.assert_numpy_array_equal(result, exp)
示例#33
0
def mask_missing(arr, values_to_mask):
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True
    """
    dtype, values_to_mask = infer_dtype_from_array(values_to_mask)

    try:
        values_to_mask = np.array(values_to_mask, dtype=dtype)

    except Exception:
        values_to_mask = np.array(values_to_mask, dtype=object)

    na_mask = isnull(values_to_mask)
    nonna = values_to_mask[~na_mask]

    mask = None
    for x in nonna:
        if mask is None:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask = False
            else:
                mask = arr == x

            # if x is a string and arr is not, then we get False and we must
            # expand the mask to size arr.shape
            if is_scalar(mask):
                mask = np.zeros(arr.shape, dtype=bool)
        else:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask |= False
            else:
                mask |= arr == x

    if na_mask.any():
        if mask is None:
            mask = isnull(arr)
        else:
            mask |= isnull(arr)

    return mask
示例#34
0
def mask_missing(arr, values_to_mask):
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True
    """
    dtype, values_to_mask = infer_dtype_from_array(values_to_mask)

    try:
        values_to_mask = np.array(values_to_mask, dtype=dtype)

    except Exception:
        values_to_mask = np.array(values_to_mask, dtype=object)

    na_mask = isnull(values_to_mask)
    nonna = values_to_mask[~na_mask]

    mask = None
    for x in nonna:
        if mask is None:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask = False
            else:
                mask = arr == x

            # if x is a string and arr is not, then we get False and we must
            # expand the mask to size arr.shape
            if is_scalar(mask):
                mask = np.zeros(arr.shape, dtype=bool)
        else:

            # numpy elementwise comparison warning
            if is_numeric_v_string_like(arr, x):
                mask |= False
            else:
                mask |= arr == x

    if na_mask.any():
        if mask is None:
            mask = isnull(arr)
        else:
            mask |= isnull(arr)

    return mask
示例#35
0
def nansem(values, axis=None, skipna=True, ddof=1):
    var = nanvar(values, axis, skipna, ddof=ddof)

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
    count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype)
    var = nanvar(values, axis, skipna, ddof=ddof)

    return np.sqrt(var) / np.sqrt(count)
示例#36
0
 def test_isnull_numpy_nat(self):
     arr = np.array([
         NaT,
         np.datetime64('NaT'),
         np.timedelta64('NaT'),
         np.datetime64('NaT', 's')
     ])
     result = isnull(arr)
     expected = np.array([True] * 4)
     tm.assert_numpy_array_equal(result, expected)
示例#37
0
def nansem(values, axis=None, skipna=True, ddof=1):
    var = nanvar(values, axis, skipna, ddof=ddof)

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
    count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype)
    var = nanvar(values, axis, skipna, ddof=ddof)

    return np.sqrt(var) / np.sqrt(count)
示例#38
0
def _bins_to_cuts(x, bins, right=True, labels=None,
                  precision=3, include_lowest=False,
                  dtype=None, duplicates='raise'):

    if duplicates not in ['raise', 'drop']:
        raise ValueError("invalid value for 'duplicates' parameter, "
                         "valid options are: raise, drop")

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = algos.take_nd(bins, ids)
        result = Categorical(result, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == 'raise':
            raise ValueError("Bin edges must be unique: {}.\nYou "
                             "can drop duplicate edges by setting "
                             "the 'duplicates' kwarg".format(repr(bins)))
        else:
            bins = unique_bins

    side = 'left' if right else 'right'
    ids = _ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        ids[x == bins[0]] = 1

    na_mask = isnull(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if labels is None:
            labels = _format_labels(bins, precision, right=right,
                                    include_lowest=include_lowest,
                                    dtype=dtype)
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
        if not is_categorical_dtype(labels):
            labels = Categorical(labels, categories=labels, ordered=True)

        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
示例#39
0
def _bins_to_cuts(x, bins, right=True, labels=None,
                  precision=3, include_lowest=False,
                  dtype=None, duplicates='raise'):

    if duplicates not in ['raise', 'drop']:
        raise ValueError("invalid value for 'duplicates' parameter, "
                         "valid options are: raise, drop")

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = algos.take_nd(bins, ids)
        result = Categorical(result, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == 'raise':
            raise ValueError("Bin edges must be unique: {}.\nYou "
                             "can drop duplicate edges by setting "
                             "the 'duplicates' kwarg".format(repr(bins)))
        else:
            bins = unique_bins

    side = 'left' if right else 'right'
    ids = _ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        ids[x == bins[0]] = 1

    na_mask = isnull(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if labels is None:
            labels = _format_labels(bins, precision, right=right,
                                    include_lowest=include_lowest,
                                    dtype=dtype)
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
        if not is_categorical_dtype(labels):
            labels = Categorical(labels, ordered=True)

        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
示例#40
0
    def fillna(self, value, downcast=None):
        if downcast is not None:
            raise NotImplementedError

        if issubclass(self.dtype.type, np.floating):
            value = float(value)

        new_values = np.where(isnull(self.sp_values), value, self.sp_values)
        fill_value = value if self._null_fill_value else self.fill_value

        return self._simple_new(new_values, self.sp_index,
                                fill_value=fill_value)
示例#41
0
def nanskew(values, axis=None, skipna=True):
    """ Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    """

    values = _values_from_object(values)
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid='ignore', divide='ignore'):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
示例#42
0
 def test_0d_array(self):
     self.assertTrue(isnull(np.array(np.nan)))
     self.assertFalse(isnull(np.array(0.0)))
     self.assertFalse(isnull(np.array(0)))
     # test object dtype
     self.assertTrue(isnull(np.array(np.nan, dtype=object)))
     self.assertFalse(isnull(np.array(0.0, dtype=object)))
     self.assertFalse(isnull(np.array(0, dtype=object)))
示例#43
0
 def test_0d_array(self):
     assert isnull(np.array(np.nan))
     assert not isnull(np.array(0.0))
     assert not isnull(np.array(0))
     # test object dtype
     assert isnull(np.array(np.nan, dtype=object))
     assert not isnull(np.array(0.0, dtype=object))
     assert not isnull(np.array(0, dtype=object))
示例#44
0
 def test_0d_array(self):
     self.assertTrue(isnull(np.array(np.nan)))
     self.assertFalse(isnull(np.array(0.0)))
     self.assertFalse(isnull(np.array(0)))
     # test object dtype
     self.assertTrue(isnull(np.array(np.nan, dtype=object)))
     self.assertFalse(isnull(np.array(0.0, dtype=object)))
     self.assertFalse(isnull(np.array(0, dtype=object)))
示例#45
0
    def fillna(self, value, downcast=None):
        if downcast is not None:
            raise NotImplementedError

        if issubclass(self.dtype.type, np.floating):
            value = float(value)

        if self._null_fill_value:
            return self._simple_new(self.sp_values, self.sp_index,
                                    fill_value=value)
        else:
            new_values = self.sp_values.copy()
            new_values[isnull(new_values)] = value
            return self._simple_new(new_values, self.sp_index,
                                    fill_value=self.fill_value)
示例#46
0
    def fillna(self, value, downcast=None):
        if downcast is not None:
            raise NotImplementedError

        if issubclass(self.dtype.type, np.floating):
            value = float(value)

        if self._null_fill_value:
            return self._simple_new(self.sp_values, self.sp_index,
                                    fill_value=value)
        else:
            new_values = self.sp_values.copy()
            new_values[isnull(new_values)] = value
            return self._simple_new(new_values, self.sp_index,
                                    fill_value=self.fill_value)
示例#47
0
    def test_timedelta_other_units(self):
        idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
        exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(idx), exp)
        tm.assert_numpy_array_equal(notnull(idx), ~exp)
        tm.assert_numpy_array_equal(isnull(idx.values), exp)
        tm.assert_numpy_array_equal(notnull(idx.values), ~exp)

        for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
                      'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
                      'timedelta64[ns]']:
            values = idx.values.astype(dtype)

            exp = np.array([False, True, False])
            tm.assert_numpy_array_equal(isnull(values), exp)
            tm.assert_numpy_array_equal(notnull(values), ~exp)

            exp = pd.Series([False, True, False])
            s = pd.Series(values)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
            s = pd.Series(values, dtype=object)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
示例#48
0
    def test_datetime_other_units(self):
        idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02'])
        exp = np.array([False, True, False])
        tm.assert_numpy_array_equal(isnull(idx), exp)
        tm.assert_numpy_array_equal(notnull(idx), ~exp)
        tm.assert_numpy_array_equal(isnull(idx.values), exp)
        tm.assert_numpy_array_equal(notnull(idx.values), ~exp)

        for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                      'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                      'datetime64[ns]']:
            values = idx.values.astype(dtype)

            exp = np.array([False, True, False])
            tm.assert_numpy_array_equal(isnull(values), exp)
            tm.assert_numpy_array_equal(notnull(values), ~exp)

            exp = pd.Series([False, True, False])
            s = pd.Series(values)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
            s = pd.Series(values, dtype=object)
            tm.assert_series_equal(isnull(s), exp)
            tm.assert_series_equal(notnull(s), ~exp)
示例#49
0
 def dropna(self, axis=0, inplace=False, **kwargs):
     """
     Analogous to Series.dropna. If fill_value=NaN, returns a dense Series
     """
     # TODO: make more efficient
     axis = self._get_axis_number(axis or 0)
     dense_valid = self.to_dense().valid()
     if inplace:
         raise NotImplementedError("Cannot perform inplace dropna"
                                   " operations on a SparseSeries")
     if isnull(self.fill_value):
         return dense_valid
     else:
         dense_valid = dense_valid[dense_valid != self.fill_value]
         return dense_valid.to_sparse(fill_value=self.fill_value)
示例#50
0
文件: nanops.py 项目: shwedosh/pandas
def _get_values(values,
                skipna,
                fill_value=None,
                fill_value_typ=None,
                isfinite=False,
                copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype,
                                 fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max
示例#51
0
def _ensure_datetimelike_to_i8(other):
    """ helper for coercing an input scalar or array to i8 """
    if lib.isscalar(other) and isnull(other):
        other = iNaT
    elif isinstance(other, ABCIndexClass):
        # convert tz if needed
        if getattr(other, 'tz', None) is not None:
            other = other.tz_localize(None).asi8
        else:
            other = other.asi8
    else:
        try:
            other = np.array(other, copy=False).view('i8')
        except TypeError:
            # period array cannot be coerces to int
            other = Index(other).asi8
    return other
示例#52
0
文件: base.py 项目: shwedosh/pandas
def _ensure_datetimelike_to_i8(other):
    """ helper for coercing an input scalar or array to i8 """
    if lib.isscalar(other) and isnull(other):
        other = iNaT
    elif isinstance(other, ABCIndexClass):
        # convert tz if needed
        if getattr(other, 'tz', None) is not None:
            other = other.tz_localize(None).asi8
        else:
            other = other.asi8
    else:
        try:
            other = np.array(other, copy=False).view('i8')
        except TypeError:
            # period array cannot be coerces to int
            other = Index(other).asi8
    return other
示例#53
0
def make_sparse(arr, kind='block', fill_value=None):
    """
    Convert ndarray to sparse format

    Parameters
    ----------
    arr : ndarray
    kind : {'block', 'integer'}
    fill_value : NaN or another value

    Returns
    -------
    (sparse_values, index) : (ndarray, SparseIndex)
    """

    arr = _sanitize_values(arr)

    if arr.ndim > 1:
        raise TypeError("expected dimension <= 1 data")

    if fill_value is None:
        fill_value = na_value_for_dtype(arr.dtype)

    if isnull(fill_value):
        mask = notnull(arr)
    else:
        # For str arrays in NumPy 1.12.0, operator!= below isn't
        # element-wise but just returns False if fill_value is not str,
        # so cast to object comparison to be safe
        if is_string_dtype(arr):
            arr = arr.astype(object)

        mask = arr != fill_value

    length = len(arr)
    if length != mask.size:
        # the arr is a SparseArray
        indices = mask.sp_index.indices
    else:
        indices = mask.nonzero()[0].astype(np.int32)

    index = _make_index(length, indices, kind)
    sparsified_values = arr[mask]
    return sparsified_values, index, fill_value
示例#54
0
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max
示例#55
0
def test_nan_to_nat_conversions():

    df = DataFrame(dict({
        'A': np.asarray(
            lrange(10), dtype='float64'),
        'B': Timestamp('20010101')
    }))
    df.iloc[3:6, :] = np.nan
    result = df.loc[4, 'B'].value
    assert (result == tslib.iNaT)

    s = df['B'].copy()
    s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan)
    assert (isnull(s[8]))

    # numpy < 1.7.0 is wrong
    from distutils.version import LooseVersion
    if LooseVersion(np.__version__) >= '1.7.0':
        assert (s[8].value == np.datetime64('NaT').astype(np.int64))