示例#1
0
文件: frame.py 项目: amirneto/pandas
    def _combine_match_index(self, other, func, level=None, fill_value=None):
        new_data = {}

        if fill_value is not None:
            raise NotImplementedError
        if level is not None:
            raise NotImplementedError

        new_index = self.index.union(other.index)
        this = self
        if self.index is not new_index:
            this = self.reindex(new_index)

        if other.index is not new_index:
            other = other.reindex(new_index)

        for col, series in compat.iteritems(this):
            new_data[col] = func(series.values, other.values)

        # fill_value is a function of our operator
        if isnull(other.fill_value) or isnull(self.default_fill_value):
            fill_value = np.nan
        else:
            fill_value = func(np.float64(self.default_fill_value),
                              np.float64(other.fill_value))

        return self._constructor(new_data,
                                 index=new_index,
                                 columns=self.columns,
                                 default_fill_value=fill_value,
                                 fill_value=self.default_fill_value).__finalize__(self)
示例#2
0
def assert_almost_equal(a, b):
    """
    Assert that ``a`` and ``b`` are equal, comparing numbers to 5 decimals.

    Dicts are delegated to ``assert_dict_equal``, strings are compared
    exactly, iterables are compared element by element (recursively), null
    values must be null on both sides, and any other object is compared
    with ``==``.

    Returns True for the string and iterable paths, the result of
    ``assert_dict_equal`` for dicts and None otherwise.
    """
    if any(isinstance(obj, dict) for obj in (a, b)):
        return assert_dict_equal(a, b)

    if isinstance(a, basestring):
        assert a == b, (a, b)
        return True

    if isiterable(a):
        np.testing.assert_(isiterable(b))
        np.testing.assert_equal(len(a), len(b))
        # only walk the elements when the fast whole-array check fails
        if not np.array_equal(a, b):
            for pos in xrange(len(a)):
                assert_almost_equal(a[pos], b[pos])
        return True

    def describe(left, right):
        return 'expected %.5f but got %.5f' % (left, right)

    if isnull(a):
        np.testing.assert_(isnull(b))
        return

    if not isinstance(a, (bool, float, int)):
        assert(a == b)
        return

    if abs(a) < 1e-5:
        # case for zero: compare the two values directly
        actual, desired = a, b
    else:
        # otherwise compare the ratio of the two values against 1
        actual, desired = 1, a/b
    np.testing.assert_almost_equal(
        actual, desired, decimal=5, err_msg=describe(a, b), verbose=False)
示例#3
0
文件: ops.py 项目: agijsberts/pandas
    def _convert_to_array(self, values, name=None, other=None):
        """
        Convert ``values`` into an array-like usable in a datetime/timedelta
        operation.

        The conversion is chosen from ``lib.infer_dtype(values)``; a value
        that is not list-like is first wrapped in a one-element ndarray.

        Parameters
        ----------
        values : scalar or list-like
        name : operator name, only consulted for integer input, where
            anything other than ``__truediv__``, ``__div__`` or ``__mul__``
            is rejected
        other : the other operand; its ``dtype`` is used when ``values`` is
            entirely null

        Returns
        -------
        The converted values.

        Raises
        ------
        TypeError
            For input types that cannot take part in the operation.
        """
        from pandas.tseries.timedeltas import to_timedelta

        coerce = True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if the other operand is a timedelta but every value here is
            # null (e.g. pd.NaT), we are on the wrong path: build an all-iNaT
            # array of the other operand's dtype instead
            if (other is not None and other.dtype == 'timedelta64[ns]' and
                    all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # anything that is not already a datetime64 ndarray/Series gets
            # converted
            elif not (isinstance(values, (np.ndarray, pd.Series)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers pass through unchanged for the operators listed
            # below and are rejected for every other operator
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets: each one must expose a ``delta`` attribute
            os = np.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError("cannot use a non-absolute DateOffset in "
                                "datetime/timedelta operations [{0}]".format(
                                    ', '.join([com.pprint_thing(v)
                                               for v in values[mask]])))
            values = to_timedelta(os, coerce=coerce)
        elif inferred_type == 'floating':

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            # NOTE(review): ``other`` defaults to None and is dereferenced
            # here without a check -- confirm callers always pass it
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
示例#4
0
    def test_operators_none_as_na(self):
        """
        Arithmetic on an object frame holding None must match the same
        arithmetic on the NaN-filled frame, with nulls reported as None.
        """
        df = DataFrame({"col1": [2, 5.0, 123, None],
                        "col2": [1, 2, 3, 4]}, dtype=object)

        def as_object_with_none(frame):
            # since filling converts dtypes from object, the expected frame
            # is cast back to object and its nulls replaced by None
            frame = frame.astype(object)
            frame[com.isnull(frame)] = None
            return frame

        for op in (operator.add, operator.sub, operator.mul,
                   operator.truediv):
            filled = df.fillna(np.nan)

            # frame against a scalar
            assert_frame_equal(op(df, 3), as_object_with_none(op(filled, 3)))

            # frame against itself, then against 7-filled copies of itself
            expected = as_object_with_none(op(filled, filled))
            assert_frame_equal(op(df, df), expected)
            assert_frame_equal(op(df, df.fillna(7)), expected)
            assert_frame_equal(op(df.fillna(7), df), expected,
                               check_dtype=False)
示例#5
0
文件: ops.py 项目: ghl3/pandas
    def _convert_to_array(self, values, name=None, other=None):
        """
        Convert ``values`` into an array-like usable in a datetime/timedelta
        operation.

        The conversion is chosen from ``lib.infer_dtype(values)``; a value
        that is not list-like is first wrapped in a one-element ndarray.

        Parameters
        ----------
        values : scalar or list-like
        name : operator name, only consulted for integer input, where
            anything other than ``__truediv__``, ``__div__`` or ``__mul__``
            is rejected
        other : the other operand; its ``dtype`` is used when ``values`` is
            entirely null

        Returns
        -------
        The converted values; input accepted by ``self._is_offset`` is
        returned as is.

        Raises
        ------
        TypeError
            For input types that cannot take part in the operation.
        """
        from pandas.tseries.timedeltas import to_timedelta

        # keep the caller's object: the tz-aware scalar check below needs the
        # value as it was before being wrapped in an ndarray
        ovalues = values
        if not is_list_like(values):
            values = np.array([values])

        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if the other operand is a timedelta but every value here is
            # null (e.g. pd.NaT), we are on the wrong path: build an all-iNaT
            # array of the other operand's dtype instead
            if (other is not None and other.dtype == 'timedelta64[ns]' and
                    all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # datetime with tz
            elif isinstance(ovalues, datetime.datetime) and hasattr(ovalues,'tz'):
                values = pd.DatetimeIndex(values)
            # datetime array with tz
            elif com.is_datetimetz(values):
                if isinstance(values, pd.Series):
                    values = values._values
            # anything that is not already a datetime64 ndarray/Series gets
            # converted
            elif not (isinstance(values, (np.ndarray, pd.Series)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = to_timedelta(values, errors='coerce')
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers pass through unchanged for the operators listed
            # below and are rejected for every other operator
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif inferred_type == 'floating':
            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            # NOTE(review): ``other`` defaults to None and is dereferenced
            # here without a check -- confirm callers always pass it
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        elif self._is_offset(values):
            # offsets are handed back untouched
            return values
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
示例#6
0
def test_isnull_lists():
    """isnull on nested lists yields an equally shaped all-False mask."""
    cases = [
        ([[False]], np.array([[False]])),
        ([[1], [2]], np.array([[False], [False]])),
    ]
    for data, exp in cases:
        assert np.array_equal(isnull(data), exp)
示例#7
0
def test_isnull_nat():
    """isnull flags NaT both in a plain list and in an object ndarray."""
    exp = np.array([True])
    for data in ([NaT], np.array([NaT], dtype=object)):
        assert np.array_equal(isnull(data), exp)
示例#8
0
    def _convert_to_array(self, values, name=None, other=None):
        """
        Convert ``values`` into an array-like usable in a datetime/timedelta
        operation.

        The conversion is chosen from ``lib.infer_dtype(values)``; a value
        that is not list-like is first wrapped in a one-element ndarray.

        Parameters
        ----------
        values : scalar or list-like
        name : operator name, only consulted for integer input, where
            anything other than ``__truediv__``, ``__div__`` or ``__mul__``
            is rejected
        other : the other operand; its ``dtype`` is used when ``values`` is
            entirely null

        Returns
        -------
        The converted values.

        Raises
        ------
        TypeError
            For input types that cannot take part in the operation.
        """
        from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

        # the coercion mode handed to the timedelta cast depends on the
        # installed numpy version flag
        coerce = "compat" if pd._np_version_under1p7 else True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)

        if inferred_type in ("datetime64", "datetime", "date", "time"):
            # if the other operand is a timedelta but every value here is
            # null (e.g. pd.NaT), we are on the wrong path: build an all-iNaT
            # array of the other operand's dtype instead
            if other is not None and other.dtype == "timedelta64[ns]" and all(isnull(v) for v in values):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT

            # a datelike: anything that is not already a datetime64
            # array/Series gets converted
            elif not (isinstance(values, (pa.Array, pd.Series)) and com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
        elif inferred_type in ("timedelta", "timedelta64"):
            # have a timedelta, convert to ns here
            values = _possibly_cast_to_timedelta(values, coerce=coerce)
        elif inferred_type == "integer":
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == "m":
                values = values.astype("timedelta64[ns]")
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers pass through unchanged for the operators listed
            # below and are rejected for every other operator
            elif name not in ("__truediv__", "__div__", "__mul__"):
                raise TypeError("incompatible type for a datetime/timedelta " "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets: each one must expose a ``delta`` attribute
            os = pa.array([getattr(v, "delta", None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError(
                    "cannot use a non-absolute DateOffset in "
                    "datetime/timedelta operations [{0}]".format(", ".join([com.pprint_thing(v) for v in values[mask]]))
                )
            values = _possibly_cast_to_timedelta(os, coerce=coerce)
        elif inferred_type == "floating":

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            # NOTE(review): ``other`` defaults to None and is dereferenced
            # here without a check -- confirm callers always pass it
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT
            else:
                raise TypeError(
                    "incompatible type [{0}] for a datetime/timedelta " "operation".format(pa.array(values).dtype)
                )
        else:
            raise TypeError(
                "incompatible type [{0}] for a datetime/timedelta" " operation".format(pa.array(values).dtype)
            )

        return values
示例#9
0
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, convert_timedeltas=True, copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, "dtype"):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == "coerce":
            new_values = _possibly_cast_to_datetime(values, "M8[ns]", errors="coerce")

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values, convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == "coerce":
            from pandas.tseries.timedeltas import to_timedelta

            new_values = to_timedelta(values, coerce=True)

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)

                # if we are all nans then leave me alone
                if not isnull(new_values).all():
                    values = new_values

            except:
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
示例#10
0
    def fillna(self, fill_value=None, method=None, limit=None, **kwargs):
        """
        Fill NA/NaN values with a value or with the specified method.

        Parameters
        ----------
        fill_value : scalar, default None
            Value used to fill holes (e.g. 0); None is treated as NaN. When
            ``method`` is None a non-null fill value must be one of the
            categories.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            pad / ffill: propagate last valid observation forward to next
            valid
            backfill / bfill: use NEXT valid observation to fill gap
        limit : int, default None
            Not implemented: any value other than None raises.

        Returns
        -------
        filled : Categorical with NA/NaN filled

        Raises
        ------
        NotImplementedError
            If ``limit`` is given.
        ValueError
            If ``method`` is None and ``fill_value`` is neither null nor one
            of the categories.
        """

        if fill_value is None:
            fill_value = np.nan
        if limit is not None:
            raise NotImplementedError

        values = self._codes

        # Make sure that we also get NA in categories
        if self.categories.dtype.kind in ['S', 'O', 'f']:
            if np.nan in self.categories:
                # work on a copy so the codes held by self stay untouched
                values = values.copy()
                nan_pos = np.where(isnull(self.categories))[0]
                # we only have one NA in categories
                values[values == nan_pos] = -1


        # pad / bfill
        if method is not None:

            # interpolate on the dense values, then map the filled values
            # back to category codes
            values = self.to_dense().reshape(-1,len(self))
            values = com.interpolate_2d(
                values, method, 0, None, fill_value).astype(self.categories.dtype)[0]
            values = _get_codes_for_values(values, self.categories)

        else:

            if not com.isnull(fill_value) and fill_value not in self.categories:
                raise ValueError("fill value must be in categories")

            # code -1 marks the missing entries
            mask = values==-1
            if mask.any():
                values = values.copy()
                values[mask] = self.categories.get_loc(fill_value)

        return Categorical(values, categories=self.categories, ordered=self.ordered,
                           name=self.name, fastpath=True)
示例#11
0
def test_isnull():
    """
    isnull on scalars, Series, frames, panels and 4d panels: scalars give a
    plain truth value, Series give a Series, and the other containers must
    match applying isnull through ``apply``.
    """
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    def check_matches_apply(objs, assert_equal):
        for obj in objs:
            result = isnull(obj)
            expected = obj.apply(isnull)
            assert_equal(result, expected)

    # series
    all_series = [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]
    for s in all_series:
        assert isinstance(isnull(s), Series)

    # frame
    check_matches_apply(
        [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
         tm.makeMixedDataFrame()],
        tm.assert_frame_equal)

    # panel
    check_matches_apply(
        [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel())],
        tm.assert_panel_equal)

    # panel 4d
    check_matches_apply(
        [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())],
        tm.assert_panel4d_equal)
示例#12
0
文件: strings.py 项目: APWaldo/pandas
def str_cat(arr, others=None, sep=None, na_rep=None):
    """
    Concatenate arrays of strings with given separator

    Parameters
    ----------
    arr : list or array-like
    others : list or array, or list of arrays
    sep : string or None, default None
        None is treated as the empty string
    na_rep : string or None, default None
        If None, an NA in any array will propagate

    Returns
    -------
    concat : array when ``others`` is given (element-wise concatenation);
        otherwise a single string joining the elements of ``arr``, or NaN
        when ``arr`` holds an NA and ``na_rep`` is None
    """
    if sep is None:
        sep = ''

    if others is not None:
        arrays = _get_array_list(arr, others)

        n = _length_check(arrays)
        masks = np.array([isnull(x) for x in arrays])
        cats = None

        if na_rep is None:
            # a position is NA in the result as soon as any input is NA there
            na_mask = np.logical_or.reduce(masks, axis=0)

            result = np.empty(n, dtype=object)
            np.putmask(result, na_mask, np.nan)

            # logical inversion of a boolean mask must use ``~``: unary
            # minus is not supported on boolean arrays
            notmask = ~na_mask

            tuples = zip(*[x[notmask] for x in arrays])
            cats = [sep.join(tup) for tup in tuples]

            result[notmask] = cats
        else:
            # substitute na_rep for the missing entries, then concatenate the
            # arrays one after the other
            for i, x in enumerate(arrays):
                x = np.where(masks[i], na_rep, x)
                if cats is None:
                    cats = x
                else:
                    cats = cats + sep + x

            result = cats

        return result
    else:
        arr = np.asarray(arr, dtype=object)
        mask = isnull(arr)
        if na_rep is None and mask.any():
            return np.nan
        return sep.join(np.where(mask, na_rep, arr))
示例#13
0
    def test_na_handling(self):
        """cut keeps NaN inputs as NaN, with and without labels."""
        arr = np.arange(0, 0.75, 0.01)
        arr[::3] = np.nan

        # first with the default labels, then with integer bin indicators
        for extra in ({}, {'labels': False}):
            labels = cut(arr, 4, **extra)
            ex_labels = np.where(com.isnull(arr), np.nan, labels)
            tm.assert_almost_equal(labels, ex_labels)
示例#14
0
文件: ops.py 项目: ankravch/pandas
    def na_op(x, y):
        """
        Compare ``x`` with ``y`` using the comparison held by the enclosing
        scope (``op`` / ``name``) and overwrite the positions that are null
        in a datetime-like operand with ``masker``.

        Raises TypeError for a datetime-like versus numeric comparison or
        when the comparison method returns NotImplemented.
        """

        # dispatch to the categorical if we have a categorical
        # in either operand
        if is_categorical_dtype(x):
            return op(x, y)
        elif is_categorical_dtype(y) and not isscalar(y):
            return op(y, x)

        if is_object_dtype(x.dtype):
            result = _comp_method_OBJECT_ARRAY(op, x, y)
        else:

            # we want to compare like types
            # we only want to convert to integer like if
            # we are not NotImplemented, otherwise
            # we would allow datetime64 (but viewed as i8) against
            # integer comparisons
            if is_datetimelike_v_numeric(x, y):
                raise TypeError("invalid type comparison")

            # numpy does not like comparisons vs None: a null scalar is
            # unequal to everything, so only ``__ne__`` yields all True
            if isscalar(y) and isnull(y):
                if name == '__ne__':
                    return np.ones(len(x), dtype=bool)
                else:
                    return np.zeros(len(x), dtype=bool)

            # we have a datetime/timedelta and may need to convert
            mask = None
            if (needs_i8_conversion(x) or
                    (not isscalar(y) and needs_i8_conversion(y))):

                # record where the operands are null before they are viewed
                # as int64
                if isscalar(y):
                    mask = isnull(x)
                    y = _index.convert_scalar(x, _values_from_object(y))
                else:
                    mask = isnull(x) | isnull(y)
                    y = y.view('i8')
                x = x.view('i8')

            try:
                result = getattr(x, name)(y)
                if result is NotImplemented:
                    raise TypeError("invalid type comparison")
            except AttributeError:
                # x has no method called ``name``: fall back to the operator
                result = op(x, y)

            if mask is not None and mask.any():
                result[mask] = masker

        return result
示例#15
0
文件: nanops.py 项目: X1mengYu/pandas
    def f(x, y):
        """
        Apply ``op`` to ``x`` and ``y`` and put NaN wherever either operand
        is null; a boolean result is cast to object first so that it can
        hold NaN.
        """
        missing = isnull(x) | isnull(y)

        out = op(x, y)
        if not missing.any():
            return out

        if out.dtype == np.bool_:
            out = out.astype('O')
        np.putmask(out, missing, np.nan)
        return out
示例#16
0
def test_isnull_datetime():
    """isnull/notnull on a datetime scalar and on a DatetimeIndex with NaT."""
    assert not isnull(datetime.now())
    assert notnull(datetime.now())

    dates = date_range('1/1/1990', periods=20)
    assert notnull(dates).all()

    # overwrite the first entry with iNaT: only that position must be null
    raw = np.asarray(dates)
    raw[0] = iNaT
    mask = isnull(DatetimeIndex(raw))
    assert mask[0]
    assert not mask[1:].any()
示例#17
0
文件: nanops.py 项目: cscanlin/pandas
    def f(x, y):
        """
        Apply ``op`` to ``x`` and ``y`` and put NaN wherever either operand
        is null; a boolean result is cast to object first so that it can
        hold NaN.
        """
        missing = isnull(x) | isnull(y)

        out = op(x, y)
        if not missing.any():
            return out

        if is_bool_dtype(out):
            out = out.astype("O")
        np.putmask(out, missing, np.nan)
        return out
示例#18
0
文件: array.py 项目: Allen1203/pandas
    def to_dense(self, fill=None):
        """
        Return ``self.values`` with its null entries replaced by a fill
        value.

        ``fill`` defaults to ``self.fill_value``; when the fill value is
        itself null the values are returned untouched.
        """
        dense = self.values

        replacement = self.fill_value if fill is None else fill
        if com.isnull(replacement):
            return dense

        # fill the nans
        dense[com.isnull(dense)] = replacement
        return dense
示例#19
0
def assert_almost_equal(a, b, check_less_precise=False):
    """
    Assert that ``a`` and ``b`` are equal, comparing numbers approximately.

    Dicts are delegated to ``assert_dict_equal``, strings are compared
    exactly, iterables are compared element by element (recursively), null
    values must be null on both sides and infinities must be infinite on
    both sides. Numbers are compared to 5 decimals, or to 3 decimals when
    ``check_less_precise`` is set and both values are floats of at most 4
    bytes. Any other object is compared with ``==``.

    Returns True for the string and iterable paths, the result of
    ``assert_dict_equal`` for dicts and None otherwise.

    Raises
    ------
    AssertionError
        If the two values differ.
    """
    if isinstance(a, dict) or isinstance(b, dict):
        return assert_dict_equal(a, b)

    if isinstance(a, compat.string_types):
        assert a == b, "%s != %s" % (a, b)
        return True

    if isiterable(a):
        np.testing.assert_(isiterable(b))
        na, nb = len(a), len(b)
        assert na == nb, "%s != %s" % (na, nb)
        # only walk the elements when the fast whole-array check fails
        if not np.array_equal(a, b):
            for i in range(na):
                assert_almost_equal(a[i], b[i], check_less_precise)
        return True

    def err_msg(a, b):
        return 'expected %.5f but got %.5f' % (b, a)

    if isnull(a):
        np.testing.assert_(isnull(b))
        return

    if isinstance(a, (bool, float, int, np.float32)):
        decimal = 5

        # deal with differing dtypes
        if check_less_precise:
            dtype_a = np.dtype(type(a))
            dtype_b = np.dtype(type(b))
            # compare the *kind* of both dtypes: comparing the dtype itself
            # against 'f' only ever matches float32
            if dtype_a.kind == 'f' and dtype_b.kind == 'f':
                if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
                    decimal = 3

        if np.isinf(a):
            assert np.isinf(b), err_msg(a, b)

        # case for zero
        elif abs(a) < 1e-5:
            np.testing.assert_almost_equal(
                a, b, decimal=decimal, err_msg=err_msg(a, b), verbose=False)
        else:
            # a non-zero value against an exact zero is a mismatch, not an
            # arithmetic error
            try:
                ratio = a / b
            except ZeroDivisionError:
                raise AssertionError(err_msg(a, b))
            np.testing.assert_almost_equal(
                1, ratio, decimal=decimal, err_msg=err_msg(a, b),
                verbose=False)
    else:
        assert a == b, "%s != %s" % (a, b)
示例#20
0
文件: testing.py 项目: agconti/pandas
def assert_almost_equal(a, b, check_less_precise = False):
    """
    Assert that ``a`` and ``b`` are equal, comparing numbers approximately.

    Dicts are delegated to ``assert_dict_equal``, strings are compared
    exactly, iterables are compared element by element (recursively), null
    values must be null on both sides and infinities must be infinite on
    both sides. Numbers are compared to 5 decimals, or to 3 decimals when
    ``check_less_precise`` is set and both values are floats of at most 4
    bytes. Any other object is compared with ``==``.

    Returns True for the string and iterable paths, the result of
    ``assert_dict_equal`` for dicts and None otherwise.

    Raises
    ------
    AssertionError
        If the two values differ.
    """
    if isinstance(a, dict) or isinstance(b, dict):
        return assert_dict_equal(a, b)

    if isinstance(a, basestring):
        assert a == b, "{0} != {1}".format(a, b)
        return True

    if isiterable(a):
        np.testing.assert_(isiterable(b))
        na, nb = len(a), len(b)
        assert na == nb, "{0} != {1}".format(na, nb)

        # only walk the elements when the fast whole-array check fails
        if not np.array_equal(a, b):
            for i in xrange(na):
                assert_almost_equal(a[i], b[i], check_less_precise)
        return True

    def err_msg(a, b):
        return 'expected %.5f but got %.5f' % (a, b)

    if isnull(a):
        np.testing.assert_(isnull(b))
        return

    if isinstance(a, (bool, float, int, np.float32)):
        decimal = 5

        # deal with differing dtypes
        if check_less_precise:
            dtype_a = np.dtype(type(a))
            dtype_b = np.dtype(type(b))
            # compare the *kind* of both dtypes: comparing the dtype itself
            # against 'f' only ever matches float32
            if dtype_a.kind == 'f' and dtype_b.kind == 'f':
                if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
                    decimal = 3

        if np.isinf(a):
            assert np.isinf(b), err_msg(a, b)

        # case for zero
        elif abs(a) < 1e-5:
            np.testing.assert_almost_equal(
                a, b, decimal=decimal, err_msg=err_msg(a, b), verbose=False)
        else:
            # a non-zero value against an exact zero is a mismatch, not an
            # arithmetic error
            try:
                ratio = a / b
            except ZeroDivisionError:
                raise AssertionError(err_msg(a, b))
            np.testing.assert_almost_equal(
                1, ratio, decimal=decimal, err_msg=err_msg(a, b),
                verbose=False)
    else:
        assert a == b, "%s != %s" % (a, b)
示例#21
0
    def fillna(self, value=None, method='pad'):
        """
        Fill NaN values using the specified method.

        Parameters
        ----------
        value : any kind (should be same type as array)
            Value to use to fill holes (e.g. 0). When given, ``method`` is
            ignored.

        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
            Method to use for filling holes (case-insensitive)

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap

        Returns
        -------
        TimeSeries with NaN's filled

        Raises
        ------
        ValueError
            If ``value`` is None and ``method`` is None or not one of the
            names listed above.

        See also
        --------
        reindex, asfreq
        """
        if value is not None:
            # fill a copy so the caller's data is left untouched
            newSeries = self.copy()
            newSeries[isnull(newSeries)] = value
            return newSeries

        # Using an indexer to pad / backfill
        if method is None:  # pragma: no cover
            raise ValueError('must specify a fill method')

        method = method.lower()

        if method == 'ffill':
            method = 'pad'
        if method == 'bfill':
            method = 'backfill'

        # reject unknown names up front: they used to fall through both
        # branches below and fail on an unbound ``indexer``
        if method not in ('pad', 'backfill'):
            raise ValueError('Invalid fill method. Expecting pad (ffill) or '
                             'backfill (bfill). Got %s' % method)

        mask = isnull(self.values)

        if _numpy_lt_151():  # pragma: no cover
            mask = mask.astype(np.uint8)

        if method == 'pad':
            indexer = _tseries.get_pad_indexer(mask)
        else:
            indexer = _tseries.get_backfill_indexer(mask)

        new_values = self.values.take(indexer)
        return Series(new_values, index=self.index)
示例#22
0
def _mask_missing(array, missing_values):
    if np.isscalar(missing_values):
        missing_values = [missing_values]

    missing_values = np.array(missing_values, dtype=object)
    if com.isnull(missing_values).any():
        mask = com.isnull(array)
        missing_values = missing_values[com.notnull(missing_values)]

    for v in missing_values:
        if mask is None:
            mask = array == missing_values
        else:
            mask |= array == missing_values
    return mask
示例#23
0
def test_isnull_datetime():
    """isnull/notnull on a datetime scalar and on a DatetimeIndex with NaT."""
    assert not isnull(datetime.now())
    assert notnull(datetime.now())

    dates = date_range("1/1/1990", periods=20)
    assert notnull(dates).all()

    import pandas.lib as lib

    # overwrite the first entry with iNaT: only that position must be null
    raw = np.asarray(dates)
    raw[0] = lib.iNaT
    mask = isnull(DatetimeIndex(raw))
    assert mask[0]
    assert not mask[1:].any()
示例#24
0
def test_isnull_lists():
    """
    isnull on nested lists yields an equally shaped all-False mask, and
    lists of (unicode) strings hold no nulls.
    """
    nested_cases = [
        ([[False]], np.array([[False]])),
        ([[1], [2]], np.array([[False], [False]])),
    ]
    for data, exp in nested_cases:
        assert np.array_equal(isnull(data), exp)

    # list of strings / unicode
    assert not isnull(["foo", "bar"]).any()
    assert not isnull([u("foo"), u("bar")]).any()
示例#25
0
def test_isnull_lists():
    """
    isnull on nested lists yields an equally shaped all-False mask, and
    lists of (unicode) strings hold no nulls.
    """
    def check_mask(data, exp):
        assert np.array_equal(isnull(data), exp)

    check_mask([[False]], np.array([[False]]))
    check_mask([[1], [2]], np.array([[False], [False]]))

    # list of strings / unicode
    plain = isnull(['foo', 'bar'])
    assert not plain.any()

    unicode_strings = isnull([u('foo'), u('bar')])
    assert not unicode_strings.any()
示例#26
0
    def test_na_handling(self):
        """cut keeps NaN inputs as NaN, with and without labels."""
        arr = np.arange(0, 0.75, 0.01)
        arr[::3] = np.nan

        # default labels: compare through the ndarray form of the result
        labelled = np.asarray(cut(arr, 4))
        expected_labelled = np.where(com.isnull(arr), np.nan, labelled)
        tm.assert_almost_equal(labelled, expected_labelled)

        # integer bin indicators
        binned = cut(arr, 4, labels=False)
        expected_binned = np.where(com.isnull(arr), np.nan, binned)
        tm.assert_almost_equal(binned, expected_binned)
示例#27
0
def test_notnull():
    """
    notnull on scalars, on arrays under both settings of the
    ``mode.use_inf_as_null`` option, and on Series.
    """
    assert notnull(1.)
    assert not notnull(None)
    assert not notnull(np.NaN)

    # infinities count as valid values when the option is off ...
    with cf.option_context("mode.use_inf_as_null", False):
        assert notnull(np.inf)
        assert notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.all()

    # ... and as nulls when it is on
    with cf.option_context("mode.use_inf_as_null", True):
        assert not notnull(np.inf)
        assert not notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.sum() == 2

    with cf.option_context("mode.use_inf_as_null", False):
        for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]:
            # exercise notnull itself: this test used to call isnull here
            assert isinstance(notnull(s), Series)
示例#28
0
文件: tdi.py 项目: AbnerZheng/pandas
    def wrapper(self, other):
        """
        Run the parent-class comparison named ``opname`` (taken from the
        enclosing scope) against ``other`` and set the result to
        ``nat_result`` wherever either side is NaT.

        Returns the result unchanged when it has a boolean dtype, otherwise
        wrapped in an ``Index``. Raises TypeError when ``other`` is neither
        convertible to a timedelta nor list-like.
        """
        func = getattr(super(TimedeltaIndex, self), opname)
        if _is_convertible_to_td(other):
            other = _to_m8(other)
            result = func(other)
            # comparing against a null scalar: the whole result takes the
            # NaT outcome
            if com.isnull(other):
                result.fill(nat_result)
        else:
            if not com.is_list_like(other):
                raise TypeError("cannot compare a TimedeltaIndex with type "
                                "{0}".format(type(other)))

            other = TimedeltaIndex(other).values
            result = func(other)
            result = _values_from_object(result)

            # NOTE(review): ``other`` was just replaced by ``.values``, so
            # the Index branch below looks unreachable -- confirm
            if isinstance(other, Index):
                o_mask = other.values.view('i8') == tslib.iNaT
            else:
                o_mask = other.view('i8') == tslib.iNaT

            if o_mask.any():
                result[o_mask] = nat_result

        # positions where self holds NaT take the NaT outcome as well
        if self.hasnans:
            result[self._isnan] = nat_result

        # support of bool dtype indexers
        if com.is_bool_dtype(result):
            return result
        return Index(result)
示例#29
0
def _mask_missing(array, missing_values):
    if not isinstance(missing_values, (list, np.ndarray)):
        missing_values = [missing_values]

    mask = None
    missing_values = np.array(missing_values, dtype=object)
    if com.isnull(missing_values).any():
        mask = com.isnull(array)
        missing_values = missing_values[com.notnull(missing_values)]

    for v in missing_values:
        if mask is None:
            mask = array == missing_values
        else:
            mask |= array == missing_values
    return mask
示例#30
0
    def get_result(self):
        """Assemble the reshaped DataFrame from the new values, columns and
        index computed by this object's ``get_new_*`` helpers."""
        # TODO: find a better way than this masking business

        values, value_mask = self.get_new_values()
        columns = self.get_new_columns()
        index = self.get_new_index()

        # filter out missing levels
        if values.shape[1] > 0:
            col_inds, obs_ids = _compress_group_index(self.sorted_labels[-1])
            # rare case, level values not observed
            if len(obs_ids) < self.full_shape[1]:
                # keep only the columns with at least one entry set in
                # value_mask
                inds = (value_mask.sum(0) > 0).nonzero()[0]
                values = com.take_nd(values, inds, axis=1)
                columns = columns[inds]

        # we might have a missing index
        if len(index) != values.shape[0]:
            mask = isnull(index)
            if mask.any():
                # rebuild ``values`` with one row per index entry: rows at
                # null index positions stay NaN, the remaining rows are
                # copied over in order
                l = np.arange(len(index))
                values, orig_values = np.empty((len(index),values.shape[1])), values
                values.fill(np.nan)
                values_indexer = com._ensure_int64(l[~mask])
                for i, j in enumerate(values_indexer):
                    values[j] = orig_values[i]
            else:
                index = index.take(self.unique_groups)
        
        return DataFrame(values, index=index, columns=columns)
示例#31
0
def test_isnull():
    """Check ``isnull`` on scalars and on Series, DataFrame, Panel and
    Panel4D containers."""

    def check_matches_apply(objs, assert_equal):
        # isnull on a container must agree with applying it piecewise
        for obj in objs:
            assert_equal(isnull(obj), obj.apply(isnull))

    # scalars; infinities are not null here
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    # series
    series_samples = [tm.makeFloatSeries(),
                      tm.makeStringSeries(),
                      tm.makeObjectSeries(),
                      tm.makeTimeSeries(),
                      tm.makePeriodSeries()]
    for s in series_samples:
        assert isinstance(isnull(s), Series)

    # frame
    check_matches_apply(
        [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
         tm.makeMixedDataFrame()],
        tm.assert_frame_equal)

    # panel
    check_matches_apply(
        [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel())],
        tm.assert_panel_equal)

    # panel 4d
    check_matches_apply(
        [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())],
        tm.assert_panel4d_equal)
示例#32
0
文件: merge.py 项目: tdhock/pandas
    def get_result(self):
        """Build the concatenated object from ``self.objs``.

        Series inputs are either stacked into one Series (``axis == 0``) or
        placed side by side as DataFrame columns; any other input is
        combined through its block managers.
        """

        # series only
        if self._is_series:

            # stack blocks
            if self.axis == 0:
                new_data = com._concat_compat([x._values for x in self.objs])
                name = com._consensus_name_attr(self.objs)
                return Series(new_data, index=self.new_axes[0], name=name).__finalize__(self, method='concat')

            # combine as columns in a frame
            else:
                # key each input Series by its position in self.objs
                data = dict(zip(range(len(self.objs)), self.objs))
                index, columns = self.new_axes
                tmpdf = DataFrame(data, index=index)
                # checks if the column variable already stores valid column names (because set via the 'key' argument
                # in the 'concat' function call. If that's not the case, use the series names as column names
                if columns.equals(Index(np.arange(len(self.objs)))) and not self.ignore_index:
                    columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object')
                    # unnamed Series are numbered 0, 1, ... in order of
                    # appearance
                    indexer = isnull(columns)
                    if indexer.any():
                        columns[indexer] = np.arange(len(indexer[indexer]))
                tmpdf.columns = columns
                return tmpdf.__finalize__(self, method='concat')

        # combine block managers
        else:
            mgrs_indexers = []
            for obj in self.objs:
                mgr = obj._data
                # axis number -> indexer onto the new labels, only for axes
                # whose labels differ from the new ones
                indexers = {}
                for ax, new_labels in enumerate(self.new_axes):
                    if ax == self.axis:
                        # Suppress reindexing on concat axis
                        continue

                    obj_labels = mgr.axes[ax]
                    if not new_labels.equals(obj_labels):
                        indexers[ax] = obj_labels.reindex(new_labels)[1]

                mgrs_indexers.append((obj._data, indexers))

            new_data = concatenate_block_managers(
                mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
            if not self.copy:
                new_data._consolidate_inplace()

            return self.objs[0]._from_axes(new_data, self.new_axes).__finalize__(self, method='concat')
示例#33
0
    def wrapper(self, other):
        """Compare this Series with ``other`` through ``na_op``.

        ``na_op``, ``op`` and ``masker`` come from the enclosing scope.

        Raises
        ------
        ValueError
            If ``other`` is a Series, array or Index of a different length.
        TypeError
            If ``other`` is a Categorical while this Series is not of
            categorical dtype, or if a scalar comparison yields a scalar.
        """
        if isinstance(other, pd.Series):
            name = _maybe_match_name(self, other)
            if len(self) != len(other):
                raise ValueError('Series lengths must match to compare')
            return self._constructor(na_op(self.values, other.values),
                                     index=self.index,
                                     name=name)
        elif isinstance(other, pd.DataFrame):  # pragma: no cover
            return NotImplemented
        elif isinstance(other, (pa.Array, pd.Index)):
            if len(self) != len(other):
                raise ValueError('Lengths must match to compare')
            return self._constructor(na_op(self.values, np.asarray(other)),
                                     index=self.index).__finalize__(self)
        elif isinstance(other, pd.Categorical):
            # NOTE(review): when self *is* of categorical dtype nothing is
            # returned from this branch, so the call yields None -- confirm
            # whether the comparison is meant to be carried out here.
            if not com.is_categorical_dtype(self):
                msg = "Cannot compare a Categorical for op {op} with Series of dtype {typ}.\n"\
                      "If you want to compare values, use 'series <op> np.asarray(other)'."
                raise TypeError(msg.format(op=op, typ=self.dtype))
        else:

            # remember the null positions; they are overwritten with
            # ``masker`` after the comparison
            mask = isnull(self)

            values = self.get_values()
            other = _index.convert_scalar(values, _values_from_object(other))

            # compare datetimes through their int64 view
            if issubclass(values.dtype.type, np.datetime64):
                values = values.view('i8')

            # scalars
            res = na_op(values, other)
            if np.isscalar(res):
                raise TypeError('Could not compare %s type with Series' %
                                type(other))

            # always return a full value series here
            res = _values_from_object(res)

            res = pd.Series(res,
                            index=self.index,
                            name=self.name,
                            dtype='bool')

            # mask out the invalids
            if mask.any():
                res[mask] = masker

            return res
示例#34
0
def value_counts(values, sort=True, ascending=False, normalize=False):
    """
    Count how often each non-null value occurs.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Order the result by count
    ascending : boolean, default False
        When sorting, put the smallest counts first
    normalize: boolean, default False
        If True, divide the counts by the number of input values

    Returns
    -------
    value_counts : Series
        Counts indexed by the distinct values
    """
    from pandas.core.series import Series

    arr = np.asarray(values)
    in_dtype = arr.dtype

    if com.is_integer_dtype(in_dtype):
        arr = com._ensure_int64(arr)
        keys, counts = htable.value_count_int64(arr)
    elif issubclass(in_dtype.type, (np.datetime64, np.timedelta64)):
        # count on the int64 view, then give the keys their dtype back
        arr = arr.view(np.int64)
        keys, counts = htable.value_count_int64(arr)
        keys = Series(keys, dtype=in_dtype)
    else:
        null_mask = com.isnull(arr)
        arr = com._ensure_object(arr)
        keys, counts = htable.value_count_object(arr, null_mask)

    hist = Series(counts, index=keys)

    if sort:
        hist.sort()
        if not ascending:
            hist = hist[::-1]

    if normalize:
        hist = hist / float(arr.size)

    return hist
示例#35
0
def nanvar(values, axis=None, skipna=True, ddof=1):
    """Variance of ``values`` along ``axis`` from the sum and the sum of
    squares; with ``skipna`` null entries are zeroed out first.

    Non-floating input is cast to float64. The count and divisor come from
    ``_get_counts_nanvar``.
    """
    if not _is_floating_dtype(values):
        values = values.astype('f8')

    null_mask = isnull(values)
    count, divisor = _get_counts_nanvar(null_mask, axis, ddof)

    if skipna:
        # work on a copy so the caller's data is left untouched
        values = values.copy()
        np.putmask(values, null_mask, 0)

    total = _ensure_numeric(values.sum(axis))
    total_sq = _ensure_numeric((values**2).sum(axis))
    numerator = total_sq - total**2 / count
    return np.fabs(numerator / divisor)
示例#36
0
 def dropna(self, axis=0, inplace=False, **kwargs):
     """
     Counterpart of Series.dropna. With a null fill_value the result is a
     dense Series; otherwise entries equal to fill_value are dropped as
     well and the result is converted back to sparse.
     """
     # TODO: make more efficient
     # called for its axis check; the resolved number is not used below
     self._get_axis_number(axis or 0)
     valid = self.to_dense().valid()
     if inplace:
         raise NotImplementedError("Cannot perform inplace dropna"
                                   " operations on a SparseSeries")
     if not isnull(self.fill_value):
         kept = valid[valid != self.fill_value]
         return kept.to_sparse(fill_value=self.fill_value)
     return valid
示例#37
0
def _nanvar(values, axis=None, skipna=True, ddof=1):
    """Private variance helper: integer input is cast to float64 and, with
    ``skipna``, null entries are zeroed before the sums are taken."""
    # the mask is taken before any dtype conversion
    null_mask = isnull(values)
    if is_any_int_dtype(values):
        values = values.astype('f8')

    count, divisor = _get_counts_nanvar(null_mask, axis, ddof)

    if skipna:
        values = values.copy()
        np.putmask(values, null_mask, 0)

    total = _ensure_numeric(values.sum(axis))
    total_sq = _ensure_numeric((values**2).sum(axis))
    numerator = total_sq - total**2 / count
    return np.fabs(numerator / divisor)
示例#38
0
文件: nanops.py 项目: ricarpor/pandas
def _nanvar(values, axis=None, skipna=True, ddof=1):
    """Variance along ``axis`` with ``ddof`` delta degrees of freedom,
    counting only the non-null entries."""
    null_mask = isnull(values)

    # number of non-null observations, as float
    if axis is None:
        count = float(values.size - null_mask.sum())
    else:
        count = (values.shape[axis] - null_mask.sum(axis)).astype(float)

    if skipna:
        values = values.copy()
        np.putmask(values, null_mask, 0)

    total = _ensure_numeric(values.sum(axis))
    total_sq = _ensure_numeric((values**2).sum(axis))
    numerator = total_sq - total**2 / count
    return np.fabs(numerator / (count - ddof))
示例#39
0
    def _evaluate_with_timedelta_like(self, other, op, opstr):
        """Divide this index by a timedelta-like ``other``.

        Only ``__div__`` / ``__truediv__`` with a timedelta-convertible
        operand are handled; everything else returns ``NotImplemented``.

        Raises
        ------
        NotImplementedError
            If ``other`` converts to a null Timedelta.
        """
        # allow division by a timedelta
        is_division = opstr in ['__div__', '__truediv__']
        if not is_division or not _is_convertible_to_td(other):
            return NotImplemented

        divisor = Timedelta(other)
        if isnull(divisor):
            raise NotImplementedError("division by pd.NaT not implemented")

        quotient = self.asi8 / float(divisor.value)
        quotient = self._maybe_mask_results(quotient, convert='float64')
        return Index(quotient, name=self.name, copy=False)
示例#40
0
文件: array.py 项目: yaduart/pandas
    def fillna(self, value, downcast=None):
        """Return a new sparse array with nulls replaced by ``value``.

        When the fill value is itself null (``_null_fill_value``) only the
        fill value is swapped; otherwise nulls among the stored values are
        overwritten in a copy.

        Raises
        ------
        NotImplementedError
            If ``downcast`` is given.
        """
        if downcast is not None:
            raise NotImplementedError

        if issubclass(self.dtype.type, np.floating):
            value = float(value)

        if not self._null_fill_value:
            filled = self.sp_values.copy()
            filled[com.isnull(filled)] = value
            return self._simple_new(filled, self.sp_index,
                                    fill_value=self.fill_value)

        return self._simple_new(self.sp_values, self.sp_index,
                                fill_value=value)
示例#41
0
def _check_bool_indexer(ax, key):
    """Align a boolean Series key to ``ax`` and reject object arrays that
    contain nulls.

    Raises
    ------
    IndexingError
        If the (possibly reindexed) key is an object ndarray with NA
        entries.
    """
    # boolean indexing needs aligned data; a bool Series on a different
    # index is reindexed first
    misaligned = (_is_series(key) and key.dtype == np.bool_
                  and not key.index.equals(ax))
    result = key.reindex(ax) if misaligned else key

    if isinstance(result, np.ndarray) and result.dtype == np.object_:
        if com.isnull(result).any():
            raise IndexingError('cannot index with vector containing '
                                'NA / NaN values')

    return result
示例#42
0
文件: nanops.py 项目: rockg/pandas
def _get_values(values,
                skipna,
                fill_value=None,
                fill_value_typ=None,
                isfinite=False,
                copy=True):
    """ Return ``(values, mask, dtype, dtype_max)`` for a reduction.

    The mask comes from ``_isfinite`` when ``isfinite`` is set, otherwise
    from ``isnull``. With ``skipna`` the masked positions are overwritten
    with the fill value; ``copy=True`` forces a copy beforehand.
    """
    values = _values_from_object(values)
    mask = _isfinite(values) if isfinite else isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # resolve the fill value (an alternative dtype may be needed for it)
    fill_value = _get_fill_value(dtype,
                                 fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if copy:
        values = values.copy()

    if skipna:
        if dtype_ok:
            np.putmask(values, mask, fill_value)
        else:
            # promote if needed
            values, _ = _maybe_upcast_putmask(values, mask, fill_value)

    values = _view_if_needed(values)

    # platform independent precision dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max
示例#43
0
文件: nanops.py 项目: bshanks/pandas
def nanvar(values, axis=None, skipna=True, copy=True, ddof=1):
    """Variance of ``values`` along ``axis`` with ``ddof`` delta degrees of
    freedom, counting only non-null entries.

    With ``skipna`` the nulls are replaced by 0 before summing; when
    ``copy`` is False that replacement is written into the input array.
    """
    null_mask = isnull(values)

    # number of non-null observations, as float
    if axis is None:
        count = float(values.size - null_mask.sum())
    else:
        count = (values.shape[axis] - null_mask.sum(axis)).astype(float)

    if skipna:
        if copy:
            values = values.copy()
        np.putmask(values, null_mask, 0)

    total = values.sum(axis)
    total_sq = (values**2).sum(axis)
    numerator = total_sq - total**2 / count
    return numerator / (count - ddof)
示例#44
0
    def _format_native_types(self,
                             na_rep=u('NaT'),
                             date_format=None,
                             **kwargs):
        """Return an object ndarray of the entries rendered as text: null
        entries become ``na_rep``, the others go through ``strftime`` when
        ``date_format`` is given and through ``'%s'`` formatting otherwise.
        """
        out = np.array(list(self), dtype=object)
        is_null = isnull(self.values)
        out[is_null] = na_rep
        valid = ~is_null

        if date_format:
            def render(dt):
                return dt.strftime(date_format)
        else:
            def render(dt):
                return u('%s') % dt

        out[valid] = np.array([render(dt) for dt in out[valid]])
        return out
示例#45
0
def test_isnull():
    """Check ``isnull`` on scalars (infinities count as null here), on
    Series and on a mixed-dtype DataFrame."""
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert isnull(np.inf)
    assert isnull(-np.inf)

    float_series = Series(np.random.randn(5))
    obj_series = Series(np.random.randn(5), dtype=object)
    assert (isinstance(isnull(float_series), Series))
    assert (isinstance(isnull(obj_series), Series))

    # call on DataFrame
    df = DataFrame(np.random.randn(10, 5))
    df['foo'] = 'bar'
    result = isnull(df)
    # derive the expectation from the input frame; deriving it from
    # ``result`` would only compare the output with itself
    expected = df.apply(isnull)
    tm.assert_frame_equal(result, expected)
示例#46
0
 def _validate_levels(cls, levels):
     """Coerce ``levels`` to an Index and make sure its entries are unique.

     Raises
     ------
     ValueError
         If the levels contain duplicates.
     """
     if not isinstance(levels, Index):
         index_dtype = None
         if not hasattr(levels, "dtype"):
             levels = _convert_to_list_like(levels)
             # NaN among int levels would turn them into floats; fall back
             # to "object" dtype whenever the NaNs change the inferred dtype
             if isnull(levels).any():
                 non_null = np.array([lev for lev in levels
                                      if com.notnull(lev)])
                 everything = np.array(levels)
                 if everything.dtype != non_null.dtype:
                     index_dtype = "object"
         levels = Index(levels, dtype=index_dtype)

     if levels.is_unique:
         return levels
     raise ValueError('Categorical levels must be unique')
示例#47
0
    def fill(self, value=None, method='pad'):
        """
        Fill NaN values, either with a constant or by propagation.

        Member Series / TimeSeries are filled separately.

        Parameters
        ----------
        value : any kind (should be same type as array)
            Value to use to fill holes (e.g. 0)

        method : {'backfill', 'pad', None}
            Method to use for filling holes in new index

        Returns
        -------
        y : DataMatrix

        See also
        --------
        DataMatrix.reindex, DataMatrix.asfreq
        """
        if value is not None:
            # Float type values
            if len(self.columns) == 0:
                return self

            filled = self.values.copy()
            filled.flat[common.isnull(filled.ravel())] = value

            if self.objects is not None:
                objects = self.objects.copy()
            else:
                objects = None

            return DataMatrix(filled,
                              index=self.index,
                              columns=self.columns,
                              objects=objects)

        # no constant given: let every column fill itself
        by_column = dict((col, s.fill(method=method, value=value))
                         for col, s in self._series.iteritems())
        return DataMatrix(by_column, index=self.index, objects=self.objects)
示例#48
0
    def take_nd(self, indexer, allow_fill=True, fill_value=None):
        """ Take the codes at ``indexer`` and wrap them in a new Categorical
        with the same levels, ordering and name; filled positions get the
        code -1. """

        # the fill value is only passed through internally and must
        # always be None/nan
        assert isnull(fill_value)

        taken = com.take_1d(self._codes, indexer,
                            allow_fill=True, fill_value=-1)
        return Categorical(taken, levels=self.levels, ordered=self.ordered,
                           name=self.name, fastpath=True)
示例#49
0
 def _validate_categories(cls, categories):
     """Coerce ``categories`` to an Index and make sure its entries are
     unique.

     Raises
     ------
     ValueError
         If the categories contain duplicates.
     """
     if not isinstance(categories, Index):
         index_dtype = None
         if not hasattr(categories, "dtype"):
             categories = _convert_to_list_like(categories)
             # NaN among int categories would turn them into floats; fall
             # back to "object" dtype whenever the NaNs change the dtype
             if isnull(categories).any():
                 non_null = np.array([cat for cat in categories
                                      if com.notnull(cat)])
                 everything = np.array(categories)
                 if everything.dtype != non_null.dtype:
                     index_dtype = "object"
         categories = Index(categories, dtype=index_dtype)

     if categories.is_unique:
         return categories
     raise ValueError('Categorical categories must be unique')
示例#50
0
def _check_bool_indexer(ax, key):
    """Return ``key`` as a boolean ndarray, reindexing a Series key to
    ``ax`` first when its index differs.

    Callers are expected to have established
    ``com._is_bool_indexer(key) == True`` beforehand.

    Raises
    ------
    IndexingError
        If reindexing the Series key introduces nulls.
    """
    aligned = key
    if _is_series(key) and not key.index.equals(ax):
        aligned = key.reindex(ax)
        if com.isnull(aligned).any():
            raise IndexingError('Unalignable boolean Series key provided')

    # nulls in an object-array key were already rejected by
    # com._is_bool_indexer, so no further check is needed here
    return np.asarray(aligned, dtype=bool)
示例#51
0
    def fillna(self, fill_value=None, method=None, limit=None, **kwargs):
        """ Fill NA/NaN values using the specified method.

        Parameters
        ----------
        fill_value : scalar, default None
            Value to use to fill holes (e.g. 0). Must be one of the levels
            unless it is null; None is treated as NaN.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        limit : int, default None
            Maximum size gap to forward or backward fill (not implemented yet!)

        Returns
        -------
        filled : Categorical with NA/NaN filled

        Raises
        ------
        NotImplementedError
            If ``limit`` is given.
        ValueError
            If ``fill_value`` is non-null and not one of the levels.
        """

        if fill_value is None:
            fill_value = np.nan
        if limit is not None:
            raise NotImplementedError

        values = self._codes

        # pad / bfill
        if method is not None:

            values = self.to_dense().reshape(-1, len(self))
            values = com.interpolate_2d(
                values, method, 0, None, fill_value).astype(self.levels.dtype)[0]
            values = _get_codes_for_values(values, self.levels)

        else:

            if not com.isnull(fill_value) and fill_value not in self.levels:
                raise ValueError("fill value must be in levels")

            mask = self._codes == -1
            # A null fill value that is not itself a level has no code to
            # look up (get_loc would raise KeyError); the missing entries
            # then simply keep their -1 code.
            if mask.any() and fill_value in self.levels:
                values = self._codes.copy()
                values[mask] = self.levels.get_loc(fill_value)

        return Categorical(values, levels=self.levels, ordered=self.ordered,
                           name=self.name, fastpath=True)
示例#52
0
def test_nan_to_nat_conversions():
    """Assigning NaN into a datetime column must store NaT, both through
    ``iloc`` and through ``com._maybe_upcast_indexer``."""
    frame = DataFrame({
        'A': np.asarray(list(range(10)), dtype='float64'),
        'B': Timestamp('20010101'),
    })
    frame.iloc[3:6, :] = np.nan
    assert frame.loc[4, 'B'].value == iNaT

    raw = frame['B'].values
    result, changed = com._maybe_upcast_indexer(raw, (slice(8, 9),), np.nan)
    assert isnull(result[8])

    # numpy < 1.7.0 is wrong
    from distutils.version import LooseVersion
    if LooseVersion(np.__version__) >= '1.7.0':
        assert result[8] == np.datetime64('NaT')
示例#53
0
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
                  precision=3, name=None, include_lowest=False):
    """Assign each entry of ``x`` to the bin delimited by ``bins``.

    Returns a Categorical, or plain bin numbers when ``labels`` is False
    (as floats with NaN for unbinned entries, if there are any). With
    ``retbins`` the result is the pair ``(fac, bins)``.

    Raises
    ------
    ValueError
        If explicit ``labels`` do not number one fewer than the bin edges.
    """
    if name is None and isinstance(x, Series):
        name = x.name
    x = np.asarray(x)

    ids = bins.searchsorted(x, side='left' if right else 'right')

    if include_lowest:
        ids[x == bins[0]] = 1

    # null inputs and values outside the outermost edges
    na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0)

    if labels is False:
        fac = ids - 1
        if na_mask.any():
            fac = fac.astype(np.float64)
            np.putmask(fac, na_mask, np.nan)
    else:
        if labels is None:
            template = '(%s, %s]' if right else '[%s, %s)'
            levels = [template % (_format_label(lo, precision=precision),
                                  _format_label(hi, precision=precision))
                      for lo, hi in zip(bins, bins[1:])]
            if right and include_lowest:
                # the first interval is closed on the left as well
                levels[0] = '[' + levels[0][1:]
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
            levels = labels

        levels = np.asarray(levels, dtype=object)
        np.putmask(ids, na_mask, 0)
        fac = Categorical(ids - 1, levels, name=name)

    if retbins:
        return fac, bins
    return fac
示例#54
0
def test_nan_to_nat_conversions():
    """Assigning NaN into a datetime column must store NaT, both through
    ``iloc`` and through the block manager's ``setitem``."""
    frame = DataFrame({
        'A': np.asarray(lrange(10), dtype='float64'),
        'B': Timestamp('20010101'),
    })
    frame.iloc[3:6, :] = np.nan
    assert frame.loc[4, 'B'].value == iNaT

    s = frame['B'].copy()
    s._data = s._data.setitem(indexer=(slice(8, 9),), value=np.nan)
    assert isnull(s[8])

    # numpy < 1.7.0 is wrong
    from distutils.version import LooseVersion
    if LooseVersion(np.__version__) >= '1.7.0':
        assert s[8].value == np.datetime64('NaT').astype(np.int64)
示例#55
0
文件: ops.py 项目: quaintm/pandas
    def wrapper(self, other):
        """Compare this Series with ``other`` through ``na_op``.

        ``na_op`` and ``masker`` come from the enclosing scope.

        Raises
        ------
        ValueError
            If ``other`` is a Series, array or Index of a different length.
        TypeError
            If a scalar comparison yields a scalar result.
        """
        if isinstance(other, pd.Series):
            name = _maybe_match_name(self, other)
            if len(self) != len(other):
                raise ValueError('Series lengths must match to compare')
            return self._constructor(na_op(self.values, other.values),
                                     index=self.index,
                                     name=name)
        elif isinstance(other, pd.DataFrame):  # pragma: no cover
            return NotImplemented
        # Series was already handled above, so the array-like branch is
        # for plain arrays and Index objects
        elif isinstance(other, (pa.Array, pd.Index)):
            if len(self) != len(other):
                raise ValueError('Lengths must match to compare')
            return self._constructor(na_op(self.values, np.asarray(other)),
                                     index=self.index,
                                     name=self.name)
        else:

            # remember the null positions; they are overwritten with
            # ``masker`` after the comparison
            mask = isnull(self)

            values = self.values
            other = _index.convert_scalar(values, other)

            # compare datetimes through their int64 view
            if issubclass(values.dtype.type, np.datetime64):
                values = values.view('i8')

            # scalars
            res = na_op(values, other)
            if np.isscalar(res):
                raise TypeError('Could not compare %s type with Series' %
                                type(other))

            # always return a full value series here
            res = _values_from_object(res)

            res = pd.Series(res,
                            index=self.index,
                            name=self.name,
                            dtype='bool')

            # mask out the invalids
            if mask.any():
                res[mask] = masker

            return res
示例#56
0
文件: nanops.py 项目: ricarpor/pandas
def _nanmean(values, axis=None, skipna=True):
    """Mean of ``values`` along ``axis``: the sum divided by the count from
    ``_get_counts``, with NaN wherever that count is zero."""
    null_mask = isnull(values)

    is_integer = issubclass(values.dtype.type, np.integer)
    if skipna and not is_integer:
        values = values.copy()
        np.putmask(values, null_mask, 0)

    total = _ensure_numeric(values.sum(axis))
    count = _get_counts(null_mask, axis)

    if axis is None:
        return total / count if count > 0 else np.nan

    means = total / count
    empty = count == 0
    if empty.any():
        means[empty] = np.nan
    return means
示例#57
0
文件: format.py 项目: sbenzev/pandas
def _format_datetime64(x, tz=None):
    if isnull(x):
        return 'NaT'

    stamp = lib.Timestamp(x, tz=tz)
    base = stamp.strftime('%Y-%m-%d %H:%M:%S')

    fraction = stamp.microsecond * 1000 + stamp.nanosecond
    digits = 9

    if fraction == 0:
        return base

    while (fraction % 10) == 0:
        fraction /= 10
        digits -= 1

    return base + ('.%%.%id' % digits) % fraction
示例#58
0
def _nanmax(values, axis=None, skipna=True):
    """Maximum of ``values`` along ``axis``.

    With ``skipna`` the nulls of non-integer input are replaced by -inf
    before reducing; the result is post-processed by ``_maybe_null_out``.
    """
    mask = isnull(values)
    if skipna and not issubclass(values.dtype.type, np.integer):
        values = values.copy()
        np.putmask(values, mask, -np.inf)
    # numpy 1.6.1 workaround in Python 3.x
    if (values.dtype == np.object_
            and sys.version_info[0] >= 3):  # pragma: no cover
        # this branch only runs on Python 3, where the module is called
        # ``builtins`` (``__builtin__`` exists on Python 2 only)
        import builtins

        if values.ndim > 1:
            apply_ax = axis if axis is not None else 0
            result = np.apply_along_axis(builtins.max, apply_ax, values)
        else:
            result = builtins.max(values)
    else:
        result = values.max(axis)
    return _maybe_null_out(result, axis, mask)
示例#59
0
def test_isnull():
    """Check ``isnull`` on scalars, on Series (ndarray results expected
    here) and on a mixed-dtype DataFrame."""
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    series_samples = [tm.makeFloatSeries(), tm.makeStringSeries(),
                      tm.makeObjectSeries(), tm.makeTimeSeries(),
                      tm.makePeriodSeries()]
    for s in series_samples:
        assert isinstance(isnull(s), np.ndarray)

    # call on DataFrame
    frame = DataFrame(np.random.randn(10, 5))
    frame['foo'] = 'bar'
    result = isnull(frame)
    # NOTE(review): the expectation is derived from ``result`` itself, not
    # from ``frame`` -- confirm this is the intended check.
    expected = result.apply(isnull)
    tm.assert_frame_equal(result, expected)
示例#60
0
文件: ops.py 项目: quaintm/pandas
    def _convert_to_array(self, values, name=None):
        """converts values to ndarray

        ``values`` is wrapped in an array when it is not list-like and then
        converted according to its inferred type; ``name`` is the operator
        name, used to decide whether plain integers are acceptable.

        Raises
        ------
        TypeError
            For integers with an unsupported operator, for DateOffsets
            without a ``delta`` attribute, and for any other type.
        """
        from pandas.tseries.timedeltas import _possibly_cast_to_timedelta

        coerce = 'compat' if pd._np_version_under1p7 else True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)
        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # a datetlike
            # anything that is not already a datetime64 array/Series goes
            # through array_to_datetime
            if not (isinstance(values, (pa.Array, pd.Series))
                    and com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to to ns here
            values = _possibly_cast_to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            # plain integers pass through unchanged for these operators
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets
            # offsets without a ``delta`` attribute show up as None (null)
            os = pa.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError(
                    "cannot use a non-absolute DateOffset in "
                    "datetime/timedelta operations [{0}]".format(','.join(
                        [com.pprint_thing(v) for v in values[mask]])))
            values = _possibly_cast_to_timedelta(os, coerce=coerce)
        else:
            raise TypeError(
                "incompatible type [{0}] for a datetime/timedelta operation".
                format(pa.array(values).dtype))

        return values