Пример #1
0
    def test_value_counts_bins(self):
        s = [1, 2, 3, 4]
        result = algos.value_counts(s, bins=1)
        self.assertEqual(result.tolist(), [4])
        self.assertEqual(result.index[0], 0.997)

        result = algos.value_counts(s, bins=2, sort=False)
        self.assertEqual(result.tolist(), [2, 2])
        self.assertEqual(result.index[0], 0.997)
        self.assertEqual(result.index[1], 2.5)
Пример #2
0
    def test_value_counts(self):
        from pandas.tools.tile import cut

        arr = np.random.randn(4)
        factor = cut(arr, 4)

        tm.assert_isinstance(factor, Categorical)

        result = algos.value_counts(factor)
        expected = algos.value_counts(np.asarray(factor))
        tm.assert_series_equal(result, expected)
Пример #3
0
    def test_value_counts_dtypes(self):
        result = algos.value_counts([1, 1.])
        self.assertEqual(len(result), 1)

        result = algos.value_counts([1, 1.], bins=1)
        self.assertEqual(len(result), 1)

        result = algos.value_counts(Series([1, 1., '1']))  # object
        self.assertEqual(len(result), 2)

        self.assertRaises(TypeError, lambda s: algos.value_counts(s, bins=1), ['1', 1])
Пример #4
0
    def test_value_counts_nat(self):
        td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]')
        dt = pd.to_datetime(['NaT', '2014-01-01'])

        for s in [td, dt]:
            vc = algos.value_counts(s)
            vc_with_na = algos.value_counts(s, dropna=False)
            self.assertEqual(len(vc), 1)
            self.assertEqual(len(vc_with_na), 2)

        exp_dt = pd.Series({pd.Timestamp('2014-01-01 00:00:00'): 1})
        tm.assert_series_equal(algos.value_counts(dt), exp_dt)
Пример #5
0
    def value_counts(self, normalize=False, sort=True, ascending=False,
                     bins=None, dropna=True):
        """
        Returns object containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : boolean, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : boolean, default True
            Sort by values
        ascending : boolean, default False
            Sort in ascending order
        bins : integer, optional
            Rather than count values, group them into half-open bins,
            a convenience for pd.cut, only works with numeric data
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series
        """
        from pandas.core.algorithms import value_counts
        result = value_counts(self, sort=sort, ascending=ascending,
                              normalize=normalize, bins=bins, dropna=dropna)
        return result
Пример #6
0
    def value_counts(self, dropna=False):
        from pandas import Series, PeriodIndex

        if dropna:
            values = self[~self.isna()]._data
        else:
            values = self._data

        cls = type(self)

        result = algos.value_counts(values, sort=False)
        index = PeriodIndex(cls(result.index, freq=self.freq),
                            name=result.index.name)
        return Series(result.values, index=index, name=result.name)
Пример #7
0
    def test_value_counts(self):
        np.random.seed(1234)
        from pandas.tools.tile import cut

        arr = np.random.randn(4)
        factor = cut(arr, 4)

        tm.assertIsInstance(factor, Categorical)
        result = algos.value_counts(factor)
        cats = ['(-1.194, -0.535]', '(-0.535, 0.121]', '(0.121, 0.777]',
                '(0.777, 1.433]']
        expected_index = CategoricalIndex(cats, cats, ordered=True)
        expected = Series([1, 1, 1, 1], index=expected_index)
        tm.assert_series_equal(result.sort_index(), expected.sort_index())
Пример #8
0
    def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
        """
        Returns object containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : boolean, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : boolean, default True
            Sort by values
        ascending : boolean, default False
            Sort in ascending order
        bins : integer, optional
            Rather than count values, group them into half-open bins,
            a convenience for pd.cut, only works with numeric data
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series
        """
        from pandas.core.algorithms import value_counts
        from pandas.tseries.api import DatetimeIndex, PeriodIndex
        from pandas import Index

        result = value_counts(self, sort=sort, ascending=ascending, normalize=normalize, bins=bins, dropna=dropna)

        if isinstance(self, PeriodIndex):
            # preserve freq
            result.index = self._simple_new(result.index.values, freq=self.freq)
        elif isinstance(self, DatetimeIndex):
            result.index = self._simple_new(result.index.values, tz=getattr(self, "tz", None))

        if isinstance(self, Index):
            return result
        else:
            return self._constructor(result)
Пример #9
0
    def value_counts(self, dropna=True):
        """
        Returns a Series containing counts of each interval.

        Parameters
        ----------
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        # TODO: implement this is a non-naive way!
        from pandas.core.algorithms import value_counts
        return value_counts(np.asarray(self), dropna=dropna)
Пример #10
0
    def value_counts(self, dropna=True):
        """
        Returns a Series containing counts of each interval.

        Parameters
        ----------
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        # TODO: implement this is a non-naive way!
        from pandas.core.algorithms import value_counts
        return value_counts(np.asarray(self), dropna=dropna)
Пример #11
0
    def value_counts(self, normalize=False, sort=True, ascending=False,
                     bins=None, dropna=True):
        """
        Returns object containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : boolean, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : boolean, default True
            Sort by values
        ascending : boolean, default False
            Sort in ascending order
        bins : integer, optional
            Rather than count values, group them into half-open bins,
            a convenience for pd.cut, only works with numeric data
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series
        """
        from pandas.core.algorithms import value_counts
        from pandas.tseries.api import DatetimeIndex, PeriodIndex
        result = value_counts(self, sort=sort, ascending=ascending,
                              normalize=normalize, bins=bins, dropna=dropna)

        if isinstance(self, PeriodIndex):
            # preserve freq
            result.index = self._simple_new(result.index.values,
                                            freq=self.freq)
        elif isinstance(self, DatetimeIndex):
            result.index = self._simple_new(result.index.values,
                                            tz=getattr(self, 'tz', None))
        return result
Пример #12
0
    def value_counts(self,
                     normalize=False,
                     sort=True,
                     ascending=False,
                     bins=None,
                     dropna=True):
        """
        Returns object containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : boolean, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : boolean, default True
            Sort by values
        ascending : boolean, default False
            Sort in ascending order
        bins : integer, optional
            Rather than count values, group them into half-open bins,
            a convenience for pd.cut, only works with numeric data
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series
        """
        from pandas.core.algorithms import value_counts
        return value_counts(self.values,
                            sort=sort,
                            ascending=ascending,
                            normalize=normalize,
                            bins=bins,
                            dropna=dropna)
Пример #13
0
    def value_counts(self,
                     normalize=False,
                     sort=True,
                     ascending=False,
                     bins=None,
                     dropna=True):
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : boolean, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : boolean, default True
            Sort by frequencies.
        ascending : boolean, default False
            Sort in ascending order.
        bins : integer, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        Series

        See Also
        --------
        Series.count: Number of non-NA elements in a Series.
        DataFrame.count: Number of non-NA elements in a DataFrame.

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        4.0    1
        2.0    1
        1.0    1
        dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        4.0    0.2
        2.0    0.2
        1.0    0.2
        dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        apparitions of values, divide the index in the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (2.0, 3.0]      2
        (0.996, 2.0]    2
        (3.0, 4.0]      1
        dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        NaN    1
        4.0    1
        2.0    1
        1.0    1
        dtype: int64
        """
        from pandas.core.algorithms import value_counts
        result = value_counts(self,
                              sort=sort,
                              ascending=ascending,
                              normalize=normalize,
                              bins=bins,
                              dropna=dropna)
        return result
Пример #14
0
    def value_counts(self, dropna: bool = True):
        from pandas.core.algorithms import value_counts

        return value_counts(self.to_numpy(), dropna=dropna)
Пример #15
0
    def value_counts(self, normalize=False, sort=True, ascending=False,
                     bins=None, dropna=True):
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : boolean, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : boolean, default True
            Sort by values.
        ascending : boolean, default False
            Sort in ascending order.
        bins : integer, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : boolean, default True
            Don't include counts of NaN.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.count: number of non-NA elements in a Series
        DataFrame.count: number of non-NA elements in a DataFrame

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        4.0    1
        2.0    1
        1.0    1
        dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        4.0    0.2
        2.0    0.2
        1.0    0.2
        dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        apparitions of values, divide the index in the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (2.0, 3.0]      2
        (0.996, 2.0]    2
        (3.0, 4.0]      1
        dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        NaN    1
        4.0    1
        2.0    1
        1.0    1
        dtype: int64
        """
        from pandas.core.algorithms import value_counts
        result = value_counts(self, sort=sort, ascending=ascending,
                              normalize=normalize, bins=bins, dropna=dropna)
        return result