示例#1
0
    def test_uint64_overflow(self):
        """Check ``algos.mode`` on uint64 values beyond the int64 range."""
        # 2**63 is larger than the int64 maximum and occurs twice here, so
        # it is the single expected mode.
        values = Series([1, 2**63, 2**63], dtype=np.uint64)
        expected = Series([2**63], dtype=np.uint64)
        result = algos.mode(values)
        tm.assert_series_equal(result, expected)

        # Every value occurs once: the expected result is an empty uint64
        # Series.
        values = Series([1, 2**63], dtype=np.uint64)
        expected = Series([], dtype=np.uint64)
        result = algos.mode(values)
        tm.assert_series_equal(result, expected)
示例#2
0
    def test_no_mode(self):
        """Check the expected empty result of ``algos.mode`` without repeats.

        Three inputs are covered: an empty list, a single integer and a list
        of distinct strings. Each is expected to produce an empty Series
        whose dtype matches the input (float64, platform int, object).
        """
        exp = Series([], dtype=np.float64)
        tm.assert_series_equal(algos.mode([]), exp)

        # ``np.int`` and ``np.object`` were plain aliases of the builtins.
        # They were deprecated in NumPy 1.20 and removed in 1.24, so the
        # builtins are used directly: same dtypes, no AttributeError on
        # recent NumPy releases.
        exp = Series([], dtype=int)
        tm.assert_series_equal(algos.mode([1]), exp)

        exp = Series([], dtype=object)
        tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp)
示例#3
0
    def test_timedelta_mode(self):
        """Check ``algos.mode`` on timedelta64[ns] Series."""
        td_dtype = 'timedelta64[ns]'

        # Three distinct durations: the expected result is empty.
        values = Series(['1 days', '-1 days', '0 days'], dtype=td_dtype)
        expected = Series([], dtype=td_dtype)
        tm.assert_series_equal(algos.mode(values), expected)

        # '1 day' and '2 min' both occur twice; the expected Series lists
        # '2 min' before '1 day'.
        values = Series(['1 day', '1 day', '-1 day', '-1 day 2 min',
                         '2 min', '2 min'], dtype=td_dtype)
        expected = Series(['2 min', '1 day'], dtype=td_dtype)
        tm.assert_series_equal(algos.mode(values), expected)
示例#4
0
    def test_datelike_mode(self):
        """Check ``algos.mode`` on datetime64[ns] (``M8[ns]``) Series."""
        dt_dtype = 'M8[ns]'

        # Three distinct dates: the expected result is empty.
        dates = Series(['2011-01-03', '2013-01-02', '1900-05-03'],
                       dtype=dt_dtype)
        expected = Series([], dtype=dt_dtype)
        tm.assert_series_equal(algos.mode(dates), expected)

        # Two dates occur twice each; both are expected in the result.
        dates = Series(['2011-01-03', '2013-01-02', '1900-05-03',
                        '2011-01-03', '2013-01-02'], dtype=dt_dtype)
        expected = Series(['2011-01-03', '2013-01-02'], dtype=dt_dtype)
        tm.assert_series_equal(algos.mode(dates), expected)
示例#5
0
    def test_categorical(self):
        """Check ``algos.mode`` when the input is a ``Categorical``."""
        # Each value occurs once: the expected result is an empty int64
        # Series.
        cat = Categorical([1, 2])
        expected = Series([], dtype=np.int64)
        result = algos.mode(cat)
        tm.assert_series_equal(result, expected)

        # Mixed int/str values with 'a' repeated: expected as object dtype.
        cat = Categorical([1, 'a', 'a'])
        expected = Series(['a'], dtype=object)
        result = algos.mode(cat)
        tm.assert_series_equal(result, expected)

        # 1 and 3 both occur twice: both are expected in the result.
        cat = Categorical([1, 1, 2, 3, 3])
        expected = Series([1, 3], dtype=np.int64)
        result = algos.mode(cat)
        tm.assert_series_equal(result, expected)
 def missing_values(self):
     """Impute missing values in ``self.train`` and ``self.test`` in place.

     For every attribute listed in ``self.meta_data``:

     * ``"NUMERIC"`` attributes have each missing value replaced by the
       mean of that attribute over the rows sharing the same ``"class"``
       value;
     * every other attribute has its missing values replaced by the first
       entry of ``mode(column)``.

     The train and test frames are imputed independently of each other,
     each one using only its own rows.
     """
     for attribute, kind in self.meta_data.items():
         for frame in (self.train, self.test):
             if kind == "NUMERIC":
                 # Select the attribute column *before* transforming.
                 # Transforming the whole grouped frame returns one column
                 # per non-group attribute, which cannot be assigned to a
                 # single column and would also call mean() on
                 # non-numeric columns.
                 per_class = frame.groupby("class")[attribute]
                 frame[attribute] = per_class.transform(
                     lambda col: col.fillna(col.mean()))
             else:
                 # Assign the filled column back instead of calling
                 # fillna(inplace=True) on a column selection: that is a
                 # chained assignment, which is not guaranteed to write
                 # through to the frame.
                 frame[attribute] = frame[attribute].fillna(
                     mode(frame[attribute])[0])
示例#7
0
    def test_strobj_mode(self):
        """Check ``algos.mode`` on Series built with 'c', str and object dtypes.

        In both data sets the second value occurs three times against two
        for the first, so it is the single expected mode.
        """
        char_data = ['a'] * 2 + ['b'] * 3
        char_mode = ['b']

        s = Series(char_data, dtype='c')
        exp = Series(char_mode, dtype='c')
        tm.assert_series_equal(algos.mode(s), exp)

        str_data = ['foo'] * 2 + ['bar'] * 3
        str_mode = ['bar']

        for dt in [str, object]:
            s = Series(str_data, dtype=dt)
            # Build the expectation from the raw list on every iteration.
            # Reusing one name for both the list and the Series would make
            # the second pass wrap the Series created by the first pass.
            exp = Series(str_mode, dtype=dt)
            tm.assert_series_equal(algos.mode(s), exp)
示例#8
0
    def test_number_mode(self):
        """Check ``algos.mode`` for every integer and float NumPy typecode."""
        # (input values, expected modes): first a single most frequent
        # value, then two values tied for most frequent.
        cases = [
            ([1] * 5 + [2] * 3, [1]),
            ([1] * 5 + [2] * 3 + [3] * 5, [1, 3]),
        ]
        typecodes = np.typecodes['AllInteger'] + np.typecodes['Float']

        for code in typecodes:
            for values, modes in cases:
                series = Series(values, dtype=code)
                expected = Series(modes, dtype=code)
                tm.assert_series_equal(algos.mode(series), expected)
示例#9
0
def detect_frequency(idx):
    """
    Guess the most plausible frequency of DatetimeIndex idx, gaps included.

    The differences between consecutive elements (idx[1:] - idx[:-1]) are
    computed, their 'mode' (the most frequent difference) is taken and it is
    converted into a frequency ('H', '15T', ...) through ``to_offset``.

    pandas ships its own inference:
    ..ipython:
        from pandas.tseries.frequencies import _TimedeltaFrequencyInferer
        inferer = _TimedeltaFrequencyInferer(idx)
        freq = inferer.get_freq()

    However, for irregular intraday data (such as the 'publication_date' of
    forecast timeseries) inferer.get_freq() returns None. When no mode is
    found here, the smallest difference is used instead.

    :param idx: DatetimeIndex
    :return: the result of ``to_offset`` applied to the selected difference
    :raises ValueError: if idx holds fewer than two elements
    """
    if len(idx) < 2:
        raise ValueError(
            "Cannot detect frequency of index when index as less than two elements"
        )

    # Differences between each element and its predecessor.
    steps = idx[1:] - idx[:-1]
    most_common = mode(steps)

    # Prefer the most frequent step; if ``mode`` returned nothing, fall back
    # to the smallest step.
    step = most_common[0] if len(most_common) != 0 else min(steps)
    return to_offset(step)
示例#10
0
    def test_index(self):
        """Check ``algos.mode`` when the input is an ``Index``."""
        # Each value occurs once: the expected result is an empty int64
        # Series.
        index = Index([1, 2, 3])
        expected = Series([], dtype=np.int64)
        result = algos.mode(index)
        tm.assert_series_equal(result, expected)

        # Mixed int/str values with 'a' repeated: expected as object dtype.
        index = Index([1, 'a', 'a'])
        expected = Series(['a'], dtype=object)
        result = algos.mode(index)
        tm.assert_series_equal(result, expected)

        # 1 and 3 both occur twice: both are expected in the result.
        index = Index([1, 1, 2, 3, 3])
        expected = Series([1, 3], dtype=np.int64)
        result = algos.mode(index)
        tm.assert_series_equal(result, expected)

        # Timedelta values: '1 day' and '2 min' both occur twice, and the
        # expected Series lists '2 min' before '1 day'.
        td_dtype = 'timedelta64[ns]'
        index = Index(['1 day', '1 day', '-1 day', '-1 day 2 min',
                       '2 min', '2 min'], dtype=td_dtype)
        expected = Series(['2 min', '1 day'], dtype=td_dtype)
        result = algos.mode(index)
        tm.assert_series_equal(result, expected)
示例#11
0
 def test_mixed_dtype(self):
     """Check ``algos.mode`` on a Series mixing an int with strings."""
     # 'foo' occurs twice and 1 once, so 'foo' is the single expected mode.
     mixed = Series([1, 'foo', 'foo'])
     expected = Series(['foo'])
     result = algos.mode(mixed)
     tm.assert_series_equal(result, expected)