def test_uint64_overflow(self):
    """Check ``algos.mode`` on uint64 data holding values of ``2**63``.

    Each case pairs the input values with the values the result is
    expected to hold; both sides are built with ``np.uint64`` dtype.
    """
    big = 2**63
    cases = [
        # 2**63 occurs twice, so it is the expected mode.
        ([1, big, big], [big]),
        # No value repeats: the expected result is empty.
        ([1, big], []),
    ]
    for values, modes in cases:
        ser = Series(values, dtype=np.uint64)
        expected = Series(modes, dtype=np.uint64)
        tm.assert_series_equal(algos.mode(ser), expected)
def test_no_mode(self):
    """Check that ``algos.mode`` yields an empty Series when nothing repeats.

    Covers an empty list, a single integer and a list of distinct strings,
    each compared against an empty Series of the matching dtype.
    """
    exp = Series([], dtype=np.float64)
    tm.assert_series_equal(algos.mode([]), exp)

    # ``np.int`` and ``np.object`` were plain aliases of the builtins; they
    # were deprecated in NumPy 1.20 and removed in 1.24, so use the builtins
    # directly (same dtypes, no AttributeError on current NumPy).
    exp = Series([], dtype=int)
    tm.assert_series_equal(algos.mode([1]), exp)

    exp = Series([], dtype=object)
    tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp)
def test_timedelta_mode(self):
    """Check ``algos.mode`` on ``timedelta64[ns]`` Series.

    The first case has no repeated value and expects an empty result; the
    second has two values occurring twice each and expects both of them.
    """
    td_dtype = 'timedelta64[ns]'
    cases = [
        (['1 days', '-1 days', '0 days'], []),
        (['1 day', '1 day', '-1 day', '-1 day 2 min', '2 min', '2 min'],
         ['2 min', '1 day']),
    ]
    for values, modes in cases:
        ser = Series(values, dtype=td_dtype)
        expected = Series(modes, dtype=td_dtype)
        tm.assert_series_equal(algos.mode(ser), expected)
def test_datelike_mode(self):
    """Check ``algos.mode`` on ``M8[ns]`` (datetime64) Series.

    Three distinct dates are expected to give an empty result; when two of
    the dates appear twice, those two dates are the expected result.
    """
    dt_dtype = 'M8[ns]'
    cases = [
        (['2011-01-03', '2013-01-02', '1900-05-03'], []),
        (['2011-01-03', '2013-01-02', '1900-05-03',
          '2011-01-03', '2013-01-02'],
         ['2011-01-03', '2013-01-02']),
    ]
    for values, modes in cases:
        ser = Series(values, dtype=dt_dtype)
        expected = Series(modes, dtype=dt_dtype)
        tm.assert_series_equal(algos.mode(ser), expected)
def test_categorical(self):
    """Check ``algos.mode`` when the input is a ``Categorical``.

    Each case lists the categorical's values, the expected result values
    and the dtype the expected Series is built with.
    """
    cases = [
        ([1, 2], [], np.int64),              # nothing repeats
        ([1, 'a', 'a'], ['a'], object),      # mixed values, 'a' repeats
        ([1, 1, 2, 3, 3], [1, 3], np.int64), # two values tie
    ]
    for values, modes, dtype in cases:
        cat = Categorical(values)
        expected = Series(modes, dtype=dtype)
        tm.assert_series_equal(algos.mode(cat), expected)
def missing_values(self):
    """Fill missing values in ``self.train`` and ``self.test`` in place.

    For every attribute named in ``self.meta_data``:

    * if its entry equals ``"NUMERIC"``, missing values are replaced by the
      mean of that attribute within the same ``"class"`` group;
    * otherwise they are replaced by ``mode(column)[0]``, where ``mode`` is
      whatever the surrounding module has bound to that name.

    Train and test frames are imputed independently, each from its own data.
    """
    for attribute in self.meta_data:
        numeric = self.meta_data[attribute] == "NUMERIC"
        for frame in (self.train, self.test):
            if numeric:
                # Select the attribute *before* transforming: transforming
                # the whole grouped frame returns one column per non-group
                # column, which cannot be assigned to a single column.
                frame[attribute] = frame.groupby("class")[attribute].transform(
                    lambda x: x.fillna(x.mean()))
            else:
                # Assign the filled column back instead of calling
                # ``fillna(inplace=True)`` on ``frame[attribute]``; the
                # chained in-place form does not update the parent frame
                # when pandas Copy-on-Write is active.
                frame[attribute] = frame[attribute].fillna(
                    mode(frame[attribute])[0])
def test_strobj_mode(self):
    """Check ``algos.mode`` on string data for 'c', ``str`` and ``object`` dtypes.

    In both data sets the second value occurs three times against two for
    the first, so it is the single expected mode.
    """
    data = ['a'] * 2 + ['b'] * 3
    s = Series(data, dtype='c')
    exp = Series(['b'], dtype='c')
    tm.assert_series_equal(algos.mode(s), exp)

    data = ['foo'] * 2 + ['bar'] * 3
    exp_values = ['bar']
    for dt in [str, object]:
        s = Series(data, dtype=dt)
        # Build the expectation from the raw values on every pass; reusing
        # one name for both the list and the Series made the second
        # iteration wrap the Series produced by the first.
        exp = Series(exp_values, dtype=dt)
        tm.assert_series_equal(algos.mode(s), exp)
def test_number_mode(self):
    """Check ``algos.mode`` for every NumPy integer and float type code.

    Two data sets are used per dtype: one with a single most frequent
    value and one where two values tie for most frequent.
    """
    single = ([1] * 5 + [2] * 3, [1])
    multi = ([1] * 5 + [2] * 3 + [3] * 5, [1, 3])

    type_codes = np.typecodes['AllInteger'] + np.typecodes['Float']
    for code in type_codes:
        for values, modes in (single, multi):
            result = algos.mode(Series(values, dtype=code))
            expected = Series(modes, dtype=code)
            tm.assert_series_equal(result, expected)
def detect_frequency(idx):
    """
    Return the most plausible frequency of DatetimeIndex idx, even when it
    contains gaps.

    The deltas between consecutive elements (idx[1:] - idx[:-1]) are
    computed, ``mode`` is applied to them, and the first entry of that
    result is passed to ``to_offset``. When ``mode`` returns an empty
    result (no clear mode), the smallest delta is used instead.

    Note kept from the original author: pandas has its own inferer,

    ..ipython:
        from pandas.tseries.frequencies import _TimedeltaFrequencyInferer
        inferer = _TimedeltaFrequencyInferer(idx)
        freq = inferer.get_freq()

    but for irregular intraday data (like 'publication_date' of forecast
    timeseries) inferer.get_freq() returns None, hence this fallback to
    the smallest delta.

    :param idx: DatetimeIndex
    :return: the value of ``to_offset`` for the selected delta
    :raises ValueError: if idx has fewer than two elements
    """
    if len(idx) < 2:
        raise ValueError(
            "Cannot detect frequency of index when index as less than two elements"
        )

    # Differences between each element and its predecessor.
    steps = idx[1:] - idx[:-1]
    frequent_steps = mode(steps)

    if len(frequent_steps) != 0:
        # A most frequent delta exists: use it.
        return to_offset(frequent_steps[0])

    # No clear mode: fall back to the smallest delta.
    return to_offset(min(steps))
def test_index(self):
    """Check ``algos.mode`` when the input is an ``Index``.

    Covers an integer index without repeats, a mixed index, an integer
    index with two tied values and a ``timedelta64[ns]`` index.
    """
    def check(index, expected):
        tm.assert_series_equal(algos.mode(index), expected)

    # No repeated value: expected result is empty.
    check(Index([1, 2, 3]), Series([], dtype=np.int64))

    # Mixed values where 'a' repeats.
    check(Index([1, 'a', 'a']), Series(['a'], dtype=object))

    # Two values tie for most frequent.
    check(Index([1, 1, 2, 3, 3]), Series([1, 3], dtype=np.int64))

    # Timedelta index with two values occurring twice each.
    td_dtype = 'timedelta64[ns]'
    td_index = Index(['1 day', '1 day', '-1 day', '-1 day 2 min',
                      '2 min', '2 min'], dtype=td_dtype)
    check(td_index, Series(['2 min', '1 day'], dtype=td_dtype))
def test_mixed_dtype(self):
    """Check ``algos.mode`` on a Series mixing an integer with strings.

    'foo' occurs twice and 1 once, so a Series holding only 'foo' is the
    expected result.
    """
    mixed = Series([1, 'foo', 'foo'])
    expected = Series(['foo'])
    result = algos.mode(mixed)
    tm.assert_series_equal(result, expected)