def multiindex_year_month_day_dataframe_random_data():
    """
    DataFrame with 3 level MultiIndex (year, month, day) covering
    first 100 business days from 2000-01-01 with random data
    """
    tdf = tm.makeTimeDataFrame(100)
    ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
    # use Int64Index, to make sure things work
    ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True)
    ymd.index.set_names(["year", "month", "day"], inplace=True)
    return ymd
def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
    # GH#4940 inserting non-strings
    orig = tm.makeTimeDataFrame()
    df = orig.copy()

    df.loc[key, :] = df.iloc[0]
    ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name)
    ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0)

    expected = DataFrame(ex_data, index=ex_index, columns=orig.columns)
    tm.assert_frame_equal(df, expected)
def test_append_to_multiple_dropna(setup_path):
    df1 = tm.makeTimeDataFrame()
    df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
    df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=True should guarantee rows are synchronized
        store.append_to_multiple(
            {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True
        )
        result = store.select_as_multiple(["df1", "df2"])
        expected = df.dropna()
        tm.assert_frame_equal(result, expected)
        tm.assert_index_equal(store.select("df1").index, store.select("df2").index)
def test_squeeze(self):
    # noop
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=["A"])
    tm.assert_series_equal(df.squeeze(), df["A"])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name="five", dtype=np.float64)
    empty_frame = DataFrame([empty_series])
    tm.assert_series_equal(empty_series, empty_series.squeeze())
    tm.assert_series_equal(empty_series, empty_frame.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]

    msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis=2)
    msg = "No axis named x for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis="x")

    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def test_granger_pvalues_ssr_f(test_input, expected):
    # Set random seed, otherwise testing creates a new dataframe each time.
    np.random.seed(12)
    data = testing.makeTimeDataFrame(freq="s", nper=1000)
    granger = (
        GrangerCausality(target_col="B", x_col="A", max_shift=10, statistics=test_input)
        .fit(data)
        .results_[0]
    )
    p_value = granger.values[1]
    # Not exactly equal, but test up to 7 decimal digits
    np.testing.assert_almost_equal(p_value, expected, decimal=7)
def test_frame_select_complex(setup_path):
    # select via complex criteria
    df = tm.makeTimeDataFrame()
    df["string"] = "foo"
    df.loc[df.index[0:4], "string"] = "bar"

    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", data_columns=["string"])

        # empty
        result = store.select("df", 'index>df.index[3] & string="bar"')
        expected = df.loc[(df.index > df.index[3]) & (df.string == "bar")]
        tm.assert_frame_equal(result, expected)

        result = store.select("df", 'index>df.index[3] & string="foo"')
        expected = df.loc[(df.index > df.index[3]) & (df.string == "foo")]
        tm.assert_frame_equal(result, expected)

        # or
        result = store.select("df", 'index>df.index[3] | string="bar"')
        expected = df.loc[(df.index > df.index[3]) | (df.string == "bar")]
        tm.assert_frame_equal(result, expected)

        result = store.select(
            "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"'
        )
        expected = df.loc[
            ((df.index > df.index[3]) & (df.index <= df.index[6]))
            | (df.string == "bar")
        ]
        tm.assert_frame_equal(result, expected)

        # invert
        result = store.select("df", 'string!="bar"')
        expected = df.loc[df.string != "bar"]
        tm.assert_frame_equal(result, expected)

        # invert not implemented in numexpr :(
        msg = "cannot use an invert condition when passing to numexpr"
        with pytest.raises(NotImplementedError, match=msg):
            store.select("df", '~(string="bar")')

        # invert ok for filters
        result = store.select("df", "~(columns=['A','B'])")
        expected = df.loc[:, df.columns.difference(["A", "B"])]
        tm.assert_frame_equal(result, expected)

        # in
        result = store.select("df", "index>df.index[3] & columns in ['A','B']")
        expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"])
        tm.assert_frame_equal(result, expected)
def test_last_subset(self, frame_or_series):
    ts = tm.makeTimeDataFrame(freq="12h")
    if frame_or_series is not DataFrame:
        ts = ts["A"]
    result = ts.last("10d")
    assert len(result) == 20

    ts = tm.makeTimeDataFrame(nper=30, freq="D")
    if frame_or_series is not DataFrame:
        ts = ts["A"]
    result = ts.last("10d")
    assert len(result) == 10

    result = ts.last("21D")
    expected = ts["2000-01-10":]
    tm.assert_equal(result, expected)

    result = ts.last("21D")
    expected = ts[-21:]
    tm.assert_equal(result, expected)

    result = ts[:0].last("3M")
    tm.assert_equal(result, ts[:0])
def test_first_subset(self, frame_or_series):
    ts = tm.makeTimeDataFrame(freq="12h")
    if frame_or_series is not DataFrame:
        ts = ts["A"]
    result = ts.first("10d")
    assert len(result) == 20

    ts = tm.makeTimeDataFrame(freq="D")
    if frame_or_series is not DataFrame:
        ts = ts["A"]
    result = ts.first("10d")
    assert len(result) == 10

    result = ts.first("3M")
    expected = ts[:"3/31/2000"]
    tm.assert_equal(result, expected)

    result = ts.first("21D")
    expected = ts[:21]
    tm.assert_equal(result, expected)

    result = ts[:0].first("3M")
    tm.assert_equal(result, ts[:0])
def test_append_to_multiple_dropna_false(setup_path):
    df1 = tm.makeTimeDataFrame()
    df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
    df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=False shouldn't synchronize row indexes
        store.append_to_multiple(
            {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
        )

        msg = "all tables must have exactly the same nrows!"
        with pytest.raises(ValueError, match=msg):
            store.select_as_multiple(["df1a", "df2a"])

        assert not store.select("df1a").index.equals(store.select("df2a").index)
def setup_method(self, method):
    TestPlotBase.setup_method(self, method)
    import matplotlib as mpl

    mpl.rcdefaults()

    self.tdf = tm.makeTimeDataFrame()
    self.hexbin_df = DataFrame(
        {
            "A": np.random.uniform(size=20),
            "B": np.random.uniform(size=20),
            "C": np.arange(20) + np.random.uniform(size=20),
        }
    )
def test_numpy_transpose(self):
    msg = "the 'axes' parameter is not supported"

    s = tm.makeFloatSeries()
    tm.assert_series_equal(np.transpose(s), s)

    with pytest.raises(ValueError, match=msg):
        np.transpose(s, axes=1)

    df = tm.makeTimeDataFrame()
    tm.assert_frame_equal(np.transpose(np.transpose(df)), df)

    with pytest.raises(ValueError, match=msg):
        np.transpose(df, axes=1)
def test_partial_set_invalid(self):
    # GH 4940
    # allow only setting of 'valid' values
    orig = tm.makeTimeDataFrame()

    # allow object conversion here
    df = orig.copy()
    df.loc["a", :] = df.iloc[0]
    exp = orig.append(Series(df.iloc[0], name="a"))
    tm.assert_frame_equal(df, exp)
    tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
    assert df.index.dtype == "object"
def test_unstack_multi_index_categorical_values():
    mi = tm.makeTimeDataFrame().stack().index.rename(["major", "minor"])
    ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")

    result = ser.unstack()

    dti = ser.index.levels[0]
    c = pd.Categorical(["foo"] * len(dti))
    expected = DataFrame(
        {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
        columns=pd.Index(list("ABCD"), name="minor"),
        index=dti.rename("major"),
    )
    tm.assert_frame_equal(result, expected)
def test_squeeze_axis(self):
    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]

    msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis=2)
    msg = "No axis named x for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis="x")
def test_numpy_transpose(self, frame_or_series):
    obj = tm.makeTimeDataFrame()
    obj = tm.get_obj(obj, frame_or_series)

    if frame_or_series is Series:
        # 1D -> np.transpose is no-op
        tm.assert_series_equal(np.transpose(obj), obj)

    # round-trip preserved
    tm.assert_equal(np.transpose(np.transpose(obj)), obj)

    msg = "the 'axes' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.transpose(obj, axes=1)
def setup_method(self, method):
    self.df = tm.makeTimeDataFrame()[:10]
    self.df["id1"] = (self.df["A"] > 0).astype(np.int64)
    self.df["id2"] = (self.df["B"] > 0).astype(np.int64)

    self.var_name = "var"
    self.value_name = "val"

    self.df1 = DataFrame(
        [
            [1.067683, -1.110463, 0.20867],
            [-1.321405, 0.368915, -1.055342],
            [-0.807333, 0.08298, -0.873361],
        ]
    )
    self.df1.columns = [list("ABC"), list("abc")]
    self.df1.columns.names = ["CAP", "low"]
def test_append_to_multiple_dropna_false(setup_path):
    df1 = tm.makeTimeDataFrame()
    df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
    df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=False shouldn't synchronize row indexes
        store.append_to_multiple(
            {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
        )

        # TODO Update error message to desired message for this case
        msg = "Cannot select as multiple after appending with dropna=False"
        with pytest.raises(ValueError, match=msg):
            store.select_as_multiple(["df1a", "df2a"])

        assert not store.select("df1a").index.equals(store.select("df2a").index)
def test_transform():
    data = Series(np.arange(9) // 3, index=np.arange(9))

    index = np.arange(9)
    np.random.shuffle(index)
    data = data.reindex(index)

    grouped = data.groupby(lambda x: x // 3)

    transformed = grouped.transform(lambda x: x * x.sum())
    assert transformed[7] == 12

    # GH 8046
    # make sure that we preserve the input order
    df = DataFrame(
        np.arange(6, dtype="int64").reshape(3, 2), columns=["a", "b"], index=[0, 2, 1]
    )
    key = [0, 0, 1]
    expected = (
        df.sort_index()
        .groupby(key)
        .transform(lambda x: x - x.mean())
        .groupby(key)
        .mean()
    )
    result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(key).mean()
    tm.assert_frame_equal(result, expected)

    def demean(arr):
        return arr - arr.mean()

    people = DataFrame(
        np.random.randn(5, 5),
        columns=["a", "b", "c", "d", "e"],
        index=["Joe", "Steve", "Wes", "Jim", "Travis"],
    )
    key = ["one", "two", "one", "two", "one"]
    result = people.groupby(key).transform(demean).groupby(key).mean()
    expected = people.groupby(key).apply(demean).groupby(key).mean()
    tm.assert_frame_equal(result, expected)

    # GH 8430
    df = tm.makeTimeDataFrame()
    g = df.groupby(pd.Grouper(freq="M"))
    g.transform(lambda x: x - 1)

    # GH 9700
    df = DataFrame({"a": range(5, 10), "b": range(5)})
    result = df.groupby("a").transform(max)
    expected = DataFrame({"b": range(5)})
    tm.assert_frame_equal(result, expected)
def test_mode(setup_path, mode):
    df = tm.makeTimeDataFrame()
    msg = r"[\S]* does not exist"

    with ensure_clean_path(setup_path) as path:
        # constructor
        if mode in ["r", "r+"]:
            with pytest.raises(OSError, match=msg):
                HDFStore(path, mode=mode)
        else:
            store = HDFStore(path, mode=mode)
            assert store._handle.mode == mode
            store.close()

    with ensure_clean_path(setup_path) as path:
        # context
        if mode in ["r", "r+"]:
            with pytest.raises(OSError, match=msg):
                with HDFStore(path, mode=mode) as store:
                    pass
        else:
            with HDFStore(path, mode=mode) as store:
                assert store._handle.mode == mode

    with ensure_clean_path(setup_path) as path:
        # conv write
        if mode in ["r", "r+"]:
            with pytest.raises(OSError, match=msg):
                df.to_hdf(path, "df", mode=mode)
            df.to_hdf(path, "df", mode="w")
        else:
            df.to_hdf(path, "df", mode=mode)

        # conv read
        if mode in ["w"]:
            msg = (
                "mode w is not allowed while performing a read. "
                r"Allowed modes are r, r\+ and a."
            )
            with pytest.raises(ValueError, match=msg):
                read_hdf(path, "df", mode=mode)
        else:
            result = read_hdf(path, "df", mode=mode)
            tm.assert_frame_equal(result, df)
def test_slice_locs_with_type_mismatch(self):
    df = tm.makeTimeDataFrame()
    stacked = df.stack()
    idx = stacked.index
    with pytest.raises(TypeError, match="^Level type mismatch"):
        idx.slice_locs((1, 3))
    with pytest.raises(TypeError, match="^Level type mismatch"):
        idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2))

    df = tm.makeCustomDataframe(5, 5)
    stacked = df.stack()
    idx = stacked.index
    with pytest.raises(TypeError, match="^Level type mismatch"):
        idx.slice_locs(timedelta(seconds=30))
    # TODO: Try creating a UnicodeDecodeError in exception message
    with pytest.raises(TypeError, match="^Level type mismatch"):
        idx.slice_locs(df.index[1], (16, "a"))
def test_take(self):
    indices = [1, 5, -2, 6, 3, -1]
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]:
        out = s.take(indices)
        expected = Series(
            data=s.values.take(indices), index=s.index.take(indices), dtype=s.dtype
        )
        tm.assert_series_equal(out, expected)
    for df in [tm.makeTimeDataFrame()]:
        out = df.take(indices)
        expected = DataFrame(
            data=df.values.take(indices, axis=0),
            index=df.index.take(indices),
            columns=df.columns,
        )
        tm.assert_frame_equal(out, expected)
def test_agg_grouping_is_list_tuple(ts):
    df = tm.makeTimeDataFrame()

    grouped = df.groupby(lambda x: x.year)
    grouper = grouped.grouper.groupings[0].grouper
    grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper))

    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)

    grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper))

    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)
def test_slice_locs():
    df = tm.makeTimeDataFrame()
    stacked = df.stack()
    idx = stacked.index

    slob = slice(*idx.slice_locs(df.index[5], df.index[15]))
    sliced = stacked[slob]
    expected = df[5:16].stack()
    tm.assert_almost_equal(sliced.values, expected.values)

    slob = slice(
        *idx.slice_locs(
            df.index[5] + timedelta(seconds=30),
            df.index[15] - timedelta(seconds=30),
        )
    )
    sliced = stacked[slob]
    expected = df[6:15].stack()
    tm.assert_almost_equal(sliced.values, expected.values)
def test_take_invalid_kwargs(self):
    indices = [-3, 2, 0, 1]
    s = tm.makeFloatSeries()
    df = tm.makeTimeDataFrame()

    for obj in (s, df):
        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            obj.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, mode="clip")
def test_take_invalid_kwargs(self, frame_or_series):
    indices = [-3, 2, 0, 1]
    obj = tm.makeTimeDataFrame()
    obj = tm.get_obj(obj, frame_or_series)

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        obj.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        obj.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        obj.take(indices, mode="clip")
def X(request):
    if "series" in request.param:
        return tm.makeTimeSeries(freq="D")
    elif "dataframe" in request.param:
        result = tm.makeTimeDataFrame(freq="D").drop(columns="A")
        if "date_col_str" in request.param:
            return result.assign(index=lambda x: x.index.astype(str)).set_index("index")
        elif "len_<_3" in request.param:
            return result.iloc[:2, :]
        elif "wo_date_col" in request.param:
            result.index.name = "some_other_index_name"
            return result
        else:
            raise ValueError("Invalid X fixture parameter")
    else:
        raise ValueError("Invalid X fixture parameter")
def test_invalid_filtering(setup_path):
    # can't use more than one filter (atm)
    df = tm.makeTimeDataFrame()

    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table")

        msg = "unable to collapse Joint Filters"
        # not implemented
        with pytest.raises(NotImplementedError, match=msg):
            store.select("df", "columns=['A'] | columns=['B']")

        # in theory we could deal with this
        with pytest.raises(NotImplementedError, match=msg):
            store.select("df", "columns=['A','B'] & columns=['C']")
def test_reshaping_multi_index_categorical(self):
    cols = ["ItemA", "ItemB", "ItemC"]
    data = {c: tm.makeTimeDataFrame() for c in cols}
    df = pd.concat({c: data[c].stack() for c in data}, axis="columns")
    df.index.names = ["major", "minor"]
    df["str"] = "foo"

    df["category"] = df["str"].astype("category")
    result = df["category"].unstack()

    dti = df.index.levels[0]
    c = Categorical(["foo"] * len(dti))
    expected = DataFrame(
        {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
        columns=Index(list("ABCD"), name="minor"),
        index=dti.rename("major"),
    )
    tm.assert_frame_equal(result, expected)
def test_resample_frame_basic():
    df = tm.makeTimeDataFrame()

    b = Grouper(freq="M")
    g = df.groupby(b)

    # check all cython functions work
    funcs = ["add", "mean", "prod", "min", "max", "var"]
    for f in funcs:
        g._cython_agg_general(f)

    result = df.resample("A").mean()
    tm.assert_series_equal(result["A"], df["A"].resample("A").mean())

    result = df.resample("M").mean()
    tm.assert_series_equal(result["A"], df["A"].resample("M").mean())

    df.resample("M", kind="period").mean()
    df.resample("W-WED", kind="period").mean()
def test_getitem_setitem_non_ix_labels(self):
    df = tm.makeTimeDataFrame()

    start, end = df.index[[5, 10]]

    result = df.loc[start:end]
    result2 = df[start:end]
    expected = df[5:11]
    tm.assert_frame_equal(result, expected)
    tm.assert_frame_equal(result2, expected)

    result = df.copy()
    result.loc[start:end] = 0
    result2 = df.copy()
    result2[start:end] = 0
    expected = df.copy()
    expected[5:11] = 0
    tm.assert_frame_equal(result, expected)
    tm.assert_frame_equal(result2, expected)