def test_nan_byvar_transform(self):
    """Identity transform must echo ``val`` into ``val_transform``.

    Uses the ``df_nan_byvar`` fixture and groups on ``byvar``; the output is
    expected to be the input plus a ``val_transform`` column equal to ``val``.
    """
    source = self.df_nan_byvar
    # ``assign`` returns a new frame, so the fixture itself is left untouched.
    expected = source.assign(val_transform=source["val"])

    result = pd_utils.groupby_merge(
        source, "byvar", "transform", (lambda col: col)
    )

    assert_frame_equal(expected, result)
def test_nan_byvar_and_nan_val_transform_numeric(self):
    """A numeric transform must keep a non-default index intact.

    The ``df_nan_byvar_and_val`` fixture is re-indexed to ``[5, 6, 7, 8]``
    before the call; the output is expected to carry that same index and a
    ``val_transform`` column equal to ``val + 1``.
    """
    custom_index = [5, 6, 7, 8]

    source = self.df_nan_byvar_and_val.copy()
    source.index = custom_index

    # Expected output: same rows and index as the input, plus the new column.
    expected = source.copy()
    expected["val_transform"] = expected["val"] + 1

    result = pd_utils.groupby_merge(
        source, "byvar", "transform", (lambda col: col + 1)
    )

    assert_frame_equal(expected, result)
def test_nan_byvar_sum(self):
    """Group-wise ``sum`` merged back onto the ``df_nan_byvar`` fixture.

    The expected frame has a NaN ``val_sum`` on the row whose ``byvar`` is
    NaN, while both ``"b"`` rows share the total 7.0.
    """
    column_names = ["byvar", "val", "val_sum"]
    rows = [
        ("a", 1, 1.0),
        (nan, 2, nan),
        ("b", 3, 7.0),
        ("b", 4, 7.0),
    ]
    expected = pd.DataFrame(data=rows, columns=column_names)

    result = pd_utils.groupby_merge(self.df_nan_byvar, "byvar", "sum")

    assert_frame_equal(expected, result)
def test_subset_max(self):
    """``max`` restricted to ``RET`` via ``subset``, grouped on two keys.

    Every row is expected to gain a ``RET_max`` column holding the largest
    ``RET`` within its ``(PERMNO, byvar)`` group.
    """
    group_keys = ["PERMNO", "byvar"]
    result = pd_utils.groupby_merge(self.df, group_keys, "max", subset="RET")

    dates = ["1/1/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
    # One entry per (PERMNO, byvar) group, with its RET values in date order.
    groups = [
        (10516, "a", [1.01, 1.02, 1.03, 1.04]),
        (10516, "b", [1.05, 1.06, 1.07, 1.08]),
        (10517, "a", [1.09, 1.10, 1.11, 1.12]),
    ]
    rows = [
        (permno, byvar, date, ret, max(rets))
        for permno, byvar, rets in groups
        for date, ret in zip(dates, rets)
    ]
    expected = pd.DataFrame(
        rows,
        columns=["PERMNO", "byvar", "Date", "RET", "RET_max"],
    )

    assert_frame_equal(expected, result)
def summary_timing_df(parsed_df):
    """Collapse per-call timing rows into a per-function summary.

    Runs ``groupby_merge`` twice on the ``'time'`` column, grouped by
    ``'function'``: once with ``'sum'`` and once with ``'mean'``. The raw
    ``'time'`` and ``'orig order'`` columns are then dropped, duplicate rows
    are removed, and the result is sorted by ``'time_sum'``, largest first.

    NOTE(review): assumes ``parsed_df`` has ``'function'``, ``'time'`` and
    ``'orig order'`` columns, and that ``groupby_merge`` names the sum column
    ``'time_sum'`` -- confirm against the caller and ``groupby_merge``.
    """
    with_sum = groupby_merge(parsed_df, 'function', 'sum', subset='time')
    with_sum_and_mean = groupby_merge(
        with_sum, 'function', 'mean', subset='time'
    )

    # Without the per-call columns, rows for the same function may become
    # identical, so de-duplicate before sorting.
    per_call_columns = ['time', 'orig order']
    summary = with_sum_and_mean.drop(per_call_columns, axis=1)
    summary = summary.drop_duplicates()
    return summary.sort_values('time_sum', ascending=False)