def test_cummin():
    """Check `cummin` inside `mutate`, ungrouped and grouped by `cut`."""
    df = diamonds.copy() >> head(5) >> select(X.cut, X.x)

    # Ungrouped.
    df_cm = df >> mutate(cm=cummin(X.x))
    # Copy rather than alias: `df_truth = df` would bind the same object, so
    # adding the expected column would also modify the pipeline input (and
    # could make the comparison vacuous if the pipeline returns `df` itself).
    df_truth = df.copy()
    df_truth['cm'] = pd.Series([3.95, 3.89, 3.89, 3.89, 3.89])
    # Exact comparison is fine here: a running minimum only selects values
    # that already exist in `x`, it never computes new floats.
    assert df_cm.equals(df_truth)

    # Grouped by `cut`.
    df_cm = df >> groupby(X.cut) >> mutate(cm=cummin(X.x))
    df_truth['cm'] = pd.Series([3.95, 3.89, 4.05, 3.89, 4.05])
    assert df_cm.equals(df_truth)
def test_cumsum():
    """Check `cumsum` inside `mutate`, ungrouped and grouped by `cut`."""
    df = diamonds.copy() >> head(5) >> select(X.cut, X.x)
    # Accumulated float sums need not reproduce the decimal literals
    # bit-for-bit, so the new column is compared with a small tolerance.
    tol = 1e-9

    # Ungrouped.
    df_cs = df >> mutate(cs=cumsum(X.x))
    # Copy rather than alias: `df_truth = df` would bind the same object, so
    # adding the expected column would also modify the pipeline input.
    df_truth = df.copy()
    df_truth['cs'] = pd.Series([3.95, 7.84, 11.89, 16.09, 20.43])
    assert list(df_cs.columns) == list(df_truth.columns)
    # Untouched columns (and the index) must still match exactly.
    assert df_cs.drop('cs', axis=1).equals(df_truth.drop('cs', axis=1))
    # NaN differences compare False, so missing values fail the check.
    assert ((df_cs['cs'] - df_truth['cs']).abs() < tol).all()

    # Grouped by `cut`.
    df_cs = df >> groupby(X.cut) >> mutate(cs=cumsum(X.x))
    df_truth['cs'] = pd.Series([3.95, 3.89, 4.05, 8.09, 8.39])
    assert list(df_cs.columns) == list(df_truth.columns)
    assert df_cs.drop('cs', axis=1).equals(df_truth.drop('cs', axis=1))
    assert ((df_cs['cs'] - df_truth['cs']).abs() < tol).all()
def test_cummean():
    """Check `cummean` inside `mutate`, ungrouped and grouped by `cut`."""
    df = diamonds.copy() >> head(5) >> select(X.cut, X.x)
    # The expected literals are written to six decimal places (3.963333 is a
    # rounded repeating decimal), so exact equality cannot hold for a real
    # result; compare the new column within that rounding error instead.
    tol = 1e-6

    # Ungrouped.
    df_cm = df >> mutate(cm=cummean(X.x))
    # Copy rather than alias: `df_truth = df` would bind the same object, so
    # adding the expected column would also modify the pipeline input.
    df_truth = df.copy()
    df_truth['cm'] = pd.Series([3.950000, 3.920000, 3.963333, 4.022500, 4.086000])
    assert list(df_cm.columns) == list(df_truth.columns)
    # Untouched columns (and the index) must still match exactly.
    assert df_cm.drop('cm', axis=1).equals(df_truth.drop('cm', axis=1))
    # NaN differences compare False, so missing values fail the check.
    assert ((df_cm['cm'] - df_truth['cm']).abs() < tol).all()

    # Grouped by `cut`.
    df_cm = df >> groupby(X.cut) >> mutate(cm=cummean(X.x))
    df_truth['cm'] = pd.Series([3.950, 3.890, 4.050, 4.045, 4.195])
    assert list(df_cm.columns) == list(df_truth.columns)
    assert df_cm.drop('cm', axis=1).equals(df_truth.drop('cm', axis=1))
    assert ((df_cm['cm'] - df_truth['cm']).abs() < tol).all()
def test_mutate():
    """`mutate` should produce the same frame as direct column assignment."""
    expected = diamonds.copy()

    # Constant scalar.
    expected['testcol'] = 1
    piped = diamonds >> mutate(testcol=1)
    assert expected.equals(piped)

    # Existing column referenced through X.
    expected['testcol'] = expected['x']
    piped = diamonds >> mutate(testcol=X.x)
    assert expected.equals(piped)

    # Element-wise product of two columns.
    expected['testcol'] = expected['x'] * expected['y']
    piped = diamonds >> mutate(testcol=X.x * X.y)
    assert expected.equals(piped)

    # Scalar obtained by reducing a column.
    expected['testcol'] = expected['x'].mean()
    piped = diamonds >> mutate(testcol=np.mean(X.x))
    assert expected.equals(piped)
def test_cumprod():
    """Check `cumprod` inside `mutate`, ungrouped and grouped by `cut`."""
    df = diamonds.copy() >> head(5) >> select(X.cut, X.x)

    # Ungrouped.
    df_cp = df >> mutate(cp=cumprod(X.x))
    # Copy rather than alias: `df_truth = df` would bind the same object, so
    # adding the expected column would also modify the pipeline input.
    df_truth = df.copy()
    df_truth['cp'] = pd.Series([3.950000, 15.365500, 62.230275, 261.367155, 1134.333453])
    assert list(df_cp.columns) == list(df_truth.columns)
    # Untouched columns (and the index) must still match exactly.
    assert df_cp.drop('cp', axis=1).equals(df_truth.drop('cp', axis=1))
    # These literals are rounded to six decimals, so allow that much error.
    assert ((df_cp['cp'] - df_truth['cp']).abs() < 1e-6).all()

    # Grouped by `cut`.
    df_cp = df >> groupby(X.cut) >> mutate(cp=cumprod(X.x))
    df_truth['cp'] = pd.Series([3.950, 3.890, 4.050, 16.338, 17.577])
    assert list(df_cp.columns) == list(df_truth.columns)
    assert df_cp.drop('cp', axis=1).equals(df_truth.drop('cp', axis=1))
    # Float products are not bit-exact; compare the ABSOLUTE difference so a
    # result that is too small fails just like one that is too large.
    diffs = (df_cp['cp'] - df_truth['cp']).abs()
    assert (diffs < .0000001).all()
def test_min_rank():
    """Check `min_rank` inside `mutate`: numeric and `cut` columns, grouped
    by `cut`, and with `ascending=False`."""
    df = diamonds.copy() >> head(5) >> select(X.cut, X.x)

    # Ungrouped, ranking `x`.
    df_mr = df >> mutate(mr=min_rank(X.x))
    # Copy rather than alias: `df_truth = df` would bind the same object, so
    # adding the expected column would also modify the pipeline input (and
    # could make the ungrouped comparisons vacuous if the pipeline returns
    # `df` itself).
    df_truth = df.copy()
    df_truth['mr'] = pd.Series([2.0, 1.0, 3.0, 4.0, 5.0])
    assert df_mr.equals(df_truth)

    # Ungrouped, ranking the `cut` column.
    df_mr = df >> mutate(mr=min_rank(X.cut))
    df_truth['mr'] = pd.Series([3.0, 4.0, 1.0, 4.0, 1.0])
    assert df_mr.equals(df_truth)

    # Ranking `x` within each `cut` group.
    df_mr = df >> groupby(X.cut) >> mutate(mr=min_rank(X.x))
    df_truth['mr'] = pd.Series([1.0, 1.0, 1.0, 2.0, 2.0])
    assert df_mr.equals(df_truth)

    # Ungrouped, descending order.
    df_mr = df >> mutate(mr=min_rank(X.x, ascending=False))
    df_truth['mr'] = pd.Series([4.0, 5.0, 3.0, 2.0, 1.0])
    assert df_mr.equals(df_truth)
def test_group_transmute():
    """Grouped `transmute` should match a pandas groupby/apply reference."""
    # Reference: apply the helper per `cut` group with plain pandas, flatten
    # the index, then keep only the two columns the assertion compares.
    per_group = diamonds.copy().groupby('cut').apply(group_mutate_helper)
    flattened = per_group.reset_index(drop=True)
    reference = flattened[['cut', 'testcol']]

    # Same computation expressed through the pipeline.
    piped = diamonds >> groupby('cut') >> transmute(testcol=X.x * X.shape[0])

    assert reference.equals(piped)
def test_transmute():
    """`transmute` should return only the newly computed column."""
    # Reference: compute the product with plain pandas and keep just it.
    reference = diamonds.copy()
    reference['testcol'] = reference['x'] * reference['y']
    reference = reference[['testcol']]

    piped = diamonds >> transmute(testcol=X.x * X.y)

    assert reference.equals(piped)
def test_group_mutate():
    """Grouped `mutate` followed by `ungroup` should match pandas
    groupby/apply with the same helper."""
    # Reference: apply the helper per `cut` group with plain pandas.
    reference = diamonds.copy().groupby('cut').apply(group_mutate_helper)

    # Same computation expressed through the pipeline, then ungrouped.
    piped = (
        diamonds
        >> groupby('cut')
        >> mutate(testcol=X.x * X.shape[0])
        >> ungroup()
    )

    assert reference.equals(piped)