def summarize_mpars_by_subject(df):
    """
    Summarize the motion parameters per group and return them in long format.

    Rows of `df` are grouped by subject_id, condition, data_id and headcase.
    For each group the mean, median and std of the x, y, z, pitch, roll and
    yaw columns are computed, giving 18 summary columns named
    '<param>_<stat>' (e.g. 'x_mean', 'yaw_std'). The wide summary is then
    melted so that every summary column becomes one row, with the column name
    stored in 'measure' and its value in 'val'. The result is ordered by
    subject_id and given a fresh 0..n-1 index.

    `p` is the pipeline-verb module imported elsewhere in this file
    (presumably plydata -- confirm against the import block).
    """
    statistics = ("mean", "median", "std")
    axis_params = ("x", "y", "z")
    angle_params = ("pitch", "roll", "yaw")

    # Summary columns are produced parameter by parameter:
    # x_mean, x_median, x_std, y_mean, ...
    summary_exprs = {
        "{}_{}".format(param, stat): "{}({})".format(stat, param)
        for param in axis_params + angle_params
        for stat in statistics
    }

    # The melt stacks columns statistic by statistic within each family:
    # x_mean, y_mean, z_mean, x_median, ... then the pitch/roll/yaw family.
    measure_columns = [
        "{}_{}".format(param, stat)
        for family in (axis_params, angle_params)
        for stat in statistics
        for param in family
    ]

    wide_summary = (
        df
        >> p.group_by("subject_id", "condition", "data_id", "headcase")
        >> p.summarize(**summary_exprs)
    )
    long_summary = wide_summary >> p.call(
        ".melt",
        id_vars=["subject_id", "data_id", "condition", "headcase"],
        value_vars=measure_columns,
        var_name="measure",
        value_name="val",
    )
    return long_summary >> p.arrange("subject_id") >> p.call(".reset_index", drop=True)
def summarize_fd_by_subject(df):
    """
    Summarize FramewiseDisplacement per group and return it in long format.

    Rows of `df` are grouped by subject_id, condition, data_id and headcase.
    Five summaries of the FramewiseDisplacement column are computed per group:
    fd_mean, fd_median, fd_mean_filter, fd_median_filter and perc_spikes. The
    result is melted so that each summary becomes one row, with its name in
    'measure' and its value in 'val', then ordered by subject_id and given a
    fresh 0..n-1 index.

    The reducers filter_mean, filter_median and perc_high_motion are referenced
    by name inside the summary expressions and are not defined in this
    function; presumably they are module-level helpers -- verify they are in
    scope where this function lives.
    """
    # (output column, reducer applied to FramewiseDisplacement), in output order.
    reducers = (
        ("fd_mean", "mean"),
        ("fd_median", "median"),
        ("fd_mean_filter", "filter_mean"),
        ("fd_median_filter", "filter_median"),
        ("perc_spikes", "perc_high_motion"),
    )
    summary_exprs = {
        column: "{}(FramewiseDisplacement)".format(reducer)
        for column, reducer in reducers
    }
    measure_columns = [column for column, _ in reducers]

    def to_long(summary):
        # Reshape the wide per-group summary into (measure, val) rows.
        return summary.melt(
            id_vars=["subject_id", "data_id", "condition", "headcase"],
            value_vars=measure_columns,
            var_name="measure",
            value_name="val",
        )

    wide_summary = (
        df
        >> p.group_by("subject_id", "condition", "data_id", "headcase")
        >> p.summarize(**summary_exprs)
    )
    long_summary = wide_summary >> p.do(to_long)
    return long_summary >> p.arrange("subject_id") >> p.call(".reset_index", drop=True)
def test_call():
    """Check `call` with a plain function, a method name, and method arguments."""

    def without_a(data):
        # Return a new frame lacking column 'a'; the input is left untouched.
        return data.drop(columns=["a"])

    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, np.nan]})

    # An external function receives the dataframe as its first argument.
    trimmed = frame >> call(without_a)
    assert 'a' not in trimmed.columns
    assert 'b' in trimmed.columns

    # A leading dot selects a dataframe method; the NaN row is dropped.
    complete_rows = frame >> call('.dropna')
    assert len(complete_rows) == 2

    # Extra arguments are forwarded to the method: with axis=1 the column
    # holding the NaN ('b') is dropped instead of the row.
    complete_columns = frame >> define(c='a*2') >> call('.dropna', axis=1)
    remaining = complete_columns.columns
    assert 'a' in remaining
    assert 'b' not in remaining
    assert 'c' in remaining
def combine_sherlock_view_runs_and_replace(df):
    """
    Collapse the two sherlock 'view' runs into a single 'view' condition.

    `df` is a summarized dataframe (i.e. 1 datapoint per subject per
    condition/motion-direction). For rows whose data_id is 'sherlock' and whose
    condition is anything other than 'recall' (the 'view1' and 'view2' runs),
    `val` is averaged within each subject_id / measure / data_id / headcase
    group and the averaged rows are labelled with condition 'view'. These rows
    are appended to the rows of `df` whose condition is already 'view' or
    'recall', so the returned frame contains only 'view' and 'recall'
    conditions rather than 'view1', 'view2' and 'recall'.

    This is needed because realignment is computed on a per run basis, of
    which sherlock subs have 2 'view' runs, so their summary statistics
    (i.e. mean FD) are computed as: (mean of run1 + mean of run2) / 2.

    The returned frame has a fresh 0..n-1 index.
    """
    sherlock_view_runs = df >> p.query(
        "data_id == 'sherlock' and condition != 'recall'"
    )
    run_average = (
        sherlock_view_runs
        >> p.group_by("subject_id", "measure", "data_id", "headcase")
        >> p.summarize(val="mean(val)")
    )
    # Re-label the averaged rows and put the columns in a fixed order.
    averaged_view = (
        run_average
        >> p.call(".assign", condition="view")
        >> p.select("subject_id", "data_id", "condition", "headcase", "measure", "val")
    )

    # Everything already labelled 'view' or 'recall' is kept as-is; this
    # includes the sherlock 'recall' rows and drops the per-run view rows.
    view_and_recall = df.query("condition == 'view' or condition == 'recall'")

    stacked = pd.concat([view_and_recall, averaged_view], axis=0)
    return stacked.reset_index(drop=True)