def test_group_human_scores(df_feats):
    """Smoke-test the pipeline up to and including ``group_human_scores``.

    Runs the feature data through the anndata cleaning step, the small-dataset
    reduction, the merge with the (cached) global structure features and the
    widening step, then checks that grouping the human scores leaves at least
    one row.
    """
    small = make_small_dataset(adata_manipulations(df_feats))
    # use_cached=True: read the cached global structure table for the test
    merged = small.merge(get_global_structure(use_cached=True))
    grouped = group_human_scores(widen_df(merged))
    assert len(grouped) > 0
def test_make_regression_df(df_feats):
    """Smoke-test the pipeline up to and including ``make_regression_df``.

    Builds the wide, score-grouped, renamed dataframe the same way the loading
    pipeline does, then checks that both the regressed dataframe and the
    accompanying regression-info table are non-empty.
    """
    small = make_small_dataset(adata_manipulations(df_feats))
    # use_cached=True: read the cached global structure table for the test
    merged = small.merge(get_global_structure(use_cached=True))
    grouped = group_human_scores(widen_df(merged))
    renamed = grouped.rename(rename_dict, axis="columns")

    regressed, regression_info = make_regression_df(renamed)
    assert len(regressed) > 0
    assert len(regression_info) > 0
def load_data():
    """Load and munge every dataframe needed for the plots.

    Returns:
        A 3-tuple ``(df, df_tidy, df_regression)``: the wide feature
        dataframe, its tidy counterpart with FISH probes unpaired, and the
        regression info produced by ``make_regression_df``.
    """
    # Main feature table.
    raw_features = load_main_feat_data()

    # Round-trip through anndata: the general purpose cleaning functions are
    # written for anndata objects rather than pandas ones.
    annotated = adata_manipulations(raw_features)

    # Reduce to a handful of simple features, then merge in the global
    # structure metrics (DNN area classifier + radon transform features).
    compact = make_small_dataset(annotated).merge(get_global_structure())

    # Wide layout, with the manual human structure scores grouped into
    # coarser bins.
    wide = group_human_scores(widen_df(compact))

    # Clean up feature/column names before the regression step.
    wide = wide.rename(rename_dict, axis="columns")

    # Add the regressed organizational score and keep the regression info.
    wide, regression_info = make_regression_df(wide)

    # Tidy version where FISH probes are unpaired (makes facet plots easier).
    tidy = tidy_df(wide)

    # Clean up feature/column names on both dataframes.
    # NOTE(review): `wide` was already renamed above; this second pass
    # presumably catches columns introduced since then - confirm before
    # removing it.
    wide = wide.rename(rename_dict, axis="columns")
    tidy = tidy.rename(rename_dict, axis="columns")

    # Drop the amplifier ID from the FISH probe names.
    wide, tidy = clean_probe_names(wide, tidy)

    # Move from counts to count densities (normalized to cell area).
    wide, tidy = add_densities(wide, tidy)

    return wide, tidy, regression_info