def test_group_human_scores(df_feats):
    """Smoke test: the pipeline up to ``group_human_scores`` yields rows.

    Args:
        df_feats: Feature data handed to ``adata_manipulations``
            (presumably supplied by a pytest fixture -- confirm in conftest).
    """
    # Feature data -> anndata -> reduced dataframe.
    small = make_small_dataset(adata_manipulations(df_feats))

    # Join the global structure table, requesting the cached copy.
    global_structure = get_global_structure(use_cached=True)
    wide = widen_df(small.merge(global_structure))

    grouped = group_human_scores(wide)
    assert len(grouped) > 0
def test_make_regression_df(df_feats):
    """Smoke test: ``make_regression_df`` returns two non-empty results.

    Args:
        df_feats: Feature data handed to ``adata_manipulations``
            (presumably supplied by a pytest fixture -- confirm in conftest).
    """
    # Feature data -> anndata -> reduced dataframe.
    small = make_small_dataset(adata_manipulations(df_feats))

    # Join the global structure table, requesting the cached copy.
    global_structure = get_global_structure(use_cached=True)
    wide = widen_df(small.merge(global_structure))

    # Bin the human scores, then apply the column renaming before regressing.
    renamed = group_human_scores(wide).rename(rename_dict, axis="columns")

    regressed, regression_info = make_regression_df(renamed)
    assert len(regressed) > 0
    assert len(regression_info) > 0
def load_data():
    """Load all inputs and assemble the dataframes used for plotting.

    Runs the whole pipeline in one go: load the main feature data, clean it
    via anndata, reduce it to a small feature set, merge in the global
    structure metrics, widen, bin the human scores, rename columns, add the
    regression output, build a tidy variant, clean the probe names and add
    count densities.

    Returns:
        tuple: ``(df, df_tidy, df_regression)`` -- the wide dataframe, its
        tidy counterpart with FISH probes unpaired, and the regression info
        returned by ``make_regression_df``.
    """
    # Main feature data, routed through anndata because the general purpose
    # cleaning functions are written for anndata rather than pandas objects.
    features_adata = adata_manipulations(load_main_feat_data())

    # Reduced dataframe holding only a handful of simple features.
    small = make_small_dataset(features_adata)

    # Global structure features (DNN area classifier + radon transform),
    # merged onto the reduced dataframe.
    global_structure = get_global_structure()
    small = small.merge(global_structure)

    # Wide layout, with the manual human structure scores grouped into
    # coarser bins.
    wide = group_human_scores(widen_df(small))

    # Clean up feature/column names before the regression step.
    wide = wide.rename(rename_dict, axis="columns")

    # Add the regressed organizational score and keep the regression info.
    wide, regression_info = make_regression_df(wide)

    # Tidy version in which FISH probes are unpaired (makes facet plots
    # easier); built before the second renaming pass below.
    tidy = tidy_df(wide)

    # Second renaming pass over both frames.
    # NOTE(review): the wide frame was already renamed above; presumably this
    # catches columns introduced since then -- confirm whether it is needed.
    wide = wide.rename(rename_dict, axis="columns")
    tidy = tidy.rename(rename_dict, axis="columns")

    # Drop the amplifier ID from the FISH probe names.
    wide, tidy = clean_probe_names(wide, tidy)

    # Move from counts to count densities (normalize to cell area).
    wide, tidy = add_densities(wide, tidy)

    return wide, tidy, regression_info