Example #1

from pathlib import Path

from pynets.stats import netstats


def test_collect_pandas_df_make(plot_switch, sql_out, nc_collect,
                                create_summary, graph_num):
    """
    Test collect_pandas_df_make against empty, single, multiple, and
    non-csv graph-metric file lists.
    """
    base_dir = str(Path(__file__).parent / "examples")
    network = None
    ID = '002'

    if graph_num == -1:
        net_mets_csv_list = [
            f"{base_dir}/miscellaneous/002_parcels_Default.nii.gz"
        ]
    elif graph_num == 0:
        net_mets_csv_list = []
    elif graph_num == 1:
        net_mets_csv_list = [
            f"{base_dir}/topology/metrics_sub-0021001_modality-dwi_nodetype-parc_model-csa_thrtype-PROP_thr-0.2.csv"
        ]
    else:
        net_mets_csv_list = [
            f"{base_dir}/topology/metrics_sub-0021001_modality-dwi_nodetype-parc_model-csa_thrtype-PROP_thr-0.2.csv",
            f"{base_dir}/topology/metrics_sub-0021001_modality-dwi_nodetype-parc_model-csa_thrtype-PROP_thr-0.3.csv"
        ]

    combination_complete = netstats.collect_pandas_df_make(
        net_mets_csv_list,
        ID,
        network,
        plot_switch,
        nc_collect=nc_collect,
        create_summary=create_summary,
        sql_out=sql_out)

    assert combination_complete is True
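
A minimal sketch (an assumption, not shown in the original listing) of how the test's arguments above could be supplied via pytest parametrization; the exact value grids are illustrative.

import pytest


@pytest.mark.parametrize("plot_switch", [True, False])
@pytest.mark.parametrize("sql_out", [True, False])
@pytest.mark.parametrize("nc_collect", [True, False])
@pytest.mark.parametrize("create_summary", [True, False])
@pytest.mark.parametrize("graph_num", [-1, 0, 1, 2])
def test_collect_pandas_df_make(plot_switch, sql_out, nc_collect,
                                create_summary, graph_num):
    ...  # body as in Example #1 above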
Example #2

from pathlib import Path

from pynets.stats import netstats


def test_collect_pandas_df_make(plot_switch, sql_out, nc_collect,
                                create_summary, graph_num):
    """
    Test collect_pandas_df_make against empty, single, multiple, and
    non-csv graph-metric file lists (netmetrics naming variant).
    """
    base_dir = str(Path(__file__).parent / "examples")
    network = None
    ID = '002'
    # The plot_switch argument is overridden: plotting stays disabled here.
    plot_switch = False

    if graph_num == -1:
        # Non-csv input; this should raise an error but currently does not.
        net_mets_csv_list = [
            f"{base_dir}/miscellaneous/002_parcels_Default.nii.gz"
        ]
    elif graph_num == 0:
        net_mets_csv_list = []
    elif graph_num == 1:
        net_mets_csv_list = [
            f"{base_dir}/netmetrics/0021001_modality-dwi_nodetype-parc_est-csa_thrtype-PROP_thr-0.2_net_mets.csv"
        ]
    else:
        # Multiple csv inputs; this case was previously breaking.
        net_mets_csv_list = [
            f"{base_dir}/netmetrics/0021001_modality-dwi_nodetype-parc_est-csa_thrtype-PROP_thr-0.2_net_mets.csv",
            f"{base_dir}/netmetrics/0021001_modality-dwi_nodetype-parc_est-csa_thrtype-PROP_thr-0.3_net_mets.csv"
        ]

    combination_complete = netstats.collect_pandas_df_make(
        net_mets_csv_list,
        ID,
        network,
        plot_switch,
        nc_collect=nc_collect,
        create_summary=create_summary,
        sql_out=sql_out)

    assert combination_complete is True
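
A minimal sketch (hypothetical, not part of the original suite) of synthesizing a metrics CSV with pytest's tmp_path fixture so the test does not depend on the repository's examples directory; whether collect_pandas_df_make accepts this exact CSV shape is an assumption.

import pandas as pd


def test_collect_pandas_df_make_synthetic(tmp_path):
    # Hypothetical single-row metrics CSV following the naming pattern above
    csv = tmp_path / ("metrics_sub-0021001_modality-dwi_nodetype-parc_"
                      "model-csa_thrtype-PROP_thr-0.2.csv")
    pd.DataFrame({"global_efficiency": [0.42],
                  "modularity": [0.31]}).to_csv(str(csv), index=False)
    assert netstats.collect_pandas_df_make([str(csv)], '002', None,
                                           False) is True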
Example #3

def recover_missing(bad_col, bad_cols_dict, rerun_dict, modality, working_path,
                    drop_cols, frame):
    """
    Attempt to back-fill a missing graph-metric column in `frame` from
    per-subject AUC outputs on disk, regenerating them where needed.
    """
    import glob
    import os
    import pandas as pd
    from pynets.stats.netstats import collect_pandas_df_make

    # The atlas name is the first two underscore-delimited tokens of the
    # column name, e.g. 'atlas-X_res-Y'.
    atlas = '_'.join(bad_col.split('_')[:2])
    rerun = False

    for lab in bad_cols_dict[bad_col]:
        sub, ses = lab.split('_')[0], lab.split('_')[1]
        # Build the nested sub -> ses -> modality -> atlas inventory
        rerun_dict.setdefault(sub, {}).setdefault(ses, {}).setdefault(
            modality, {}).setdefault(atlas, [])
        search_str = bad_col.replace(f"{atlas}_", '').split('_thrtype')[0]
        if not os.path.isdir(f"{working_path}/{sub}/{ses}/"
                             f"{modality}/{atlas}/topology/auc"):
            if not os.path.isdir(
                    f"{working_path}/{sub}/{ses}/{modality}/{atlas}/topology"):
                print(f"Missing graph analysis for {sub}, {ses} for "
                      f"{atlas}...")
            else:
                # Topology outputs exist but AUC outputs are missing, so
                # regenerate them from the *_neat.csv files.
                collect_pandas_df_make(
                    glob.glob(f"{working_path}/{sub}/{ses}/"
                              f"{modality}/{atlas}/"
                              f"topology/*_neat.csv"), f"{sub}_{ses}", None,
                    False)
                rerun = True
        outs = [
            i for i in glob.glob(f"{working_path}/{sub}/{ses}/"
                                 f"{modality}/{atlas}/topology/auc/*")
            if search_str in i
        ]

        if len(outs) == 1:
            # Fill in gaps (for things that get dropped during earlier
            # stages)
            try:
                df_tmp = pd.read_csv(outs[0],
                                     chunksize=100000,
                                     compression="gzip",
                                     encoding="utf-8",
                                     engine='python').read()
            except Exception:
                # Fall back to the 'c' parser engine if 'python' fails
                try:
                    df_tmp = pd.read_csv(outs[0],
                                         chunksize=100000,
                                         compression="gzip",
                                         encoding="utf-8",
                                         engine='c').read()
                except Exception:
                    print(f"Cannot load {outs[0]}")
                    continue
            if not df_tmp.empty:
                for drop in drop_cols:
                    if drop in bad_col:
                        print(f"Removing column: {drop}")
                        frame = frame.drop(columns=bad_col)

                if bad_col not in frame.columns:
                    # The column was dropped above; nothing left to recover.
                    continue
                try:
                    frame.loc[lab,
                              bad_col] = df_tmp.filter(regex=bad_col.split(
                                  'auc_')[1:][0]).values.tolist()[0][0]
                    print(f"Recovered missing data from {sub}, {ses} for "
                          f"{bad_col}...")
                except Exception:
                    # Recovery failed; regenerate the topology summaries
                    # and move on to the next label.
                    collect_pandas_df_make(
                        glob.glob(f"{working_path}/{sub}/{ses}/"
                                  f"{modality}/{atlas}/topology/*_neat.csv"),
                        f"{sub}_{ses}", None, False)
                    continue
                del df_tmp
            else:
                # The loaded dataframe was empty; flag the column for rerun
                # and regenerate the topology summaries.
                collect_pandas_df_make(
                    glob.glob(f"{working_path}/{sub}/{ses}/"
                              f"{modality}/{atlas}/topology/*_neat.csv"),
                    f"{sub}_{ses}", None, False)
                rerun_dict[sub][ses][modality][atlas].append(bad_col)
                continue
        elif len(outs) > 1:
            for out in outs:
                try:
                    df_tmp = pd.read_csv(out,
                                         chunksize=100000,
                                         compression="gzip",
                                         encoding="utf-8",
                                         engine='python').read()
                except Exception:
                    # Fall back to the 'c' parser engine if 'python' fails
                    try:
                        df_tmp = pd.read_csv(out,
                                             chunksize=100000,
                                             compression="gzip",
                                             encoding="utf-8",
                                             engine='c').read()
                    except Exception:
                        print(f"Cannot load {out}")
                        continue
                if not df_tmp.empty:
                    print(f"Recovered missing data from {sub}, {ses} for "
                          f"{bad_col}...")

                    for drop in drop_cols:
                        if drop in bad_col:
                            print(f"Removing column: {drop}")
                            frame = frame.drop(columns=bad_col)
                    try:
                        frame.loc[lab,
                                  bad_col] = df_tmp.filter(regex=bad_col.split(
                                      'auc_')[1:][0]).values.tolist()[0][0]
                    except Exception:
                        # Recovery failed; regenerate and skip this output
                        collect_pandas_df_make(
                            glob.glob(
                                f"{working_path}/{sub}/{ses}/"
                                f"{modality}/{atlas}/topology/*_neat.csv"),
                            f"{sub}_{ses}", None, False)
                        continue
                    del df_tmp
        else:
            # No AUC outputs found at all: add the column to the
            # missingness inventory and regenerate the topology summaries.
            rerun_dict[sub][ses][modality][atlas].append(bad_col)
            collect_pandas_df_make(
                glob.glob(f"{working_path}/{sub}/{ses}/"
                          f"{modality}/{atlas}/topology/*_neat.csv"),
                f"{sub}_{ses}", None, False)
    return rerun_dict, rerun
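
A hypothetical driver for recover_missing; the paths, column names, and group frame are illustrative assumptions, not from the original codebase.

import pandas as pd

working_path = "/scratch/pynets_out"  # assumed derivatives root
# Wide table of subjects x metric columns, indexed by '<sub>_<ses>' labels
frame = pd.read_csv("group_topology.csv", index_col=0)  # assumed file
# Map each problematic column to the '<sub>_<ses>' labels missing it
bad_cols_dict = {
    "atlas-DesikanKlein_res-1mm_auc_global_efficiency": ["0021001_ses-1"]
}
rerun_dict = {}

for bad_col in bad_cols_dict:
    rerun_dict, rerun = recover_missing(bad_col, bad_cols_dict, rerun_dict,
                                        "dwi", working_path,
                                        drop_cols=[], frame=frame)
    if rerun:
        print(f"Regenerated AUC outputs while recovering {bad_col}")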
Example #4
def collect_pandas_df(network, ID, net_mets_csv_list, plot_switch, multi_nets, multimodal):
    """
    API for summarizing independent lists of pickled pandas dataframes of graph metrics for each modality, RSN, and ROI.

    Parameters
    ----------
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the
        study of brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    net_mets_csv_list : list
        List of file paths to pickled pandas dataframes of graph metrics.
    plot_switch : bool
        Activate summary plotting (histograms, ROC curves, etc.)
    multi_nets : list
        List of Yeo RSNs specified in the workflow(s).
    multimodal : bool
        Indicates whether multiple modalities of input data have been specified.

    Returns
    -------
    combination_complete : bool
        If True, then collect_pandas_df completed successfully
    """
    from pathlib import Path
    import yaml
    from pynets.core.utils import flatten
    from pynets.stats.netstats import collect_pandas_df_make

    # Available functional and structural connectivity models
    with open(str(Path(__file__).parent.parent / 'runconfig.yaml'), 'r') as stream:
        hardcoded_params = yaml.safe_load(stream)
        try:
            func_models = hardcoded_params['available_models']['func_models']
        except KeyError:
            print('ERROR: available functional models not successfully extracted from runconfig.yaml')
            func_models = []  # avoid a NameError in the lookups below
        try:
            struct_models = hardcoded_params['available_models']['struct_models']
        except KeyError:
            print('ERROR: available structural models not successfully extracted from runconfig.yaml')
            struct_models = []  # avoid a NameError in the lookups below

    net_mets_csv_list = list(flatten(net_mets_csv_list))

    if multi_nets is not None:
        net_mets_csv_list_nets = net_mets_csv_list
        for network in multi_nets:
            net_mets_csv_list = list(set([i for i in net_mets_csv_list_nets if network in i]))
            if multimodal is True:
                net_mets_csv_list_dwi = list(set([i for i in net_mets_csv_list if i.split('mets_')[1].split('_')[0]
                                                   in struct_models]))
                combination_complete_dwi = collect_pandas_df_make(net_mets_csv_list_dwi, ID, network, plot_switch)
                net_mets_csv_list_func = list(set([i for i in net_mets_csv_list if
                                                    i.split('mets_')[1].split('_')[0] in func_models]))
                combination_complete_func = collect_pandas_df_make(net_mets_csv_list_func, ID, network, plot_switch)

                if combination_complete_dwi is True and combination_complete_func is True:
                    combination_complete = True
                else:
                    combination_complete = False
            else:
                combination_complete = collect_pandas_df_make(net_mets_csv_list, ID, network, plot_switch)
    else:
        if multimodal is True:
            net_mets_csv_list_dwi = list(set([i for i in net_mets_csv_list if i.split('mets_')[1].split('_')[0] in
                                               struct_models]))
            combination_complete_dwi = collect_pandas_df_make(net_mets_csv_list_dwi, ID, network, plot_switch)
            net_mets_csv_list_func = list(set([i for i in net_mets_csv_list if i.split('mets_')[1].split('_')[0]
                                                in func_models]))
            combination_complete_func = collect_pandas_df_make(net_mets_csv_list_func, ID, network, plot_switch)

            if combination_complete_dwi is True and combination_complete_func is True:
                combination_complete = True
            else:
                combination_complete = False
        else:
            combination_complete = collect_pandas_df_make(net_mets_csv_list, ID, network, plot_switch)

    return combination_complete
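
A minimal sketch of calling collect_pandas_df; the file names follow the 'mets_<model>_' pattern that the function parses, but the paths themselves are illustrative assumptions.

net_mets_csv_list = [
    "/output/0021001/topology/net_mets_csa_thr-0.2_neat.csv",
    "/output/0021001/topology/net_mets_partcorr_thr-0.2_neat.csv",
]
# 'csa' is parsed as a structural model and 'partcorr' as a functional
# model, so multimodal=True splits the list accordingly.
combination_complete = collect_pandas_df(network=None, ID='0021001',
                                         net_mets_csv_list=net_mets_csv_list,
                                         plot_switch=False, multi_nets=None,
                                         multimodal=True)
assert combination_complete is True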