Example #1
def get_pupil_run_block_diffs_df(sessions, analyspar, stimpar, parallel=False):
    """
    get_pupil_run_block_diffs_df(sessions, analyspar, stimpar)

    Returns pupil and running statistic differences (unexp - exp) by block.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters

    Optional args:
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - block_df (pd.DataFrame):
            dataframe with a row for each session, and the following 
            columns, in addition to the basic sess_df columns: 
            - run_block_diffs (1D array):
                split differences per block
            - run_block_stats (3D array): 
                block statistics (split x block x stats (me, err))
            - pupil_block_diffs (1D array):
                split differences per block
            - pupil_block_stats (3D array): 
                block statistics (split x block x stats (me, err))
    """

    block_df = misc_analys.get_check_sess_df(sessions,
                                             None,
                                             analyspar,
                                             roi=False)

    # gather arguments for retrieving block data
    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
    }

    # check that block_df rows still match the sessions
    misc_analys.get_check_sess_df(sessions, block_df)
    for datatype in ["pupil", "run"]:
        args_dict["datatype"] = datatype
        # sess x split x block x stats
        block_stats = gen_util.parallel_wrap(basic_analys.get_block_data,
                                             sessions,
                                             args_dict=args_dict,
                                             parallel=parallel)

        block_diffs = []
        for sess_block_data in block_stats:
            # take difference (unexp - exp statistic) for each block
        stat_diffs = (sess_block_data[1, ..., 0]
                      - sess_block_data[0, ..., 0])
            block_diffs.append(stat_diffs)

        block_df[f"{datatype}_block_stats"] = block_stats
        block_df[f"{datatype}_block_diffs"] = block_diffs

    return block_df
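
# Hypothetical usage sketch: `sessions` and the `analyspar`/`stimpar` named
# tuples are assumed to have been built beforehand (e.g., via session loading
# and sess_ntuple_util helpers), so the call is shown commented out:
#
# block_df = get_pupil_run_block_diffs_df(
#     sessions, analyspar, stimpar, parallel=True)
# print(block_df[["sessids", "run_block_diffs", "pupil_block_diffs"]])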
Example #2
def nrois_sess123(sessions, analyspar, sesspar, figpar):
    """
    nrois_sess123(sessions, analyspar, sesspar, figpar)

    Retrieves number of ROIs for sessions 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - figpar (dict): 
            dictionary containing figure parameters
    """

    logger.info("Compiling ROI numbers from session 1 to 3.",
                extra={"spacing": "\n"})

    nrois_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    extrapar = dict()

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "extrapar": extrapar,
        "nrois_df": nrois_df.to_dict()
    }

    helper_fcts.plot_save_all(info, figpar)
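
# Hypothetical usage sketch for this wrapper (and the similar plotting
# wrappers below): all assume `sessions` and the parameter tuples/dict were
# built beforehand, and all save and plot their results via
# helper_fcts.plot_save_all rather than returning them:
#
# nrois_sess123(sessions, analyspar, sesspar, figpar)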
Example #3
def mean_signal_sess123(sessions, analyspar, sesspar, figpar, parallel=False):
    """
    mean_signal_sess123(sessions, analyspar, sesspar, figpar)

    Retrieves ROI mean signal values for sessions 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    logger.info("Compiling ROI signal means from session 1 to 3.",
                extra={"spacing": "\n"})

    logger.info("Calculating ROI signal means for each session...",
                extra={"spacing": TAB})
    all_signal_means = gen_util.parallel_wrap(misc_analys.get_snr,
                                              sessions,
                                              [analyspar, "signal_means"],
                                              parallel=parallel)

    sig_mean_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)
    sig_mean_df["signal_means"] = [
        sig_mean.tolist() for sig_mean in all_signal_means
    ]

    extrapar = dict()

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "extrapar": extrapar,
        "sig_mean_df": sig_mean_df.to_dict()
    }

    helper_fcts.plot_save_all(info, figpar)
Example #4
def imaging_planes(sessions, sesspar, figpar, parallel=False):
    """
    imaging_planes(sessions, sesspar, figpar)

    Retrieves imaging plane image examples.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list):
            Session objects
        - sesspar (SessPar): 
            named tuple containing session parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    logger.info("Compiling imaging plane projection examples.",
                extra={"spacing": "\n"})

    imaging_plane_df = misc_analys.get_check_sess_df(sessions, roi=False)

    imaging_plane_df["max_projections"] = [
        sess.max_proj.tolist() for sess in sessions
    ]

    extrapar = dict()

    info = {
        "sesspar": sesspar._asdict(),
        "extrapar": extrapar,
        "imaging_plane_df": imaging_plane_df.to_dict()
    }

    helper_fcts.plot_save_all(info, figpar)
Пример #5
0
def set_multcomp(permpar, sessions, analyspar, consec_only=True, factor=1):
    """
    set_multcomp(permpar, sessions, analyspar)

    Returns permpar updated with the number of comparisons computed from the 
    sessions.

    Required args:
        - permpar (PermPar or dict): 
            named tuple containing permutation parameters
        - sessions (list):
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters

    Optional args:
        - consec_only (bool):
            if True, only consecutive session numbers are correlated
            default: True
        - factor (int):
            multiplicative factor
            default: 1

    Returns:
        - permpar (PermPar):
            updated permutation parameter named tuple
    """
    
    sess_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    n_comps = 0
    for _, lp_df in sess_df.groupby(["lines", "planes"]):
        corr_ns = get_corr_pairs(lp_df, consec_only=consec_only)
        n_comps += len(corr_ns)
    
    n_comps = n_comps * factor

    permpar = sess_ntuple_util.get_modif_ntuple(permpar, "multcomp", n_comps)

    return permpar
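
# Standalone toy sketch of the comparison counting above (the real pair
# selection lives in get_corr_pairs; the Bonferroni-style use of the
# resulting count at the end is an assumption for illustration, not
# necessarily the repo's exact correction):
import itertools

def count_session_pairs(sess_ns, consec_only=True):
    """Count session pairs, optionally keeping only consecutive ones."""
    pairs = list(itertools.combinations(sorted(sess_ns), 2))
    if consec_only:
        pairs = [(s1, s2) for s1, s2 in pairs if s2 - s1 == 1]
    return len(pairs)

# e.g., 4 line/plane groups, each with sessions 1-3, and factor=1:
n_comps = sum(count_session_pairs([1, 2, 3]) for _ in range(4))  # -> 8
corr_p_val = min(1.0, 0.005 * n_comps)  # scale a raw p-value by n_comps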
Example #6
def get_sess_grped_diffs_df(sessions,
                            analyspar,
                            stimpar,
                            basepar,
                            permpar,
                            split="by_exp",
                            randst=None,
                            parallel=False):
    """
    get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar)

    Returns split difference statistics for specific sessions, grouped across 
    mice.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - diffs_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following 
            columns, in addition to the basic sess_df columns: 
            - diff_stats (list): split difference stats (me, err)
            - null_CIs (list): adjusted null CI for split differences 
            - raw_p_vals (float): uncorrected p-value for differences within 
                sessions
            - p_vals (float): p-value for differences within sessions, 
                corrected for multiple comparisons and tails
            for session comparisons, e.g. 1v2:
            - raw_p_vals_{}v{} (float): uncorrected p-value for differences
                between sessions 
            - p_vals_{}v{} (float): p-value for differences between sessions, 
                corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_diffs_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    initial_columns = sess_diffs_df.columns.tolist()

    # gather arguments for retrieving ROI split statistics
    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "basepar": basepar,
        "split": split,
        "return_data": True,
    }

    # sess x split x ROI
    split_stats, split_data = gen_util.parallel_wrap(get_sess_roi_split_stats,
                                                     sessions,
                                                     args_dict=args_dict,
                                                     parallel=parallel,
                                                     zip_output=True)

    misc_analys.get_check_sess_df(sessions, sess_diffs_df)
    sess_diffs_df["roi_split_stats"] = list(split_stats)
    sess_diffs_df["roi_split_data"] = list(split_data)

    columns = initial_columns + ["diff_stats", "null_CIs"]
    diffs_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for lp_grp_vals, lp_grp_df in sess_diffs_df.groupby(["lines", "planes"]):
        lp_grp_df = lp_grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = lp_grp_vals
        lp_name = plot_helper_fcts.get_line_plane_name(line, plane)
        logger.info(f"Running permutation tests for {lp_name} sessions...",
                    extra={"spacing": TAB})

        # obtain ROI random split differences per session
        # done here to avoid OOM errors
        lp_rand_diffs = gen_util.parallel_wrap(
            get_rand_split_data,
            lp_grp_df["roi_split_data"].tolist(),
            args_list=[analyspar, permpar, randst],
            parallel=parallel,
            zip_output=False)

        sess_diffs = []
        row_indices = []
        sess_ns = sorted(lp_grp_df["sess_ns"].unique())
        for sess_n in sess_ns:
            row_idx = len(diffs_df)
            row_indices.append(row_idx)
            sess_grp_df = lp_grp_df.loc[lp_grp_df["sess_ns"] == sess_n]

            grp_vals = list(lp_grp_vals) + [sess_n]
            for g, group_column in enumerate(group_columns):
                diffs_df.loc[row_idx, group_column] = grp_vals[g]

            # add aggregated values for initial columns
            diffs_df = misc_analys.aggreg_columns(sess_grp_df,
                                                  diffs_df,
                                                  aggreg_cols,
                                                  row_idx=row_idx,
                                                  in_place=True)

            # group ROI split stats across mice: split x ROIs
            split_stats = np.concatenate(
                sess_grp_df["roi_split_stats"].to_numpy(), axis=-1)

            # take diff and stats across ROIs
            diffs = split_stats[1] - split_stats[0]
            diff_stats = math_util.get_stats(diffs,
                                             stats=analyspar.stats,
                                             error=analyspar.error,
                                             nanpol=nanpol)
            diffs_df.at[row_idx, "diff_stats"] = diff_stats.tolist()
            sess_diffs.append(diffs)

            # group random ROI split diffs across mice, and take stat
            rand_idxs = [
                lp_grp_df.index.tolist().index(idx)
                for idx in sess_grp_df.index
            ]
            rand_diffs = math_util.mean_med(
                np.concatenate([lp_rand_diffs[r] for r in rand_idxs], axis=0),
                axis=0,
                stats=analyspar.stats,
                nanpol=nanpol)

            # get CIs and p-values
            p_val, null_CI = rand_util.get_p_val_from_rand(
                diff_stats[0],
                rand_diffs,
                return_CIs=True,
                p_thresh=permpar.p_val,
                tails=permpar.tails,
                multcomp=permpar.multcomp,
                nanpol=nanpol)
            diffs_df.loc[row_idx, "p_vals"] = p_val
            diffs_df.at[row_idx, "null_CIs"] = null_CI

        del lp_rand_diffs  # free up memory

        # calculate p-values between sessions (0-1, 0-2, 1-2...)
        p_vals = rand_util.comp_vals_acr_groups(sess_diffs,
                                                n_perms=permpar.n_perms,
                                                stats=analyspar.stats,
                                                paired=analyspar.tracked,
                                                nanpol=nanpol,
                                                randst=randst)
        p = 0
        for i, sess_n in enumerate(sess_ns):
            for j, sess_n2 in enumerate(sess_ns[i + 1:]):
                key = f"p_vals_{int(sess_n)}v{int(sess_n2)}"
                diffs_df.loc[row_indices[i], key] = p_vals[p]
                diffs_df.loc[row_indices[i + j + 1], key] = p_vals[p]
                p += 1

    # add corrected p-values
    diffs_df = misc_analys.add_corr_p_vals(diffs_df, permpar)

    diffs_df["sess_ns"] = diffs_df["sess_ns"].astype(int)

    return diffs_df
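
# Simplified standalone illustration of the null-distribution step above (the
# real logic lives in rand_util.get_p_val_from_rand; this toy assumes two
# tails, a 95% CI and no multiple-comparison adjustment):
import numpy as np

rng = np.random.default_rng(0)
rand_diffs_toy = rng.normal(0, 1, size=10000)  # null split differences
true_diff = 2.5                                # observed difference statistic

# two-tailed p-value: proportion of null values at least as extreme
p_val_toy = np.mean(np.abs(rand_diffs_toy) >= np.abs(true_diff))

# null CI as [lower bound, null median, upper bound]
null_CI_toy = [np.percentile(rand_diffs_toy, q) for q in (2.5, 50, 97.5)]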
Example #7
def get_sess_roi_trace_df(sessions,
                          analyspar,
                          stimpar,
                          basepar,
                          split="by_exp",
                          parallel=False):
    """
    get_sess_roi_trace_df(sess, analyspar, stimpar, basepar)

    Returns ROI trace statistics for specific sessions, split as requested.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - trace_df (pd.DataFrame):
            dataframe with a row for each session, and the following 
            columns, in addition to the basic sess_df columns: 
            - roi_trace_stats (list): 
                ROI trace stats (split x ROIs x frames x stat (me, err))
            - time_values (list):
                values for each frame, in seconds
                (only 0 to stimpar.post, unless split is "by_exp")
    """

    trace_df = misc_analys.get_check_sess_df(sessions, None, analyspar)

    # gather arguments for retrieving ROI trace statistics
    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "basepar": basepar,
        "split": split,
    }

    # sess x split x ROIs x frames
    roi_trace_stats, all_time_values = gen_util.parallel_wrap(
        basic_analys.get_sess_roi_trace_stats,
        sessions,
        args_dict=args_dict,
        parallel=parallel,
        zip_output=True)

    misc_analys.get_check_sess_df(sessions, trace_df)
    trace_df["roi_trace_stats"] = [stats.tolist() for stats in roi_trace_stats]
    trace_df["time_values"] = [
        time_values.tolist() for time_values in all_time_values
    ]

    return trace_df
Example #8
def unexp_resp_stimulus_comp_sess1v3(sessions,
                                     analyspar,
                                     sesspar,
                                     stimpar,
                                     permpar,
                                     figpar,
                                     seed=None,
                                     parallel=False):
    """
    unexp_resp_stimulus_comp_sess1v3(sessions, analyspar, sesspar, stimpar, 
                                     permpar, figpar)

    Retrieves changes in tracked ROI responses to unexpected sequences for 
    Gabors vs visual flow stimuli.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    logger.info(
        ("Compiling changes in unexpected responses to Gabor vs visual "
         "flow stimuli."),
        extra={"spacing": "\n"})

    if analyspar.scale:
        raise ValueError("analyspar.scale should be set to False.")

    # calculate multiple comparisons
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(subset=["lines", "planes"])
    multcomp = len(dummy_df) + 1
    permpar = sess_ntuple_util.get_modif_ntuple(permpar, "multcomp", multcomp)

    comp_sess = [1, 3]
    datatype = "rel_unexp_resp"
    rel_sess = 1
    pop_stats = True
    unexp_comp_df = stim_analys.get_stim_stats_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        permpar=permpar,
        comp_sess=comp_sess,
        datatype=datatype,
        rel_sess=rel_sess,
        pop_stats=pop_stats,
        randst=seed,
        parallel=parallel,
    )

    extrapar = {
        "comp_sess": comp_sess,
        "datatype": datatype,
        "rel_sess": rel_sess,
        "pop_stats": pop_stats,
        "seed": seed,
    }

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": extrapar,
        "unexp_comp_df": unexp_comp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
Example #9
def visual_flow_tracked_roi_abs_usi_means_sess123(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, permpar, 
        figpar, seed=None, parallel=False):
    """
    visual_flow_tracked_roi_abs_usi_means_sess123(
        sessions, analyspar, sesspar, stimpar, basepar, idxpar, permpar, 
        figpar)

    Retrieves mean absolute values of tracked ROI visual flow USIs for 
    sessions 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    logger.info(
        ("Compiling absolute means of tracked ROI visual flow USIs for "
        "sessions 1 to 3."), 
        extra={"spacing": "\n"})

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    # calculate multiple comparisons
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(
            subset=["lines", "planes", "sess_ns"])

    permpar = misc_analys.set_multcomp(permpar, sess_df=dummy_df, CIs=False)

    absolute = True
    by_mouse = False
    idx_stats_df = usi_analys.get_idx_stats_df(
        sessions, 
        analyspar=analyspar, 
        stimpar=stimpar, 
        basepar=basepar, 
        idxpar=idxpar, 
        permpar=permpar, 
        absolute=absolute, 
        by_mouse=by_mouse, 
        randst=seed,
        parallel=parallel, 
        )

    extrapar = {
        "absolute": absolute,
        "by_mouse": by_mouse,
        "seed": seed,
    }

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "basepar": basepar._asdict(),
        "idxpar": idxpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": extrapar,
        "idx_stats_df": idx_stats_df.to_dict()
    }

    helper_fcts.plot_save_all(info, figpar)
Example #10
def run_sess_logreg(sess,
                    analyspar,
                    stimpar,
                    logregpar,
                    n_splits=100,
                    n_shuff_splits=300,
                    seed=None,
                    parallel=False):
    """
    run_sess_logreg(sess, analyspar, stimpar, logregpar)

    Runs logistic regressions on a session (real data and shuffled), and 
    returns statistics dataframes.

    Required args:
        - sess (Session): 
            Session object
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - logregpar (LogRegPar): 
            named tuple containing logistic regression parameters

    Optional args:
        - n_splits (int):
            number of data splits to run logistic regressions on
            default: 100
        - n_shuff_splits (int):
            number of shuffled data splits to run logistic regressions on
            default: 300
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - data_stats_df (pd.DataFrame):
            dataframe with only one data row containing data stats for each 
            score and data subset.
        - shuffle_df (pd.DataFrame):
            dataframe where each row contains data for different data 
            shuffles, and each column contains data for each score and data 
            subset.
    """

    seed = rand_util.seed_all(seed, log_seed=False, seed_now=False)

    # retrieve data
    input_data, target_data, ctrl_ns = get_decoding_data(sess,
                                                         analyspar,
                                                         stimpar,
                                                         comp=logregpar.comp,
                                                         ctrl=logregpar.ctrl)

    scores_df = misc_analys.get_check_sess_df([sess], None, analyspar)
    common_columns = scores_df.columns.tolist()
    logreg_columns = ["comp", "ctrl", "bal", "shuffle"]

    # do checks
    if logregpar.q1v4 or logregpar.exp_v_unexp:
        raise NotImplementedError("q1v4 and exp_v_unexp are not implemented.")
    if n_splits <= 0 or n_shuff_splits <= 0:
        raise ValueError("n_splits and n_shuff_splits must be greater than 0.")

    set_types = ["train", "test"]
    score_types = ["neg_log_loss", "accuracy", "balanced_accuracy"]
    set_score_types = list(itertools.product(set_types, score_types))

    extrapar = dict()
    for shuffle in [False, True]:
        n_runs = n_shuff_splits if shuffle else n_splits
        extrapar["shuffle"] = shuffle

        temp_dfs = []
        for b, n in enumerate(range(0, n_runs, MAX_SIMULT_RUNS)):
            extrapar["n_runs"] = int(np.min([MAX_SIMULT_RUNS, n_runs - n]))

            with logger_util.TempChangeLogLevel(level="warning"):
                mod_cvs, _, _ = logreg_util.run_logreg_cv_sk(
                    input_data,
                    target_data,
                    logregpar._asdict(),
                    extrapar,
                    analyspar.scale,
                    ctrl_ns,
                    randst=seed + b,
                    parallel=parallel,
                    save_models=False,
                    catch_set_prob=False)

            temp_df = pd.DataFrame()
            for set_type, score_type in set_score_types:
                key = f"{set_type}_{score_type}"
                temp_df[key] = mod_cvs[key]
            temp_dfs.append(temp_df)

        # compile batch scores, and get session stats for non shuffled data
        temp_df = pd.concat(temp_dfs, ignore_index=True)
        if not shuffle:
            temp_df = get_df_stats(temp_df, analyspar)

        # add columns to df
        score_columns = temp_df.columns.tolist()
        for col in common_columns:
            temp_df[col] = scores_df.loc[0, col]
        for col in logreg_columns:
            if col != "shuffle":
                temp_df[col] = logregpar._asdict()[col]
            else:
                temp_df[col] = shuffle

        # re-sort columns
        temp_df = temp_df.reindex(common_columns + logreg_columns +
                                  score_columns,
                                  axis=1)

        if shuffle:
            shuffle_df = temp_df
        else:
            data_stats_df = temp_df

    return data_stats_df, shuffle_df
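
# Simplified standalone sketch of the per-batch scoring step (the real runs
# go through logreg_util.run_logreg_cv_sk; here, plain scikit-learn on toy
# data, mirroring the {set}_{score} column layout used above):
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 20))      # trials x ROIs
y = rng.integers(0, 2, size=200)    # binary class targets

scoring = ["neg_log_loss", "accuracy", "balanced_accuracy"]
cvs = cross_validate(LogisticRegression(max_iter=1000), X, y, cv=5,
                     scoring=scoring, return_train_score=True)

scores_toy_df = pd.DataFrame(
    {f"{set_type}_{score}": cvs[f"{set_type}_{score}"]
     for set_type in ["train", "test"] for score in scoring})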
Example #11
def get_pupil_run_trace_df(sessions,
                           analyspar,
                           stimpar,
                           basepar,
                           split="by_exp",
                           parallel=False):
    """
    get_pupil_run_trace_df(sessions, analyspar, stimpar, basepar)

    Returns pupil and running traces for specific sessions, split as 
    requested.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - trace_df (pd.DataFrame):
            dataframe with a row for each session, and the following 
            columns, in addition to the basic sess_df columns: 
            - run_traces (list): 
                running velocity traces (split x seqs x frames)
            - run_time_values (list):
                values for each frame, in seconds
                (only 0 to stimpar.post, unless split is "by_exp")
            - pupil_traces (list): 
                pupil diameter traces (split x seqs x frames)
            - pupil_time_values (list):
                values for each frame, in seconds
                (only 0 to stimpar.post, unless split is "by_exp")    
    """

    trace_df = misc_analys.get_check_sess_df(sessions,
                                             None,
                                             analyspar,
                                             roi=False)

    # gather arguments for retrieving pupil and running traces
    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "baseline": basepar.baseline,
        "split": split,
    }

    misc_analys.get_check_sess_df(sessions, trace_df)
    for datatype in ["pupil", "run"]:
        args_dict["datatype"] = datatype
        # sess x split x seq x frames
        split_traces, all_time_values = gen_util.parallel_wrap(
            basic_analys.get_split_data_by_sess,
            sessions,
            args_dict=args_dict,
            parallel=parallel,
            zip_output=True)

        # add columns to dataframe
        trace_df[f"{datatype}_traces"] = list(split_traces)
        trace_df[f"{datatype}_time_values"] = list(all_time_values)

    return trace_df
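
# Hypothetical post-processing sketch (assumes a trace_df returned by the
# call above): averaging one session's running traces across sequences, per
# split:
#
# import numpy as np
# run_traces = trace_df.loc[0, "run_traces"]  # split x seqs x frames
# mean_traces = [np.nanmean(np.asarray(split_data), axis=0)
#                for split_data in run_traces]  # per split: frames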
Example #12
def gabor_sequence_diffs_sess123(sessions,
                                 analyspar,
                                 sesspar,
                                 stimpar,
                                 basepar,
                                 permpar,
                                 figpar,
                                 seed=None,
                                 parallel=False):
    """
    gabor_sequence_diffs_sess123(sessions, analyspar, sesspar, stimpar, 
                                 basepar, permpar, figpar)

    Retrieves differences in ROI responses to Gabor sequences from 
    sessions 1 to 3.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    logger.info("Compiling Gabor sequence differences from session 1 to 3.",
                extra={"spacing": "\n"})

    # calculate multiple comparisons
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None,
        analyspar).drop_duplicates(subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(permpar, sess_df=dummy_df)

    split = "by_exp"
    diffs_df = seq_analys.get_sess_grped_diffs_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        basepar=basepar,
        permpar=permpar,
        split=split,
        randst=seed,
        parallel=parallel,
    )

    extrapar = {
        "split": split,
        "seed": seed,
    }

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "basepar": basepar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": extrapar,
        "diffs_df": diffs_df.to_dict()
    }

    helper_fcts.plot_save_all(info, figpar)
Example #13
def visual_flow_rel_resp_sess123(sessions,
                                 analyspar,
                                 sesspar,
                                 stimpar,
                                 permpar,
                                 figpar,
                                 seed=None,
                                 parallel=False):
    """
    visual_flow_rel_resp_sess123(sessions, analyspar, sesspar, stimpar, 
                                 permpar, figpar)

    Retrieves ROI responses to expected and unexpected visual flow, relative 
    to session 1.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    logger.info("Compiling ROI visual flow responses relative to session 1.",
                extra={"spacing": "\n"})

    if analyspar.scale:
        raise ValueError("analyspar.scale should be set to False.")

    # calculate multiple comparisons
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None,
        analyspar).drop_duplicates(subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(permpar,
                                       sess_df=dummy_df,
                                       CIs=False,
                                       factor=2)

    rel_sess = 1
    rel_resp_df = seq_analys.get_rel_resp_stats_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        permpar=permpar,
        rel_sess=rel_sess,
        randst=seed,
        parallel=parallel,
    )

    extrapar = {
        "rel_sess": rel_sess,
        "seed": seed,
    }

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": extrapar,
        "rel_resp_df": rel_resp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
Example #14
def gabor_decoding_sess123(sessions,
                           analyspar,
                           sesspar,
                           stimpar,
                           logregpar,
                           permpar,
                           figpar,
                           seed=None,
                           parallel=False):
    """
    gabor_decoding_sess123(sessions, analyspar, sesspar, stimpar, logregpar, 
                           permpar, figpar)

    Runs decoding analyses (D and U orientations).
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - logregpar (LogRegPar): 
            named tuple containing logistic regression parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    comp_str = logregpar.comp.replace("ori", " orientation")

    logger.info(
        f"Compiling Gabor {comp_str} decoder performances for sessions 1 to 3.",
        extra={"spacing": "\n"})

    if not analyspar.scale:
        raise ValueError("analyspar.scale should be True.")

    # calculate multiple comparisons
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None,
        analyspar).drop_duplicates(subset=["lines", "planes", "sess_ns"])
    permpar = misc_analys.set_multcomp(permpar,
                                       sess_df=dummy_df,
                                       pairs=False,
                                       factor=2)

    n_splits = 100
    score_df = decoding_analys.run_sess_logregs(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        logregpar=logregpar,
        permpar=permpar,
        n_splits=n_splits,
        seed=seed,
        parallel=parallel,
    )

    extrapar = {
        "n_splits": n_splits,
        "seed": seed,
    }

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "logregpar": logregpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": extrapar,
        "scores_df": score_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
Example #15
def get_ex_traces_df(sessions,
                     analyspar,
                     stimpar,
                     basepar,
                     n_ex=6,
                     rolling_win=4,
                     randst=None,
                     parallel=False):
    """
    get_ex_traces_df(sessions, analyspar, stimpar, basepar)

    Returns example ROI traces dataframe.

    Required args:
        - sessions (list):
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
    
    Optional args:
        - n_ex (int):
            number of example traces to retain
            default: 6
        - rolling_win (int):
            window to use in rolling mean over individual trial traces
            default: 4 
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - selected_roi_data (pd.DataFrame):
            dataframe with a row for each ROI, and the following columns, 
            in addition to the basic sess_df columns: 
            - time_values (list): values for each frame, in seconds
                (only 0 to stimpar.post, unless split is "by_exp")
            - roi_ns (list): selected ROI number
            - traces_sm (list): selected ROI sequence traces, smoothed, with 
                dims: seq x frames
            - trace_stats (list): selected ROI trace mean or median
    """

    retained_traces_df = misc_analys.get_check_sess_df(sessions, None,
                                                       analyspar)
    initial_columns = retained_traces_df.columns

    logger.info(f"Identifying example ROIs for each session...",
                extra={"spacing": TAB})

    retained_roi_data = gen_util.parallel_wrap(
        get_sess_ex_traces,
        sessions, [analyspar, stimpar, basepar, rolling_win],
        parallel=parallel)

    randst = rand_util.get_np_rand_state(randst, set_none=True)

    # add data to dataframe
    new_columns = list(retained_roi_data[0])
    retained_traces_df = gen_util.set_object_columns(retained_traces_df,
                                                     new_columns,
                                                     in_place=True)

    for i, sess in enumerate(sessions):
        row_idx = retained_traces_df.loc[
            retained_traces_df["sessids"] == sess.sessid].index

        if len(row_idx) != 1:
            raise RuntimeError(
                "Expected exactly one dataframe row to match session ID.")
        row_idx = row_idx[0]

        for column, value in retained_roi_data[i].items():
            retained_traces_df.at[row_idx, column] = value

    # select a few ROIs per line/plane/session
    columns = retained_traces_df.columns.tolist()
    columns = [column.replace("roi_trace", "trace") for column in columns]
    selected_traces_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    for _, trace_grp_df in retained_traces_df.groupby(group_columns):
        trace_grp_df = trace_grp_df.sort_values("mouse_ns")
        grp_indices = trace_grp_df.index
        n_per = np.asarray([len(roi_ns) for roi_ns in trace_grp_df["roi_ns"]])
        roi_ns = np.concatenate(trace_grp_df["roi_ns"].tolist())
        concat_idxs = np.sort(randst.choice(len(roi_ns), n_ex, replace=False))

        for concat_idx in concat_idxs:
            row_idx = len(selected_traces_df)
            sess_idx = np.where(concat_idx < np.cumsum(n_per))[0][0]
            source_row = trace_grp_df.loc[grp_indices[sess_idx]]
            for column in initial_columns:
                selected_traces_df.at[row_idx, column] = source_row[column]

            selected_traces_df.at[row_idx, "time_values"] = \
                source_row["time_values"].tolist()

            roi_idx = concat_idx - n_per[:sess_idx].sum()
            for col in ["roi_ns", "traces_sm", "trace_stats"]:
                source_col = col.replace("trace", "roi_trace")
                selected_traces_df.at[row_idx, col] = \
                    source_row[source_col][roi_idx].tolist()

    for column in [
            "mouse_ns", "mouseids", "sess_ns", "sessids", "nrois", "roi_ns"
    ]:
        selected_traces_df[column] = selected_traces_df[column].astype(int)

    return selected_traces_df
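
# Standalone illustration of the np.cumsum trick used in the selection loop
# above to map an index into the concatenated ROI list back to its source
# session and within-session ROI (toy counts):
import numpy as np

n_per_toy = np.asarray([3, 5, 2])  # ROIs retained per session in a group
concat_idx_toy = 6                 # index into the concatenated ROI list

sess_idx_toy = np.where(concat_idx_toy < np.cumsum(n_per_toy))[0][0]  # -> 1
roi_idx_toy = concat_idx_toy - n_per_toy[:sess_idx_toy].sum()         # -> 3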
Example #16
def get_resp_df(sessions, analyspar, stimpar, rel_sess=1, parallel=False):
    """
    get_resp_df(sessions, analyspar, stimpar)

    Returns relative response dataframe for requested sessions.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters

    Optional args:
        - rel_sess (int):
            number of session relative to which data should be scaled, for each 
            mouse. If None, relative data is not added.
            default: 1
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - resp_data_df (pd.DataFrame):
            dataframe with response stats (2D array, ROI x stats) under 
            columns for expected ("exp") and unexpected ("unexp") data, 
            separated by Gabor frame (e.g., "exp_3", "unexp_G") 
            if stimpar.stimtype == "gabors", and 
            with "rel_{}" columns added for each input column with "exp" in its 
            name, if rel_sess is not None.
    """

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sessids = [sess.sessid for sess in sessions]
    resp_data_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # double check that sessions are in correct order
    if resp_data_df["sessids"].tolist() != sessids:
        raise NotImplementedError(
            "Implementation error. Sessions must appear in correct order in "
            "resp_data_df.")

    logger.info(f"Loading data for each session...", extra={"spacing": TAB})
    data_dicts = gen_util.parallel_wrap(get_sess_integ_resp_dict,
                                        sessions,
                                        args_list=[analyspar, stimpar],
                                        parallel=parallel)

    # add data to df
    misc_analys.get_check_sess_df(sessions, resp_data_df)
    for i, idx in enumerate(resp_data_df.index):
        for key, value in data_dicts[i].items():
            if i == 0:
                resp_data_df = gen_util.set_object_columns(resp_data_df, [key],
                                                           in_place=True)
            # retain stat only, not error
            resp_data_df.at[idx, key] = value[:, 0]

    # add relative data
    if rel_sess is not None:
        resp_data_df = add_relative_resp_data(resp_data_df,
                                              analyspar,
                                              rel_sess=rel_sess,
                                              in_place=True)

    return resp_data_df
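
# Toy sketch of the "relative to session 1" idea (the real logic lives in
# add_relative_resp_data; division by the reference session's value is an
# assumption made for illustration):
resp_by_sess = {1: 0.8, 2: 1.0, 3: 1.2}  # a mouse's mean response per session
rel_sess_toy = 1
rel_resp = {n: val / resp_by_sess[rel_sess_toy]
            for n, val in resp_by_sess.items()}  # -> {1: 1.0, 2: 1.25, 3: 1.5}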
Example #17
def tracked_roi_usis_stimulus_comp_sess1v3(sessions,
                                           analyspar,
                                           sesspar,
                                           stimpar,
                                           basepar,
                                           idxpar,
                                           permpar,
                                           figpar,
                                           seed=None,
                                           parallel=False):
    """
    tracked_roi_usis_stimulus_comp_sess1v3(sessions, analyspar, sesspar, 
                                           stimpar, basepar, idxpar, permpar,
                                           figpar)

    Retrieves changes in tracked ROI USIs for Gabors vs visual flow stimuli.
        
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - sesspar (SessPar): 
            named tuple containing session parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - idxpar (IdxPar): 
            named tuple containing index parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters
        - figpar (dict): 
            dictionary containing figure parameters
    
    Optional args:
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False
    """

    logger.info(
        ("Compiling changes in ROI USIs for Gabors vs visual flow stimuli."),
        extra={"spacing": "\n"})

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked should be set to True.")

    # remove incomplete session series and warn
    sessions = misc_analys.check_sessions_complete(sessions)

    # calculate multiple comparisons
    dummy_df = misc_analys.get_check_sess_df(
        sessions, None, analyspar).drop_duplicates(subset=["lines", "planes"])
    multcomp = len(dummy_df) + 1
    permpar = sess_ntuple_util.get_modif_ntuple(permpar, "multcomp", multcomp)

    comp_sess = [1, 3]
    datatype = "usis"
    pop_stats = True
    usi_comp_df = stim_analys.get_stim_stats_df(
        sessions,
        analyspar=analyspar,
        stimpar=stimpar,
        basepar=basepar,
        idxpar=idxpar,
        permpar=permpar,
        comp_sess=comp_sess,
        datatype=datatype,
        pop_stats=pop_stats,
        randst=seed,
        parallel=parallel,
    )

    extrapar = {
        "comp_sess": comp_sess,
        "datatype": datatype,
        "pop_stats": pop_stats,
        "seed": seed,
    }

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "stimpar": stimpar._asdict(),
        "basepar": basepar._asdict(),
        "idxpar": idxpar._asdict(),
        "permpar": permpar._asdict(),
        "extrapar": extrapar,
        "usi_comp_df": usi_comp_df.to_dict(),
    }

    helper_fcts.plot_save_all(info, figpar)
Example #18
def run_sess_logregs(sessions,
                     analyspar,
                     stimpar,
                     logregpar,
                     permpar,
                     n_splits=100,
                     seed=None,
                     parallel=False):
    """
    run_sess_logregs(sessions, analyspar, stimpar, logregpar, permpar)

    Runs logistic regressions on sessions (real data and shuffled), and 
    returns statistics dataframe.

    Number of shuffles is determined by permpar.n_perms. 

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - logregpar (LogRegPar): 
            named tuple containing logistic regression parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - n_splits (int):
            number of data splits to run logistic regressions on
            default: 100
        - seed (int): 
            seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - score_stats_df (pd.DataFrame):
            dataframe with logistic regression score statistics, shuffled score 
            confidence intervals, and test set p-values for each 
            line/plane/session.
    """

    sessids = [sess.sessid for sess in sessions]
    sess_df = misc_analys.get_check_sess_df(sessions, None, analyspar)

    score_stats_dfs = []
    group_columns = ["lines", "planes", "sess_ns"]
    s = 0
    for _, lp_grp_df in sess_df.groupby(group_columns):
        lp_grp_df = lp_grp_df.sort_values("mouse_ns")
        lp_sessions = [
            sessions[sessids.index(sessid)] for sessid in lp_grp_df["sessids"]
        ]

        sess_data_stats_dfs, shuffle_dfs = [], []
        for sess in lp_sessions:
            logger.info(
                f"Running decoders for session {s + 1}/{len(sess_df)}...",
                extra={"spacing": f"\n{TAB}"})
            sess_data_stats_df, shuffle_df = run_sess_logreg(
                sess,
                analyspar=analyspar,
                stimpar=stimpar,
                logregpar=logregpar,
                n_splits=n_splits,
                n_shuff_splits=permpar.n_perms,
                seed=seed,
                parallel=parallel)

            sess_data_stats_dfs.append(sess_data_stats_df)
            shuffle_dfs.append(shuffle_df)
            s += 1

        sess_data_stats_df = pd.concat(sess_data_stats_dfs, ignore_index=True)

        # collect data
        lp_df = collate_results(sess_data_stats_df, shuffle_dfs, analyspar,
                                permpar)
        score_stats_dfs.append(lp_df)

    score_stats_df = pd.concat(score_stats_dfs, ignore_index=True)
    score_stats_df = misc_analys.add_corr_p_vals(score_stats_df, permpar)

    # add splits information
    score_stats_df["n_splits_per"] = n_splits
    score_stats_df["n_shuffled_splits_per"] = permpar.n_perms

    # reduce grouped columns (and logregpar columns) to their unique (first) values
    for col in group_columns + list(logregpar._asdict().keys()):
        if col not in score_stats_df.columns:
            continue
        score_stats_df[col] = score_stats_df[col].apply(lambda x: x[0])

    score_stats_df["sess_ns"] = score_stats_df["sess_ns"].astype(int)

    return score_stats_df
Example #19
def get_roi_tracking_df(sessions,
                        analyspar,
                        reg_only=False,
                        proj=False,
                        crop_info=False,
                        parallel=False):
    """
    get_roi_tracking_df(sessions, analyspar)

    Returns ROI tracking information for the requested sessions.

    Required args:
        - sessions (list): 
            Session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters

    Optional args:
        - proj (bool):
            if True, max projections are included in the output dataframe
            default: False
        - reg_only (bool):
            if True, only registered masks, and projections if proj is True, 
            are included in the output dataframe
            default: False
        - crop_info (bool or str):
            if not False, the type of cropping information to include 
            ("small" for the small plots, "large" for the large plots)
            default: False
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - roi_mask_df (pd.DataFrame):
            dataframe with a row for each mouse, and the following 
            columns, in addition to the basic sess_df columns: 

            - "registered_roi_mask_idxs" (list): list of mask indices, 
                registered across sessions, for each session 
                (flattened across ROIs) ((sess, hei, wid) x val)
            - "roi_mask_shapes" (list): shape into which ROI mask indices index 
                (sess x hei x wid)

            if not reg_only:
            - "roi_mask_idxs" (list): list of mask indices for each session, 
                and each ROI (sess x ((ROI, hei, wid) x val)) (not registered)

            if proj:
            - "registered_max_projections" (list): pixel intensities of maximum 
                projection for the plane (hei x wid), after registration across 
                sessions

            if proj and not reg_only:
            - "max_projections" (list): pixel intensities of maximum projection 
                for the plane (hei x wid)
                
            if crop_info:
            - "crop_fact" (num): factor by which to crop masks (> 1) 
            - "shift_prop_hei" (float): proportion by which to shift cropped 
                mask center vertically from left edge [0, 1]
            - "shift_prop_wid" (float): proportion by which to shift cropped 
                mask center horizontally from left edge [0, 1]
    """

    if not analyspar.tracked:
        raise ValueError("analyspar.tracked must be True for this analysis.")

    misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_df = misc_analys.get_check_sess_df(sessions, analyspar=analyspar)

    # if cropping, check right away for dictionary with the preset parameters
    if crop_info:
        if crop_info == "small":
            crop_dict = SMALL_CROP_DICT
        elif crop_info == "large":
            crop_dict = LARGE_CROP_DICT
        else:
            gen_util.accepted_values_error("crop_info", crop_info,
                                           ["small", "large"])
        for mouse_n in sess_df["mouse_ns"].unique():
            if int(mouse_n) not in crop_dict.keys():
                raise NotImplementedError(
                    f"No preset cropping information found for mouse {mouse_n}."
                )

    # collect ROI mask data
    sess_dicts = gen_util.parallel_wrap(get_sess_reg_mask_info,
                                        sessions,
                                        args_list=[analyspar, True, proj],
                                        parallel=parallel)
    all_sessids = [sess.sessid for sess in sessions]

    group_columns = ["planes", "lines", "mouse_ns"]
    initial_columns = sess_df.columns.tolist()
    obj_columns = ["registered_roi_mask_idxs", "roi_mask_shapes"]
    if not reg_only:
        obj_columns.append("roi_mask_idxs")
    if proj:
        obj_columns.append("registered_max_projections")
        if not reg_only:
            obj_columns.append("max_projections")

    roi_mask_df = pd.DataFrame(columns=initial_columns + obj_columns)

    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for grp_vals, grp_df in sess_df.groupby(group_columns):
        row_idx = len(roi_mask_df)
        for g, group_column in enumerate(group_columns):
            roi_mask_df.loc[row_idx, group_column] = grp_vals[g]

        # add aggregated values for initial columns
        roi_mask_df = misc_analys.aggreg_columns(grp_df,
                                                 roi_mask_df,
                                                 aggreg_cols,
                                                 row_idx=row_idx,
                                                 in_place=True,
                                                 by_mouse=True)

        sessids = sorted(grp_df["sessids"].tolist())
        reg_roi_masks, roi_mask_idxs = [], []
        if proj:
            reg_max_projs, max_projs = [], []

        roi_mask_shape = None
        for sessid in sessids:
            sess_dict = sess_dicts[all_sessids.index(sessid)]
            reg_roi_mask = sess_dict["registered_roi_masks"]
            # flatten masks across ROIs
            reg_roi_masks.append(np.max(reg_roi_mask, axis=0))
            if roi_mask_shape is None:
                roi_mask_shape = reg_roi_mask.shape
            elif roi_mask_shape != reg_roi_mask.shape:
                raise RuntimeError(
                    "ROI mask shapes across sessions should match, for the "
                    "same mouse.")
            if not reg_only:
                roi_mask_idxs.append([
                    idxs.tolist() for idxs in np.where(sess_dict["roi_masks"])
                ])
            if proj:
                reg_max_projs.append(
                    sess_dict["registered_max_projection"].tolist())
                if not reg_only:
                    max_projs.append(sess_dict["max_projection"].tolist())

        # add to the dataframe
        roi_mask_df.at[row_idx, "registered_roi_mask_idxs"] = \
            [idxs.tolist() for idxs in np.where(reg_roi_masks)]
        roi_mask_df.at[row_idx, "roi_mask_shapes"] = roi_mask_shape

        if not reg_only:
            roi_mask_df.at[row_idx, "roi_mask_idxs"] = roi_mask_idxs
        if proj:
            roi_mask_df.at[row_idx, "registered_max_projections"] = \
                reg_max_projs
            if not reg_only:
                roi_mask_df.at[row_idx, "max_projections"] = max_projs

        # add cropping info
        if crop_info:
            mouse_n = grp_vals[group_columns.index("mouse_ns")]
            crop_fact, shift_prop_hei, shift_prop_wid = crop_dict[int(mouse_n)]
            roi_mask_df.at[row_idx, "crop_fact"] = crop_fact
            roi_mask_df.at[row_idx, "shift_prop_hei"] = shift_prop_hei
            roi_mask_df.at[row_idx, "shift_prop_wid"] = shift_prop_wid

    roi_mask_df["mouse_ns"] = roi_mask_df["mouse_ns"].astype(int)

    return roi_mask_df
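
# Standalone illustration of the mask handling above: flattening boolean ROI
# masks across the ROI axis and storing them as sparse np.where indices that
# can be rebuilt into the dense array (toy shapes):
import numpy as np

roi_masks_toy = np.zeros((4, 8, 8), dtype=bool)  # ROI x hei x wid
roi_masks_toy[0, 2:4, 2:4] = True
roi_masks_toy[3, 5:7, 1:3] = True

flat_mask = np.max(roi_masks_toy, axis=0)  # collapse across ROIs: hei x wid
idxs = [ax.tolist() for ax in np.where(roi_masks_toy)]  # (ROI, hei, wid) x val

# round trip: rebuild the dense mask from the stored indices and shape
rebuilt = np.zeros(roi_masks_toy.shape, dtype=bool)
rebuilt[tuple(np.asarray(ax) for ax in idxs)] = True
assert (rebuilt == roi_masks_toy).all()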