Example #1
def log_sklearn_results(model, analyspar, name="bayes_ridge", var_names=None):
    """
    log_sklearn_results(model, analyspar)
    """

    logger.info(f"{name.replace('_', ' ').upper()} regression",
                extra={"spacing": "\n"})
    if var_names is None:
        var_names = [f"coef {i}" for i in range(len(model.coef_))]

    results = "\n".join(
        [f"{varn}: {coef:.5f}" for varn, coef in zip(var_names, model.coef_)])
    logger.info(results)
    logger.info(f"intercept: {model.intercept_:.5f}")
    logger.info(f"alpha: {model.alpha_:.5f}", extra={"spacing": "\n"})
    if name == "ridge_cv":
        alpha_idx = np.where(model.alphas == model.alpha_)[0]
        score_data = model.cv_values_[:, alpha_idx]
        score_name = "MSE"
    elif name == "bayes_ridge":
        score_data = model.scores_
        score_name = "Score"
    else:
        gen_util.accepted_values_error("name", name,
                                       ["ridge_cv", "bayes_ridge"])
    stats = math_util.get_stats(score_data,
                                stats=analyspar.stats,
                                error=analyspar.error)
    math_util.log_stats(stats, score_name)
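
A hedged usage sketch for the logger above: a scikit-learn BayesianRidge model is fitted (compute_score=True is needed for scores_ to exist) and passed in. The AnalysPar namedtuple stand-in, with "stats" and "error" fields, is an assumption about the project's parameter tuple.

from collections import namedtuple

import numpy as np
from sklearn.linear_model import BayesianRidge

AnalysPar = namedtuple("AnalysPar", ["stats", "error"])  # assumed fields
analyspar = AnalysPar(stats="mean", error="sem")

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
y = X @ np.asarray([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=50)

model = BayesianRidge(compute_score=True).fit(X, y)
log_sklearn_results(model, analyspar, name="bayes_ridge")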
Example #2
def grp_traces_by_qu_unexp_sess(trace_data, analyspar, roigrppar,
                                all_roi_grps):
    """
    grp_traces_by_qu_unexp_sess(trace_data, analyspar, roigrppar, all_roi_grps)
                               
    Required args:
        - trace_data (list)    : list of 4D array of mean/medians traces 
                                 for each session, structured as:
                                    unexp x quantiles x ROIs x frames
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - roigrppar (RoiGrpPar): named tuple containing roi grouping parameters
        - all_roi_grps (list)  : list of sublists per session, each containing
                                 sublists per roi grp with ROI numbers included 
                                 in the group: session x roi_grp

    Returns:
        - grp_stats (list): nested list of statistics for ROI groups 
                            structured as:
                                sess x qu x ROI grp x stats x frame
    """

    # calculate diff/ratio or retrieve exp/unexp
    op = roigrppar.op
    if roigrppar.plot_vals in ["exp", "unexp"]:
        op = ["exp", "unexp"].index(roigrppar.plot_vals)
    data_me = [math_util.calc_op(sess_me, op, dim=0) for sess_me in trace_data]

    n_sesses = len(data_me)
    n_quants = data_me[0].shape[0]
    n_stats = 2 + (analyspar.stats == "median" and analyspar.error == "std")

    n_frames = [me.shape[2] for me in data_me]

    # sess x quantile (first/last) x ROI grp
    empties = [np.empty([n_stats, n_fr]) * np.nan for n_fr in n_frames]
    grp_stats = [[[] for _ in range(n_quants)] for _ in range(n_sesses)]
    for i, sess in enumerate(data_me):
        for q, quant in enumerate(sess):
            for g, grp_rois in enumerate(all_roi_grps[i]):
                # leave NaNs if no ROIs in group
                if len(grp_rois) != 0:
                    grp_st = math_util.get_stats(quant[grp_rois],
                                                 analyspar.stats,
                                                 analyspar.error,
                                                 axes=0)
                else:
                    grp_st = empties[i]
                grp_stats[i][q].append(grp_st.tolist())

    return grp_stats
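
A minimal numpy-only sketch of the NaN-placeholder convention used above: groups with no ROIs keep an all-NaN stats block of matching shape, so downstream code can index all groups uniformly.

import numpy as np

n_stats, n_fr = 2, 5
empty = np.empty([n_stats, n_fr]) * np.nan  # same construction as `empties`
assert np.isnan(empty).all()
assert empty.shape == (n_stats, n_fr)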
Example #3
def compile_dict_fold_stats(dict_list, analyspar):
    """
    compile_dict_fold_stats(dict_list, analyspar)
    """

    full_dict = dict()
    all_keys = dict_list[0].keys()
    for key in all_keys:
        fold_vals = np.asarray([sub_dict[key] for sub_dict in dict_list])
        me, de = math_util.get_stats(fold_vals,
                                     stats=analyspar.stats,
                                     error=analyspar.error)
        full_dict[key] = [me, de]
    return full_dict
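
An illustrative, numpy-only toy version of the fold-compilation pattern above (the real reduction goes through math_util.get_stats; the mean/SEM here is a stand-in):

import numpy as np

dict_list = [{"ev": 0.31}, {"ev": 0.28}, {"ev": 0.35}]  # one dict per fold
fold_vals = np.asarray([sub_dict["ev"] for sub_dict in dict_list])
me = fold_vals.mean()
de = fold_vals.std(ddof=1) / np.sqrt(len(fold_vals))  # SEM across folds
full_dict = {"ev": [me, de]}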
Example #4
def run_explained_variance(x_df, y_df, analyspar, k=10, log_roi_n=True):
    """
    run_explained_variance(x_df, y_df, analyspar)

    Consider splitting 80:20 for a test set?
    """

    x_df = copy.deepcopy(x_df)
    y_df_cols = y_df.columns

    if len(y_df_cols) > 1:  # multiple ROIs (to scale and average)
        all_rois = True
        if "session_id" in x_df.columns:
            sessids = x_df["session_id"].tolist()
        else:
            sessids = [1] * len(x_df)
        logger.info("Calculating explained variance for all ROIs together...")
    else:
        all_rois = False
        if log_roi_n:
            roi_n = int(y_df_cols[0].replace("roi_data_", ""))
            logger.info(f"Calculating explained variance for ROI {roi_n}...")

    kf = model_selection.KFold(k, shuffle=True, random_state=None)

    full, coef_all, coef_uni = [], [], []
    for tr_idx, test_idx in kf.split(x_df):
        if all_rois:
            y_df = scale_across_rois(y_df, tr_idx, sessids, analyspar.stats)
        # one model per category
        full.append(fit_expl_var(x_df, y_df, tr_idx, test_idx))
        coef_all.append(fit_expl_var_per_coeff(x_df, y_df, tr_idx, test_idx))
        coef_uni.append(
            fit_unique_expl_var_per_coeff(x_df, y_df, tr_idx, test_idx,
                                          full[-1]))

    full = math_util.get_stats(np.asarray(full),
                               stats=analyspar.stats,
                               error=analyspar.error).tolist()

    coef_all = compile_dict_fold_stats(coef_all, analyspar)
    coef_uni = compile_dict_fold_stats(coef_uni, analyspar)

    return full, coef_all, coef_uni
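
A self-contained sketch of the k-fold loop above, with a plain linear regression standing in for the project-specific fit_expl_var helpers (LinearRegression.score returns R^2, i.e. the fraction of variance explained on held-out data):

import numpy as np
from sklearn import linear_model, model_selection

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.asarray([0.5, -1.0, 2.0]) + rng.normal(scale=0.1, size=100)

scores = []
for tr_idx, test_idx in model_selection.KFold(10, shuffle=True).split(X):
    model = linear_model.LinearRegression().fit(X[tr_idx], y[tr_idx])
    scores.append(model.score(X[test_idx], y[test_idx]))  # held-out R^2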
Example #5
def run_full_traces(sessions,
                    analysis,
                    analyspar,
                    sesspar,
                    figpar,
                    datatype="roi"):
    """
    run_full_traces(sessions, analysis, analyspar, sesspar, figpar)

    Plots full traces across an entire session. If ROI traces are plotted,
    each ROI is scaled and plotted separately and an average is plotted.
    
    Saves results and parameters relevant to analysis in a dictionary.

    Required args:
        - sessions (list)      : list of Session objects
        - analysis (str)       : analysis type (e.g., "f")
        - analyspar (AnalysPar): named tuple containing analysis parameters
        - sesspar (SessPar)    : named tuple containing session parameters
        - figpar (dict)        : dictionary containing figure parameters
    
    Optional args:
        - datatype (str): type of data (e.g., "roi", "run")
    """

    dendstr_pr = sess_str_util.dend_par_str(analyspar.dend, sesspar.plane,
                                            datatype, "print")

    sessstr_pr = (f"session: {sesspar.sess_n}, "
                  f"plane: {sesspar.plane}{dendstr_pr}")

    datastr = sess_str_util.datatype_par_str(datatype)

    logger.info(
        f"Plotting {datastr} traces across an entire "
        f"session\n({sessstr_pr}).",
        extra={"spacing": "\n"})

    figpar = copy.deepcopy(figpar)
    if figpar["save"]["use_dt"] is None:
        figpar["save"]["use_dt"] = gen_util.create_time_str()

    all_tr, roi_tr, all_edges, all_pars = [], [], [], []
    for sess in sessions:
        # get the block edges and parameters
        edge_fr, par_descrs = [], []
        for stim in sess.stims:
            stim_str = stim.stimtype
            if stim.stimtype == "visflow":
                stim_str = "vis. flow"
            if datatype == "roi":
                fr_type = "twop"
            elif datatype == "run":
                fr_type = "stim"
            else:
                gen_util.accepted_values_error("datatype", datatype,
                                               ["roi", "run"])
            for b in stim.block_params.index:
                row = stim.block_params.loc[b]
                edge_fr.append([
                    int(row[f"start_frame_{fr_type}"]),
                    int(row[f"stop_frame_{fr_type}"])
                ])
                par_vals = [row[param] for param in stim.stim_params]
                pars_str = "\n".join([str(par) for par in par_vals][0:2])

                par_descrs.append(
                    sess_str_util.pars_to_descr(
                        f"{stim_str.capitalize()}\n{pars_str}"))

        if datatype == "roi":
            if sess.only_tracked_rois != analyspar.tracked:
                raise RuntimeError(
                    "sess.only_tracked_rois should match analyspar.tracked.")
            nanpol = None if analyspar.rem_bad else "omit"
            all_rois = gen_util.reshape_df_data(
                sess.get_roi_traces(
                    None, analyspar.fluor, analyspar.rem_bad,
                    analyspar.scale)["roi_traces"],
                squeeze_cols=True)
            full_tr = math_util.get_stats(all_rois,
                                          analyspar.stats,
                                          analyspar.error,
                                          axes=0,
                                          nanpol=nanpol).tolist()
            roi_tr.append(all_rois.tolist())
        elif datatype == "run":
            full_tr = sess.get_run_velocity(
                rem_bad=analyspar.rem_bad,
                scale=analyspar.scale).to_numpy().squeeze().tolist()
            roi_tr = None
        all_tr.append(full_tr)
        all_edges.append(edge_fr)
        all_pars.append(par_descrs)

    extrapar = {
        "analysis": analysis,
        "datatype": datatype,
    }

    trace_info = {
        "all_tr": all_tr,
        "all_edges": all_edges,
        "all_pars": all_pars
    }

    sess_info = sess_gen_util.get_sess_info(sessions,
                                            analyspar.fluor,
                                            incl_roi=(datatype == "roi"),
                                            rem_bad=analyspar.rem_bad)

    info = {
        "analyspar": analyspar._asdict(),
        "sesspar": sesspar._asdict(),
        "extrapar": extrapar,
        "sess_info": sess_info,
        "trace_info": trace_info
    }

    fulldir, savename = gen_plots.plot_full_traces(roi_tr=roi_tr,
                                                   figpar=figpar,
                                                   **info)
    file_util.saveinfo(info, savename, fulldir, "json")
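
A small numpy illustration of the `nanpol` convention used throughout: when bad values are not removed upstream (analyspar.rem_bad is False), statistics are taken with NaNs omitted; otherwise plain statistics are safe.

import numpy as np

data = np.array([1.0, np.nan, 3.0])
np.mean(data)     # nan  (nanpol=None: assumes bad values were removed)
np.nanmean(data)  # 2.0  (nanpol="omit")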
Example #6
def get_rel_resp_stats_df(sessions,
                          analyspar,
                          stimpar,
                          permpar,
                          rel_sess=1,
                          randst=None,
                          parallel=False):
    """
    get_rel_resp_stats_df(sessions, analyspar, stimpar, permpar)

    Returns relative response stats dataframe for requested sessions.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - rel_sess (int):
            number of session relative to which data should be scaled, for each 
            mouse
            default: 1
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - rel_resp_data_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following 
            columns, in addition to the basic sess_df columns: 
            - rel_reg or rel_exp (list): data stats for regular data (me, err)
            - rel_unexp (list): data stats for unexpected data (me, err)
            for reg/exp/unexp data types, for session comparisons, e.g. 1v2:
            - {data_type}_raw_p_vals_{}v{} (float): uncorrected p-value for 
                data differences between sessions 
            - {data_type}_p_vals_{}v{} (float): p-value for data between 
                sessions, corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    initial_columns = misc_analys.get_sess_df_columns(sessions[0], analyspar)

    resp_data_df = get_resp_df(sessions,
                               analyspar,
                               stimpar,
                               rel_sess=rel_sess,
                               parallel=parallel)

    # prepare target dataframe
    source_cols = ["rel_exp", "rel_unexp"]
    if stimpar.stimtype == "gabors":
        # regular means only A, B, C are included
        targ_cols = ["rel_reg", "rel_unexp"]
    else:
        targ_cols = ["rel_exp", "rel_unexp"]
    rel_resp_data_df = pd.DataFrame(columns=initial_columns + targ_cols)

    group_columns = ["lines", "planes"]
    aggreg_cols = [
        col for col in initial_columns
        if col not in group_columns + ["sess_ns"]
    ]
    for grp_vals, resp_grp_df in resp_data_df.groupby(group_columns):
        sess_ns = sorted(resp_grp_df["sess_ns"].unique())

        # take stats across frame types
        for e, (data_col, source_col) in enumerate(zip(targ_cols,
                                                       source_cols)):
            sess_data = []
            if e == 0:
                row_indices = []
            for s, sess_n in enumerate(sess_ns):
                sess_grp_df = resp_grp_df.loc[resp_grp_df["sess_ns"] == sess_n]
                sess_grp_df = sess_grp_df.sort_values("mouse_ns")
                if e == 0:
                    row_idx = len(rel_resp_data_df)
                    row_indices.append(row_idx)
                    rel_resp_data_df.loc[row_idx, "sess_ns"] = sess_n
                    for g, group_column in enumerate(group_columns):
                        rel_resp_data_df.loc[row_idx,
                                             group_column] = grp_vals[g]

                    # add aggregated values for initial columns
                    rel_resp_data_df = misc_analys.aggreg_columns(
                        sess_grp_df,
                        rel_resp_data_df,
                        aggreg_cols,
                        row_idx=row_idx,
                        in_place=True)
                else:
                    row_idx = row_indices[s]

                if stimpar.stimtype == "gabors":
                    # average across Gabor frames included in reg or unexp data
                    cols = [f"{source_col}_{fr}" for fr in stimpar.gabfr[e]]
                    data = sess_grp_df[cols].values.tolist()
                    # sess x frs x ROIs -> sess x ROIs
                    data = [
                        math_util.mean_med(sub,
                                           stats=analyspar.stats,
                                           axis=0,
                                           nanpol=nanpol) for sub in data
                    ]
                else:
                    # sess x ROIs
                    data = sess_grp_df[source_col].tolist()

                data = np.concatenate(data, axis=0)

                # take stats across ROIs, grouped
                rel_resp_data_df.at[row_idx, data_col] = \
                    math_util.get_stats(
                        data,
                        stats=analyspar.stats,
                        error=analyspar.error,
                        nanpol=nanpol
                        ).tolist()

                sess_data.append(data)  # for p-value calculation

            # calculate p-values between sessions (0-1, 0-2, 1-2...)
            p_vals = rand_util.comp_vals_acr_groups(sess_data,
                                                    n_perms=permpar.n_perms,
                                                    stats=analyspar.stats,
                                                    paired=analyspar.tracked,
                                                    nanpol=nanpol,
                                                    randst=randst)
            p = 0
            for i, sess_n in enumerate(sess_ns):
                for j, sess_n2 in enumerate(sess_ns[i + 1:]):
                    key = f"{data_col}_p_vals_{int(sess_n)}v{int(sess_n2)}"
                    rel_resp_data_df.loc[row_indices[i], key] = p_vals[p]
                    # sess_n2 sits at index i + 1 + j in sess_ns
                    rel_resp_data_df.loc[
                        row_indices[i + 1 + j], key] = p_vals[p]
                    p += 1

    rel_resp_data_df["sess_ns"] = rel_resp_data_df["sess_ns"].astype(int)

    # corrected p-values
    rel_resp_data_df = misc_analys.add_corr_p_vals(rel_resp_data_df, permpar)

    return rel_resp_data_df
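
A quick check of the pairwise comparison order assumed above: comp_vals_acr_groups is expected to return p-values in (1v2, 1v3, 2v3, ...) order, so the second session of each pair sits at index i + 1 + j.

sess_ns = [1, 2, 3]
pairs = [(sess_ns[i], sess_ns[i + 1 + j])
         for i in range(len(sess_ns))
         for j in range(len(sess_ns[i + 1:]))]
assert pairs == [(1, 2), (1, 3), (2, 3)]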
Example #7
def get_sess_integ_resp_dict(sess, analyspar, stimpar):
    """
    get_sess_integ_resp_dict(sess, analyspar, stimpar)

    Returns dictionary with integrated ROI response stats for a session.

    Required args:
        - sess (Session):
            Session object
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
    
    Returns:
        - data_dict (dict):
            data dictionary with response stats (2D array, ROI x stats) under 
            keys for expected ("exp") and unexpected ("unexp") data, separated 
            by Gabor frame (e.g., "exp_3", "unexp_G") 
            if stimpar.stimtype == "gabors".
    """

    nanpol = None if analyspar.rem_bad else "omit"

    # a few checks
    if stimpar.stimtype == "gabors":
        gabfrs = [[0, 1, 2], [3, 4]]
        if stimpar.gabfr != gabfrs:
            raise ValueError(f"Expected stimpar.gabfrs to be {gabfrs}")
        if stimpar.pre != 0 or stimpar.post != 0.3:
            raise ValueError(
                "Expected stimpar.pre and post to be 0 and 0.3, respectively."
            )
    elif stimpar.stimtype == "visflow":
        if stimpar.pre != 0 or stimpar.post != 1:
            raise ValueError(
                "Expected stimpar.pre and post to be 0 and 1, respectively.")
    else:
        gen_util.accepted_values_error("stimpar.stimtype", stimpar.stimtype,
                                       ["gabors", "visflow"])

    if analyspar.scale:
        raise ValueError("analyspar.scale should be set to False.")

    # collect data
    stim = sess.get_stim(stimpar.stimtype)
    data_dict = {}

    # retrieve integrated sequences for each frame, and return as dictionary
    for e, unexp in enumerate(["exp", "unexp"]):
        if stimpar.stimtype == "gabors":
            cycle_gabfr = gabfrs[e]
            if e == 0:
                # collect expected data for all frames
                cycle_gabfr = cycle_gabfr + gabfrs[1]
        else:
            cycle_gabfr = [""]  # dummy variable

        for g, gabfr in enumerate(cycle_gabfr):
            if stimpar.stimtype == "gabors":
                data_key = f"{unexp}_{gabfr}"
            else:
                data_key = unexp

            refs = stim.get_segs_by_criteria(gabfr=gabfr,
                                             gabk=stimpar.gabk,
                                             gab_ori=stimpar.gab_ori,
                                             visflow_dir=stimpar.visflow_dir,
                                             visflow_size=stimpar.visflow_size,
                                             unexp=e,
                                             remconsec=False,
                                             by="seg")

            # ROI x seq
            data, _ = basic_analys.get_data(stim,
                                            refs,
                                            analyspar,
                                            pre=stimpar.pre,
                                            post=stimpar.post,
                                            integ=True,
                                            ref_type="segs")

            # take stats across sequences
            data_dict[data_key] = math_util.get_stats(data,
                                                      stats=analyspar.stats,
                                                      error=analyspar.error,
                                                      axes=1,
                                                      nanpol=nanpol).T

    return data_dict
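
A sketch of the data_dict key layout the loop above produces for Gabor stimuli, using the integer frame labels from gabfrs: expected responses are collected for every frame, unexpected ones only for the frames on which unexpected events occur.

gabfrs = [[0, 1, 2], [3, 4]]
exp_keys = [f"exp_{g}" for g in gabfrs[0] + gabfrs[1]]
unexp_keys = [f"unexp_{g}" for g in gabfrs[1]]
# exp_keys:   ["exp_0", "exp_1", "exp_2", "exp_3", "exp_4"]
# unexp_keys: ["unexp_3", "unexp_4"]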
Example #8
def get_sess_grped_diffs_df(sessions,
                            analyspar,
                            stimpar,
                            basepar,
                            permpar,
                            split="by_exp",
                            randst=None,
                            parallel=False):
    """
    get_sess_grped_diffs_df(sessions, analyspar, stimpar, basepar)

    Returns split difference statistics for specific sessions, grouped across 
    mice.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - diffs_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following 
            columns, in addition to the basic sess_df columns: 
            - diff_stats (list): split difference stats (me, err)
            - null_CIs (list): adjusted null CI for split differences 
            - raw_p_vals (float): uncorrected p-value for differences within 
                sessions
            - p_vals (float): p-value for differences within sessions, 
                corrected for multiple comparisons and tails
            for session comparisons, e.g. 1v2:
            - raw_p_vals_{}v{} (float): uncorrected p-value for differences
                between sessions 
            - p_vals_{}v{} (float): p-value for differences between sessions, 
                corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)

    sess_diffs_df = misc_analys.get_check_sess_df(sessions, None, analyspar)
    initial_columns = sess_diffs_df.columns.tolist()

    # retrieve ROI index information
    args_dict = {
        "analyspar": analyspar,
        "stimpar": stimpar,
        "basepar": basepar,
        "split": split,
        "return_data": True,
    }

    # sess x split x ROI
    split_stats, split_data = gen_util.parallel_wrap(get_sess_roi_split_stats,
                                                     sessions,
                                                     args_dict=args_dict,
                                                     parallel=parallel,
                                                     zip_output=True)

    misc_analys.get_check_sess_df(sessions, sess_diffs_df)
    sess_diffs_df["roi_split_stats"] = list(split_stats)
    sess_diffs_df["roi_split_data"] = list(split_data)

    columns = initial_columns + ["diff_stats", "null_CIs"]
    diffs_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    aggreg_cols = [col for col in initial_columns if col not in group_columns]
    for lp_grp_vals, lp_grp_df in sess_diffs_df.groupby(["lines", "planes"]):
        lp_grp_df = lp_grp_df.sort_values(["sess_ns", "mouse_ns"])
        line, plane = lp_grp_vals
        lp_name = plot_helper_fcts.get_line_plane_name(line, plane)
        logger.info(f"Running permutation tests for {lp_name} sessions...",
                    extra={"spacing": TAB})

        # obtain ROI random split differences per session
        # done here to avoid OOM errors
        lp_rand_diffs = gen_util.parallel_wrap(
            get_rand_split_data,
            lp_grp_df["roi_split_data"].tolist(),
            args_list=[analyspar, permpar, randst],
            parallel=parallel,
            zip_output=False)

        sess_diffs = []
        row_indices = []
        sess_ns = sorted(lp_grp_df["sess_ns"].unique())
        for sess_n in sess_ns:
            row_idx = len(diffs_df)
            row_indices.append(row_idx)
            sess_grp_df = lp_grp_df.loc[lp_grp_df["sess_ns"] == sess_n]

            grp_vals = list(lp_grp_vals) + [sess_n]
            for g, group_column in enumerate(group_columns):
                diffs_df.loc[row_idx, group_column] = grp_vals[g]

            # add aggregated values for initial columns
            diffs_df = misc_analys.aggreg_columns(sess_grp_df,
                                                  diffs_df,
                                                  aggreg_cols,
                                                  row_idx=row_idx,
                                                  in_place=True)

            # group ROI split stats across mice: split x ROIs
            split_stats = np.concatenate(
                sess_grp_df["roi_split_stats"].to_numpy(), axis=-1)

            # take diff and stats across ROIs
            diffs = split_stats[1] - split_stats[0]
            diff_stats = math_util.get_stats(diffs,
                                             stats=analyspar.stats,
                                             error=analyspar.error,
                                             nanpol=nanpol)
            diffs_df.at[row_idx, "diff_stats"] = diff_stats.tolist()
            sess_diffs.append(diffs)

            # group random ROI split diffs across mice, and take stat
            rand_idxs = [
                lp_grp_df.index.tolist().index(idx)
                for idx in sess_grp_df.index
            ]
            rand_diffs = math_util.mean_med(
                np.concatenate([lp_rand_diffs[r] for r in rand_idxs], axis=0),
                axis=0,
                stats=analyspar.stats,
                nanpol=nanpol)

            # get CIs and p-values
            p_val, null_CI = rand_util.get_p_val_from_rand(
                diff_stats[0],
                rand_diffs,
                return_CIs=True,
                p_thresh=permpar.p_val,
                tails=permpar.tails,
                multcomp=permpar.multcomp,
                nanpol=nanpol)
            diffs_df.loc[row_idx, "p_vals"] = p_val
            diffs_df.at[row_idx, "null_CIs"] = null_CI

        del lp_rand_diffs  # free up memory

        # calculate p-values between sessions (0-1, 0-2, 1-2...)
        p_vals = rand_util.comp_vals_acr_groups(sess_diffs,
                                                n_perms=permpar.n_perms,
                                                stats=analyspar.stats,
                                                paired=analyspar.tracked,
                                                nanpol=nanpol,
                                                randst=randst)
        p = 0
        for i, sess_n in enumerate(sess_ns):
            for j, sess_n2 in enumerate(sess_ns[i + 1:]):
                key = f"p_vals_{int(sess_n)}v{int(sess_n2)}"
                diffs_df.loc[row_indices[i], key] = p_vals[p]
                # sess_n2 sits at index i + 1 + j in sess_ns
                diffs_df.loc[row_indices[i + 1 + j], key] = p_vals[p]
                p += 1

    # add corrected p-values
    diffs_df = misc_analys.add_corr_p_vals(diffs_df, permpar)

    diffs_df["sess_ns"] = diffs_df["sess_ns"].astype(int)

    return diffs_df
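
A hedged, numpy-only sketch of the permutation logic assumed above (the real work happens in rand_util): an observed difference statistic is compared against a null distribution of random-split differences, yielding a two-tailed p-value and a null CI.

import numpy as np

rng = np.random.default_rng(0)
observed = 0.4
null_dist = rng.normal(0, 0.2, size=10_000)          # stand-in for rand_diffs
p_val = np.mean(np.abs(null_dist) >= abs(observed))  # two-tailed
null_CI = np.percentile(null_dist, [2.5, 50, 97.5])  # lo, med, hi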
Example #9
def get_sess_roi_split_stats(sess,
                             analyspar,
                             stimpar,
                             basepar,
                             split="by_exp",
                             return_data=False):
    """
    get_sess_roi_split_stats(sess, analyspar, stimpar, basepar)

    Returns ROI split stats for a specific session (integrated data).

    Required args:
        - sess (Session):
            Session object
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - return_data (bool):
            if True, split_data is returned in addition to split_stats

    Returns:
        - split_stats (2D array): 
            statistics (me only) of integrated ROI data, by split
            dims: split x ROIs
        if return_data:
        - split_data (nested list): 
            list of data arrays
            dims: split x ROIs x seq
    """

    nanpol = None if analyspar.rem_bad else "omit"

    split_data, _ = basic_analys.get_split_data_by_sess(
        sess,
        analyspar,
        stimpar,
        split=split,
        baseline=basepar.baseline,
        integ=True)

    split_stats = []
    # split x ROI
    for data in split_data:
        split_stats.append(
            math_util.get_stats(data,
                                stats=analyspar.stats,
                                error=analyspar.error,
                                axes=1,
                                nanpol=nanpol)[0])  # keep stat (me), drop error

    split_stats = np.asarray(split_stats)

    if return_data:
        return split_stats, split_data
    else:
        return split_stats
Example #10
def get_sess_grped_trace_df(sessions,
                            analyspar,
                            stimpar,
                            basepar,
                            split="by_exp",
                            parallel=False):
    """
    get_sess_grped_trace_df(sessions, analyspar, stimpar, basepar)

    Returns ROI trace statistics for specific sessions, split as requested, 
    and grouped across mice.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - trace_df (pd.DataFrame):
            dataframe with one row per session/line/plane, and the following 
            columns, in addition to the basic sess_df columns: 
            - trace_stats (list): 
                trace stats (split x frames x stat (me, err))
            - time_values (list):
                values for each frame, in seconds
              (only 0 to stimpar.post, unless split is "by_exp")
    """

    nanpol = None if analyspar.rem_bad else "omit"

    trace_df = get_sess_roi_trace_df(sessions,
                                     analyspar=analyspar,
                                     stimpar=stimpar,
                                     basepar=basepar,
                                     split=split,
                                     parallel=parallel)

    columns = trace_df.columns.tolist()
    columns[columns.index("roi_trace_stats")] = "trace_stats"
    grped_trace_df = pd.DataFrame(columns=columns)

    group_columns = ["lines", "planes", "sess_ns"]
    for grp_vals, trace_grp_df in trace_df.groupby(group_columns):
        trace_grp_df = trace_grp_df.sort_values("mouse_ns")
        row_idx = len(grped_trace_df)
        for g, group_column in enumerate(group_columns):
            grped_trace_df.loc[row_idx, group_column] = grp_vals[g]

        for column in columns:
            if column not in group_columns + ["trace_stats", "time_values"]:
                values = trace_grp_df[column].tolist()
                grped_trace_df.at[row_idx, column] = values

        # group ROIs across mice
        n_fr = np.min(
            [len(time_values) for time_values in trace_grp_df["time_values"]])

        if split == "by_exp":
            time_values = np.linspace(-stimpar.pre, stimpar.post, n_fr)
        else:
            time_values = np.linspace(0, stimpar.post, n_fr)

        all_roi_stats = np.concatenate(
            [np.asarray(roi_stats)[..., :n_fr, 0]
             for roi_stats in trace_grp_df["roi_trace_stats"]],
            axis=1)

        # take stats across ROIs
        trace_stats = np.transpose(
            math_util.get_stats(all_roi_stats,
                                stats=analyspar.stats,
                                error=analyspar.error,
                                axes=1,
                                nanpol=nanpol), [1, 2, 0])

        grped_trace_df.loc[row_idx, "trace_stats"] = trace_stats.tolist()
        grped_trace_df.loc[row_idx, "time_values"] = time_values.tolist()

    grped_trace_df["sess_ns"] = grped_trace_df["sess_ns"].astype(int)

    return grped_trace_df
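
A minimal sketch of the frame-truncation step above: traces from different mice can differ slightly in length, so all are cut to the shortest before stacking.

import numpy as np

traces = [np.ones((4, 12)), np.ones((5, 10))]  # ROIs x frames, per mouse
n_fr = min(tr.shape[-1] for tr in traces)
stacked = np.concatenate([tr[..., :n_fr] for tr in traces], axis=0)
assert stacked.shape == (9, 10)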
Example #11
def grp_stats(integ_stats,
              grps,
              plot_vals="both",
              op="diff",
              stats="mean",
              error="std",
              scale=False):
    """
    grp_stats(integ_stats, grps)

    Calculate statistics (e.g. mean + sem) across quantiles for each group 
    and session.

    Required args:
        - integ_stats (list): list of 3D arrays of mean/medians of integrated
                              sequences, for each session structured as:
                                 unexp if by_exp x
                                 quantiles x
                                 ROIs if byroi
        - grps (list)       : list of sublists per session, each containing
                              sublists per roi grp with ROI numbers included in 
                              the group: session x roi_grp
    Optional args:
        - plot_vals (str): which values to return ("unexp", "exp" or "both")
                           default: "both"
        - op (str)       : operation to use to compare groups, if plot_vals
                           is "both"
                           i.e. "diff": grp1-grp2, or "ratio": grp1/grp2
                           default: "diff"
        - stats (str)    : statistic parameter, i.e. "mean" or "median"
                           default: "mean"
        - error (str)    : error statistic parameter, i.e. "std" or "sem"
                           default: "std"
        - scale (bool)   : if True, data is scaled using the first quantile
                           default: False
    Returns:
        - all_grp_st (4D array): array of group stats (mean/median, error) 
                                 structured as:
                                  session x quantile x grp x stat 
        - all_ns (2D array)    : array of group ns, structured as:
                                  session x grp
    """

    n_sesses = len(integ_stats)
    n_quants = integ_stats[0].shape[1]
    n_stats = 2 + (stats == "median" and error == "std")
    n_grps = len(grps[0])

    all_grp_st = np.empty([n_sesses, n_quants, n_grps, n_stats])
    all_ns = np.empty([n_sesses, n_grps], dtype=int)

    for i, (sess_data, sess_grps) in enumerate(zip(integ_stats, grps)):
        # calculate diff/ratio or retrieve exp/unexp
        if plot_vals in ["exp", "unexp"]:
            op = ["exp", "unexp"].index(plot_vals)
        sess_data = math_util.calc_op(sess_data, op, dim=0)
        for g, grp in enumerate(sess_grps):
            all_ns[i, g] = len(grp)
            all_grp_st[i, :, g, :] = np.nan
            if len(grp) != 0:
                grp_data = sess_data[:, grp]
                if scale:
                    grp_data, _ = math_util.scale_data(grp_data,
                                                       axis=0,
                                                       pos=0,
                                                       sc_type="unit")
                all_grp_st[i, :, g] = math_util.get_stats(grp_data,
                                                          stats,
                                                          error,
                                                          axes=1).T

    return all_grp_st, all_ns
Example #12
def get_pupil_run_trace_stats_df(sessions,
                                 analyspar,
                                 stimpar,
                                 basepar,
                                 split="by_exp",
                                 parallel=False):
    """
    get_pupil_run_trace_stats_df(sessions, analyspar, stimpar, basepar)

    Returns pupil and running trace statistics for specific sessions, grouped 
    across mice, split as requested.

    Required args:
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"
        - parallel (bool): 
            if True, some of the analysis is run in parallel across CPU cores 
            default: False

    Returns:
        - trace_df (pd.DataFrame):
            dataframe with one row per session number, and the following 
            columns, in addition to the basic sess_df columns: 
            - run_trace_stats (list): 
                running velocity trace stats (split x frames x stats (me, err))
            - run_time_values (list):
                values for each frame, in seconds
                (only 0 to stimpar.post, unless split is "by_exp")
            - pupil_trace_stats (list): 
                pupil diameter trace stats (split x frames x stats (me, err))
            - pupil_time_values (list):
                values for each frame, in seconds
                (only 0 to stimpar.post, unless split is "by_exp")    
    """

    nanpol = None if analyspar.rem_bad else "omit"

    all_trace_df = get_pupil_run_trace_df(sessions,
                                          analyspar=analyspar,
                                          stimpar=stimpar,
                                          basepar=basepar,
                                          split=split,
                                          parallel=parallel)

    datatypes = ["pupil", "run"]

    columns = all_trace_df.columns.tolist()
    for datatype in datatypes:
        columns[columns.index(
            f"{datatype}_traces")] = f"{datatype}_trace_stats"
    trace_df = pd.DataFrame(columns=columns)

    group_columns = ["sess_ns"]
    for grp_vals, trace_grp_df in all_trace_df.groupby(group_columns):
        trace_grp_df = trace_grp_df.sort_values(
            ["lines", "planes", "mouse_ns"])
        row_idx = len(trace_df)
        grp_vals = [grp_vals]
        for g, group_column in enumerate(group_columns):
            trace_df.loc[row_idx, group_column] = grp_vals[g]

        for column in columns:
            skip = any(datatype in column for datatype in datatypes)
            if column not in group_columns and not skip:
                values = trace_grp_df[column].tolist()
                trace_df.at[row_idx, column] = values

        for datatype in datatypes:
            # group sequences across mice
            n_fr = np.min([
                len(time_values)
                for time_values in trace_grp_df[f"{datatype}_time_values"]
            ])

            if split == "by_exp":
                time_values = np.linspace(-stimpar.pre, stimpar.post, n_fr)
            else:
                time_values = np.linspace(0, stimpar.post, n_fr)

            all_split_stats = []
            for s in range(2):
                split_data = np.concatenate(
                    [np.asarray(traces[s])[:, :n_fr]
                     for traces in trace_grp_df[f"{datatype}_traces"]],
                    axis=0)
                # take stats across sequences
                trace_stats = math_util.get_stats(split_data,
                                                  stats=analyspar.stats,
                                                  error=analyspar.error,
                                                  axes=0,
                                                  nanpol=nanpol).T
                all_split_stats.append(trace_stats)
            all_split_stats = np.asarray(all_split_stats)

            # trace stats (split x frames x stat (me, err))
            trace_df.at[row_idx, f"{datatype}_trace_stats"] = \
                all_split_stats.tolist()
            trace_df.at[row_idx, f"{datatype}_time_values"] = \
                time_values.tolist()

    trace_df["sess_ns"] = trace_df["sess_ns"].astype(int)

    return trace_df
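
A sketch of the time-axis reconstruction above: frame indices are mapped back to seconds with a linspace over the analysis window (window values here are illustrative).

import numpy as np

pre, post, n_fr = 1.0, 2.0, 31
time_values = np.linspace(-pre, post, n_fr)  # split == "by_exp"
assert time_values[0] == -1.0 and time_values[-1] == 2.0  # 0.1 s steps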
Example #13
def get_block_data(sess, analyspar, stimpar, datatype="roi", integ=False):
    """
    get_block_data(sess, analyspar, stimpar)

    Returns data statistics split by expected/unexpected sequences, and by 
    blocks, where one block is defined as consecutive expected sequences, and 
    the subsequent consecutive unexpected sequences.

    Required args:
        - sess (Session):
            Session object
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters

    Optional args:
        - datatype (str):
            type of data to return ("roi", "run" or "pupil")
            default: "roi"
        - integ (bool):
            if True, data is integrated across frames, instead of a statistic 
            being taken
            default: False

    Returns:
        - block_data (3 or 4D array):
            data statistics across sequences per block
            dims: split x block (x ROIs) x stats (me, err)
    """

    stim = sess.get_stim(stimpar.stimtype)

    nanpol = None if analyspar.rem_bad else "omit"

    ch_fl = [stimpar.pre, stimpar.post]

    by_exp_fr_ns = []
    by_exp_data = []
    for unexp in [0, 1]:
        segs = stim.get_segs_by_criteria(gabfr=stimpar.gabfr,
                                         gabk=stimpar.gabk,
                                         gab_ori=stimpar.gab_ori,
                                         visflow_dir=stimpar.visflow_dir,
                                         visflow_size=stimpar.visflow_size,
                                         unexp=unexp,
                                         remconsec=False,
                                         by="seg")

        fr_ns, fr_type = get_frame_numbers(stim,
                                           segs,
                                           ch_fl=ch_fl,
                                           ref_type="segs",
                                           datatype=datatype)

        by_exp_fr_ns.append(np.asarray(fr_ns))

        data, _ = get_data(stim,
                           fr_ns,
                           analyspar,
                           pre=stimpar.pre,
                           post=stimpar.post,
                           integ=integ,
                           datatype=datatype,
                           ref_type=fr_type)

        if not integ:  # take statistic across frames
            with gen_util.TempWarningFilter("Mean of empty", RuntimeWarning):
                data = math_util.mean_med(data,
                                          stats=analyspar.stats,
                                          axis=-1,
                                          nanpol=nanpol)

        by_exp_data.append(data)

    # take means per block
    block_idxs = split_seqs_by_block(by_exp_fr_ns)

    n_splits = len(by_exp_data)
    n_blocks = len(block_idxs[0])
    n_stats = 2
    if analyspar.stats == "median" and analyspar.error == "std":
        n_stats = 3

    targ_shape = (n_splits, n_blocks, n_stats)
    if datatype == "roi":
        n_rois = sess.get_nrois(analyspar.rem_bad, analyspar.fluor)
        targ_shape = (n_splits, n_blocks, n_rois, n_stats)

    block_data = np.full(targ_shape, np.nan)
    for b, seq_idxs in enumerate(zip(*block_idxs)):
        for d, data_seq_idxs in enumerate(seq_idxs):
            # take stats across sequences within each split/block
            block_data[d, b] = math_util.get_stats(
                by_exp_data[d][..., data_seq_idxs],
                stats=analyspar.stats,
                error=analyspar.error,
                nanpol=nanpol,
                axes=-1  # sequences within the split/block
            ).T

    return block_data
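
An illustrative (hypothetical) view of the block grouping that split_seqs_by_block is assumed to perform: one block spans a run of consecutive expected sequences plus the unexpected run that follows it.

# frame numbers per sequence, split by expected (0) / unexpected (1)
exp_frs   = [0, 1, 2, 6, 7]  # two expected runs: [0, 1, 2] and [6, 7]
unexp_frs = [3, 4, 5, 8, 9]  # the unexpected runs that follow each
# -> block 0: exp [0, 1, 2] + unexp [3, 4, 5]
# -> block 1: exp [6, 7]    + unexp [8, 9]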
Example #14
def get_sess_roi_trace_stats(sess,
                             analyspar,
                             stimpar,
                             basepar,
                             split="by_exp"):
    """
    get_sess_roi_trace_stats(sess, analyspar, stimpar, basepar)

    Returns ROI trace statistics for a specific session, split as requested.

    Required args:
        - sess (Session):
            Session object
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - basepar (BasePar): 
            named tuple containing baseline parameters

    Optional args:
        - split (str): 
            how to split data:
            "by_exp" (all exp, all unexp), 
            "unexp_lock" (unexp, preceeding exp), 
            "exp_lock" (exp, preceeding unexp),
            "stim_onset" (grayscr, stim on), 
            "stim_offset" (stim off, grayscr)
            default: "by_exp"

    Returns:
        - stats (4D array): 
            ROI trace statistics for a session
            dims: split x ROIs x frames x stats
        - time_values (1D array):
            values for each frame, in seconds 
            (only 0 to stimpar.post, unless split is "by_exp")
    """

    nanpol = None if analyspar.rem_bad else "omit"

    split_data, time_values = get_split_data_by_sess(sess,
                                                     analyspar,
                                                     stimpar,
                                                     split=split,
                                                     baseline=basepar.baseline,
                                                     datatype="roi")

    stats = []
    # split x ROIs x frames x stats
    for data in split_data:
        stats.append(
            np.transpose(
                math_util.get_stats(data,
                                    stats=analyspar.stats,
                                    error=analyspar.error,
                                    axes=1,
                                    nanpol=nanpol), [1, 2, 0]))
    stats = np.asarray(stats)

    return stats, time_values
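
A minimal numpy check of the axis reordering above, assuming get_stats returns stats x ROIs x frames: transpose([1, 2, 0]) moves the stats dimension last.

import numpy as np

stats_first = np.zeros((2, 30, 100))               # stats x ROIs x frames
stats_last = np.transpose(stats_first, [1, 2, 0])  # ROIs x frames x stats
assert stats_last.shape == (30, 100, 2)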
Example #15
def add_stim_roi_stats(stim_stats_df,
                       sessions,
                       analyspar,
                       stimpar,
                       permpar,
                       comp_sess=[1, 3],
                       in_place=False,
                       randst=None):
    """
    add_stim_roi_stats(stim_stats_df, sessions, analyspar, stimpar, permpar)

    Adds to dataframe comparison of absolute fractional data changes 
    between sessions for different stimuli, calculated for individual ROIs.

    Required args:
        - stim_stats_df (pd.DataFrame):
            dataframe with one row per line/plane, and the basic sess_df 
            columns, as well as stimulus columns for each comp_sess:
            - {stimpar.stimtype}_s{comp_sess[0]}: 
                first comp_sess data for each ROI
            - {stimpar.stimtype}_s{comp_sess[1]}: 
                second comp_sess data for each ROI
        - sessions (list): 
            session objects
        - analyspar (AnalysPar): 
            named tuple containing analysis parameters
        - stimpar (StimPar): 
            named tuple containing stimulus parameters
        - permpar (PermPar): 
            named tuple containing permutation parameters

    Optional args:
        - comp_sess (list):
            sessions for which to obtain absolute fractional change 
            [x, y] => |(y - x) / x|
            default: [1, 3]
        - in_place (bool):
            if True, targ_df is modified in place. Otherwise, a deep copy is 
            modified. targ_df is returned in either case.
            default: False
        - randst (int or np.random.RandomState): 
            random state or seed value to use. (-1 treated as None)
            default: None

    Returns:
        - stim_stats_df (pd.DataFrame):
            dataframe with one row per line/plane and one for all line/planes 
            together, and the basic sess_df columns, in addition to the input 
            columns, and for each stimtype:
            - {stimtype} (list): absolute fractional change statistics (me, err)
            - p_vals (float): p-value for data differences between stimulus 
                types, corrected for multiple comparisons and tails
    """

    nanpol = None if analyspar.rem_bad else "omit"

    if analyspar.tracked:
        misc_analys.check_sessions_complete(sessions, raise_err=True)
    else:
        raise ValueError(
            "If analysis is run for individual ROIs and not population "
            "statistics, analyspar.tracked must be set to True.")

    if not in_place:
        stim_stats_df = stim_stats_df.copy(deep=True)

    stimtypes = gen_util.list_if_not(stimpar.stimtype)
    stim_stats_df = gen_util.set_object_columns(stim_stats_df,
                                                stimtypes,
                                                in_place=True)

    # compile all data
    full_data = dict()
    for stimtype in stimtypes:
        for n in comp_sess:
            stim_col = f"{stimtype}_s{n}"
            full_data[stim_col] = np.concatenate(stim_stats_df[stim_col])

    row_idx = len(stim_stats_df)
    for col in stim_stats_df.columns:
        stim_stats_df.loc[row_idx, col] = "all"
        if col in full_data.keys():
            stim_stats_df.loc[row_idx, col] = full_data[col]

    # take statistics
    for row_idx in stim_stats_df.index:
        comp_data = [None, None]
        for s, stimtype in enumerate(stimtypes):
            stim_data = []
            for n in comp_sess:
                data_col = f"{stimtype}_s{n}"
                stim_data.append(stim_stats_df.loc[row_idx, data_col])
            # absolute fractional change between the two comp_sess datasets
            comp_data[s] = abs_fractional_diff(stim_data)

            # get stats and add to dataframe
            stim_stats_df.at[row_idx, stimtype] = \
                math_util.get_stats(
                    comp_data[s], analyspar.stats, analyspar.error,
                    nanpol=nanpol
                    ).tolist()

        # obtain p-values
        stim_stats_df.loc[row_idx, "p_vals"] = rand_util.get_op_p_val(
            comp_data,
            permpar.n_perms,
            stats=analyspar.stats,
            paired=True,
            nanpol=nanpol,
            randst=randst)

    # remove full data columns
    data_cols = []
    for s, stimtype in enumerate(stimtypes):
        for n in comp_sess:
            data_cols.append(f"{stimtype}_s{n}")
    stim_stats_df = stim_stats_df.drop(data_cols, axis=1)

    return stim_stats_df
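
A worked example of the absolute fractional change defined above, |(y - x) / x|, computed elementwise (numpy only; the project's abs_fractional_diff helper is assumed to behave equivalently):

import numpy as np

x = np.asarray([2.0, 4.0])   # comp_sess[0] data per ROI
y = np.asarray([3.0, 1.0])   # comp_sess[1] data per ROI
frac_change = np.abs((y - x) / x)  # -> array([0.5 , 0.75])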