示例#1
0
def mean_contrast_variables(batch, modelname):
    """Print summary statistics for the contrast-modulation parameters.

    Fetches the fitted-parameter table for ``modelname`` in ``batch`` (with
    ``mod_key='fn'``) and, for each of ``amplitude_mod``, ``base_mod``,
    ``kappa_mod`` and ``shift_mod``, prints the value in the ``'mean'``
    column followed by the ratio of the ``'mean'`` column to the ``'max'``
    column.

    Only the first row whose index label contains the parameter name is
    used for each parameter.

    Parameters
    ----------
    batch
        Batch identifier, passed through to ``fitted_params_per_batch``.
    modelname
        Model name, passed through to ``fitted_params_per_batch``.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    IndexError
        If no row of the table matches one of the ``*_mod`` names.
    """
    df1 = fitted_params_per_batch(batch, modelname, mod_key='fn')

    means = []
    maxes = []
    for name in ('amplitude_mod', 'base_mod', 'kappa_mod', 'shift_mod'):
        rows = df1[df1.index.str.contains(name)]
        # The index holds string labels, so take the first match by position
        # explicitly; plain ``[0]`` depends on pandas' deprecated
        # "integer key means position" fallback.
        means.append(rows['mean'].iloc[0])
        maxes.append(rows['max'].iloc[0])

    print("Mean amplitude_mod: %.06f\n"
          "Mean base_mod: %.06f\n"
          "Mean kappa_mod: %.06f\n"
          "Mean shift_mod: %.06f\n" % tuple(means))

    # Better way to tell which ones are being modulated?
    # Can't really tell just from the average.
    print("ratio of max: %.06f, %.06f, %.06f, %.06f" %
          tuple(avg / top for avg, top in zip(means, maxes)))
示例#2
0
文件: utils.py 项目: LBHB/nems_db
def get_valid_improvements(batch, model1, model2, threshold=2.5):
    """Return cellids whose model1/model2 ``r_test`` ratio is not extreme.

    The ratio of the ``'meta--r_test'`` rows of the two fitted-parameter
    tables is computed per cell, and a cell is kept when
    ``1 / threshold < ratio < threshold``. Cells of the batch that have no
    column in a table get an all-NaN column first, so they never pass.

    Parameters
    ----------
    batch
        Batch identifier used for both the parameter tables and the cell
        list.
    model1, model2
        Model names; ``model1`` is the numerator of the ratio.
    threshold : float
        Symmetric cutoff on the ratio.

    Returns
    -------
    list
        Cellids that pass the ratio filter.
    """
    # TODO: a threshold of 2.5 removes the outliers in the correlation
    #       scatter and maximizes r, but the number needs an unbiased
    #       justification; as it stands the cutoff was effectively
    #       cherry-picked to improve the correlation.

    # NOTE: It also helps to run this for both gc and stp and then take
    #       list(set(gc_cells) & set(stp_cells)) to get the intersection.

    params1 = fitted_params_per_batch(batch, model1, stats_keys=[])
    params2 = fitted_params_per_batch(batch, model2, stats_keys=[])

    batch_cellids = nd.get_batch_cells(batch=batch)['cellid'].tolist()
    n_params = len(params1.index.values.tolist())

    # Columns already present in each table (the first five are skipped).
    present1 = set(params1.loc['meta--r_test'].index.values.tolist()[5:])
    present2 = set(params2.loc['meta--r_test'].index.values.tolist()[5:])

    filler = pd.Series(np.full((n_params), np.nan))

    # Add a NaN column for every batch cell that a table lacks.
    absent1 = [cell for cell in batch_cellids if cell not in present1]
    absent2 = [cell for cell in batch_cellids if cell not in present2]
    for cell in absent1:
        params1[cell] = filler
    for cell in absent2:
        params2[cell] = filler

    print("# missing cells: %d, %d" % (len(absent1), len(absent2)))

    # With every cell present in both tables, impose one column order.
    r_test1 = params1[batch_cellids].loc['meta--r_test']
    r_test2 = params2[batch_cellids].loc['meta--r_test']
    ratio = r_test1 / r_test2

    below_upper = ratio.loc[ratio < threshold]
    within_bounds = below_upper.loc[ratio > 1 / threshold]

    return within_bounds.index.values.tolist()
示例#3
0
                           param_scatter_batch


# Example usage: tabulate the fitted parameters of one model for a batch,
# then compare the 'shift' parameter between two models and plot the
# scalar parameters from the table.
batch = 308
limit = None
modelname1 = 'ozgf100ch18_dlog_wcg18x2_fir2x15_lvl1_dexp1_basic'
modelname2 = 'ozgf100ch18_dlog_wcg18x2_fir2x15_lvl1_dexp1_iter01-T3-T4-T5-T6-T7-ti100-fi15'

# Alternative batch/model settings, kept for reference:
#batch = 303
#limit = None
#modelname = 'nostim20pupbeh_stategain3_basic-nf'

# Can use mod_key='fn', mod_key='id', etc to display more info in index.
# Formatted as: '<mspec_index--mod_key--parameter_name>'
# So mod_key='id' gives something like: '0--wc15x1--coefficients'.
df = fitted_params_per_batch(batch, modelname1, mod_key='', #stats_keys=[],
                             limit=limit, multi='mean')
print(df)

# Compare the 'shift' parameter of modelname1 against modelname2.
param_scatter_batch(batch, modelname1, modelname2, param='shift',
                    multi='mean', limit=limit, mod_key='')

# Not handling arrays yet, just scalar params
plot_all_params(df, only_scalars=True)


# example output (truncated)
"""
                                                              mean  \
0--mean                   [0.6428007712025126, 1.0079999163612767]
0--sd                    [0.4607000818990278, 0.41440913122937123]
1--coefficients  [[0.22104853498428548, 0.3174402233420055, 0.0...
示例#4
0
def gd_scatter(batch,
               model1,
               model2,
               se_filter=True,
               gd_threshold=0,
               param='kappa',
               log_gd=False):
    """Scatter relative model performance against the Gd ratio of ``param``.

    For every retained cell two quantities are computed from the fitted
    parameter tables:

    * ``gc_vs_ln``: ``r_test`` of ``model1`` divided by ``r_test`` of
      ``model2`` (plotted as "GC/LN R").
    * ``gd_ratio``: ``abs(<param>_mod / <param>)`` taken from ``model1``.

    A 2x2 figure is drawn: the scatter and the Gd-ratio histogram for all
    retained cells (top row), and the same two plots restricted to cells
    with ``gd_ratio > gd_threshold`` (bottom row).

    Parameters
    ----------
    batch
        Batch identifier.
    model1, model2
        Model names. The variable names and axis labels treat ``model1`` as
        the gain-control model and ``model2`` as the LN model.
    se_filter : bool
        If True, keep only cells whose ``r_ceiling`` exceeds twice their
        ``se_test`` for both models. If False, keep every cell that has
        non-NaN ``r_ceiling`` for both models.
    gd_threshold : float
        Lower bound on the Gd ratio for the bottom row. When ``log_gd`` is
        True the threshold is log-transformed as well, so the default of 0
        becomes ``-inf`` (numpy emits a divide-by-zero warning) and no cell
        is excluded.
    param : str
        Base name of the parameter; rows are matched with the patterns
        ``'<param>_mod'`` and ``'<param>$'``.
    log_gd : bool
        If True, plot the natural log of the Gd ratio.

    Returns
    -------
    None
        The figure is created through pyplot but not returned.
    """
    df_r = nd.batch_comp(batch, [model1, model2], stat='r_ceiling')
    df_e = nd.batch_comp(batch, [model1, model2], stat='se_test')
    # Remove any cellids that have NaN for 1 or more models
    df_r.dropna(axis=0, how='any', inplace=True)
    df_e.dropna(axis=0, how='any', inplace=True)

    gc_test = df_r[model1]
    gc_se = df_e[model1]
    ln_test = df_r[model2]
    ln_se = df_e[model2]

    if se_filter:
        # Remove if performance not significant at all
        good_cells = ((gc_test > gc_se * 2) & (ln_test > ln_se * 2))
    else:
        # All True, so no cell that survived the dropna is skipped
        good_cells = pd.Series(True, index=gc_test.index)

    df1 = fitted_params_per_batch(batch, model1, stats_keys=[])
    df2 = fitted_params_per_batch(batch, model2, stats_keys=[])

    # ``cell in good_cells`` would test membership in the Series *index* and
    # ignore the True/False value, which silently disables the filter.
    # Collect the labels whose value is True and test against those.
    passing = set(good_cells.index[good_cells.values.astype(bool)])
    celldata = nd.get_batch_cells(batch=batch)
    cellids = [c for c in celldata['cellid'].tolist() if c in passing]

    df1_cells = set(df1.loc['meta--r_test'].index.values.tolist()[5:])
    df2_cells = set(df2.loc['meta--r_test'].index.values.tolist()[5:])

    # fill in missing cellids w/ nan
    df1_nans = 0
    df2_nans = 0
    for c in cellids:
        if c not in df1_cells:
            df1[c] = np.nan
            df1_nans += 1
        if c not in df2_cells:
            df2[c] = np.nan
            df2_nans += 1

    print("# missing cells: %d, %d" % (df1_nans, df2_nans))

    # Force same cellid order now that missing cols are filled in
    df1 = df1[cellids]
    df2 = df2[cellids]

    gc_vs_ln = df1.loc['meta--r_test'].values / df2.loc['meta--r_test'].values
    gc_vs_ln = gc_vs_ln.astype('float32')

    kappa_mod = df1[df1.index.str.contains('%s_mod' % param)]
    kappa = df1[df1.index.str.contains('%s$' % param)]
    gd_ratio = (np.abs(kappa_mod.values /
                       kappa.values)).astype('float32').flatten()

    # Keep only cells for which both quantities are finite
    ff = np.isfinite(gc_vs_ln) & np.isfinite(gd_ratio)
    gc_vs_ln = gc_vs_ln[ff]
    gd_ratio = gd_ratio[ff]
    if log_gd:
        gd_ratio = np.log(gd_ratio)

    # drop cells with excessively large/small gd_ratio or gc_vs_ln
    keep = ~(gd_ratio > 10) & ~(gc_vs_ln > 10) & ~(gc_vs_ln < 0.1)
    gd_ratio = gd_ratio[keep]
    gc_vs_ln = gc_vs_ln[keep]

    r = np.corrcoef(gc_vs_ln, gd_ratio)[0, 1]
    n = gc_vs_ln.size

    # Separately do the same comparison but only with cells that had a
    # Gd ratio above the threshold (i.e. had *some* GC effect).
    # Boolean-mask indexing already returns new arrays, so no explicit copy
    # is needed.
    if log_gd:
        gd_threshold = np.log(gd_threshold)
    thresholded = (gd_ratio > gd_threshold)
    gd_ratio2 = gd_ratio[thresholded]
    gc_vs_ln2 = gc_vs_ln[thresholded]

    r2 = np.corrcoef(gc_vs_ln2, gd_ratio2)[0, 1]
    n2 = gc_vs_ln2.size

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(8, 9))

    ax1.scatter(gd_ratio, gc_vs_ln, c='black', s=1)
    ax1.set_ylabel("GC/LN R")
    ax1.set_xlabel("Gd ratio")
    ax1.set_title("Performance Improvement vs Gd ratio\nr: %.02f, n: %d" %
                  (r, n))

    ax2.hist(gd_ratio, bins=30, histtype='bar', color=['gray'])
    ax2.set_title('Gd ratio distribution')
    ax2.set_xlabel('Gd ratio')
    ax2.set_ylabel('Count')

    ax3.scatter(gd_ratio2, gc_vs_ln2, c='black', s=1)
    ax3.set_ylabel("GC/LN R")
    ax3.set_xlabel("Gd ratio")
    ax3.set_title("Same, only cells w/ Gd > %.02f\nr: %.02f, n: %d" %
                  (gd_threshold, r2, n2))

    ax4.hist(gd_ratio2, bins=30, histtype='bar', color=['gray'])
    ax4.set_title('Gd ratio distribution, only Gd > %.02f' % gd_threshold)
    ax4.set_xlabel('Gd ratio')
    ax4.set_ylabel('Count')

    fig.suptitle('param: %s' % param)
    fig.tight_layout()
示例#5
0
def gain_by_contrast_slopes(batch,
                            gc,
                            stp,
                            LN,
                            combined,
                            se_filter=True,
                            good_LN=0,
                            bins=30,
                            use_exp=True):
    """Plot stacked histograms of kappa "gain slope" for two cell groups.

    Cells are first restricted to those with ``df_r[LN] > 2 * df_e[LN]``
    and then split into:

    * gc cells: ``df_r[gc] - df_r[LN]`` exceeds ``df_e[gc] + df_e[LN]``;
    * LN cells: the remaining cells.

    For each cell the slope is ``(kappa_mod - kappa) * (ctmax - ctmin)``,
    where ``ctmax``/``ctmin`` are the max/min over the ``*_val`` and
    ``*_est`` meta entries of the ``gc`` model fit. The two groups are
    compared with a two-sided Mann-Whitney U test and drawn as a stacked
    bar histogram; the medians and the p-value appear in the legend.

    Parameters
    ----------
    batch
        Batch identifier, used both for ``get_dataframes`` and for fetching
        the fitted parameters of ``gc``.
    gc, stp, LN, combined
        Model names. ``stp`` and ``combined`` are only forwarded to
        ``get_dataframes``.
    se_filter : bool
        Unused; kept for interface compatibility.
    good_LN
        Only interpolated into the plot title; it does not affect which
        cells are selected.
    bins : int
        Number of histogram bin *edges* (so ``bins - 1`` bars are drawn).
    use_exp : bool
        Unused; kept for interface compatibility.

    Returns
    -------
    None
        The figure is created through pyplot but not returned.
    """
    # NOTE(review): df_r / df_e presumably hold per-cell prediction scores
    # and their standard errors -- confirm against get_dataframes.
    df_r, df_c, df_e = get_dataframes(batch, gc, stp, LN, combined)
    cellids = df_r[LN] > df_e[LN] * 2
    gc_LN_SE = (df_e[gc] + df_e[LN])
    gc_cells = cellids & ((df_r[gc] - df_r[LN]) > gc_LN_SE)
    LN_cells = cellids & np.logical_not(gc_cells)

    meta = ['r_test', 'ctmax_val', 'ctmax_est', 'ctmin_val', 'ctmin_est']
    # Use the requested batch rather than a hard-coded batch number so the
    # parameters describe the same cells as df_r / df_e.
    gc_params = fitted_params_per_batch(batch, gc, stats_keys=[], meta=meta)

    # Cells that have fitted parameters but no entry in the masks are added
    # as False so the masks cover every column of gc_params.
    gc_params_cells = gc_params.transpose().index.values.tolist()
    for c in gc_params_cells:
        if c not in LN_cells:
            LN_cells[c] = False
        if c not in gc_cells:
            gc_cells[c] = False

    # index keys are formatted like "4--dsig.d--kappa"
    # If no module name contains 'dsig', i and k are left at the last module.
    mod_keys = gc.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    k_key = f'{i}--{k}--kappa'
    ka_key = k_key + '_mod'
    meta_keys = ['meta--' + m for m in meta]
    all_keys = [k_key, ka_key] + meta_keys

    # One flat float array per key, restricted to each group of cells.
    phi_dfs = [
        gc_params[gc_params.index == key].transpose()[LN_cells].transpose()
        for key in all_keys
    ]
    sep_dfs = [df.values.flatten().astype(np.float64) for df in phi_dfs]
    gc_dfs = [
        gc_params[gc_params.index == key].transpose()[gc_cells].transpose()
        for key in all_keys
    ]
    gc_sep_dfs = [df.values.flatten().astype(np.float64) for df in gc_dfs]

    # low = kappa row, high = kappa_mod row; the r_test entry is not needed.
    low, high, _, ctmax_val, ctmax_est, ctmin_val, ctmin_est = sep_dfs
    gc_low, gc_high, _, gc_ctmax_val, \
        gc_ctmax_est, gc_ctmin_val, gc_ctmin_est = gc_sep_dfs

    ctmax = np.maximum(ctmax_val, ctmax_est)
    gc_ctmax = np.maximum(gc_ctmax_val, gc_ctmax_est)
    ctmin = np.minimum(ctmin_val, ctmin_est)
    gc_ctmin = np.minimum(gc_ctmin_val, gc_ctmin_est)
    ct_range = ctmax - ctmin
    gc_ct_range = gc_ctmax - gc_ctmin

    gain = (high - low) * ct_range
    gc_gain = (gc_high - gc_low) * gc_ct_range
    # Two-sided test: do the gc and LN slope distributions differ?
    gc_LN_p = st.mannwhitneyu(gc_gain, gain, alternative='two-sided')[1]
    med_gain = np.median(gain)
    gc_med_gain = np.median(gc_gain)

    # Shared bin edges so both groups can be stacked bar-for-bar.
    smallest_slope = min(np.min(gain), np.min(gc_gain))
    largest_slope = max(np.max(gain), np.max(gc_gain))
    slope_range = (smallest_slope, largest_slope)
    bin_edges = np.linspace(smallest_slope, largest_slope, bins)
    bar_width = bin_edges[1] - bin_edges[0]
    axis_locs = bin_edges[:-1]
    raw = np.histogram(gain, bins=bin_edges, range=slope_range)[0]
    gc_raw = np.histogram(gc_gain, bins=bin_edges, range=slope_range)[0]

    plt.figure()
    plt.bar(axis_locs, raw, width=bar_width, color='gray', alpha=0.8)
    plt.bar(axis_locs,
            gc_raw,
            width=bar_width,
            color='maroon',
            alpha=0.8,
            bottom=raw)
    plt.xlabel('gain slope')
    plt.ylabel('count')
    plt.title(f'raw counts, LN > {good_LN}')
    plt.legend([
        f'LN, {len(low)}, md={med_gain:.4f}',
        f'gc, {len(gc_low)}, md={gc_med_gain:.4f}, p={gc_LN_p:.4f}'
    ])
示例#6
0
def gd_ratio(batch,
             gc,
             stp,
             LN,
             combined,
             se_filter=True,
             good_LN=0,
             bins=30,
             use_exp=True):
    """Plot the low/high-contrast kappa ratio for gc-improved and other cells.

    Cells are first restricted to those with ``df_r[LN] > 2 * df_e[LN]``
    and then split into:

    * gc cells: ``df_r[gc] - df_r[LN]`` exceeds ``df_e[gc] + df_e[LN]``;
    * the remaining cells (plotted under the title 'all cells').

    For each cell, kappa is interpolated linearly between the ``kappa`` and
    ``kappa_mod`` rows at the cell's minimum and maximum contrast meta
    values, giving ``k_low`` and ``k_high``; the plotted ratio is
    ``k_low / k_high`` (after exponentiating both when ``use_exp``).

    Three figures are created: side-by-side histograms of the ratio for the
    two groups, and one scatter of ratio vs ``r_test`` per group.

    Parameters
    ----------
    batch
        Batch identifier, used both for ``get_dataframes`` and for fetching
        the fitted parameters of ``gc``.
    gc, stp, LN, combined
        Model names. ``stp`` and ``combined`` are only forwarded to
        ``get_dataframes``.
    se_filter : bool
        Unused; kept for interface compatibility.
    good_LN
        Unused; kept for interface compatibility.
    bins
        Passed to ``Axes.hist`` as ``bins``.
    use_exp : bool
        If True, exponentiate ``k_low`` and ``k_high`` before dividing.

    Returns
    -------
    None
        The figures are created through pyplot but not returned.
    """
    # NOTE(review): df_r / df_e presumably hold per-cell prediction scores
    # and their standard errors -- confirm against get_dataframes.
    df_r, df_c, df_e = get_dataframes(batch, gc, stp, LN, combined)
    cellids = df_r[LN] > df_e[LN] * 2
    gc_LN_SE = (df_e[gc] + df_e[LN])
    gc_cells = cellids & ((df_r[gc] - df_r[LN]) > gc_LN_SE)
    LN_cells = cellids & np.logical_not(gc_cells)

    meta = ['r_test', 'ctmax_val', 'ctmax_est', 'ctmin_val', 'ctmin_est']
    # Use the requested batch rather than a hard-coded batch number so the
    # parameters describe the same cells as df_r / df_e.
    gc_params = fitted_params_per_batch(batch, gc, stats_keys=[], meta=meta)

    # Cells that have fitted parameters but no entry in the masks are added
    # as False so the masks cover every column of gc_params.
    gc_params_cells = gc_params.transpose().index.values.tolist()
    for c in gc_params_cells:
        if c not in LN_cells:
            LN_cells[c] = False
        if c not in gc_cells:
            gc_cells[c] = False

    # index keys are formatted like "4--dsig.d--kappa"
    # If no module name contains 'dsig', i and k are left at the last module.
    mod_keys = gc.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    k_key = f'{i}--{k}--kappa'
    ka_key = k_key + '_mod'
    meta_keys = ['meta--' + m for m in meta]
    all_keys = [k_key, ka_key] + meta_keys

    # One flat float array per key, restricted to each group of cells.
    phi_dfs = [
        gc_params[gc_params.index == key].transpose()[LN_cells].transpose()
        for key in all_keys
    ]
    sep_dfs = [df.values.flatten().astype(np.float64) for df in phi_dfs]
    gc_dfs = [
        gc_params[gc_params.index == key].transpose()[gc_cells].transpose()
        for key in all_keys
    ]
    gc_sep_dfs = [df.values.flatten().astype(np.float64) for df in gc_dfs]

    # low = kappa row, high = kappa_mod row.
    low, high, r_test, ctmax_val, ctmax_est, ctmin_val, ctmin_est = sep_dfs
    gc_low, gc_high, gc_r, gc_ctmax_val, \
        gc_ctmax_est, gc_ctmin_val, gc_ctmin_est = gc_sep_dfs

    ctmax = np.maximum(ctmax_val, ctmax_est)
    gc_ctmax = np.maximum(gc_ctmax_val, gc_ctmax_est)
    ctmin = np.minimum(ctmin_val, ctmin_est)
    gc_ctmin = np.minimum(gc_ctmin_val, gc_ctmin_est)

    # Linear interpolation of kappa at the extreme contrast values.
    k_low = low + (high - low) * ctmin
    k_high = low + (high - low) * ctmax
    gc_k_low = gc_low + (gc_high - gc_low) * gc_ctmin
    gc_k_high = gc_low + (gc_high - gc_low) * gc_ctmax

    if use_exp:
        k_low = np.exp(k_low)
        k_high = np.exp(k_high)
        gc_k_low = np.exp(gc_k_low)
        gc_k_high = np.exp(gc_k_high)

    ratio = k_low / k_high
    gc_ratio = gc_k_low / gc_k_high

    fig1, (ax1, ax2) = plt.subplots(1, 2)
    ax1.hist(ratio, bins=bins)
    ax1.set_title('all cells')
    ax2.hist(gc_ratio, bins=bins)
    ax2.set_title('gc')
    if not use_exp:
        title = 'k_low / k_high'
    else:
        title = 'e^(k_low - k_high)'
    fig1.suptitle(title)

    plt.figure()
    plt.scatter(ratio, r_test)
    plt.title('low/high vs r_test')

    plt.figure()
    plt.scatter(gc_ratio, gc_r)
    plt.title('low/high vs r_test, gc improvements only')
示例#7
0
def _stp_paired_hists(not_imp, imp, med_not_imp, med_imp, bin_count,
                      hist_kwargs):
    """Draw two stacked, axis-sharing histograms with both medians marked.

    ``not_imp`` goes on the top axes and ``imp`` on the bottom axes; both
    use the same bin edges and are weighted so bar heights are fractions of
    the group. Returns ``(fig, top_axes, bottom_axes)``.
    """
    fig, (top, bottom) = plt.subplots(2, 1, sharex=True, sharey=True)

    # Weights of 1/n turn counts into fractions of each group.
    top_weights = [np.ones(len(not_imp)) / len(not_imp)]
    bottom_weights = [np.ones(len(imp)) / len(imp)]

    upper = max(not_imp.max(), imp.max())
    lower = min(not_imp.min(), imp.min())
    bins = np.linspace(lower, upper, bin_count + 1)

    top.hist(not_imp,
             weights=top_weights,
             fc=faded_LN,
             edgecolor=dark_LN,
             bins=bins,
             **hist_kwargs)
    bottom.hist(imp,
                weights=bottom_weights,
                fc=faded_max,
                edgecolor=dark_max,
                bins=bins,
                **hist_kwargs)

    # Both medians are marked on both panels for direct comparison.
    for ax in (top, bottom):
        for median, line_color in ((med_not_imp, dark_LN),
                                   (med_imp, dark_max)):
            ax.axvline(median,
                       color=line_color,
                       linewidth=2,
                       linestyle='dashed',
                       dashes=dash_spacing)
    ax_remove_box(top)
    ax_remove_box(bottom)

    return fig, top, bottom


def stp_distributions(batch,
                      gc,
                      stp,
                      LN,
                      combined,
                      se_filter=True,
                      good_ln=0,
                      log_scale=False,
                      legend=False,
                      use_combined=False):
    """Compare STP ``tau`` and ``u`` between improved and other cells.

    Fitted ``tau`` and ``u`` values are read from the ``stp`` model (or from
    ``combined`` when ``use_combined`` is True), averaged per cell, and
    split into the cells in the last list returned by
    ``improved_cells_to_list`` ("stp imp") and the remaining filtered cells
    ("not imp"). Medians and two-sided Mann-Whitney U tests are computed on
    the full groups; ``drop_common_outliers`` is applied afterwards, so
    only the histograms exclude outliers.

    Parameters
    ----------
    batch
        Batch identifier.
    gc, stp, LN, combined
        Model names, forwarded to the cell-selection helpers.
    se_filter : bool
        Unused; kept for interface compatibility.
    good_ln
        Forwarded to ``improved_cells_to_list``.
    log_scale : bool
        Unused; kept for interface compatibility.
    legend : bool
        If True, add a legend to the fifth figure.
    use_combined : bool
        If True, read parameters from ``combined`` instead of ``stp``.

    Returns
    -------
    tuple
        ``(fig1, fig2, fig3, fig4, fig5)``: tau histograms, tau summary
        text, u histograms, u summary text, and the ``stp_magnitude``
        output traces for the two group medians.
    """
    cellids, _, _ = get_filtered_cellids(batch,
                                         gc,
                                         stp,
                                         LN,
                                         combined,
                                         as_lists=False)
    _, _, _, _, c = improved_cells_to_list(batch,
                                           gc,
                                           stp,
                                           LN,
                                           combined,
                                           good_ln=good_ln)

    params_model = combined if use_combined else stp
    stp_params = fitted_params_per_batch(batch,
                                         params_model,
                                         stats_keys=[],
                                         meta=[])

    # Cells that have fitted parameters but no entry in the mask are added
    # as False so the mask covers every column of stp_params.
    stp_params_cells = stp_params.transpose().index.values.tolist()
    for cell in stp_params_cells:
        if cell not in cellids:
            cellids[cell] = False
    not_c = list(set(stp_params.transpose()[cellids].index.values) - set(c))

    # index keys are formatted like "2--stp.2--tau"
    # The module index must come from the model whose parameters were
    # fetched: the stp module can sit at a different position in the
    # combined model than in the stp-only model.
    # If no module name contains 'stp', i and k are left at the last module.
    mod_keys = params_model.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'stp' in k:
            break
    tau_key = '%d--%s--tau' % (i, k)
    u_key = '%d--%s--u' % (i, k)

    all_taus = stp_params[stp_params.index ==
                          tau_key].transpose()[cellids].transpose()
    all_us = stp_params[stp_params.index ==
                        u_key].transpose()[cellids].transpose()
    dims = all_taus.values.flatten()[0].shape[0]

    # Per-cell mean across the parameter's dimensions, for each group.
    sep_taus = _df_to_array(all_taus[not_c], dims).mean(axis=0)
    sep_us = _df_to_array(all_us[not_c], dims).mean(axis=0)
    med_tau = np.median(sep_taus)
    med_u = np.median(sep_us)

    stp_sep_taus = _df_to_array(all_taus[c], dims).mean(axis=0)
    stp_sep_us = _df_to_array(all_us[c], dims).mean(axis=0)
    stp_med_tau = np.median(stp_sep_taus)
    stp_med_u = np.median(stp_sep_us)

    # NOTE: tau_t / u_t hold the Mann-Whitney U statistic, not a t
    #       statistic; the names are left over from an earlier t-test.
    tau_t, tau_p = st.mannwhitneyu(sep_taus,
                                   stp_sep_taus,
                                   alternative='two-sided')
    u_t, u_p = st.mannwhitneyu(sep_us, stp_sep_us, alternative='two-sided')

    # Outliers are removed only after the statistics above are computed.
    sep_taus, sep_us = drop_common_outliers(sep_taus, sep_us)
    stp_sep_taus, stp_sep_us = drop_common_outliers(stp_sep_taus, stp_sep_us)
    not_imp_outliers = len(sep_taus)
    imp_outliers = len(stp_sep_taus)

    color = model_colors['LN']
    imp_color = model_colors['max']
    total_cells = len(c) + len(not_c)
    bin_count = 30
    hist_kwargs = {'linewidth': 1, 'label': ['not imp', 'stp imp']}

    fig1, a1, _ = _stp_paired_hists(sep_taus, stp_sep_taus, med_tau,
                                    stp_med_tau, bin_count, hist_kwargs)
    plt.sca(a1)

    fig2 = plt.figure(figsize=text_fig)
    text = ("tau distributions, n: %d\n"
            "n stp imp (bot): %d, med: %.4f\n"
            "n not imp (top): %d, med: %.4f\n"
            "yaxes: fraction of cells\n"
            "xaxis: tau(ms)\n"
            "st.mannwhitneyu u: %.4E,\np: %.4E\n"
            "not imp after outliers: %d\n"
            "imp after outliers: %d\n" %
            (total_cells, len(c), stp_med_tau, len(not_c), med_tau, tau_t,
             tau_p, not_imp_outliers, imp_outliers))
    plt.text(0.1, 0.5, text)

    fig3, _, _ = _stp_paired_hists(sep_us, stp_sep_us, med_u, stp_med_u,
                                   bin_count, hist_kwargs)

    fig4 = plt.figure(figsize=text_fig)
    text = ("u distributions, n: %d\n"
            "n stp imp (bot): %d, med: %.4f\n"
            "n not imp (top): %d, med: %.4f\n"
            "yaxes: fraction of cells\n"
            "xaxis: u(fractional change in gain per unit stimulus amplitude)\n"
            "st.mannwhitneyu u: %.4E,\np: %.4E" %
            (total_cells, len(c), stp_med_u, len(not_c), med_u, u_t, u_p))
    plt.text(0.1, 0.5, text)

    # Traces produced by stp_magnitude for the median tau/u of each group.
    _, _, stp_out = stp_magnitude(np.array([[stp_med_tau]]),
                                  np.array([[stp_med_u]]))
    _, _, out = stp_magnitude(np.array([[med_tau]]), np.array([[med_u]]))
    fig5 = plt.figure(figsize=short_fig)
    plt.plot(stp_out.as_continuous().flatten(),
             color=imp_color,
             label='STP ++')
    plt.plot(out.as_continuous().flatten(), color=color)
    if legend:
        plt.legend()
    ax_remove_box()

    return fig1, fig2, fig3, fig4, fig5
示例#8
0
def gc_distributions(batch,
                     gc,
                     stp,
                     LN,
                     combined,
                     se_filter=True,
                     good_ln=0,
                     use_combined=False):
    """Plot distributions of contrast-dependent changes in dsig parameters.

    For each of the four dsig parameters (base, amplitude, shift, kappa) the
    difference between the fitted ``<param>_mod`` value and the plain
    ``<param>`` value is computed per cell. Cells are split into the group
    ``c`` (fifth value returned by ``improved_cells_to_list``, labelled
    "gc imp" in the figure text) and the remaining filtered cells
    ("not imp"), and the two groups are compared with a two-sided
    Mann-Whitney U test.

    Parameters
    ----------
    batch :
        Batch identifier forwarded to the data-loading helpers.
    gc, stp, LN, combined : str
        Model names forwarded to the data-loading helpers.  Fitted parameters
        are read from ``gc`` (or ``combined``, see ``use_combined``).
    se_filter : bool
        Not used by this function; kept so existing callers keep working.
    good_ln :
        Forwarded to ``improved_cells_to_list``.
    use_combined : bool
        If True, read fitted parameters from ``combined`` instead of ``gc``.

    Returns
    -------
    list
        Figures in this order: for each parameter a stacked-histogram figure
        followed by a text figure with its statistics (8 figures), then for
        the non-improved and the improved group a scatter figure of
        ``gc_dummy_sigmoid`` evaluated at the group's median parameters,
        followed by a text figure listing those medians (4 figures).

    Raises
    ------
    ValueError
        If the selected model name contains no ``dsig`` module.
    """
    # NOTE(review): the result of this call is never used below.  The call is
    # kept only in case the loader has side effects -- confirm and remove.
    get_dataframes(batch, gc, stp, LN, combined)
    cellids, _, _ = get_filtered_cellids(batch,
                                         gc,
                                         stp,
                                         LN,
                                         combined,
                                         as_lists=False)
    _, _, _, _, c = improved_cells_to_list(batch,
                                           gc,
                                           stp,
                                           LN,
                                           combined,
                                           good_ln=good_ln)

    params_model = combined if use_combined else gc
    gc_params = fitted_params_per_batch(batch,
                                        params_model,
                                        stats_keys=[],
                                        meta=[])
    params_by_cell = gc_params.transpose()
    # Cells that have fitted parameters but are missing from the filter mask
    # are treated as filtered out, so the boolean indexing below covers them.
    for cell in params_by_cell.index.values.tolist():
        if cell not in cellids:
            cellids[cell] = False
    not_c = list(set(params_by_cell[cellids].index.values) - set(c))

    # index keys are formatted like "4--dsig.d--kappa"
    mod_keys = params_model.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    else:
        # Without this guard the keys below would silently be built from the
        # last module in the model name and match no parameter rows.
        raise ValueError("no 'dsig' module found in model name: %s"
                         % params_model)

    param_names = ['base', 'amplitude', 'shift', 'kappa']
    raw_keys = [f'{i}--{k}--{name}' for name in param_names]
    # First the four plain parameters, then their four '_mod' counterparts.
    all_keys = raw_keys + [key + '_mod' for key in raw_keys]

    phi_dfs = [
        gc_params[gc_params.index == key].transpose()[cellids].transpose()
        for key in all_keys
    ]
    sep_dfs = [df[not_c].values.flatten().astype(np.float64) for df in phi_dfs]
    gc_sep_dfs = [df[c].values.flatten().astype(np.float64) for df in phi_dfs]

    raw_low, raw_high = sep_dfs[:4], sep_dfs[4:]
    diffs = [high - low for low, high in zip(raw_low, raw_high)]
    medians = [np.median(d) for d in diffs]
    medians_low = [np.median(d) for d in raw_low]
    medians_high = [np.median(d) for d in raw_high]

    gc_raw_low, gc_raw_high = gc_sep_dfs[:4], gc_sep_dfs[4:]
    gc_diffs = [high - low for low, high in zip(gc_raw_low, gc_raw_high)]
    gc_medians = [np.median(d) for d in gc_diffs]
    gc_medians_low = [np.median(d) for d in gc_raw_low]
    gc_medians_high = [np.median(d) for d in gc_raw_high]

    # Medians and test statistics are computed on the full data, before any
    # outliers are dropped for plotting.
    ts, ps = zip(*[
        st.mannwhitneyu(diff, gc_diff, alternative='two-sided')
        for diff, gc_diff in zip(diffs, gc_diffs)
    ])

    # removing extreme outliers b/c kept getting one or two cells with
    # values that were multiple orders of magnitude different than all others
    diffs = drop_common_outliers(*diffs)
    gc_diffs = drop_common_outliers(*gc_diffs)
    # Number of cells that remain in each group once outliers are removed.
    n_not_imp_kept = len(diffs[0])
    n_imp_kept = len(gc_diffs[0])

    color = model_colors['LN']
    c_color = model_colors['max']
    total_cells = len(c) + len(not_c)
    hist_kwargs = {'label': ['no imp', 'sig imp'], 'linewidth': 1}

    figs = []
    for idx, name in enumerate(param_names):
        hist_fig = _stacked_hists(diffs[idx],
                                  gc_diffs[idx],
                                  medians[idx],
                                  gc_medians[idx],
                                  color,
                                  c_color,
                                  hist_kwargs=hist_kwargs)
        text_figure = plt.figure(figsize=text_fig)
        text = ("%s distributions, n: %d\n"
                "n gc imp (bot): %d, med: %.4f\n"
                "n not imp (top): %d, med: %.4f\n"
                "yaxes: fraction of cells\n"
                "xaxis: 'fractional change in parameter per unit contrast'\n"
                "st.mannwhitneyu u: %.4E,\np: %.4E\n"
                "not imp w/o outliers: %d\n"
                "imp w/o outliers: %d" %
                (name, total_cells, len(c), gc_medians[idx], len(not_c),
                 medians[idx], ts[idx], ps[idx], n_not_imp_kept, n_imp_kept))
        plt.text(0.1, 0.5, text)
        figs.append(hist_fig)
        figs.append(text_figure)

    # median gc effect plots: one scatter figure plus one text figure per
    # group, non-improved cells first.
    groups = [
        ("non improved cells", medians_low, medians_high, color),
        ("improved cells", gc_medians_low, gc_medians_high, c_color),
    ]
    for title, low_medians, high_medians, group_color in groups:
        scatter_fig = plt.figure(figsize=small_fig)
        yin_low, out_low = gc_dummy_sigmoid(*low_medians, low=0.0, high=0.3)
        yin_high, out_high = gc_dummy_sigmoid(*high_medians,
                                              low=0.0,
                                              high=0.3)
        plt.scatter(yin_low, out_low, color=group_color, s=big_scatter,
                    alpha=0.3)
        plt.scatter(yin_high, out_high, color=group_color,
                    s=big_scatter * 2)
        figs.append(scatter_fig)
        plt.tight_layout()
        ax_remove_box()

        summary_fig = plt.figure(figsize=text_fig)
        text = ("%s\n"
                "median low contrast:\n"
                "base:  %.4f,   amplitude:  %.4f\n"
                "shift:  %.4f,   kappa:  %.4f\n"
                "median high contrast:\n"
                "base:  %.4f,   amplitude:  %.4f\n"
                "shift:  %.4f,   kappa:  %.4f\n" %
                (title, *low_medians, *high_medians))
        plt.text(0.1, 0.5, text)
        figs.append(summary_fig)

    return figs
示例#9
0
def _kitchen_sink_heatmap(values, labels, fmt, **text_kwargs):
    """Draw a square matrix as an annotated image and return the new figure.

    ``values`` is shown with a colorbar, both axes are ticked with ``labels``
    and every cell is overlaid with ``fmt % value`` in white text.  Extra
    keyword arguments are passed on to ``ax.text``.
    """
    fig, ax = plt.subplots()
    plt.imshow(values)
    plt.colorbar()
    ticks = np.arange(len(labels))
    ax.set_xticks(ticks)
    plt.setp(ax.get_xticklabels(),
             rotation=45,
             ha='right',
             rotation_mode='anchor')
    ax.set_yticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    for (row, col), value in np.ndenumerate(values):
        ax.text(col, row, fmt % value, ha='center', va='center', color='w',
                **text_kwargs)
    return fig


def kitchen_sink(batch,
                 gc,
                 stp,
                 LN,
                 combined,
                 equivalence_path,
                 drop_outliers=True,
                 cell_mask=None,
                 mask_name=''):
    """Correlate performance gains, equivalence and fitted parameters.

    One vector per variable is assembled (one entry per selected cell):
    the ``r_test`` difference of ``gc`` vs ``LN``, the ``r_test`` difference
    of ``stp`` vs ``LN``, the ``equivalence`` column of the pickled
    dataframe, the four dsig ``<param>_mod - <param>`` differences of the
    ``gc`` model, and the ``tau`` / ``u`` parameters of the ``stp`` model
    (each reduced with ``_df_to_array(...).mean(axis=0)``).  The pairwise
    Pearson correlations and their scaled p-values are drawn as two
    annotated heatmaps.

    Parameters
    ----------
    batch :
        Batch identifier used for cell selection and parameter loading.
    gc, stp, LN, combined : str
        Model names.  ``gc`` must contain a ``dsig`` module and ``stp`` a
        ``stp`` module.
    equivalence_path :
        Path of a pickled dataframe with an ``equivalence`` column.  It is
        sorted by index and used as-is, so it presumably has to contain
        exactly the selected cells -- TODO confirm against the code that
        writes it.
    drop_outliers : bool
        If True, drop every cell that ``is_outlier`` flags for at least one
        of the variables.
    cell_mask :
        Optional index applied to every vector (``v[cell_mask]``) before
        outlier removal.
    mask_name : str
        Name of the mask, only used in the figure titles.

    Returns
    -------
    tuple
        ``(fig1, fig2)``: the correlation heatmap and the p-value heatmap.

    Raises
    ------
    ValueError
        If ``stp`` has no ``stp`` module or ``gc`` has no ``dsig`` module.
    """
    # 0.  Get auditory-responsive cells
    _, a, _, _, _ = improved_cells_to_list(batch,
                                           gc,
                                           stp,
                                           LN,
                                           combined,
                                           as_lists=False)
    a_list = a[a == True].index.values.tolist()

    # 1.  load batch parameters (shouldn't need to load models)
    # Use the caller's batch rather than a hard-coded batch number so the
    # parameters come from the same batch as the cell selection above.
    stp_params, gc_params, LN_params = [
        fitted_params_per_batch(batch,
                                modelname,
                                stats_keys=[],
                                meta=['r_test'],
                                manual_cellids=a_list)
        for modelname in (stp, gc, LN)
    ]

    # SECURITY: unpickling executes arbitrary code; only pass paths to files
    # produced by trusted code.
    equivalence_df = pd.read_pickle(equivalence_path)
    equivalence = equivalence_df.sort_index()['equivalence'].values

    # assemble each attribute as a vector
    # index keys are formatted like "2--stp.2--tau"
    mod_keys = stp.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'stp' in k:
            break
    else:
        # Otherwise the keys would silently use the last module in the name.
        raise ValueError("no 'stp' module found in model name: %s" % stp)
    stp_keys = [f'{i}--{k}--tau', f'{i}--{k}--u']

    mod_keys = gc.split('_')[1]
    for i, k in enumerate(mod_keys.split('-')):
        if 'dsig' in k:
            break
    else:
        raise ValueError("no 'dsig' module found in model name: %s" % gc)
    raw_keys = [
        f'{i}--{k}--{name}'
        for name in ('base', 'amplitude', 'shift', 'kappa')
    ]
    mod_param_keys = [key + '_mod' for key in raw_keys]

    stp_dfs = [
        stp_params[stp_params.index == key].transpose().sort_index()[a]
        for key in stp_keys
    ]
    gc_vectors = [
        gc_params[gc_params.index == key].transpose().sort_index()[a]
        .astype(np.float64).values.flatten()
        for key in raw_keys + mod_param_keys
    ]
    r_dfs = [
        params[params.index == 'meta--r_test'].transpose().sort_index()[a]
        for params in [gc_params, stp_params, LN_params]
    ]

    # The vectors are ordered as four plain parameters followed by their four
    # '_mod' counterparts, so each difference pairs entry n with entry n + 4
    # (pairing neighbours would subtract unrelated parameters).
    gc_diffs = [
        mod - raw for raw, mod in zip(gc_vectors[:4], gc_vectors[4:])
    ]
    # "...--base_mod" -> "...--base_diff"
    gc_keys = [key[:-3] + 'diff' for key in mod_param_keys]

    dims = 3
    gc_vs_LN = (r_dfs[0] - r_dfs[2]).values.astype(np.float64).flatten()
    stp_vs_LN = (r_dfs[1] - r_dfs[2]).values.astype(np.float64).flatten()
    to_corr = [gc_vs_LN, stp_vs_LN, equivalence]
    to_corr.extend(gc_diffs)
    to_corr.extend([_df_to_array(df, dims).mean(axis=0) for df in stp_dfs])

    if cell_mask is not None:
        to_corr = [v[cell_mask] for v in to_corr]

    if drop_outliers:
        # drop any cells that are an outlier for at least one of the variables
        out = np.zeros_like(to_corr[0], dtype='bool')
        for v in to_corr:
            out = out | is_outlier(v)
        to_corr = [v[~out] for v in to_corr]

    n_cells = len(to_corr[0])

    matrix = np.vstack(to_corr)
    labels = ['gc_vs_LN', 'stp_vs_LN', 'equivalence']
    labels.extend(key.split('-')[-1] for key in gc_keys + stp_keys)

    corr = np.corrcoef(matrix)
    fig1 = _kitchen_sink_heatmap(corr, labels, '%.3f')
    fig1.suptitle("Correlations,  mask:%s\n"
                  "n: %d\n"
                  "outliers dropped?:  %s" %
                  (mask_name, n_cells, drop_outliers))

    ps = np.empty_like(corr)
    # do a bonferroni correction since it's easy
    # NOTE(review): the factor is the number of variables, not the number of
    # pairwise tests, and scaled values are not capped at 1 -- confirm that
    # this is the intended correction.
    p_correction = ps.shape[0]
    for i, row_i in enumerate(matrix):
        for j, row_j in enumerate(matrix):
            _, p = st.pearsonr(row_i, row_j)
            ps[i, j] = p * p_correction

    fig2 = _kitchen_sink_heatmap(ps, labels, '%.1E', size=12)
    fig2.suptitle("P-values * %d (bonferroni correction)\n"
                  "mask:%s\n"
                  "n: %d\n"
                  "outliers dropped?:  %s" %
                  (p_correction, mask_name, n_cells, drop_outliers))

    return fig1, fig2