Exemplo n.º 1
0
def get_z(recalculate: bool,
          data_logp: pd.DataFrame,
          data_corr: pd.DataFrame,
          filepath: Optional[str] = None) -> pd.DataFrame:
    """Get the signed z-scores based on p-values of the correlation matrix

    The magnitude of each z-score is ``|ndtri_exp(logp - ln(2))|``, i.e.
    the absolute standard-normal quantile of half the p-value, evaluated
    directly from the log p-value. The sign of each z-score is the sign of
    the corresponding entry in `data_corr` (zero correlations give zero).

    Parameters
    ----------
    recalculate :
        If True, recalculate the z-scores
    data_logp :
        The logp values. The result is labelled with the *columns* of this
        dataframe on both axes.
    data_corr :
        The correlation matrix of entity-entity correlations.
    filepath :
        Path without the ``.h5`` extension (the extension is appended).
        If `recalculate==False`: read the z-score values from this file.
        If `recalculate==True`: write the z-score values to this file.
        If not provided, run the calculation and return the z-score dataframe
        without writing it to a file.

    Returns
    -------
    :
        A dataframe with the z-scores
    """
    start = time()
    if recalculate or filepath is None:
        # Equivalent formulations kept for reference:
        #   stats.norm.ppf(1 - np.exp(data_logp) / 2)
        #   -norminv_logcdf(data_logp - np.log(2))
        # ndtri_exp works on the log-probability directly, so very small
        # p-values do not have to be exponentiated first.
        z_mat = abs(ndtri_exp(data_logp - np.log(2)))
        # -1 / 0 / +1 per entry; NaN entries stay NaN
        data_sign = np.sign(data_corr)
        data_z = data_sign * pd.DataFrame(
            z_mat, index=data_logp.columns, columns=data_logp.columns)
        if filepath is not None:
            h5_path = '%s.h5' % filepath
            logger.info(f'Saving z score dataframe to {h5_path}')
            # The HDF key is the last '/'-separated component of filepath
            data_z.to_hdf(h5_path, key=filepath.split('/')[-1])
    else:
        h5_path = '%s.h5' % filepath
        # Report the file that is actually opened (including the extension)
        logger.info(f'Reading z-score dataframe from {h5_path}')
        data_z = pd.read_hdf(h5_path)
    elapsed = time() - start
    logger.info(f'Got z-score dataframe in {elapsed} sec')
    return data_z
Exemplo n.º 2
0
 def test_outside_domain(self):
     assert np.isnan(ndtri_exp(1.0))
Exemplo n.º 3
0
 def test_asymptotes(self):
     assert_equal(ndtri_exp([-np.inf, 0.0]), [-np.inf, np.inf])
Exemplo n.º 4
0
def log_ndtr_ndtri_exp(y):
    """Apply ``ndtri_exp`` to `y` and then ``log_ndtr`` to the result."""
    quantile = ndtri_exp(y)
    return log_ndtr(quantile)
Exemplo n.º 5
0
def main():
    """Plot the percentage of explained correlations for each graph type

    For every graph type returned by ``_loop_explainers(expl_dir)`` that has
    data, two line plots of the `labels` columns against ``x_pos`` are
    saved to `outdir` as PDF: one with a linear y-axis and one with a
    logarithmic y-axis. Both plots carry a vertical reference line at
    ``|ndtri_exp(ln(.05) - ln(2))|``.

    Relies on names defined outside this function: ``expl_dir``, ``labels``,
    ``legend_labels``, ``data_title``, ``outdir`` and ``args``.
    """
    logger.info('Extracting data from explainers')
    expl_data = _loop_explainers(expl_dir)

    base_columns = ['range', 'filter_w_count', 'x_pos'] + labels

    # Reference line: the log-probability is halved (minus ln 2) *before*
    # being passed to ndtri_exp, matching the label below. The earlier code
    # subtracted ln(2) from the quantile instead.
    fdr_line = abs(ndtri_exp(np.log(0.05) - np.log(2)))
    fdr_label = 'FDR=|ndtri_exp(ln(.05)-ln(2))|'

    # Per graph type, extract what the old code has
    for graph_type, list_of_expl_data in expl_data.items():
        if len(list_of_expl_data) == 0:
            logger.info(f'Skipping graph type {graph_type}')
            continue
        logger.info(f'Plotting for graph type {graph_type}')

        # One single-row frame per entry, stacked in one concat call
        # (DataFrame.append no longer exists in pandas >= 2.0)
        frames = [pd.DataFrame(data=data, index=[0])
                  for data in list_of_expl_data]
        stats_norm = pd.concat(frames, sort=False)
        # Keep the column layout of the previous implementation: the base
        # columns first, followed by any additional columns from the data
        extra_columns = [col for col in stats_norm.columns
                         if col not in base_columns]
        stats_norm = stats_norm.reindex(columns=base_columns + extra_columns)
        stats_norm.sort_values('x_pos', inplace=True)

        # Plot
        stats_norm.plot(x='x_pos',
                        y=labels,
                        legend=legend_labels,
                        kind='line',
                        marker='o',
                        title=f'{data_title}, {graph_type.capitalize()}')
        # First tick sits at -1 and is labelled 'RND'; the remaining ticks
        # run in steps of 2 from the second-smallest x_pos past the maximum
        ticks = [-1] + list(
            range(int(stats_norm.x_pos.values[1]),
                  int(stats_norm.x_pos.max()) + 2, 2))
        ticks_labels = ['RND'] + [str(n) for n in ticks[1:]]
        plt.xticks(ticks=ticks, labels=ticks_labels)
        plt.xlabel('abs(z-score) lower bound')
        plt.ylabel('Pct. Corrs. Explained')
        plt.ylim((0, 100))
        plt.axvline(x=fdr_line, ymax=0.65, color='c', label=fdr_label)
        plt.legend()
        fpath = Path(outdir).joinpath(f'{data_title}_{graph_type}.pdf')
        logger.info(f'Saving plot output to {fpath}')
        plt.savefig(fpath)
        if args.show_plot:
            plt.show()

        # Same plot again with a logarithmic y-axis
        stats_norm.plot(x='x_pos',
                        y=labels,
                        legend=legend_labels,
                        kind='line',
                        marker='o',
                        logy=True,
                        title=f'{data_title}, '
                        f'{graph_type.capitalize()} (ylog)')
        plt.xticks(ticks=ticks, labels=ticks_labels)
        plt.xlabel('abs(z-score) lower bound')
        plt.ylabel('Pct. Corrs. Explained')
        plt.ylim((10**-2, 10**2))
        plt.axvline(x=fdr_line, ymin=0.35, color='c', label=fdr_label)
        plt.legend()
        plt.savefig(
            Path(outdir).joinpath(f'{data_title}_{graph_type}_ylog.pdf'))
        if args.show_plot:
            plt.show()