Exemplo n.º 1
0
def get_eval(lr=0.01, n_episodes=50, is_train=False, savefig=False):
    """Evaluate the Q-learning NN agent on every environment from ``get_env``.

    Results are cached in ``./results/qlearning_nn/qlearning_nn.csv``. When
    that file exists and neither ``is_train`` nor ``savefig`` is set, it is
    read back, sorted and returned without running any evaluation. Otherwise
    the stale CSV and log file are deleted, the agent is loaded from its
    pickle (or produced by ``train`` when no pickle exists) and run once per
    environment.

    Args:
        lr: Learning rate; part of the pickle file name and forwarded to
            ``train``.
        n_episodes: Episode count; part of the pickle file name and
            forwarded to ``train``.
        is_train: When true, the cached CSV is ignored and the evaluation
            is re-run.
        savefig: When true, the cached CSV is ignored and one JPG per
            environment is saved next to the results.

    Returns:
        A ``pandas.DataFrame`` sorted by ``noisy`` then ``problem_id`` with
        the columns ``problem_id``, ``noisy``, ``action``, ``Total_rewards``
        and ``avg_reward_per_action``. The ``action`` column is declared but
        never filled by this function, so it only holds missing values.
    """
    print('qlearning_nn evaluating...')
    base_dir = './results/qlearning_nn'
    os.makedirs(base_dir, exist_ok=True)

    log_file = os.path.join(base_dir, 'qlearning_nn.log')
    # ``logging`` is called with the log path and returns the callable that
    # is used below to record each result.
    logger = logging(log_file)
    results_file = os.path.join(base_dir, 'qlearning_nn.csv')

    if os.path.exists(results_file) and not is_train and not savefig:
        cached = pd.read_csv(results_file)
        return cached.sort_values(by=['noisy', 'problem_id'])

    # A fresh evaluation was requested: drop the previous outputs first.
    for stale_path in (results_file, log_file):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    pkl_file = os.path.join(
        base_dir,
        'qlearning_nn_lr={}_episodes={}.pkl'.format(lr, n_episodes))
    if os.path.exists(pkl_file):
        # NOTE(review): unpickling executes arbitrary code. This is only safe
        # if the file is a trusted, locally produced cache -- confirm.
        with open(pkl_file, 'rb') as pkl_handle:
            q_learning_nn = pickle.load(pkl_handle)
    else:
        q_learning_nn = train(lr=lr, n_episodes=n_episodes)

    # eval
    columns = [
        'problem_id', 'noisy', 'action', 'Total_rewards',
        'avg_reward_per_action'
    ]
    rows = []
    for problem_id, noisy, env in get_env():
        states, rewards, actions = implement(env,
                                             q_learning_nn,
                                             1,
                                             discount_factor=0.95)
        total_reward = sum(rewards)
        result = {
            'problem_id': problem_id,
            'noisy': noisy,
            'Total_rewards': total_reward,
            'avg_reward_per_action': total_reward / len(actions)
        }
        rows.append(result)
        logger(' ' + str(result))
        logger(actions)
        if savefig:
            get_fig(states, rewards)
            pic_name = os.path.join(
                base_dir,
                'problem_id={} noisy={}.jpg'.format(problem_id, noisy))
            plt.savefig(dpi=300, fname=pic_name)
            plt.close()
        env.close()

    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
    results = pd.DataFrame(rows, columns=columns)
    results = results.sort_values(by=['noisy', 'problem_id'])
    results.to_csv(results_file, index=False)
    return results
def plot_batch(df, batch):
    """Plot fluorescence per compound for one culture batch.

    ``df`` is read through its ``conc``, ``comp``, ``fluo`` and ``control``
    columns. Two SVG files are written under ``figures/``:

    * ``tb_culture_50uM_{batch}.svg`` -- bar plot plus swarm plot of the rows
      with ``conc == -3``.
    * ``tb_culture_{batch}.svg`` -- one dose-response panel per compound.

    For any batch whose name starts with ``'Ala'`` a one-sided t-test of each
    listed compound against the ``'DMSO'`` rows is printed first. Figures are
    only produced for the batches ``'AlaA'`` and ``'AlaB'``; for every other
    batch the function returns after the optional t-tests.

    Args:
        df: Data frame holding the measurements.
        batch: Batch name; selects the compound order and the output names.

    Returns:
        None.
    """
    is_ala = batch.startswith('Ala')

    # Plot 50uM.

    df_50uM = df[df.conc == -3]

    if is_ala:
        df_dmso = df_50uM[df_50uM.comp == 'DMSO']
        for comp in ['K252a', 'SU11652', 'TG101209', 'RIF', 'IKK16']:
            df_comp = df_50uM[df_50uM.comp == comp]
            t, p_2side = ss.ttest_ind(df_comp.fluo, df_dmso.fluo)
            # Convert the two-sided P into the one-sided P for the
            # alternative "compound mean is below the DMSO mean".
            p_1side = p_2side / 2. if t < 0 else 1. - (p_2side / 2.)
            print('{}, one-sided t-test P = {}, n = {}'
                  .format(comp, p_1side, len(df_comp)))

    if batch == 'AlaA':
        order = ['K252a', 'SU11652', 'TG101209', 'RIF', 'DMSO']
    elif batch == 'AlaB':
        order = ['IKK16', 'K252a', 'RIF', 'DMSO']
    else:
        return

    plt.figure()
    sns.barplot(x='comp', y='fluo', data=df_50uM, ci=95, dodge=False,
                hue='control', palette=sns.color_palette("RdBu_r", 7),
                order=order, capsize=0.2, errcolor='#888888',)
    sns.swarmplot(x='comp', y='fluo', data=df_50uM, color='black',
                  order=order)
    # Only 'AlaA'/'AlaB' reach this point, so the log scale is currently
    # never applied; the branch is kept for batches added to the table above.
    if not is_ala:
        plt.yscale('log')
    plt.savefig('figures/tb_culture_50uM_{}.svg'.format(batch))
    plt.close()

    # Plot dose-response.

    plt.figure(figsize=(24, 6))
    for cidx, comp in enumerate(order):
        df_subset = df[df.comp == comp]

        # The grid always has five columns, even when ``order`` has four
        # compounds.
        plt.subplot(1, 5, cidx + 1)
        sns.lineplot(x='conc', y='fluo', data=df_subset, ci=95,)
        sns.scatterplot(x='conc', y='fluo', data=df_subset,
                        color='black',)
        plt.title(comp)
        if is_ala:
            plt.ylim([0., 1.3])
        else:
            plt.ylim([10, 1000000])
            plt.yscale('log')
        # Ticks at conc -3, -4, -5 are labelled 50, 25 and 10.
        plt.xticks(list(range(-3, -6, -1)),
                   ['50', '25', '10'])

    plt.savefig('figures/tb_culture_{}.svg'.format(batch))
    plt.close()
def visualize_heatmap(chem_prot, suffix=''):
    """Draw ``chem_prot`` as a heatmap and save it as a PNG in ``figures/``.

    Args:
        chem_prot: Matrix-like data handed straight to ``sns.heatmap``.
        suffix: Optional tag. When empty the file is
            ``figures/heatmap.png``; otherwise ``figures/heatmap_{suffix}.png``.

    Returns:
        None.
    """
    if suffix == '':
        out_path = 'figures/heatmap.png'
    else:
        out_path = 'figures/heatmap_{}.png'.format(suffix)

    plt.figure()
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(chem_prot, cmap=palette)

    # Make sure the output directory exists before writing the image.
    mkdir_p('figures/')
    plt.savefig(out_path, dpi=300)
    plt.close()
def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    """Scatter predicted score against variance, coloured by acquisition.

    Predictions above 10000 are clipped to 10000 for display only. The
    figure is saved to ``figures/acquisition_unknown_{regress_type}.png``.

    Args:
        y_unk_pred: Predicted scores (x axis). Must support boolean-mask
            assignment and ``.copy()`` (e.g. a NumPy array). The caller's
            object is left unmodified.
        var_unk_pred: Predicted variances (y axis).
        acquisition: Acquisition values; their negation is used as the
            colour value.
        regress_type: Regressor name, used in the title and the file name.

    Returns:
        None.
    """
    # ``arr[:]`` on a NumPy array is a view, so clipping through it would
    # overwrite the caller's data. Take a real copy before clipping.
    y_clipped = y_unk_pred.copy()
    y_clipped[y_clipped > 10000] = 10000

    plt.figure()
    plt.scatter(y_clipped, var_unk_pred, alpha=0.5, c=-acquisition,
                cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
Exemplo n.º 5
0
def score_scatter(y_pred, y, var_pred, regress_type, prefix=''):
    """Scatter predicted score against variance, coloured by the true value.

    Predictions are clipped to the range [0, 10000] for display only. Point
    colours are ``y`` min-max scaled to [0, 1]. The figure is saved to
    ``figures/variance_vs_pred_{prefix}regressors{regress_type}.png``.

    Args:
        y_pred: Predicted scores (x axis). Must support boolean-mask
            assignment and ``.copy()`` (e.g. a NumPy array). The caller's
            object is left unmodified.
        y: Reference values used for colouring; must provide ``min()`` and
            ``max()``.
        var_pred: Predicted variances (y axis).
        regress_type: Regressor name, used in the file name.
        prefix: Optional text inserted into the file name.

    Returns:
        None.
    """
    # ``arr[:]`` on a NumPy array is a view, so clipping through it would
    # overwrite the caller's data. Take a real copy before clipping.
    y_clipped = y_pred.copy()
    y_clipped[y_clipped < 0] = 0
    y_clipped[y_clipped > 10000] = 10000

    plt.figure()
    plt.scatter(y_clipped, var_pred, alpha=0.3,
                c=(y - y.min()) / (y.max() - y.min()))
    plt.viridis()
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/variance_vs_pred_{}regressors{}.png'
                .format(prefix, regress_type), dpi=300)
    plt.close()
Exemplo n.º 6
0
def plot_values(df, score_fn):
    """Plot per-compound affinities for each model and print t-tests.

    ``df`` is read through its ``model``, ``compound_`` and ``affinity``
    columns. One bar-plot panel is drawn per model and the figure is saved
    to ``figures/design_docking_{score_fn}.svg``. Afterwards the results of
    ``ttest_ind`` for ``gp`` vs ``mlper1`` and ``sparsehybrid`` vs ``mlper1``
    affinities are printed.

    Args:
        df: Data frame holding the docking results.
        score_fn: Name of the scoring function. ``'rdock'`` uses the y-range
            ``[0, -40]``; anything else uses ``[0, -12]``.

    Returns:
        None.

    Raises:
        ValueError: If a model has no colour assigned.
    """
    models = ['mlper1', 'sparsehybrid', 'gp', 'real']
    model_colors = {
        'gp': '#3e5c71',
        'sparsehybrid': '#2d574e',
        'mlper1': '#a12424',
        'real': '#A9A9A9',
    }

    plt.figure(figsize=(10, 4))

    for midx, model in enumerate(models):
        if model not in model_colors:
            # The original message had no placeholder and lost the name.
            raise ValueError('Invalid model {}'.format(model))
        color = model_colors[model]

        plt.subplot(1, len(models), midx + 1)
        df_subset = df[df.model == model]
        compounds = np.array(df_subset.compound_)
        if model == 'real':
            # Alphabetical order for the 'real' panel.
            order = sorted(compounds)
        else:
            # Descending affinity for the model panels.
            order = compounds[np.argsort(-df_subset.affinity)]
        sns.barplot(data=df_subset,
                    x='compound_',
                    y='affinity',
                    color=color,
                    order=order)
        # The upper limit is below the lower one, so the y axis is inverted.
        if score_fn == 'rdock':
            plt.ylim([0, -40])
        else:
            plt.ylim([0, -12])
        plt.xticks(rotation=45)

    plt.savefig('figures/design_docking_{}.svg'.format(score_fn))
    plt.close()

    mlp_affinity = df[df.model == 'mlper1'].affinity
    print('Score function: {}'.format(score_fn))
    print('GP vs MLP: {}'.format(
        ttest_ind(
            df[df.model == 'gp'].affinity,
            mlp_affinity,
        )))
    print('Hybrid vs MLP: {}'.format(
        ttest_ind(
            df[df.model == 'sparsehybrid'].affinity,
            mlp_affinity,
        )))
    print('')
                    continue
                seen.add(zinc)
                order_list.append((order, Kd))

            order_list = [
                order for order, _ in sorted(order_list, key=lambda x: x[1])
            ]

            plt.subplot(1, 3, bidx + 1)
            sns.barplot(
                x='order',
                y='Kdpoint',
                data=df_subset,
                color=palette[bidx],
                order=order_list,
                ci=95,
                capsize=0.4,
                errcolor='#888888',
            )
            sns.swarmplot(
                x='order',
                y='Kdpoint',
                data=df_subset,
                color='black',
                order=order_list,
            )
            plt.ylim([-100, 10100])

        plt.savefig('figures/prediction_barplot_{}.svg'.format(model))
        plt.close()
def latent_scatter(var_unk_pred, y_unk_pred, acquisition, **kwargs):
    """Embed observed and unknown compounds in 2-D and save scatter plots.

    Compound feature vectors are projected with PCA, UMAP and t-SNE. For
    each projection three PNG files are written under ``figures/``:

    * ``latent_scatter_{name}_ypred_...`` -- unknown compounds in blue,
      observed compounds as orange crosses.
    * ``latent_scatter_{name}_var_...`` -- unknown compounds coloured by the
      rank of ``var_unk_pred``.
    * ``latent_scatter_{name}_acq_...`` -- unknown compounds coloured by
      ``-acquisition``.

    Before plotting, the Spearman and Pearson correlations between each
    unknown compound's distance to its nearest observed compound and
    ``var_unk_pred`` are printed.

    Args:
        var_unk_pred: Predicted variances, one per unknown compound.
        y_unk_pred: Accepted for interface compatibility; not used here.
        acquisition: Acquisition values, one per unknown compound.
        **kwargs: Must contain ``chems``, ``chem2feature``, ``idx_obs``,
            ``idx_unk``, ``regress_type`` and ``prot_target``. ``idx_obs``
            and ``idx_unk`` are iterables of pairs whose first element
            indexes ``chems``. ``prot_target`` may be ``None``.

    Returns:
        None.
    """
    chems = kwargs['chems']
    chem2feature = kwargs['chem2feature']
    idx_obs = kwargs['idx_obs']
    idx_unk = kwargs['idx_unk']
    regress_type = kwargs['regress_type']
    prot_target = kwargs['prot_target']

    # Unique compound indices on each side, in ascending order.
    chem_idx_obs = sorted({i for i, _ in idx_obs})
    chem_idx_unk = sorted({i for i, _ in idx_unk})

    feature_obs = np.array([chem2feature[chems[i]] for i in chem_idx_obs])
    feature_unk = np.array([chem2feature[chems[i]] for i in chem_idx_unk])

    # Distance from every unknown compound to its closest observed compound.
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=1).fit(feature_obs)
    dist = np.ravel(nbrs.kneighbors(feature_unk)[0])
    print('Distance Spearman r = {}, P = {}'.format(
        *ss.spearmanr(dist, var_unk_pred)))
    print('Distance Pearson rho = {}, P = {}'.format(
        *ss.pearsonr(dist, var_unk_pred)))

    # Observed rows come first; ``labels`` marks them 0 and unknown rows 1.
    X = np.vstack([feature_obs, feature_unk])
    labels = np.concatenate(
        [np.zeros(len(chem_idx_obs)),
         np.ones(len(chem_idx_unk))])

    from fbpca import pca
    U, s, Vt = pca(
        X,
        k=3,
    )
    X_pca = U * s

    from umap import UMAP
    um = UMAP(
        n_neighbors=15,
        min_dist=0.5,
        n_components=2,
        metric='euclidean',
    )
    X_umap = um.fit_transform(X)

    from MulticoreTSNE import MulticoreTSNE as TSNE
    tsne = TSNE(
        n_components=2,
        n_jobs=20,
    )
    X_tsne = tsne.fit_transform(X)

    if prot_target is None:
        suffix = ''
    else:
        suffix = '_' + prot_target

    # Colourings shared by all projections: (file tag, colour values, cmap).
    colorings = [
        ('var', ss.rankdata(var_unk_pred), 'coolwarm'),
        ('acq', -acquisition, 'hot'),
    ]
    is_unk = labels == 1
    is_obs = labels == 0

    for name, coords in zip(
        ['pca', 'umap', 'tsne'],
        [X_pca, X_umap, X_tsne],
    ):
        # Only the first two components are plotted.
        unk_x, unk_y = coords[is_unk, 0], coords[is_unk, 1]

        plt.figure()
        sns.scatterplot(
            x=unk_x,
            y=unk_y,
            color='blue',
            alpha=0.1,
        )
        plt.scatter(
            x=coords[is_obs, 0],
            y=coords[is_obs, 1],
            color='orange',
            alpha=1.0,
            marker='x',
            linewidths=10,
        )
        plt.savefig('figures/latent_scatter_{}_ypred_{}{}.png'.format(
            name, regress_type, suffix),
                    dpi=300)
        plt.close()

        for tag, color_values, cmap in colorings:
            plt.figure()
            plt.scatter(x=unk_x,
                        y=unk_y,
                        c=color_values,
                        alpha=0.1,
                        cmap=cmap)
            plt.savefig('figures/latent_scatter_{}_{}_{}{}.png'.format(
                name, tag, regress_type, suffix),
                        dpi=300)
            plt.close()
Exemplo n.º 9
0
def parse_log(regress_type, experiment, **kwargs):
    """Parse an acquisition log and plot the acquired Kd per iteration.

    Reads ``iterate_davis2011kinase_{regress_type}_{experiment}.log`` from
    the current directory. Only lines that start with ``2019`` or ``2020``
    and contain ``' | '`` are considered; the text after the first ``' | '``
    is interpreted as follows:

    * ``Iteration ... N`` -- starts iteration ``N`` (last token, an int).
    * ``<TAB>Acquire (i, j) ... Kd`` -- records the pair ``(i, j)`` and the
      trailing float ``Kd`` for the current iteration.

    The mean Kd per iteration is plotted with a min-max band and saved to
    ``figures/Kd_over_iterations_{regress_type}_{experiment}.png``. For the
    ``'perprot'`` experiment the summary stops after the first iteration
    greater than 4.

    Args:
        regress_type: Regressor name; part of the log and figure names.
        experiment: Experiment name; part of the log and figure names.
        **kwargs: Only read by the disabled entropy section at the end
            (``chems``, ``prots``, ``chem2feature``, ``prot2feature``).

    Returns:
        None.
    """
    log_fname = ('iterate_davis2011kinase_{}_{}.log'.format(
        regress_type, experiment))

    iteration = 0
    iter_to_Kds = {}
    iter_to_idxs = {}

    with open(log_fname) as f:
        for raw_line in f:
            if not raw_line.startswith(('2019', '2020')):
                continue
            if ' | ' not in raw_line:
                continue

            line = raw_line.split(' | ')[1]

            if line.startswith('Iteration'):
                iteration = int(line.strip().split()[-1])
                iter_to_Kds.setdefault(iteration, [])
                iter_to_idxs.setdefault(iteration, [])

            elif line.startswith('\tAcquire '):
                fields = line.strip().split()

                Kd = float(fields[-1])
                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))

                # setdefault keeps an acquisition that precedes the first
                # 'Iteration' line under the initial iteration 0 instead of
                # raising KeyError.
                iter_to_Kds.setdefault(iteration, []).append(Kd)
                iter_to_idxs.setdefault(iteration, []).append(
                    (chem_idx, prot_idx))

    assert (iter_to_Kds.keys() == iter_to_idxs.keys())
    iterations = sorted(iter_to_Kds.keys())

    # Plot Kd over iterations.

    # ``plotted_iterations`` tracks exactly the iterations summarised below,
    # so the x and y sequences stay the same length when the loop stops
    # early.
    plotted_iterations = []
    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kds = iter_to_Kds[iteration]
        plotted_iterations.append(iteration)
        Kd_iter.append(np.mean(Kds))
        Kd_iter_max.append(max(Kds))
        Kd_iter_min.append(min(Kds))
        all_Kds += list(Kds)

        if iteration == 0:
            print('First average Kd is {}'.format(Kd_iter[0]))
        elif iteration > 4 and experiment == 'perprot':
            break

    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(plotted_iterations, Kd_iter)
    plt.plot(plotted_iterations, Kd_iter)
    plt.fill_between(plotted_iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    # NOTE: this unconditional return disables everything below; the entropy
    # plot is kept only for reference and never runs.
    return

    # Plot differential entropy of acquired samples over iterations.

    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        for i, j in iter_to_idxs[iteration]:
            chem = chems[i]
            prot = prots[j]
            X_acquired.append(chem2feature[chem] + prot2feature[prot])
        if len(X_acquired) <= 1:
            d_entropies.append(float('nan'))
        else:
            gaussian = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(gaussian.means_[0],
                                           gaussian.covariances_[0])
            d_entropies.append(gaussian.entropy())

    print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()