Example #1

import sys
from itertools import cycle, islice

import matplotlib.pyplot as plt
import numpy as np


def plot_clusters(coords, clusters, s=1):
    # Report a mismatch between coordinates and labels, then fail fast.
    if coords.shape[0] != clusters.shape[0]:
        sys.stderr.write(
            'Mismatch: {} cells, {} labels\n'
            .format(coords.shape[0], clusters.shape[0])
        )
    assert coords.shape[0] == clusters.shape[0]

    # Assign each cluster label a color by cycling through a fixed palette.
    colors = np.array(
        list(islice(cycle([
            '#377eb8', '#ff7f00', '#4daf4a',
            '#f781bf', '#a65628', '#984ea3',
            '#999999', '#e41a1c', '#dede00',
            '#ffe119', '#e6194b', '#ffbea3',
            '#911eb4', '#46f0f0', '#f032e6',
            '#d2f53c', '#008080', '#e6beff',
            '#aa6e28', '#800000', '#aaffc3',
            '#808000', '#ffd8b1', '#000080',
            '#808080', '#fabebe', '#a3f4ff'
        ]), int(max(clusters) + 1)))
    )
        
    plt.figure()
    plt.scatter(coords[:, 0], coords[:, 1],
                c=colors[clusters], s=s)
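
# Hypothetical usage sketch (not from the original source): random 2-D
# coordinates and integer labels stand in for a real embedding, just to show
# the input shapes plot_clusters expects.
def demo_plot_clusters():
    rng = np.random.RandomState(0)
    coords = rng.randn(500, 2)           # (n_cells, 2) embedding coordinates
    clusters = rng.randint(0, 5, 500)    # one integer cluster label per cell
    plot_clusters(coords, clusters, s=3)
    plt.show()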

def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    # Clip extreme predictions on a copy so the caller's array is untouched.
    y_unk_pred = np.copy(y_unk_pred)
    y_unk_pred[y_unk_pred > 10000] = 10000

    plt.figure()
    plt.scatter(y_unk_pred, var_unk_pred, alpha=0.5, c=-acquisition,
                cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()

def score_scatter(y_pred, y, var_pred, regress_type, prefix=''):
    # Clip predictions to [0, 10000] on a copy so the caller's array is untouched.
    y_pred = np.copy(y_pred)
    y_pred[y_pred < 0] = 0
    y_pred[y_pred > 10000] = 10000

    plt.figure()
    plt.scatter(y_pred, var_pred, alpha=0.3,
                c=(y - y.min()) / (y.max() - y.min()))
    plt.viridis()
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/variance_vs_pred_{}regressors{}.png'
                .format(prefix, regress_type), dpi=300)
    plt.close()
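
# Hypothetical usage sketch (not from the original source): score_scatter and
# acquisition_scatter both write PNGs into a local figures/ directory, which
# must already exist; every name and value below is a placeholder.
def demo_score_scatter():
    rng = np.random.RandomState(0)
    y = rng.uniform(0, 10000, 200)            # measured scores
    y_pred = y + rng.normal(0, 500, 200)      # noisy predictions
    var_pred = rng.uniform(0, 1, 200)         # predicted variance per point
    acq = -y_pred                             # placeholder acquisition values
    score_scatter(y_pred, y, var_pred, 'hypothetical_regressor')
    acquisition_scatter(y_pred, var_pred, acq, 'hypothetical_regressor')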
Example #4

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as ss
import seaborn as sns
from sklearn.manifold import TSNE  # assumed t-SNE implementation

VERBOSE = 2  # assumed module-level verbosity flag


def plot_mapping(curr_ds, curr_ref, ds_ind, ref_ind):
    # Embed the dataset and the reference separately with t-SNE; a fixed
    # random_state keeps the layouts reproducible across runs.
    tsne = TSNE(n_iter=400, verbose=VERBOSE, random_state=69)

    tsne.fit(curr_ds)
    plt.figure()
    coords_ds = tsne.embedding_[:, :]
    coords_ds[:, 1] += 100  # shift the dataset so the two embeddings do not overlap
    plt.scatter(coords_ds[:, 0], coords_ds[:, 1])

    tsne.fit(curr_ref)
    coords_ref = tsne.embedding_[:, :]
    plt.scatter(coords_ref[:, 0], coords_ref[:, 1])

    # Connect each matched pair; the None entries break the path so all
    # correspondences can be drawn with a single plot() call.
    x_list, y_list = [], []
    for r_i, c_i in zip(ds_ind, ref_ind):
        x_list.append(coords_ds[r_i, 0])
        x_list.append(coords_ref[c_i, 0])
        x_list.append(None)
        y_list.append(coords_ds[r_i, 1])
        y_list.append(coords_ref[c_i, 1])
        y_list.append(None)
    plt.plot(x_list, y_list, 'b-', alpha=0.3)
    plt.show()
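
# Hypothetical usage sketch (not from the original source): ds_ind and ref_ind
# are parallel arrays of row indices, one matched pair per alignment, and all
# inputs below are random placeholders.
def demo_plot_mapping():
    rng = np.random.RandomState(0)
    curr_ds = rng.randn(300, 50)     # query dataset, cells x features
    curr_ref = rng.randn(400, 50)    # reference dataset, cells x features
    ds_ind = rng.randint(0, 300, 100)
    ref_ind = rng.randint(0, 400, 100)
    plot_mapping(curr_ds, curr_ref, ds_ind, ref_ind)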
def latent_scatter(var_unk_pred, y_unk_pred, acquisition, **kwargs):
    chems = kwargs['chems']
    chem2feature = kwargs['chem2feature']
    idx_obs = kwargs['idx_obs']
    idx_unk = kwargs['idx_unk']
    regress_type = kwargs['regress_type']
    prot_target = kwargs['prot_target']

    chem_idx_obs = sorted({i for i, _ in idx_obs})
    chem_idx_unk = sorted({i for i, _ in idx_unk})

    feature_obs = np.array([chem2feature[chems[i]] for i in chem_idx_obs])
    feature_unk = np.array([chem2feature[chems[i]] for i in chem_idx_unk])

    # How far is each unacquired compound from its nearest acquired compound,
    # and does that distance track the predicted variance?
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=1).fit(feature_obs)
    dist = np.ravel(nbrs.kneighbors(feature_unk)[0])
    print('Distance Spearman rho = {}, P = {}'.format(
        *ss.spearmanr(dist, var_unk_pred)))
    print('Distance Pearson r = {}, P = {}'.format(
        *ss.pearsonr(dist, var_unk_pred)))

    X = np.vstack([feature_obs, feature_unk])
    labels = np.concatenate(
        [np.zeros(len(chem_idx_obs)),
         np.ones(len(chem_idx_unk))])
    sidx = np.argsort(-var_unk_pred)

    from fbpca import pca
    U, s, Vt = pca(
        X,
        k=3,
    )
    X_pca = U * s

    from umap import UMAP
    um = UMAP(
        n_neighbors=15,
        min_dist=0.5,
        n_components=2,
        metric='euclidean',
    )
    X_umap = um.fit_transform(X)

    from MulticoreTSNE import MulticoreTSNE as TSNE
    tsne = TSNE(
        n_components=2,
        n_jobs=20,
    )
    X_tsne = tsne.fit_transform(X)

    if prot_target is None:
        suffix = ''
    else:
        suffix = '_' + prot_target

    for name, coords in zip(
        ['pca', 'umap', 'tsne'],
        [X_pca, X_umap, X_tsne],
    ):
        plt.figure()
        sns.scatterplot(
            x=coords[labels == 1, 0],
            y=coords[labels == 1, 1],
            color='blue',
            alpha=0.1,
        )
        plt.scatter(
            x=coords[labels == 0, 0],
            y=coords[labels == 0, 1],
            color='orange',
            alpha=1.0,
            marker='x',
            linewidths=10,
        )
        plt.savefig('figures/latent_scatter_{}_ypred_{}{}.png'.format(
            name, regress_type, suffix),
                    dpi=300)
        plt.close()

        plt.figure()
        plt.scatter(x=coords[labels == 1, 0],
                    y=coords[labels == 1, 1],
                    c=ss.rankdata(var_unk_pred),
                    alpha=0.1,
                    cmap='coolwarm')
        plt.savefig('figures/latent_scatter_{}_var_{}{}.png'.format(
            name, regress_type, suffix),
                    dpi=300)
        plt.close()

        plt.figure()
        plt.scatter(x=coords[labels == 1, 0],
                    y=coords[labels == 1, 1],
                    c=-acquisition,
                    alpha=0.1,
                    cmap='hot')
        plt.savefig('figures/latent_scatter_{}_acq_{}{}.png'.format(
            name, regress_type, suffix),
                    dpi=300)
        plt.close()
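
# Hypothetical usage sketch (not from the original source): the keyword
# arguments mirror the structures latent_scatter unpacks, filled with random
# feature vectors; every name and value here is a placeholder. Plots are
# written to a local figures/ directory, which must already exist.
def demo_latent_scatter():
    rng = np.random.RandomState(0)
    n_chems, n_prots, n_feat = 200, 4, 128
    chems = ['chem{}'.format(i) for i in range(n_chems)]
    chem2feature = {c: list(rng.rand(n_feat)) for c in chems}
    idx_obs = [(i, j) for i in range(50) for j in range(n_prots)]
    idx_unk = [(i, j) for i in range(50, n_chems) for j in range(n_prots)]
    n_unk_chems = n_chems - 50
    var_unk_pred = rng.rand(n_unk_chems)             # one value per unknown compound
    y_unk_pred = rng.uniform(0, 10000, n_unk_chems)
    acquisition = -var_unk_pred                      # placeholder acquisition scores
    latent_scatter(var_unk_pred, y_unk_pred, acquisition,
                   chems=chems, chem2feature=chem2feature,
                   idx_obs=idx_obs, idx_unk=idx_unk,
                   regress_type='hypothetical_regressor', prot_target=None)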
Example #6

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.mixture import GaussianMixture


def parse_log(regress_type, experiment, **kwargs):
    log_fname = ('iterate_davis2011kinase_{}_{}.log'.format(
        regress_type, experiment))

    iteration = 0
    iter_to_Kds = {}
    iter_to_idxs = {}

    with open(log_fname) as f:

        while True:
            line = f.readline()
            if not line:
                break

            # Keep only timestamped lines of the form "<timestamp> | <message>".
            if not line.startswith('2019') and not line.startswith('2020'):
                continue
            if ' | ' not in line:
                continue

            line = line.split(' | ')[1]

            if line.startswith('Iteration'):
                iteration = int(line.strip().split()[-1])
                if iteration not in iter_to_Kds:
                    iter_to_Kds[iteration] = []
                if iteration not in iter_to_idxs:
                    iter_to_idxs[iteration] = []

                continue

            elif line.startswith('\tAcquire '):
                fields = line.strip().split()

                # The last field is the acquired Kd; fields[1] and fields[2]
                # hold the "(chem_idx, prot_idx)" pair.
                Kd = float(fields[-1])
                iter_to_Kds[iteration].append(Kd)

                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))
                iter_to_idxs[iteration].append((chem_idx, prot_idx))

                continue

    assert iter_to_Kds.keys() == iter_to_idxs.keys()
    iterations = sorted(iter_to_Kds.keys())

    # Plot Kd over iterations.

    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kd_iter.append(np.mean(iter_to_Kds[iteration]))
        Kd_iter_max.append(max(iter_to_Kds[iteration]))
        Kd_iter_min.append(min(iter_to_Kds[iteration]))
        all_Kds += list(iter_to_Kds[iteration])

        if iteration == 0:
            print('First average Kd is {}'.format(Kd_iter[0]))
        elif iteration > 4 and experiment == 'perprot':
            break

    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(iterations, Kd_iter)
    plt.plot(iterations, Kd_iter)
    plt.fill_between(iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    # NOTE: the early return below disables the differential-entropy analysis
    # that follows; remove it to run that section.
    return

    # Plot differential entropy of acquired samples over iterations.

    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        for i, j in iter_to_idxs[iteration]:
            chem = chems[i]
            prot = prots[j]
            X_acquired.append(chem2feature[chem] + prot2feature[prot])
        if len(X_acquired) <= 1:
            d_entropies.append(float('nan'))
        else:
            # Fit a single Gaussian to everything acquired so far and record
            # its differential entropy.
            gaussian = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(gaussian.means_[0],
                                           gaussian.covariances_[0])
            d_entropies.append(gaussian.entropy())

    print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()
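
# Hypothetical usage sketch (not from the original source): parse_log expects a
# log file named iterate_davis2011kinase_<regress_type>_<experiment>.log in the
# working directory, containing timestamped lines such as
#   <timestamp> | Iteration 3
#   <timestamp> | <tab>Acquire (12, 7) ... <Kd>
# where the trailing number is the acquired Kd. Both arguments below are
# placeholders, and plots are written to a local figures/ directory.
def demo_parse_log():
    parse_log('hypothetical_regressor', 'perprot')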