Пример #1
0
def dimensionality_reduction(
        sample: pd.Series,
        background_df: pd.DataFrame,
        genes: List[str],
        col: str,
        method='trimap') -> Tuple[pd.DataFrame, hv.Scatter]:
    """
    Embed the background samples plus one n-of-1 sample in 2D and build a scatter plot.

    The sample is added as the last row, so the last row of the returned
    DataFrame always corresponds to the n-of-1 sample.

    Args:
        sample: n-of-1 sample. Gets own label (`N-of-1 - <sample[col]>`) and a larger size
        background_df: Background dataset
        genes: Genes (column names) to use in dimensionality reduction
        col: Column to use for color_index
        method: Method of dimensionality reduction. `trimap` or `tsne`

    Returns:
        Tuple of (DataFrame with columns `x`, `y`, `col` and `size`,
        Holoviews Scatter object of that DataFrame with `x` as kdim and
        `y`, `col`, `size` as vdims)

    Raises:
        AssertionError: If `method` is neither `trimap` nor `tsne`
    """
    # Raise explicitly rather than using `assert`, so the check is not
    # stripped under `python -O`. AssertionError is kept so callers see the
    # same exception type as before.
    if method not in ('trimap', 'tsne'):
        raise AssertionError('`method` must be either `trimap` or `tsne`')

    # `DataFrame.append` was removed in pandas 2.0. Concatenate the sample as a
    # one-row frame instead; `infer_objects` restores numeric dtypes that the
    # Series -> frame transpose turns into `object`.
    sample_row = sample.to_frame().T.infer_objects()
    combined = pd.concat([background_df, sample_row])

    reducer = trimap.TRIMAP() if method == 'trimap' else t_sne.TSNE()
    reduced = reducer.fit_transform(combined[genes])

    df = pd.DataFrame(reduced, columns=['x', 'y'])
    df[col] = background_df[col].tolist() + [f'N-of-1 - {sample[col]}']
    # Background points get size 1, the n-of-1 sample gets size 5
    df['size'] = [1] * len(background_df) + [5]
    return df, hv.Scatter(data=df, kdims=['x'], vdims=['y', col, 'size'])
Пример #2
0
def quest_16(_data):
    """
    Cluster the single cell data with the EM model and with k-means, then show
    both clusterings on the same 2D tSNE embedding (one figure per method).

    Only the 300 rows of `_data` with the largest variance are used
    (presumably rows are genes and columns are cells -- TODO confirm).
    The number of clusters / cell types is taken from the module-level `K[0]`.
    """

    def _show_clusters(coords, labels):
        # Scatter the embedding coloured by `labels`; axis limits run from the
        # 1st percentile to the maximum of each coordinate.
        plt.scatter(coords[:, 0], coords[:, 1], c=labels, s=4)
        plt.xlim(np.percentile(coords[:, 0], [1, 100]))
        plt.ylim(np.percentile(coords[:, 1], [1, 100]))
        plt.show()

    top_variance_rows = np.argsort(np.var(_data, axis=1))[-300:]
    expr = _data[top_variance_rows, :]
    n_rows, n_cols = expr.shape
    n_clusters = K[0]

    em_model = SingleCellExpressionModel(C=n_cols, G=n_rows, T=n_clusters)
    kmeans = KMeans(n_clusters=n_clusters).fit(expr.T)
    em_model.learn(expr, maxiter=100)
    posterior = em_model.type_posterior(expr)

    embedding = t_sne.TSNE(n_components=2).fit_transform(np.log(1 + expr.T))

    # tSNE of k-means clustering colored by centroids
    _show_clusters(embedding, kmeans.labels_)

    # tSNE of EM clustering colored by the most probable cell type
    _show_clusters(embedding, np.argmax(posterior, axis=1))
Пример #3
0
def _do_tsne(data, nr_components=2, init='random', plex=30,
             n_iter=1000, lr=200, rs=None):
    """Fit an sklearn TSNE model on `data` and return the embedding with the model

    Parameters
    ----------
    data : ndarray
        data to embed
    nr_components : int, optional
        dimension of the embedding, by default 2
    init : str, optional
        embedding initialisation, forwarded as `init`, by default 'random'
    plex : int, optional
        forwarded as `perplexity`, by default 30
    n_iter : int, optional
        forwarded as `n_iter`, by default 1000
    lr : int, optional
        forwarded as `learning_rate`, by default 200
    rs : int, optional
        forwarded as `random_state`, by default None

    Returns
    -------
    tuple, (transformed data, fitted model)

    """
    tsne_kwargs = dict(
        n_components=nr_components,
        init=init,
        perplexity=plex,
        random_state=rs,
        n_iter=n_iter,
        learning_rate=lr,
    )
    model = t_sne.TSNE(**tsne_kwargs)
    embedded = model.fit_transform(data)
    return embedded, model
Пример #4
0
def dim_reduction(X, y, n_comp=2, fault_nodes=None):
    """Reduce `X` with t-SNE (perplexity 5) and optionally plot selected classes.

    Args:
        X: Data to embed; passed straight to `TSNE.fit_transform`.
        y: Labels, indexed by row position of `X` (`y[n]` is the label of row n).
        n_comp: Number of embedding dimensions. Plotting uses the first two
            components, so it requires `n_comp >= 2`.
        fault_nodes: Optional sequence of label values. When given (and
            non-empty), one scatter series per label is drawn and shown.

    Returns:
        The embedded data when `fault_nodes` is empty or None; otherwise
        None (the embedding is only plotted).
    """
    tsne = t_sne.TSNE(n_components=n_comp, perplexity=5)
    X_red = tsne.fit_transform(X)

    if not fault_nodes:
        return X_red

    # The original also accumulated per-node copies of the points and labels
    # through an undefined name (`y_train`), which raised NameError. Those
    # accumulators were never used, so only the plotting is kept.
    for node in fault_nodes:
        rows = [n for n in range(len(X_red)) if y[n] == node]
        plt.scatter([X_red[n, 0] for n in rows],
                    [X_red[n, 1] for n in rows])

    # One legend entry per scatter series, in the order they were drawn
    plt.legend(["f{}".format(n) for n in fault_nodes])
    plt.show()
    return None
Пример #5
0
def quest_11(_data):
    """
    Plots of question 11 supplementary code (tSNE vs. PCA)

    Both 2D embeddings are computed on log(1 + `_data`.T) and shown side by
    side, with every point coloured by a Gaussian KDE estimate of the local
    density in the embedding.
    """

    def _density_panel(position, points, title):
        # Draw one subplot: density-coloured scatter, limits from the 1st
        # percentile to the maximum, no axis ticks.
        plt.subplot(position)
        density = stats.gaussian_kde(points.T)(points.T)
        plt.scatter(points[:, 0], points[:, 1], c=density, s=3)
        plt.title(title)
        plt.xlim(np.percentile(points[:, 0], [1, 100]))
        plt.ylim(np.percentile(points[:, 1], [1, 100]))
        plt.xticks([])
        plt.yticks([])

    log_data = np.log(1 + _data.T)
    tsne_embedding = t_sne.TSNE(n_components=2).fit_transform(log_data)
    pca_embedding = PCA(n_components=2).fit(log_data).transform(log_data)

    plt.figure(figsize=(8, 4))
    _density_panel(121, tsne_embedding, 'tSNE')
    _density_panel(122, pca_embedding, 'PCA')
    plt.show()
Пример #6
0
def _test_Tsne(synthetic_data):
    '''
    Example code: embed the dataset returned by `datasets.load_digits()` and
    `synthetic_data` with `tsne.TSNE`, and plot each embedding.

    The same TSNE object is re-fitted for the second dataset.
    '''
    # load the digits data and scale it by 1/255
    pixels = datasets.load_digits().data / 255.

    model = tsne.TSNE()
    digits_embedding = model.fit_transform(pixels)
    plot_with_images(digits_embedding, pixels, "Digits example- T-SNE")

    synthetic_embedding = model.fit_transform(synthetic_data)
    xs, ys = synthetic_embedding[:, 0], synthetic_embedding[:, 1]

    plt.figure()
    plt.title("Synthetic data- TSNE")
    plt.scatter(xs, ys)
    plt.show()
Пример #7
0
def make_tsne(data, inital_player):
    """Build a plotly figure of a 2D t-SNE embedding with one entry highlighted.

    `data` is standardised with `StandardScaler` before the embedding. Every
    row is drawn as a marker whose hover text is the first level of
    `data.index`; the row located via `data.index.get_loc(inital_player)` is
    drawn again as a separate, larger trace named after it.

    Returns the `go.Figure`.
    """
    reducer = t_sne.TSNE(n_components=2, learning_rate=750)
    standardized = StandardScaler().fit_transform(data)
    embedding = reducer.fit_transform(standardized)

    # All rows, without a legend entry
    everyone = go.Scatter(
        showlegend=False,
        x=embedding[:, 0],
        y=embedding[:, 1],
        text=data.index.get_level_values(0).values,
        hoverinfo='text',
        mode='markers',
        marker=dict(size=12, color='#1F77AA'),
    )

    # The highlighted row, as a single-point trace
    row = data.index.get_loc(inital_player)
    highlighted = go.Scatter(
        name=inital_player,
        x=[embedding[row, 0]],
        y=[embedding[row, 1]],
        text=inital_player,
        hoverinfo='text',
        mode='markers',
        marker=dict(size=14),
    )

    layout = go.Layout({
        'hovermode': 'closest',
        'margin': {'t': 0, 'r': 0, 'l': 0, 'b': 0},
        'legend': {'x': 0, 'y': 1},
        'paper_bgcolor': '#F8F3F1',
        'plot_bgcolor': '#F8F3F1',
    })
    return go.Figure(data=[everyone, highlighted], layout=layout)
def patches_tsne_visualization(patches_vector,
                               patches_mask,
                               ratio_thold=0.1,
                               savedir=None,
                               n_iter=5000):
    """Plot t-SNE embeddings of patch vectors, coloured by a mask-derived label.

    Each mask is labelled from the ratio `mask.sum() / mask.size`:
    'good' when the ratio is 0, 'defect' when it is >= `ratio_thold`,
    'neutral' otherwise. One figure is produced for each perplexity in
    (30, 40, 50).

    Args:
        patches_vector: 2-D array, one row per patch.
        patches_mask: Iterable of mask arrays, one per row of `patches_vector`.
        ratio_thold: Ratio at or above which a patch is labelled 'defect'.
        savedir: Directory to save `tsne_p<perplexity>.png` into. When None,
            the figures are created but not saved.
        n_iter: Number of t-SNE iterations.
    """
    from sklearn.manifold import t_sne
    import pandas as pd

    def _label_for(mask):
        # Classify one patch by the fraction of its mask that is set
        ratio = mask.sum() / float(mask.size)
        if ratio == 0:
            return 'good'
        if ratio >= ratio_thold:
            return 'defect'
        return 'neutral'

    mask_labels = np.array([_label_for(mask) for mask in patches_mask])
    # Unpacking also enforces that `patches_vector` is 2-D
    _, source_dims = patches_vector.shape
    sp = sns.color_palette('muted')
    color_palette = {'good': sp[2], 'defect': sp[3], 'neutral': sp[7]}
    for perplexity in [30, 40, 50]:
        sk_tsne = t_sne.TSNE(n_components=2,
                             perplexity=perplexity,
                             n_iter=n_iter,
                             init='random',
                             verbose=1)
        embedded = sk_tsne.fit_transform(patches_vector)
        df = pd.DataFrame(embedded, columns=('x', 'y'))
        df['label'] = mask_labels
        plt.figure(figsize=(12, 8))
        plt.title('Dimension:%d  Perplexity:%d  Iteration:%d' %
                  (source_dims, perplexity, n_iter))
        ax = sns.scatterplot(x='x',
                             y='y',
                             hue='label',
                             data=df,
                             palette=color_palette)
        # NOTE(review): the original called `ax.xaxis.info.set_visible(0)`, but
        # matplotlib's Axis has no `info` attribute. Presumably the intent was
        # to hide the 'x' / 'y' axis labels -- confirm.
        ax.xaxis.label.set_visible(False)
        ax.yaxis.label.set_visible(False)
        # `savedir` defaults to None, which `path.join` cannot handle
        if savedir is not None:
            plt.savefig(path.join(savedir, 'tsne_p%d.png' % perplexity))
Пример #9
0
def tsne_plot(X,
              y,
              perplexity=20,
              title='t-SNE',
              label_idxs=[0, 1, 2, 3],
              label_names=['Right', 'Left', 'Tongue', 'Feet'],
              fig=None,
              ax_idx=0):
    ''' Visualize the EEG trials in optimized 2D embedding space.
    Input:
        - X: EEG trials (numpy array of shape (n_trials, n_channels, n_samples)).
             Each trial is flattened before the embedding.
        - y: Labels (numpy array of shape (n_trials,)).
        - perplexity: Hyperparameter for t-SNE (int).
        - title: Title of the axes (string).
        - label_idxs: Label values to plot. Each value also indexes the
                      colour list and `label_names`, so with the built-in
                      colours it must be in 0..3. Not mutated.
        - label_names: Legend names, indexed by label value. Not mutated.
        - fig: Existing figure to draw into; a new one is created when None.
        - ax_idx: Index into `fig.get_axes()` selecting the axes to use
                  (only when `fig` is given).
    Output:
        - The figure containing the 2D scatter plot (feature 1 vs feature 2).
    '''
    from sklearn.manifold import t_sne
    if fig is not None:
        ax = fig.get_axes()[ax_idx]
    else:
        fig, ax = plt.subplots()

    n_trials = X.shape[0]
    out = t_sne.TSNE(n_components=2,
                     perplexity=perplexity,
                     n_iter=500,
                     random_state=0).fit_transform(X.reshape((n_trials, -1)))

    colors = ['b', 'r', 'g', 'orange']
    # Plot each class straight from the embedding. Stacking the per-class
    # groups into one `np.array` fails on NumPy >= 1.24 whenever the classes
    # have different numbers of trials (ragged array).
    for c in label_idxs:
        ax.scatter(*out[y == c].T, c=colors[c], alpha=0.7)

    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.legend([label_names[l] for l in label_idxs], loc=0)
    ax.set_title(title)
    return fig
# Use the 2017 data and fill any NaNs:
# columns that are entirely NaN are dropped, remaining gaps get the column median
recents = data[data.Year == 2017]
recents = recents.dropna(axis=1, how="all")
recents = recents.fillna(recents.median())

# Use only these specific features
columns = [
    'Log GDP per capita', 'Social support', 'Healthy life expectancy at birth',
    'Freedom to make life choices', 'Generosity', 'Perceptions of corruption',
    'Positive affect', 'Negative affect', 'Confidence in national government',
    'Democratic Quality', 'Delivery Quality'
]

# Transform the data with TSNE (default parameters, no fixed random state,
# so the embedding differs between runs)
tsne = t_sne.TSNE()
transformed = pd.DataFrame(tsne.fit_transform(recents[columns]))
# Create the data object from the embedded coordinates
# (the second argument is presumably the list of column labels -- TODO confirm)
cluster_data = oe.data(transformed, [0, 1])

# Create the ensemble: 20 k-means runs on the 'parent' data, each stored under
# its own name (the trailing 10 is presumably the number of clusters -- TODO confirm)
ensemble = oe.cluster(cluster_data)
for i in range(20):
    # NOTE(review): the name has an unbalanced '(' -- looks like a typo for
    # 'kmeans({i})-tsne'; left as-is because it is a runtime key
    name = f'kmeans({i}-tsne'
    ensemble.cluster('parent', 'kmeans', name, 10)

# Create the cluster labels by combining the 20 runs (co-occurrence linkage, threshold 0.5)
preds = ensemble.finish_co_occ_linkage(threshold=0.5)

# Add Life Ladder to columns
columns = [
    create_scatter(key, 4, 3, i)
    i += 1

# Restrict the data to the countries that have a row for 2005
t = data[data['Year'] == 2005].copy()
countries = list(t['Country name'].values)
filtered = data[data['Country name'].isin(countries)]

# Box plot of 'Life Ladder' per year for that fixed set of countries
filtered[['Year', 'Life Ladder']].set_index('Year').boxplot(by='Year',
                                                            grid=False)
# Blank out the figure-level title, then set the axes title and x label
plt.suptitle("")
plt.title('Life Ladder - Same Countries')
plt.xlabel('Year')

from sklearn.manifold import t_sne

# NOTE: `t` is rebound here from the 2005 DataFrame to the TSNE model
t = t_sne.TSNE()
# Fill remaining NaNs with the column median (this overwrites the global `data`)
data = data.fillna(data.median())
transformed = t.fit_transform(data[[
    'Log GDP per capita', 'Social support', 'Healthy life expectancy at birth',
    'Freedom to make life choices', 'Generosity', 'Perceptions of corruption',
    'Positive affect', 'Negative affect', 'Confidence in national government',
    'Democratic Quality', 'Delivery Quality'
]].values)

# Scatter of the embedding, coloured by the 'Life Ladder' value of each row
plt.scatter(transformed[:, 0], transformed[:, 1], c=data['Life Ladder'].values)

# Map every unique region to a consecutive integer code (0, 1, 2, ...).
# `i` ends up equal to the number of regions.
regions = {x: 0 for x in regs.Region.unique()}
i = 0
for r in regions:
    regions[r] = i
    i += 1
Пример #12
0
    keep='last')]
# Ratio of field-goal attempts to three-point attempts
# NOTE(review): rows with '3PA' == 0 would give inf/NaN here -- confirm they are handled
stats_for_similar[
    'FGA/3A'] = stats_for_similar['FGA'] / stats_for_similar['3PA']

# Keep only the similarity columns
stats_for_similar = stats_for_similar[cols_for_sim]

# ...and of those, only the numeric ones
stats_for_similar = stats_for_similar.select_dtypes(['number'])

#fig = make_tsne(s, 'Stephen Curry')
#fig1 = make_tsne(s, 'Kevin Durant')

# Survival model: duration column 'NBA_Experience', event column 'active'
cph = CoxPHFitter()
cph.fit(stats_for_surv, 'NBA_Experience',
        event_col='active')  # fit model once at the beginning

tsne = t_sne.TSNE(n_components=2, learning_rate=750)  # fit tsne once at the beginning
# Standardise the features before embedding them
X_std = StandardScaler().fit_transform(stats_for_tsne)
fit = tsne.fit_transform(X_std)

# 5-nearest-neighbour index over the standardised survival stats
kn = NearestNeighbors(n_neighbors=5)
# NOTE(review): `drop` is not used anywhere in the visible code
drop = ['FG_pg', '2P_pg', '3P_pg', 'FT_pg']
stand = StandardScaler()
scaled = stand.fit_transform(stats_for_surv)
kn.fit(scaled)

counter = 0

# Work on a copy of the survival stats whose index (player names) has '.' and
# "'" stripped.
stats_for_similar = stats_for_surv.copy()
# regex=False makes both replacements literal. Without it the result depends
# on the pandas version: '.' may be treated as a regex wildcard (emptying every
# name) or trigger a FutureWarning.
stats_for_similar.index = stats_for_similar.index.str.replace('.', '', regex=False)
stats_for_similar.index = stats_for_similar.index.str.replace("'", '', regex=False)