Пример #1
0
import sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import loompy
import velocyto as vcy
import logging
import pandas as pd
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.interpolate import interp1d

# In[4]:

# Load the velocyto object from the first input file.
# NOTE(review): `snakemake` is not defined or imported in the visible code;
# presumably it is injected by Snakemake's `script:` directive — confirm.
vlm = vcy.load_velocyto_hdf5(snakemake.input[0])

# In[ ]:

# In[ ]:

## Colour codes consistent with the main text.

from collections import defaultdict

# Labels without an explicit colour below fall back to 'Grey' on lookup.
monocytelabels = defaultdict(
    lambda: 'Grey',
    {
        '4': '#9382ae',
        '3': '#fdceb8',
        '12': '#df5974',
    },
)
Пример #2
0
def main():
    """Run the velocyto workflow on the HDF5 object given on the command line.

    Expects exactly one command-line argument (``sys.argv[1]``): the path of
    a velocyto HDF5 file. The object is loaded, filtered, normalized,
    kNN-smoothed, fitted, embedded with t-SNE, and finally written to
    ``<input path without extension>_tsne33_velocity.hdf5``.

    Raises:
        SystemExit: with status 1, after printing a usage message to stderr,
            when the number of command-line arguments is not exactly one.
    """
    # Local import: only needed to derive the output path.
    import os

    if len(sys.argv) != 2:
        # A usage error must be visible to shells and pipelines: report on
        # stderr and exit with a non-zero status instead of "success".
        print('runs the velocyto workflow on a given hdf5 object',
              file=sys.stderr)
        print('usage: velocyto_workflow <path to file>', file=sys.stderr)
        sys.exit(1)

    input_path = sys.argv[1]
    # Strip the real extension rather than blindly dropping 5 characters, so
    # paths that do not end in ".hdf5" (e.g. ".h5") are not mangled.
    output_path = os.path.splitext(input_path)[0] + '_tsne33_velocity.hdf5'

    print('loading data')
    vlm = vcy.load_velocyto_hdf5(input_path)

    print(len(vlm.ca['CellID']), 'cells')
    print(len(vlm.ra['Gene']), 'genes')

    # Keep cells whose `initial_Ucell_size` is above its 0.5th percentile.
    print('filtering cells')
    vlm.filter_cells(bool_array=vlm.initial_Ucell_size > np.percentile(
        vlm.initial_Ucell_size, 0.5))

    # Score genes by CV vs. mean, then keep the genes selected by that score.
    print('filtering genes')
    vlm.score_cv_vs_mean(3000, plot=False, max_expr_avg=35)
    vlm.filter_genes(by_cv_vs_mean=True)

    print(len(vlm.ca['CellID']), 'cells')
    print(len(vlm.ra['Gene']), 'genes')

    # Optional step, currently disabled: use the sample name (the CellID
    # prefix before ':') as the cluster label.
    #print('setting sample names as clusters')
    #samplenames = list(map(lambda x: x.split(':')[0], vlm.ca['CellID']))
    #vlm.ca['sample_name'] = samplenames
    #vlm.set_clusters(vlm.ca["sample_name"])

    # Normalize S and U by their axis-0 sums, targeting the mean of those sums.
    print('normalizing data matrices')
    vlm._normalize_S(relative_size=vlm.S.sum(0),
                     target_size=vlm.S.sum(0).mean())
    vlm._normalize_U(relative_size=vlm.U.sum(0),
                     target_size=vlm.U.sum(0).mean())

    print('running pca')
    vlm.perform_PCA()

    print('knn smoothing')
    vlm.knn_imputation(n_pca_dims=15,
                       k=500,
                       balanced=True,
                       b_sight=3000,
                       b_maxl=1500,
                       n_jobs=20)

    print('fit gammas')
    vlm.fit_gammas()

    print('calculate velocity')
    vlm.predict_U()
    vlm.calculate_velocity()
    vlm.calculate_shift(assumption="constant_velocity")
    vlm.extrapolate_cell_at_t(delta_t=1.)

    # t-SNE on the first 15 principal components; the fixed seed (33) is what
    # the "_tsne33_" part of the output file name refers to.
    print('running tsne')
    bh_tsne = TSNE(random_state=33)
    vlm.ts = bh_tsne.fit_transform(vlm.pcs[:, :15])

    print('projection of velocity onto embeddings')
    vlm.estimate_transition_prob(hidim="Sx_sz",
                                 embed="ts",
                                 transform="sqrt",
                                 psc=1,
                                 n_neighbors=3500,
                                 knn_random=True,
                                 sampled_fraction=0.5)

    print('calculate embedding shift')
    vlm.calculate_embedding_shift(sigma_corr=0.05, expression_scaling=True)

    print('calculate grid arrows')
    vlm.calculate_grid_arrows(smooth=0.8, steps=(40, 40), n_neighbors=100)

    print('saving hdf5')
    vlm.to_hdf5(output_path)
import loompy
import glob
import velocyto as vcy
import numpy as np
from sklearn.manifold import TSNE

#print('reading loom file')
#vlm = vcy.VelocytoLoom('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/velocyto/all10x.loom')

# Load the velocyto object from its HDF5 file.
print('loading data')
vlm = vcy.load_velocyto_hdf5(
    '/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/velocyto/all10x-downsampled-36.hdf5')

# Report the dataset dimensions before filtering.
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

# Keep only cells whose `initial_Ucell_size` exceeds its 0.5th percentile.
print('filtering cells')
ucell_size = vlm.initial_Ucell_size
size_cutoff = np.percentile(ucell_size, 0.5)
vlm.filter_cells(bool_array=ucell_size > size_cutoff)

# Score genes by CV vs. mean, then keep the genes selected by that score.
print('filtering genes')
vlm.score_cv_vs_mean(3000, plot=False, max_expr_avg=35)
vlm.filter_genes(by_cv_vs_mean=True)

# Report the dimensions again after filtering.
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

#print('setting sample names as clusters')
#samplenames = list(map(lambda x: x.split(':')[0], vlm.ca['CellID']))
#vlm.ca['sample_name'] = samplenames
Пример #4
0
]
# Label every row of `cellid` by strain: samples listed in `ab1` become
# "ab1", everything else "renca".
is_ab1_sample = cellid["sample"].isin(ab1)
cellid["strain"] = np.where(is_ab1_sample, "ab1", "renca")

# Same scheme for the response label, driven by `respondervec`.
is_responder_sample = cellid["sample"].isin(respondervec)
cellid["response"] = np.where(is_responder_sample, "responder",
                              "nonresponder")

# In[ ]:

# Read the target-cell table and select the index labels of the `cellid`
# rows whose `cellnames` value appears in its `loom_cellid` column.
targetmono = pd.read_csv(snakemake.input[3])
monos = targetmono["loom_cellid"]
ind = cellid["cellnames"].isin(monos)
cluster15mono = cellid[ind].index

# In[ ]:

# Pull the embedding deltas out of the renca velocyto object, restrict them
# to the selected cells and compute the squared length of each shift.
renca = vcy.load_velocyto_hdf5(snakemake.input[1])
delta_embedding_renca = pd.DataFrame(renca.delta_embedding,
                                     index=renca.ca["CellID"])

# Selected cells that are absent from this object come back as NaN rows
# after the reindex and are dropped.
renca_ly6_cells_deltas = delta_embedding_renca.reindex(cluster15mono).dropna()
renca_ly6_cells_deltas.columns = ["deltax", "deltay"]

# `vel` is deltax^2 + deltay^2 (no square root is taken).
renca_ly6_cells_deltas["vel"] = (renca_ly6_cells_deltas["deltax"] ** 2 +
                                 renca_ly6_cells_deltas["deltay"] ** 2)

# Attach the cell metadata; the inner join keeps only shared index labels.
ly6crenca = pd.concat([renca_ly6_cells_deltas, cellid], axis=1, join="inner")

# Drop the reference to the loaded object now that it is no longer needed.
del renca

# In[ ]:

# Extract projections from the ab1 velocyto object and calculate momentum.
# NOTE(review): this rebinds `ab1`, which the `strain` labelling earlier in
# this script passes to `.isin(...)`. After this line the name refers to the
# loaded velocyto object instead — confirm nothing later relies on the old
# value.
ab1 = vcy.load_velocyto_hdf5(snakemake.input[0])
import loompy
import glob
import velocyto as vcy
import numpy as np
from sklearn.manifold import TSNE

# Load the velocyto object from its HDF5 file.
print('loading data')
vlm = vcy.load_velocyto_hdf5(
    '/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/velocyto/180831.hdf5')

# Report the dataset dimensions before gene filtering.
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

# Cell filtering is left disabled in this script.
#print('filtering cells')
#vlm.filter_cells(bool_array=vlm.initial_Ucell_size > np.percentile(vlm.initial_Ucell_size, 0.5))

# Score genes by CV vs. mean (the `max_expr_avg=35` variant is disabled),
# then keep the genes selected by that score.
print('filtering genes')
#vlm.score_cv_vs_mean(3000, plot=False, max_expr_avg=35)
vlm.score_cv_vs_mean(3000, plot=False)
vlm.filter_genes(by_cv_vs_mean=True)

# Report the dimensions again after filtering.
print(len(vlm.ca['CellID']), 'cells')
print(len(vlm.ra['Gene']), 'genes')

# Normalize S and U by their axis-0 sums, targeting the mean of those sums.
print('normalizing data matrices')
s_totals = vlm.S.sum(0)
vlm._normalize_S(relative_size=s_totals, target_size=s_totals.mean())
u_totals = vlm.U.sum(0)
vlm._normalize_U(relative_size=u_totals, target_size=u_totals.mean())

print('running pca')
vlm.perform_PCA()
Пример #6
0
# Project the velocities onto the "ts" embedding, with a fixed random seed
# for the neighbour sampling.
vlm.estimate_transition_prob(
    hidim="Sx_sz",
    embed="ts",
    transform="sqrt",
    psc=1,
    knn_random=True,
    sampled_fraction=0.3,
    random_seed=42,
)
vlm.calculate_embedding_shift(sigma_corr=0.05, expression_scaling=True)
vlm.calculate_grid_arrows(smooth=0.5, steps=(40, 40), n_neighbors=50)

# Save the fully processed object to disk ...
vlm.to_hdf5("combined.hdf5")

# ... and load it back from that file; everything below works on the
# reloaded object.
vlm = vcy.load_velocyto_hdf5("combined.hdf5")


def despline():
    """Remove the top and right spines of the current matplotlib axes.

    Operates on ``plt.gca()`` and also restricts tick marks to the left
    (y axis) and bottom (x axis) sides. Returns nothing.
    """
    axes = plt.gca()
    # Hide the spines in the same order as before: right first, then top.
    for side in ('right', 'top'):
        axes.spines[side].set_visible(False)
    # Ticks only where a spine is still drawn.
    axes.yaxis.set_ticks_position('left')
    axes.xaxis.set_ticks_position('bottom')


def minimal_xticks(start, end):
    end_ = np.around(end, -int(np.log10(end)) + 1)
    xlims = np.linspace(start, end_, 5)