'Hjurp', 'Kpna2', 'Kif23', 'Cks2', 'Dtl', 'Top2a', 'Bub1', 'Arl6ip1', 'Dlgap5', 'Ube2s', 'Nuf2', 'Hmmr', 'Cdc20', 'Birc5', ] dscc = ds.query_features_by_name(genes_cc2, inplace=False) dscc.counts.log(inplace=True) vsu = dscc.dimensionality.umap() fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(9, 5)) genes_plot = ['log_virus_reads_per_million', 'Ccne2', 'Ccnd1', 'Ccnb1'] for icol, gene in enumerate(genes_plot): ax = axs[0][icol] dscc.plot.scatter_reduced_samples(vsu, color_by=gene, alpha=0.4, ax=ax, s=10) ax.set_title(gene) ax.set_axis_off() ax = axs[1][icol] if gene in dscc.counts.index:
dss = ds.split('clusterN_SNV_{:}'.format(ic)) comp = dss[True].compare(dss[False]) # FIXME: maybe look symmetrically for up- and downregulated comp['diff'] = dss[True].counts.mean(axis=1) - dss[False].counts.mean( axis=1) genesi = comp.loc[comp['diff'] > 0, 'P-value'].nsmallest(n=5).index.values genes[ic] = genesi genes_all = np.unique(np.concatenate(list(genes.values()))) with open('../data/genes_diff_expressed_clustersSNV.tsv', 'wt') as f: f.write('\t'.join(genes_all)) dsv = ds.query_features_by_name(genes_all) # Plot distributions fig, axs = plt.subplots(3, 10, figsize=(17, 7), sharex=True, sharey=True) axs = axs.ravel() for ax, gene in zip(axs, genes_all): df = np.log10(0.1 + dsv.counts.loc[[gene]].T) df['clusterN'] = dsv.samplesheet['clusterN_SNV'] sns.boxplot( data=df, y=gene, x='clusterN', ax=ax, order=clusters, ) ax.grid(axis='y')
# Calculate transcriptome distances within and between SNV clusters.
# NOTE(review): `Dataset`, `clusters`, `clustersu`, `cdist` and `squareform`
# come from outside this fragment.
ds = Dataset(
    samplesheet='dengue',
    counts_table='dengue',
    featuresheet='humanGC38',
)
ds.samplesheet['cluster_SNV'] = clusters
ds.counts.normalize(inplace=True)
ds.rename(axis='features', column='GeneName', inplace=True)
ds.feature_selection.unique(inplace=True)

# Restrict to differentially expressed genes, read back from the
# tab-separated list stored on disk.
with open('../data/genes_diff_expressed_clustersSNV.tsv', 'rt') as f:
    genes = f.read().split('\t')
dsd = ds.query_features_by_name(genes)
dsd.counts.log(inplace=True)

# Split by cluster label; the result is indexed by cluster below.
dsp = dsd.split('cluster_SNV')

# dclut maps an unordered cluster pair, frozenset((c1, c2)), to a flat array
# of distances. For c1 == c2 the key holds a single element.
dclut = {}
for i1, c1 in enumerate(clustersu):
    # Transposed counts -- presumably one row per sample; verify orientation.
    ge1 = dsp[c1].counts.values.T
    # Visit c2 only up to and including c1 so each unordered pair occurs once.
    for c2 in clustersu[:i1 + 1]:
        print(c1, c2)
        ge2 = dsp[c2].counts.values.T
        d = cdist(ge1, ge2)
        if c1 == c2:
            # Self-comparison: condense the symmetric matrix so each pair is
            # counted once and the zero diagonal is dropped.
            d = squareform(d)
        else:
            # Different clusters: every entry is a distinct pair.
            d = d.ravel()
        dclut[frozenset((c1, c2))] = d