Exemplo n.º 1
0
    & (annotations['Sample Source'] == 'in vivo')].index
# Set the platform category of the rows selected by `weird_index`.
annotations.loc[weird_index, 'Platform Category'] = 'Illumina V4 2'
# Keep only rows whose 'Tissue Type' is neither 'skin' nor 'spleen'.
# np.isin is the supported replacement for np.in1d (deprecated since
# NumPy 2.0) and yields the same boolean mask for this 1-D input.
annotations = annotations.loc[
    ~np.isin(annotations['Tissue Type'].values, ['skin', 'spleen'])]

# In[4]:

# Restrict the columns of `data` to the labels in the annotation index, then
# hand the result to the project's percentile transform.
data = functions.transform_to_percentile(data.loc[:, annotations.index])

# The gene variance fraction only needs to be computed if that has not been done already; above, a previously calculated version has already been read into the gene dataframe.

# In[5]:

# Rename the platform column to the underscore form before calling
# calculate_platform_dependence. Rebinding the result instead of using
# inplace=True avoids mutating a frame that was produced by slicing and keeps
# the statement chainable.
annotations = annotations.rename(
    columns={'Platform Category': 'Platform_Category'})
genes = functions.calculate_platform_dependence(data, annotations)

# In[6]:

pca = sklearn.decomposition.PCA(n_components=10, svd_solver='full')
# Build the PCA input once: rows of `data` whose platform variance fraction is
# <= 1.0, percentile-transformed and transposed. The original evaluated this
# identical expression separately for fit() and transform().
# NOTE(review): assumes functions.transform_to_percentile is deterministic, so
# a single evaluation yields the same matrix as two - confirm.
pca_input = functions.transform_to_percentile(
    data.loc[genes.Platform_VarFraction.values <= 1.0]).transpose()
pca.fit(pca_input)
pca_coords = pca.transform(pca_input)

# In[7]:

# Copy the index labels into a regular column named 'display_metadata'.
annotations['display_metadata'] = annotations.index
    fig,
    auto_open=False,
    filename=
    '/users/pwangel/PlotlyWorkspace/combine_data/naive_stemcells/naive_sc_comp1_2_3.html'
)

# Reorder/select the conversion table by the row labels of `data`, then attach
# the second row of the fitted PCA component matrix as the 'PC2 loading' column.
genes_conversion = genes_conversion.loc[data.index]
genes_conversion['PC2 loading'] = pca.components_[1, :]
# Show the rows whose symbol appears in `gene_list`. np.isin replaces the
# deprecated np.in1d (NumPy 2.0) and gives the same mask for 1-D input.
print(genes_conversion.loc[np.isin(genes_conversion.symbol, gene_list), :])

# NOTE(review): `stop` is not defined anywhere in the visible code. If it is
# undefined at runtime this line raises NameError and halts execution here -
# presumably a deliberate breakpoint; confirm before relying on the code below.
print(stop)
# Use the dataset identifier (cast to int, then to str) as the platform
# category, and label the annotation rows with the column labels of `data`.
annotations['Platform_Category'] = annotations.Dataset.values.astype(
    int).astype(str)
annotations.index = data.columns.values
# Keep only rows whose values sum to more than 10.
data = data.loc[data.sum(axis=1) > 10, :]
# `data` is already filtered by the line above, so it is passed directly; the
# original re-applied the same `sum > 10` mask, which selects every remaining
# row and only cost an extra pass and copy.
dataset_varFrac = functions.calculate_platform_dependence(data, annotations)
# Attach the conversion columns, keeping only index labels present in both.
dataset_varFrac = dataset_varFrac.merge(genes_conversion,
                                        how='inner',
                                        left_index=True,
                                        right_index=True)

# Boolean mask over rows: platform variance fraction strictly below 0.15.
low_dependence = dataset_varFrac.Platform_VarFraction.values < 0.15
# Percentile-transform the selected rows, transpose, then fit the PCA and
# project the same matrix in one call.
percentile_subset = functions.transform_to_percentile(data.loc[low_dependence])
output = pca.fit_transform(percentile_subset.transpose())
print(pca.explained_variance_ratio_)

data_to_plot = []
for i_type in np.unique(annotations.LM_Group_COLOR.values.astype(str)):

    sel = (annotations.LM_Group_COLOR == i_type)
    hover = np.core.defchararray.add(
Exemplo n.º 3
0
# Load the two tab-separated tables, using the first column of each as index.
expression = pd.read_csv(
    '../data/interim/mouse_atlas/mouse_atlas_expression.tsv',
    sep='\t',
    index_col=0,
)
samples = pd.read_csv(
    '../data/interim/mouse_atlas/mouse_atlas_samples.tsv',
    sep='\t',
    index_col=0,
)


# In[51]:


# Rename the 'Platform' column to 'Platform_Category' (returns a new frame).
samples = samples.rename({'Platform': 'Platform_Category'}, axis='columns')


# In[52]:


# Compute the platform dependence of each gene and order the resulting table
# by its 'VarFraction' column (ascending, the sort_values default).
vp = (
    functions.calculate_platform_dependence(expression, samples)
    .sort_values(by=['VarFraction'])
)
# Fire the `notify` line magic once the computation above has finished.
get_ipython().run_line_magic('notify', '-m "The cell has finished running"')


# In[28]:


# Bare expression: evaluates the (rows, columns) shape of `samples`; it is only
# shown when this is the last expression of a notebook cell.
samples.shape


# In[31]:


# Bare expression: evaluates the (rows, columns) shape of `expression`; it is
# only shown when this is the last expression of a notebook cell.
expression.shape
Exemplo n.º 4
0
# Keep only the columns named by the respective annotation indexes.
data = data[annotations.index]
yidis_data = yidis_data[yidis_annotations.index]

# Remember the original row labels, then replace every '.' and '-' in the
# labels with '_'.
old_genes = yidis_data.index.values
yidis_data.index = [
    label.replace('.', '_').replace('-', '_')
    for label in yidis_data.index.values
]

# NOTE(review): `stop` is not defined anywhere in the visible code. If it is
# undefined at runtime this line raises NameError and halts execution here -
# presumably a deliberate breakpoint; confirm before relying on the code below.
print(stop)

#data = functions.transform_to_percentile(data)

# Extend the module search path with an absolute, machine-specific directory so
# that `mega_functions` can be imported from it.
# NOTE(review): this path only resolves on the original author's machine;
# consider making it configurable.
sys.path.append('/Users/pwangel/Gene_Analysis/combine_data')
import mega_functions

print("Calculating platform dependence")
# Platform dependence computed by the `functions` module on the same inputs.
python_platform_varPart = functions.calculate_platform_dependence(
    yidis_data, yidis_annotations)
# Counterpart from `mega_functions`, called on the same inputs.
# NOTE(review): the meaning of the positional arguments 0.2 and False is not
# visible here - presumably a cut threshold and a flag; confirm against
# mega_functions.cut_genes_that_depend_on_platform.
R_platform_varPart = mega_functions.cut_genes_that_depend_on_platform(
    yidis_data, yidis_annotations, 0.2, False)

import matplotlib
import matplotlib.pyplot as pyplot

# Scatter the platform variance fraction from `python_platform_varPart`
# against the `Batch` values of `yidis_genes`, using marker size 10.
# NOTE(review): the x label says "R Variance Partition" while the x data comes
# from `python_platform_varPart` - confirm the label is intended.
pyplot.scatter(
    x=python_platform_varPart.Platform_VarFraction,
    y=yidis_genes.Batch.values,
    s=10,
)
pyplot.xlabel("R Variance Partition Platform Only")
pyplot.ylabel("R Yidis Platform and Celltype")

pyplot.show()

pyplot.scatter(python_platform_varPart.Platform_VarFraction,