Exemplo n.º 1
0
from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_U
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr
import seaborn as sns
from NN_Assessment_utils import *
import pickle
import os
from scipy.stats import ttest_ind

# Create the DeepTCR_U object on GPU 1 and read the murine antigen data.
DTCRU = DeepTCR_U('Sequence_C', device='/gpu:1')
DTCRU.Get_Data(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=0, count_column=1, v_beta_column=2, j_beta_column=3,
)

# VAE trained with use_only_gene=True; keep the condensed pairwise
# Euclidean distances between the resulting feature vectors.
DTCRU.Train_VAE(use_only_gene=True, Load_Prev_Data=False)
distances_vae_gene = pdist(DTCRU.features, 'euclidean')

# VAE trained with use_only_seq=True; same distance computation.
DTCRU.Train_VAE(use_only_seq=True, Load_Prev_Data=False)
distances_vae_seq = pdist(DTCRU.features, 'euclidean')

# VAE trained with neither use_only_gene nor use_only_seq set.
DTCRU.Train_VAE(Load_Prev_Data=False)
Exemplo n.º 2
0
from DeepTCR.DeepTCR import DeepTCR_U
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr
import seaborn as sns
from NN_Assessment_utils import *
import pickle
import os
import matplotlib
# Use Arial for all matplotlib text.
matplotlib.rc('font', family='Arial')

# Create the DeepTCR_U object and read the murine antigen data.
DTCRU = DeepTCR_U('Murine_U')
DTCRU.Get_Data(
    directory='../../Data/Murine_Antigens',
    Load_Prev_Data=False,
    aa_column_beta=0, count_column=1, v_beta_column=2, j_beta_column=3,
)

# Distances from the different featurization methods.
# VAE trained with use_only_gene=True; keep the condensed pairwise
# Euclidean distances between the resulting feature vectors.
DTCRU.Train_VAE(use_only_gene=True, Load_Prev_Data=False)
distances_vae_gene = pdist(DTCRU.features, 'euclidean')

# VAE trained with use_only_seq=True.
DTCRU.Train_VAE(use_only_seq=True, Load_Prev_Data=False)
Exemplo n.º 3
0
"""This script is used to characterize the performance of various featurization
methods on TCRSeq data from 7 Human Antigens."""

from DeepTCR.DeepTCR import DeepTCR_U
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
import seaborn as sns
from NN_Assessment_utils import *
import pickle
import os
import pandas as pd

# Create the DeepTCR_U object and read the human antigen data.
DTCRU = DeepTCR_U('Human_U')
DTCRU.Get_Data(
    directory='../../Data/Human_Antigens',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=0,
    count_column=1,
    v_beta_column=2,
    j_beta_column=3,
)

# Distances from the different featurization methods.
# VAE trained with use_only_gene=True; keep the condensed pairwise
# Euclidean distances between the resulting feature vectors.
DTCRU.Train_VAE(use_only_gene=True, Load_Prev_Data=False)
distances_vae_gene = pdist(DTCRU.features, 'euclidean')

# VAE trained with use_only_seq=True; same distance computation.
DTCRU.Train_VAE(use_only_seq=True, Load_Prev_Data=False)
distances_vae_seq = pdist(DTCRU.features, 'euclidean')

# VAE trained with neither use_only_gene nor use_only_seq set.
DTCRU.Train_VAE(Load_Prev_Data=False)
Exemplo n.º 4
0
import numpy as np
import seaborn as sns
from scipy.spatial.distance import pdist, squareform
from NN_Assessment_utils import *
import pickle
import os
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import ttest_rel

# Directory that receives this script's outputs. exist_ok=True makes the
# creation a single call, so there is no window between an existence check
# and the mkdir in which another process could create the directory.
dir_results = 'Repertoire_Classification_Results'
os.makedirs(dir_results, exist_ok=True)

# Create the DeepTCR_U object and read the Rudqvist data.
DTCRU = DeepTCR_U('Repertoire_Classification')

DTCRU.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1, count_column=2,
    v_beta_column=7, d_beta_column=14, j_beta_column=21,
)

# VAE trained with use_only_gene=True. The pairwise Euclidean distances are
# expanded to a square matrix and handed to phenograph_clustering_freq; only
# the first of its two return values is kept.
DTCRU.Train_VAE(use_only_gene=True)
d_vae_gene = squareform(pdist(DTCRU.features, metric='euclidean'))
prop_vae_gene, _ = phenograph_clustering_freq(d_vae_gene, DTCRU)
Exemplo n.º 5
0
from DeepTCR.DeepTCR import DeepTCR_U

# Create the DeepTCR_U object on GPU 2.
DTCRU = DeepTCR_U('Rep_Dendrogram', device='/gpu:2')

# Read the Rudqvist data from its directory.
DTCRU.Get_Data(
    directory='../../Data/Rudqvist',
    Load_Prev_Data=False,
    aggregate_by_aa=True,
    aa_column_beta=1,
    count_column=2,
    v_beta_column=7,
    d_beta_column=14,
    j_beta_column=21,
)

# Train the VAE with accuracy_min=0.9.
DTCRU.Train_VAE(accuracy_min=0.9)

# Plot colour for each label passed to the dendrogram.
color_dict = {
    'Control': 'limegreen',
    '9H10': 'red',
    'RT': 'darkorange',
    'Combo': 'magenta',
}

# NOTE(review): Load_Prev_Data=True is passed here right after a fresh
# Train_VAE call; presumably it reuses previously saved dendrogram data.
# Confirm that this is intended.
DTCRU.Repertoire_Dendrogram(
    n_jobs=40,
    distance_metric='KL',
    dendrogram_radius=0.28,
    repertoire_radius=0.35,
    Load_Prev_Data=True,
    gridsize=6,
    color_dict=color_dict,
)
Exemplo n.º 6
0
# Finish the current figure: remove the y-axis ticks, blank out both axis
# labels, then display it.
plt.yticks([])
plt.xlabel('')
plt.ylabel('')
plt.show()

# Copy the inputs needed below off the existing DTCR object.
beta_sequences = DTCR.beta_sequences
v_beta, j_beta, d_beta = DTCR.v_beta, DTCR.j_beta, DTCR.d_beta
hla = DTCR.hla_data_seq
sample_id = DTCR.sample_id

# The 2-D UMAP embedding is cached in this pickle file. Set featurize to True
# to retrain the VAE and regenerate the cache instead of reading it.
file = 'cm038_x2_u.pkl'
featurize = False
if not featurize:
    # Reuse the embedding written by an earlier featurize run.
    with open(file, 'rb') as f:
        X_2 = pickle.load(f)
else:
    DTCR_U = DeepTCR_U('test_hum', device='/device:GPU:6')
    DTCR_U.Load_Data(
        beta_sequences=beta_sequences,
        v_beta=v_beta,
        d_beta=d_beta,
        j_beta=j_beta,
        hla=hla,
    )
    DTCR_U.Train_VAE(Load_Prev_Data=False, latent_dim=64, stop_criterion=0.01)
    # Embed the VAE features with UMAP and store the result for later runs.
    X_2 = umap.UMAP().fit_transform(DTCR_U.features)
    with open(file, 'wb') as f:
        pickle.dump(X_2, f, protocol=4)

# The first two embedding columns become the plot coordinates.
df_plot['x'], df_plot['y'] = X_2[:, 0], X_2[:, 1]
Exemplo n.º 7
0
"""
This script compares Hamming-distance clustering (distances computed with scipy.spatial.distance.pdist) to GLIPH's clustering algorithm
across different Hamming distance thresholds to compare clustering accuracy, as previously demonstrated
in the GLIPH manuscript. The GLIPH.csv file used in this analysis can either be generated by the Run_Gliph.py
script or viewed in the GitHub repository.
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from DeepTCR.DeepTCR import DeepTCR_U
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage,fcluster

# Create the DeepTCR_U object and read the Glanville data.
DTCRU = DeepTCR_U('Glanville_v_Hamming')
DTCRU.Get_Data(
    directory='../../Data/Glanville/',
    Load_Prev_Data=False,
    aa_column_beta=1,
    aggregate_by_aa=False,
)

method_dim = 'Hamming'
# Drop axis 1 of X_Seq_beta, then compute the condensed pairwise Hamming
# distances between the rows.
distances_hamming = pdist(np.squeeze(DTCRU.X_Seq_beta, axis=1), 'hamming')

# Clustering thresholds: 50 log-spaced values from 0.001 to 10.
r = np.logspace(np.log10(0.001), np.log10(10), num=50)

# Accumulators for the plot data.
x, y = [], []
total_seq = len(DTCRU.X_Seq_beta)
# The complete-linkage tree depends only on the distance vector, not on the
# threshold t, so build it once here rather than on every loop iteration.
Z = linkage(distances_hamming, method='complete')
for t in r:
    print(t)