示例#1
0
from DeepTCR.DeepTCR import DeepTCR_SS, DeepTCR_U
import numpy as np
import seaborn as sns

# Run the supervised sequence classifier: build a DeepTCR_SS model named
# 'Sequence_C', load the data under ../../Data/Murine_Antigens, cross-validate,
# then ask for representative sequences.
DTCRS = DeepTCR_SS('Sequence_C')
# Load_Prev_Data=False: presumably re-parses the raw files instead of reusing
# previously loaded data -- TODO confirm against the DeepTCR documentation.
# The *_column arguments are column positions in the input files (assumed
# 0-based: 0 = beta CDR3, 1 = count, 2 = V-beta, 3 = J-beta -- TODO confirm).
DTCRS.Get_Data(directory='../../Data/Murine_Antigens',
               Load_Prev_Data=False,
               aggregate_by_aa=True,
               aa_column_beta=0,
               count_column=1,
               v_beta_column=2,
               j_beta_column=3)

# 10 Monte Carlo cross-validation folds with stop_criterion=0.01.
DTCRS.Monte_Carlo_CrossVal(folds=10, stop_criterion=0.01)
# Request the top 10 sequences with unique=True; DTCRS.Rep_Seq is read by the
# loop further down (presumably populated by this call -- TODO confirm).
DTCRS.Representative_Sequences(top_seq=10, unique=True)
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# Iterate over the keys of DTCRS.Rep_Seq.
# NOTE(review): the unconditional `break` below exits on the first iteration,
# so every statement after it in the loop body is unreachable. This looks like
# a deliberate way of disabling the loop -- confirm before removing either the
# `break` or the dead code.
for item in DTCRS.Rep_Seq:
    break
    # (unreachable) Table stored for this key.
    t = DTCRS.Rep_Seq[item]
    # (unreachable) One row per unique 'beta' value, keeping the first value of
    # the column named after the key.
    t = t.groupby(['beta']).agg({item: 'first'})
    # (unreachable) Largest values of that column first.
    t = t.sort_values(by=item, ascending=False)
    # (unreachable) Turn 'beta' back into a regular column.
    t.reset_index(inplace=True)
    # (unreachable) Keep the first ten beta sequences of the sorted table.
    seq = t['beta'].tolist()
    seq = seq[:10]
    out = []
    for s in seq:
示例#2
0
# Run the supervised sequence classifier: build a DeepTCR_SS model named
# 'Sequence_C' and load the data under ../../Data/Murine_Antigens.
# device=6 presumably selects the compute device index -- TODO confirm.
DTCRS = DeepTCR_SS('Sequence_C', device=6)
# Load_Prev_Data=True: presumably reuses data saved by an earlier Get_Data run
# instead of re-parsing the directory -- TODO confirm.
# The *_column arguments are column positions in the input files (assumed
# 0-based: 0 = beta CDR3, 1 = count, 2 = V-beta, 3 = J-beta -- TODO confirm).
DTCRS.Get_Data(directory='../../Data/Murine_Antigens',
               Load_Prev_Data=True,
               aggregate_by_aa=True,
               aa_column_beta=0,
               count_column=1,
               v_beta_column=2,
               j_beta_column=3)

# Cross-validation settings handed to Monte_Carlo_CrossVal: the number of
# folds, a fixed graph seed, and one integer seed per fold (0 .. folds - 1).
folds, graph_seed = 100, 0
seeds = np.arange(folds)
# Monte Carlo cross-validation using the fold count, graph seed and per-fold
# seeds defined just above.
DTCRS.Monte_Carlo_CrossVal(folds=folds, graph_seed=graph_seed, seeds=seeds)
# Request representative sequences (top_seq=25, motif_seq=10) with the
# 'hydrophobicity' color scheme; DTCRS.Rep_Seq is read further down
# (presumably populated by this call -- TODO confirm).
DTCRS.Representative_Sequences(top_seq=25,
                               motif_seq=10,
                               color_scheme='hydrophobicity')

# Start from an empty output directory: delete 'Murine_Rep_Sequences' together
# with everything in it if it already exists, then create it again.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is left
# unchanged here because later code outside this view may reference it.
dir = 'Murine_Rep_Sequences'
if os.path.exists(dir):
    shutil.rmtree(dir)
os.makedirs(dir)

# For every key in DTCRS.Rep_Seq: collapse its table to one row per unique
# 'beta' value (keeping the first value of the column named after the key),
# order the rows by that column from largest to smallest, and keep the first
# ten beta sequences.
for item in DTCRS.Rep_Seq:
    t = (DTCRS.Rep_Seq[item]
         .groupby(['beta'])
         .agg({item: 'first'})
         .sort_values(by=item, ascending=False)
         .reset_index())
    seq = t['beta'].tolist()[:10]
    out = []
示例#3
0
from DeepTCR.DeepTCR import DeepTCR_SS
from multiprocessing import Pool
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve
import os

# Pool of 80 worker processes, passed to Get_Data via its `p` argument in the
# loop below.
# NOTE(review): the pool is never closed/joined in the visible code -- confirm
# it is released later in the file.
p = Pool(80)
# Make sure the results directory exists. exist_ok=True replaces the previous
# "check, then create" pair: it is idempotent and cannot fail when another
# process creates the directory between the check and the makedirs call.
# (If the path exists but is not a directory, this now raises immediately
# instead of silently continuing.)
dir_results = 'alpha_v_beta_results'
os.makedirs(dir_results, exist_ok=True)

# Names of the antigen data sets to process; each one is also the name of a
# sub-directory of ../../Data/Zhang/ (see the Get_Data call below).
antigens = [
    'GANAB-S5F', 'ATP6AP1-KLG_G3W', 'CMV-MLN', 'GNL3L-R4C', 'MART1-A2L',
    'YFV-LLW'
]

# Train one supervised sequence classifier per antigen.
for a in antigens:
    # Model name is the antigen name with the suffix 'Rep'.
    DTCR = DeepTCR_SS(a + 'Rep')
    # Load this antigen's data using the shared worker pool `p`.
    # Column positions are assumed 0-based (0 = alpha CDR3, 1 = beta CDR3)
    # -- TODO confirm against the DeepTCR documentation.
    DTCR.Get_Data(directory='../../Data/Zhang/' + a,
                  aa_column_alpha=0,
                  aa_column_beta=1,
                  p=p)
    # 50 Monte Carlo cross-validation folds with weight_by_class enabled.
    DTCR.Monte_Carlo_CrossVal(folds=50, weight_by_class=True)
    # Representative sequences with the library's default settings.
    DTCR.Representative_Sequences()
示例#4
0
    fig, ax = plt.subplots(figsize=(5, 5))
    x = X_2[sel_idx, 0]
    y = X_2[sel_idx, 1]
    x, y, c, _, _ = GKDE(x, y)
    ax.scatter(x, y, c=c, cmap='jet', s=100)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_title(l, fontsize=36)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    fig.savefig(os.path.join(dir_write, l + '.png'), dpi=1200)
    plt.close()

# Get the residue sensitivity logo for a selected epitope.
# Request the top 100 representative sequences without drawing sequence logos;
# DTCR.Rep_Seq is read right below (presumably populated by this call -- TODO
# confirm).
DTCR.Representative_Sequences(top_seq=100, make_seq_logos=False)
test_peptide = 'TSTLQEQIGW'
# First ten entries of the 'beta' column for the selected peptide.
rep_seq = DTCR.Rep_Seq[test_peptide]['beta'][0:10]
# Draw 5 distinct indices from 0..99 and turn them into 'model_<i>' names ...
models = np.random.choice(range(100), 5, replace=False)
models = ['model_' + str(x) for x in models]
# NOTE(review): ... but this assignment discards that selection, so the call
# below always receives models=None. The two lines above only consume random
# numbers; confirm whether the override is an intentional toggle.
models = None
DTCR.Residue_Sensitivity_Logo(beta_sequences=np.array(rep_seq),
                              models=models,
                              class_sel=test_peptide,
                              Load_Prev_Data=False,
                              background_color='black',
                              edgewidth=0.0,
                              figsize=(3, 4),
                              min_size=0.25,
                              norm_to_seq=True)
# Save the current matplotlib figure as '<test_peptide>.png' at 1200 dpi
# (presumably the logo drawn by the call above -- TODO confirm).
plt.savefig(test_peptide + '.png', dpi=1200)