Пример #1
0
def test_ex12():
    """Smoke-test building a TCRsampler background and sampling from it.

    Reads the bundled ``britanova_chord_blood_sample_5000.csv`` test file
    into a ``TCRsampler``, builds the background, reads the V, J and VJ
    frequency attributes, and requests a background sample for a single
    V/J gene pair. Nothing is asserted: the test passes as long as no step
    raises.
    """
    import os

    import pandas as pd
    from tcrsampler.sampler import TCRsampler

    # The full reference file is 'britanova_chord_blood.csv'; the path below
    # points at the reduced copy that is used for tests only.
    sample_csv = os.path.join(
        'tcrdist', 'test_files', 'britanova_chord_blood_sample_5000.csv')

    sampler = TCRsampler()
    sampler.ref_df = pd.read_csv(sample_csv)
    sampler.build_background()

    # Bare attribute reads: the values are discarded, the access itself is
    # the check (a missing attribute would raise here).
    sampler.v_freq
    sampler.j_freq
    sampler.vj_freq

    sampler.sample_background(
        v='TRBV10-1*01',
        j='TRBJ1-1*01',
        n=3,
        depth=1,
        seed=1,
        use_frequency=True,
    )
Пример #2
0
import os
import pandas as pd
from tcrsampler.sampler import TCRsampler

# Load the reference repertoire and build the sampler background, passing
# max_rows=1000 to build_background.
t = TCRsampler()
fn = os.path.join('britanova_chord_blood.csv')
t.ref_df = pd.read_csv(fn)
t.build_background(max_rows=1000)

# Two three-element sample requests, passed to sample() together with
# depth=10. The returned value is not used.
sample_requests = [
    ['TRBV10-2*01', 'TRBV10-2*01*01', 1],
    ['TRBV27*01', 'TRBV27*01*01', 4],
]
t.sample(sample_requests, depth=10)

# Report the first dimension of every entry in the sampler's ref_dict.
for ref_key, ref_entry in t.ref_dict.items():
    n_rows = ref_entry.shape[0]
    print(ref_key, n_rows)
Пример #3
0
def test_v_j_freq_estimates():
    """Check V and J occurrence frequencies on a five-row reference table.

    The fixture holds five clones from one subject. Every V gene appears on
    exactly one row, while the J genes appear on one, two and two rows, so
    the expected occurrence frequencies are the per-row fractions 0.2 and
    0.2 / 0.4 / 0.4 respectively.
    """
    # Each column is keyed by row position (0..4), exactly as in a
    # DataFrame.to_dict() dump.
    columns = {
        'Unnamed: 0': dict(enumerate([0, 1, 2, 3, 4])),
        'v_reps': dict(enumerate([
            'TRBV24-1*01',
            'TRBV5-1*01',
            'TRBV7-2*01',
            'TRBV3-1*01',
            'TRBV7-3*01',
        ])),
        'j_reps': dict(enumerate([
            'TRBJ2-1*01',
            'TRBJ2-5*01',
            'TRBJ2-3*01',
            'TRBJ2-5*01',
            'TRBJ2-3*01',
        ])),
        'cdr3': dict(enumerate([
            'CATRQDNEQFF',
            'CASSLEETQYF',
            'CASSLADTQYF',
            'CASSQETQYF',
            'CASSLAGGTDTQYF',
        ])),
        'count': dict(enumerate([252, 166, 113, 98, 89])),
        'freq': dict(enumerate([
            0.0003726818302818776,
            0.0002454967612174273,
            0.00016711526516608003,
            0.00014493182288739684,
            0.00013162175752018694,
        ])),
        'subject': dict(enumerate(['A5-S11.txt'] * 5)),
    }

    sampler = TCRsampler()
    sampler.ref_df = pd.DataFrame(columns)
    sampler.build_background()

    expected_v = {
        'TRBV3-1*01': 0.2,
        'TRBV5-1*01': 0.2,
        'TRBV7-2*01': 0.2,
        'TRBV7-3*01': 0.2,
        'TRBV24-1*01': 0.2,
    }
    expected_j = {
        'TRBJ2-1*01': 0.2,
        'TRBJ2-3*01': 0.4,
        'TRBJ2-5*01': 0.4,
    }
    assert sampler.v_occur_freq == expected_v
    assert sampler.j_occur_freq == expected_j
Пример #4
0
                on='subject')
# Divide each row's freq_x by its freq_y, then print the per-subject sum of
# the resulting 'freq' column.
dfd['freq'] = dfd['freq_x'] / dfd['freq_y']
freq_grouped_by_subject = dfd[['freq', 'subject']].groupby(['subject'])
print(freq_grouped_by_subject.sum())

# Test that these will work with TCRsampler

from tcrsampler.sampler import TCRsampler

from tcrdist import repertoire_db
# Load the reference gene set and keep a view of the human gene names.
ref = repertoire_db.RefGeneSet(db_file='alphabeta_gammadelta_db.tsv')
ref.generate_all_genes()
human_gene_names = ref.all_genes['human'].keys()

tsd = TCRsampler()
tsd.ref_df = dfd
tsd.build_background()

# Find potentially missing genes: for the V table and then the J table,
# print every gene name, print those absent from the human reference set,
# and require that none are absent.
for freq_attr in ('v_freq', 'j_freq'):
    gene_names = list(getattr(tsd, freq_attr).keys())
    unknown_genes = [
        gene for gene in gene_names if gene not in human_gene_names
    ]
    print(gene_names)
    print(unknown_genes)
    assert not unknown_genes

tsg = TCRsampler()
tsg.ref_df = dfg