def test_TCRsampler_build():
    """Check that ``build_background`` leaves a dict of DataFrames in ``ref_dict``.

    The example MiXCR file is cleaned, the background is built, and then
    one arbitrary entry of ``ref_dict`` is inspected.
    """
    data_path = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    assert isinstance(sampler.ref_dict, dict)
    # popitem() removes the entry it returns; acceptable here because the
    # sampler instance is local to this test.
    _, entry = sampler.ref_dict.popitem()
    assert isinstance(entry, pd.DataFrame)
def test_prob_sampler_sample_key_warn():
    """Sampling a V/J pair that is not in the background warns and yields ``[[None]]``.

    ``'TRBV999*01'`` is presumably absent from the example data (TODO confirm);
    the test expects ``sample`` to emit a warning for it and to return a
    single ``[None]`` placeholder for that request.
    """
    t = TCRsampler()
    fn = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    t.clean_mixcr(filename=fn)
    t.build_background()
    # ``pytest.warns(None)`` was deprecated in pytest 7 and raises TypeError
    # in pytest 8+. ``Warning`` matches any warning category, which is the
    # documented replacement for "some warning must be emitted".
    with pytest.warns(Warning):
        r = t.sample([['TRBV999*01', 'TRBJ2-7*01', 2]])
    assert r == [[None]]
def test_prob_sampler_sample_background():
    """Pin the ten sequences ``sample_background`` returns for TRBV9*01 / TRBJ2-7*01.

    No seed is passed, so the exact expected list presumably relies on the
    sampler's default seeding being deterministic (TODO confirm).
    """
    data_path = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    expected = [
        'CASSRTGSLADEQYF',
        'CASSATGVVSAQYF',
        'CASSAWGQVYEQYF',
        'CASSVSGSPYEQYF',
        'CASSAWGQVYEQYF',
        'CASSAWGQVYEQYF',
        'CASRWGEQYF',
        'CASSGDDWEQYF',
        'CASSATGTSGPYEQYF',
        'CASSSRTSGSNSEQYF',
    ]
    drawn = sampler.sample_background('TRBV9*01', 'TRBJ2-7*01', n=10)
    assert drawn == expected
def test_TCRsampler_build_vj_components():
    """Check that every frequency table built by ``build_background`` sums to 1.

    Covers the V, J and paired VJ tables in both their ``*_freq`` and
    ``*_occur_freq`` variants.
    """
    data_path = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    # Same attributes, in the same order, as the original one-assert-per-table form.
    table_names = (
        'vj_freq',
        'j_freq',
        'v_freq',
        'vj_occur_freq',
        'v_occur_freq',
        'j_occur_freq',
    )
    for table_name in table_names:
        table = getattr(sampler, table_name)
        total = np.sum([freq for _, freq in table.items()])
        assert np.isclose(total, 1.0)
def test_prob_sampler_sample():
    """Pin the output of ``sample`` for one request, flattened, and two requests.

    Each request is ``[v_gene, j_gene, count]``. The default result is one
    list per request; ``flatten=True`` returns a single flat list.
    """
    data_path = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    trbv9_pair = ['CASSRTGSLADEQYF', 'CASSATGVVSAQYF']

    # Single request, nested result.
    nested = sampler.sample([['TRBV9*01', 'TRBJ2-7*01', 2]])
    assert nested == [trbv9_pair]

    # Same request, flattened result.
    flat = sampler.sample([['TRBV9*01', 'TRBJ2-7*01', 2]], flatten=True)
    assert flat == trbv9_pair

    # Two requests give one inner list per request, in request order.
    both = sampler.sample([
        ['TRBV9*01', 'TRBJ2-7*01', 2],
        ['TRBV7-7*01', 'TRBJ2-4*01', 4],
    ])
    trbv7_7_quad = [
        'CASSLGQAARGIQYF',
        'CASSLGQAARGIQYF',
        'CASSLGQAARGIQYF',
        'CASSLGQAARGIQYF',
    ]
    assert both == [trbv9_pair, trbv7_7_quad]
mixcr exportClones -cloneId -count -fraction -vGene -jGene -vHit -jHit -vHits -jHits -aaFeature CDR3 -nFeature CDR3 SRR2079522.1.clns SRR2079522.1.clns.best.txt -f mixcr exportAlignments SRR2079522.1.vdjca SRR2079522.1.vdjca.txt -f ``` #### Files Available For Download Beta: [SRR2079522.1.clns.best.txt](https://www.dropbox.com/s/czcewp7x7auwdsu/SRR2079522.1.clns.best.txt?dl=1) Alpha: [SRR2079521.1.clns.best.txt](https://www.dropbox.com/s/k4i0mt0cwhcn1h7/SRR2079521.1.clns.best.txt?dl=1) """ from tcrsampler.sampler import TCRsampler fn = 'SRR2079522.1.clns.best.subject.txt' t = TCRsampler() t.clean_mixcr(fn) t.build_background() t.ref_df t.ref_df.to_csv('ruggiero_mouse_beta_t.tsv.sampler.tsv', sep="\t", index=False) fn = 'SRR2079521.1.clns.best.subject.txt' t = TCRsampler() t.clean_mixcr(fn) t.build_background() t.ref_df t.ref_df.to_csv('ruggiero_mouse_alpha_t.tsv.sampler.tsv', sep="\t", index=False)
df['strain'] = 'C57BL6 inbred mouse strain'
print(df)

# NOTE(review): absolute path on an external volume; the script only runs
# where that drive is mounted.
wirasinha_path = '/Volumes/Samsung_T5/kmayerbl/tcr_data/wirasinha/Wirasinha.migec.txt'
wirasinha = pd.read_csv(wirasinha_path, sep='\t')

gene_cols = ['v', 'j']
best_cols = ['bestv', 'bestj']

# One pass per row of ``df``: subset the table, derive the best V/J columns,
# write the subset, then check that TCRsampler can ingest it and write the
# resulting reference table next to it.
for _, row in df.iterrows():
    sdf = subset_wirasinha(
        df=wirasinha,
        subset=row['subset'],
        tcr_b=row['tcr_b'],
        chain=row['chain'],
    )

    # Element-wise, in two passes: first ``_pick_best`` on the raw v/j
    # columns, then ``_strip_allele`` on the result. (Presumably these select
    # one gene call and drop its allele suffix -- helpers are defined elsewhere.)
    picked = sdf[gene_cols].apply(lambda col: col.apply(_pick_best))
    sdf[best_cols] = picked
    stripped = sdf[best_cols].apply(lambda col: col.apply(_strip_allele))
    sdf[best_cols] = stripped

    sdf = sdf.rename(columns=wirasinha_to_mixcr_headers)

    sys.stdout.write(f"Writing {row['filename']}\n")
    sdf.to_csv(row['filename'], sep="\t")

    sys.stdout.write(
        f"Testing {row['filename']} for import into TCRsampler\t")
    t = TCRsampler()
    t.clean_mixcr(filename=row['filename'])
    t.build_background()
    print("\n")
    print(t.ref_df.head(3))

    name = f"{row['filename']}.sampler.tsv"
    sys.stdout.write(f"Writing {name} \t")
    t.ref_df.to_csv(name, sep="\t", index=False)
def test_TCRsampler_build_stratified():
    """Smoke-test building the background with ``stratify_by_subject=True``.

    There are no assertions: the test passes as long as building and then
    sampling ten sequences completes without raising.
    """
    data_path = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background(stratify_by_subject=True)
    # Result intentionally unused; only the absence of an exception is checked.
    sampler.sample_background('TRBV9*01', 'TRBJ2-7*01', n=10)
def test_TCRsampler_clean_mixcr():
    """Check that ``clean_mixcr`` leaves a pandas DataFrame in ``ref_df``."""
    data_path = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)

    cleaned = sampler.ref_df
    assert isinstance(cleaned, pd.DataFrame)