def test_find_location_in_bed():
    """Exercise bed_tools.find_location_in_bed on one hit and three misses.

    The hit is a position expected to resolve to 180603263 on chr5 in the
    'NimbleGen Tiled Regions' track; the misses are a position that is too
    large, one that is too small, and one between capture regions.
    """
    bed_path = "../data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed"
    chr2st2end, chr2posLs = bed_tools.load_bed(bed_path, "NimbleGen Tiled Regions")

    # this is in the file
    hit = bed_tools.find_location_in_bed("chr5", 180603265, chr2posLs, chr2st2end)
    nose.tools.assert_equal(hit, 180603263, "did not find location, found " + str(hit))

    # these are not in the file: (chromosome, position, failure-message prefix)
    absent_cases = (
        # too large
        ("chrY", 26180101,
         "found location in too big test, but should not have, found "),
        # too small
        ("chr12", 100,
         "found location in too small test, but should not have, found "),
        # between capture regions
        ("chr6", 170731150,
         "found location in between test, but should not have, found "),
    )
    for chrom, position, message_prefix in absent_cases:
        miss = bed_tools.find_location_in_bed(chrom, position, chr2posLs, chr2st2end)
        nose.tools.assert_true(not miss, message_prefix + str(miss))
def test_load_bed():
    """Check the shape of what bed_tools.load_bed returns for 'Target Regions'.

    Expects 24 chromosomes in chr2st2end and, for every chromosome in
    chr2posLs, a strictly increasing position list with more than 400 entries.
    """
    bed_path = "../data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed"
    chr2st2end, chr2posLs = bed_tools.load_bed(bed_path, "Target Regions")

    n_chroms = len(chr2st2end)
    nose.tools.assert_equal(n_chroms, 24, "dont have 24 chrs. have " + str(n_chroms))

    for chrom, positions in chr2posLs.items():
        # every neighbouring pair must be strictly increasing
        for left, right in zip(positions, positions[1:]):
            nose.tools.assert_true(left < right, "chr2posLs is not sorted")
        n_positions = len(positions)
        nose.tools.assert_true(n_positions > 400, chrom + " length is " + str(n_positions))
def get_my_mutations(quality_cutoff, coverage_cutoff):
    """Return yusan somatic and inherited mutations inside the tiled regions.

    For every exome type in ``global_settings.exome_types`` the mutations are
    read from the hg18 data directory, kept only when
    ``bed_tools.find_location_in_bed`` locates them in the
    'NimbleGen Tiled Regions' track, and finally intersected with
    ``get_murim_covered(quality_cutoff)``.

    Args:
        quality_cutoff: forwarded to ``mutations.get_mutations`` and to
            ``get_murim_covered``.
        coverage_cutoff: forwarded to ``mutations.get_mutations``.

    Returns:
        A ``(somatic, inherited)`` tuple of sets of mutation keys; each key is
        a ``"<chr>:<pos>"`` string.
    """
    bed_file = 'data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed'
    # Track choices noted by the original author:
    # 'NimbleGen Tiled Regions' or 'Target Regions'.
    bed_chr2st2end, bed_chr2posLs = bed_tools.load_bed(bed_file, 'NimbleGen Tiled Regions')

    use_data_dir = '/home/perry/Projects/loh/data/all_non_ref_hg18/'
    cancer_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanT.ann')
    normal_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanN.ann')

    def in_tiled_region(mutation_key):
        # Keys look like "chr:pos"; the BED lookup wants them separately.
        chrom, pos = mutation_key.split(':')
        return bed_tools.find_location_in_bed(chrom, int(pos), bed_chr2posLs, bed_chr2st2end)

    all_somatic = set()
    all_inherited = set()
    for exome in global_settings.exome_types:
        data_file = use_data_dir + exome
        inherited, somatic, _murim = mutations.get_mutations(
            data_file, normal_qualities, cancer_qualities,
            quality_cutoff, False, coverage_cutoff)
        # only use the bed_tools NimbleGen restriction for hg18 data
        all_somatic.update(key for key in somatic['yusan'] if in_tiled_region(key))
        # Each inherited key is checked against its own position. The previous
        # code split the leftover somatic loop variable here, so every
        # inherited mutation was filtered by the last somatic position.
        all_inherited.update(key for key in inherited['yusan'] if in_tiled_region(key))

    # Same argument for both intersections, so compute the covered set once.
    murim_covered = set(get_murim_covered(quality_cutoff))
    return (all_somatic & murim_covered, all_inherited & murim_covered)
def get_mutations_yusan(quality_cutoff, coverage_cutoff):
    """Load mutations from data/all_non_ref_hg18, grouped by exome type.

    No BED / capture-region filtering is applied. The previous version loaded
    the NimbleGen BED file and created two accumulator dicts but never used
    any of them, so that dead work has been removed.

    Args:
        quality_cutoff: forwarded to ``mutations.get_mutations``.
        coverage_cutoff: forwarded to ``mutations.get_mutations``.

    Returns:
        A dict mapping each entry of ``global_settings.exome_types`` to the
        ``(inherited, somatic)`` pair returned by ``mutations.get_mutations``
        for that exome's data file.
    """
    use_data_dir = '/home/perry/Projects/loh/data/all_non_ref_hg18/'
    # NOTE(review): get_my_mutations reads these same .ann files through
    # mutations.get_consensus_qualities rather than call_class -- confirm
    # which module is the intended one.
    cancer_qualities = call_class.get_consensus_qualities(use_data_dir + 'yusanT.ann')
    normal_qualities = call_class.get_consensus_qualities(use_data_dir + 'yusanN.ann')

    exome2mutations = {}
    for exome in global_settings.exome_types:
        data_file = use_data_dir + exome
        # The third element of the result is not needed here.
        inherited, somatic, _murim = mutations.get_mutations(
            data_file, normal_qualities, cancer_qualities,
            quality_cutoff, False, coverage_cutoff)
        exome2mutations[exome] = (inherited, somatic)
    return exome2mutations