def testGetRestOfPaths(self):
    # Feeds a four-variant chr19 fixture (positions 11, 15, 16, 22) through
    # _getOverlaps and _getRestOfPath and checks the resulting paths.
    # NOTE(review): the VCF text below is reproduced exactly as it appears in
    # this file; confirm its whitespace against the intended fixture.
    pred_str = """##fileformat=VCFv4.0\n ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n ##source=TVsim\n #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n chr19 11 . ACT A 20 PASS . GT 1/1\n chr19 15 . ACGATT AA 20 PASS . GT 1/1\n chr19 16 . ACG A 20 PASS . GT 1/1\n chr19 22 . ATT A 20 PASS . GT 0/1\n """
    reader = vcf.Reader(StringIO.StringIO(pred_str))
    predicted = Variants(reader, MAX_INDEL_LEN)
    window = extract_range_and_filter(predicted.on_chrom('chr19'), 10, 25, 11)
    paths = _getRestOfPath([], _getOverlaps([], window))

    def has_pos(path, pos):
        # True when at least one variant in the path sits at ``pos``.
        return any(variant.pos == pos for variant in path)

    # all paths take variants at pos 11 and 22; one takes pos 15, one pos 16
    self.assertEqual(len(paths), 2)
    first, second = paths[0], paths[1]
    self.assertEqual(len(first), 3)
    self.assertEqual(len(second), 3)

    # Positions shared by every path.
    self.assertTrue(all(has_pos(path, 11) for path in paths))
    self.assertTrue(all(has_pos(path, 22) for path in paths))

    # The first path carries pos 15 only; the second carries pos 16 only.
    self.assertTrue(has_pos(first, 15))
    self.assertFalse(has_pos(first, 16))
    self.assertFalse(has_pos(second, 15))
    self.assertTrue(has_pos(second, 16))
def testGetOverlaps(self):
    # Feeds a five-variant chr19 fixture (positions 10, 13, 14, 15, 19)
    # through _getOverlaps and checks that it returns three groups whose
    # sizes are 1, 3 and 1, in that order.
    # NOTE(review): the VCF text below is reproduced exactly as it appears in
    # this file; confirm its whitespace against the intended fixture.
    pred_str = """##fileformat=VCFv4.0\n ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n ##source=TVsim\n #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n chr19 10 . ACT A 20 PASS . GT 1/1\n chr19 13 . AC A 20 PASS . GT 1/1\n chr19 14 . TAGG TA 20 PASS . GT 1/1\n chr19 15 . AGG A 20 PASS . GT 0/1\n chr19 19 . T TAAAC 20 PASS . GT 0/1 """
    pred_io = StringIO.StringIO(pred_str)
    pred_vcf = vcf.Reader(pred_io)
    pred_vars = Variants(pred_vcf, MAX_INDEL_LEN)
    variants_in_window = extract_range_and_filter(
        pred_vars.on_chrom('chr19'), 10, 20, 10)

    # the three overlapping variants should be in same group
    overlaps = _getOverlaps([], variants_in_window)
    self.assertEqual(len(overlaps), 3)

    # Compare against a real list: where map() returns an iterator it never
    # equals a list, so the original map(...) == [...] check could not pass.
    group_sizes = [len(group) for group in overlaps]
    self.assertEqual(group_sizes, [1, 3, 1])