Пример #1
0
    def testRescueMission(self):
        # false negative variant at location is SV; don't rescue
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   8000   .       G     GATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   8000   .       G     GC       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(fn_str,'chr1')
        pred_vars = vcf_to_ChromVariants(fp_str,'chr1')
        num_new_tp,num_removed_fn,rescuedvars = rescue_mission(true_vars,pred_vars,get_empty_ChromVariants('chr1'),8000,get_reference(),100)
        self.assertFalse(any(map(lambda x: x > 0, num_new_tp.itervalues())))
        self.assertFalse(any(map(lambda x: x > 0, num_removed_fn.itervalues())))
        # variant couldn't be rescued; no change to counts or ChromVariants
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   2   .       A     C       20      PASS    .       GT      1/1\n
chr1   7   .       C        T       20      PASS    .       GT      0/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   4   .       A     C       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr1')
        num_new_tp,num_removed_fn,rescuedvars = rescue_mission(fn_vars,fp_vars,get_empty_ChromVariants('chr1'),2,get_reference(),100)
        self.assertFalse(any(map(lambda x: x > 0, num_new_tp.itervalues())))
        self.assertFalse(any(map(lambda x: x > 0, num_removed_fn.itervalues())))
        self.assertEqual(len(fn_vars.all_locations),2)
        self.assertEqual(len(fp_vars.all_locations),1)
        self.assertEqual(rescuedvars,[])
        # variant is rescued; counts change; variants are removed from fn/fp
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr2')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr2')
        num_new_tp,num_removed_fn,rescuedvars = rescue_mission(fn_vars,fp_vars,get_empty_ChromVariants('chr2'),2,get_reference(),100)
        self.assertEqual(num_new_tp[VARIANT_TYPE.INDEL_OTH],1)
        self.assertEqual(num_removed_fn[VARIANT_TYPE.SNP],2)
        self.assertEqual(len(fn_vars.all_locations),0)
        self.assertEqual(len(fp_vars.all_locations),0)
        self.assertEqual(map(lambda r: r.pos,rescuedvars),[3,4])
Пример #2
0
    def testRescueMission(self):
        # false negative variant at location is SV; don't rescue
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   8000   .       G     GATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCATTGCT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   8000   .       G     GC       20      PASS    .       GT      1/1\n
"""
        true_vars = vcf_to_ChromVariants(fn_str,'chr1')
        pred_vars = vcf_to_ChromVariants(fp_str,'chr1')
        num_new_tp,num_removed_fn,rescuedvars = rescue_mission(true_vars,pred_vars,get_empty_ChromVariants('chr1'),8000,get_reference(),100)
        self.assertFalse(any(map(lambda x: x > 0, num_new_tp.itervalues())))
        self.assertFalse(any(map(lambda x: x > 0, num_removed_fn.itervalues())))
        # variant couldn't be rescued; no change to counts or ChromVariants
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   2   .       A     C       20      PASS    .       GT      1/1\n
chr1   7   .       C        T       20      PASS    .       GT      0/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   4   .       A     C       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr1')
        num_new_tp,num_removed_fn,rescuedvars = rescue_mission(fn_vars,fp_vars,get_empty_ChromVariants('chr1'),2,get_reference(),100)
        self.assertFalse(any(map(lambda x: x > 0, num_new_tp.itervalues())))
        self.assertFalse(any(map(lambda x: x > 0, num_removed_fn.itervalues())))
        self.assertEqual(len(fn_vars.all_locations),2)
        self.assertEqual(len(fp_vars.all_locations),1)
        self.assertEqual(rescuedvars,[])
        # variant is rescued; counts change; variants are removed from fn/fp
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr2')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr2')
        num_new_tp,num_removed_fn,rescuedvars = rescue_mission(fn_vars,fp_vars,get_empty_ChromVariants('chr2'),2,get_reference(),100)
        self.assertEqual(num_new_tp[VARIANT_TYPE.INDEL_OTH],1)
        self.assertEqual(num_removed_fn[VARIANT_TYPE.SNP],2)
        self.assertEqual(len(fn_vars.all_locations),0)
        self.assertEqual(len(fp_vars.all_locations),0)
        self.assertEqual(map(lambda r: r.pos,rescuedvars),[3,4])
Пример #3
0
    def testFullRescue(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr2')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr2')
        rescuer = SequenceRescuer('chr2',2,fn_vars,fp_vars,get_empty_ChromVariants('chr2'),get_reference(),50)
        self.assertTrue(rescuer.rescued)
        self.assertEqual(rescuer.windowsRescued,(0,0))

        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GC     G       20      PASS    .       GT      1/1\n
chr2   6   .       G      A       20      PASS    .       GT      1/1\n
"""
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GCCG     GCA       20      PASS    .       GT      1/1\n
"""
        fp_vars = vcf_to_ChromVariants(fp_str,'chr2')
        fn_vars = vcf_to_ChromVariants(fn_str,'chr2')
        rescuer = SequenceRescuer('chr2',3,fn_vars,fp_vars,get_empty_ChromVariants('chr2'),get_reference(),50)
        self.assertTrue(rescuer.rescued)

        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   3   .       TC     T       20      PASS    .       GT      1/1\n
chr4   8   .       C      T       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   4   .       C     T       20      PASS    .       GT      1/1\n
chr4   7   .       TC    T       20      PASS    .       GT      1/1\n
"""
        tp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   5   .       TC    T       20      PASS    .       GT      1/1\n
        """
        fn_vars = vcf_to_ChromVariants(fn_str,'chr4')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr4')
        tp_vars = vcf_to_ChromVariants(tp_str,'chr4')
        rescuer = SequenceRescuer('chr4',3,fn_vars,fp_vars,tp_vars,get_reference(),50)
        self.assertTrue(rescuer.rescued)
Пример #4
0
    def testTooManyPaths(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10049   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
chr1   10053   .       TGCGT        T       20      PASS    .       GT      0/1\n
chr1   10055   .       GCTAA        G       20      PASS    .       GT      0/1\n
chr1   10057   .       TA           T       20      PASS    .       GT      1/1\n
chr1   10058   .       GC           G       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10025   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
chr1   10028   .       TGCGT        T       20      PASS    .       GT      0/1\n
chr1   10029   .       GCTAA        G       20      PASS    .       GT      0/1\n
chr1   10032   .       TA           T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str, 'chr1')
        rescuer = SequenceRescuer('chr1', 10000, fn_vars, fp_vars,
                                  get_empty_ChromVariants('chr2'),
                                  get_reference(), 50)
        self.assertFalse(rescuer.rescued)
Пример #5
0
    def testVariantWithMismatchedRef(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     C       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr2')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr2')
        rescuer = SequenceRescuer('chr2',2,fn_vars,fp_vars,get_empty_ChromVariants('chr2'),get_reference(),50)
        self.assertFalse(rescuer.rescued)
Пример #6
0
    def testNormalizedVariants(self):
        fp_str = """##fileformat=VCFv4.0
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4    4       .       C       CTC     20      PASS    .       GT      0/1\n
chr4    6       .       C       CTC     20      PASS    .       GT      0/1\n
"""
        fn_str = """##fileformat=VCFv4.0
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4    2       .       A       ATCTC     20      PASS    .       GT      0/1\n
"""
        fp_vars = normalize_vcf_to_ChromVariants(fp_str,'chr4')
        fn_vars = vcf_to_ChromVariants(fn_str,'chr4')
        rescuer = SequenceRescuer('chr4',2,fn_vars,fp_vars,get_empty_ChromVariants('chr4'),get_reference(),50)
        self.assertTrue(rescuer.rescued)
Пример #7
0
    def testEmptyWindow(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   8000   .       G     C       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10049   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr1')
        rescuer = SequenceRescuer('chr1',10049,fn_vars,fp_vars,get_empty_ChromVariants('chr2'),get_reference(),50)
        self.assertFalse(rescuer.rescued)
Пример #8
0
    def testNormalizedVariants(self):
        fp_str = """##fileformat=VCFv4.0
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4    4       .       C       CTC     20      PASS    .       GT      0/1\n
chr4    6       .       C       CTC     20      PASS    .       GT      0/1\n
"""
        fn_str = """##fileformat=VCFv4.0
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4    2       .       A       ATCTC     20      PASS    .       GT      0/1\n
"""
        fp_vars = normalize_vcf_to_ChromVariants(fp_str, 'chr4')
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr4')
        rescuer = SequenceRescuer('chr4', 2, fn_vars, fp_vars,
                                  get_empty_ChromVariants('chr4'),
                                  get_reference(), 50)
        self.assertTrue(rescuer.rescued)
Пример #9
0
    def testVariantWithMismatchedRef(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     C       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr2')
        fp_vars = vcf_to_ChromVariants(fp_str, 'chr2')
        rescuer = SequenceRescuer('chr2', 2, fn_vars, fp_vars,
                                  get_empty_ChromVariants('chr2'),
                                  get_reference(), 50)
        self.assertFalse(rescuer.rescued)
Пример #10
0
    def testEmptyWindow(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   8000   .       G     C       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10049   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str, 'chr1')
        rescuer = SequenceRescuer('chr1', 10049, fn_vars, fp_vars,
                                  get_empty_ChromVariants('chr2'),
                                  get_reference(), 50)
        self.assertFalse(rescuer.rescued)
Пример #11
0
    def testWindowTooBig(self):
        longsv1 = 'ATTGTTCATGA'*300
        longsv2 = 'GCCTAGGGTCA'*300
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   7001   .       """ + longsv1 + """     A       20      PASS    .       GT      1/1\n
chr1   10100   .       """ + longsv2 + """     G       20      PASS    .       GT      0/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10049   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr1')
        rescuer = SequenceRescuer('chr1',10049,fn_vars,fp_vars,get_empty_ChromVariants('chr2'),get_reference(),50)
        self.assertFalse(rescuer.rescued)
Пример #12
0
    def testWindowTooBig(self):
        longsv1 = 'ATTGTTCATGA' * 300
        longsv2 = 'GCCTAGGGTCA' * 300
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   7001   .       """ + longsv1 + """     A       20      PASS    .       GT      1/1\n
chr1   10100   .       """ + longsv2 + """     G       20      PASS    .       GT      0/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10049   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str, 'chr1')
        rescuer = SequenceRescuer('chr1', 10049, fn_vars, fp_vars,
                                  get_empty_ChromVariants('chr2'),
                                  get_reference(), 50)
        self.assertFalse(rescuer.rescued)
Пример #13
0
    def testTooManyPaths(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10049   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
chr1   10053   .       TGCGT        T       20      PASS    .       GT      0/1\n
chr1   10055   .       GCTAA        G       20      PASS    .       GT      0/1\n
chr1   10057   .       TA           T       20      PASS    .       GT      1/1\n
chr1   10058   .       GC           G       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr1   10025   .       CTTAAGCT     C       20      PASS    .       GT      1/1\n
chr1   10028   .       TGCGT        T       20      PASS    .       GT      0/1\n
chr1   10029   .       GCTAA        G       20      PASS    .       GT      0/1\n
chr1   10032   .       TA           T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str,'chr1')
        fp_vars = vcf_to_ChromVariants(fp_str,'chr1')
        rescuer = SequenceRescuer('chr1',10000,fn_vars,fp_vars,get_empty_ChromVariants('chr2'),get_reference(),50)
        self.assertFalse(rescuer.rescued)
Пример #14
0
    def testFullRescue(self):
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   2   .       TGC     TAT       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       G     A       20      PASS    .       GT      1/1\n
chr2   4   .       C     T       20      PASS    .       GT      1/1\n
"""
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr2')
        fp_vars = vcf_to_ChromVariants(fp_str, 'chr2')
        rescuer = SequenceRescuer('chr2', 2, fn_vars, fp_vars,
                                  get_empty_ChromVariants('chr2'),
                                  get_reference(), 50)
        self.assertTrue(rescuer.rescued)
        self.assertEqual(rescuer.windowsRescued, (0, 0))

        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GC     G       20      PASS    .       GT      1/1\n
chr2   6   .       G      A       20      PASS    .       GT      1/1\n
"""
        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr2   3   .       GCCG     GCA       20      PASS    .       GT      1/1\n
"""
        fp_vars = vcf_to_ChromVariants(fp_str, 'chr2')
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr2')
        rescuer = SequenceRescuer('chr2', 3, fn_vars, fp_vars,
                                  get_empty_ChromVariants('chr2'),
                                  get_reference(), 50)
        self.assertTrue(rescuer.rescued)

        fn_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   3   .       TC     T       20      PASS    .       GT      1/1\n
chr4   8   .       C      T       20      PASS    .       GT      1/1\n
"""
        fp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   4   .       C     T       20      PASS    .       GT      1/1\n
chr4   7   .       TC    T       20      PASS    .       GT      1/1\n
"""
        tp_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001\n
chr4   5   .       TC    T       20      PASS    .       GT      1/1\n
        """
        fn_vars = vcf_to_ChromVariants(fn_str, 'chr4')
        fp_vars = vcf_to_ChromVariants(fp_str, 'chr4')
        tp_vars = vcf_to_ChromVariants(tp_str, 'chr4')
        rescuer = SequenceRescuer('chr4', 3, fn_vars, fp_vars, tp_vars,
                                  get_reference(), 50)
        self.assertTrue(rescuer.rescued)