示例#1
0
 def test_update_indel_deletion(self):
     '''Test update_indel merges an adjacent deleted base into a deletion'''
     # Both input lines share every column except ref position and ref base.
     shared_columns = ['.', '100', 'x', 'x', '300', '400', 'x', 'x', 'ref', 'qry']
     deletion = variant.Variant(snp.Snp('\t'.join(['42', 'A'] + shared_columns)))
     to_add = snp.Snp('\t'.join(['43', 'C'] + shared_columns))

     # Snapshot the variant before it is extended, then overwrite each field
     # with the value expected after the merge. Coords are stored zero-based,
     # so they are one less than the real expected coords.
     expected = copy.copy(deletion)
     expected_fields = (
         ('ref_start', 41),
         ('ref_end', 42),
         ('ref_length', 300),
         ('ref_name', 'ref'),
         ('ref_base', 'AC'),
         ('qry_start', 99),
         ('qry_end', 99),
         ('qry_length', 400),
         ('qry_name', 'qry'),
         ('qry_base', '.'),
     )
     for field_name, field_value in expected_fields:
         setattr(expected, field_name, field_value)

     was_extended = deletion.update_indel(to_add)
     self.assertTrue(was_extended)
     self.assertEqual(expected, deletion)
示例#2
0
    def test_get_all_variants(self):
        '''Test get_all_variants loads a deletion, a SNP and an insertion from file'''

        def merged_variant(rows):
            # Parse every row first, then seed a Variant from the first Snp
            # and fold the remaining Snps into it with update_indel.
            parsed = [snp.Snp('\t'.join(row)) for row in rows]
            merged = variant.Variant(parsed[0])
            for extra in parsed[1:]:
                merged.update_indel(extra)
            return merged

        # Three consecutive ref positions, all with '.' as the qry base.
        deletion_variant = merged_variant([
            ['125', 'T', '.', '124', '1', '124', '500', '497', '1', '1', 'ref1', 'qry1'],
            ['126', 'A', '.', '124', '1', '124', '500', '497', '1', '1', 'ref1', 'qry1'],
            ['127', 'C', '.', '124', '1', '124', '500', '497', '1', '1', 'ref1', 'qry1'],
        ])

        # A single substitution line, wrapped as a Variant on its own.
        snp_variant = variant.Variant(snp.Snp('\t'.join(
            ['386', 'C', 'T', '383', '115', '115', '500', '497', '1', '1', 'ref1', 'qry1'])))

        # Four consecutive qry positions, all with '.' as the ref base.
        insertion_variant = merged_variant([
            ['479', '.', 'G', '480', '0', '22', '500', '504', '1', '1', 'ref2', 'qry2'],
            ['479', '.', 'A', '481', '0', '22', '500', '504', '1', '1', 'ref2', 'qry2'],
            ['479', '.', 'T', '482', '0', '22', '500', '504', '1', '1', 'ref2', 'qry2'],
            ['479', '.', 'A', '483', '0', '22', '500', '504', '1', '1', 'ref2', 'qry2'],
        ])

        snps_path = os.path.join(data_dir, 'snp_file_test_get_all_variants.snps')
        variants_from_file = snp_file.get_all_variants(snps_path)

        self.assertEqual(len(variants_from_file), 3)
        expected_in_order = (deletion_variant, snp_variant, insertion_variant)
        for position, expected_variant in enumerate(expected_in_order):
            self.assertEqual(variants_from_file[position], expected_variant)
示例#3
0
    def test_snp_file(self):
        '''Test snp_file.reader yields the expected Snp records from both fixture files'''
        rows = [
            ['133', 'G', '.', '122', '1', '122', '500', '489', '1', '1', 'ref', 'qry'],
            ['143', '.', 'C', '131', '1', '132', '500', '489', '1', '1', 'ref', 'qry'],
            ['253', 'T', 'A', '242', '120', '242', '500', '489', '1', '1', 'ref', 'qry'],
        ]
        expected = [snp.Snp('\t'.join(row)) for row in rows]

        # The same three records are expected from each of the two fixtures.
        fixture_names = (
            'snp_file_test_with_header.snps',
            'snp_file_test_no_header.snps',
        )
        infiles = [os.path.join(data_dir, name) for name in fixture_names]

        for fname in infiles:
            snps = list(snp_file.reader(fname))
            self.assertEqual(snps, expected)
示例#4
0
def reader(fname):
    '''Yield a snp.Snp for each data line of the file called fname.

    Lines that start with '[' or that contain no tab character are skipped
    instead of being parsed.

    The file is opened with pyfastaq.utils.open_file_read and is closed
    with pyfastaq.utils.close when iteration finishes. The close also
    happens if the caller stops iterating early (the generator is closed or
    garbage collected) or if parsing a line raises, so the handle does not
    leak.
    '''
    f = pyfastaq.utils.open_file_read(fname)

    try:
        for line in f:
            if line.startswith('[') or '\t' not in line:
                continue

            yield snp.Snp(line)
    finally:
        # Runs on normal exhaustion, on an exception, and on GeneratorExit.
        pyfastaq.utils.close(f)
示例#5
0
 def test_str_no_c_option(self):
     '''Test __str__ with format with no -C option'''
     columns = [
         '187', 'A', 'C', '269', '187', '187', '654', '853', '1', '1',
         'ref_name', 'qry_name'
     ]
     parsed = snp.Snp('\t'.join(columns))

     # The expected string keeps input columns 1-4, 7-8 and 11-12 (1-based)
     # and omits columns 5-6 and 9-10.
     kept_columns = columns[:4] + columns[6:8] + columns[10:]
     expected = '\t'.join(kept_columns)

     self.assertEqual(str(parsed), expected)
示例#6
0
 def test_ref_coords_from_qry_coord_when_variant_not_in_nucmer_match(self):
     '''Test ref_coords_from_qry_coord when variant not in nucmer match'''
     aln = alignment.Alignment('1\t606\t596\t1201\t606\t606\t100.00\t606\t1700\t1\t1\tref\tqry')
     indel = variant.Variant(snp.Snp('127\tA\t.\t77\t75\t77\t1\t0\t606\t1700\t1\t1\tref\tqry'))

     # Each query coordinate must map to the same reference coordinate
     # whether or not the variant is supplied.
     coord_cases = (
         (595, (0, False)),
         (995, (400, False)),
         (1200, (605, False)),
     )
     for qry_coord, expected in coord_cases:
         for variant_list in ([], [indel]):
             got = aln.ref_coords_from_qry_coord(qry_coord, variant_list)
             self.assertEqual(expected, got)
示例#7
0
    def test_intersects_variant(self):
        '''Test intersects_variant'''
        # The variant line has position 100 in ref and 600 in qry.
        indel = variant.Variant(
            snp.Snp('100\tA\t.\t600\t75\t77\t1\t0\t606\t1700\t1\t1\tref\tqry'))

        # The alignment lines differ only in their first (ref start) and
        # third (qry start) columns; only the first is expected to
        # intersect the variant.
        cases = [
            ('100\t500\t600\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry', True),
            ('101\t500\t600\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry', False),
            ('100\t500\t601\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry', False),
            ('101\t500\t601\t1000\t501\t501\t100.00\t600\t1700\t1\t1\tref\tqry', False),
        ]

        # Build every alignment before asserting anything.
        built = [(alignment.Alignment(line), should_intersect)
                 for line, should_intersect in cases]

        for aln, should_intersect in built:
            check = self.assertTrue if should_intersect else self.assertFalse
            check(aln.intersects_variant(indel))
示例#8
0
    def test_init(self):
        '''Test init gets correct variant type'''
        # Each case pairs the columns of one input line with the var_type
        # the resulting Variant is expected to report.
        cases = [
            (['42', 'T', 'A', '42', '42', '42', '1000', '1000', '1', '1', 'ref', 'ref'],
             variant.SNP),
            (['242', 'G', '.', '241', '1', '241', '1000', '1000', '1', '1', 'ref', 'ref'],
             variant.DEL),
            (['300', '.', 'G', '298', '0', '298', '1000', '1000', '1', '1', 'ref', 'ref'],
             variant.INS),
        ]

        # Construct all variants up front, then check them one by one.
        variants = [
            variant.Variant(snp.Snp('\t'.join(columns))) for columns, _ in cases
        ]
        for built, (_, expected_type) in zip(variants, cases):
            self.assertEqual(built.var_type, expected_type)
示例#9
0
    def test_qry_coords_from_ref_coord_test_different_strand(self):
        '''Test qry_coords_from_ref_coord on different strand'''

        def make_snp(ref_pos, ref_base, qry_base, qry_pos):
            # Every line in this test shares the same trailing eight columns.
            return snp.Snp('\t'.join([
                ref_pos, ref_base, qry_base, qry_pos,
                'x', 'x', '300', '300', 'x', '1', 'ref', 'qry',
            ]))

        def flip_hit_direction():
            # Swap start and end of the hit in both query and reference.
            aln.qry_start, aln.qry_end = aln.qry_end, aln.qry_start
            aln.ref_start, aln.ref_end = aln.ref_end, aln.ref_start

        aln = alignment.Alignment('\t'.join([
            '100', '200', '101', '1', '100', '100', '100.00', '300', '300',
            '1', '1', 'ref', 'qry',
        ]))

        # Plain substitution.
        snp0 = variant.Variant(make_snp('140', 'A', 'T', '40'))

        # Bases present in ref, '.' in qry: del1 is one base, del2 is two.
        first_deleted = make_snp('140', 'A', '.', '40')
        second_deleted = make_snp('141', 'C', '.', '40')
        del1 = variant.Variant(first_deleted)
        del2 = variant.Variant(first_deleted)
        self.assertTrue(del2.update_indel(second_deleted))

        # '.' in ref, bases present in qry: ins1 is one base, ins2 is three.
        first_inserted = make_snp('150', '.', 'A', '50')
        second_inserted = make_snp('150', '.', 'C', '51')
        third_inserted = make_snp('150', '.', 'G', '52')
        ins1 = variant.Variant(first_inserted)
        ins2 = variant.Variant(first_inserted)
        self.assertTrue(ins2.update_indel(second_inserted))
        self.assertTrue(ins2.update_indel(third_inserted))

        # (ref coordinate, variants to take into account, expected result)
        tests = [
            (99, [], (100, False)),
            (100, [], (99, False)),
            (199, [], (0, False)),
            (119, [], (80, False)),
            (119, [del1], (80, False)),
            (149, [], (50, False)),
            (149, [del1], (51, False)),
            (149, [del2], (52, False)),
            (159, [], (40, False)),
            (159, [ins1], (39, False)),
            (159, [ins2], (37, False)),
            (159, [del1, ins1], (40, False)),
            (159, [del1, ins2], (38, False)),
            (159, [del2, ins1], (41, False)),
            (159, [del2, ins2], (39, False)),
            (139, [del1], (39, True)),
            (139, [snp0], (60, False)),
            (149, [ins1], (49, True)),
        ]

        for ref_coord, variant_list, expected in tests:
            self.assertEqual(
                expected, aln.qry_coords_from_ref_coord(ref_coord, variant_list))

            # if we reverse the direction of hit in query and reference,
            # should get the same answer
            flip_hit_direction()
            self.assertEqual(
                expected, aln.qry_coords_from_ref_coord(ref_coord, variant_list))

            # Restore the original orientation for the next case.
            flip_hit_direction()
示例#10
0
    def test_update_indel_no_change(self):
        '''Test update_indel does nothing in the right cases'''

        def make_snp(ref_pos, ref_base, qry_base, qry_pos='100',
                     ref_len='300', qry_len='400',
                     ref_name='ref', qry_name='qry'):
            # Assemble one 12-column line; the defaults are the values
            # shared by most lines in this test.
            return snp.Snp('\t'.join([
                ref_pos, ref_base, qry_base, qry_pos, 'x', 'x',
                ref_len, qry_len, 'x', 'x', ref_name, qry_name,
            ]))

        # Starting points: two substitutions, five deletions, five
        # insertions, all at ref position 42 / qry position 100.
        starting_bases = [('A', 'C')] * 2 + [('A', '.')] * 5 + [('.', 'A')] * 5
        initial_vars = [
            make_snp('42', ref_base, qry_base)
            for ref_base, qry_base in starting_bases
        ]

        # Candidate to pair with the starting point at the same index.
        to_add = [
            # paired with the two substitutions
            make_snp('142', 'A', '.', qry_pos='1000', ref_len='2000', qry_len='3000'),
            make_snp('142', '.', 'A', qry_pos='1000', ref_len='2000', qry_len='3000'),
            # paired with the five deletions
            make_snp('43', 'A', '.', ref_name='ref2'),
            make_snp('43', 'A', '.', qry_name='qry2'),
            make_snp('44', 'A', '.'),
            make_snp('42', 'A', '.'),
            make_snp('43', '.', 'A'),
            # paired with the five insertions
            make_snp('43', '.', 'A', ref_name='ref2'),
            make_snp('43', '.', 'A', qry_name='qry2'),
            make_snp('44', '.', 'A'),
            make_snp('42', '.', 'A'),
            make_snp('42', 'A', '.'),
        ]

        assert len(initial_vars) == len(to_add)

        for initial, candidate in zip(initial_vars, to_add):
            var = variant.Variant(initial)
            var_original = copy.copy(var)
            # The update must be refused and must leave the variant equal
            # to the copy taken beforehand.
            self.assertFalse(var.update_indel(candidate))
            self.assertEqual(var, var_original)