Exemplo n.º 1
0
    def test_read_rst(self):
        # Check the branch information that SWAMP.read_rst extracts when
        # given the path of the example alignment 44.
        phy_path = 'example_dataset/data/44/44.phy'
        rst_branches = SWAMP.read_rst(phy_path)

        # (branch code, expected number of entries recorded for that branch)
        expected_counts = (('5..7', 6), ('5..6', 0))
        for branch_code, expected in expected_counts:
            self.assertEqual(len(rst_branches[branch_code]), expected)
Exemplo n.º 2
0
    def test_sliding_window_scan(self):
        # Run sliding_window_scan over a single alignment at two thresholds
        # and inspect the '44_masked.phy' file read back after each run.
        infiles = ['example_dataset/data/44/44.phy']
        threshold = 1
        windowsize = 20
        interscan = False

        branch_file = 'example_dataset/branchcodes.txt'
        branch_codes = SWAMP.read_branchcodes(branch_file)

        # Run a sliding window scan on this single file.
        SWAMP.sliding_window_scan(infiles, threshold, windowsize,
                                  interscan, branch_codes)

        # Check that the masked file exists and contains 'NNN's.
        # assertIn/assertNotIn are used (as in test_read_branchcodes) so a
        # failure reports the operands instead of "False is not true".
        masked_file = 'example_dataset/data/44/44_masked.phy'
        masked_dict = SWAMP.read_phylip(masked_file)
        pongo_seq = masked_dict['pongo']
        self.assertIn('NNN', pongo_seq)

        # Increase threshold, ensure NNNs are gone.
        threshold = 10
        SWAMP.sliding_window_scan(infiles, threshold, windowsize,
                                  interscan, branch_codes)
        masked_dict = SWAMP.read_phylip(masked_file)
        pongo_seq = masked_dict['pongo']
        self.assertNotIn('NNN', pongo_seq)
Exemplo n.º 3
0
    def test_read_phylip(self):
        # Read two example PHYLIP files and check the container type, the
        # number of sequences and the length of the 'pongo' sequence.
        infile = 'example_dataset/data/101/101.phy'
        seq_dict = SWAMP.read_phylip(infile)

        # read_phylip returns a dict (assertIsInstance names the actual
        # type on failure and also accepts dict subclasses).
        self.assertIsInstance(seq_dict, dict)

        # Dict contains 4 seqs
        self.assertEqual(len(seq_dict), 4)

        # Sequence length check.
        self.assertEqual(len(seq_dict['pongo']), 1413)

        # Try another file.
        infile = 'example_dataset/data/44/44.phy'
        seq_dict = SWAMP.read_phylip(infile)

        self.assertEqual(len(seq_dict['pongo']), 1905)
Exemplo n.º 4
0
    def test_interscan(self):
        # Scan the same file with interscan disabled and then enabled, and
        # compare the reported masked-column counts.
        phy_path = 'example_dataset/data/44/44.phy'
        cutoff = 1
        window = 10

        codes_path = 'example_dataset/branchcodes.txt'
        branch_codes = SWAMP.read_branchcodes(codes_path)

        # (interscan flag, expected 'masked_column_count'):
        # without interscan fewer codons are masked, with it more.
        cases = ((False, 215), (True, 301))
        for use_interscan, expected_masked in cases:
            outcome = SWAMP.sliding_window_scan_file(
                phy_path, cutoff, window, use_interscan, branch_codes)
            self.assertEqual(outcome['masked_column_count'], expected_masked)
Exemplo n.º 5
0
    def test_read_branchcodes(self):
        # Parse the example branch-code file and spot-check two entries.
        codes_path = 'example_dataset/branchcodes.txt'
        parsed_codes = SWAMP.read_branchcodes(codes_path)

        # A branch code such as '5..7' maps to its species,
        # here (papio, colobus).
        two_species_branch = parsed_codes['5..7']
        for species in ('papio', 'colobus'):
            self.assertIn(species, two_species_branch)

        # A branch leading to a single species holds exactly one name,
        # e.g. '6..2' -> (human)
        single_species_branch = parsed_codes['6..2']
        self.assertEqual(len(single_species_branch), 1)
Exemplo n.º 6
0
    def test_write_phylip(self):
        # Mask a run of codons in two sequences, write the masked alignment
        # out, read it back and confirm only those two sequences are masked.
        infile = 'example_dataset/data/44/44.phy'
        seq_dict = SWAMP.read_phylip(infile)

        # Create some fake codons to mask: each codon index maps to the
        # sequence names to mask at that position.
        # NOTE(review): the key 'h**o' looks mangled (possibly a censored
        # 'homo') -- confirm against the sequence names in 44.phy.
        codons_to_mask = {i: ['pongo', 'h**o'] for i in range(2, 13)}

        # Write file...
        masked_dict = SWAMP.mask_codons(seq_dict, codons_to_mask)
        SWAMP.print_masked_phyfile(infile, masked_dict)
        # Read it back
        masked_file = 'example_dataset/data/44/44_masked.phy'
        new_masked_dict = SWAMP.read_phylip(masked_file)

        pongo_seq = new_masked_dict['pongo']
        human_seq = new_masked_dict['h**o']
        colobus_seq = new_masked_dict['colobus']

        # Make sure we see the masked seqs: ten consecutive masked codons
        # in the two masked sequences and not in the untouched one.
        masked_run = 'NNN' * 10
        self.assertIn(masked_run, pongo_seq)
        self.assertIn(masked_run, human_seq)
        self.assertNotIn(masked_run, colobus_seq)
Exemplo n.º 7
0
    def test_branch_error_check(self):
        # branch_error_check must accept valid branch codes and raise
        # ValueError for missing codes or for species absent from seq_dict.
        seq_file = 'example_dataset/data/44/44.phy'
        seq_dict = SWAMP.read_phylip(seq_file)

        branch_file = 'example_dataset/branchcodes.txt'
        branch_codes = SWAMP.read_branchcodes(branch_file)

        # No error raised when valid branch codes provided.
        SWAMP.branch_error_check(branch_codes, seq_dict)

        # Error is raised when branchcodes is not present
        # (e.g. no file provided by user on command line)
        with self.assertRaises(ValueError):
            SWAMP.branch_error_check(None, seq_dict)

        # Mess up one of the branch_codes, adding an unknown species
        branch_codes['5..7'] += ('gorilla',)
        # Error is raised when species isn't found in seq_dict.
        with self.assertRaises(ValueError):
            SWAMP.branch_error_check(branch_codes, seq_dict)