def snpMatrixGenerator(sourceFile, destFile, recordAll=False,
                       recordRandomSample=True):
    """Collect the differences found in every data block of a NEXUS stream.

    ``sourceFile`` is consumed line by line and split into chunks at each
    line that contains both "begin" and "data" (case-insensitively). Every
    chunk is parsed with ``NexusReader``; chunks exposing a "data" block are
    passed to ``_findDifferences`` together with the shared ``NexusWriter``
    and the running column counter that the helper returns. The writer's
    ``make_nexus()`` output plus a trailing newline is written to
    ``destFile``.

    Exactly one of ``recordAll`` and ``recordRandomSample`` must be true;
    otherwise "Invalid Options" is printed and ``SystemExit`` is raised
    before either file is touched. Once processing starts, both files are
    closed on the way out, even if parsing or writing fails.
    """
    if recordAll == recordRandomSample:
        # Parenthesised form is valid on both Python 2 and Python 3.
        print("Invalid Options")
        # ``exit`` is injected by the ``site`` module for interactive use;
        # raising SystemExit directly exits with the same status without
        # depending on it.
        raise SystemExit()

    def dataChunks():
        # Yield the text that precedes each "begin ... data" line, then
        # whatever remains once the input is exhausted.
        pending = []
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                yield "".join(pending)
                pending = [line]
            else:
                pending.append(line)
        yield "".join(pending)

    try:
        destNexus = NexusWriter()
        snpCol = 0
        for chunk in dataChunks():
            sourceNexus = NexusReader()
            sourceNexus.read_string(chunk)
            if "data" in sourceNexus.blocks:
                snpCol = _findDifferences(sourceNexus, destNexus, snpCol,
                                          recordAll, recordRandomSample)

        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        # Close both handles even if closing the first one fails.
        try:
            destFile.close()
        finally:
            sourceFile.close()
def snpMatrixGenerator(sourceFile,
                       destFile,
                       recordAll=False,
                       recordRandomSample=True):
    """Write the differences found in a NEXUS stream's data blocks.

    The input is split into chunks at every line containing both "begin"
    and "data" (compared in lower case). Each chunk is parsed with a fresh
    ``NexusReader``; when the parse exposes a "data" block, the reader is
    passed to ``_findDifferences`` along with the shared ``NexusWriter`` and
    the running column counter, which is replaced by the helper's return
    value. Finally ``make_nexus()`` of the writer plus a newline is written
    to ``destFile``.

    ``recordAll`` and ``recordRandomSample`` must differ. If they are equal,
    "Invalid Options" is printed and ``SystemExit`` is raised without
    touching the files. Otherwise both files are always closed before the
    function returns or propagates an error.
    """
    if recordAll == recordRandomSample:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print("Invalid Options")
        # Avoid the ``exit`` helper: it only exists when the ``site`` module
        # has been loaded and is intended for the interactive shell.
        raise SystemExit()

    def iterChunks():
        # Generator over the text chunks delimited by "begin ... data" lines;
        # the final chunk is emitted when the input runs out.
        lines = []
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                yield "".join(lines)
                lines = [line]
            else:
                lines.append(line)
        yield "".join(lines)

    try:
        destNexus = NexusWriter()
        snpCol = 0
        for block in iterChunks():
            sourceNexus = NexusReader()
            sourceNexus.read_string(block)
            if "data" in sourceNexus.blocks:
                snpCol = _findDifferences(sourceNexus, destNexus, snpCol,
                                          recordAll, recordRandomSample)

        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        # Nested so that a failing close() cannot leak the other handle.
        try:
            destFile.close()
        finally:
            sourceFile.close()
示例#3
0
 def test_count_other_values_two(self):
     # Count how often 'A' or 'B' occurs per taxon in a five-taxon matrix.
     reader = NexusReader()
     reader.read_string("""#NEXUS
     Begin data;
     Dimensions ntax=5 nchar=3;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              0A0  [No missing]
     Simon              0AB  [one missing]
     Peter              0-B  [one gap]
     Betty              ?-1  [one gap and one missing = 2 missing]
     Louise             ???  [three missing]
         ;
     End;
     """)
     wanted = dict(Harry=1, Simon=2, Peter=1, Betty=0, Louise=0)
     observed = count_site_values(reader, ['A', 'B'])
     # Only the taxa present in the returned tally are compared.
     for taxon in observed:
         assert observed[taxon] == wanted[taxon]
    def test_find_unique_sites_2(self):
        # find_unique_sites on a 4-taxon, 7-site matrix with "?" and "-" cells.
        reader = NexusReader()
        reader.read_string("""Begin data;
        Dimensions ntax=4 nchar=7;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              10000?-
        Simon              1100011
        Betty              1110000
        Louise             1111000
        ;""")
        found = find_unique_sites(reader)

        # Sites that must NOT be reported:
        #   1    - three taxa have "1" and one has "0" (a single absent taxon)
        #   0, 2 - not unique either
        #   4    - constant
        for site in (1, 0, 2, 4):
            assert site not in found
        # Sites that must be reported:
        #   3    - a simple unique site
        #   5, 6 - unique once the missing-data cells are handled
        for site in (3, 5, 6):
            assert site in found
    def test_find_unique_sites_2(self):
        # Each entry pairs a site index with whether it should be reported
        # as unique; the order mirrors the individual checks it replaces.
        expectations = [
            (1, False),  # 3x1 and 1x0: only ONE absent taxon, so ignored
            (0, False),
            (2, False),
            (4, False),  # constant
            (3, True),   # a simple unique site
            (5, True),   # unique when missing data is handled properly
            (6, True),
        ]
        source = NexusReader()
        source.read_string("""Begin data;
        Dimensions ntax=4 nchar=7;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              10000?-
        Simon              1100011
        Betty              1110000
        Louise             1111000
        ;""")
        unique = find_unique_sites(source)

        for site, should_be_unique in expectations:
            assert (site in unique) == should_be_unique
示例#6
0
 def test_read_string(self):
     # Load the example file into memory; the ``with`` block guarantees the
     # handle is closed even if reading fails.
     with open(os.path.join(EXAMPLE_DIR, 'example.nex')) as handle:
         data = handle.read()
     # Parsing from a string must expose the data block and its taxa.
     nex = NexusReader()
     nex.read_string(data)
     assert 'data' in nex.blocks
     assert 'Simon' in nex.blocks['data'].matrix
示例#7
0
 def test_read_string(self):
     # Use a context manager so the file handle cannot leak when read()
     # raises.
     example_path = os.path.join(EXAMPLE_DIR, 'example.nex')
     with open(example_path) as handle:
         data = handle.read()
     nex = NexusReader()
     nex.read_string(data)
     # The parsed text must contain a data block that lists taxon "Simon".
     assert 'data' in nex.blocks
     assert 'Simon' in nex.blocks['data'].matrix
示例#8
0
 def test_notimplemented_exception(self):
     # A NEXUS text whose only block is of the custom type "something".
     source = """Begin something;
             Dimensions ntax=5 nchar=1;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;"""
     # Parsing and anonymising both run inside the context manager, so a
     # NotImplementedError from either call satisfies the test.
     with self.assertRaises(NotImplementedError):
         reader = NexusReader()
         reader.read_string(source)
         anonymise(reader)
 def test_notimplemented_exception(self):
     # The text contains a single block named "something"; reading it and
     # passing the reader to anonymise() must raise NotImplementedError.
     with self.assertRaises(NotImplementedError):
         unsupported = NexusReader()
         unsupported.read_string("""Begin something;
             Dimensions ntax=5 nchar=1;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;""")
         anonymise(unsupported)
示例#10
0
 def test_combine_with_character_labels(self):
     # Two 3x3 matrices sharing taxa and character labels but using
     # different symbol sets ("123" and "456").
     first_source = """
         BEGIN DATA;
             DIMENSIONS NTAX=3 NCHAR=3;
             FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="123";
             CHARSTATELABELS
         		1 char1,
         		2 char2,
         		3 char3
         ;
         MATRIX
         Tax1         123
         Tax2         123
         Tax3         123
         ;
         """
     second_source = """
         BEGIN DATA;
             DIMENSIONS NTAX=3 NCHAR=3;
             FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="456";
             CHARSTATELABELS
         		1 char1,
         		2 char2,
         		3 char3
         ;
         MATRIX
         Tax1         456
         Tax2         456
         Tax3         456
         ;
         """
     n1 = NexusReader()
     n1.read_string(first_source)
     n2 = NexusReader()
     n2.read_string(second_source)
     newnex = combine_nexuses([n1, n2])

     # Header checks: same taxa, six characters, merged symbol list.
     header_patterns = (
         r"""\bNTAX=3\b""",
         r"""\bNCHAR=6\b""",
         r'\sSYMBOLS="123456"[\s;]',
     )
     for pattern in header_patterns:
         assert re.search(pattern, newnex.write())

     # Every taxon row holds the states of both inputs side by side.
     for tax in (1, 2, 3):
         assert re.search(r"""\bTax%d\s+123456\b""" % tax, newnex.write())

     # Character labels are numbered 1..6 and carry nex_id (1 or 2) in front
     # of the original label.
     labels = [(nex_id, char_id)
               for nex_id in (1, 2)
               for char_id in (1, 2, 3)]
     for counter, (nex_id, char_id) in enumerate(labels, 1):
         assert re.search(
             r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id),
             newnex.write(charblock=True)
         )
示例#11
0
 def test_incorrect_dimensions_warnings_nchar(self):
     # The header declares nchar=5 while the matrix row holds one character.
     with warnings.catch_warnings(record=True) as caught:
         reader = NexusReader()
         reader.read_string("""Begin data;
             Dimensions ntax=1 nchar=5;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;""")
         assert len(caught) == 1, 'Expected 1 warning, got %r' % caught
         latest = caught[-1]
         assert issubclass(latest.category, UserWarning)
         assert "Expected" in str(latest.message)
         # nchar must reflect the single character actually read.
         assert reader.data.nchar == 1
示例#12
0
 def test_incorrect_dimensions_warnings_nchar(self):
     # Declared nchar (5) disagrees with the one-character matrix row.
     mismatched = """Begin data;
             Dimensions ntax=1 nchar=5;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;"""
     with warnings.catch_warnings(record=True) as recorded:
         nex = NexusReader()
         nex.read_string(mismatched)
         # Exactly one UserWarning mentioning "Expected" must be recorded.
         assert len(recorded) == 1, 'Expected 1 warning, got %r' % recorded
         newest = recorded[-1]
         assert issubclass(newest.category, UserWarning)
         assert "Expected" in str(newest.message)
         assert nex.data.nchar == 1
 def test_labelled_unrooted(self):
     # A tree carrying a "[U]" annotation must be stored verbatim.
     expected = ['tree unrooted [U] = (0,1,2);']
     reader = NexusReader()
     reader.read_string("""
     #NEXUS
 
     begin trees;
         translate
             0 Tom,
             1 Simon,
             2 Fred;
             tree unrooted [U] = (0,1,2);
     end;
     """)
     assert len(reader.trees.trees) == 1
     assert reader.trees.trees == expected
 def test_treelabel(self):
     # A tree with an upper-case label must be stored verbatim.
     expected = ['tree TREEONE = (0,1,2);']
     reader = NexusReader()
     reader.read_string("""
     #NEXUS
 
     begin trees;
         translate
             0 Tom,
             1 Simon,
             2 Fred;
             tree TREEONE = (0,1,2);
     end;
     """)
     assert len(reader.trees.trees) == 1
     assert reader.trees.trees == expected
示例#15
0
    def test_combine_with_character_labels(self):
        # Combine two labelled 3x3 matrices whose symbols are "123" and "456".
        n1 = NexusReader()
        n1.read_string("""
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="123";
                CHARSTATELABELS
                    1 char1,
                    2 char2,
                    3 char3
            ;
            MATRIX
            Tax1         123
            Tax2         123
            Tax3         123
            ;
            """)
        n2 = NexusReader()
        n2.read_string("""
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="456";
                CHARSTATELABELS
                    1 char1,
                    2 char2,
                    3 char3
            ;
            MATRIX
            Tax1         456
            Tax2         456
            Tax3         456
            ;
            """)
        newnex = combine_nexuses([n1, n2])

        # Dimensions and symbol list of the combined matrix.
        for pattern in (r"""\bNTAX=3\b""",
                        r"""\bNCHAR=6\b""",
                        r'\sSYMBOLS="123456"[\s;]'):
            assert re.search(pattern, newnex.write())

        # Each taxon row is the concatenation of both source rows.
        for tax in (1, 2, 3):
            assert re.search(r"""\bTax%d\s+123456\b""" % tax, newnex.write())

        # The character block numbers the labels consecutively from 1.
        pairs = [(nex_id, char_id)
                 for nex_id in (1, 2)
                 for char_id in (1, 2, 3)]
        for counter, (nex_id, char_id) in enumerate(pairs, 1):
            assert re.search(
                r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id),
                newnex.write(charblock=True))
示例#16
0
class Test_Binarise(unittest.TestCase):
    """Check binarise() on a 3-taxon matrix with two multistate characters."""

    def setUp(self):
        # Parse the multistate matrix, then replace it by its binarised form.
        self.nex = NexusReader()
        self.nex.read_string(
        """Begin data;
        Dimensions ntax=3 nchar=2;
        Format datatype=standard symbols="01" gap=-;
        Charstatelabels
            1 char1, 2 char2;
        Matrix
        Maori               14
        Dutch               25
        Latin               36
        ;""")
        self.nex = binarise(self.nex)

    def test_to_binary(self):
        """Test Nexus -> Binary: Two Character"""
        # Each original character is expected to become three binary
        # characters named <label>_0 .. <label>_2.
        expected = {
            'char1_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char1_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char1_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
            'char2_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char2_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char2_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
        }

        for char, data in expected.items():
            for taxon, exp_value in data.items():
                assert self.nex.data[char][taxon] == exp_value

    def test_to_binary_nchar(self):
        """Test Nexus -> Binary: Number of Characters"""
        assert len(self.nex.characters) == 6

    def test_to_binary_symbollist(self):
        """Test Nexus -> Binary: Update Symbol List"""
        # check symbol list was updated
        assert len(self.nex.symbols) == 2
        assert '1' in self.nex.symbols
        assert '0' in self.nex.symbols

    def test_to_binary_nexus(self):
        """Test Nexus -> Binary: Nexus"""
        nexus = self.nex.make_nexus(interleave=False)
        # Raw strings keep "\s" a regex escape instead of an invalid
        # string-literal escape sequence.
        assert re.search(r"Dutch\s+010010", nexus)
        assert re.search(r"Maori\s+100100", nexus)
        assert re.search(r"Latin\s+001001", nexus)
 def test_ok_starting_with_one(self):
     # Translate table whose numbering starts at 1.
     reader = NexusReader()
     reader.read_string("""
     #NEXUS
 
     begin trees;
         translate
             1 Tom,
             2 Simon,
             3 Fred;
             tree tree = (1,2,3)
     end;
     """)
     assert len(reader.trees.translators) == 3
     # Every number of the table must be available as a string key.
     for key in ('1', '2', '3'):
         assert key in reader.trees.translators
 def test_ok_starting_with_zero(self):
     # Translate table whose numbering starts at 0.
     reader = NexusReader()
     reader.read_string("""
     #NEXUS
 
     begin trees;
         translate
             0 Tom,
             1 Simon,
             2 Fred;
             tree tree = (0,1,2)
     end;
     """)
     assert len(reader.trees.translators) == 3
     # Every number of the table must be available as a string key.
     for key in ('0', '1', '2'):
         assert key in reader.trees.translators
class Test_DataHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE data blocks"""

    def setUp(self):
        # A data block carrying the extra TITLE and LINK statements.
        self.nex = NexusReader()
        self.nex.read_string("""
        #NEXUS

        Begin data;
        TITLE Untitled_Block_of_Taxa;
        LINK Taxa = Untitled_Block_of_Taxa;
        Dimensions ntax=2 nchar=2;
        Format datatype=standard gap=- symbols="01";
        Matrix
        Harry              00
        Simon              01
            ;
        End;
        """)

    def test_attributes(self):
        # Both extra statements must be kept, in order, as block attributes.
        assert len(self.nex.data.attributes) == 2
        assert self.nex.data.attributes[0] == \
            """TITLE Untitled_Block_of_Taxa;"""
        assert self.nex.data.attributes[1] == \
            """LINK Taxa = Untitled_Block_of_Taxa;"""

    def test_write(self):
        # Raw strings keep "\s" a regex escape instead of an invalid
        # string-literal escape sequence.
        expected_patterns = [
            r'^begin data;$',
            r'^\s+TITLE Untitled_Block_of_Taxa;$',
            r'^\s+LINK Taxa = Untitled_Block_of_Taxa;$',
            r'^\s+dimensions ntax=2 nchar=2;$',
            r'^\s+format datatype=standard gap=- symbols="01";$',
            r"^matrix$",
            r"^Harry\s+00",
            r"^Simon\s+01$",
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        # Each pattern is matched against individual lines (re.MULTILINE).
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
class Test_TallyBySite(unittest.TestCase):
    """Check tally_by_site() on a 3-taxon, 6-site matrix."""

    def setUp(self):
        reader = NexusReader()
        reader.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111-?
            Simon              0011-?
            Elvis              0001-?
            ;"""
        )
        self.nex = reader

    def test_errorcheck(self):
        # Anything that is not a nexus object must be rejected.
        for bad_input in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_site, bad_input)

    def test_tally_by_site(self):
        tally = tally_by_site(self.nex)
        taxa = ('Harry', 'Simon', 'Elvis')
        # One string per site; its characters are the states of Harry,
        # Simon and Elvis at that site.
        columns = ('000', '100', '110', '111', '---', '???')
        for site, states in enumerate(columns):
            for taxon, state in zip(taxa, states):
                assert taxon in tally[site][state]
示例#21
0
class Test_CheckZeros(unittest.TestCase):
    """Check check_zeros() and remove_zeros() on a 4-taxon, 8-site matrix."""

    def setUp(self):
        reader = NexusReader()
        reader.read_string("""
        Begin data;
        Dimensions ntax=4 nchar=8;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        [                  01234567]
        Harry              01000000
        Simon              0010000-
        Betty              00010-0?
        Louise             000010?0
        ;""")
        self.nex = reader
        # Result of check_zeros() with default arguments, shared by the tests.
        self.found = check_zeros(self.nex)

    def test_find_zero(self):
        # site 0 holds only "0"
        self.assertIn(0, self.found)

    def test_find_missing_dash(self):
        # site 5 holds "0" and "-"
        self.assertIn(5, self.found)

    def test_find_missing_questionmark(self):
        # site 6 holds "0" and "?"
        self.assertIn(6, self.found)

    def test_find_complex(self):
        # site 7 holds "0", "-" and "?"
        self.assertIn(7, self.found)

    def test_change_missing(self):
        # With only "-" treated as missing, sites 6 and 7 no longer qualify.
        only_dash = check_zeros(self.nex, missing=['-'])
        assert only_dash == [0, 5]

    def test_change_absence(self):
        # Treating "1" as an absence too makes every site qualify.
        everything = check_zeros(self.nex, absences=['1', '0'])
        assert everything == [0, 1, 2, 3, 4, 5, 6, 7]

    def test_remove_zeros(self):
        trimmed = remove_zeros(self.nex)
        assert trimmed.data.nchar == 4
        remaining = (
            ('Harry', ['1', '0', '0', '0']),
            ('Simon', ['0', '1', '0', '0']),
            ('Betty', ['0', '0', '1', '0']),
            ('Louise', ['0', '0', '0', '1']),
        )
        for taxon, row in remaining:
            assert trimmed.data.matrix[taxon] == row
 def test_regression_include_invisible_taxa(self):
     """Include taxa that have no entries"""
     source = """
     #NEXUS
     
     BEGIN DATA;
         DIMENSIONS  NTAX=15 NCHAR=7;
         FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES;
     MATRIX
     
     Gertrude                0000001
     Debbie                  0001000
     Zarathrustra            0000000
     Christie                0010000
     Benny                   0100000
     Bertha                  0100000
     Craig                   0010000
     Fannie-May              0000010
     Charles                 0010000
     Annik                   1000000
     Frank                   0000010
     Amber                   1000000
     Andreea                 1000000
     Edward                  0000100
     Donald                  0001000
     ;
     END;
     """

     reader = NexusReader()
     reader.read_string(source)
     msnex = multistatise(reader)

     for taxon, sites in msnex.data.matrix.items():
         initial = taxon[0]
         # "Z..." is the all-zero taxon; it is checked separately below.
         if initial != 'Z':
             # The taxon names were chosen so that their first letter is the
             # expected multistate symbol.
             assert initial == sites[0], \
                 "%s should be %s not %s" % (taxon, taxon[0], sites[0])
     # A taxon without any entries must still be present, coded as missing.
     assert 'Zarathrustra' in msnex.data.matrix
     assert msnex.data.matrix['Zarathrustra'][0] == '?'
示例#23
0
    def test_regression_include_invisible_taxa(self):
        """Include taxa that have no entries"""
        source = """
        #NEXUS
        
        BEGIN DATA;
            DIMENSIONS  NTAX=15 NCHAR=7;
            FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES;
        MATRIX
        
        Gertrude                0000001
        Debbie                  0001000
        Zarathrustra            0000000
        Christie                0010000
        Benny                   0100000
        Bertha                  0100000
        Craig                   0010000
        Fannie-May              0000010
        Charles                 0010000
        Annik                   1000000
        Frank                   0000010
        Amber                   1000000
        Andreea                 1000000
        Edward                  0000100
        Donald                  0001000
        ;
        END;
        """

        reader = NexusReader()
        reader.read_string(source)
        msnex = multistatise(reader)

        for taxon, sites in msnex.data.matrix.items():
            # The all-zero taxon ("Z...") is verified after the loop.
            if taxon[0] != 'Z':
                # first letter of the taxon name is the expected state
                assert taxon[0] == sites[0], \
                    "%s should be %s not %s" % (taxon, taxon[0], sites[0])
        # The taxon with no entries must survive and be coded as "?".
        assert 'Zarathrustra' in msnex.data.matrix
        assert msnex.data.matrix['Zarathrustra'][0] == '?'
示例#24
0
class Test_TallyBySite(unittest.TestCase):
    """tally_by_site(): which taxa show which state at each site."""

    def setUp(self):
        source = NexusReader()
        source.read_string("""Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111-?
            Simon              0011-?
            Elvis              0001-?
            ;""")
        self.nex = source

    def test_errorcheck(self):
        # Non-nexus arguments must raise TypeError.
        for not_a_nexus in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_site, not_a_nexus)

    def test_tally_by_site(self):
        tally = tally_by_site(self.nex)
        # (site, state, taxon) triples, grouped by site.
        expected = [
            # 000
            (0, '0', 'Harry'), (0, '0', 'Simon'), (0, '0', 'Elvis'),
            # 100
            (1, '1', 'Harry'), (1, '0', 'Simon'), (1, '0', 'Elvis'),
            # 110
            (2, '1', 'Harry'), (2, '1', 'Simon'), (2, '0', 'Elvis'),
            # 111
            (3, '1', 'Harry'), (3, '1', 'Simon'), (3, '1', 'Elvis'),
            # ---
            (4, '-', 'Harry'), (4, '-', 'Simon'), (4, '-', 'Elvis'),
            # ???
            (5, '?', 'Harry'), (5, '?', 'Simon'), (5, '?', 'Elvis'),
        ]
        for site, state, taxon in expected:
            assert taxon in tally[site][state]
 def test_count_other_values_two(self):
     # Tally the cells equal to "A" or "B" for every taxon.
     reader = NexusReader()
     reader.read_string("""#NEXUS 
     Begin data;
     Dimensions ntax=5 nchar=3;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              0A0  [No missing]
     Simon              0AB  [one missing]
     Peter              0-B  [one gap]
     Betty              ?-1  [one gap and one missing = 2 missing]
     Louise             ???  [three missing]
         ;
     End;
     """)
     wanted = dict(Harry=1, Simon=2, Peter=1, Betty=0, Louise=0)
     observed = count_site_values(reader, ["A", "B"])
     # Only taxa present in the returned tally are compared.
     for taxon in observed:
         assert observed[taxon] == wanted[taxon]
 def test_count_other_values_one(self):
     # Tally the cells equal to the single value "A" for every taxon.
     reader = NexusReader()
     reader.read_string("""#NEXUS
     Begin data;
     Dimensions ntax=5 nchar=3;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              0A0  [No missing]
     Simon              0A0  [one missing]
     Peter              0-0  [one gap]
     Betty              ?-1  [one gap and one missing = 2 missing]
     Louise             ???  [three missing]
         ;
     End;
     """)
     wanted = dict(Harry=1, Simon=1, Peter=0, Betty=0, Louise=0)
     observed = count_site_values(reader, 'A')
     # Only taxa present in the returned tally are compared.
     for taxon in observed:
         assert observed[taxon] == wanted[taxon]
class Test_TallyByTaxon(unittest.TestCase):
    """Check tally_by_taxon() on a 3-taxon, 6-site matrix."""

    def setUp(self):
        reader = NexusReader()
        reader.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111-?
            Simon              0011-?
            Elvis              0001-?
            ;"""
        )
        self.nex = reader

    def test_errorcheck(self):
        # Anything that is not a nexus object must be rejected.
        for bad_input in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_taxon, bad_input)

    def test_tally_by_taxon(self):
        tally = tally_by_taxon(self.nex)
        taxa = ('Harry', 'Simon', 'Elvis')
        # For each state, the site indices expected for Harry, Simon and
        # Elvis respectively.
        expected = (
            ('0', ([0], [0, 1], [0, 1, 2])),
            ('1', ([1, 2, 3], [2, 3], [3])),
            ('-', ([4], [4], [4])),
            ('?', ([5], [5], [5])),
        )
        for state, per_taxon in expected:
            for taxon, sites in zip(taxa, per_taxon):
                assert tally[taxon][state] == sites
示例#28
0
class Test_CountBinarySetSize(unittest.TestCase):
    """Check count_binary_set_size() on a 3-taxon, 4-site matrix."""

    def setUp(self):
        reader = NexusReader()
        reader.read_string("""Begin data;
            Dimensions ntax=3 nchar=4;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111
            Simon              0011
            Elvis              0001
            ;""")
        self.nex = reader

    def test_errorcheck(self):
        # Non-nexus arguments must raise TypeError.
        for bad_input in ("I am a string", 0):
            self.assertRaises(TypeError, count_binary_set_size, bad_input)

    def test_count_binary_set_size(self):
        tally = count_binary_set_size(self.nex)
        # Keys 0 through 3 are each expected to have a count of one.
        for key in range(4):
            assert tally[key] == 1
class Test_TallyByTaxon(unittest.TestCase):
    """tally_by_taxon(): which sites show which state for each taxon."""

    def setUp(self):
        source = NexusReader()
        source.read_string("""Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111-?
            Simon              0011-?
            Elvis              0001-?
            ;""")
        self.nex = source

    def test_errorcheck(self):
        # Non-nexus arguments must raise TypeError.
        for not_a_nexus in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_taxon, not_a_nexus)

    def test_tally_by_taxon(self):
        tally = tally_by_taxon(self.nex)
        # (taxon, state, expected site indices), grouped by state.
        expected = [
            # sites that are zero
            ('Harry', '0', [0]),
            ('Simon', '0', [0, 1]),
            ('Elvis', '0', [0, 1, 2]),
            # sites that are 1
            ('Harry', '1', [1, 2, 3]),
            ('Simon', '1', [2, 3]),
            ('Elvis', '1', [3]),
            # sites that are -
            ('Harry', '-', [4]),
            ('Simon', '-', [4]),
            ('Elvis', '-', [4]),
            # sites that are ?
            ('Harry', '?', [5]),
            ('Simon', '?', [5]),
            ('Elvis', '?', [5]),
        ]
        for taxon, state, sites in expected:
            assert tally[taxon][state] == sites
示例#30
0
class Test_DataHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE data blocks"""
    def setUp(self):
        # A data block carrying one extra TITLE statement.
        self.nex = NexusReader()
        self.nex.read_string("""
        #NEXUS 
        
        Begin data;
        TITLE something;
        Dimensions ntax=2 nchar=2;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              00
        Simon              01
            ;
        End;
        """)

    def test_attr_find(self):
        # The TITLE statement is the only extra attribute of the block.
        assert len(self.nex.data.attributes) == 1

    def test_write(self):
        # Raw strings keep "\s" a regex escape instead of an invalid
        # string-literal escape sequence.
        expected_patterns = [
            r'^begin data;$',
            r'^\s+TITLE something;$',
            r'^\s+dimensions ntax=2 nchar=2;$',
            r'^\s+format datatype=standard symbols="01" gap=-;$',
            r"^matrix$",
            r"^Harry\s+00",
            r"^Simon\s+01$",
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        # Each pattern is matched against individual lines (re.MULTILINE).
        for expected in expected_patterns:
            assert re.search(expected, written,
                             re.MULTILINE), 'Expected "%s"' % expected
class Test_CountBinarySetSize(unittest.TestCase):
    """count_binary_set_size() on a small binary matrix."""

    def setUp(self):
        source = NexusReader()
        source.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=4;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111
            Simon              0011
            Elvis              0001
            ;"""
        )
        self.nex = source

    def test_errorcheck(self):
        # Anything that is not a nexus object must be rejected.
        for not_a_nexus in ("I am a string", 0):
            self.assertRaises(TypeError, count_binary_set_size, not_a_nexus)

    def test_count_binary_set_size(self):
        tally = count_binary_set_size(self.nex)
        # Every key from 0 to 3 must map to exactly one.
        for key in (0, 1, 2, 3):
            assert tally[key] == 1
示例#32
0
class Test_CombineNexuses(unittest.TestCase):
    """Check combine_nexuses() with matching and non-matching taxon sets."""

    def setUp(self):
        self.nex1 = NexusReader()
        self.nex1.read_string("""Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              1
            Simon              2
            ;""")
        # set short_filename to test that functionality. If `combine_nexuses`
        # doesn't use `short_filename`, then the nex1 characters will be
        # identified as 1.xx, rather than 0.xx
        self.nex1.short_filename = '0'

        # Same taxa as nex1, different symbols.
        self.nex2 = NexusReader()
        self.nex2.read_string("""Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="34" gap=-;
            Matrix
            Harry              3
            Simon              4
            ;""")
        # Shares only "Simon" with nex1.
        self.nex3 = NexusReader()
        self.nex3.read_string("""Begin data;
            Dimensions ntax=3 nchar=1;
            Format datatype=standard symbols="345" gap=-;
            Matrix
            Betty              3
            Boris              4
            Simon              5
            ;""")

    def test_failure_on_nonlist_1(self):
        self.assertRaises(TypeError, combine_nexuses, "I am not a list")

    def test_failure_on_nonlist_2(self):
        self.assertRaises(TypeError, combine_nexuses, ["hello"])

    def test_combine_simple(self):
        newnex = combine_nexuses([self.nex1, self.nex2])
        assert newnex.data['0.1']['Harry'] == '1'
        assert newnex.data['0.1']['Simon'] == '2'
        assert newnex.data['2.1']['Harry'] == '3'
        assert newnex.data['2.1']['Simon'] == '4'

    def test_combine_simple_generated_matrix(self):
        newnex = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bSimon\s+24\b""", newnex)
        assert re.search(r"""\bHarry\s+13\b""", newnex)

    def test_combine_simple_generated_formatline(self):
        newnex = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bNTAX=2\b""", newnex)
        assert re.search(r"""\bNCHAR=2\b""", newnex)
        assert re.search(r'\sSYMBOLS="1234"[\s;]', newnex)

    def test_combine_missing(self):
        newnex = combine_nexuses([self.nex1, self.nex3])
        assert newnex.data['0.1']['Harry'] == '1'
        assert newnex.data['0.1']['Simon'] == '2'
        assert newnex.data['2.1']['Betty'] == '3'
        assert newnex.data['2.1']['Boris'] == '4'

    def test_combine_missing_generated_matrix(self):
        newnex = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bSimon\s+25\b""", newnex)
        # Harry is absent from nex3, so his second cell must be the "?"
        # marker. The escaped question mark matches it literally; no
        # trailing \b, because "?" is not a word character.
        assert re.search(r"""\bHarry\s+1\?""", newnex)
        assert re.search(r"""\bBetty\s+\?3\b""", newnex)
        assert re.search(r"""\bBoris\s+\?4\b""", newnex)

    def test_combine_missing_generated_formatline(self):
        newnex = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bNTAX=4\b""", newnex)
        assert re.search(r"""\bNCHAR=2\b""", newnex)
        assert re.search(r'\sSYMBOLS="12345"[\s;]', newnex)

    def test_combine_with_character_labels(self):
        # Two labelled 3x3 matrices whose symbols are "123" and "456".
        n1 = NexusReader()
        n1.read_string("""
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="123";
                CHARSTATELABELS
                    1 char1,
                    2 char2,
                    3 char3
            ;
            MATRIX
            Tax1         123
            Tax2         123
            Tax3         123
            ;
            """)
        n2 = NexusReader()
        n2.read_string("""
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="456";
                CHARSTATELABELS
                    1 char1,
                    2 char2,
                    3 char3
            ;
            MATRIX
            Tax1         456
            Tax2         456
            Tax3         456
            ;
            """)
        newnex = combine_nexuses([n1, n2])
        assert re.search(r"""\bNTAX=3\b""", newnex.write())
        assert re.search(r"""\bNCHAR=6\b""", newnex.write())
        assert re.search(r'\sSYMBOLS="123456"[\s;]', newnex.write())

        for tax in [1, 2, 3]:
            assert re.search(r"""\bTax%d\s+123456\b""" % tax, newnex.write())

        # The character block numbers the labels consecutively from 1.
        counter = 1
        for nex_id in [1, 2]:
            for char_id in [1, 2, 3]:
                assert re.search(
                    r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id),
                    newnex.write(charblock=True))
                counter += 1
示例#33
0
class Test_Multistatise(unittest.TestCase):
    """Exercise ``multistatise`` on small binary-coded nexus fixtures."""

    def setUp(self):
        """Parse a 4-taxon, 4-character binary matrix and multistatise it."""
        reader = NexusReader()
        reader.read_string("""
        Begin data;
        Dimensions ntax=4 nchar=4;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              1000
        Simon              0100
        Betty              0010
        Louise             0001
        ;""")
        self.nex = multistatise(reader)

    def test_nexusreader_transformation(self):
        """The converted object must be a NexusReader."""
        is_reader = isinstance(self.nex, NexusReader)
        assert is_reader, "Nexus_obj should be a NexusReader instance"

    def test_block_find(self):
        """The converted nexus must expose a ``data`` block."""
        block_names = self.nex.blocks
        assert 'data' in block_names

    def test_ntaxa_recovery(self):
        """The converted data block must report four taxa."""
        taxa_count = self.nex.data.ntaxa
        assert taxa_count == 4

    def test_nchar_recovery(self):
        """The converted data block must report a single character."""
        char_count = self.nex.data.nchar
        assert char_count == 1

    def test_matrix(self):
        """Each taxon must carry exactly one state: A, B, C or D."""
        expected_states = (
            ('Harry', 'A'),
            ('Simon', 'B'),
            ('Betty', 'C'),
            ('Louise', 'D'),
        )
        for taxon, state in expected_states:
            matrix = self.nex.data.matrix
            # The whole matrix is the failure message, to aid debugging.
            assert matrix[taxon] == [state], matrix

    def test_regression_include_invisible_taxa(self):
        """Include taxa that have no entries"""
        nexus_text = """
        #NEXUS
        
        BEGIN DATA;
            DIMENSIONS  NTAX=15 NCHAR=7;
            FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES;
        MATRIX
        
        Gertrude                0000001
        Debbie                  0001000
        Zarathrustra            0000000
        Christie                0010000
        Benny                   0100000
        Bertha                  0100000
        Craig                   0010000
        Fannie-May              0000010
        Charles                 0010000
        Annik                   1000000
        Frank                   0000010
        Amber                   1000000
        Andreea                 1000000
        Edward                  0000100
        Donald                  0001000
        ;
        END;
        """

        reader = NexusReader()
        reader.read_string(nexus_text)
        converted = multistatise(reader)

        for taxon, sites in converted.data.matrix.items():
            initial = taxon[0]
            # 'Z' (Zarathrustra, all zeros) is verified after the loop.
            if initial != 'Z':
                # The fixture names each taxon so that its first letter
                # is the expected character state.
                assert initial == sites[0], \
                    "%s should be %s not %s" % (taxon, initial, sites[0])

        # A taxon without any entries must still be present, coded '?'.
        assert 'Zarathrustra' in converted.data.matrix
        assert converted.data.matrix['Zarathrustra'][0] == '?'

    def test_error_on_too_many_states(self):
        """A 30-character input must make ``multistatise`` raise ValueError."""
        reader = NexusReader()
        reader.read_string("""
        Begin data;
        Dimensions ntax=1 nchar=30;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        A   111111111111111111111111111111
        ;""")
        self.nex = reader
        with self.assertRaises(ValueError):
            multistatise(reader)
示例#34
0
class Test_CombineNexuses(unittest.TestCase):
    """Test combine_nexuses"""

    def setUp(self):
        """Create three one-character nexuses used as inputs by the tests.

        ``nex1`` and ``nex2`` both contain Harry and Simon. ``nex3``
        contains Betty, Boris and Simon, so it shares only Simon with
        ``nex1``.
        """
        self.nex1 = NexusReader()
        self.nex1.read_string(
            """Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              1
            Simon              2
            ;"""
        )
        self.nex2 = NexusReader()
        self.nex2.read_string(
            """Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="34" gap=-;
            Matrix
            Harry              3
            Simon              4
            ;"""
        )
        self.nex3 = NexusReader()
        self.nex3.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=1;
            Format datatype=standard symbols="345" gap=-;
            Matrix
            Betty              3
            Boris              4
            Simon              5
            ;"""
        )

    def test_failure_on_nonlist_1(self):
        """A plain string instead of a list must raise TypeError."""
        self.assertRaises(TypeError, combine_nexuses, "I am not a list")

    def test_failure_on_nonlist_2(self):
        """A list whose items are not NexusReader instances must raise."""
        self.assertRaises(TypeError, combine_nexuses, ["hello"])

    def test_combine_simple(self):
        """Sites are keyed '<nexus index>.<site>' and keep their states."""
        newnex = combine_nexuses([self.nex1, self.nex2])
        assert newnex.data['1.1']['Harry'] == '1'
        assert newnex.data['1.1']['Simon'] == '2'
        assert newnex.data['2.1']['Harry'] == '3'
        assert newnex.data['2.1']['Simon'] == '4'

    def test_combine_simple_generated_matrix(self):
        """The written matrix concatenates each taxon's states in order."""
        written = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bSimon\s+24\b""", written)
        assert re.search(r"""\bHarry\s+13\b""", written)

    def test_combine_simple_generated_formatline(self):
        """The written header reflects the merged dimensions and symbols."""
        written = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bNTAX=2\b""", written)
        assert re.search(r"""\bNCHAR=2\b""", written)
        assert re.search(r'\sSYMBOLS="1234"[\s;]', written)

    def test_combine_missing(self):
        """Taxa present in only one input keep their state for that input."""
        newnex = combine_nexuses([self.nex1, self.nex3])
        assert newnex.data['1.1']['Harry'] == '1'
        assert newnex.data['1.1']['Simon'] == '2'
        assert newnex.data['2.1']['Betty'] == '3'
        assert newnex.data['2.1']['Boris'] == '4'

    def test_combine_missing_generated_matrix(self):
        """Taxa absent from one input are written with '?' for that site."""
        written = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bSimon\s+25\b""", written)
        # Harry is absent from nex3, so the second site is expected to be
        # the '?' marker. ``\?`` is an escaped literal question mark; a
        # trailing ``\b`` cannot be used because '?' is not a word
        # character, so ``(?!\S)`` requires whitespace or end of text.
        assert re.search(r"""\bHarry\s+1\?(?!\S)""", written)
        assert re.search(r"""\bBetty\s+\?3\b""", written)
        assert re.search(r"""\bBoris\s+\?4\b""", written)

    def test_combine_missing_generated_formatline(self):
        """The header counts the union of taxa and the merged symbols."""
        written = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bNTAX=4\b""", written)
        assert re.search(r"""\bNCHAR=2\b""", written)
        assert re.search(r'\sSYMBOLS="12345"[\s;]', written)

    def test_combine_with_character_labels(self):
        """Combine two nexuses that both declare CHARSTATELABELS.

        The combined nexus is rendered once without and once with the
        character block; all assertions run against those two strings.
        """
        n1 = NexusReader()
        n1.read_string(
            """
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="123";
                CHARSTATELABELS
            		1 char1,
            		2 char2,
            		3 char3
            ;
            MATRIX
            Tax1         123
            Tax2         123
            Tax3         123
            ;
            """
        )
        n2 = NexusReader()
        n2.read_string(
            """
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="456";
                CHARSTATELABELS
            		1 char1,
            		2 char2,
            		3 char3
            ;
            MATRIX
            Tax1         456
            Tax2         456
            Tax3         456
            ;
            """
        )
        newnex = combine_nexuses([n1, n2])

        # Render the plain output a single time; it does not change
        # between assertions.
        plain = newnex.write()
        assert re.search(r"""\bNTAX=3\b""", plain)
        assert re.search(r"""\bNCHAR=6\b""", plain)
        assert re.search(r'\sSYMBOLS="123456"[\s;]', plain)

        for tax in [1, 2, 3]:
            assert re.search(r"""\bTax%d\s+123456\b""" % tax, plain)

        # Same for the variant that includes the character block.
        with_charblock = newnex.write(charblock=True)

        # Labels are expected in input order, numbered 1..6 overall and
        # prefixed with the 1-based index of the nexus they came from.
        counter = 1
        for nex_id in [1, 2]:
            for char_id in [1, 2, 3]:
                assert re.search(
                    r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id),
                    with_charblock
                )
                counter += 1
示例#35
0
class Test_Multistatise(unittest.TestCase):
    """Test multistatise"""
    def setUp(self):
        """Parse a 4-taxon, 4-character binary matrix and multistatise it."""
        matrix_text = """Begin data;
        Dimensions ntax=4 nchar=4;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              1000
        Simon              0100
        Betty              0010
        Louise             0001
        ;"""
        reader = NexusReader()
        reader.read_string(matrix_text)
        # Tests only ever see the converted object.
        self.nex = multistatise(reader)
        
    def test_nexusreader_transformation(self):
        """The converted object must be a NexusReader."""
        is_reader = isinstance(self.nex, NexusReader)
        assert is_reader, "Nexus_obj should be a NexusReader instance"

    def test_block_find(self):
        assert 'data' in self.nex.blocks
    
    def test_ntaxa_recovery(self):
        assert self.nex.data.ntaxa == 4
        
    def test_nchar_recovery(self):
        assert self.nex.data.nchar == 1
        
    def test_matrix(self):
        assert self.nex.data.matrix['Harry'][0] == 'A'
        assert self.nex.data.matrix['Simon'][0] == 'B'
        assert self.nex.data.matrix['Betty'][0] == 'C'
        assert self.nex.data.matrix['Louise'][0] == 'D'
    
    
    def test_regression_include_invisible_taxa(self):
        """Include taxa that have no entries"""
        nexus_text = """
        #NEXUS
        
        BEGIN DATA;
            DIMENSIONS  NTAX=15 NCHAR=7;
            FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES;
        MATRIX
        
        Gertrude                0000001
        Debbie                  0001000
        Zarathrustra            0000000
        Christie                0010000
        Benny                   0100000
        Bertha                  0100000
        Craig                   0010000
        Fannie-May              0000010
        Charles                 0010000
        Annik                   1000000
        Frank                   0000010
        Amber                   1000000
        Andreea                 1000000
        Edward                  0000100
        Donald                  0001000
        ;
        END;
        """

        reader = NexusReader()
        reader.read_string(nexus_text)
        converted = multistatise(reader)

        for taxon, sites in converted.data.matrix.items():
            initial = taxon[0]
            # 'Z' (Zarathrustra, all zeros) is verified after the loop.
            if initial != 'Z':
                # The fixture names each taxon so that its first letter
                # is the expected character state.
                assert initial == sites[0], (
                    "%s should be %s not %s" % (taxon, initial, sites[0])
                )

        # A taxon without any entries must still be present, coded '?'.
        assert 'Zarathrustra' in converted.data.matrix
        assert converted.data.matrix['Zarathrustra'][0] == '?'