def snpMatrixGenerator(sourceFile, destFile, recordAll=False, recordRandomSample=True):
    """Collect SNP columns from every data block of a NEXUS stream.

    ``sourceFile`` is read line by line and cut into chunks at each line that
    contains both "begin" and "data" (case-insensitive). Each chunk is parsed
    with ``NexusReader``; when the parse exposes a "data" block the reader is
    passed to ``_findDifferences`` together with the shared ``NexusWriter``
    and the running column counter. The writer's output plus a trailing
    newline is finally written to ``destFile``.

    Args:
        sourceFile: open, line-iterable file-like object with NEXUS text.
        destFile: open, writable file-like object receiving the result.
        recordAll: forwarded unchanged to ``_findDifferences``.
        recordRandomSample: forwarded unchanged to ``_findDifferences``.

    Raises:
        SystemExit: when ``recordAll`` and ``recordRandomSample`` are equal
            (both set or both unset); "Invalid Options" is printed first.

    Both file objects are closed before returning, including when parsing or
    writing raises.
    """
    if recordAll == recordRandomSample:
        # Call form of print works on Python 2 and 3; SystemExit is raised
        # directly because the ``exit()`` builtin is an interactive helper
        # installed by ``site`` and is not guaranteed to exist.
        print("Invalid Options")
        raise SystemExit()

    try:
        destNexus = NexusWriter()

        def _flush(text, column):
            # Parse one accumulated chunk; return the (possibly advanced)
            # column counter.
            reader = NexusReader()
            reader.read_string(text)
            if "data" in reader.blocks:
                return _findDifferences(reader, destNexus, column,
                                        recordAll, recordRandomSample)
            return column

        snpCol = 0
        pending = []
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                # A new data block starts: process what was gathered so far.
                snpCol = _flush("".join(pending), snpCol)
                pending = [line]
            else:
                pending.append(line)
        # The last chunk has no following "begin data" line to trigger it.
        snpCol = _flush("".join(pending), snpCol)

        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        try:
            destFile.close()
        finally:
            sourceFile.close()
def snpMatrixGenerator(sourceFile, destFile, recordAll=False, recordRandomSample=True):
    """Collect SNP columns from every data block of a NEXUS stream.

    ``sourceFile`` is read line by line and cut into chunks at each line that
    contains both "begin" and "data" (case-insensitive). Each chunk is parsed
    with ``NexusReader``; when the parse exposes a "data" block the reader is
    passed to ``_findDifferences`` together with the shared ``NexusWriter``
    and the running column counter. The writer's output plus a trailing
    newline is finally written to ``destFile``.

    Args:
        sourceFile: open, line-iterable file-like object with NEXUS text.
        destFile: open, writable file-like object receiving the result.
        recordAll: forwarded unchanged to ``_findDifferences``.
        recordRandomSample: forwarded unchanged to ``_findDifferences``.

    Raises:
        SystemExit: when ``recordAll`` and ``recordRandomSample`` are equal
            (both set or both unset); "Invalid Options" is printed first.

    Both file objects are closed before returning, including when parsing or
    writing raises.
    """
    if recordAll == recordRandomSample:
        # Call form of print works on Python 2 and 3; SystemExit is raised
        # directly because the ``exit()`` builtin is an interactive helper
        # installed by ``site`` and is not guaranteed to exist.
        print("Invalid Options")
        raise SystemExit()

    try:
        destNexus = NexusWriter()

        def _flush(text, column):
            # Parse one accumulated chunk; return the (possibly advanced)
            # column counter.
            reader = NexusReader()
            reader.read_string(text)
            if "data" in reader.blocks:
                return _findDifferences(reader, destNexus, column,
                                        recordAll, recordRandomSample)
            return column

        snpCol = 0
        pending = []
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                # A new data block starts: process what was gathered so far.
                snpCol = _flush("".join(pending), snpCol)
                pending = [line]
            else:
                pending.append(line)
        # The last chunk has no following "begin data" line to trigger it.
        snpCol = _flush("".join(pending), snpCol)

        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        try:
            destFile.close()
        finally:
            sourceFile.close()
def test_count_other_values_two(self):
    """Compare count_site_values(nexus, ['A', 'B']) with the expected per-taxon numbers."""
    fixture = (
        '#NEXUS Begin data; Dimensions ntax=5 nchar=3; '
        'Format datatype=standard symbols="01" gap=-; Matrix '
        'Harry 0A0 [No missing] '
        'Simon 0AB [one missing] '
        'Peter 0-B [one gap] '
        'Betty ?-1 [one gap and one missing = 2 missing] '
        'Louise ??? [three missing] '
        '; End; '
    )
    reader = NexusReader()
    reader.read_string(fixture)

    wanted = dict(Harry=1, Simon=2, Peter=1, Betty=0, Louise=0)
    observed = count_site_values(reader, ['A', 'B'])
    for name in observed:
        assert observed[name] == wanted[name]
def test_find_unique_sites_2(self):
    """Check which site indices find_unique_sites reports for a 4x7 matrix with gap/missing cells."""
    fixture = (
        'Begin data; Dimensions ntax=4 nchar=7; '
        'Format datatype=standard symbols="01" gap=-; Matrix '
        'Harry 10000?- '
        'Simon 1100011 '
        'Betty 1110000 '
        'Louise 1111000 ;'
    )
    reader = NexusReader()
    reader.read_string(fixture)
    unique = find_unique_sites(reader)

    # Site 1 (3x1 and 1x0) must not count as unique, i.e. sites with ONE
    # absent taxon are ignored; sites 0, 2 and 4 (constant) are not unique
    # either.
    for site in (1, 0, 2, 4):
        assert site not in unique

    # Site 3 is a simple unique site; sites 5 and 6 are unique too once the
    # missing data is handled appropriately.
    for site in (3, 5, 6):
        assert site in unique
def test_find_unique_sites_2(self):
    """Check which site indices find_unique_sites reports for a 4x7 matrix with gap/missing cells."""
    fixture = (
        'Begin data; Dimensions ntax=4 nchar=7; '
        'Format datatype=standard symbols="01" gap=-; Matrix '
        'Harry 10000?- '
        'Simon 1100011 '
        'Betty 1110000 '
        'Louise 1111000 ;'
    )
    reader = NexusReader()
    reader.read_string(fixture)
    unique = find_unique_sites(reader)

    # Site 1 (3x1 and 1x0) must not count as unique, i.e. sites with ONE
    # absent taxon are ignored; sites 0, 2 and 4 (constant) are not unique
    # either.
    for site in (1, 0, 2, 4):
        assert site not in unique

    # Site 3 is a simple unique site; sites 5 and 6 are unique too once the
    # missing data is handled appropriately.
    for site in (3, 5, 6):
        assert site in unique
def test_read_string(self):
    """Load example.nex as text and check read_string exposes its data block."""
    # Context manager so the handle is closed even if read() raises.
    with open(os.path.join(EXAMPLE_DIR, 'example.nex')) as handle:
        data = handle.read()

    nex = NexusReader()
    nex.read_string(data)
    assert 'data' in nex.blocks
    assert 'Simon' in nex.blocks['data'].matrix
def test_read_string(self):
    """Load example.nex as text and check read_string exposes its data block."""
    # Context manager so the handle is closed even if read() raises.
    with open(os.path.join(EXAMPLE_DIR, 'example.nex')) as handle:
        data = handle.read()

    nex = NexusReader()
    nex.read_string(data)
    assert 'data' in nex.blocks
    assert 'Simon' in nex.blocks['data'].matrix
def test_notimplemented_exception(self):
    """Parsing a 'something' block and anonymising it must raise NotImplementedError."""
    fixture = (
        'Begin something; Dimensions ntax=5 nchar=1; '
        'Format datatype=standard symbols="01" gap=-; '
        'Matrix Harry 1 ;'
    )

    def parse_and_anonymise():
        reader = NexusReader()
        reader.read_string(fixture)
        anonymise(reader)

    self.assertRaises(NotImplementedError, parse_and_anonymise)
def test_notimplemented_exception(self):
    """Parsing a 'something' block and anonymising it must raise NotImplementedError."""
    fixture = (
        'Begin something; Dimensions ntax=5 nchar=1; '
        'Format datatype=standard symbols="01" gap=-; '
        'Matrix Harry 1 ;'
    )

    def parse_and_anonymise():
        reader = NexusReader()
        reader.read_string(fixture)
        anonymise(reader)

    self.assertRaises(NotImplementedError, parse_and_anonymise)
def test_combine_with_character_labels(self):
    """Combine two labelled 3x3 data blocks and check the written dimensions, symbols, rows and labels."""
    template = (
        ' BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; '
        'FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="%(s)s"; '
        'CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; '
        'MATRIX Tax1 %(s)s Tax2 %(s)s Tax3 %(s)s ; '
    )
    n1 = NexusReader()
    n1.read_string(template % {'s': '123'})
    n2 = NexusReader()
    n2.read_string(template % {'s': '456'})

    newnex = combine_nexuses([n1, n2])

    # Render once and reuse the text instead of regenerating it per assert.
    written = newnex.write()
    assert re.search(r"\bNTAX=3\b", written)
    assert re.search(r"\bNCHAR=6\b", written)
    assert re.search(r'\sSYMBOLS="123456"[\s;]', written)
    for tax in (1, 2, 3):
        assert re.search(r"\bTax%d\s+123456\b" % tax, written)

    with_labels = newnex.write(charblock=True)
    counter = 1
    for nex_id in (1, 2):
        for char_id in (1, 2, 3):
            # The dot is escaped so only a literal "<nexus>.char<n>" label
            # matches; unescaped it would accept any separator character.
            assert re.search(
                r"\b%d\s+%d\.char%d\b" % (counter, nex_id, char_id),
                with_labels
            )
            counter += 1
def test_incorrect_dimensions_warnings_nchar(self):
    """A declared nchar=5 with one actual character must warn once and leave nchar == 1."""
    with warnings.catch_warnings(record=True) as w:
        # Force UserWarnings to be emitted every time: under the default
        # filters a warning already raised from the same location earlier in
        # the run would be suppressed and never reach ``w``.
        warnings.simplefilter("always", UserWarning)
        nex = NexusReader()
        nex.read_string(
            'Begin data; Dimensions ntax=1 nchar=5; '
            'Format datatype=standard symbols="01" gap=-; '
            'Matrix Harry 1 ;'
        )
        assert len(w) == 1, 'Expected 1 warning, got %r' % w
        assert issubclass(w[-1].category, UserWarning)
        assert "Expected" in str(w[-1].message)
        assert nex.data.nchar == 1
def test_incorrect_dimensions_warnings_nchar(self):
    """A declared nchar=5 with one actual character must warn once and leave nchar == 1."""
    with warnings.catch_warnings(record=True) as w:
        # Force UserWarnings to be emitted every time: under the default
        # filters a warning already raised from the same location earlier in
        # the run would be suppressed and never reach ``w``.
        warnings.simplefilter("always", UserWarning)
        nex = NexusReader()
        nex.read_string(
            'Begin data; Dimensions ntax=1 nchar=5; '
            'Format datatype=standard symbols="01" gap=-; '
            'Matrix Harry 1 ;'
        )
        assert len(w) == 1, 'Expected 1 warning, got %r' % w
        assert issubclass(w[-1].category, UserWarning)
        assert "Expected" in str(w[-1].message)
        assert nex.data.nchar == 1
def test_labelled_unrooted(self):
    """A tree line carrying a [U] annotation is stored verbatim as the single tree."""
    fixture = (
        ' #NEXUS begin trees; translate 0 Tom, 1 Simon, 2 Fred; '
        'tree unrooted [U] = (0,1,2); end; '
    )
    reader = NexusReader()
    reader.read_string(fixture)

    parsed = reader.trees.trees
    assert len(parsed) == 1
    assert parsed == ['tree unrooted [U] = (0,1,2);']
def test_treelabel(self):
    """A tree named TREEONE is stored verbatim as the single tree."""
    fixture = (
        ' #NEXUS begin trees; translate 0 Tom, 1 Simon, 2 Fred; '
        'tree TREEONE = (0,1,2); end; '
    )
    reader = NexusReader()
    reader.read_string(fixture)

    parsed = reader.trees.trees
    assert len(parsed) == 1
    assert parsed == ['tree TREEONE = (0,1,2);']
def test_combine_with_character_labels(self):
    """Combine two labelled 3x3 data blocks and check the written dimensions, symbols, rows and labels."""
    template = (
        ' BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; '
        'FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="%(s)s"; '
        'CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; '
        'MATRIX Tax1 %(s)s Tax2 %(s)s Tax3 %(s)s ; '
    )
    n1 = NexusReader()
    n1.read_string(template % {'s': '123'})
    n2 = NexusReader()
    n2.read_string(template % {'s': '456'})

    newnex = combine_nexuses([n1, n2])

    # Render once and reuse the text instead of regenerating it per assert.
    written = newnex.write()
    assert re.search(r"\bNTAX=3\b", written)
    assert re.search(r"\bNCHAR=6\b", written)
    assert re.search(r'\sSYMBOLS="123456"[\s;]', written)
    for tax in (1, 2, 3):
        assert re.search(r"\bTax%d\s+123456\b" % tax, written)

    with_labels = newnex.write(charblock=True)
    counter = 1
    for nex_id in (1, 2):
        for char_id in (1, 2, 3):
            # The dot is escaped so only a literal "<nexus>.char<n>" label
            # matches; unescaped it would accept any separator character.
            assert re.search(
                r"\b%d\s+%d\.char%d\b" % (counter, nex_id, char_id),
                with_labels
            )
            counter += 1
class Test_Binarise(unittest.TestCase):
    """Tests for binarise() on a 3-taxon, 2-character labelled data block."""

    def setUp(self):
        """Parse the fixture and replace self.nex with its binarised form."""
        self.nex = NexusReader()
        self.nex.read_string(
            'Begin data; Dimensions ntax=3 nchar=2; '
            'Format datatype=standard symbols="01" gap=-; '
            'Charstatelabels 1 char1, 2 char2; '
            'Matrix Maori 14 Dutch 25 Latin 36 ;'
        )
        self.nex = binarise(self.nex)

    def test_to_binary(self):
        """Test Nexus -> Binary: Two Character"""
        expected = {
            'char1_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char1_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char1_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
            'char2_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char2_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char2_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
        }
        for char, data in expected.items():
            for taxon, exp_value in data.items():
                assert self.nex.data[char][taxon] == exp_value

    def test_to_binary_nchar(self):
        """Test Nexus -> Binary: Number of Characters"""
        assert len(self.nex.characters) == 6

    def test_to_binary_symbollist(self):
        """Test Nexus -> Binary: Update Symbol List"""
        # check symbol list was updated
        assert len(self.nex.symbols) == 2
        assert '1' in self.nex.symbols
        assert '0' in self.nex.symbols

    def test_to_binary_nexus(self):
        """Test Nexus -> Binary: Nexus"""
        nexus = self.nex.make_nexus(interleave=False)
        # Raw strings: "\s" in a plain literal is an invalid escape sequence
        # (deprecated, a SyntaxWarning on recent Pythons); the pattern text
        # itself is unchanged.
        assert re.search(r"Dutch\s+010010", nexus)
        assert re.search(r"Maori\s+100100", nexus)
        assert re.search(r"Latin\s+001001", nexus)
def test_ok_starting_with_one(self):
    """A translate table numbered from 1 yields translators keyed '1', '2', '3'."""
    fixture = (
        ' #NEXUS begin trees; translate 1 Tom, 2 Simon, 3 Fred; '
        'tree tree = (1,2,3) end; '
    )
    reader = NexusReader()
    reader.read_string(fixture)

    assert len(reader.trees.translators) == 3
    for key in ('1', '2', '3'):
        assert key in reader.trees.translators
def test_ok_starting_with_zero(self):
    """A translate table numbered from 0 yields translators keyed '0', '1', '2'."""
    fixture = (
        ' #NEXUS begin trees; translate 0 Tom, 1 Simon, 2 Fred; '
        'tree tree = (0,1,2) end; '
    )
    reader = NexusReader()
    reader.read_string(fixture)

    assert len(reader.trees.translators) == 3
    for key in ('0', '1', '2'):
        assert key in reader.trees.translators
class Test_DataHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE data blocks"""

    def setUp(self):
        """Parse a data block that carries TITLE and LINK attribute lines."""
        self.nex = NexusReader()
        self.nex.read_string(
            ' #NEXUS Begin data; '
            'TITLE Untitled_Block_of_Taxa; '
            'LINK Taxa = Untitled_Block_of_Taxa; '
            'Dimensions ntax=2 nchar=2; '
            'Format datatype=standard gap=- symbols="01"; '
            'Matrix Harry 00 Simon 01 ; End; '
        )

    def test_attributes(self):
        """Both attribute lines are kept, in order, on data.attributes."""
        attributes = self.nex.data.attributes
        assert len(attributes) == 2
        assert attributes[0] == "TITLE Untitled_Block_of_Taxa;"
        assert attributes[1] == "LINK Taxa = Untitled_Block_of_Taxa;"

    def test_write(self):
        """write() output contains every expected line of the block."""
        # Raw strings: "\s" in a plain literal is an invalid escape sequence
        # (deprecated, a SyntaxWarning on recent Pythons); pattern text is
        # unchanged.
        expected_patterns = [
            r'^begin data;$',
            r'^\s+TITLE Untitled_Block_of_Taxa;$',
            r'^\s+LINK Taxa = Untitled_Block_of_Taxa;$',
            r'^\s+dimensions ntax=2 nchar=2;$',
            r'^\s+format datatype=standard gap=- symbols="01";$',
            r"^matrix$",
            r"^Harry\s+00",
            r"^Simon\s+01$",
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
class Test_TallyBySite(unittest.TestCase):
    """Tests for tally_by_site on a 3-taxon, 6-site fixture."""

    def setUp(self):
        """Parse the shared fixture into self.nex."""
        fixture = (
            'Begin data; Dimensions ntax=3 nchar=6; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 0111-? Simon 0011-? Elvis 0001-? ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bogus in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_site, bogus)

    def test_tally_by_site(self):
        """Each taxon is listed under its state for every site."""
        tally = tally_by_site(self.nex)
        taxa = ('Harry', 'Simon', 'Elvis')
        # One entry per site; characters are the states of Harry, Simon and
        # Elvis in that order.
        columns = ['000', '100', '110', '111', '---', '???']
        for site, states in enumerate(columns):
            for taxon, state in zip(taxa, states):
                assert taxon in tally[site][state]
class Test_CheckZeros(unittest.TestCase):
    """Tests for check_zeros and remove_zeros on a 4-taxon, 8-site fixture."""

    def setUp(self):
        """Parse the fixture and cache check_zeros' default result in self.found."""
        fixture = (
            ' Begin data; Dimensions ntax=4 nchar=8; '
            'Format datatype=standard symbols="01" gap=-; Matrix '
            '[ 01234567] '
            'Harry 01000000 '
            'Simon 0010000- '
            'Betty 00010-0? '
            'Louise 000010?0 ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)
        self.found = check_zeros(self.nex)

    def test_find_zero(self):
        """Site 0 is reported."""
        self.assertIn(0, self.found)

    def test_find_missing_dash(self):
        """Site 5 is reported."""
        self.assertIn(5, self.found)

    def test_find_missing_questionmark(self):
        """Site 6 is reported."""
        self.assertIn(6, self.found)

    def test_find_complex(self):
        """Site 7 is reported."""
        self.assertIn(7, self.found)

    def test_change_missing(self):
        """With missing=['-'] only sites 0 and 5 are reported."""
        result = check_zeros(self.nex, missing=['-'])
        assert result == [0, 5]

    def test_change_absence(self):
        """With absences=['1', '0'] every site is reported."""
        result = check_zeros(self.nex, absences=['1', '0'])
        assert result == [0, 1, 2, 3, 4, 5, 6, 7]

    def test_remove_zeros(self):
        """remove_zeros leaves four characters per taxon."""
        stripped = remove_zeros(self.nex)
        assert stripped.data.nchar == 4
        expected_rows = (
            ('Harry', ['1', '0', '0', '0']),
            ('Simon', ['0', '1', '0', '0']),
            ('Betty', ['0', '0', '1', '0']),
            ('Louise', ['0', '0', '0', '1']),
        )
        for taxon, states in expected_rows:
            assert stripped.data.matrix[taxon] == states
def test_regression_include_invisible_taxa(self):
    """Include taxa that have no entries"""
    fixture = (
        ' #NEXUS BEGIN DATA; DIMENSIONS NTAX=15 NCHAR=7; '
        'FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES; MATRIX '
        'Gertrude 0000001 Debbie 0001000 Zarathrustra 0000000 '
        'Christie 0010000 Benny 0100000 Bertha 0100000 '
        'Craig 0010000 Fannie-May 0000010 Charles 0010000 '
        'Annik 1000000 Frank 0000010 Amber 1000000 '
        'Andreea 1000000 Edward 0000100 Donald 0001000 '
        '; END; '
    )
    reader = NexusReader()
    reader.read_string(fixture)
    msnex = multistatise(reader)

    for name, states in msnex.data.matrix.items():
        initial = name[0]
        # 'Z...' has no entries and is checked separately below; for every
        # other taxon the first letter of its name is the expected state.
        if initial != 'Z':
            assert initial == states[0], \
                "%s should be %s not %s" % (name, initial, states[0])

    # deal with completely missing taxa
    assert 'Zarathrustra' in msnex.data.matrix
    assert msnex.data.matrix['Zarathrustra'][0] == '?'
def test_regression_include_invisible_taxa(self):
    """Include taxa that have no entries"""
    fixture = (
        ' #NEXUS BEGIN DATA; DIMENSIONS NTAX=15 NCHAR=7; '
        'FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES; MATRIX '
        'Gertrude 0000001 Debbie 0001000 Zarathrustra 0000000 '
        'Christie 0010000 Benny 0100000 Bertha 0100000 '
        'Craig 0010000 Fannie-May 0000010 Charles 0010000 '
        'Annik 1000000 Frank 0000010 Amber 1000000 '
        'Andreea 1000000 Edward 0000100 Donald 0001000 '
        '; END; '
    )
    reader = NexusReader()
    reader.read_string(fixture)
    msnex = multistatise(reader)

    for name, states in msnex.data.matrix.items():
        initial = name[0]
        # 'Z...' has no entries and is checked separately below; for every
        # other taxon the first letter of its name is the expected state.
        if initial != 'Z':
            assert initial == states[0], \
                "%s should be %s not %s" % (name, initial, states[0])

    # deal with completely missing taxa
    assert 'Zarathrustra' in msnex.data.matrix
    assert msnex.data.matrix['Zarathrustra'][0] == '?'
class Test_TallyBySite(unittest.TestCase):
    """Tests for tally_by_site on a 3-taxon, 6-site fixture."""

    def setUp(self):
        """Parse the shared fixture into self.nex."""
        fixture = (
            'Begin data; Dimensions ntax=3 nchar=6; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 0111-? Simon 0011-? Elvis 0001-? ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bogus in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_site, bogus)

    def test_tally_by_site(self):
        """Each taxon is listed under its state for every site."""
        tally = tally_by_site(self.nex)
        taxa = ('Harry', 'Simon', 'Elvis')
        # One entry per site; characters are the states of Harry, Simon and
        # Elvis in that order.
        columns = ['000', '100', '110', '111', '---', '???']
        for site, states in enumerate(columns):
            for taxon, state in zip(taxa, states):
                assert taxon in tally[site][state]
def test_count_other_values_two(self):
    """Compare count_site_values(nexus, ["A", "B"]) with the expected per-taxon numbers."""
    fixture = (
        '#NEXUS Begin data; Dimensions ntax=5 nchar=3; '
        'Format datatype=standard symbols="01" gap=-; Matrix '
        'Harry 0A0 [No missing] '
        'Simon 0AB [one missing] '
        'Peter 0-B [one gap] '
        'Betty ?-1 [one gap and one missing = 2 missing] '
        'Louise ??? [three missing] '
        '; End; '
    )
    reader = NexusReader()
    reader.read_string(fixture)

    wanted = dict(Harry=1, Simon=2, Peter=1, Betty=0, Louise=0)
    observed = count_site_values(reader, ["A", "B"])
    for name in observed:
        assert observed[name] == wanted[name]
def test_count_other_values_one(self):
    """Compare count_site_values(nexus, 'A') with the expected per-taxon numbers."""
    fixture = (
        '#NEXUS Begin data; Dimensions ntax=5 nchar=3; '
        'Format datatype=standard symbols="01" gap=-; Matrix '
        'Harry 0A0 [No missing] '
        'Simon 0A0 [one missing] '
        'Peter 0-0 [one gap] '
        'Betty ?-1 [one gap and one missing = 2 missing] '
        'Louise ??? [three missing] '
        '; End; '
    )
    reader = NexusReader()
    reader.read_string(fixture)

    wanted = dict(Harry=1, Simon=1, Peter=0, Betty=0, Louise=0)
    observed = count_site_values(reader, 'A')
    for name in observed:
        assert observed[name] == wanted[name]
class Test_TallyByTaxon(unittest.TestCase):
    """Tests for tally_by_taxon on a 3-taxon, 6-site fixture."""

    def setUp(self):
        """Parse the shared fixture into self.nex."""
        fixture = (
            'Begin data; Dimensions ntax=3 nchar=6; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 0111-? Simon 0011-? Elvis 0001-? ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bogus in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_taxon, bogus)

    def test_tally_by_taxon(self):
        """For each state, every taxon maps to the list of sites holding it."""
        tally = tally_by_taxon(self.nex)
        # (state, expected site lists for Harry, Simon, Elvis)
        expectations = (
            ('0', ([0], [0, 1], [0, 1, 2])),
            ('1', ([1, 2, 3], [2, 3], [3])),
            ('-', ([4], [4], [4])),
            ('?', ([5], [5], [5])),
        )
        for state, site_lists in expectations:
            for taxon, sites in zip(('Harry', 'Simon', 'Elvis'), site_lists):
                assert tally[taxon][state] == sites
class Test_CountBinarySetSize(unittest.TestCase):
    """Tests for count_binary_set_size on a 3-taxon, 4-site fixture."""

    def setUp(self):
        """Parse the shared fixture into self.nex."""
        fixture = (
            'Begin data; Dimensions ntax=3 nchar=4; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 0111 Simon 0011 Elvis 0001 ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bogus in ("I am a string", 0):
            self.assertRaises(TypeError, count_binary_set_size, bogus)

    def test_count_binary_set_size(self):
        """Keys 0 through 3 of the tally each equal 1."""
        tally = count_binary_set_size(self.nex)
        for size in range(4):
            assert tally[size] == 1
class Test_TallyByTaxon(unittest.TestCase):
    """Tests for tally_by_taxon on a 3-taxon, 6-site fixture."""

    def setUp(self):
        """Parse the shared fixture into self.nex."""
        fixture = (
            'Begin data; Dimensions ntax=3 nchar=6; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 0111-? Simon 0011-? Elvis 0001-? ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bogus in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_taxon, bogus)

    def test_tally_by_taxon(self):
        """For each state, every taxon maps to the list of sites holding it."""
        tally = tally_by_taxon(self.nex)
        # (state, expected site lists for Harry, Simon, Elvis)
        expectations = (
            ('0', ([0], [0, 1], [0, 1, 2])),
            ('1', ([1, 2, 3], [2, 3], [3])),
            ('-', ([4], [4], [4])),
            ('?', ([5], [5], [5])),
        )
        for state, site_lists in expectations:
            for taxon, sites in zip(('Harry', 'Simon', 'Elvis'), site_lists):
                assert tally[taxon][state] == sites
class Test_DataHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE data blocks"""

    def setUp(self):
        """Parse a data block that carries a TITLE attribute line."""
        self.nex = NexusReader()
        self.nex.read_string(
            ' #NEXUS Begin data; TITLE something; '
            'Dimensions ntax=2 nchar=2; '
            'Format datatype=standard symbols="01" gap=-; '
            'Matrix Harry 00 Simon 01 ; End; '
        )

    def test_attr_find(self):
        """Exactly one attribute line is recorded on data.attributes."""
        assert len(self.nex.data.attributes) == 1

    def test_write(self):
        """write() output contains every expected line of the block."""
        # Raw strings: "\s" in a plain literal is an invalid escape sequence
        # (deprecated, a SyntaxWarning on recent Pythons); pattern text is
        # unchanged.
        expected_patterns = [
            r'^begin data;$',
            r'^\s+TITLE something;$',
            r'^\s+dimensions ntax=2 nchar=2;$',
            r'^\s+format datatype=standard symbols="01" gap=-;$',
            r"^matrix$",
            r"^Harry\s+00",
            r"^Simon\s+01$",
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
class Test_CountBinarySetSize(unittest.TestCase):
    """Tests for count_binary_set_size on a 3-taxon, 4-site fixture."""

    def setUp(self):
        """Parse the shared fixture into self.nex."""
        fixture = (
            'Begin data; Dimensions ntax=3 nchar=4; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 0111 Simon 0011 Elvis 0001 ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bogus in ("I am a string", 0):
            self.assertRaises(TypeError, count_binary_set_size, bogus)

    def test_count_binary_set_size(self):
        """Keys 0 through 3 of the tally each equal 1."""
        tally = count_binary_set_size(self.nex)
        for size in range(4):
            assert tally[size] == 1
class Test_CombineNexuses(unittest.TestCase):
    """Tests for combine_nexuses using three single-character data blocks."""

    def setUp(self):
        """Build nex1 (Harry/Simon, symbols 12), nex2 (Harry/Simon, 34) and nex3 (Betty/Boris/Simon, 345)."""
        self.nex1 = NexusReader()
        self.nex1.read_string(
            'Begin data; Dimensions ntax=2 nchar=1; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 1 Simon 2 ;'
        )
        # set short_filename to test that functionality. If `combine_nexuses`
        # doesn't use `short_filename`, then the nex1 characters will be
        # identified as 1.xx, rather than 0.xx
        self.nex1.short_filename = '0'

        self.nex2 = NexusReader()
        self.nex2.read_string(
            'Begin data; Dimensions ntax=2 nchar=1; '
            'Format datatype=standard symbols="34" gap=-; '
            'Matrix Harry 3 Simon 4 ;'
        )

        self.nex3 = NexusReader()
        self.nex3.read_string(
            'Begin data; Dimensions ntax=3 nchar=1; '
            'Format datatype=standard symbols="345" gap=-; '
            'Matrix Betty 3 Boris 4 Simon 5 ;'
        )

    def test_failure_on_nonlist_1(self):
        """A plain string argument raises TypeError."""
        self.assertRaises(TypeError, combine_nexuses, "I am not a list")

    def test_failure_on_nonlist_2(self):
        """A list holding a string raises TypeError."""
        self.assertRaises(TypeError, combine_nexuses, ["hello"])

    def test_combine_simple(self):
        """Characters are keyed '<nexus>.<char>' and keep their per-taxon states."""
        newnex = combine_nexuses([self.nex1, self.nex2])
        assert newnex.data['0.1']['Harry'] == '1'
        assert newnex.data['0.1']['Simon'] == '2'
        assert newnex.data['2.1']['Harry'] == '3'
        assert newnex.data['2.1']['Simon'] == '4'

    def test_combine_simple_generated_matrix(self):
        """Written matrix rows concatenate the states of both inputs."""
        newnex = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bSimon\s+24\b""", newnex)
        assert re.search(r"""\bHarry\s+13\b""", newnex)

    def test_combine_simple_generated_formatline(self):
        """Written dimensions and symbols cover both inputs."""
        newnex = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bNTAX=2\b""", newnex)
        assert re.search(r"""\bNCHAR=2\b""", newnex)
        assert re.search(r'\sSYMBOLS="1234"[\s;]', newnex)

    def test_combine_missing(self):
        """Taxa present in only one input still keep their own states."""
        newnex = combine_nexuses([self.nex1, self.nex3])
        assert newnex.data['0.1']['Harry'] == '1'
        assert newnex.data['0.1']['Simon'] == '2'
        assert newnex.data['2.1']['Betty'] == '3'
        assert newnex.data['2.1']['Boris'] == '4'

    def test_combine_missing_generated_matrix(self):
        """Taxa absent from one input are written with '?' in that column."""
        newnex = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bSimon\s+25\b""", newnex)
        # The former pattern ``1\\?\b`` matched an *optional backslash*, so
        # the '?' marker was never checked. ``\b`` cannot follow '?', hence
        # the "not followed by non-space" lookahead to end the token.
        assert re.search(r"""\bHarry\s+1\?(?!\S)""", newnex)
        assert re.search(r"""\bBetty\s+\?3\b""", newnex)
        assert re.search(r"""\bBoris\s+\?4\b""", newnex)

    def test_combine_missing_generated_formatline(self):
        """Written dimensions and symbols cover the union of both inputs."""
        newnex = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bNTAX=4\b""", newnex)
        assert re.search(r"""\bNCHAR=2\b""", newnex)
        assert re.search(r'\sSYMBOLS="12345"[\s;]', newnex)

    def test_combine_with_character_labels(self):
        """Character labels of both inputs appear, prefixed by nexus number, in the character block."""
        template = (
            ' BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; '
            'FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="%(s)s"; '
            'CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; '
            'MATRIX Tax1 %(s)s Tax2 %(s)s Tax3 %(s)s ; '
        )
        n1 = NexusReader()
        n1.read_string(template % {'s': '123'})
        n2 = NexusReader()
        n2.read_string(template % {'s': '456'})

        newnex = combine_nexuses([n1, n2])

        # Render once and reuse the text instead of regenerating it per assert.
        written = newnex.write()
        assert re.search(r"""\bNTAX=3\b""", written)
        assert re.search(r"""\bNCHAR=6\b""", written)
        assert re.search(r'\sSYMBOLS="123456"[\s;]', written)
        for tax in [1, 2, 3]:
            assert re.search(r"""\bTax%d\s+123456\b""" % tax, written)

        with_labels = newnex.write(charblock=True)
        counter = 1
        for nex_id in [1, 2]:
            for char_id in [1, 2, 3]:
                # Escaped dot: only a literal "<nexus>.char<n>" label matches.
                assert re.search(
                    r"""\b%d\s+%d\.char%d\b""" % (counter, nex_id, char_id),
                    with_labels)
                counter += 1
class Test_Multistatise(unittest.TestCase):
    """Tests for multistatise on a 4-taxon, 4-site binary fixture."""

    def setUp(self):
        """Parse the fixture and replace self.nex with its multistatised form."""
        fixture = (
            ' Begin data; Dimensions ntax=4 nchar=4; '
            'Format datatype=standard symbols="01" gap=-; '
            'Matrix Harry 1000 Simon 0100 Betty 0010 Louise 0001 ;'
        )
        reader = NexusReader()
        reader.read_string(fixture)
        self.nex = multistatise(reader)

    def test_nexusreader_transformation(self):
        """The result is a NexusReader."""
        assert isinstance(self.nex, NexusReader), \
            "Nexus_obj should be a NexusReader instance"

    def test_block_find(self):
        """The result has a data block."""
        assert 'data' in self.nex.blocks

    def test_ntaxa_recovery(self):
        """All four taxa are kept."""
        assert self.nex.data.ntaxa == 4

    def test_nchar_recovery(self):
        """The four binary sites become one character."""
        assert self.nex.data.nchar == 1

    def test_matrix(self):
        """Each taxon gets its own single-state row, A through D."""
        pairs = (('Harry', 'A'), ('Simon', 'B'), ('Betty', 'C'), ('Louise', 'D'))
        for taxon, state in pairs:
            assert self.nex.data.matrix[taxon] == [state], self.nex.data.matrix

    def test_regression_include_invisible_taxa(self):
        """Include taxa that have no entries"""
        fixture = (
            ' #NEXUS BEGIN DATA; DIMENSIONS NTAX=15 NCHAR=7; '
            'FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES; MATRIX '
            'Gertrude 0000001 Debbie 0001000 Zarathrustra 0000000 '
            'Christie 0010000 Benny 0100000 Bertha 0100000 '
            'Craig 0010000 Fannie-May 0000010 Charles 0010000 '
            'Annik 1000000 Frank 0000010 Amber 1000000 '
            'Andreea 1000000 Edward 0000100 Donald 0001000 '
            '; END; '
        )
        reader = NexusReader()
        reader.read_string(fixture)
        msnex = multistatise(reader)

        for name, states in msnex.data.matrix.items():
            initial = name[0]
            # 'Z...' has no entries and is checked separately below; for
            # every other taxon the first letter of its name is the
            # expected state.
            if initial != 'Z':
                assert initial == states[0], \
                    "%s should be %s not %s" % (name, initial, states[0])

        # deal with completely missing taxa
        assert 'Zarathrustra' in msnex.data.matrix
        assert msnex.data.matrix['Zarathrustra'][0] == '?'

    def test_error_on_too_many_states(self):
        """A 30-site, single-taxon block makes multistatise raise ValueError."""
        fixture = (
            ' Begin data; Dimensions ntax=1 nchar=30; '
            'Format datatype=standard symbols="01" gap=-; '
            'Matrix A 111111111111111111111111111111 ;'
        )
        self.nex = NexusReader()
        self.nex.read_string(fixture)
        self.assertRaises(ValueError, multistatise, self.nex)
class Test_CombineNexuses(unittest.TestCase):
    """Test combine_nexuses"""

    def setUp(self):
        """Build nex1 (Harry/Simon, symbols 12), nex2 (Harry/Simon, 34) and nex3 (Betty/Boris/Simon, 345)."""
        self.nex1 = NexusReader()
        self.nex1.read_string(
            'Begin data; Dimensions ntax=2 nchar=1; '
            'Format datatype=standard symbols="12" gap=-; '
            'Matrix Harry 1 Simon 2 ;'
        )
        self.nex2 = NexusReader()
        self.nex2.read_string(
            'Begin data; Dimensions ntax=2 nchar=1; '
            'Format datatype=standard symbols="34" gap=-; '
            'Matrix Harry 3 Simon 4 ;'
        )
        self.nex3 = NexusReader()
        self.nex3.read_string(
            'Begin data; Dimensions ntax=3 nchar=1; '
            'Format datatype=standard symbols="345" gap=-; '
            'Matrix Betty 3 Boris 4 Simon 5 ;'
        )

    def test_failure_on_nonlist_1(self):
        """A plain string argument raises TypeError."""
        self.assertRaises(TypeError, combine_nexuses, "I am not a list")

    def test_failure_on_nonlist_2(self):
        """A list holding a string raises TypeError."""
        # should be NexusReader instances
        self.assertRaises(TypeError, combine_nexuses, ["hello",])

    def test_combine_simple(self):
        """Characters are keyed '<nexus>.<char>' and keep their per-taxon states."""
        newnex = combine_nexuses([self.nex1, self.nex2])
        assert newnex.data['1.1']['Harry'] == '1'
        assert newnex.data['1.1']['Simon'] == '2'
        assert newnex.data['2.1']['Harry'] == '3'
        assert newnex.data['2.1']['Simon'] == '4'

    def test_combine_simple_generated_matrix(self):
        """Written matrix rows concatenate the states of both inputs."""
        # Render once and reuse the text for every assertion.
        written = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bSimon\s+24\b""", written)
        assert re.search(r"""\bHarry\s+13\b""", written)

    def test_combine_simple_generated_formatline(self):
        """Written dimensions and symbols cover both inputs."""
        written = combine_nexuses([self.nex1, self.nex2]).write()
        assert re.search(r"""\bNTAX=2\b""", written)
        assert re.search(r"""\bNCHAR=2\b""", written)
        assert re.search(r'\sSYMBOLS="1234"[\s;]', written)

    def test_combine_missing(self):
        """Taxa present in only one input still keep their own states."""
        newnex = combine_nexuses([self.nex1, self.nex3])
        assert newnex.data['1.1']['Harry'] == '1'
        assert newnex.data['1.1']['Simon'] == '2'
        assert newnex.data['2.1']['Betty'] == '3'
        assert newnex.data['2.1']['Boris'] == '4'

    def test_combine_missing_generated_matrix(self):
        """Taxa absent from one input are written with '?' in that column."""
        written = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bSimon\s+25\b""", written)
        # The former pattern ``1\\?\b`` matched an *optional backslash*, so
        # the '?' marker was never checked. ``\b`` cannot follow '?', hence
        # the "not followed by non-space" lookahead to end the token.
        assert re.search(r"""\bHarry\s+1\?(?!\S)""", written)
        assert re.search(r"""\bBetty\s+\?3\b""", written)
        assert re.search(r"""\bBoris\s+\?4\b""", written)

    def test_combine_missing_generated_formatline(self):
        """Written dimensions and symbols cover the union of both inputs."""
        written = combine_nexuses([self.nex1, self.nex3]).write()
        assert re.search(r"""\bNTAX=4\b""", written)
        assert re.search(r"""\bNCHAR=2\b""", written)
        assert re.search(r'\sSYMBOLS="12345"[\s;]', written)

    def test_combine_with_character_labels(self):
        """Character labels of both inputs appear, prefixed by nexus number, in the character block."""
        template = (
            ' BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; '
            'FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="%(s)s"; '
            'CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; '
            'MATRIX Tax1 %(s)s Tax2 %(s)s Tax3 %(s)s ; '
        )
        n1 = NexusReader()
        n1.read_string(template % {'s': '123'})
        n2 = NexusReader()
        n2.read_string(template % {'s': '456'})

        newnex = combine_nexuses([n1, n2])

        written = newnex.write()
        assert re.search(r"""\bNTAX=3\b""", written)
        assert re.search(r"""\bNCHAR=6\b""", written)
        assert re.search(r'\sSYMBOLS="123456"[\s;]', written)
        for tax in [1, 2, 3]:
            assert re.search(r"""\bTax%d\s+123456\b""" % tax, written)

        with_labels = newnex.write(charblock=True)
        counter = 1
        for nex_id in [1, 2]:
            for char_id in [1, 2, 3]:
                # Escaped dot: only a literal "<nexus>.char<n>" label matches.
                assert re.search(
                    r"""\b%d\s+%d\.char%d\b""" % (counter, nex_id, char_id),
                    with_labels
                )
                counter += 1
class Test_Multistatise(unittest.TestCase):
    """Test multistatise"""

    def setUp(self):
        """Parse the fixture and replace self.nex with its multistatised form."""
        fixture = (
            'Begin data; Dimensions ntax=4 nchar=4; '
            'Format datatype=standard symbols="01" gap=-; '
            'Matrix Harry 1000 Simon 0100 Betty 0010 Louise 0001 ;'
        )
        reader = NexusReader()
        reader.read_string(fixture)
        self.nex = multistatise(reader)

    def test_nexusreader_transformation(self):
        """The result is a NexusReader."""
        assert isinstance(self.nex, NexusReader), \
            "Nexus_obj should be a NexusReader instance"

    def test_block_find(self):
        """The result has a data block."""
        assert 'data' in self.nex.blocks

    def test_ntaxa_recovery(self):
        """All four taxa are kept."""
        assert self.nex.data.ntaxa == 4

    def test_nchar_recovery(self):
        """The four binary sites become one character."""
        assert self.nex.data.nchar == 1

    def test_matrix(self):
        """The first state of each taxon is A through D respectively."""
        pairs = (('Harry', 'A'), ('Simon', 'B'), ('Betty', 'C'), ('Louise', 'D'))
        for taxon, state in pairs:
            assert self.nex.data.matrix[taxon][0] == state

    def test_regression_include_invisible_taxa(self):
        """Include taxa that have no entries"""
        fixture = (
            ' #NEXUS BEGIN DATA; DIMENSIONS NTAX=15 NCHAR=7; '
            'FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES; MATRIX '
            'Gertrude 0000001 Debbie 0001000 Zarathrustra 0000000 '
            'Christie 0010000 Benny 0100000 Bertha 0100000 '
            'Craig 0010000 Fannie-May 0000010 Charles 0010000 '
            'Annik 1000000 Frank 0000010 Amber 1000000 '
            'Andreea 1000000 Edward 0000100 Donald 0001000 '
            '; END; '
        )
        reader = NexusReader()
        reader.read_string(fixture)
        msnex = multistatise(reader)

        for name, states in msnex.data.matrix.items():
            initial = name[0]
            # 'Z...' has no entries and is checked separately below; for
            # every other taxon the first letter of its name is the
            # expected state.
            if initial != 'Z':
                assert initial == states[0], \
                    "%s should be %s not %s" % (name, initial, states[0])

        # deal with completely missing taxa
        assert 'Zarathrustra' in msnex.data.matrix
        assert msnex.data.matrix['Zarathrustra'][0] == '?'