def test_find_unique_sites_2(self):
    """Check find_unique_sites on a matrix containing '?' and '-' states."""
    reader = NexusReader()
    reader.read_string("""Begin data;
    Dimensions ntax=4 nchar=7;
    Format datatype=standard symbols="01" gap=-;
    Matrix
    Harry 10000?-
    Simon 1100011
    Betty 1110000
    Louise 1111000
    ;""")
    found = find_unique_sites(reader)

    # Site 1 (3x1 and 1x0) must NOT be reported, i.e. sites with ONE absent
    # taxon are ignored.  Sites 0 and 2 are not unique either, and site 4
    # is constant.
    for site in (1, 0, 2, 4):
        assert site not in found

    # Site 3 is a simple unique site; sites 5 and 6 are unique once the
    # missing data is handled appropriately.
    for site in (3, 5, 6):
        assert site in found
def setUp(self):
    """Build three single-character readers: nex1, nex2 and nex3."""
    fixtures = (
        ('nex1', """Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry 1
            Simon 2
            ;"""),
        ('nex2', """Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="34" gap=-;
            Matrix
            Harry 3
            Simon 4
            ;"""),
        ('nex3', """Begin data;
            Dimensions ntax=3 nchar=1;
            Format datatype=standard symbols="345" gap=-;
            Matrix
            Betty 3
            Boris 4
            Simon 5
            ;"""),
    )
    for attribute, text in fixtures:
        reader = NexusReader()
        # bind the attribute before parsing, as the original did
        setattr(self, attribute, reader)
        reader.read_string(text)
def test_read_string(self):
    """Parse the text of example.nex via read_string and check the data block."""
    path = os.path.join(EXAMPLE_DIR, 'example.nex')
    # the context manager closes the handle even if read() raises
    with open(path) as handle:
        data = handle.read()
    nex = NexusReader()
    nex.read_string(data)
    assert 'data' in nex.blocks
    assert 'Simon' in nex.blocks['data'].matrix
def test_write_to_file(self):
    """Write example.nex to a temp file and check it reads back unchanged."""
    tmp = NamedTemporaryFile(delete=False, suffix=".nex")
    tmp.close()
    try:
        nex = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
        nex.write_to_file(tmp.name)
        assert os.path.isfile(tmp.name)
        n2 = NexusReader(tmp.name)
        assert n2.data.matrix == nex.data.matrix
        assert sorted(n2.data.taxa) == sorted(nex.data.taxa)
    finally:
        # cleanup runs even when an assertion above fails, so no temp
        # file is left behind; guard so a missing file does not mask
        # the original failure
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
def test_notimplemented_exception(self):
    """Expect NotImplementedError when reading and anonymising a 'something' block."""
    text = """Begin something;
        Dimensions ntax=5 nchar=1;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry 1
        ;"""
    with self.assertRaises(NotImplementedError):
        reader = NexusReader()
        reader.read_string(text)
        anonymise(reader)
def test_incorrect_dimensions_warnings_nchar(self):
    """A block declaring nchar=5 but holding one character warns exactly once."""
    text = """Begin data;
        Dimensions ntax=1 nchar=5;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry 1
        ;"""
    with warnings.catch_warnings(record=True) as caught:
        reader = NexusReader()
        reader.read_string(text)
        assert len(caught) == 1, 'Expected 1 warning, got %r' % caught
        newest = caught[-1]
        assert issubclass(newest.category, UserWarning)
        assert "Expected" in str(newest.message)
        assert reader.data.nchar == 1
def test_treelabel(self):
    """A tree named TREEONE is stored verbatim as the only tree."""
    reader = NexusReader()
    reader.read_string("""
    #NEXUS

    begin trees;
        translate
            0 Tom,
            1 Simon,
            2 Fred;
        tree TREEONE = (0,1,2);
    end;
    """)
    found = reader.trees.trees
    assert len(found) == 1
    assert found == ['tree TREEONE = (0,1,2);']
def test_labelled_unrooted(self):
    """A tree carrying a [U] annotation is stored verbatim as the only tree."""
    reader = NexusReader()
    reader.read_string("""
    #NEXUS

    begin trees;
        translate
            0 Tom,
            1 Simon,
            2 Fred;
        tree unrooted [U] = (0,1,2);
    end;
    """)
    found = reader.trees.trees
    assert len(found) == 1
    assert found == ['tree unrooted [U] = (0,1,2);']
class Test_Binarise(unittest.TestCase):
    """Tests for binarise() on a two-character, three-taxon multistate matrix."""

    def setUp(self):
        """Parse the multistate fixture and replace it with its binarised form."""
        self.nex = NexusReader()
        self.nex.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=2;
            Format datatype=standard symbols="01" gap=-;
            Charstatelabels
                1 char1, 2 char2;
            Matrix
            Maori 14
            Dutch 25
            Latin 36
            ;""")
        self.nex = binarise(self.nex)

    def test_to_binary(self):
        """Test Nexus -> Binary: Two Character"""
        expected = {
            'char1_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char1_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char1_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
            'char2_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char2_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char2_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
        }
        for char, data in expected.items():
            for taxon, exp_value in data.items():
                assert self.nex.data[char][taxon] == exp_value

    def test_to_binary_nchar(self):
        """Test Nexus -> Binary: Number of Characters"""
        assert len(self.nex.characters) == 6

    def test_to_binary_symbollist(self):
        """Test Nexus -> Binary: Update Symbol List"""
        # check symbol list was updated
        assert len(self.nex.symbols) == 2
        assert '1' in self.nex.symbols
        assert '0' in self.nex.symbols

    def test_to_binary_nexus(self):
        """Test Nexus -> Binary: Nexus"""
        nexus = self.nex.make_nexus(interleave=False)
        # raw strings: "\s" in a normal literal is an invalid escape sequence
        assert re.search(r"Dutch\s+010010", nexus)
        assert re.search(r"Maori\s+100100", nexus)
        assert re.search(r"Latin\s+001001", nexus)
class Test_TaxaHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE taxa blocks"""

    def setUp(self):
        """Load the Mesquite taxa-block regression file."""
        self.nex = NexusReader(
            os.path.join(REGRESSION_DIR, 'mesquite_taxa_block.nex'))

    def test_taxa_block(self):
        """Taxa A, B and C are present and the counts agree."""
        for taxon in ['A', 'B', 'C']:
            assert taxon in self.nex.taxa
        # did we get the right number of taxa in the matrix?
        assert self.nex.taxa.ntaxa == len(self.nex.taxa.taxa) == 3

    def test_taxa_block_attributes(self):
        """The TITLE line is kept as the single block attribute."""
        assert 'taxa' in self.nex.blocks
        assert len(self.nex.taxa.attributes) == 1
        assert 'TITLE Untitled_Block_of_Taxa;' in self.nex.taxa.attributes

    def test_write(self):
        """Every expected line pattern appears in the written output."""
        # raw strings: "\s" and "\[" are invalid escapes in normal literals
        expected_patterns = [
            r'^begin taxa;$',
            r'^\s+TITLE Untitled_Block_of_Taxa;$',
            r'^\s+dimensions ntax=3;$',
            r'^\s+taxlabels$',
            r"^\s+\[1\] 'A'$",
            r"^\s+\[2\] 'B'$",
            r"^\s+\[3\] 'C'$",
            r'^;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
class Test_TreeHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE-formatted trees blocks"""

    TITLE_LINE = """Title 'Trees from "temp.trees"';"""
    LINK_LINE = """LINK Taxa = Untitled_Block_of_Taxa;"""

    def setUp(self):
        """Load the Mesquite-formatted trees regression file."""
        trees_path = os.path.join(
            REGRESSION_DIR, 'mesquite_formatted_branches.trees')
        self.nex = NexusReader(trees_path)

    def test_attributes(self):
        """The Title and LINK lines are kept, in order, as attributes."""
        assert len(self.nex.trees.attributes) == 2
        assert self.nex.trees.attributes[0] == self.TITLE_LINE
        assert self.nex.trees.attributes[1] == self.LINK_LINE

    def test_found_trees(self):
        """Exactly one tree is found."""
        assert self.nex.trees.ntrees == 1

    def test_found_taxa(self):
        """Taxa A, B and C are found."""
        assert len(self.nex.trees.taxa) == 3
        for label in ('A', 'B', 'C'):
            assert label in self.nex.trees.taxa

    def test_was_translated(self):
        """The trees block is flagged as translated."""
        assert self.nex.trees.was_translated

    def test_translation(self):
        """Translator keys '1'..'3' map to A, B and C."""
        for key, label in (('1', 'A'), ('2', 'B'), ('3', 'C')):
            assert self.nex.trees.translators[key] == label

    def test_write(self):
        """Both attribute lines appear in the written output."""
        output = self.nex.write()
        assert self.TITLE_LINE in output
        assert self.LINK_LINE in output
def test_ok_starting_with_zero(self):
    """A translate table numbered from 0 yields translator keys '0', '1', '2'."""
    reader = NexusReader()
    reader.read_string("""
    #NEXUS

    begin trees;
        translate
            0 Tom,
            1 Simon,
            2 Fred;
        tree tree = (0,1,2)
    end;
    """)
    assert len(reader.trees.translators) == 3
    for key in ('0', '1', '2'):
        assert key in reader.trees.translators
def test_ok_starting_with_one(self):
    """A translate table numbered from 1 yields translator keys '1', '2', '3'."""
    reader = NexusReader()
    reader.read_string("""
    #NEXUS

    begin trees;
        translate
            1 Tom,
            2 Simon,
            3 Fred;
        tree tree = (1,2,3)
    end;
    """)
    assert len(reader.trees.translators) == 3
    for key in ('1', '2', '3'):
        assert key in reader.trees.translators
def test_generic_readwrite(self):
    """With GenericHandler on 'data', write() reproduces the block line by line."""
    # NOTE(review): whitespace inside this literal was collapsed in the
    # source under review; restore the original column padding between taxon
    # and states if the comparison below depends on it.
    wanted = """Begin data;
    Dimensions ntax=4 nchar=2;
    Format datatype=standard symbols="01" gap=-;
    Matrix
    Harry 00
    Simon 01
    Betty 10
    Louise 11
    ;
    """.split("\n")
    reader = NexusReader()
    reader.handlers['data'] = GenericHandler
    reader.read_file(os.path.join(EXAMPLE_DIR, 'example.nex'))
    produced = reader.data.write().split("\n")
    for position, line in enumerate(produced):
        # indexing keeps the IndexError when more lines are written than expected
        assert line.strip() == wanted[position].strip()
class Test_DataHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE data blocks"""

    def setUp(self):
        """Parse a data block that carries Mesquite TITLE and LINK lines."""
        self.nex = NexusReader()
        self.nex.read_string("""
        #NEXUS

        Begin data;
        TITLE Untitled_Block_of_Taxa;
        LINK Taxa = Untitled_Block_of_Taxa;
        Dimensions ntax=2 nchar=2;
        Format datatype=standard gap=- symbols="01";
        Matrix
        Harry 00
        Simon 01
            ;
        End;
        """)

    def test_attributes(self):
        """TITLE and LINK are kept, in order, as block attributes."""
        assert len(self.nex.data.attributes) == 2
        assert self.nex.data.attributes[0] == \
            """TITLE Untitled_Block_of_Taxa;"""
        assert self.nex.data.attributes[1] == \
            """LINK Taxa = Untitled_Block_of_Taxa;"""

    def test_write(self):
        """Every expected line pattern appears in the written output."""
        # raw strings: "\s" in a normal literal is an invalid escape sequence
        expected_patterns = [
            r'^begin data;$',
            r'^\s+TITLE Untitled_Block_of_Taxa;$',
            r'^\s+LINK Taxa = Untitled_Block_of_Taxa;$',
            r'^\s+dimensions ntax=2 nchar=2;$',
            r'^\s+format datatype=standard gap=- symbols="01";$',
            r"^matrix$",
            r"^Harry\s+00",
            r"^Simon\s+01$",
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
class Test_TallyBySite(unittest.TestCase):
    """Tests for tally_by_site on a three-taxon, six-site matrix."""

    def setUp(self):
        """Parse the shared fixture matrix."""
        reader = NexusReader()
        reader.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry 0111-?
            Simon 0011-?
            Elvis 0001-?
            ;"""
        )
        self.nex = reader

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bad_input in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_site, bad_input)

    def test_tally_by_site(self):
        """Each taxon is listed under its state at every site."""
        tally = tally_by_site(self.nex)
        # one row per site: the state expected for Harry, Simon, Elvis
        expected_states = (
            ('0', '0', '0'),  # 000
            ('1', '0', '0'),  # 100
            ('1', '1', '0'),  # 110
            ('1', '1', '1'),  # 111
            ('-', '-', '-'),  # ---
            ('?', '?', '?'),  # ???
        )
        taxa = ('Harry', 'Simon', 'Elvis')
        for site, states in enumerate(expected_states):
            for taxon, state in zip(taxa, states):
                assert taxon in tally[site][state]
def test_regression_include_invisible_taxa(self):
    """Include taxa that have no entries"""
    text = """
    #NEXUS

    BEGIN DATA;
        DIMENSIONS NTAX=15 NCHAR=7;
        FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES;
    MATRIX

    Gertrude 0000001
    Debbie 0001000
    Zarathrustra 0000000
    Christie 0010000
    Benny 0100000
    Bertha 0100000
    Craig 0010000
    Fannie-May 0000010
    Charles 0010000
    Annik 1000000
    Frank 0000010
    Amber 1000000
    Andreea 1000000
    Edward 0000100
    Donald 0001000
    ;
    END;
    """
    reader = NexusReader()
    reader.read_string(text)
    multistate = multistatise(reader)

    for taxon, sites in multistate.data.matrix.items():
        initial = taxon[0]
        if initial == 'Z':
            continue  # will check later
        # first letter of taxa name is the expected character state
        assert initial == sites[0], \
            "%s should be %s not %s" % (taxon, initial, sites[0])

    # deal with completely missing taxa
    assert 'Zarathrustra' in multistate.data.matrix
    assert multistate.data.matrix['Zarathrustra'][0] == '?'
def test_count_other_values_two(self):
    """Count occurrences of the states 'A' and 'B' for each taxon."""
    wanted = {
        "Harry": 1,
        "Simon": 2,
        "Peter": 1,
        "Betty": 0,
        "Louise": 0,
    }
    reader = NexusReader()
    reader.read_string(
        """#NEXUS
        Begin data;
        Dimensions ntax=5 nchar=3;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry 0A0 [No missing]
        Simon 0AB [one missing]
        Peter 0-B [one gap]
        Betty ?-1 [one gap and one missing = 2 missing]
        Louise ??? [three missing]
            ;
        End;
        """
    )
    observed = count_site_values(reader, ["A", "B"])
    for name in observed:
        assert observed[name] == wanted[name]
def test_count_other_values_one(self):
    """Count occurrences of the single state 'A' for each taxon."""
    wanted = {'Harry': 1, 'Simon': 1, 'Peter': 0, 'Betty': 0, 'Louise': 0}
    reader = NexusReader()
    reader.read_string("""#NEXUS
    Begin data;
    Dimensions ntax=5 nchar=3;
    Format datatype=standard symbols="01" gap=-;
    Matrix
    Harry 0A0 [No missing]
    Simon 0A0 [one missing]
    Peter 0-0 [one gap]
    Betty ?-1 [one gap and one missing = 2 missing]
    Louise ??? [three missing]
        ;
    End;
    """)
    observed = count_site_values(reader, 'A')
    for name in observed:
        assert observed[name] == wanted[name]
def setUp(self):
    """Parse a three-taxon, four-site matrix into self.nex."""
    matrix_text = """Begin data;
            Dimensions ntax=3 nchar=4;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry 0111
            Simon 0011
            Elvis 0001
            ;"""
    reader = NexusReader()
    self.nex = reader
    reader.read_string(matrix_text)
def snpMatrixGenerator(sourceFile, destFile, recordAll=False, recordRandomSample=True):
    """Collect differing sites from every data block of a multi-block nexus stream.

    The source is split into chunks at each line containing both "begin"
    and "data" (case-insensitive).  Every chunk is parsed with NexusReader
    and, when it holds a data block, handed to ``_findDifferences`` together
    with the shared NexusWriter.  The writer's nexus text is finally written
    to ``destFile``.

    Parameters:
        sourceFile: open, line-iterable file object; closed on return.
        destFile: open, writable file object; closed on return.
        recordAll: forwarded to ``_findDifferences``.
        recordRandomSample: forwarded to ``_findDifferences``.

    Exactly one of ``recordAll`` / ``recordRandomSample`` must be true;
    otherwise "Invalid Options" is printed and SystemExit is raised.
    """
    if recordAll == recordRandomSample:
        # print() with one argument behaves the same on Python 2 and 3
        print("Invalid Options")
        # same effect as the site-provided exit(), without depending on it
        raise SystemExit()

    destNexus = NexusWriter()

    def _consume(block, snpCol):
        """Parse one chunk and return the (possibly advanced) SNP column."""
        sourceNexus = NexusReader()
        sourceNexus.read_string(block)
        if "data" in sourceNexus.blocks:
            return _findDifferences(sourceNexus, destNexus, snpCol,
                                    recordAll, recordRandomSample)
        return snpCol

    try:
        block = ""
        snpCol = 0
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                # a new data block starts: flush what was accumulated so far
                snpCol = _consume(block, snpCol)
                block = line
            else:
                block += line
        # flush the trailing chunk
        snpCol = _consume(block, snpCol)
        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        # close both handles even if parsing or writing failed
        try:
            destFile.close()
        finally:
            sourceFile.close()
def setUp(self):
    """Parse a 4x4 binary matrix and store its multistatised form in self.nex."""
    matrix_text = """Begin data;
            Dimensions ntax=4 nchar=4;
            Format datatype=standard symbols="01" gap=-;
            Matrix
            Harry 1000
            Simon 0100
            Betty 0010
            Louise 0001
            ;"""
    binary = NexusReader()
    binary.read_string(matrix_text)
    self.nex = multistatise(binary)
class Test_TallyByTaxon(unittest.TestCase):
    """Tests for tally_by_taxon on a three-taxon, six-site matrix."""

    def setUp(self):
        """Parse the shared fixture matrix."""
        reader = NexusReader()
        reader.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry 0111-?
            Simon 0011-?
            Elvis 0001-?
            ;"""
        )
        self.nex = reader

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bad_input in ("I am a string", 0):
            self.assertRaises(TypeError, tally_by_taxon, bad_input)

    def test_tally_by_taxon(self):
        """Each taxon maps every state to the list of sites showing it."""
        tally = tally_by_taxon(self.nex)
        # (state, site lists expected for Harry, Simon, Elvis)
        expectations = (
            ('0', ([0], [0, 1], [0, 1, 2])),      # sites that are zero
            ('1', ([1, 2, 3], [2, 3], [3])),      # sites that are 1
            ('-', ([4], [4], [4])),               # sites that are -
            ('?', ([5], [5], [5])),               # sites that are ?
        )
        taxa = ('Harry', 'Simon', 'Elvis')
        for state, site_lists in expectations:
            for taxon, sites in zip(taxa, site_lists):
                assert tally[taxon][state] == sites
def setUp(self):
    """Parse a labelled two-character matrix and store its binarised form."""
    matrix_text = """Begin data;
            Dimensions ntax=3 nchar=2;
            Format datatype=standard symbols="01" gap=-;
            Charstatelabels
                1 char1, 2 char2;
            Matrix
            Maori 14
            Dutch 25
            Latin 36
            ;"""
    multistate = NexusReader()
    multistate.read_string(matrix_text)
    self.nex = binarise(multistate)
def setUp(self):
    """Parse a 2x2 data block that carries a TITLE line into self.nex."""
    nexus_text = """
        #NEXUS

        Begin data;
        TITLE something;
        Dimensions ntax=2 nchar=2;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry 00
        Simon 01
            ;
        End;
        """
    reader = NexusReader()
    self.nex = reader
    reader.read_string(nexus_text)
class Test_CountBinarySetSize(unittest.TestCase):
    """Tests for count_binary_set_size on a three-taxon, four-site matrix."""

    def setUp(self):
        """Parse the shared fixture matrix."""
        reader = NexusReader()
        reader.read_string(
            """Begin data;
            Dimensions ntax=3 nchar=4;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry 0111
            Simon 0011
            Elvis 0001
            ;"""
        )
        self.nex = reader

    def test_errorcheck(self):
        """Arguments that are not nexus objects raise TypeError."""
        for bad_input in ("I am a string", 0):
            self.assertRaises(TypeError, count_binary_set_size, bad_input)

    def test_count_binary_set_size(self):
        """Keys 0 through 3 of the tally each hold the value 1."""
        tally = count_binary_set_size(self.nex)
        for key in range(4):
            assert tally[key] == 1
def setUp(self):
    """Parse a 2x2 data block with Mesquite TITLE and LINK lines into self.nex."""
    nexus_text = """
        #NEXUS

        Begin data;
        TITLE Untitled_Block_of_Taxa;
        LINK Taxa = Untitled_Block_of_Taxa;
        Dimensions ntax=2 nchar=2;
        Format datatype=standard gap=- symbols="01";
        Matrix
        Harry 00
        Simon 01
            ;
        End;
        """
    reader = NexusReader()
    self.nex = reader
    reader.read_string(nexus_text)
def test_combine_with_character_labels(self):
    """Combine two labelled 3x3 nexuses and check the merged output.

    The combined nexus is expected to report NTAX=3 and NCHAR=6, merge the
    symbol lists, concatenate each taxon's states, and number the character
    labels 1..6 as "<nexus index>.<original label>".
    """
    n1 = NexusReader()
    n1.read_string(
        """
        BEGIN DATA;
            DIMENSIONS NTAX=3 NCHAR=3;
            FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="123";
            CHARSTATELABELS
                1 char1,
                2 char2,
                3 char3
        ;
        MATRIX
        Tax1 123
        Tax2 123
        Tax3 123
        ;
        """
    )
    n2 = NexusReader()
    n2.read_string(
        """
        BEGIN DATA;
            DIMENSIONS NTAX=3 NCHAR=3;
            FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="456";
            CHARSTATELABELS
                1 char1,
                2 char2,
                3 char3
        ;
        MATRIX
        Tax1 456
        Tax2 456
        Tax3 456
        ;
        """
    )
    newnex = combine_nexuses([n1, n2])

    # serialise once per output flavour instead of once per assertion
    written = newnex.write()
    assert re.search(r"""\bNTAX=3\b""", written)
    assert re.search(r"""\bNCHAR=6\b""", written)
    assert re.search(r'\sSYMBOLS="123456"[\s;]', written)

    for tax in [1, 2, 3]:
        assert re.search(r"""\bTax%d\s+123456\b""" % tax, written)

    written_with_labels = newnex.write(charblock=True)
    counter = 1
    for nex_id in [1, 2]:
        for char_id in [1, 2, 3]:
            assert re.search(
                r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id),
                written_with_labels
            )
            counter += 1
def test_remove_sites_set(self):
    """Removing site 1, given as a set, leaves a data block of length one."""
    source = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
    unwanted = {1}
    trimmed = new_nexus_without_sites(source, unwanted)
    assert len(trimmed.data) == 1
help="Number of Characters to Generate") options, args = parser.parse_args() try: nexusname = args[0] except IndexError: print __doc__ print "Author: %s\n" % __author__ parser.print_help() sys.exit() try: newnexus = args[1] except IndexError: newnexus = None if options.numchars != False: try: options.numchars = int(options.numchars) except ValueError: print "numchars needs to be a number!" raise nexus = NexusReader(nexusname) nexus = shufflenexus(nexus, options.numchars) if newnexus is not None: nexus.write_to_file(newnexus) print "New random nexus written to %s" % newnexus else: print nexus.write()
def setUp(self):
    """Load the Mesquite-formatted trees regression file into self.nex."""
    trees_path = os.path.join(
        REGRESSION_DIR, 'mesquite_formatted_branches.trees')
    self.nex = NexusReader(trees_path)
def test_failure_on_required_block_two(self):
    """Requiring an 'r8s' block of example2.nex raises NexusFormatException."""
    reader = NexusReader(os.path.join(EXAMPLE_DIR, 'example2.nex'))
    required = ['r8s']
    with self.assertRaises(NexusFormatException):
        check_for_valid_NexusReader(reader, required)
def setUp(self):
    """Open mesquite_formatted_branches.trees from the regression directory."""
    filename = 'mesquite_formatted_branches.trees'
    self.nex = NexusReader(os.path.join(REGRESSION_DIR, filename))
def setUp(self):
    """Load the Mesquite taxa-block regression file into self.nex."""
    taxa_path = os.path.join(REGRESSION_DIR, 'mesquite_taxa_block.nex')
    self.nex = NexusReader(taxa_path)
from nexus import NexusReader
from Bio import SeqIO
import sys

# Print columns 6230-7866 (0-based slice 6230:7867) of every taxon in the
# nexus alignment as FASTA-style records.
n = NexusReader()
n.read_file("Razafimandimbison_AppS1.txt")
for taxon, characters in n.data:
    print(">", taxon)
    joined = "".join(characters)
    print(joined[6230:7867])

fasta_in = "Psychotria_rps16.fas"
# Read the FASTA file and echo each record's id and sequence.
for record in SeqIO.parse(fasta_in, 'fasta'):
    id_part, desc_part, seq = record.id, record.description, record.seq
    print('>', id_part)
    print(seq)
def setUp(self):
    """Load example.nex from the examples directory into self.nex."""
    example_path = os.path.join(EXAMPLE_DIR, 'example.nex')
    self.nex = NexusReader(example_path)
class Test_DataHandler_SimpleNexusFormat(unittest.TestCase):
    """Tests for the data handler on the simple example.nex file."""

    # states expected for each taxon in example.nex
    expected = {
        'Harry': ['0', '0'],
        'Simon': ['0', '1'],
        'Betty': ['1', '0'],
        'Louise': ['1', '1'],
    }

    def setUp(self):
        """Load example.nex."""
        self.nex = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))

    def test_block_find(self):
        """The data block is registered and exposed as an attribute."""
        assert 'data' in self.nex.blocks
        assert hasattr(self.nex, 'data')
        assert self.nex.data == self.nex.data

    def test_raw(self):
        """The raw block lines are kept as read."""
        # NOTE(review): intra-line whitespace was collapsed in the source
        # under review; restore the file's column padding in these literals
        # if the handler keeps it.
        assert self.nex.data.block == [
            'Begin data;',
            'Dimensions ntax=4 nchar=2;',
            'Format datatype=standard symbols="01" gap=-;',
            'Matrix',
            'Harry 00',
            'Simon 01',
            'Betty 10',
            'Louise 11',
            ';'
        ]

    def test_format_string(self):
        """The format line is tokenised into exactly the expected keys."""
        # did we get the expected tokens in the format string?
        expected = {'datatype': 'standard', 'gap': '-', 'symbols': '01'}
        for k, v in expected.items():
            assert self.nex.data.format[k] == v, \
                "%s should equal %s and not %s" % \
                (k, v, self.nex.data.format[k])
        # did we get the right number of tokens?
        assert len(self.nex.data.format) == len(expected)

    def test_taxa(self):
        """All expected taxa are in the matrix and the counts agree."""
        # did we get the right taxa in the matrix?
        for taxon in self.expected:
            assert taxon in self.nex.data.matrix
        # did we get the right number of taxa in the matrix?
        assert self.nex.data.ntaxa == len(self.expected) == len(
            self.nex.data.taxa)

    def test_characters(self):
        """Each taxon's states match the expected values."""
        # did we parse the characters properly?
        assert self.nex.data.nchar == 2
        for taxon, expected in self.expected.items():
            assert self.nex.data.matrix[taxon] == expected

    def test_iterable(self):
        """Iterating the data block yields (taxon, states) pairs."""
        for taxon, block in self.nex.data:
            assert block == self.expected[taxon]

    def test_parse_format_line(self):
        """parse_format_line handles key=value tokens and bare flags."""
        d = DataHandler()
        f = d.parse_format_line('Format datatype=standard symbols="01" gap=-;')
        assert f['datatype'] == 'standard', \
            "Expected 'standard', but got '%s'" % f['datatype']
        assert f['symbols'] == '01', \
            "Expected '01', but got '%s'" % f['symbols']
        assert f['gap'] == '-', "Expected 'gap', but got '%s'" % f['gap']

        f = d.parse_format_line(
            'FORMAT datatype=RNA missing=? gap=- symbols="ACGU" labels interleave;'
        )
        assert f['datatype'] == 'rna', \
            "Expected 'rna', but got '%s'" % f['datatype']
        assert f['missing'] == '?', \
            "Expected '?', but got '%s'" % f['missing']
        assert f['gap'] == '-', "Expected '-', but got '%s'" % f['gap']
        assert f['symbols'] == 'acgu', \
            "Expected 'acgu', but got '%s'" % f['symbols']
        assert f['labels'] == True, \
            "Expected <True>, but got '%s'" % f['labels']
        assert f['interleave'] == True, \
            "Expected <True>, but got '%s'" % f['interleave']

    def test_write(self):
        """Every expected line pattern appears in the written output."""
        # raw strings: "\s" in a normal literal is an invalid escape sequence
        expected_patterns = [
            r'^begin data;$',
            r'^\s+dimensions ntax=4 nchar=2;$',
            r'^\s+format datatype=standard symbols="01" gap=-;$',
            r'^matrix$',
            r'^Simon\s+01$',
            r'^Louise\s+11$',
            r'^Betty\s+10$',
            r'^Harry\s+00$',
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected

    def test__load_characters(self):
        """The per-site view agrees with the per-taxon expectations."""
        for site, data in self.nex.data.characters.items():
            for taxon, value in data.items():
                assert value == self.expected[taxon][site]

    def test_get_site(self):
        """Sites 0 and 1 can be looked up by index."""
        for i in (0, 1):
            site_data = self.nex.data.characters[i]
            for taxon, value in site_data.items():
                assert self.expected[taxon][i] == value

    def test_incorrect_dimensions_warnings_ntaxa(self):
        """Declaring ntax=5 for a one-taxon matrix warns exactly once."""
        nex = NexusReader()
        with warnings.catch_warnings(record=True) as w:
            nex.read_string("""Begin data;
                Dimensions ntax=5 nchar=1;
                Format datatype=standard symbols="01" gap=-;
                Matrix
                Harry 1
                ;""")
            assert len(w) == 1, 'Expected 1 warning, got %r' % w
            assert issubclass(w[-1].category, UserWarning)
            assert "Expected" in str(w[-1].message)
            assert nex.data.nchar == 1

    def test_incorrect_dimensions_warnings_nchar(self):
        """Declaring nchar=5 for a one-character matrix warns exactly once."""
        with warnings.catch_warnings(record=True) as w:
            nex = NexusReader()
            nex.read_string("""Begin data;
                Dimensions ntax=1 nchar=5;
                Format datatype=standard symbols="01" gap=-;
                Matrix
                Harry 1
                ;""")
            assert len(w) == 1, 'Expected 1 warning, got %r' % w
            assert issubclass(w[-1].category, UserWarning)
            assert "Expected" in str(w[-1].message)
            assert nex.data.nchar == 1
def test_write(self):
    """Writing example.trees reproduces the file's text exactly."""
    path = os.path.join(EXAMPLE_DIR, 'example.trees')
    nex = NexusReader(path)
    # read through a context manager so the handle is closed promptly
    with open(path) as handle:
        text = handle.read()
    assert text == nex.write()
def setUp(self):
    """Load example-beast.trees from the examples directory into self.nex."""
    beast_path = os.path.join(EXAMPLE_DIR, 'example-beast.trees')
    self.nex = NexusReader(beast_path)
#!/usr/bin/env python
#coding=utf-8
import ete3
from nexus import NexusReader

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='reroots and cleans Michael et al.')
    parser.add_argument("input", help='filename')
    parser.add_argument("output", help='filename')
    args = parser.parse_args()

    nex = NexusReader(args.input)

    # Make the first tree into newick for ete3: keep everything after the
    # first " = ".  maxsplit=1 stops a later " = " inside the tree text from
    # truncating the newick string.
    tree = nex.trees.trees[0].split(" = ", 1)[1].strip()
    tree = ete3.Tree(tree, format=0)

    # reroot on the 'Mawe' leaf
    tree.set_outgroup('Mawe')

    # store the rerooted tree back, marked as rooted
    nex.trees.trees[0] = 'tree tg [&R] = %s' % tree.write(format=5)

    with open(args.output, 'w') as out:
        out.write(nex.write())
usage="usage: %prog [-o output.nex] nex1.nex nex2.nex ... nexN.nex") parser.add_option("-o", "--output", dest="output", action="store", default=None, type="string", help="output nexus file") options, nexuslist = parser.parse_args() if len(nexuslist) < 1: print(__doc__) parser.print_help() sys.exit() if options.output is not None: outfile = options.output else: outfile = 'multistate.nex' nexuslist2 = [] for nfile in nexuslist: n = NexusReader(nfile) n = multistatise(n) nexuslist2.append(n) out = combine_nexuses(nexuslist2) out.write_to_file(outfile, charblock=True, interleave=False) print("Written to %s" % outfile)
from ete2 import Tree
from nexus import NexusReader
import sys
import re
import csv

# Command line: argv[1] is the nexus file, argv[2] the resistance table.
arguments = sys.argv
n = NexusReader(arguments[1])
# First get the resistance data -- figure out which strain is resistant to what.
# The table is tab-delimited; field names are supplied here, so the first
# row of the file is read as data, not as a header.
drug_resistance = csv.DictReader(open(arguments[2]), delimiter='\t', fieldnames=("strain", "drug"))
# Then gather a mapping from taxon name to number (1-based, in taxa order).
number = 1
number_name = {}
for i in n.taxa.taxa:
    number_name[i] = number
    number += 1
# Taxon names sorted in descending order.
keys = list(number_name.keys())
keys.sort(reverse=True)
# Map each strain to its drug; a strain listed more than once keeps the
# last row's value.
strain_to_resistance = {}
for row in drug_resistance:
    strain_to_resistance[row['strain']] = row["drug"]
# Next step would be to read the strain sampling dates to find the oldest
# strain the tree is based upon.
# Maha said not to do this, so default to 2018.
# reference_date = csv.DictReader(open(arguments[3]),delimiter='\t', fieldnames = ( "strain", "date"))
class Test_DataHandler_CharacterBlockNexusFormat(unittest.TestCase):
    """Tests for a data block that carries character labels."""

    def setUp(self):
        """Load example-characters.nex."""
        self.nex = NexusReader(
            os.path.join(EXAMPLE_DIR, 'example-characters.nex'))

    def test_block_find(self):
        """The data block is registered."""
        assert 'data' in self.nex.blocks

    def test_charblock_find(self):
        """The data block exposes a characters attribute."""
        assert hasattr(self.nex.data, 'characters')

    def test_taxa(self):
        """Five taxa are found."""
        assert self.nex.data.ntaxa == 5

    def test_data(self):
        """Five characters are found."""
        assert self.nex.data.nchar == 5

    def test_charlabels(self):
        """Labels CHAR_A..CHAR_E are stored under indices 0..4."""
        assert self.nex.data.charlabels[0] == 'CHAR_A'
        assert self.nex.data.charlabels[1] == 'CHAR_B'
        assert self.nex.data.charlabels[2] == 'CHAR_C'
        assert self.nex.data.charlabels[3] == 'CHAR_D'
        assert self.nex.data.charlabels[4] == 'CHAR_E'

    def test_label_parsing(self):
        """Characters are keyed by their labels."""
        assert 'CHAR_A' in self.nex.data.characters
        assert 'CHAR_B' in self.nex.data.characters
        assert 'CHAR_C' in self.nex.data.characters
        assert 'CHAR_D' in self.nex.data.characters
        assert 'CHAR_E' in self.nex.data.characters

    def test_matrix(self):
        """Every taxon's row reads A, B, C, D, E."""
        for taxon in ("A", "B", "C", "D", "E"):
            for index, expected_value in enumerate(("A", "B", "C", "D", "E")):
                assert self.nex.data.matrix[taxon][index] == expected_value

    def test_characters(self):
        """Every taxon has state X in character CHAR_X."""
        for site in ("A", "B", "C", "D", "E"):
            # All sites in CHAR_A are state "A", and all in CHAR_B and "B" etc
            for t in ("A", "B", "C", "D", "E"):
                assert self.nex.data.characters["CHAR_%s" % site][t] == site

    def test_write(self):
        """Every expected line pattern appears in the written output."""
        # raw strings: "\s" and "\?" are invalid escapes in normal literals
        expected_patterns = [
            r'^begin data;$',
            r'^\s+dimensions ntax=5 nchar=5;$',
            r'^\s+format gap=- missing=\?;$',
            r'^\s+charstatelabels$',
            r'^\s+1\s+CHAR_A,$',
            r'^\s+2\s+CHAR_B,$',
            r'^\s+3\s+CHAR_C,$',
            r'^\s+4\s+CHAR_D,$',
            r'^\s+5\s+CHAR_E$',
            r'^matrix$',
            r'^A\s+ABCDE$',
            r'^B\s+ABCDE$',
            r'^C\s+ABCDE$',
            r'^D\s+ABCDE$',
            r'^E\s+ABCDE$',
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
def test_find_constant_sites_1(self):
    """find_constant_sites returns a falsy result for example.nex."""
    example = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
    constant = find_constant_sites(example)
    assert not constant
def test_regression(self):
    """ape_random.trees is read as containing two trees."""
    trees_path = os.path.join(REGRESSION_DIR, 'ape_random.trees')
    reader = NexusReader(trees_path)
    assert reader.trees.ntrees == 2
if not os.path.exists(basepath + "/bin/FastTree"): print("GMYC.py uses FastTreeUPGMA to infer ultramatric trees,") print("please download the latest source code from: ") print("http://meta.microbesonline.org/fasttree/FastTreeUPGMA.c") print("Please complie with gcc -O3 -finline-functions -funroll-loops -Wall -o FastTree FastTreeUPGMA.c -lm, ") print("and put FastTree it to bin/ \n") sys.exit() print("Building UPGMA tree using FastTree.") stree = call_upgma(salignment) if stree == "": print("Input tree is empty.") print_options() sys.exit() try: treetest = open(stree) l1 = treetest.readline() if l1.strip() == "#NEXUS": nexus = NexusReader(stree) nexus.blocks['trees'].detranslate() stree = nexus.trees.trees[0] treetest.close() sp = gmyc(tree = stree, print_detail = sprint_detail, show_tree = sshow_tree, show_llh = sshow_llh, show_lineages = sshow_lineages, print_species = sprint_species, pv = p_value) print("Final number of estimated species by GMYC: " + repr(len(sp)) ) except ete2.parser.newick.NewickError: print("Unexisting tree file or Malformed newick tree structure.")
def test_valid_with_required_block_two(self):
    """example2.nex passes validation when 'data' and 'taxa' are required."""
    reader = NexusReader(os.path.join(EXAMPLE_DIR, 'example2.nex'))
    required = ['data', 'taxa']
    check_for_valid_NexusReader(reader, required)
def hash(salt, taxon):
    """Return the hexadecimal MD5 digest of the text "<salt>-<taxon>".

    Parameters:
        salt: value formatted with %s before the dash.
        taxon: value formatted with %s after the dash.

    Returns:
        A 32-character lowercase hex string.
    """
    # hashlib only accepts bytes on Python 3, so encode the text explicitly
    text = "%s-%s" % (salt, taxon)
    return hashlib.md5(text.encode("utf-8")).hexdigest()


if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog fudge.nex output.nex")
    # bind the positional arguments to the name used below; the original
    # unpacked them into a different name and then read an undefined `args`
    options, args = parser.parse_args()

    try:
        nexusname = args[0]
    except IndexError:
        print(__doc__)
        print("Author: %s\n" % __author__)
        parser.print_help()
        sys.exit()

    # the output file is optional
    try:
        newnexus = args[1]
    except IndexError:
        newnexus = None

    nexus = NexusReader(nexusname)
    nexus = anonymise(nexus)

    if newnexus is not None:
        nexus.write_to_file(newnexus)
        print("New nexus written to %s" % newnexus)
    else:
        # no output file given: print the nexus text itself (the original
        # referenced an undefined `filename` here)
        print(nexus.write())
def test_valid_NexusReader(self):
    """A freshly constructed NexusReader passes validation."""
    empty_reader = NexusReader()
    check_for_valid_NexusReader(empty_reader)
def nexus(self):
    """Return a NexusReader built from ``self.trees.as_posix()``."""
    trees_path = self.trees.as_posix()
    return NexusReader(trees_path)
for w in wrapper.wrap(s): print(w) return if __name__ == '__main__': from optparse import OptionParser parser = OptionParser(usage="usage: %prog [taxa/sites] nexus.nex") options, commands = parser.parse_args() if len(commands) != 2: print(__doc__) parser.print_help() quit() command, nex = commands try: nex = NexusReader(nex) except IOError: raise IOError("Unable to read %s" % nex) if command in ('taxa', 't'): tally = tally_by_taxon(nex) elif command in ('site', 's'): tally = tally_by_site(nex) else: quit("Invalid tally command. Only 'taxa' and 'site' are valid.") print_tally(tally)
#!/usr/bin/env python
import sys
from nexus import NexusReader, VERSION
from nexus.tools import combine_nexuses

__author__ = 'Simon Greenhill <*****@*****.**>'
__doc__ = """combine-nexus - python-nexus tools v%(version)s
combines a series of nexuses into one nexus.
""" % {
    'version': VERSION,
}

if __name__ == '__main__':
    #set up command-line options
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog nex1.nex nex2.nex ... nexN.nex")
    options, nexuslist = parser.parse_args()

    # at least two input files are needed for combining to make sense
    if len(nexuslist) <= 1:
        # print() call form works on both Python 2 and 3 and matches the
        # print(...) call used at the end of this script
        print(__doc__)
        parser.print_help()
        sys.exit()

    nexuslist = [NexusReader(n) for n in nexuslist]
    out = combine_nexuses(nexuslist)
    out.write_to_file('combined.nex', charblock=False, interleave=False)
    print("Written to combined.nex")
#set up command-line options
from optparse import OptionParser
parser = OptionParser(usage="usage: %prog old.nex new.nex")
parser.add_option("-1", "--onefile", dest="onefile",
                  action="store_true", default=False,
                  help="One nexus file for each multistate character")
options, args = parser.parse_args()

# both the input and the output filename are required
try:
    nexusname = args[0]
    newnexusname = args[1]
except IndexError:
    # print() call form works on both Python 2 and 3
    print(__doc__)
    print("Author: %s\n" % __author__)
    parser.print_help()
    sys.exit()

nexus = NexusReader(nexusname)
new = binarise(nexus, one_nexus_per_block=options.onefile)
if isinstance(new, NexusWriter):
    # a single writer: write it straight to the requested file
    new.write_to_file(newnexusname)
elif len(new) > 1:
    # NOTE(review): a result holding exactly one writer is not written
    # anywhere by this branch -- confirm that is intended.
    # several writers: one file each, named <base>-<first character><ext>
    newnexusname, ext = os.path.splitext(newnexusname)
    for nex in new:
        nex.write_to_file(
            "%s-%s%s" % (newnexusname, nex.clean(nex.characters[0]), ext))
def test_count_missing_one(self):
    """count_site_values with default arguments reports 0 for every taxon of example.nex."""
    example = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
    counts = count_site_values(example)
    for name in counts:
        assert counts[name] == 0
def test_read_file(self):
    """Constructing a reader from example.nex finds the data block and taxon Simon."""
    example_path = os.path.join(EXAMPLE_DIR, 'example.nex')
    reader = NexusReader(example_path)
    assert 'data' in reader.blocks
    assert 'Simon' in reader.blocks['data'].matrix
def setUp(self):
    """Load example-translated.trees from the examples directory into self.nex."""
    translated_path = os.path.join(EXAMPLE_DIR, 'example-translated.trees')
    self.nex = NexusReader(translated_path)
# the input nexus filename is required
try:
    nexusname = args[0]
except IndexError:
    # print() call form works on both Python 2 and 3
    print(__doc__)
    print(__usage__)
    print("Author: %s\n" % __author__)
    parser.print_help()
    sys.exit()

# the output filename is optional
try:
    newnexus = args[1]
except IndexError:
    newnexus = None

nexus = NexusReader(nexusname)
if "trees" not in nexus.blocks:
    sys.exit("No trees found in file %s!" % nexusname)
if nexus.trees.ntrees == 0:
    # message fixed: it previously read "found in found"
    sys.exit("No trees found in file %s!" % nexusname)
if options.quiet is False:
    print("%d trees found with %d translated taxa" %
          (nexus.trees.ntrees, len(nexus.trees.translators)))

# Delete trees
if options.deltree:
    nexus = run_deltree(options.deltree, nexus, options.quiet)

# Resample trees
if options.resample:
    nexus = run_resample(options.resample, nexus, options.quiet)
raise IndexError("Character '%s' is not in the nexus" % char) states = {} for taxon, state in nexus_obj.data.characters[index].items(): states[state] = states.get(state, []) states[state].append(taxon) for state in sorted(states): print('State: %s (%d / %d = %0.2f)' % (state, len(states[state]), nexus_obj.data.ntaxa, (len(states[state]) / nexus_obj.data.ntaxa * 100))) print("\n".join(wrapper.wrap(", ".join(states[state])))) print("\n") return if __name__ == '__main__': #set up command-line options from optparse import OptionParser parser = OptionParser(usage="usage: %prog site_index nexusfile.nex") options, args = parser.parse_args() try: char = args[0] nexusname = args[1] except IndexError: parser.print_help() sys.exit() print_character_stats(NexusReader(nexusname), char)