def test_find_unique_sites_2(self):
        """Check unique-site detection when gap and missing states are present."""
        matrix_source = """Begin data;
        Dimensions ntax=4 nchar=7;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              10000?-
        Simon              1100011
        Betty              1110000
        Louise             1111000
        ;"""
        nexus = NexusReader()
        nexus.read_string(matrix_source)
        unique = find_unique_sites(nexus)

        # Sites that must be rejected (same order as before):
        #   1    - three taxa have 1 and one has 0, i.e. a single ABSENT taxon
        #   0, 4 - constant columns
        #   2    - split two against two
        for rejected in (1, 0, 2, 4):
            assert rejected not in unique

        # Site 3 is a plain unique site; sites 5 and 6 check that Harry's
        # '?' and '-' entries are handled appropriately.
        for accepted in (3, 5, 6):
            assert accepted in unique
Exemplo n.º 2
0
 def setUp(self):
     """Create three one-character readers with differing symbol sets and taxa."""
     symbols_12 = """Begin data;
         Dimensions ntax=2 nchar=1;
         Format datatype=standard symbols="12" gap=-;
         Matrix
         Harry              1
         Simon              2
         ;"""
     symbols_34 = """Begin data;
         Dimensions ntax=2 nchar=1;
         Format datatype=standard symbols="34" gap=-;
         Matrix
         Harry              3
         Simon              4
         ;"""
     symbols_345 = """Begin data;
         Dimensions ntax=3 nchar=1;
         Format datatype=standard symbols="345" gap=-;
         Matrix
         Betty              3
         Boris              4
         Simon              5
         ;"""

     # Harry/Simon coded with symbols 1 and 2.
     self.nex1 = NexusReader()
     self.nex1.read_string(symbols_12)
     # Same two taxa coded with symbols 3 and 4.
     self.nex2 = NexusReader()
     self.nex2.read_string(symbols_34)
     # Three taxa, only Simon shared with the readers above.
     self.nex3 = NexusReader()
     self.nex3.read_string(symbols_345)
Exemplo n.º 3
0
 def test_read_string(self):
     """Feed the text of ``example.nex`` to ``read_string`` and check the data block."""
     # The context manager closes the handle even if read() raises.
     with open(os.path.join(EXAMPLE_DIR, 'example.nex')) as handle:
         data = handle.read()
     nex = NexusReader()
     nex.read_string(data)
     assert 'data' in nex.blocks
     assert 'Simon' in nex.blocks['data'].matrix
Exemplo n.º 4
0
 def test_write_to_file(self):
     """Write ``example.nex`` to a temporary file and read it back unchanged."""
     # Only the path is needed; close immediately so the writer can reopen it.
     tmp = NamedTemporaryFile(delete=False, suffix=".nex")
     tmp.close()
     try:
         nex = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
         nex.write_to_file(tmp.name)
         assert os.path.isfile(tmp.name)
         n2 = NexusReader(tmp.name)
         assert n2.data.matrix == nex.data.matrix
         assert sorted(n2.data.taxa) == sorted(nex.data.taxa)
     finally:
         # Clean up even when an assertion above fails, so no temp file leaks.
         os.unlink(tmp.name)
Exemplo n.º 5
0
 def test_notimplemented_exception(self):
     """Reading a ``something`` block and anonymising it must raise NotImplementedError."""
     unsupported_source = """Begin something;
             Dimensions ntax=5 nchar=1;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;"""
     # Construction, parsing and anonymising all stay inside the guarded
     # region, so the exception may come from any of the three steps.
     with self.assertRaises(NotImplementedError):
         nex = NexusReader()
         nex.read_string(unsupported_source)
         anonymise(nex)
Exemplo n.º 6
0
 def test_incorrect_dimensions_warnings_nchar(self):
     """A declared nchar of 5 against a one-character matrix warns once; nchar reads 1."""
     mismatched_source = """Begin data;
             Dimensions ntax=1 nchar=5;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;"""
     with warnings.catch_warnings(record=True) as caught:
         nex = NexusReader()
         nex.read_string(mismatched_source)
         assert len(caught) == 1, 'Expected 1 warning, got %r' % caught
         latest = caught[-1]
         assert issubclass(latest.category, UserWarning)
         assert "Expected" in str(latest.message)
         assert nex.data.nchar == 1
 def test_treelabel(self):
     """A trees block with one named tree stores exactly that tree statement."""
     trees_source = """
     #NEXUS
 
     begin trees;
         translate
             0 Tom,
             1 Simon,
             2 Fred;
             tree TREEONE = (0,1,2);
     end;
     """
     nex = NexusReader()
     nex.read_string(trees_source)
     stored = nex.trees.trees
     assert len(stored) == 1
     assert stored == ['tree TREEONE = (0,1,2);']
 def test_labelled_unrooted(self):
     """A tree carrying a ``[U]`` marker is stored with the marker intact."""
     trees_source = """
     #NEXUS
 
     begin trees;
         translate
             0 Tom,
             1 Simon,
             2 Fred;
             tree unrooted [U] = (0,1,2);
     end;
     """
     nex = NexusReader()
     nex.read_string(trees_source)
     stored = nex.trees.trees
     assert len(stored) == 1
     assert stored == ['tree unrooted [U] = (0,1,2);']
Exemplo n.º 9
0
class Test_Binarise(unittest.TestCase):
    """Tests for ``binarise`` applied to a two-character, three-taxon matrix."""

    def setUp(self):
        """Read the multistate fixture and replace it with its binarised form."""
        self.nex = NexusReader()
        self.nex.read_string(
        """Begin data;
        Dimensions ntax=3 nchar=2;
        Format datatype=standard symbols="01" gap=-;
        Charstatelabels
            1 char1, 2 char2;
        Matrix
        Maori               14
        Dutch               25
        Latin               36
        ;""")
        self.nex = binarise(self.nex)

    def test_to_binary(self):
        """Test Nexus -> Binary: Two Character"""
        # Each original character has three distinct states, so each is
        # expected to expand into three presence/absence characters.
        expected = {
            'char1_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char1_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char1_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
            'char2_0': {"Maori": '1', "Dutch": "0", "Latin": "0"},
            'char2_1': {"Maori": '0', "Dutch": "1", "Latin": "0"},
            'char2_2': {"Maori": '0', "Dutch": "0", "Latin": "1"},
        }

        for char, data in expected.items():
            for taxon, exp_value in data.items():
                assert self.nex.data[char][taxon] == exp_value

    def test_to_binary_nchar(self):
        """Test Nexus -> Binary: Number of Characters"""
        assert len(self.nex.characters) == 6

    def test_to_binary_symbollist(self):
        """Test Nexus -> Binary: Update Symbol List"""
        # check symbol list was updated
        assert len(self.nex.symbols) == 2
        assert '1' in self.nex.symbols
        assert '0' in self.nex.symbols

    def test_to_binary_nexus(self):
        """Test Nexus -> Binary: Nexus"""
        nexus = self.nex.make_nexus(interleave=False)
        # Raw strings: "\s" in a plain literal is an invalid escape sequence.
        assert re.search(r"Dutch\s+010010", nexus)
        assert re.search(r"Maori\s+100100", nexus)
        assert re.search(r"Latin\s+001001", nexus)
Exemplo n.º 10
0
class Test_TaxaHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE taxa blocks"""

    def setUp(self):
        """Load the Mesquite-formatted taxa block fixture."""
        self.nex = NexusReader(os.path.join(REGRESSION_DIR, 'mesquite_taxa_block.nex'))

    def test_taxa_block(self):
        """All three taxa are found and the counts agree."""
        for taxon in ['A', 'B', 'C']:
            assert taxon in self.nex.taxa
        # did we get the right number of taxa in the matrix?
        assert self.nex.taxa.ntaxa == len(self.nex.taxa.taxa) == 3

    def test_taxa_block_attributes(self):
        """The TITLE line is kept as the block's only attribute."""
        assert 'taxa' in self.nex.blocks
        assert len(self.nex.taxa.attributes) == 1
        assert 'TITLE Untitled_Block_of_Taxa;' in self.nex.taxa.attributes

    def test_write(self):
        """Every expected line appears in the written output."""
        # Raw strings: "\s" and "\[" are invalid escapes in plain literals.
        expected_patterns = [
            r'^begin taxa;$',
            r'^\s+TITLE Untitled_Block_of_Taxa;$',
            r'^\s+dimensions ntax=3;$',
            r'^\s+taxlabels$',
            r"^\s+\[1\] 'A'$",
            r"^\s+\[2\] 'B'$",
            r"^\s+\[3\] 'C'$",
            r'^;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), 'Expected "%s"' % expected
Exemplo n.º 11
0
class Test_TreeHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE trees blocks"""

    def setUp(self):
        trees_path = os.path.join(
            REGRESSION_DIR, 'mesquite_formatted_branches.trees')
        self.nex = NexusReader(trees_path)

    def test_attributes(self):
        attributes = self.nex.trees.attributes
        assert len(attributes) == 2
        # Title line first, LINK line second.
        assert attributes[0] == """Title 'Trees from "temp.trees"';"""
        assert attributes[1] == """LINK Taxa = Untitled_Block_of_Taxa;"""

    def test_found_trees(self):
        tree_count = self.nex.trees.ntrees
        assert tree_count == 1

    def test_found_taxa(self):
        assert len(self.nex.trees.taxa) == 3
        for label in ('A', 'B', 'C'):
            assert label in self.nex.trees.taxa

    def test_was_translated(self):
        translated = self.nex.trees.was_translated
        assert translated

    def test_translation(self):
        # Translate-table number -> taxon label.
        for number, label in (('1', 'A'), ('2', 'B'), ('3', 'C')):
            assert self.nex.trees.translators[number] == label

    def test_write(self):
        written = self.nex.write()
        required_lines = (
            """Title 'Trees from "temp.trees"';""",
            """LINK Taxa = Untitled_Block_of_Taxa;""",
        )
        for required in required_lines:
            assert required in written
 def test_ok_starting_with_zero(self):
     """A translate table numbered from 0 yields translator keys '0'..'2'."""
     trees_source = """
     #NEXUS
 
     begin trees;
         translate
             0 Tom,
             1 Simon,
             2 Fred;
             tree tree = (0,1,2)
     end;
     """
     nex = NexusReader()
     nex.read_string(trees_source)
     assert len(nex.trees.translators) == 3
     for key in ('0', '1', '2'):
         assert key in nex.trees.translators
 def test_ok_starting_with_one(self):
     """A translate table numbered from 1 yields translator keys '1'..'3'."""
     trees_source = """
     #NEXUS
 
     begin trees;
         translate
             1 Tom,
             2 Simon,
             3 Fred;
             tree tree = (1,2,3)
     end;
     """
     nex = NexusReader()
     nex.read_string(trees_source)
     assert len(nex.trees.translators) == 3
     for key in ('1', '2', '3'):
         assert key in nex.trees.translators
Exemplo n.º 14
0
 def test_generic_readwrite(self):
     """With GenericHandler on 'data', the written block matches the input line by line."""
     expected_lines = """Begin data;
     Dimensions ntax=4 nchar=2;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              00
     Simon              01
     Betty              10
     Louise             11
     ;
     """.split("\n")
     nex = NexusReader()
     nex.handlers['data'] = GenericHandler
     nex.read_file(os.path.join(EXAMPLE_DIR, 'example.nex'))
     written_lines = nex.data.write().split("\n")
     # Compare position by position, ignoring surrounding whitespace; more
     # written lines than expected ones still ends in an IndexError.
     for position, line in enumerate(written_lines):
         wanted = expected_lines[position].strip()
         assert line.strip() == wanted
Exemplo n.º 15
0
class Test_DataHandler_Regression_Mesquite(unittest.TestCase):
    """Regression: Test that we can parse MESQUITE data blocks"""

    def setUp(self):
        """Parse a data block that carries Mesquite-style TITLE and LINK lines."""
        self.nex = NexusReader()
        self.nex.read_string("""
        #NEXUS

        Begin data;
        TITLE Untitled_Block_of_Taxa;
        LINK Taxa = Untitled_Block_of_Taxa;
        Dimensions ntax=2 nchar=2;
        Format datatype=standard gap=- symbols="01";
        Matrix
        Harry              00
        Simon              01
            ;
        End;
        """)

    def test_attributes(self):
        """TITLE and LINK are kept, in order, as block attributes."""
        assert len(self.nex.data.attributes) == 2
        assert self.nex.data.attributes[0] == \
            """TITLE Untitled_Block_of_Taxa;"""
        assert self.nex.data.attributes[1] == \
            """LINK Taxa = Untitled_Block_of_Taxa;"""

    def test_write(self):
        """Every expected line appears in the written output."""
        # Raw strings: "\s" in a plain literal is an invalid escape sequence.
        expected_patterns = [
            r'^begin data;$',
            r'^\s+TITLE Untitled_Block_of_Taxa;$',
            r'^\s+LINK Taxa = Untitled_Block_of_Taxa;$',
            r'^\s+dimensions ntax=2 nchar=2;$',
            r'^\s+format datatype=standard gap=- symbols="01";$',
            r"^matrix$",
            r"^Harry\s+00",
            r"^Simon\s+01$",
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
class Test_TallyBySite(unittest.TestCase):
    """Tests for ``tally_by_site`` on a three-taxon, six-site matrix."""

    def setUp(self):
        matrix_source = """Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111-?
            Simon              0011-?
            Elvis              0001-?
            ;"""
        self.nex = NexusReader()
        self.nex.read_string(matrix_source)

    def test_errorcheck(self):
        # Anything that is not a reader object must be rejected.
        for not_a_reader in ("I am a string", 0):
            with self.assertRaises(TypeError):
                tally_by_site(not_a_reader)

    def test_tally_by_site(self):
        tally = tally_by_site(self.nex)
        # The state each taxon shows at sites 0..5, mirroring the matrix rows.
        rows = (
            ('Harry', '0111-?'),
            ('Simon', '0011-?'),
            ('Elvis', '0001-?'),
        )
        for site in range(6):
            for taxon, states in rows:
                # The taxon must be listed under its own state at this site.
                assert taxon in tally[site][states[site]]
 def test_regression_include_invisible_taxa(self):
     """Include taxa that have no entries"""
     data = """
     #NEXUS
     
     BEGIN DATA;
         DIMENSIONS  NTAX=15 NCHAR=7;
         FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES;
     MATRIX
     
     Gertrude                0000001
     Debbie                  0001000
     Zarathrustra            0000000
     Christie                0010000
     Benny                   0100000
     Bertha                  0100000
     Craig                   0010000
     Fannie-May              0000010
     Charles                 0010000
     Annik                   1000000
     Frank                   0000010
     Amber                   1000000
     Andreea                 1000000
     Edward                  0000100
     Donald                  0001000
     ;
     END;
     """

     reader = NexusReader()
     reader.read_string(data)
     msnex = multistatise(reader)

     for taxon, sites in msnex.data.matrix.items():
         initial = taxon[0]
         # Every taxon except the all-zero 'Z...' row is named so that its
         # first letter is the expected character state.
         if initial != 'Z':
             assert initial == sites[0], \
                 "%s should be %s not %s" % (taxon, initial, sites[0])

     # The taxon with no entries must still be present, coded as missing.
     assert 'Zarathrustra' in msnex.data.matrix
     assert msnex.data.matrix['Zarathrustra'][0] == '?'
 def test_count_other_values_two(self):
     """Counting the values 'A' and 'B' together gives the per-taxon totals."""
     source = """#NEXUS 
     Begin data;
     Dimensions ntax=5 nchar=3;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              0A0  [No missing]
     Simon              0AB  [one missing]
     Peter              0-B  [one gap]
     Betty              ?-1  [one gap and one missing = 2 missing]
     Louise             ???  [three missing]
         ;
     End;
     """
     expected = {
         "Harry": 1,
         "Simon": 2,
         "Peter": 1,
         "Betty": 0,
         "Louise": 0,
     }
     nexus = NexusReader()
     nexus.read_string(source)
     count = count_site_values(nexus, ["A", "B"])
     for taxon in count:
         observed = count[taxon]
         assert observed == expected[taxon]
 def test_count_other_values_one(self):
     """Counting the single value 'A' gives the per-taxon totals."""
     source = """#NEXUS
     Begin data;
     Dimensions ntax=5 nchar=3;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              0A0  [No missing]
     Simon              0A0  [one missing]
     Peter              0-0  [one gap]
     Betty              ?-1  [one gap and one missing = 2 missing]
     Louise             ???  [three missing]
         ;
     End;
     """
     expected = {'Harry': 1, 'Simon': 1, 'Peter': 0, 'Betty': 0, 'Louise': 0}
     nexus = NexusReader()
     nexus.read_string(source)
     count = count_site_values(nexus, 'A')
     for taxon in count:
         observed = count[taxon]
         assert observed == expected[taxon]
 def setUp(self):
     """Load a three-taxon, four-site matrix into ``self.nex``."""
     matrix_source = """Begin data;
         Dimensions ntax=3 nchar=4;
         Format datatype=standard symbols="12" gap=-;
         Matrix
         Harry              0111
         Simon              0011
         Elvis              0001
         ;"""
     self.nex = NexusReader()
     self.nex.read_string(matrix_source)
def _recordBlock(block, destNexus, snpCol, recordAll, recordRandomSample):
    """Parse one accumulated chunk of text and record it if it has a data block.

    Returns the column counter handed back by ``_findDifferences``, or
    ``snpCol`` unchanged when the chunk contains no "data" block.
    """
    sourceNexus = NexusReader()
    sourceNexus.read_string(block)
    if "data" in sourceNexus.blocks:
        return _findDifferences(sourceNexus, destNexus, snpCol,
                                recordAll, recordRandomSample)
    return snpCol


def snpMatrixGenerator(sourceFile, destFile, recordAll=False,
                       recordRandomSample=True):
    """Split a multi-block source into data blocks and write one combined nexus.

    Lines of ``sourceFile`` are accumulated until a line containing both
    "begin" and "data" (case-insensitive) starts the next chunk. Each chunk is
    parsed on its own and passed to ``_findDifferences`` together with the
    shared ``NexusWriter``. The writer's output is written to ``destFile``.

    Args:
        sourceFile: open, iterable file object yielding lines of text. It is
            closed before returning.
        destFile: open, writable file object. It is closed before returning.
        recordAll: forwarded to ``_findDifferences``.
        recordRandomSample: forwarded to ``_findDifferences``.

    Raises:
        SystemExit: when ``recordAll`` and ``recordRandomSample`` are equal;
            exactly one of the two must be set.
    """
    if recordAll == recordRandomSample:
        # print() with one argument behaves the same on Python 2 and 3.
        print("Invalid Options")
        import sys
        sys.exit()

    try:
        destNexus = NexusWriter()

        block = ""
        snpCol = 0
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                # A new data block starts here: flush what was accumulated.
                snpCol = _recordBlock(block, destNexus, snpCol,
                                      recordAll, recordRandomSample)
                block = line
            else:
                block += line

        # The final chunk has no following "begin data" line to flush it.
        snpCol = _recordBlock(block, destNexus, snpCol,
                              recordAll, recordRandomSample)

        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        # Close both handles even when parsing or writing fails.
        destFile.close()
        sourceFile.close()
Exemplo n.º 22
0
 def setUp(self):
     """Read a four-taxon binary matrix and store its multistatised form."""
     binary_source = """Begin data;
     Dimensions ntax=4 nchar=4;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              1000
     Simon              0100
     Betty              0010
     Louise             0001
     ;"""
     self.nex = NexusReader()
     self.nex.read_string(binary_source)
     self.nex = multistatise(self.nex)
class Test_TallyByTaxon(unittest.TestCase):
    """Tests for ``tally_by_taxon`` on a three-taxon, six-site matrix."""

    def setUp(self):
        matrix_source = """Begin data;
            Dimensions ntax=3 nchar=6;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111-?
            Simon              0011-?
            Elvis              0001-?
            ;"""
        self.nex = NexusReader()
        self.nex.read_string(matrix_source)

    def test_errorcheck(self):
        # Anything that is not a reader object must be rejected.
        for not_a_reader in ("I am a string", 0):
            with self.assertRaises(TypeError):
                tally_by_taxon(not_a_reader)

    def test_tally_by_taxon(self):
        tally = tally_by_taxon(self.nex)
        # For each state: the site indices expected per taxon, listed in the
        # order Harry, Simon, Elvis.
        expected_by_state = (
            ('0', ([0], [0, 1], [0, 1, 2])),
            ('1', ([1, 2, 3], [2, 3], [3])),
            ('-', ([4], [4], [4])),
            ('?', ([5], [5], [5])),
        )
        taxa = ('Harry', 'Simon', 'Elvis')
        for state, site_lists in expected_by_state:
            for taxon, sites in zip(taxa, site_lists):
                assert tally[taxon][state] == sites
Exemplo n.º 24
0
 def setUp(self):
     """Read a two-character labelled matrix and store its binarised form."""
     labelled_source = """Begin data;
     Dimensions ntax=3 nchar=2;
     Format datatype=standard symbols="01" gap=-;
     Charstatelabels
         1 char1, 2 char2;
     Matrix
     Maori               14
     Dutch               25
     Latin               36
     ;"""
     self.nex = NexusReader()
     self.nex.read_string(labelled_source)
     self.nex = binarise(self.nex)
Exemplo n.º 25
0
 def setUp(self):
     """Load a two-taxon data block that carries a TITLE line."""
     titled_source = """
     #NEXUS 
     
     Begin data;
     TITLE something;
     Dimensions ntax=2 nchar=2;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              00
     Simon              01
         ;
     End;
     """
     self.nex = NexusReader()
     self.nex.read_string(titled_source)
class Test_CountBinarySetSize(unittest.TestCase):
    """Tests for ``count_binary_set_size`` on a three-taxon, four-site matrix."""

    def setUp(self):
        matrix_source = """Begin data;
            Dimensions ntax=3 nchar=4;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              0111
            Simon              0011
            Elvis              0001
            ;"""
        self.nex = NexusReader()
        self.nex.read_string(matrix_source)

    def test_errorcheck(self):
        # Anything that is not a reader object must be rejected.
        for not_a_reader in ("I am a string", 0):
            with self.assertRaises(TypeError):
                count_binary_set_size(not_a_reader)

    def test_count_binary_set_size(self):
        tally = count_binary_set_size(self.nex)
        # Every key from 0 through 3 is expected to map to 1.
        for key in range(4):
            assert tally[key] == 1
Exemplo n.º 27
0
    def setUp(self):
        """Load a data block that carries Mesquite-style TITLE and LINK lines."""
        mesquite_source = """
        #NEXUS

        Begin data;
        TITLE Untitled_Block_of_Taxa;
        LINK Taxa = Untitled_Block_of_Taxa;
        Dimensions ntax=2 nchar=2;
        Format datatype=standard gap=- symbols="01";
        Matrix
        Harry              00
        Simon              01
            ;
        End;
        """
        self.nex = NexusReader()
        self.nex.read_string(mesquite_source)
Exemplo n.º 28
0
 def test_combine_with_character_labels(self):
     """Combining two labelled matrices keeps taxa, joins sites and prefixes labels."""
     first_source = """
         BEGIN DATA;
             DIMENSIONS NTAX=3 NCHAR=3;
             FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="123";
             CHARSTATELABELS
         		1 char1,
         		2 char2,
         		3 char3
         ;
         MATRIX
         Tax1         123
         Tax2         123
         Tax3         123
         ;
         """
     second_source = """
         BEGIN DATA;
             DIMENSIONS NTAX=3 NCHAR=3;
             FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="456";
             CHARSTATELABELS
         		1 char1,
         		2 char2,
         		3 char3
         ;
         MATRIX
         Tax1         456
         Tax2         456
         Tax3         456
         ;
         """
     n1 = NexusReader()
     n1.read_string(first_source)
     n2 = NexusReader()
     n2.read_string(second_source)
     newnex = combine_nexuses([n1, n2])

     # Header: same taxa, doubled character count, merged symbol list.
     header_patterns = (
         r"""\bNTAX=3\b""",
         r"""\bNCHAR=6\b""",
         r'\sSYMBOLS="123456"[\s;]',
     )
     for pattern in header_patterns:
         assert re.search(pattern, newnex.write())

     # Each taxon row is the first matrix's sites followed by the second's.
     for tax in (1, 2, 3):
         assert re.search(r"""\bTax%d\s+123456\b""" % tax, newnex.write())

     # Character labels are numbered 1..6 and carry the source nexus number.
     label_sources = (
         (nex_id, char_id) for nex_id in (1, 2) for char_id in (1, 2, 3)
     )
     for counter, (nex_id, char_id) in enumerate(label_sources, start=1):
         label_pattern = r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id)
         assert re.search(label_pattern, newnex.write(charblock=True))
 def test_remove_sites_set(self):
     """Removing site 1, given as a set, leaves a data block of length 1."""
     example_path = os.path.join(EXAMPLE_DIR, 'example.nex')
     sites_to_drop = {1}
     nexus = new_nexus_without_sites(NexusReader(example_path), sites_to_drop)
     assert len(nexus.data) == 1
Exemplo n.º 30
0
                      help="Number of Characters to Generate")
    options, args = parser.parse_args()

    try:
        nexusname = args[0]
    except IndexError:
        print __doc__
        print "Author: %s\n" % __author__
        parser.print_help()
        sys.exit()

    try:
        newnexus = args[1]
    except IndexError:
        newnexus = None

    if options.numchars != False:
        try:
            options.numchars = int(options.numchars)
        except ValueError:
            print "numchars needs to be a number!"
            raise

    nexus = NexusReader(nexusname)
    nexus = shufflenexus(nexus, options.numchars)
    if newnexus is not None:
        nexus.write_to_file(newnexus)
        print "New random nexus written to %s" % newnexus
    else:
        print nexus.write()
Exemplo n.º 31
0
 def setUp(self):
     """Load the Mesquite-formatted trees regression fixture."""
     fixture = os.path.join(REGRESSION_DIR, 'mesquite_formatted_branches.trees')
     self.nex = NexusReader(fixture)
 def test_failure_on_required_block_two(self):
     """Requiring an 'r8s' block of ``example2.nex`` raises NexusFormatException."""
     example_path = os.path.join(EXAMPLE_DIR, 'example2.nex')
     nexus_obj = NexusReader(example_path)
     self.assertRaises(
         NexusFormatException,
         check_for_valid_NexusReader, nexus_obj, ['r8s'],
     )
Exemplo n.º 33
0
 def setUp(self):
     """Load the Mesquite-formatted trees regression fixture."""
     fixture = os.path.join(REGRESSION_DIR, 'mesquite_formatted_branches.trees')
     self.nex = NexusReader(fixture)
Exemplo n.º 34
0
 def setUp(self):
     """Load the Mesquite taxa-block regression fixture."""
     fixture = os.path.join(REGRESSION_DIR, 'mesquite_taxa_block.nex')
     self.nex = NexusReader(fixture)
Exemplo n.º 35
0
from nexus import NexusReader
from Bio import SeqIO
import sys

# Dump one fixed character range of every taxon in the nexus file, then echo
# every record of a FASTA file, both in FASTA-like ">name / sequence" form.
n = NexusReader()
n.read_file("Razafimandimbison_AppS1.txt")

region = slice(6230, 7867)  # character range printed for each taxon
for taxon, characters in n.data:
    print(">", taxon)
    joined = "".join(characters)
    print(joined[region])

fasta_in = "Psychotria_rps16.fas"  # FASTA file to read

for record in SeqIO.parse(fasta_in, 'fasta'):
    id_part, desc_part, seq = record.id, record.description, record.seq

    print('>', id_part)
    print(seq)
Exemplo n.º 36
0
 def setUp(self):
     """Load the ``example.nex`` fixture."""
     fixture = os.path.join(EXAMPLE_DIR, 'example.nex')
     self.nex = NexusReader(fixture)
Exemplo n.º 37
0
class Test_DataHandler_SimpleNexusFormat(unittest.TestCase):
    """Tests for the data handler against the simple ``example.nex`` fixture."""

    # Expected matrix row (one state per site) for every taxon in the fixture.
    expected = {
        'Harry': ['0', '0'],
        'Simon': ['0', '1'],
        'Betty': ['1', '0'],
        'Louise': ['1', '1'],
    }

    def setUp(self):
        """Load the example file into ``self.nex``."""
        self.nex = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))

    def test_block_find(self):
        """The data block is registered and reachable as an attribute."""
        assert 'data' in self.nex.blocks
        assert hasattr(self.nex, 'data')
        assert self.nex.data == self.nex.data

    def test_raw(self):
        """The raw block lines are kept verbatim."""
        assert self.nex.data.block == [
            'Begin data;', 'Dimensions ntax=4 nchar=2;',
            'Format datatype=standard symbols="01" gap=-;', 'Matrix',
            'Harry              00', 'Simon              01',
            'Betty              10', 'Louise             11', ';'
        ]

    def test_format_string(self):
        """The format line is parsed into exactly the expected tokens."""
        # did we get the expected tokens in the format string?
        expected = {'datatype': 'standard', 'gap': '-', 'symbols': '01'}
        for k, v in expected.items():
            assert self.nex.data.format[k] == v, \
                "%s should equal %s and not %s" % (k, v, self.nex.data.format[k])
        # did we get the right number of tokens?
        assert len(self.nex.data.format) == len(expected)

    def test_taxa(self):
        """All taxa are present and the taxon counts agree."""
        # did we get the right taxa in the matrix?
        for taxon in self.expected:
            assert taxon in self.nex.data.matrix
        # did we get the right number of taxa in the matrix?
        assert self.nex.data.ntaxa == len(self.expected) == len(
            self.nex.data.taxa)

    def test_characters(self):
        """Every taxon's row matches the expected states."""
        # did we parse the characters properly?
        assert self.nex.data.nchar == 2
        for taxon, expected in self.expected.items():
            assert self.nex.data.matrix[taxon] == expected

    def test_iterable(self):
        """Iterating the data block yields (taxon, row) pairs."""
        for taxon, block in self.nex.data:
            assert block == self.expected[taxon]

    def test_parse_format_line(self):
        """``parse_format_line`` handles key=value tokens and bare flags."""
        d = DataHandler()
        f = d.parse_format_line('Format datatype=standard symbols="01" gap=-;')
        assert f['datatype'] == 'standard', \
            "Expected 'standard', but got '%s'" % f['datatype']
        assert f['symbols'] == '01', \
            "Expected '01', but got '%s'" % f['symbols']
        # The message previously claimed the expected value was 'gap'.
        assert f['gap'] == '-', "Expected '-', but got '%s'" % f['gap']

        f = d.parse_format_line(
            'FORMAT datatype=RNA missing=? gap=- symbols="ACGU" labels interleave;'
        )
        # Values are expected back in lower case.
        assert f['datatype'] == 'rna', \
            "Expected 'rna', but got '%s'" % f['datatype']
        assert f['missing'] == '?', "Expected '?', but got '%s'" % f['missing']
        assert f['gap'] == '-', "Expected '-', but got '%s'" % f['gap']
        assert f['symbols'] == 'acgu', \
            "Expected 'acgu', but got '%s'" % f['symbols']
        # Bare flags (no "=value") are expected to compare equal to True.
        assert f['labels'] == True, \
            "Expected <True>, but got '%s'" % f['labels']
        assert f['interleave'] == True, \
            "Expected <True>, but got '%s'" % f['interleave']

    def test_write(self):
        """Every expected line appears in the written output."""
        # Raw strings: "\s" in a plain literal is an invalid escape sequence.
        expected_patterns = [
            r'^begin data;$',
            r'^\s+dimensions ntax=4 nchar=2;$',
            r'^\s+format datatype=standard symbols="01" gap=-;$',
            r'^matrix$',
            r'^Simon\s+01$',
            r'^Louise\s+11$',
            r'^Betty\s+10$',
            r'^Harry\s+00$',
            r'^\s+;$',
            r'^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written,
                             re.MULTILINE), 'Expected "%s"' % expected

    def test__load_characters(self):
        """The per-site view agrees with the per-taxon expectations."""
        for site, data in self.nex.data.characters.items():
            for taxon, value in data.items():
                assert value == self.expected[taxon][site]

    def test_get_site(self):
        """Both sites can be fetched by index."""
        for i in (0, 1):
            site_data = self.nex.data.characters[i]
            for taxon, value in site_data.items():
                assert self.expected[taxon][i] == value

    def test_incorrect_dimensions_warnings_ntaxa(self):
        """A wrong declared ntax produces exactly one UserWarning."""
        nex = NexusReader()
        with warnings.catch_warnings(record=True) as w:
            nex.read_string("""Begin data;
                Dimensions ntax=5 nchar=1;
                Format datatype=standard symbols="01" gap=-;
                Matrix
                Harry              1
                ;""")
            assert len(w) == 1, 'Expected 1 warning, got %r' % w
            assert issubclass(w[-1].category, UserWarning)
            assert "Expected" in str(w[-1].message)
            assert nex.data.nchar == 1

    def test_incorrect_dimensions_warnings_nchar(self):
        """A wrong declared nchar produces exactly one UserWarning."""
        with warnings.catch_warnings(record=True) as w:
            nex = NexusReader()
            nex.read_string("""Begin data;
                Dimensions ntax=1 nchar=5;
                Format datatype=standard symbols="01" gap=-;
                Matrix
                Harry              1
                ;""")
            assert len(w) == 1, 'Expected 1 warning, got %r' % w
            assert issubclass(w[-1].category, UserWarning)
            assert "Expected" in str(w[-1].message)
            assert nex.data.nchar == 1
Exemplo n.º 38
0
 def test_write(self):
     """Writing ``example.trees`` back out reproduces the file text exactly."""
     trees_path = os.path.join(EXAMPLE_DIR, 'example.trees')
     nex = NexusReader(trees_path)
     # The context manager closes the handle instead of leaving it to the
     # garbage collector.
     with open(trees_path) as handle:
         text = handle.read()
     assert text == nex.write()
Exemplo n.º 39
0
 def setUp(self):
     """Load the BEAST-formatted example trees file."""
     fixture = os.path.join(EXAMPLE_DIR, 'example-beast.trees')
     self.nex = NexusReader(fixture)
Exemplo n.º 40
0
#!/usr/bin/env python
#coding=utf-8

import ete3
from nexus import NexusReader

if __name__ == '__main__':
    # Command-line entry point: read a nexus trees file, re-root its first
    # tree on the taxon 'Mawe', and write the result to the output file.
    import argparse
    parser = argparse.ArgumentParser(
        description='reroots and cleans Michael et al.')
    parser.add_argument("input", help='filename')
    parser.add_argument("output", help='filename')
    args = parser.parse_args()

    nex = NexusReader(args.input)
    # make tree into newick for ete3
    # Split on the first " = " only, so a tree string that contains a later
    # " = " is not truncated. strip() already removes leading whitespace.
    newick = nex.trees.trees[0].split(" = ", 1)[1].strip()
    tree = ete3.Tree(newick, format=0)
    # reroot
    tree.set_outgroup('Mawe')
    nex.trees.trees[0] = 'tree tg [&R] = %s' % tree.write(format=5)

    with open(args.output, 'w') as out:
        out.write(nex.write())
Exemplo n.º 41
0
        usage="usage: %prog [-o output.nex] nex1.nex nex2.nex ... nexN.nex")
    parser.add_option("-o",
                      "--output",
                      dest="output",
                      action="store",
                      default=None,
                      type="string",
                      help="output nexus file")
    options, nexuslist = parser.parse_args()

    if len(nexuslist) < 1:
        print(__doc__)
        parser.print_help()
        sys.exit()

    if options.output is not None:
        outfile = options.output
    else:
        outfile = 'multistate.nex'

    nexuslist2 = []
    for nfile in nexuslist:
        n = NexusReader(nfile)
        n = multistatise(n)
        nexuslist2.append(n)

    out = combine_nexuses(nexuslist2)

    out.write_to_file(outfile, charblock=True, interleave=False)
    print("Written to %s" % outfile)
Exemplo n.º 42
0
from ete2 import Tree
from nexus import NexusReader
import sys
import re
import csv

# Script inputs: argv[1] is the nexus file, argv[2] a tab-separated
# strain/drug table.
arguments = sys.argv
n = NexusReader(arguments[1])

# First read the resistance data: which strain is resistant to which drug.
# The table has no header row, so the column names are supplied here.
# NOTE(review): the file handle opened here is never closed explicitly.
drug_resistance = csv.DictReader(open(arguments[2]),
                                 delimiter='\t',
                                 fieldnames=("strain", "drug"))

# Then build a mapping from taxon name to a 1-based number, in the order the
# taxa appear in the taxa block.
number = 1
number_name = {}
for i in n.taxa.taxa:
    number_name[i] = number
    number += 1
# Taxon names in reverse-sorted order.
keys = list(number_name.keys())
keys.sort(reverse=True)

# strain -> drug; a strain listed more than once keeps its last row.
strain_to_resistance = {}

for row in drug_resistance:
    strain_to_resistance[row['strain']] = row["drug"]

# Next step would be to read the sampling dates to find the oldest strain the
# tree is based upon.
# Maha said not to do this, so default to 2018.
# reference_date = csv.DictReader(open(arguments[3]),delimiter='\t', fieldnames = ( "strain", "date"))
Exemplo n.º 43
0
class Test_DataHandler_CharacterBlockNexusFormat(unittest.TestCase):
    def setUp(self):
        self.nex = NexusReader(
            os.path.join(EXAMPLE_DIR, 'example-characters.nex'))

    def test_block_find(self):
        assert 'data' in self.nex.blocks

    def test_charblock_find(self):
        assert hasattr(self.nex.data, 'characters')

    def test_taxa(self):
        assert self.nex.data.ntaxa == 5

    def test_data(self):
        assert self.nex.data.nchar == 5

    def test_charlabels(self):
        assert self.nex.data.charlabels[0] == 'CHAR_A'
        assert self.nex.data.charlabels[1] == 'CHAR_B'
        assert self.nex.data.charlabels[2] == 'CHAR_C'
        assert self.nex.data.charlabels[3] == 'CHAR_D'
        assert self.nex.data.charlabels[4] == 'CHAR_E'

    def test_label_parsing(self):
        assert 'CHAR_A' in self.nex.data.characters
        assert 'CHAR_B' in self.nex.data.characters
        assert 'CHAR_C' in self.nex.data.characters
        assert 'CHAR_D' in self.nex.data.characters
        assert 'CHAR_E' in self.nex.data.characters

    def test_matrix(self):
        for taxon in ("A", "B", "C", "D", "E"):
            for index, expected_value in enumerate(("A", "B", "C", "D", "E")):
                assert self.nex.data.matrix[taxon][index] == expected_value

    def test_characters(self):
        for site in ("A", "B", "C", "D", "E"):
            # All sites in CHAR_A are state "A", and all in CHAR_B and "B" etc
            for t in ("A", "B", "C", "D", "E"):
                assert self.nex.data.characters["CHAR_%s" % site][t] == site

    def test_write(self):
        expected_patterns = [
            '^begin data;$',
            '^\s+dimensions ntax=5 nchar=5;$',
            '^\s+format gap=- missing=\?;$',
            '^\s+charstatelabels$',
            '^\s+1\s+CHAR_A,$',
            '^\s+2\s+CHAR_B,$',
            '^\s+3\s+CHAR_C,$',
            '^\s+4\s+CHAR_D,$',
            '^\s+5\s+CHAR_E$',
            '^matrix$',
            '^A\s+ABCDE$',
            '^B\s+ABCDE$',
            '^C\s+ABCDE$',
            '^D\s+ABCDE$',
            '^E\s+ABCDE$',
            '^\s+;$',
            '^end;$',
        ]
        written = self.nex.write()
        for expected in expected_patterns:
            assert re.search(expected, written, re.MULTILINE), \
                'Expected "%s"' % expected
Exemplo n.º 44
0
 def test_find_constant_sites_1(self):
     """find_constant_sites returns a falsy result for example.nex."""
     example_path = os.path.join(EXAMPLE_DIR, 'example.nex')
     constant = find_constant_sites(NexusReader(example_path))
     assert not constant
Exemplo n.º 45
0
 def test_regression(self):
     """Reading ape_random.trees should report ntrees == 2."""
     trees_file = os.path.join(REGRESSION_DIR, 'ape_random.trees')
     reader = NexusReader(trees_file)
     assert reader.trees.ntrees == 2
Exemplo n.º 46
0
		if not os.path.exists(basepath + "/bin/FastTree"):
			print("GMYC.py uses FastTreeUPGMA to infer ultramatric trees,")
			print("please download the latest source code from: ")
			print("http://meta.microbesonline.org/fasttree/FastTreeUPGMA.c")
			print("Please complie with gcc -O3 -finline-functions -funroll-loops -Wall -o FastTree FastTreeUPGMA.c -lm, ")
			print("and put FastTree it to bin/  \n")
			sys.exit() 
		print("Building UPGMA tree using FastTree.")
		stree = call_upgma(salignment)
	
	# An empty tree argument cannot be analysed: print the options and stop.
	if stree == "":
		print("Input tree is empty.")
		print_options()
		sys.exit()

	try:
		# Only the first line is needed to detect a NEXUS file. The context
		# manager closes the handle even if the parsing below raises.
		with open(stree) as treetest:
			l1 = treetest.readline()
		if l1.strip() == "#NEXUS":
			# Replace the file name with the first tree string of the trees
			# block, after calling detranslate() on that block.
			nexus = NexusReader(stree)
			nexus.blocks['trees'].detranslate()
			stree = nexus.trees.trees[0]

		sp = gmyc(tree = stree, print_detail = sprint_detail, show_tree = sshow_tree, show_llh = sshow_llh, show_lineages = sshow_lineages, print_species = sprint_species, pv = p_value)
		print("Final number of estimated species by GMYC: " +  repr(len(sp)) )
	except ete2.parser.newick.NewickError:
		print("Unexisting tree file or Malformed newick tree structure.")


 def test_valid_with_required_block_two(self):
     """Run check_for_valid_NexusReader on example2.nex with ['data', 'taxa']."""
     source = os.path.join(EXAMPLE_DIR, 'example2.nex')
     blocks = ['data', 'taxa']
     check_for_valid_NexusReader(NexusReader(source), blocks)
Exemplo n.º 48
0
def hash(salt, taxon):
    """Return the hexadecimal MD5 digest of ``"<salt>-<taxon>"``.

    :param salt: value formatted with ``%s`` in front of the dash.
    :param taxon: value formatted with ``%s`` after the dash.
    :return: the 32-character hex digest string.
    """
    token = "%s-%s" % (salt, taxon)
    # hashlib only accepts bytes on Python 3, so text is encoded first;
    # a value that is already bytes is hashed unchanged.
    if not isinstance(token, bytes):
        token = token.encode("utf-8")
    return hashlib.md5(token).hexdigest()


if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog fudge.nex output.nex")
    # The positional arguments are bound to `args`, the name the lookups
    # below actually read.
    options, args = parser.parse_args()

    # The input file name is mandatory; without it show the help and exit.
    try:
        nexusname = args[0]
    except IndexError:
        print(__doc__)
        print("Author: %s\n" % __author__)
        parser.print_help()
        sys.exit()

    # The output file name is optional.
    try:
        newnexus = args[1]
    except IndexError:
        newnexus = None

    nexus = NexusReader(nexusname)
    nexus = anonymise(nexus)

    if newnexus is not None:
        nexus.write_to_file(newnexus)
        print("New nexus written to %s" % newnexus)
    else:
        # NOTE(review): `filename` was never defined here; the input file
        # name is presumably what was meant to be hashed -- confirm.
        print(nexus.write_to_file(hash('filename', nexusname)))
 def test_valid_NexusReader(self):
     """A NexusReader built without arguments passes the validity check."""
     empty_reader = NexusReader()
     check_for_valid_NexusReader(empty_reader)
Exemplo n.º 50
0
 def nexus(self):
     """Return a NexusReader built from ``self.trees.as_posix()``."""
     trees_path = self.trees.as_posix()
     return NexusReader(trees_path)
Exemplo n.º 51
0
            for w in wrapper.wrap(s):
                print(w)
    return


if __name__ == '__main__':
    import sys
    from optparse import OptionParser

    parser = OptionParser(usage="usage: %prog [taxa/sites] nexus.nex")
    options, commands = parser.parse_args()

    # Exactly two positional arguments are required: the tally command and
    # the nexus file name.
    if len(commands) != 2:
        print(__doc__)
        parser.print_help()
        # sys.exit() rather than quit(): quit is only injected by the site
        # module for interactive use and is absent under ``python -S``.
        sys.exit()

    command, nex = commands

    try:
        nex = NexusReader(nex)
    except IOError:
        # nex still holds the file name here because the assignment failed.
        raise IOError("Unable to read %s" % nex)

    if command in ('taxa', 't'):
        tally = tally_by_taxon(nex)
    elif command in ('site', 'sites', 's'):
        # 'sites' is accepted as well, since the usage string advertises it.
        tally = tally_by_site(nex)
    else:
        sys.exit("Invalid tally command. Only 'taxa' and 'site' are valid.")

    print_tally(tally)
Exemplo n.º 52
0
#!/usr/bin/env python
import sys
from nexus import NexusReader, VERSION
from nexus.tools import combine_nexuses

__author__ = 'Simon Greenhill <*****@*****.**>'
__doc__ = """combine-nexus - python-nexus tools v%(version)s
combines a series of nexuses into one nexus.
""" % {
    'version': VERSION,
}

if __name__ == '__main__':
    # set up command-line options
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog nex1.nex nex2.nex ... nexN.nex")
    options, nexuslist = parser.parse_args()

    # Fewer than two input files: show the docs and exit.
    if len(nexuslist) <= 1:
        # print() as a function call works on both Python 2 and 3 and
        # matches the print call at the end of this block.
        print(__doc__)
        parser.print_help()
        sys.exit()

    # Read every input file, combine them, and write a fixed output file.
    nexuslist = [NexusReader(n) for n in nexuslist]
    out = combine_nexuses(nexuslist)
    out.write_to_file('combined.nex', charblock=False, interleave=False)
    print("Written to combined.nex")
    # set up command-line options
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog old.nex new.nex")
    parser.add_option("-1",
                      "--onefile",
                      dest="onefile",
                      action="store_true",
                      default=False,
                      help="One nexus file for each multistate character")
    options, args = parser.parse_args()

    # Both the input and the output file names are required.
    try:
        nexusname = args[0]
        newnexusname = args[1]
    except IndexError:
        # print() as a function call works on both Python 2 and 3.
        print(__doc__)
        print("Author: %s\n" % __author__)
        parser.print_help()
        sys.exit()

    nexus = NexusReader(nexusname)

    new = binarise(nexus, one_nexus_per_block=options.onefile)
    if isinstance(new, NexusWriter):
        # A single writer goes straight to the requested file name.
        new.write_to_file(newnexusname)
    elif len(new) > 1:
        # Several writers: one file each, named
        # <base>-<cleaned first character><ext>.
        # NOTE(review): a result of length 1 that is not a NexusWriter
        # writes nothing -- confirm that is intended.
        newnexusname, ext = os.path.splitext(newnexusname)
        for nex in new:
            nex.write_to_file(
                "%s-%s%s" % (newnexusname, nex.clean(nex.characters[0]), ext))
Exemplo n.º 54
0
 def test_count_missing_one(self):
     """Every entry returned by count_site_values for example.nex is 0."""
     reader = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
     counts = count_site_values(reader)
     assert all(counts[taxon] == 0 for taxon in counts)
Exemplo n.º 55
0
 def test_read_file(self):
     """example.nex yields a 'data' block whose matrix contains 'Simon'."""
     reader = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
     blocks = reader.blocks
     assert 'data' in blocks
     assert 'Simon' in blocks['data'].matrix
Exemplo n.º 56
0
 def setUp(self):
     """Read example-translated.trees into ``self.nex``."""
     trees_path = os.path.join(EXAMPLE_DIR, 'example-translated.trees')
     self.nex = NexusReader(trees_path)
Exemplo n.º 57
0
    # The input file name is mandatory; without it show the help and exit.
    try:
        nexusname = args[0]
    except IndexError:
        # print() as a function call works on both Python 2 and 3.
        print(__doc__)
        print(__usage__)
        print("Author: %s\n" % __author__)
        parser.print_help()
        sys.exit()

    # The output file name is optional.
    try:
        newnexus = args[1]
    except IndexError:
        newnexus = None

    nexus = NexusReader(nexusname)
    # Stop early when the file has no trees block or the block is empty.
    if "trees" not in nexus.blocks:
        sys.exit("No trees found in file %s!" % nexusname)
    if nexus.trees.ntrees == 0:
        sys.exit("No trees found in found %s!" % nexusname)
    if options.quiet is False:
        print("%d trees found with %d translated taxa" % (
            nexus.trees.ntrees, len(nexus.trees.translators)))

    # Delete trees
    if options.deltree:
        nexus = run_deltree(options.deltree, nexus, options.quiet)

    # Resample trees
    if options.resample:
        nexus = run_resample(options.resample, nexus, options.quiet)
Exemplo n.º 58
0
        raise IndexError("Character '%s' is not in the nexus" % char)

    states = {}
    for taxon, state in nexus_obj.data.characters[index].items():
        states[state] = states.get(state, [])
        states[state].append(taxon)

    for state in sorted(states):
        print('State: %s (%d / %d = %0.2f)' %
              (state, len(states[state]), nexus_obj.data.ntaxa,
               (len(states[state]) / nexus_obj.data.ntaxa * 100)))
        print("\n".join(wrapper.wrap(", ".join(states[state]))))
        print("\n")
    return


if __name__ == '__main__':
    from optparse import OptionParser

    # Two positional arguments are required: the site index, then the file.
    cli = OptionParser(usage="usage: %prog site_index nexusfile.nex")
    options, args = cli.parse_args()

    if len(args) < 2:
        cli.print_help()
        sys.exit()
    char, nexusname = args[0], args[1]

    print_character_stats(NexusReader(nexusname), char)