示例#1
0
def get_nexus_reader(thing):
    """Coerce `thing` into a `NexusReader`.

    A `str` is parsed with `NexusReader.from_string`, a `pathlib.Path` is
    loaded with `NexusReader.from_file`, and a `NexusWriter` is written out
    and re-parsed. Any other value is asserted to already be a
    `NexusReader` and is returned unchanged.
    """
    if isinstance(thing, str):
        reader = NexusReader.from_string(thing)
    elif isinstance(thing, pathlib.Path):
        reader = NexusReader.from_file(thing)
    elif isinstance(thing, NexusWriter):
        # Round-trip through the writer's text output.
        reader = NexusReader.from_string(thing.write())
    else:
        assert isinstance(thing, NexusReader)
        reader = thing
    return reader
示例#2
0
def test_combine():
    """Combining two tree-only nexuses keeps every tree, in input order."""
    first = NexusReader.from_string("""Begin trees;
            tree 1 = (a,b,c);
        end;""")
    second = NexusReader.from_string("""Begin trees;
            tree 2 = (b,a,c);
            tree 3 = (b,c,a);
        end;""")
    expected_trees = [
        "tree 1 = (a,b,c);",
        "tree 2 = (b,a,c);",
        "tree 3 = (b,c,a);",
    ]

    combined = combine_nexuses([first, second])
    assert len(combined.trees) == len(expected_trees)
    for index, tree in enumerate(expected_trees):
        assert combined.trees[index] == tree
示例#3
0
def test_tally_by_taxon():
    """Check the site indices `tally_by_taxon` reports per taxon and state."""
    reader = NexusReader.from_string("""Begin data;
        Dimensions ntax=3 nchar=6;
        Format datatype=standard symbols="12" gap=-;
        Matrix
        Harry              0111-?
        Simon              0011-?
        Elvis              0001-?
        ;""")
    # For each state, the zero-based site indices expected for every taxon.
    expected_by_state = [
        ('0', {'Harry': [0], 'Simon': [0, 1], 'Elvis': [0, 1, 2]}),
        ('1', {'Harry': [1, 2, 3], 'Simon': [2, 3], 'Elvis': [3]}),
        ('-', {'Harry': [4], 'Simon': [4], 'Elvis': [4]}),
        ('?', {'Harry': [5], 'Simon': [5], 'Elvis': [5]}),
    ]

    tally = tally_by_taxon(reader)
    for state, sites_by_taxon in expected_by_state:
        for taxon, sites in sites_by_taxon.items():
            assert tally[taxon][state] == sites
def test_find_unique_sites_2():
    """Check which site indices `iter_unique_sites` yields when the matrix
    contains missing (?) and gap (-) entries."""
    reader = NexusReader.from_string("""Begin data;
    Dimensions ntax=4 nchar=7;
    Format datatype=standard symbols="01" gap=-;
    Matrix
    Harry              10000?-
    Simon              1100011
    Betty              1110000
    Louise             1111000
    ;""")
    found = list(iter_unique_sites(reader))

    # Site 1 (3x1 and 1x0) must not count as unique, i.e. a site with ONE
    # absent taxon is ignored. Sites 0 and 2 are not unique either, and
    # site 4 is constant.
    for site in (1, 0, 2, 4):
        assert site not in found

    # Site 3 is a plain unique site; sites 5 and 6 are unique once the
    # missing/gap entry is handled appropriately.
    for site in (3, 5, 6):
        assert site in found
def test_tally_by_site():
    """Check that `tally_by_site` files each taxon under its state per site."""
    reader = NexusReader.from_string("""Begin data;
        Dimensions ntax=3 nchar=6;
        Format datatype=standard symbols="12" gap=-;
        Matrix
        Harry              0111-?
        Simon              0011-?
        Elvis              0001-?
        ;""")
    taxa = ('Harry', 'Simon', 'Elvis')
    # One entry per site: the states of Harry, Simon and Elvis, in that order.
    columns = ['000', '100', '110', '111', '---', '???']

    tally = tally_by_site(reader)
    for site, column in enumerate(columns):
        for taxon, state in zip(taxa, column):
            assert taxon in tally[site][state]
示例#6
0
def test_combine_with_character_labels():
    """Combine two labelled 3x3 matrices and check the written output.

    The combined nexus is expected to report three taxa and six characters,
    to merge both symbol sets, to concatenate each taxon's states, and to
    number the character labels consecutively across both inputs.
    """
    first = NexusReader.from_string("""
        BEGIN DATA;
            DIMENSIONS NTAX=3 NCHAR=3;
            FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="123";
            CHARSTATELABELS
                1 char1,
                2 char2,
                3 char3
        ;
        MATRIX
        Tax1         123
        Tax2         123
        Tax3         123
        ;
        """)
    second = NexusReader.from_string("""
        BEGIN DATA;
            DIMENSIONS NTAX=3 NCHAR=3;
            FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="456";
            CHARSTATELABELS
                1 char1,
                2 char2,
                3 char3
        ;
        MATRIX
        Tax1         456
        Tax2         456
        Tax3         456
        ;
        """)
    combined = combine_nexuses([first, second])

    header_patterns = (
        r"""\bNTAX=3\b""",
        r"""\bNCHAR=6\b""",
        r'\sSYMBOLS="123456"[\s;]',
    )
    for pattern in header_patterns:
        assert re.search(pattern, combined.write())

    for taxon_number in (1, 2, 3):
        row = r"""\bTax%d\s+123456\b""" % taxon_number
        assert re.search(row, combined.write())

    # Each input contributes three characters, so the running position of a
    # label in the combined charblock follows from its input and character
    # numbers.
    for nexus_number in (1, 2):
        for char_number in (1, 2, 3):
            position = (nexus_number - 1) * 3 + char_number
            label = r"""\b%d\s+%d.char%d\b""" % (
                position, nexus_number, char_number)
            assert re.search(label, combined.write(charblock=True))
示例#7
0
def test_error_on_too_many_states():
    """`multistatise` must raise ValueError for a 30-character binary row."""
    source = """
    Begin data;
    Dimensions ntax=1 nchar=30;
    Format datatype=standard symbols="01" gap=-;
    Matrix
    A   111111111111111111111111111111
    ;"""
    reader = NexusReader.from_string(source)
    with pytest.raises(ValueError):
        multistatise(reader)
示例#8
0
def nex():
    """Return the result of `multistatise` on a 4-taxon, 4-character binary
    matrix in which each taxon has a single `1`."""
    source = """
        Begin data;
        Dimensions ntax=4 nchar=4;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              1000
        Simon              0100
        Betty              0010
        Louise             0001
        ;"""
    return multistatise(NexusReader.from_string(source))
示例#9
0
def nex():
    """Return the result of `binarise` on a 3-taxon, 2-character matrix
    with character labels `char1` and `char2`."""
    source = """
        Begin data;
        Dimensions ntax=3 nchar=2;
        Format datatype=standard symbols="01" gap=-;
        Charstatelabels
            1 char1, 2 char2;
        Matrix
        Maori               14
        Dutch               25
        Latin               36
        ;"""
    return binarise(NexusReader.from_string(source))
示例#10
0
def nex():
    """Return a reader for a 4-taxon, 8-character binary matrix that also
    contains gap (-) and missing (?) entries."""
    return NexusReader.from_string("""
        Begin data;
        Dimensions ntax=4 nchar=8;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        [                  01234567]
        Harry              01000000
        Simon              0010000-
        Betty              00010-0?
        Louise             000010?0
        ;""")
示例#11
0
def nex1():
    """Return a 2-taxon, 1-character reader whose `short_filename` is '0'."""
    reader = NexusReader.from_string("""Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              1
            Simon              2
            ;""")
    # `short_filename` is set on purpose: if `combine_nexuses` ignores it,
    # the characters from this reader get identified as 1.xx instead of 0.xx.
    reader.short_filename = '0'
    return reader
示例#12
0
def test_count_binary_set_size():
    """Each set size from 0 to 3 must be tallied exactly once."""
    reader = NexusReader.from_string("""Begin data;
        Dimensions ntax=3 nchar=4;
        Format datatype=standard symbols="12" gap=-;
        Matrix
        Harry              0111
        Simon              0011
        Elvis              0001
        ;""")
    tally = count_binary_set_size(reader)
    for set_size in range(4):
        assert tally[set_size] == 1
def test_count_other_values_two():
    """`count_site_values` with ['A', 'B'] tallies those states per taxon."""
    reader = NexusReader.from_string("""#NEXUS
    Begin data;
    Dimensions ntax=5 nchar=3;
    Format datatype=standard symbols="01" gap=-;
    Matrix
    Harry              0A0  [No missing]
    Simon              0AB  [one missing]
    Peter              0-B  [one gap]
    Betty              ?-1  [one gap and one missing = 2 missing]
    Louise             ???  [three missing]
        ;
    End;
    """)
    expected = {
        'Harry': 1,
        'Simon': 2,
        'Peter': 1,
        'Betty': 0,
        'Louise': 0,
    }

    tally = count_site_values(reader, ['A', 'B'])
    # Only the taxa present in the returned tally are compared.
    for taxon in tally:
        assert tally[taxon] == expected[taxon]
def test_count_missing_two():
    """`count_site_values` called without a character list: the expected
    tallies below count the gap (-) and missing (?) entries per taxon."""
    reader = NexusReader.from_string("""#NEXUS
    Begin data;
    Dimensions ntax=5 nchar=3;
    Format datatype=standard symbols="01" gap=-;
    Matrix
    Harry              010  [No missing]
    Simon              0?0  [one missing]
    Peter              0-0  [one gap]
    Betty              ?-1  [one gap and one missing = 2 missing]
    Louise             ???  [three missing]
        ;
    End;
    """)
    expected = {
        'Harry': 0,
        'Simon': 1,
        'Peter': 1,
        'Betty': 2,
        'Louise': 3,
    }

    tally = count_site_values(reader)
    # Only the taxa present in the returned tally are compared.
    for taxon in tally:
        assert tally[taxon] == expected[taxon]
示例#15
0
def get_reader(args, many=False, required_blocks=None):
    """Build `NexusReader` object(s) from `args.filename`.

    :param args: object with a `filename` attribute; a single name, or an
        iterable of names when `many` is true. A name of `None` means the
        nexus text is read from standard input.
    :param many: when true, return a list with one reader per name;
        otherwise return the single reader.
    :param required_blocks: optional attribute names that must be truthy on
        every reader.
    :raises ParserError: if a reader lacks one of `required_blocks`.
    """
    filenames = args.filename if many else [args.filename]
    readers = [
        NexusReader.from_string(sys.stdin.read()) if name is None
        else NexusReader.from_file(name)
        for name in filenames
    ]

    if required_blocks:
        for reader in readers:
            for block in required_blocks:
                if getattr(reader, block, None):
                    continue
                message = 'Nexus file {0} has no {1} block'.format(
                    reader.filename, block)
                raise ParserError(colored(message, 'red', attrs=['bold']))

    if many:
        return readers
    return readers[0]
示例#16
0
def test_regression_include_invisible_taxa():
    """Regression: a taxon whose row is all zeros must survive `multistatise`."""
    source = """
    #NEXUS
    
    BEGIN DATA;
        DIMENSIONS  NTAX=15 NCHAR=7;
        FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES;
    MATRIX
    
    Gertrude                0000001
    Debbie                  0001000
    Zarathrustra            0000000
    Christie                0010000
    Benny                   0100000
    Bertha                  0100000
    Craig                   0010000
    Fannie-May              0000010
    Charles                 0010000
    Annik                   1000000
    Frank                   0000010
    Amber                   1000000
    Andreea                 1000000
    Edward                  0000100
    Donald                  0001000
    ;
    END;
    """

    multistate = multistatise(NexusReader.from_string(source))
    matrix = multistate.data.matrix

    for taxon, sites in matrix.items():
        # The taxa are named so that the first letter is the expected state;
        # the all-zero 'Z' taxon is checked separately below.
        if taxon[0] != 'Z':
            assert taxon[0] == sites[0], \
                "%s should be %s not %s" % (taxon, taxon[0], sites[0])

    # The taxon without any entries must be present and coded as missing.
    assert 'Zarathrustra' in multistate.data.matrix
    assert multistate.data.matrix['Zarathrustra'][0] == '?'
示例#17
0
def test_to_binary_alphabetical():
    """Binarise a matrix with alphabetical states and check the nexus text."""
    source = """
        #NEXUS
        BEGIN DATA;
            DIMENSIONS NTAX=5 NCHAR=2;
            FORMAT MISSING=? GAP=- SYMBOLS="ABCDE";
            CHARSTATELABELS
            1 ALL,
            2 ASHES
        ;
        MATRIX
        Mehri        AB
        Geto         AB
        Walani       A-
        Hebrew       A(C,D)
        Soqotri      BC
        ;
        END;
        """
    binary = binarise(NexusReader.from_string(source))
    text = binary.make_nexus(charblock=True, interleave=False)

    expected_patterns = (
        # dimensions
        r"\s+NCHAR=5;",
        # character labels
        r"1\s+ALL_A,",
        r"2\s+ALL_B,",
        r"3\s+ASHES_B,",
        r"4\s+ASHES_C,",
        r"5\s+ASHES_D",
        # matrix rows
        r"Geto\s+10100",
        r"Hebrew\s+10011",
        r"Mehri\s+10100",
        r"Soqotri\s+01010",
        r"Walani\s+10000",
    )
    for pattern in expected_patterns:
        assert re.search(pattern, text)