示例#1
0
    def setUp(self):
        """Create the three one-character readers used as fixtures.

        ``nex1`` and ``nex2`` both describe the taxa Harry and Simon;
        ``nex3`` describes Betty, Boris and Simon.
        """
        first_text = """Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="12" gap=-;
            Matrix
            Harry              1
            Simon              2
            ;"""
        second_text = """Begin data;
            Dimensions ntax=2 nchar=1;
            Format datatype=standard symbols="34" gap=-;
            Matrix
            Harry              3
            Simon              4
            ;"""
        third_text = """Begin data;
            Dimensions ntax=3 nchar=1;
            Format datatype=standard symbols="345" gap=-;
            Matrix
            Betty              3
            Boris              4
            Simon              5
            ;"""

        self.nex1 = NexusReader()
        self.nex1.read_string(first_text)
        # Force a short_filename on nex1 so that functionality is exercised:
        # when `combine_nexuses` ignores `short_filename`, the nex1
        # characters come out labelled 1.xx instead of 0.xx.
        self.nex1.short_filename = '0'

        self.nex2 = NexusReader()
        self.nex2.read_string(second_text)

        self.nex3 = NexusReader()
        self.nex3.read_string(third_text)
def snpMatrixGenerator(sourceFile,
                       destFile,
                       recordAll=False,
                       recordRandomSample=True):
    """Collect SNP columns from every data block of a nexus stream.

    The source is cut into chunks at each line that contains both "begin"
    and "data" (case-insensitive).  Every chunk is parsed with
    ``NexusReader`` and, when it holds a ``data`` block, handed to
    ``_findDifferences`` together with one shared ``NexusWriter``.  The
    accumulated nexus text is finally written to ``destFile``.

    Args:
        sourceFile: Open, line-iterable file object holding nexus text.
        destFile: Open, writable file object that receives the result.
        recordAll: Forwarded unchanged to ``_findDifferences``.
        recordRandomSample: Forwarded unchanged to ``_findDifferences``.

    ``recordAll`` and ``recordRandomSample`` must differ; when they are
    equal, "Invalid Options" is printed and ``exit()`` is called.  Both
    file objects are closed once processing has started, even if it fails.
    """
    if recordAll == recordRandomSample:
        # Call form of print so the module compiles on Python 2 and 3 alike.
        print("Invalid Options")
        exit()

    destNexus = NexusWriter()

    def _consume(text, column):
        """Parse one chunk and return the updated SNP column counter."""
        reader = NexusReader()
        reader.read_string(text)
        if "data" in reader.blocks:
            return _findDifferences(reader, destNexus, column,
                                    recordAll, recordRandomSample)
        return column

    try:
        block = ""
        snpCol = 0
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                # A new data block starts: process what was gathered so far.
                snpCol = _consume(block, snpCol)
                block = line
            else:
                block += line

        # The last chunk has no following "begin data" line to trigger it.
        _consume(block, snpCol)

        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        try:
            destFile.close()
        finally:
            sourceFile.close()
示例#3
0
    def validate(self):
        """Emit a warning for every consistency problem found in this dataset.

        Checks, in order: the ``scaling`` entry of ``self.details``, the taxa
        table, the bibtex source, the summary and posterior tree files, and
        the nexus data file against both the taxa table and the characters
        file.  Problems are reported through ``warn``; nothing is returned.
        """
        dataset_id = self.details.get('id', '?')

        # check scaling
        scaling = self.details.get('scaling')
        if scaling not in SCALINGS:
            warn("Unknown Scaling '%s'" % scaling)

        # check taxa file
        if len(self.taxa) == 0:
            warn("No taxa defined")
        if self.taxa and not len(self.taxa.keys()):
            warn("Empty taxa file")

        # check source file
        if not self.source:
            warn("No source bibtex")
        if self.source and len(self.source.read_text()) == 0:
            warn("Empty bibtex file")

        # check trees
        for tf in [self.summary, self.posterior]:
            if not (tf and tf.exists()):
                continue
            nex = NexusReader(tf)
            if not nex.trees:
                warn("No trees in %s.%s!" % (dataset_id, tf.stem))
                # Without a tree block there are no tips to compare.
                continue
            # are all the taxa in the tree listed in the taxa table?
            unknown = [t for t in nex.trees.taxa if t not in self.taxa]
            if unknown:
                warn("Unknown tips in %s.%s: %r" %
                     (dataset_id, tf.stem, unknown))

        # The remaining checks all need the parsed data file; read it once.
        if not self.nexus or not (self.taxa or self.characters):
            return
        nex = NexusReader(self.nexus)

        # if we have a data file, the taxa should match the taxa.csv
        if self.taxa:
            if not nex.data:
                warn("No data in %s data.nex!" % dataset_id)
            else:
                unknown = [t for t in nex.data.taxa if t not in self.taxa]
                if unknown:
                    warn("Unknown tips in %s data.nex: %r" %
                         (dataset_id, unknown))

        # if we have characters they should match the nexus
        if self.characters:
            if not nex.data or not nex.data.taxa:
                # Report the data file itself rather than the leftover
                # tree-loop variable, which may be None at this point.
                warn("No data in %s data.nex!" % dataset_id)
            else:
                # Count the rows; an empty characters file counts as 0.
                nchar = sum(1 for _ in read_csv(self.characters))
                if nchar != nex.data.nchar:
                    warn("characters.csv incorrect in %s - expected %d, got %d"
                         %
                         (dataset_id, nex.data.nchar, nchar))
示例#4
0
    def test_combine_with_character_labels(self):
        """Combine two labelled 3x3 matrices and check the written output."""
        left_text = """
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="123";
                CHARSTATELABELS
                    1 char1,
                    2 char2,
                    3 char3
            ;
            MATRIX
            Tax1         123
            Tax2         123
            Tax3         123
            ;
            """
        right_text = """
            BEGIN DATA;
                DIMENSIONS NTAX=3 NCHAR=3;
                FORMAT DATATYPE=STANDARD MISSING=0 GAP=-  SYMBOLS="456";
                CHARSTATELABELS
                    1 char1,
                    2 char2,
                    3 char3
            ;
            MATRIX
            Tax1         456
            Tax2         456
            Tax3         456
            ;
            """
        left = NexusReader()
        left.read_string(left_text)
        right = NexusReader()
        right.read_string(right_text)

        combined = combine_nexuses([left, right])
        assert re.search(r"""\bNTAX=3\b""", combined.write())
        assert re.search(r"""\bNCHAR=6\b""", combined.write())
        assert re.search(r'\sSYMBOLS="123456"[\s;]', combined.write())

        for tax in (1, 2, 3):
            assert re.search(r"""\bTax%d\s+123456\b""" % tax, combined.write())

        # Labels are expected as "<running index> <source index>.char<n>",
        # with the running index going 1..6 across both sources.
        for nex_id in (1, 2):
            for char_id in (1, 2, 3):
                position = 3 * (nex_id - 1) + char_id
                assert re.search(
                    r"""\b%d\s+%d.char%d\b""" % (position, nex_id, char_id),
                    combined.write(charblock=True))
示例#5
0
 def test_interleave_matrix_parsing(self):
     """example3.nex parses to 2 taxa whose 6 states are '0'..'5'."""
     path = os.path.join(EXAMPLE_DIR, 'example3.nex')
     nexus = NexusReader(path)
     assert nexus.data.ntaxa == 2
     assert len(nexus.data.taxa) == 2
     assert nexus.data.nchar == 6
     for _taxon, states in nexus.data:
         for position in range(nexus.data.nchar):
             # every site holds its own index as a string
             assert states[position] == str(position)
    def test_find_unique_sites_2(self):
        """find_unique_sites reports sites 3, 5 and 6 and no others."""
        matrix_text = """Begin data;
        Dimensions ntax=4 nchar=7;
        Format datatype=standard symbols="01" gap=-;
        Matrix
        Harry              10000?-
        Simon              1100011
        Betty              1110000
        Louise             1111000
        ;"""
        nexus = NexusReader()
        nexus.read_string(matrix_text)
        unique = find_unique_sites(nexus)

        # Site 1 (3x1 and 1x0) must NOT count as unique - i.e. sites with
        # ONE absent taxon are ignored.  Sites 0 and 2 must not be reported
        # either, and site 4 is constant.
        for site in (1, 0, 2, 4):
            assert site not in unique

        # Site 3 is a simple unique site; sites 5 and 6 check that missing
        # data is handled appropriately.
        for site in (3, 5, 6):
            assert site in unique
示例#7
0
def test_interleave_matrix_parsing(examples):
    """example3.nex parses to 2 taxa whose 6 states are '0'..'5'."""
    source = str(examples / 'example3.nex')
    nexus = NexusReader(source)
    assert nexus.data.ntaxa == 2
    assert len(nexus.data.taxa) == 2
    assert nexus.data.nchar == 6
    for taxon, states in nexus.data:
        for position in range(nexus.data.nchar):
            # every site holds its own index as a string
            assert states[position] == str(position), \
                "Error for %s:%d" % (taxon, position)
示例#8
0
 def test_detranslate(self):
     """After detranslate(), tree 0 equals tree 0 of example.trees."""
     assert self.nex.trees._been_detranslated == False
     self.nex.trees.detranslate()
     # the detranslated tree should NOW match the reference file
     reference_path = os.path.join(EXAMPLE_DIR, 'example.trees')
     reference = NexusReader(reference_path)
     assert reference.trees[0] == self.nex.trees[0]
示例#9
0
 def test_anonymise_data_with_interleave(self):
     """Anonymising example3.nex swaps both taxon names for fixed digests."""
     source = NexusReader(os.path.join(EXAMPLE_DIR, 'example3.nex'))
     nex = anonymise(source)
     for old_taxon in ('Harry', 'Simon'):
         assert old_taxon not in nex.data.matrix, '%s should have been anonymised' % old_taxon
     # both anonymised rows keep the same six states
     for digest in ('6eb7148a2d4155085e517979410b9f23',
                    '698de77f637e7fae18ead22f2172102a'):
         assert nex.data.matrix[digest] == ['0', '1', '2', '3', '4', '5']
示例#10
0
 def test_count_other_values_two(self):
     """count_site_values tallies the 'A' and 'B' states per taxon."""
     matrix_text = """#NEXUS
     Begin data;
     Dimensions ntax=5 nchar=3;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              0A0  [No missing]
     Simon              0AB  [one missing]
     Peter              0-B  [one gap]
     Betty              ?-1  [one gap and one missing = 2 missing]
     Louise             ???  [three missing]
         ;
     End;
     """
     nexus = NexusReader()
     nexus.read_string(matrix_text)
     expected = dict(Harry=1, Simon=2, Peter=1, Betty=0, Louise=0)
     count = count_site_values(nexus, ['A', 'B'])
     for taxon in count:
         assert count[taxon] == expected[taxon]
示例#11
0
文件: bPTP.py 项目: tokebe/niclassify
    def __init__(self,
                 filename,
                 ftype="nexus",
                 reroot=False,
                 method="H1",
                 seed=1234,
                 thinning=100,
                 sampling=10000,
                 burnin=0.1,
                 firstktrees=0,
                 taxa_order=None):
        """Load the trees from ``filename`` and store the run settings.

        Args:
            filename: Path of the tree file to read.
            ftype: ``"nexus"`` reads the file with ``NexusReader`` and
                detranslates its tree block; any other value hands the file
                to ``self.raxmlTreeParser``.
            reroot: Stored unchanged as ``self.reroot``.
            method: Stored unchanged as ``self.method``.
            seed: Stored unchanged as ``self.seed``.
            thinning: Stored unchanged as ``self.thinning``.
            sampling: Stored unchanged as ``self.sampling``.
            burnin: Stored unchanged as ``self.burnin``.
            firstktrees: When positive and no larger than the number of
                trees read, only that many leading trees are kept.
            taxa_order: Taxon names in the desired order.  When omitted or
                empty, the leaf names of the first tree are used.
        """
        self.method = method
        self.seed = seed
        self.thinning = thinning
        self.sampling = sampling
        self.burnin = burnin
        self.firstktrees = firstktrees
        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if 0 < self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # None replaces the former shared mutable default ([]); an explicitly
        # passed empty sequence is still treated as "derive it from the tree".
        if taxa_order is None or len(taxa_order) == 0:
            taxa_order = Tree(self.trees[0], format=1).get_leaf_names()
        self.taxa_order = taxa_order
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot
示例#12
0
 def test_run_deltree(self):
     """Deleting tree '2' from example.trees leaves the other two trees."""
     source = NexusReader(os.path.join(EXAMPLE_DIR, 'example.trees'))
     new_nex = run_deltree('2', source, do_print=False)
     remaining = (
         'tree tree.0.1065.603220',
         'tree tree.20000.883.396049',
     )
     assert len(new_nex.trees.trees) == 2
     assert new_nex.trees.ntrees == 2
     for index, prefix in enumerate(remaining):
         assert new_nex.trees[index].startswith(prefix)
示例#13
0
def _load_tree(name, fname, get_language, verbose=False, phylo=None):
    """Create the ``LanguageTree`` called *name* from a nexus tree file.

    Args:
        name: Name of the tree; also the lookup key for ``get_or_create``.
        fname: Path object of the nexus file (``as_posix()`` is called on it).
        get_language: Callable taking a taxon label and returning an
            iterable of language objects, or a falsy value when none match.
        verbose: When true, log the cleaned newick string.
        phylo: Optional object providing ``as_source()``.  When given, its
            source is saved and attached to the tree, and no taxon labels
            are created.

    Returns:
        0 when a tree with this name already existed (nothing is changed),
        otherwise 1.
    """
    path = fname.as_posix()
    # now add languages to the tree
    reader = NexusReader(path)

    # make a tree if not exists. Use the name of the tree
    tree, created = LanguageTree.objects.get_or_create(name=name)
    if not created:
        return 0

    if phylo:
        source = phylo.as_source()
        source.save()
        tree.source = source

    with open(path, 'rb') as f:
        tree.file = ContentFile(f.read())
        tree.save()

    # Remove '[&R]' from newick string
    reader.trees.detranslate()
    newick = re.sub(r'\[.*?\]', '', reader.trees.trees[0])
    # Keep only what follows the first '='; leave the string untouched when
    # there is none.
    _, separator, remainder = newick.partition('=')
    if separator:
        newick = remainder

    if verbose:  # pragma: no cover
        logging.info("Formatting newick string %s", newick)

    tree.newick_string = str(newick)
    if phylo:
        tree.save()
        return 1

    # phylogeny taxa require reading of CSV mapping files, glottolog trees do not
    for taxon_name in reader.trees.taxa:
        # Compare by value: identity ("is") against a string literal is not
        # reliable and triggers a SyntaxWarning on recent Python versions.
        if taxon_name == '1':
            continue  # pragma: no cover

        languages = get_language(taxon_name)
        if not languages:
            continue

        for language in languages:
            societies = Society.objects.filter(language=language)
            label, created = LanguageTreeLabels.objects.get_or_create(
                languageTree=tree,
                label=taxon_name,
                language=language
            )
            for society in societies:
                LanguageTreeLabelsSequence.objects.get_or_create(
                    society=society,
                    labels=label,
                    fixed_order=0
                )
            tree.taxa.add(label)
    tree.save()
    return 1
示例#14
0
 def test_anonymise_translated_trees(self):
     """Every taxon of example-translated.trees is replaced by its hash."""
     filename = os.path.join(EXAMPLE_DIR, 'example-translated.trees')
     nex = anonymise(NexusReader(filename))
     expected = [
         'Chris', 'Bruce', 'Tom', 'Henry', 'Timothy', 'Mark', 'Simon',
         'Fred', 'Kevin', 'Roger', 'Michael', 'Andrew', 'David',
     ]
     assert len(nex.trees.taxa) == len(expected)
     for taxon in expected:
         # the file path is the first argument passed to hash() here
         assert hash(filename, taxon) in nex.trees.taxa
示例#15
0
 def test_regression(self):
     """white_space_in_matrix.nex parses to four taxa with two sites each."""
     path = os.path.join(REGRESSION_DIR, 'white_space_in_matrix.nex')
     nex = NexusReader(path)
     assert nex.blocks['data'].nchar == 2
     expected_rows = (
         ('Harry', ['0', '0']),
         ('Simon', ['0', '1']),
         ('Betty', ['1', '0']),
         ('Louise', ['1', '1']),
     )
     for taxon, states in expected_rows:
         assert nex.blocks['data'].matrix[taxon] == states
示例#16
0
 def test_find_constant_sites_2(self):
     """All four sites (0-3) of example2.nex are reported as constant."""
     path = os.path.join(EXAMPLE_DIR, 'example2.nex')
     const = find_constant_sites(NexusReader(path))
     assert len(const) == 4
     for site in range(4):
         assert site in const
示例#17
0
 def test_read_string(self):
     """read_string() parses the text of example.nex into a data block."""
     # The context manager closes the file even when reading fails.
     with open(os.path.join(EXAMPLE_DIR, 'example.nex')) as handle:
         data = handle.read()
     nex = NexusReader()
     nex.read_string(data)
     assert 'data' in nex.blocks
     assert 'Simon' in nex.blocks['data'].matrix
示例#18
0
 def test_run_resample_1(self):
     """Resampling with '1' keeps all three trees of example.trees."""
     # shouldn't resample anything..
     source = NexusReader(os.path.join(EXAMPLE_DIR, 'example.trees'))
     new_nex = run_resample('1', source, do_print=False)
     kept = (
         'tree tree.0.1065.603220',
         'tree tree.10000.874.808756',
         'tree tree.20000.883.396049',
     )
     assert len(new_nex.trees.trees) == 3
     assert new_nex.trees.ntrees == 3
     for index, prefix in enumerate(kept):
         assert new_nex.trees[index].startswith(prefix)
示例#19
0
 def test_notimplemented_exception(self):
     """Reading and anonymising a 'something' block raises NotImplementedError."""
     unsupported = """Begin something;
             Dimensions ntax=5 nchar=1;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;"""
     with self.assertRaises(NotImplementedError):
         reader = NexusReader()
         reader.read_string(unsupported)
         anonymise(reader)
示例#20
0
 def test_anonymise_data_with_interleave(self):
     """Salted anonymising of example3.nex keys each row by its hash."""
     source = NexusReader(os.path.join(EXAMPLE_DIR, 'example3.nex'))
     nex = anonymise(source, salt="test")
     expected_states = ['0', '1', '2', '3', '4', '5']
     for old_taxon in ('Harry', 'Simon'):
         assert old_taxon not in nex.data.matrix, \
             '%s should have been anonymised' % old_taxon
         hashed = hash("test", old_taxon)
         assert hashed in nex.data.matrix
         assert hashed in nex.data.taxa
         assert nex.data.matrix[hashed] == expected_states
 def setUp(self):
     """Load a 3-taxon, 6-character matrix into ``self.nex``."""
     matrix_text = """Begin data;
         Dimensions ntax=3 nchar=6;
         Format datatype=standard symbols="12" gap=-;
         Matrix
         Harry              0111-?
         Simon              0011-?
         Elvis              0001-?
         ;"""
     self.nex = NexusReader()
     self.nex.read_string(matrix_text)
示例#22
0
 def test_anonymise_data(self):
     """Anonymising example.nex swaps all four taxa for fixed digests."""
     source = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
     nex = anonymise(source)
     for old_taxon in ('Harry', 'Simon', 'Betty', 'Louise'):
         assert old_taxon not in nex.data.matrix, '%s should have been anonymised' % old_taxon

     # digest of each anonymised taxon -> the states its row must hold
     expected_rows = (
         ('894a76c65225a9812d31ff75edf38feb', ['1', '0']),
         ('a0434190848c0d64332dce12a8a27961', ['0', '0']),
         ('bbf0da40d536d862e184a6eccb433a73', ['0', '1']),
         ('d24eb4091c14b87b6cd0bd94fd0704be', ['1', '1']),
     )
     for digest, states in expected_rows:
         assert nex.data.matrix[digest] == states
示例#23
0
 def test_error_on_too_many_states(self):
     """multistatise raises ValueError for a taxon with 30 present sites."""
     wide_matrix = """
     Begin data;
     Dimensions ntax=1 nchar=30;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     A   111111111111111111111111111111
     ;"""
     self.nex = NexusReader()
     self.nex.read_string(wide_matrix)
     with self.assertRaises(ValueError):
         multistatise(self.nex)
示例#24
0
 def test_anonymise_data_with_labels(self):
     """Salted anonymising of example2.nex updates data and taxa blocks."""
     source = NexusReader(os.path.join(EXAMPLE_DIR, 'example2.nex'))
     nex = anonymise(source, salt="test")
     expected_states = ['a', 'c', 't', 'g']
     for old_taxon in ('John', 'Paul', 'George', 'Ringo'):
         assert old_taxon not in nex.data.matrix, \
             '%s should have been anonymised' % old_taxon
         hashed = hash("test", old_taxon)
         # check data block
         assert hashed in nex.data.matrix, "Missing %s" % hashed
         assert nex.data.matrix[hashed] == expected_states
         # check taxa block
         assert hashed in nex.taxa.taxa
示例#25
0
 def setUp(self):
     """Store the multistatised form of a 4x4 binary matrix in ``self.nex``."""
     binary_matrix = """
     Begin data;
     Dimensions ntax=4 nchar=4;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     Harry              1000
     Simon              0100
     Betty              0010
     Louise             0001
     ;"""
     self.nex = NexusReader()
     self.nex.read_string(binary_matrix)
     # replace the raw reader with the converted one
     self.nex = multistatise(self.nex)
示例#26
0
 def test_incorrect_dimensions_warnings_nchar(self):
     """A wrong nchar declaration yields one UserWarning; nchar ends up 1."""
     mismatched = """Begin data;
             Dimensions ntax=1 nchar=5;
             Format datatype=standard symbols="01" gap=-;
             Matrix
             Harry              1
             ;"""
     with warnings.catch_warnings(record=True) as caught:
         nex = NexusReader()
         nex.read_string(mismatched)
         assert len(caught) == 1, 'Expected 1 warning, got %r' % caught
         latest = caught[-1]
         assert issubclass(latest.category, UserWarning)
         assert "Expected" in str(latest.message)
         assert nex.data.nchar == 1
示例#27
0
 def setUp(self):
     """Parse a 4x8 matrix and keep the ``check_zeros`` result in ``self.found``."""
     matrix_text = """
     Begin data;
     Dimensions ntax=4 nchar=8;
     Format datatype=standard symbols="01" gap=-;
     Matrix
     [                  01234567]
     Harry              01000000
     Simon              0010000-
     Betty              00010-0?
     Louise             000010?0
     ;"""
     self.nex = NexusReader()
     self.nex.read_string(matrix_text)
     self.found = check_zeros(self.nex)
示例#28
0
 def test_read_gzip_file(self):
     """NexusReader can read a gzip-compressed copy of example.nex."""
     # first, MAKE a gzip file
     import gzip
     from tempfile import NamedTemporaryFile
     tmp = NamedTemporaryFile(delete=False, suffix=".gz")
     tmp.close()
     try:
         # Copy raw bytes: the 'rU' mode no longer exists in Python 3.11+,
         # and a binary gzip stream rejects text lines on Python 3.
         with open(os.path.join(EXAMPLE_DIR, 'example.nex'), 'rb') as f_in:
             with gzip.open(tmp.name, 'wb') as f_out:
                 f_out.writelines(f_in)
         # test it's ok
         nex = NexusReader(tmp.name)
         assert 'data' in nex.blocks
         assert 'Simon' in nex.blocks['data'].matrix
     finally:
         os.unlink(tmp.name)  # cleanup, also when an assertion fails
示例#29
0
 def setUp(self):
     """Store the binarised form of a labelled 3x2 matrix in ``self.nex``."""
     labelled_matrix = """Begin data;
     Dimensions ntax=3 nchar=2;
     Format datatype=standard symbols="01" gap=-;
     Charstatelabels
         1 char1, 2 char2;
     Matrix
     Maori               14
     Dutch               25
     Latin               36
     ;"""
     self.nex = NexusReader()
     self.nex.read_string(labelled_matrix)
     # replace the raw reader with the converted one
     self.nex = binarise(self.nex)
示例#30
0
    def test_anonymise_taxa(self):
        """Salted anonymising of example.nex keys each row by its hash."""
        source = NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex'))
        nex = anonymise(source, salt="test")
        for old_taxon in ('Harry', 'Simon', 'Betty', 'Louise'):
            assert old_taxon not in nex.data.matrix, \
                '%s should have been anonymised' % old_taxon

        # original taxon name -> the states its anonymised row must hold
        expected_rows = (
            ("Betty", ['1', '0']),
            ("Harry", ['0', '0']),
            ("Simon", ['0', '1']),
            ("Louise", ['1', '1']),
        )
        for taxon, states in expected_rows:
            assert nex.data.matrix[hash("test", taxon)] == states