def setUp(self):
    """Create the three single-character readers (nex1, nex2, nex3) used as fixtures."""
    first = self.nex1 = NexusReader()
    first.read_string("""Begin data; Dimensions ntax=2 nchar=1; Format datatype=standard symbols="12" gap=-; Matrix Harry 1 Simon 2 ;""")
    # Give nex1 an explicit short_filename so that this feature is exercised:
    # should `combine_nexuses` ignore `short_filename`, the nex1 characters
    # would be labelled 1.xx instead of 0.xx.
    first.short_filename = '0'

    second = self.nex2 = NexusReader()
    second.read_string("""Begin data; Dimensions ntax=2 nchar=1; Format datatype=standard symbols="34" gap=-; Matrix Harry 3 Simon 4 ;""")

    third = self.nex3 = NexusReader()
    third.read_string("""Begin data; Dimensions ntax=3 nchar=1; Format datatype=standard symbols="345" gap=-; Matrix Betty 3 Boris 4 Simon 5 ;""")
def snpMatrixGenerator(sourceFile, destFile, recordAll=False, recordRandomSample=True):
    """Merge the differences of every data block in ``sourceFile`` into one NEXUS file.

    ``sourceFile`` is consumed line by line and cut into chunks at every line
    that contains both "begin" and "data" (case-insensitively). Each chunk is
    parsed with ``NexusReader``; chunks that produce a ``data`` block are
    handed to ``_findDifferences`` together with a shared ``NexusWriter``.
    The writer's output, followed by a newline, is written to ``destFile``.

    Args:
        sourceFile: open, line-iterable handle to read from.
        destFile: open handle to write the resulting NEXUS text to.
        recordAll: forwarded to ``_findDifferences``.
        recordRandomSample: forwarded to ``_findDifferences``.

    Raises:
        SystemExit: if ``recordAll`` and ``recordRandomSample`` are equal
            (after printing "Invalid Options").

    Both handles are closed before returning, also when processing fails.
    """
    if recordAll == recordRandomSample:
        print("Invalid Options")
        # `exit()` is an interactive helper injected by `site`; raising
        # SystemExit directly has the same effect and is always available.
        raise SystemExit

    try:
        destNexus = NexusWriter()

        def record(chunk, column):
            """Parse one chunk; if it holds a data block, record its differences."""
            reader = NexusReader()
            reader.read_string(chunk)
            if "data" in reader.blocks:
                column = _findDifferences(
                    reader, destNexus, column, recordAll, recordRandomSample)
            return column

        pending = []  # lines of the chunk currently being accumulated
        snpCol = 0
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                # A new data block starts: flush what was gathered so far.
                snpCol = record("".join(pending), snpCol)
                pending = [line]
            else:
                pending.append(line)
        # Flush the trailing chunk.
        snpCol = record("".join(pending), snpCol)

        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        destFile.close()
        sourceFile.close()
def validate(self):
    """Warn about every inconsistency found between this dataset's files.

    Checks, in order:

    * ``self.details['scaling']`` is one of ``SCALINGS``;
    * ``self.taxa`` is not empty;
    * ``self.source`` is set and its text is not empty;
    * each of ``self.summary`` / ``self.posterior`` that exists contains
      trees, and every tip of those trees is listed in ``self.taxa``;
    * every taxon of the data NEXUS (``self.nexus``) is listed in
      ``self.taxa``;
    * the number of rows yielded by ``read_csv(self.characters)`` equals the
      ``nchar`` of the data NEXUS.

    Every problem is reported through ``warn``; nothing is returned.
    """
    dataset_id = self.details.get('id', '?')

    # check scaling
    scaling = self.details.get('scaling')
    if scaling not in SCALINGS:
        warn("Unknown Scaling '%s'" % scaling)

    # check taxa file
    if len(self.taxa) == 0:
        warn("No taxa defined")
    if self.taxa and not len(self.taxa.keys()):
        warn("Empty taxa file")

    # check source file
    if not self.source:
        warn("No source bibtex")
    if self.source and len(self.source.read_text()) == 0:
        warn("Empty bibtex file")

    # check trees
    for tf in [self.summary, self.posterior]:
        if not (tf and tf.exists()):
            continue
        nex = NexusReader(tf)
        if not nex.trees:
            warn("No trees in %s.%s!" % (dataset_id, tf.stem))
            # Without trees there are no tips to compare.
            continue
        # are all the taxa in the tree listed in the taxa table?
        unknown = [t for t in nex.trees.taxa if t not in self.taxa]
        if unknown:
            warn("Unknown tips in %s.%s: %r" % (dataset_id, tf.stem, unknown))

    # The data file is only needed for the two checks below; parse it once.
    if self.nexus and (self.taxa or self.characters):
        nex = NexusReader(self.nexus)

        # if we have a data file, the taxa should match the taxa.csv
        if self.taxa:
            if not nex.data:
                warn("No data in %s data.nex!" % dataset_id)
            else:
                unknown = [t for t in nex.data.taxa if t not in self.taxa]
                if unknown:
                    warn("Unknown tips in %s data.nex: %r" % (dataset_id, unknown))

        # if we have characters they should match the nexus
        if self.characters:
            if not nex.data or not nex.data.taxa:
                # Report the data file itself rather than the leftover
                # loop variable of the tree check.
                warn("No data in %s data.nex!" % dataset_id)
            else:
                # Count rows without indexing, so an empty CSV gives 0
                # instead of raising IndexError.
                nchar = sum(1 for _ in read_csv(self.characters))
                if nchar != nex.data.nchar:
                    warn("characters.csv incorrect in %s - expected %d, got %d" % (
                        dataset_id, nex.data.nchar, nchar))
def test_combine_with_character_labels(self):
    """Combine two labelled 3x3 matrices and check dimensions, symbols, rows and labels."""
    first = NexusReader()
    first.read_string(""" BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="123"; CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; MATRIX Tax1 123 Tax2 123 Tax3 123 ; """)
    second = NexusReader()
    second.read_string(""" BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="456"; CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; MATRIX Tax1 456 Tax2 456 Tax3 456 ; """)

    combined = combine_nexuses([first, second])

    # Header: taxa count unchanged, characters and symbols concatenated.
    header_patterns = (
        r"""\bNTAX=3\b""",
        r"""\bNCHAR=6\b""",
        r'\sSYMBOLS="123456"[\s;]',
    )
    for pattern in header_patterns:
        assert re.search(pattern, combined.write())

    # Every taxon row carries the states of both inputs.
    for tax in (1, 2, 3):
        assert re.search(r"""\bTax%d\s+123456\b""" % tax, combined.write())

    # Labels are numbered consecutively and prefixed with the input's index.
    label_pairs = [(nex_id, char_id) for nex_id in (1, 2) for char_id in (1, 2, 3)]
    for counter, (nex_id, char_id) in enumerate(label_pairs, 1):
        assert re.search(
            r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id),
            combined.write(charblock=True))
def test_interleave_matrix_parsing(self):
    """example3.nex yields 2 taxa x 6 characters, with character i equal to str(i)."""
    reader = NexusReader(os.path.join(EXAMPLE_DIR, 'example3.nex'))
    assert reader.data.ntaxa == 2
    assert len(reader.data.taxa) == 2
    assert reader.data.nchar == 6
    for taxon, blocks in reader.data:
        for position in range(reader.data.nchar):
            assert blocks[position] == str(position)
def test_find_unique_sites_2(self):
    """find_unique_sites on a 4x7 matrix containing a '?' and a gap."""
    reader = NexusReader()
    reader.read_string("""Begin data; Dimensions ntax=4 nchar=7; Format datatype=standard symbols="01" gap=-; Matrix Harry 10000?- Simon 1100011 Betty 1110000 Louise 1111000 ;""")
    unique = find_unique_sites(reader)

    # Sites that must NOT be reported:
    #  - site 1 (3x1 and 1x0), i.e. sites with ONE absent taxon are ignored
    #  - sites 0 and 2
    #  - site 4, which is constant
    for site in (1, 0, 2, 4):
        assert site not in unique

    # Sites that must be reported:
    #  - site 3 is a simple unique site
    #  - sites 5 and 6 check that missing data is handled appropriately
    for site in (3, 5, 6):
        assert site in unique
def test_interleave_matrix_parsing(examples):
    """example3.nex yields 2 taxa x 6 characters, with character i equal to str(i)."""
    reader = NexusReader(str(examples / 'example3.nex'))
    assert reader.data.ntaxa == 2
    assert len(reader.data.taxa) == 2
    assert reader.data.nchar == 6
    for taxon, blocks in reader.data:
        for position in range(reader.data.nchar):
            assert blocks[position] == str(position), \
                "Error for %s:%d" % (taxon, position)
def test_detranslate(self):
    """After detranslate(), the fixture's first tree equals tree 0 of example.trees."""
    # The fixture must not have been detranslated yet.
    assert not self.nex.trees._been_detranslated
    self.nex.trees.detranslate()
    # should NOW be the same as tree 0 in example.trees
    reference = NexusReader(os.path.join(EXAMPLE_DIR, 'example.trees'))
    assert reference.trees[0] == self.nex.trees[0]
def test_anonymise_data_with_interleave(self):
    """Anonymising example3.nex replaces both taxon names and keeps their data."""
    anonymised = anonymise(NexusReader(os.path.join(EXAMPLE_DIR, 'example3.nex')))
    for old_taxon in ('Harry', 'Simon'):
        assert old_taxon not in anonymised.data.matrix, '%s should have been anonymised' % old_taxon
    # Expected replacement keys, each holding the unchanged character row.
    for key in ('6eb7148a2d4155085e517979410b9f23', '698de77f637e7fae18ead22f2172102a'):
        assert anonymised.data.matrix[key] == ['0', '1', '2', '3', '4', '5']
def test_count_other_values_two(self):
    """count_site_values with ['A', 'B'] returns the expected per-taxon counts."""
    reader = NexusReader()
    reader.read_string("""#NEXUS Begin data; Dimensions ntax=5 nchar=3; Format datatype=standard symbols="01" gap=-; Matrix Harry 0A0 [No missing] Simon 0AB [one missing] Peter 0-B [one gap] Betty ?-1 [one gap and one missing = 2 missing] Louise ??? [three missing] ; End; """)
    tallies = count_site_values(reader, ['A', 'B'])

    expected = {
        'Harry': 1,
        'Simon': 2,
        'Peter': 1,
        'Betty': 0,
        'Louise': 0,
    }
    for taxon in tallies:
        assert tallies[taxon] == expected[taxon]
def __init__(self, filename, ftype="nexus", reroot=False, method="H1", seed=1234,
             thinning=100, sampling=10000, burnin=0.1, firstktrees=0, taxa_order=None):
    """Load the trees in ``filename`` and record the run settings.

    Args:
        filename: path of the tree file to load.
        ftype: ``"nexus"`` reads the file with ``NexusReader`` and
            detranslates its trees block; any other value loads the trees
            with ``self.raxmlTreeParser``.
        reroot, method, seed, thinning, sampling, burnin: stored unchanged
            on the instance.
        firstktrees: when between 1 and the number of loaded trees, only
            the first ``firstktrees`` trees are kept.
        taxa_order: taxon names in the desired order. When omitted or
            empty, the leaf names of the first tree are used.
    """
    self.method = method
    self.seed = seed
    self.thinning = thinning
    self.sampling = sampling
    self.burnin = burnin
    self.firstktrees = firstktrees
    self.reroot = reroot

    if ftype == "nexus":
        self.nexus = NexusReader(filename)
        self.nexus.blocks['trees'].detranslate()
        self.trees = self.nexus.trees.trees
    else:
        self.trees = self.raxmlTreeParser(filename)

    if 0 < self.firstktrees <= len(self.trees):
        self.trees = self.trees[:self.firstktrees]

    # `None` replaces the former mutable `[]` default, so no list object is
    # shared between calls; an explicitly passed empty sequence still
    # triggers the fallback below.
    if taxa_order is None or len(taxa_order) == 0:
        taxa_order = Tree(self.trees[0], format=1).get_leaf_names()
    self.taxa_order = taxa_order

    self.numtaxa = len(self.taxa_order)
    self.numtrees = len(self.trees)
def test_run_deltree(self):
    """run_deltree('2', ...) on example.trees leaves two trees with the expected names."""
    source = NexusReader(os.path.join(EXAMPLE_DIR, 'example.trees'))
    pruned = run_deltree('2', source, do_print=False)
    assert len(pruned.trees.trees) == 2
    assert pruned.trees.ntrees == 2
    remaining = ('tree tree.0.1065.603220', 'tree tree.20000.883.396049')
    for index, prefix in enumerate(remaining):
        assert pruned.trees[index].startswith(prefix)
def _load_tree(name, fname, get_language, verbose=False, phylo=None):
    """Create the ``LanguageTree`` called ``name`` from the NEXUS file ``fname``.

    Args:
        name: name of the tree; an existing tree with this name is left
            untouched.
        fname: path object of the NEXUS file (``as_posix()`` is called on it).
        get_language: callable taking a taxon name and returning the
            languages for it (a falsy result skips the taxon).
        verbose: when true, log the newick string that is stored.
        phylo: optional object providing ``as_source()``. When given, its
            source is saved and attached to the tree, and no taxon labels
            are created.

    Returns:
        0 if a tree called ``name`` already existed, otherwise 1.
    """
    reader = NexusReader(fname.as_posix())

    # make a tree if not exists. Use the name of the tree
    tree, created = LanguageTree.objects.get_or_create(name=name)
    if not created:
        return 0

    if phylo:
        source = phylo.as_source()
        source.save()
        tree.source = source

    with open(fname.as_posix(), 'rb') as f:
        tree.file = ContentFile(f.read())
        tree.save()

    # Strip bracketed comments such as '[&R]' from the first tree and keep
    # only what follows the first '=' (if there is one).
    reader.trees.detranslate()
    newick = re.sub(r'\[.*?\]', '', reader.trees.trees[0])
    _, separator, remainder = newick.partition('=')
    if separator:
        newick = remainder

    if verbose:  # pragma: no cover
        logging.info("Formatting newick string %s", newick)

    tree.newick_string = str(newick)
    if phylo:
        tree.save()
        return 1

    # phylogeny taxa require reading of CSV mapping files, glottolog trees do not
    for taxon_name in reader.trees.taxa:
        # Compare by value: identity (`is`) against a string literal is not
        # a reliable equality test.
        if taxon_name == '1':
            continue  # pragma: no cover
        languages = get_language(taxon_name)
        if not languages:
            continue
        for language in languages:
            societies = Society.objects.filter(language=language)
            label, _ = LanguageTreeLabels.objects.get_or_create(
                languageTree=tree,
                label=taxon_name,
                language=language
            )
            for society in societies:
                LanguageTreeLabelsSequence.objects.get_or_create(
                    society=society, labels=label, fixed_order=0
                )
            tree.taxa.add(label)
    tree.save()
    return 1
def test_anonymise_translated_trees(self):
    """Every taxon of example-translated.trees is replaced by its hash."""
    path = os.path.join(EXAMPLE_DIR, 'example-translated.trees')
    anonymised = anonymise(NexusReader(path))
    expected = [
        'Chris', 'Bruce', 'Tom', 'Henry', 'Timothy', 'Mark', 'Simon',
        'Fred', 'Kevin', 'Roger', 'Michael', 'Andrew', 'David',
    ]
    assert len(anonymised.trees.taxa) == len(expected)
    for taxon in expected:
        # The hash is computed from the file path and the taxon name.
        assert hash(path, taxon) in anonymised.trees.taxa
def test_regression(self):
    """white_space_in_matrix.nex parses to two characters per taxon."""
    reader = NexusReader(
        os.path.join(REGRESSION_DIR, 'white_space_in_matrix.nex'))
    assert reader.blocks['data'].nchar == 2
    expected_rows = (
        ('Harry', ['0', '0']),
        ('Simon', ['0', '1']),
        ('Betty', ['1', '0']),
        ('Louise', ['1', '1']),
    )
    for taxon, row in expected_rows:
        assert reader.blocks['data'].matrix[taxon] == row
def test_find_constant_sites_2(self):
    """find_constant_sites reports exactly four sites, 0-3, for example2.nex."""
    reader = NexusReader(os.path.join(EXAMPLE_DIR, 'example2.nex'))
    const = find_constant_sites(reader)
    assert len(const) == 4
    for site in (0, 1, 2, 3):
        assert site in const
def test_read_string(self):
    """read_string() on the text of example.nex yields a data block containing 'Simon'."""
    # `with` closes the handle even if reading fails.
    with open(os.path.join(EXAMPLE_DIR, 'example.nex')) as handle:
        data = handle.read()
    nex = NexusReader()
    nex.read_string(data)
    assert 'data' in nex.blocks
    assert 'Simon' in nex.blocks['data'].matrix
def test_run_resample_1(self):
    """run_resample('1', ...) keeps all three trees of example.trees."""
    # shouldn't resample anything..
    source = NexusReader(os.path.join(EXAMPLE_DIR, 'example.trees'))
    resampled = run_resample('1', source, do_print=False)
    assert len(resampled.trees.trees) == 3
    assert resampled.trees.ntrees == 3
    kept = (
        'tree tree.0.1065.603220',
        'tree tree.10000.874.808756',
        'tree tree.20000.883.396049',
    )
    for index, prefix in enumerate(kept):
        assert resampled.trees[index].startswith(prefix)
def test_notimplemented_exception(self):
    """Reading and anonymising a nexus with a 'something' block raises NotImplementedError."""
    with self.assertRaises(NotImplementedError):
        reader = NexusReader()
        reader.read_string("""Begin something; Dimensions ntax=5 nchar=1; Format datatype=standard symbols="01" gap=-; Matrix Harry 1 ;""")
        anonymise(reader)
def test_anonymise_data_with_interleave(self):
    """With salt "test", each taxon of example3.nex is replaced by its salted hash."""
    path = os.path.join(EXAMPLE_DIR, 'example3.nex')
    anonymised = anonymise(NexusReader(path), salt="test")
    for old_taxon in ('Harry', 'Simon'):
        assert old_taxon not in anonymised.data.matrix, \
            '%s should have been anonymised' % old_taxon
        hashed = hash("test", old_taxon)
        # The hashed name is present in both the matrix and the taxa list,
        # and still carries the original character row.
        assert hashed in anonymised.data.matrix
        assert hashed in anonymised.data.taxa
        assert anonymised.data.matrix[hashed] == ['0', '1', '2', '3', '4', '5']
def setUp(self):
    """Load a 3-taxon, 6-character matrix into ``self.nex``."""
    reader = self.nex = NexusReader()
    reader.read_string("""Begin data; Dimensions ntax=3 nchar=6; Format datatype=standard symbols="12" gap=-; Matrix Harry 0111-? Simon 0011-? Elvis 0001-? ;""")
def test_anonymise_data(self):
    """Anonymising example.nex replaces all four taxon names and keeps their data."""
    anonymised = anonymise(NexusReader(os.path.join(EXAMPLE_DIR, 'example.nex')))
    for old_taxon in ('Harry', 'Simon', 'Betty', 'Louise'):
        assert old_taxon not in anonymised.data.matrix, '%s should have been anonymised' % old_taxon
    # Expected replacement keys and the character row each must hold.
    expected_rows = (
        ('894a76c65225a9812d31ff75edf38feb', ['1', '0']),
        ('a0434190848c0d64332dce12a8a27961', ['0', '0']),
        ('bbf0da40d536d862e184a6eccb433a73', ['0', '1']),
        ('d24eb4091c14b87b6cd0bd94fd0704be', ['1', '1']),
    )
    for key, row in expected_rows:
        assert anonymised.data.matrix[key] == row
def test_error_on_too_many_states(self):
    """multistatise raises ValueError for a single taxon with 30 characters."""
    reader = self.nex = NexusReader()
    reader.read_string(""" Begin data; Dimensions ntax=1 nchar=30; Format datatype=standard symbols="01" gap=-; Matrix A 111111111111111111111111111111 ;""")
    with self.assertRaises(ValueError):
        multistatise(self.nex)
def test_anonymise_data_with_labels(self):
    """With salt "test", example2.nex taxa are hashed in both the data and taxa blocks."""
    path = os.path.join(EXAMPLE_DIR, 'example2.nex')
    anonymised = anonymise(NexusReader(path), salt="test")
    for old_taxon in ('John', 'Paul', 'George', 'Ringo'):
        assert old_taxon not in anonymised.data.matrix, \
            '%s should have been anonymised' % old_taxon
        hashed = hash("test", old_taxon)
        # check data block
        assert hashed in anonymised.data.matrix, "Missing %s" % hashed
        assert anonymised.data.matrix[hashed] == ['a', 'c', 't', 'g']
        # check taxa block
        assert hashed in anonymised.taxa.taxa
def setUp(self):
    """Load a 4x4 binary matrix and store its multistatised form in ``self.nex``."""
    raw = self.nex = NexusReader()
    raw.read_string(""" Begin data; Dimensions ntax=4 nchar=4; Format datatype=standard symbols="01" gap=-; Matrix Harry 1000 Simon 0100 Betty 0010 Louise 0001 ;""")
    self.nex = multistatise(raw)
def test_incorrect_dimensions_warnings_nchar(self):
    """A declared nchar=5 with one actual character gives one UserWarning and nchar == 1."""
    with warnings.catch_warnings(record=True) as caught:
        reader = NexusReader()
        reader.read_string("""Begin data; Dimensions ntax=1 nchar=5; Format datatype=standard symbols="01" gap=-; Matrix Harry 1 ;""")
        assert len(caught) == 1, 'Expected 1 warning, got %r' % caught
        latest = caught[-1]
        assert issubclass(latest.category, UserWarning)
        assert "Expected" in str(caught[-1].message)
        assert reader.data.nchar == 1
def setUp(self):
    """Load a 4x8 matrix into ``self.nex`` and store ``check_zeros`` of it in ``self.found``."""
    reader = self.nex = NexusReader()
    reader.read_string(""" Begin data; Dimensions ntax=4 nchar=8; Format datatype=standard symbols="01" gap=-; Matrix [ 01234567] Harry 01000000 Simon 0010000- Betty 00010-0? Louise 000010?0 ;""")
    self.found = check_zeros(self.nex)
def test_read_gzip_file(self):
    """A gzip-compressed copy of example.nex can be read by NexusReader."""
    import gzip
    from tempfile import NamedTemporaryFile

    # first, MAKE a gzip file
    tmp = NamedTemporaryFile(delete=False, suffix=".gz")
    tmp.close()
    try:
        # Copy raw bytes: the gzip handle is binary, and the 'U' open mode
        # is no longer accepted by current Python versions.
        with open(os.path.join(EXAMPLE_DIR, 'example.nex'), 'rb') as f_in:
            with gzip.open(tmp.name, 'wb') as f_out:
                f_out.writelines(f_in)

        # test it's ok
        nex = NexusReader(tmp.name)
        assert 'data' in nex.blocks
        assert 'Simon' in nex.blocks['data'].matrix
    finally:
        # cleanup, also when an assertion above fails
        os.unlink(tmp.name)
def setUp(self):
    """Load a labelled 3x2 matrix and store its binarised form in ``self.nex``."""
    raw = self.nex = NexusReader()
    raw.read_string("""Begin data; Dimensions ntax=3 nchar=2; Format datatype=standard symbols="01" gap=-; Charstatelabels 1 char1, 2 char2; Matrix Maori 14 Dutch 25 Latin 36 ;""")
    self.nex = binarise(raw)
def test_anonymise_taxa(self):
    """With salt "test", each taxon of example.nex is stored under its salted hash."""
    path = os.path.join(EXAMPLE_DIR, 'example.nex')
    anonymised = anonymise(NexusReader(path), salt="test")
    for old_taxon in ('Harry', 'Simon', 'Betty', 'Louise'):
        assert old_taxon not in anonymised.data.matrix, \
            '%s should have been anonymised' % old_taxon

    # Each original row must be reachable through the hashed taxon name.
    expected_rows = (
        ("Betty", ['1', '0']),
        ("Harry", ['0', '0']),
        ("Simon", ['0', '1']),
        ("Louise", ['1', '1']),
    )
    for taxon, row in expected_rows:
        assert anonymised.data.matrix[hash("test", taxon)] == row