def run_random(num_trees, nexus_obj, do_print=False):
    """
    Keeps a random subset of `num_trees` trees in the nexus' trees block.

    The trees of `nexus_obj` are replaced in place by the sample and the
    same object is returned.

    :param num_trees: The number of trees to keep
    :type num_trees: Integer (or anything `int()` accepts)

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance, with its trees subsampled.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    :raises ValueError: if num_trees cannot be converted to an integer
    :raises ValueError: if num_trees is larger than population
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['trees'])

    try:
        num_trees = int(num_trees)
    except (TypeError, ValueError):
        # int(None) / int([]) raise TypeError rather than ValueError; report
        # both the same way so the documented contract holds.
        raise ValueError("num_trees should be an integer")

    available = nexus_obj.trees.ntrees
    if num_trees > available:
        raise ValueError("Treefile only has %d trees in it." % available)
    if num_trees == available:  # pragma: no cover
        return nexus_obj  # nothing to drop, leave the trees untouched

    if do_print:  # pragma: no cover
        print("%d trees read. Sampling %d" % (available, num_trees))

    nexus_obj.trees.trees = sample(nexus_obj.trees.trees, num_trees)
    return nexus_obj
def run_deltree(deltree, nexus_obj, do_print=False):
    """
    Removes the trees selected by `deltree` from the nexus' trees block.

    Trees are numbered from 1 in iteration order; every tree whose number
    is in the result of `parse_deltree(deltree)` is dropped.

    :param deltree: A string describing the trees to be deleted.
    :type deltree: String

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance with the given trees removed.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['trees'])

    doomed = parse_deltree(deltree)
    if do_print:  # pragma: no cover
        print('Deleting: %d trees' % len(doomed))

    survivors = []
    for position, tree in enumerate(nexus_obj.trees, 1):
        if position not in doomed:
            survivors.append(tree)
            continue
        if do_print:  # pragma: no cover
            print('Deleting tree %d' % position)

    nexus_obj.trees.trees = survivors
    return nexus_obj
def run_random(num_trees, nexus_obj, do_print=False):
    """
    Keeps a random subset of `num_trees` trees in the nexus' trees block.

    The trees of `nexus_obj` are replaced in place by the sample and the
    same object is returned.

    :param num_trees: The number of trees to keep
    :type num_trees: Integer (or anything `int()` accepts)

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance, with its trees subsampled.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    :raises ValueError: if num_trees cannot be converted to an integer
    :raises ValueError: if num_trees is larger than population
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=["trees"])

    try:
        num_trees = int(num_trees)
    except (TypeError, ValueError):
        # int(None) / int([]) raise TypeError rather than ValueError; report
        # both the same way so the documented contract holds.
        raise ValueError("num_trees should be an integer")

    available = nexus_obj.trees.ntrees
    if num_trees > available:
        raise ValueError("Treefile only has %d trees in it." % available)
    if num_trees == available:
        return nexus_obj  # nothing to drop, leave the trees untouched

    if do_print:
        print("%d trees read... sampling %d" % (available, num_trees))

    nexus_obj.trees.trees = sample(nexus_obj.trees.trees, num_trees)
    return nexus_obj
def find_constant_sites(nexus_obj):
    """
    Lists the positions of the sites that show exactly one state.

    The states `?` and `-` are ignored when deciding whether a site is
    constant, so a site containing only those is NOT reported.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: A list of constant site positions (0-based, ascending).

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    constant = []
    for position in range(nexus_obj.data.nchar):
        observed = set()
        for _taxon, row in nexus_obj.data:
            state = row[position]
            if state not in ('?', '-'):
                observed.add(state)
        if len(observed) == 1:
            constant.append(position)
    return constant
def tally_by_taxon(nexus_obj):
    """
    Groups, for every taxon, the sites it has by the state coded there
    (i.e. taxon 1 has three sites coded as "A" and 1 coded as "G").

    Sites are identified by their character label when
    `nexus_obj.data.charlabels` has one for that position, otherwise by
    the position itself.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: A Dictionary of taxon -> state -> list of sites, e.g.
        {
            'taxon1': {'state1': ['site1', 'site2'], 'state0': ['site3']},
            'taxon2': {'state1': ['site2'], 'state0': ['site1', 'site3']},
        }

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    result = {}
    for taxon, characters in nexus_obj.data:
        by_state = {}
        result[taxon] = by_state
        for position, state in enumerate(characters):
            site = nexus_obj.data.charlabels.get(position, position)
            by_state.setdefault(state, []).append(site)
    return result
def tally_by_site(nexus_obj):
    """
    Groups, for every site, the taxa by the state they are coded with
    (i.e. site 1 has three taxa coded as "A", and 1 taxon coded as "G").

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: A Dictionary of site -> state -> list of taxa, e.g.
        {
            'site1': {'state1': ['taxon1', 'taxon2'], 'state0': ['taxon3']},
            'site2': {'state1': ['taxon2'], 'state0': ['taxon1', 'taxon3']},
        }

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    result = {}
    for site, states_by_taxon in nexus_obj.data.characters.items():
        by_state = result.setdefault(site, {})
        for taxon, state in states_by_taxon.items():
            by_state.setdefault(state, []).append(taxon)
    return result
def find_unique_sites(nexus_obj):
    """
    Returns a list of the unique sites in a binary nexus
    i.e. sites with only one taxon belonging to them.
        (this only really makes sense if the data is coded as
        presence/absence)

    A site is unique when, ignoring `?` and `-`, it has exactly two states
    and a state other than '0' (absence) is held by exactly one taxon.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: A list of unique site positions (0-based, each listed once).

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    unique = []
    for i in range(nexus_obj.data.nchar):
        # state -> number of taxa coded with it at this site
        members = {}
        for _taxon, characters in nexus_obj.data:
            c = characters[i]
            if c in (u'?', u'-'):
                continue  # missing/gap data says nothing about membership
            members[c] = members.get(c, 0) + 1

        # any() records the site once even when both states are singletons
        # and neither is '0'; appending per matching state would list the
        # same position twice.
        if len(members) == 2 and any(
            state != '0' and count == 1 for state, count in members.items()
        ):
            unique.append(i)
    return unique
def count_site_values(nexus_obj, characters=('-', '?')):
    """
    Counts, per taxon, how many sites hold one of the values in
    `characters`.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param characters: An iterable of the characters to count
    :type characters: tuple

    :return: A dictionary mapping each taxon to its count.

    :raises TypeError: if `characters` is not iterable
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    try:
        # Only probe for iterability; `characters` must stay exactly as
        # passed so the membership tests below compare properly.
        iter(characters)
    except TypeError:
        raise TypeError("characters should be iterable")

    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    counts = {}
    for taxon, sites in nexus_obj.data:
        hits = sum(1 for site in sites if site in characters)
        counts[taxon] = counts.get(taxon, 0) + hits
    return counts
def run_deltree(deltree, nexus_obj, do_print=False):
    """
    Removes the trees selected by `deltree` from the nexus' trees block.

    Trees are numbered from 1 in iteration order; every tree whose number
    is in the result of `parse_deltree(deltree)` is dropped.

    :param deltree: A string describing the trees to be deleted.
    :type deltree: String

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance with the given trees removed.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=["trees"])

    to_delete = parse_deltree(deltree)
    if do_print:
        print("Deleting: %d trees" % len(to_delete))

    kept = []
    for number, tree in enumerate(nexus_obj.trees, 1):
        if number not in to_delete:
            kept.append(tree)
        elif do_print:
            print("Deleting tree %d" % number)

    nexus_obj.trees.trees = kept
    return nexus_obj
def run_removecomments(nexus_obj, do_print=False):
    """
    Strips the comments from every tree in the nexus' trees block.

    Each tree is passed through `nexus_obj.trees.remove_comments` and the
    results replace the stored trees.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance with the comments removed.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['trees'])

    cleaned = [
        nexus_obj.trees.remove_comments(tree) for tree in nexus_obj.trees
    ]

    if do_print:  # pragma: no cover
        print("Removed comments")

    nexus_obj.trees.trees = cleaned
    return nexus_obj
def run_removecomments(nexus_obj, do_print=False):
    """
    Strips the comments from every tree in the nexus' trees block.

    Each tree is passed through `nexus_obj.trees.remove_comments` and the
    results replace the stored trees.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance with the comments removed.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=["trees"])

    trees_block = nexus_obj.trees
    stripped = [trees_block.remove_comments(tree) for tree in nexus_obj.trees]

    if do_print:
        print("Removed comments")

    nexus_obj.trees.trees = stripped
    return nexus_obj
def new_nexus_without_sites(nexus_obj, sites_to_remove):
    """
    Builds a new NexusWriter holding the data of `nexus_obj` minus the
    sites listed in `sites_to_remove`.

    The remaining sites are renumbered consecutively from 0, and a comment
    recording the removed sites is added to the writer.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param sites_to_remove: A list of site numbers
    :type sites_to_remove: List

    :return: A NexusWriter instance

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    writer = NexusWriter()
    removed_listing = ",".join(["%s" % s for s in sites_to_remove])
    writer.add_comment(
        "Removed %d sites: %s" % (len(sites_to_remove), removed_listing)
    )

    # positions that survive, in their original order
    surviving = [
        pos for pos in range(nexus_obj.data.nchar)
        if pos not in sites_to_remove
    ]
    for new_pos, old_pos in enumerate(surviving):
        for taxon, row in nexus_obj.data:
            writer.add(taxon, new_pos, row[old_pos])
    return writer
def run_detranslate(nexus_obj):
    """
    Detranslates the trees in a nexus by calling `detranslate()` on its
    trees block.

    NOTE(review): presumably this replaces translate-table tokens in the
    trees with the taxon labels -- confirm against the trees block handler.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: The same NexusReader instance after detranslation.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['trees'])
    trees_block = nexus_obj.trees
    trees_block.detranslate()
    return nexus_obj
def print_character_stats(nexus_obj, character_index):
    """
    Prints the character/site statistics for a given `nexus_obj` and
    character index.

    For every state of the character (in sorted order) this prints the
    number of taxa with that state, the total number of taxa, the
    percentage, and then the wrapped list of those taxa.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param character_index: The character index of the character to
        summarise. It is looked up as given first and, failing that, after
        conversion with `int()`.
    :type character_index: Int or String

    :raises AssertionError: if nexus_obj is not a nexus
    :raises IndexError: if character_index is not in nexus data block
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=["data"])

    characters = nexus_obj.data.characters
    index = None
    if character_index in characters:
        index = character_index  # string index
    else:
        try:
            character_index = int(character_index)
        except (TypeError, ValueError):
            pass  # not numeric either; reported as missing below
        if character_index in characters:
            index = character_index

    if index is None:
        # The original formatted an undefined name here, so callers got a
        # NameError instead of the documented IndexError.
        raise IndexError(
            "Character '%s' is not in the nexus" % (character_index,)
        )

    states = {}
    for taxon, state in characters[index].items():
        states.setdefault(state, []).append(taxon)

    ntaxa = nexus_obj.data.ntaxa
    for state in sorted(states):
        members = states[state]
        # 100.0 forces float division so the percentage is not truncated
        # to 0 under Python 2 integer division.
        percentage = 100.0 * len(members) / ntaxa
        print("State: %s (%d / %d = %0.2f)" % (
            state, len(members), ntaxa, percentage,
        ))
        print("\n".join(wrapper.wrap(", ".join(members))))
        print("")
def run_detranslate(nexus_obj, do_print=False):
    """
    Detranslates the trees in a nexus by calling `detranslate()` on its
    trees block.

    NOTE(review): presumably this replaces translate-table tokens in the
    trees with the taxon labels -- confirm against the trees block handler.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
        (accepted but not used by this function)
    :type do_print: Boolean

    :return: The same NexusReader instance after detranslation.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=["trees"])
    trees_block = nexus_obj.trees
    trees_block.detranslate()
    return nexus_obj
def binarise(nexus_obj, one_nexus_per_block=False):
    """
    Returns a binary variant of the given `nexus_obj`.
    If `one_nexus_per_block` then we return a list of NexusWriter instances.

    Every labelled character is recoded with `_recode_to_binary`; each
    position `j` of the recoded states becomes a new character labelled
    `<label>_<j>`.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param one_nexus_per_block: Whether to return a single NexusWriter, or
        a list of NexusWriter's (one per character)
    :type one_nexus_per_block: Boolean

    :return: A NexusWriter instance or a list of NexusWriter instances.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    nexuslist = []
    n = NexusWriter()

    for i in sorted(nexus_obj.data.charlabels):
        label = nexus_obj.data.charlabels[i]  # character label
        char = nexus_obj.data.characters[label]  # character dict (taxon->state)
        recoding = _recode_to_binary(char)  # recode

        # Width of the recoded character, taken from the first entry.
        # `recoding.keys()[0]` only works on Python 2 (dict views are not
        # subscriptable on Python 3); an empty recoding yields no columns.
        new_char_length = len(next(iter(recoding.values()), ()))

        # loop over recoded data
        for j in range(new_char_length):
            new_label = "%s_%d" % (str(label), j)
            for taxon, state in recoding.items():
                n.add(taxon, new_label, state[j])

        if one_nexus_per_block:
            # one writer per original character: store it and start afresh
            nexuslist.append(n)
            n = NexusWriter()

    if one_nexus_per_block:
        return nexuslist
    return n
def print_character_stats(nexus_obj, character_index):
    """
    Prints the character/site statistics for a given `nexus_obj` and
    character index.

    For every state of the character (in sorted order) this prints the
    number of taxa with that state, the total number of taxa, the
    percentage, and then the wrapped list of those taxa.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param character_index: The character index of the character to
        summarise. It is looked up as given first and, failing that, after
        conversion with `int()`.
    :type character_index: Int or String

    :raises AssertionError: if nexus_obj is not a nexus
    :raises IndexError: if character_index is not in nexus data block
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    characters = nexus_obj.data.characters
    index = None
    if character_index in characters:
        index = character_index  # string index
    else:
        try:
            character_index = int(character_index)
        except (TypeError, ValueError):
            pass  # not numeric either; reported as missing below
        if character_index in characters:
            index = character_index

    if index is None:
        # The original formatted an undefined name here, so callers got a
        # NameError instead of the documented IndexError.
        raise IndexError(
            "Character '%s' is not in the nexus" % (character_index,)
        )

    states = {}
    for taxon, state in characters[index].items():
        states.setdefault(state, []).append(taxon)

    ntaxa = nexus_obj.data.ntaxa
    for state in sorted(states):
        members = states[state]
        # 100.0 forces float division so the percentage is not truncated
        # to 0 under Python 2 integer division.
        percentage = 100.0 * len(members) / ntaxa
        print('State: %s (%d / %d = %0.2f)' % (
            state, len(members), ntaxa, percentage))
        print("\n".join(wrapper.wrap(", ".join(members))))
        print("")
def shufflenexus(nexus_obj, resample=False):
    """
    Shuffles the characters between each taxon to create a new nexus.

    For each of the `resample` output sites, an existing character is
    picked at random, its states are shuffled across the taxa, and the
    result is written as site `i` of a new NexusWriter.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param resample: The number of characters to resample. If set to False,
        then the number of characters will equal the number of characters
        in the original data file.
    :type resample: Integer

    :return: A NexusWriter instance holding the shuffled data.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises ValueError: if resample is not False or a positive Integer
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    if resample is False:
        resample = nexus_obj.data.nchar

    try:
        resample = int(resample)
    except (TypeError, ValueError):
        # None, lists etc. make int() raise TypeError; surface them with
        # the same documented ValueError as unparsable strings.
        raise ValueError('resample must be a positive integer or False!')

    if resample < 1:
        raise ValueError('resample must be a positive integer or False!')

    newnexus = NexusWriter()
    newnexus.add_comment(
        "Randomised Nexus generated from %s" % nexus_obj.filename
    )

    taxa = nexus_obj.data.taxa
    for i in range(resample):
        # pick existing character
        character = randrange(0, nexus_obj.data.nchar)
        chars = nexus_obj.data.characters[character]
        site_values = [chars[taxon] for taxon in taxa]
        shuffle(site_values)
        # hand the shuffled values out in taxon order (same pairing as
        # popping from the front, without the repeated list shifting)
        for taxon, value in zip(taxa, site_values):
            newnexus.add(taxon, i, value)
    return newnexus
def binarise(nexus_obj, one_nexus_per_block=False):
    """
    Returns a binary variant of the given `nexus_obj`.
    If `one_nexus_per_block` then we return a list of NexusWriter instances.

    Every labelled character is recoded with `_recode_to_binary`; each
    position `j` of the recoded states becomes a new character labelled
    `<label>_<j>`.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param one_nexus_per_block: Whether to return a single NexusWriter, or
        a list of NexusWriter's (one per character)
    :type one_nexus_per_block: Boolean

    :return: A NexusWriter instance or a list of NexusWriter instances.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    nexuslist = []
    n = NexusWriter()

    for i in sorted(nexus_obj.data.charlabels):
        label = nexus_obj.data.charlabels[i]  # character label
        char = nexus_obj.data.characters[label]  # character dict (taxon->state)
        recoding = _recode_to_binary(char)  # recode

        # Width of the recoded character, taken from the first entry.
        # `recoding.keys()[0]` only works on Python 2 (dict views are not
        # subscriptable on Python 3); an empty recoding yields no columns.
        first_states = next(iter(recoding.values()), ())
        new_char_length = len(first_states)

        # loop over recoded data
        for j in range(new_char_length):
            new_label = "%s_%d" % (str(label), j)
            for taxon, state in recoding.items():
                n.add(taxon, new_label, state[j])

        if one_nexus_per_block:
            # one writer per original character: store it and start afresh
            nexuslist.append(n)
            n = NexusWriter()

    if one_nexus_per_block:
        return nexuslist
    return n
def run_resample(resample, nexus_obj, do_print=False):
    """
    Resamples the trees in a nexus, keeping every `resample`-th tree.

    Trees are numbered from 1; a tree is kept when its number is a
    multiple of `resample`. The trees of `nexus_obj` are replaced in place.

    :param resample: Resample every `resample` trees
    :type resample: Integer

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance with only the kept trees.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    :raises ValueError: if `resample` is not convertible to an integer,
        or is zero
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['trees'])

    try:
        every = int(resample)
    except (TypeError, ValueError):
        # Both failure modes of int() get the same descriptive message.
        raise ValueError(
            "Invalid resample option %s - should be an integer" % (resample,)
        )
    if every == 0:
        # `index % 0` below would otherwise fail with ZeroDivisionError.
        raise ValueError(
            "Invalid resample option %s - should be a non-zero integer"
            % (resample,)
        )

    if do_print:  # pragma: no cover
        print('Resampling ever %d trees' % every)

    new = []
    seen = 0
    for seen, tree in enumerate(nexus_obj.trees, 1):
        if seen % every == 0:
            new.append(tree)
    ignore_count = seen - len(new)

    if do_print:  # pragma: no cover
        print("Ignored %d trees" % ignore_count)

    nexus_obj.trees.trees = new
    return nexus_obj
def run_resample(resample, nexus_obj, do_print=False):
    """
    Resamples the trees in a nexus, keeping every `resample`-th tree.

    Trees are numbered from 1; a tree is kept when its number is a
    multiple of `resample`. The trees of `nexus_obj` are replaced in place.

    :param resample: Resample every `resample` trees
    :type resample: Integer

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param do_print: flag to print() logging information or not
    :type do_print: Boolean

    :return: The same NexusReader instance with only the kept trees.

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `trees` block
    :raises ValueError: if `resample` is not convertible to an integer,
        or is zero
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['trees'])

    try:
        every = int(resample)
    except (TypeError, ValueError):
        # Both failure modes of int() get the same descriptive message.
        raise ValueError(
            "Invalid resample option %s - should be an integer" % (resample,)
        )
    if every == 0:
        # `index % 0` below would otherwise fail with ZeroDivisionError.
        raise ValueError(
            "Invalid resample option %s - should be a non-zero integer"
            % (resample,)
        )

    if do_print:  # pragma: no cover
        print('Resampling ever %d trees' % every)

    new = []
    ignore_count = 0
    for index, tree in enumerate(nexus_obj.trees, 1):
        if index % every:
            ignore_count += 1
        else:
            new.append(tree)

    if do_print:  # pragma: no cover
        print("Ignored %d trees" % ignore_count)

    nexus_obj.trees.trees = new
    return nexus_obj
def print_taxa_stats(nexus_obj):
    """
    Prints the taxa state statistics for a given `nexus_obj`.

    One line is printed per taxon (in sorted order): the taxon name padded
    to 20 characters, followed by `state x count` pairs for every state
    found in that taxon's row of the data matrix.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    matrix = nexus_obj.data.matrix
    for taxon in sorted(matrix):
        tally = {}
        for site in matrix[taxon]:
            tally[site] = tally.get(site, 0) + 1

        summary = ", ".join(
            ['%s x %s' % (k, tally[k]) for k in sorted(tally)]
        )
        # Single-argument print() gives the same "name summary" line on
        # Python 2 and 3; the print statement is a SyntaxError on Python 3.
        print("%s %s" % (taxon.ljust(20), summary))
def print_taxa_stats(nexus_obj):
    """
    Prints the taxa state statistics for a given `nexus_obj`.

    One line is printed per taxon (in sorted order): the taxon name padded
    to 20 characters, followed by `state x count` pairs for every state
    found in that taxon's row of the data matrix.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    for taxon in sorted(nexus_obj.data.matrix):
        counts = {}
        for site in nexus_obj.data.matrix[taxon]:
            counts[site] = counts.get(site, 0) + 1

        pairs = ['%s x %s' % (k, counts[k]) for k in sorted(counts)]
        # Single-argument print() gives the same "name summary" line on
        # Python 2 and 3; the print statement is a SyntaxError on Python 3.
        print("%s %s" % (taxon.ljust(20), ", ".join(pairs)))
def multistatise(nexus_obj):
    """
    Collapses the sites of `nexus_obj` into a single multistate character.

    Sites are lettered A, B, C, ... in iteration order. Whenever a taxon
    has the value '1' at a site, that site's letter is added for the taxon
    under one shared character label (the nexus' `short_filename` when it
    has one, otherwise 1). Taxa that end up with no entry under that label
    are given '?'.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: A NexusReader instance (the writer converted via
        `_convert_to_reader()`)

    :raises AssertionError: if nexus_obj is not a nexus, if a state is not
        a string, or if the A-Z letters run out
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    writer = NexusWriter()
    label = getattr(nexus_obj, 'short_filename', 1)
    # taxa last seen with a missing/gap value and no '1' since
    unresolved = []

    sites = nexus_obj.data.characters.items()
    for position, (site, states) in enumerate(sites):
        letter = chr(65 + position)
        for taxon, value in states.items():
            assert value == str(value)
            if value in ('?', '-'):
                unresolved.append(taxon)

            if value == '1':
                writer.add(taxon, label, letter)
                if taxon in unresolved:
                    # remove taxon if we've seen a non-? entry
                    unresolved.remove(taxon)

        assert position + 1 < 26, \
            "Too many characters to handle! - run out of A-Z"

    # add missing state for anything that is all missing, and has not been
    # observed anywhere
    for taxon in nexus_obj.data.taxa:
        if taxon not in writer.data[str(label)]:
            writer.add(taxon, label, '?')
    return writer._convert_to_reader()
def combine_nexuses(nexuslist):
    """
    Merges the data blocks of several NexusReader instances into one
    NexusWriter.

    Each character is relabelled `<nexus label>.<character label>`, where
    the nexus label is the reader's `short_filename` without extension,
    else its `label` attribute, else its 1-based position in `nexuslist`.
    A comment recording the character range of each nexus is added.

    :param nexuslist: A list of NexusReader instances
    :type nexuslist: List

    :return: A NexusWriter instance

    :raises AssertionError: if an entry of nexuslist is not a nexus
    :raises NexusFormatException: if a nexus file does not have a `data`
        block
    """
    combined = NexusWriter()
    offset = 0
    for position, reader in enumerate(nexuslist, 1):
        check_for_valid_NexusReader(reader, required_blocks=['data'])

        if hasattr(reader, 'short_filename'):
            source_name = os.path.splitext(reader.short_filename)[0]
        elif hasattr(reader, 'label'):
            source_name = reader.label
        else:
            source_name = str(position)

        last = offset + reader.data.nchar - 1
        combined.add_comment("%d - %d: %s" % (offset, last, source_name))

        for column, site in enumerate(sorted(reader.data.characters)):
            states = reader.data.characters.get(site)
            offset += 1
            # fall back to the 1-based column number when the character
            # has no label of its own
            own_label = reader.data.charlabels.get(column, column + 1)
            full_label = '%s.%s' % (source_name, own_label)

            for taxon, value in states.items():
                combined.add(taxon, full_label, value)
    return combined
def test_valid_NexusReader(self):
    """A freshly constructed NexusReader passes validation."""
    reader = NexusReader()
    check_for_valid_NexusReader(reader)
def test_valid_with_required_block_two(self):
    """example2.nex passes validation when `data` and `taxa` are required."""
    example_path = os.path.join(EXAMPLE_DIR, 'example2.nex')
    reader = NexusReader(example_path)
    check_for_valid_NexusReader(reader, ['data', 'taxa'])
def test_failure_on_required_block_two(self):
    """Requiring an `r8s` block of example2.nex raises NexusFormatException."""
    example_path = os.path.join(EXAMPLE_DIR, 'example2.nex')
    reader = NexusReader(example_path)
    self.assertRaises(
        NexusFormatException,
        check_for_valid_NexusReader, reader, ['r8s'],
    )