#!/usr/bin/env python import sys, logging from dendropy.utility.messaging import get_logger _LOG = get_logger('sankoff') from dendropy import DataSet _DEBUGGING = True verbose = False def get_min_edge_costs(cost_row, costs_for_one_child): min_score = cost_row[0] + costs_for_one_child[0] for i in xrange(1, len(cost_row)): y = cost_row[i] + costs_for_one_child[i] if y < min_score: min_score = y return min_score def get_min_cost(step_mat_row, child_costs): total_cost = 0 for e in child_costs: total_cost = total_cost + get_min_edge_costs(step_mat_row, e) return total_cost def sankoff(postorder_node_list, taxa_to_state_set_map, step_matrix): max_cost = 0 num_states = len(step_matrix) for row in step_matrix: for cell in row:
#!/usr/bin/env python import sys import copy import logging import itertools from dendropy.utility.messaging import get_logger from dendropy.treesplit import encode_splits from dendropy.treemanip import collapse_edge from dendropy.treecalc import symmetric_difference from dendropy import format_split, Edge, TaxonSet, DataSet _LOG = get_logger('scripts.long_branch_symmdiff') verbose = False def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False): """Returns matrix of the symmetric_differences between trees after all internal edges with lengths < `edge_len_threshold` have been collapsed. If `copy_trees` is True then the trees will be copied first (if False, then the trees may will have their short edges collapsed on exit). """ if copy_trees: tree_list = [copy.copy(i) for i in trees_to_compare] else: tree_list = list(trees_to_compare) n_trees = len(tree_list) _LOG.debug('%d Trees to compare:\n%s\n' % (n_trees, '\n'.join([str(i) for i in tree_list]))) if n_trees < 2: return [0 for t in tree_list]
def _get_logger(self):
    """Return this object's logger, creating and caching it on first use.

    The logger is cached on the instance as ``_logger``. When that
    attribute is missing or ``None``, a new logger named after the
    instance's class is requested from ``messaging.get_logger`` and stored.
    """
    cached = getattr(self, "_logger", None)
    if cached is not None:
        return cached
    # No usable logger yet: build one keyed on the concrete class name.
    self._logger = messaging.get_logger(self.__class__.__name__)
    return self._logger
#!/usr/bin/env python import math import logging from dendropy.utility.messaging import get_logger _LOG = get_logger('offspring') class SingleLocusGenotype(object): "object that has an allele 1 & an allele 2" def __init__(self, first, second): self.first = min(first, second) self.second = max(first, second) def __str__(self): return repr(self.first) + '/' + repr(self.second) def calcLnL(self, outcrossing_prob, allele_freq, mom_g): prob_offspring_given_selfing = self.calc_prob_offspring_given_selfing(mom_g) _LOG.debug(str(self) + " from " + str(mom_g) + " P(self) = " + str(prob_offspring_given_selfing)) prob_offspring_given_outcrossing = self.calc_prob_offspring_given_outcrossing(allele_freq, mom_g) _LOG.debug(str(self) + " from " + str(mom_g) + " P(outcross) = " + str(prob_offspring_given_outcrossing)) return math.log(((1.0 - outcrossing_prob) * prob_offspring_given_selfing) + (outcrossing_prob * prob_offspring_given_outcrossing)) def calc_prob_offspring_given_selfing(self, mom_g): if mom_g.first == mom_g.second: # mom is homozygous if self.first == self.second and (self.first == mom_g.first): return 1.0 return 0.0 else: if (self.first == self.second): # mom is het
"""
Tree simulation and generation.
"""

import sys
import copy
import math

from dendropy.utility import GLOBAL_RNG
from dendropy.mathlib import probability
from dendropy import coalescent
from dendropy import dataobject
from dendropy import treemanip
from dendropy.utility.messaging import get_logger
_LOG = get_logger(__name__)


class TreeSimTotalExtinctionException(Exception):
    """Raised when a branching process ends with every lineage extinct."""

    def __init__(self, *args, **kwargs):
        # Forward all arguments unchanged to the base ``Exception``.
        super(TreeSimTotalExtinctionException, self).__init__(*args, **kwargs)


def star_tree(taxon_set):
    """Build and return a star tree over the taxa in `taxon_set`.

    A new ``dataobject.Tree`` is created for `taxon_set`, and one child
    is added to its seed node for each taxon.
    """
    tree = dataobject.Tree(taxon_set=taxon_set)
    for member in taxon_set:
        tree.seed_node.new_child(taxon=member)
    return tree
#!/usr/bin/env python import sys import copy import logging import itertools from dendropy.utility.messaging import get_logger from dendropy.treesplit import encode_splits from dendropy.treemanip import collapse_edge from dendropy.treecalc import symmetric_difference from dendropy import format_split, Edge, TaxonSet, DataSet _LOG = get_logger('scripts.long_branch_symmdiff') verbose = False def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False): """Returns matrix of the symmetric_differences between trees after all internal edges with lengths < `edge_len_threshold` have been collapsed. If `copy_trees` is True then the trees will be copied first (if False, then the trees may will have their short edges collapsed on exit). """ if copy_trees: tree_list = [copy.copy(i) for i in trees_to_compare] else: tree_list = list(trees_to_compare) n_trees = len(tree_list)
#!/usr/bin/env python import sys from dendropy.utility.messaging import get_logger _LOG = get_logger('sankoff') from dendropy import DataSet from dendropy.utility.error import DataParseError _DEBUGGING = True verbose = False def get_min_edge_costs(step_mat_row, child_costs): min_score = step_mat_row[0] + child_costs[0] for i in xrange(1, len(step_mat_row)): y = step_mat_row[i] + child_costs[i] if y < min_score: min_score = y return min_score def get_min_cost(step_mat_row, child_costs): total_cost = 0 for e in child_costs: total_cost = total_cost + get_min_edge_costs(step_mat_row, e) return total_cost def sankoff(postorder_node_list, step_matrix, taxa_to_state_set_map): max_cost = 0 num_states = len(step_matrix) for row in step_matrix: for cell in row: if cell > max_cost: max_cost = cell impossible_cost = 1 + max_cost
"""
Tree simulation and generation.
"""

import sys
import copy
import math

from dendropy.utility import GLOBAL_RNG
from dendropy.utility import probability
from dendropy import coalescent
from dendropy import dataobject
from dendropy import treemanip
from dendropy.utility.messaging import get_logger
_LOG = get_logger(__name__)


class TreeSimTotalExtinctionException(Exception):
    """Raised when a branching process ends with every lineage extinct."""

    def __init__(self, *args, **kwargs):
        # Forward all arguments unchanged to the base ``Exception``.
        super(TreeSimTotalExtinctionException, self).__init__(*args, **kwargs)


def star_tree(taxon_set):
    """Build and return a star tree over the taxa in `taxon_set`.

    A new ``dataobject.Tree`` is created for `taxon_set`, and one child
    is added to its seed node for each taxon.
    """
    tree = dataobject.Tree(taxon_set=taxon_set)
    for member in taxon_set:
        tree.seed_node.new_child(taxon=member)
    return tree
#!/usr/bin/env python import sys import os import subprocess from dendropy.utility.messaging import get_logger from dendropy.treecalc import fitch_down_pass, fitch_up_pass from dendropy import DataSet from dendropy.utility.error import DataParseError from dendropy.utility.textutils import escape_nexus_token _DEBUGGING = True _LOG = get_logger('geodispersal') verbose = False AREA_NAME_LIST = [] col_width = 17 def warn(msg): _LOG.warn(msg) LAST_COMMAND = '' def write_as_nexus(stream, patterns, label): global LAST_COMMAND stream.write("\n[!%s ]\n" % label) p = patterns[0] num_chars = len(patterns) num_areas = len(p) if num_areas < len(AREA_NAME_LIST): warn('%d labels were found in the labels file, but only %d areas were read in the input NEXUS files' % ( len(AREA_NAME_LIST), num_areas)) elif num_areas > len(AREA_NAME_LIST): warn('Only %d labels were found in the labels file, but %d areas were read in the input NEXUS files' % ( len(AREA_NAME_LIST),
## ## If you use this work or any portion thereof in published work, ## please cite it as: ## ## Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library ## for phylogenetic computing. Bioinformatics 26: 1569-1571. ## ############################################################################## """ Facultative use of NCL for NEXUS parsing. """ import os from dendropy.utility.messaging import get_logger _LOG = get_logger("dataio.ncl") DENDROPY_NCL_AVAILABILITY = False try: from nexusclasslib import nclwrapper DENDROPY_NCL_AVAILABILITY = True except ImportError: DENDROPY_NCL_AVAILABILITY = False else: import os from threading import Thread, Event from dendropy import dataobject from dendropy.dataio import nexusreader_py from dendropy.dataio import nexustokenizer from dendropy.utility import iosys
Wrapper for interacting with RSPR """ import subprocess import uuid import tempfile import socket import random import os import sys import dendropy from dendropy.utility.messaging import get_logger from dendropy.utility import processio from dendropy.utility import textprocessing _LOG = get_logger("interop.rspr") HOSTNAME = socket.gethostname() PID = os.getpid() class Rspr(object): """ This class wraps all attributes and input needed to make a call to RSPR. https://github.com/cwhidden/rspr RSPR: Calculate approximate and exact Subtree Prune and Regraft (rSPR) distances and the associated maximum agreement forests (MAFs) between pairs
import subprocess from cStringIO import StringIO import uuid import tempfile import socket import random import os import sys from optparse import OptionGroup from optparse import OptionParser import dendropy from dendropy.utility.messaging import get_logger _LOG = get_logger("interop.seqgen") HOSTNAME = socket.gethostname() PID = os.getpid() def _get_strongly_unique_tempfile(dir=None): return tempfile.NamedTemporaryFile(dir=dir, prefix="dendropy_tempfile-{0}-{1}-{2}".format(HOSTNAME, PID, uuid.uuid4())) def _get_tempfile(dir=None): return tempfile.NamedTemporaryFile(dir=dir) class SeqGen(object): """ This class wraps all attributes and input needed to make a call to SeqGen. """
#!/usr/bin/env python #!/usr/bin/env python import sys import os import subprocess from dendropy.utility.messaging import get_logger from dendropy.treecalc import fitch_down_pass, fitch_up_pass from dendropy import DataSet from dendropy.utility.error import DataParseError from dendropy.utility.textutils import escape_nexus_token _DEBUGGING = True _LOG = get_logger('geodispersal') verbose = False AREA_NAME_LIST = [] col_width = 17 def warn(msg): _LOG.warn(msg) LAST_COMMAND = '' def write_as_nexus(stream, patterns, label): global LAST_COMMAND stream.write("\n[!%s ]\n" % label) p = patterns[0] num_chars = len(patterns) num_areas = len(p) if num_areas < len(AREA_NAME_LIST):
except ImportError: from io import StringIO # Python 3 import uuid import tempfile import socket import random import os import sys from optparse import OptionGroup from optparse import OptionParser import dendropy from dendropy.utility.messaging import get_logger from dendropy.utility import processio _LOG = get_logger("interop.seqgen") HOSTNAME = socket.gethostname() PID = os.getpid() def _get_strongly_unique_tempfile(dir=None): return tempfile.NamedTemporaryFile(dir=dir, prefix="dendropy_tempfile-{0}-{1}-{2}".format(HOSTNAME, PID, uuid.uuid4())) def _get_tempfile(dir=None): return tempfile.NamedTemporaryFile(dir=dir) class SeqGen(object): """ This class wraps all attributes and input needed to make a call to SeqGen. """
def main():
    """Parse command-line options, resolve the requested tests, and run them.

    Positional ``TEST`` arguments are handled in one of two ways:

    - Names starting with "@" are looked up in the table of pre-defined
      test groups and contribute a module-name regular expression used to
      discover matching test modules.
    - Any other name is normalized (path separators become dots, a
      trailing ".py" is dropped) and qualified into the ``dendropy.test``
      package namespace.

    When neither names nor groups are given, all discoverable test modules
    are run.

    Side effects: sets the logging-level environment variable (and, with
    ``--fail-incomplete``, the fail-incomplete one) named by ``metavar``;
    calls ``sys.exit`` for ``--help-testgroups``, ``--list-only`` and
    unrecognized group names.
    """
    group_names = (
        ("@all", ".*"),
        ("@datamodel", ".*_datamodel_.*"),
        ("@dataio", ".*_dataio_.*"),
        ("@newick", ".*_newick_.*"),
        ("@tree", ".*_tree_.*"),
    )
    test_group_patterns = collections.OrderedDict(group_names)
    test_group_names = list(test_group_patterns)

    parser = argparse.ArgumentParser()
    # Adjacent string literals are concatenated verbatim, so every fragment
    # except the last must end with a space to keep words from running
    # together in the rendered help text.
    parser.add_argument("test_names",
            metavar="TEST",
            nargs="*",
            help=(
                "Name of test(s) to run. These can be (dot-)qualified module, test "
                "case, or test name (e.g., 'test_module', 'test_module.TestCase1', "
                "'test_module.TestCase1.test1') or special pre-defined groups of "
                "tests (e.g., '@datamodel', '@dataio'). Type '--help-testgroups' for "
                "a list of available groups."))
    parser.add_argument("--help-testgroups",
            action="store_true",
            default=False,
            help="Show list of available test groups and exit.")
    parser.add_argument("--list-only",
            action="store_true",
            default=False,
            help="Do not actually run tests: just print list of test module names and exit.")
    parser.add_argument("-v", "--verbosity",
            default=3,
            type=int,
            help="Messaging noisiness (default: %(default)s)")
    parser.add_argument("--logging-level",
            default=os.environ.get(metavar.LOGGING_LEVEL_ENVAR, "NOTSET"),
            choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
            help="Test logging level (default: '%(default)s')")
    parser.add_argument("-f", "--fail-fast",
            action="store_true",
            default=False,
            help="Stop the test run on the first error or failure.")
    parser.add_argument("-I", "--fail-incomplete",
            action="store_true",
            default=False,
            help="Fail incomplete or partially-complete test stubs.")
    args = parser.parse_args()

    if args.help_testgroups:
        out = sys.stdout
        out.write("Available special test groups:\n")
        for name in test_group_names:
            out.write(" - {}\n".format(name))
        sys.exit(0)

    # Set logging level: exported through the environment before the
    # logger is requested.
    os.environ[metavar.LOGGING_LEVEL_ENVAR] = args.logging_level
    _LOG = messaging.get_logger("dendropy")

    # Set test specifications
    if args.fail_incomplete:
        os.environ[metavar.FAIL_INCOMPLETE_TESTS_ENVAR] = "1"

    # get test modules
    test_names = []
    filter_patterns = []
    for name in args.test_names:
        if name is None:
            continue
        if name.startswith("@"):
            # Special group: translate to its module-name pattern.
            try:
                filter_patterns.append(test_group_patterns[name])
            except KeyError:
                sys.exit("Unrecognized test group name '{}'. Accepted names: {}".format(name, test_group_names))
        else:
            # Accept file-system style paths as well as dotted names.
            name = name.replace(os.sep, ".")
            if name.endswith(".py"):
                name = name[:-3]
            if not name.startswith("dendropy.test."):
                if name.startswith("test."):
                    name = "dendropy." + name
                else:
                    name = "dendropy.test." + name
            test_names.append(name)

    if not test_names and not filter_patterns:
        test_names = dendropytest.discover_test_module_paths()  # get all
    if filter_patterns:
        test_names.extend(dendropytest.discover_test_module_paths(filter_patterns))
    # De-duplicate and give the run a stable order.
    test_names = sorted(set(test_names))

    # 0: nothing
    # 1: errors and mishaps only + 0
    # 2: warnings + 1
    # 3: general messages + 2
    if args.verbosity >= 3 or args.list_only:
        # A requested listing is program output (stdout); otherwise the
        # list is only a diagnostic (stderr).
        if args.list_only:
            out = sys.stdout
        else:
            out = sys.stderr
        out.write("DendroPy tests to be run:\n")
        for mp in test_names:
            out.write(" + {}\n".format(mp))
    if args.list_only:
        sys.exit(0)

    tests = unittest.defaultTestLoader.loadTestsFromNames(test_names)
    test_suite = unittest.TestSuite(tests)
    test_runner = unittest.TextTestRunner(verbosity=args.verbosity, failfast=args.fail_fast)
    # NOTE(review): the runner's result is discarded, so the exit status does
    # not reflect test failures; kept as-is to avoid changing what callers see.
    test_runner.run(test_suite)
#!/usr/bin/env python import math import logging from dendropy.utility.messaging import get_logger _LOG = get_logger('offspring') class SingleLocusGenotype(object): "object that has an allele 1 & an allele 2" def __init__(self, first, second): self.first = min(first, second) self.second = max(first, second) def __str__(self): return repr(self.first) + '/' + repr(self.second) def calcLnL(self, outcrossing_prob, allele_freq, mom_g): prob_offspring_given_selfing = self.calc_prob_offspring_given_selfing( mom_g) _LOG.debug( str(self) + " from " + str(mom_g) + " P(self) = " + str(prob_offspring_given_selfing)) prob_offspring_given_outcrossing = self.calc_prob_offspring_given_outcrossing( allele_freq, mom_g) _LOG.debug( str(self) + " from " + str(mom_g) + " P(outcross) = " + str(prob_offspring_given_outcrossing)) return math.log(( (1.0 - outcrossing_prob) * prob_offspring_given_selfing) + (outcrossing_prob * prob_offspring_given_outcrossing))
import sys import subprocess import tempfile import re import csv import dendropy from dendropy.utility import textprocessing from dendropy.utility import error from dendropy.utility import metavar from dendropy.utility import container from dendropy.utility import messaging from dendropy.utility import filesys from dendropy.utility import processio from dendropy.dataio import nexuswriter _LOG = messaging.get_logger(__name__) import dendropy PAUP_PATH = os.environ.get(metavar.DENDROPY_PAUP_PATH_ENVAR, "NONE") if PAUP_PATH == "NONE": DENDROPY_PAUP_INTEROPERABILITY = False else: DENDROPY_PAUP_INTEROPERABILITY = True STANDARD_PREAMBLE = "set warnreset=no increase=auto warnroot=no warnReset=no warnTree=no warnTSave=no warnBlkName=no errorStop=no errorBeep=no queryBeep=no" class PaupService(object): @staticmethod def call(
## ## Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library ## for phylogenetic computing. Bioinformatics 26: 1569-1571. ## ############################################################################## """ Support for coverage analysis. """ import unittest import shutil import sys from optparse import OptionParser from dendropy.utility import messaging _LOG = messaging.get_logger(__name__) DENDROPY_COVERAGE_ANALYSIS_AVAILABLE = False try: from setuptools import Command except ImportError: _LOG.warn( "setuptools.Command could not be imported: setuptools extensions not available" ) else: try: import coverage except ImportError: _LOG.warn( "coverage could not be imported: test coverage analysis not available" )