Пример #1
0
    def optimize_model(self, gtree, stree, gene2species):
        """Optimizes the model

        Checks that both trees are rooted and binary and that
        phylo.reconcile succeeds on them.  The first time this runs
        (self.printed is false) it also prints the gene tree, self.stree,
        and the value returned by phylo.reconcile, then sets self.printed.

        Raises Exception if a tree is not rooted and binary, or if the
        reconciliation call fails.
        """
        CostModel.optimize_model(self, gtree, stree, gene2species)

        # ensure gtree and stree are both rooted and binary
        if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
            raise Exception("gene tree must be rooted and binary")
        if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
            raise Exception("species tree must be rooted and binary")
        try:
            recon = phylo.reconcile(gtree, stree, gene2species)
        except Exception:
            # only translate ordinary errors; let KeyboardInterrupt and
            # SystemExit propagate untouched
            raise Exception("problem mapping gene tree to species tree")

        if not self.printed:
            import pprint

            # one buffer per tree: reusing a single buffer would repeat the
            # gene tree drawing in front of the species tree drawing
            gene_out = StringIO.StringIO()
            treelib.draw_tree(gtree, out=gene_out, minlen=5, maxlen=5)
            print("gene tree:\n")
            print(gene_out.getvalue())

            spec_out = StringIO.StringIO()
            treelib.draw_tree(self.stree, out=spec_out, minlen=5, maxlen=5)
            print("spec tree:\n")
            print(spec_out.getvalue())
            pprint.pprint(recon)

            self.printed = True
Пример #2
0
    def __init__(self, extra):
        """Initializes the model

        Registers the -D/--dupcost and -L/--losscost options (both floats
        defaulting to 1.0) and hands ``extra`` to CostModel._parse_args.
        """
        CostModel.__init__(self, extra)

        self.VERSION = "1.0.1"
        self.mincost = 0

        # both cost options share the same type and default
        float_cost = {"default": 1.0, "type": "float"}

        self.parser = optparse.OptionParser(prog="DupLossModel")
        self.parser.add_option("-D", "--dupcost", dest="dupcost",
                               metavar="<dup cost>",
                               help="duplication cost (default: 1.0)",
                               **float_cost)
        self.parser.add_option("-L", "--losscost", dest="losscost",
                               metavar="<loss cost>",
                               help="loss cost (default: 1.0)",
                               **float_cost)

        CostModel._parse_args(self, extra)
Пример #3
0
    def __init__(self, extra):
        """Initializes the model

        Registers the integer -D/--dupcost, -T/--transfercost and
        -L/--losscost options, hands ``extra`` to CostModel._parse_args,
        and creates a temporary file whose path is kept in self.treefile.
        """
        CostModel.__init__(self, extra)

        self.VERSION = "0.1.0"
        self.mincost = 0

        self.parser = optparse.OptionParser(prog="DTLModel")
        self.parser.add_option(
            "-D", "--dupcost",
            dest="dupcost", metavar="<dup cost>",
            type="int", default=2,
            help="duplication cost (default: 2)")
        self.parser.add_option(
            "-T", "--transfercost",
            dest="transfercost", metavar="<transfer cost>",
            type="int", default=3,
            help="transfer cost (default: 3)")
        self.parser.add_option(
            "-L", "--losscost",
            dest="losscost", metavar="<loss cost>",
            type="int", default=1,
            help="loss cost (default: 1)")

        CostModel._parse_args(self, extra)

        # reserve a temporary file path; the descriptor itself is not needed
        handle, self.treefile = tempfile.mkstemp()
        os.close(handle)
Пример #4
0
    def __init__(self, extra):
        """Initializes the model

        Runs the CostModel initializer, then sets the version string, the
        minimum cost, and a ``printed`` flag that starts out False.
        """
        CostModel.__init__(self, extra)

        self.printed = False
        self.mincost = 0
        self.VERSION = "1.0.1"
    def __init__(self, extra):
        """Initializes the model

        Runs the CostModel initializer, sets the version string, minimum
        cost and a counter starting at 0, and opens 'matched.txt' for
        writing in the current directory (kept open as self.log).
        """
        CostModel.__init__(self, extra)

        self.count = 0
        self.mincost = 0
        self.VERSION = "1.0.1"

        # NOTE(review): this handle is never closed in this method
        self.log = open("matched.txt", "w")
Пример #6
0
    def __init__(self, extra):
        """Initializes the model

        Sets version and minimum cost, hands ``extra`` to
        CostModel._parse_args, and creates a temporary file whose path is
        kept in self.treefile.
        """
        CostModel.__init__(self, extra)

        self.mincost = 0
        self.VERSION = "0.1.0"

        CostModel._parse_args(self, extra)

        # reserve a temporary file path; the descriptor itself is not needed
        handle, self.treefile = tempfile.mkstemp()
        os.close(handle)
Пример #7
0
    def __init__(self, extra):
        """Initializes the model

        Registers the options for the ranger-dtl-U wrapper (--cmd,
        -D/--dupcost, -T/--transcost/--transfercost, -L/--losscost, --seed,
        --tmp), hands ``extra`` to CostModel._parse_args, and creates a
        temporary file whose path is kept in self.treefile.

        Raises Exception if --tmp is given but is not an existing
        directory, or (on cygwin) if the temporary file does not live under
        the current working directory.
        """
        CostModel.__init__(self, extra)

        self.VERSION = "0.1.2"
        self.mincost = 0

        parser = optparse.OptionParser(prog="DTLModel")
        parser.add_option("--cmd", dest="cmd",
                          metavar="<ranger-dtl-U command>",
                          default="ranger-dtl-U",
                          help="ranger-dtl-U command (default: \"ranger-dtl-U\")")
        parser.add_option("-D", "--dupcost", dest="dupcost",
                          metavar="<duplication cost>",
                          default=2, type="int",
                          help="duplication cost, integer only (default: 2)")
        parser.add_option("-T", "--transcost", "--transfercost", dest="transcost",
                          metavar="<transfer cost>",
                          default=3, type="int",
                          help="transfer cost, integer only (default: 3)")
        parser.add_option("-L", "--losscost", dest="losscost",
                          metavar="<loss cost>",
                          default=1, type="int",
                          help="loss cost, integer only (default: 1)")
        parser.add_option("--seed", dest="seed",
                          metavar="<seed>",
                          type="int",
                          help="user defined random number generator seed")
        parser.add_option("--tmp", dest="tmp",
                          metavar="<tmp directory>",
                          help="directory for temporary files (must exist)")

        self.parser = parser

        CostModel._parse_args(self, extra)

        # check temporary directory (a regular file at that path is not
        # acceptable either, so test for a directory rather than existence)
        if self.tmp:
            if not os.path.isdir(os.path.realpath(self.tmp)):
                raise Exception("--tmp directory does not exist")

        # make temporary file
        fd, self.treefile = tempfile.mkstemp(dir=self.tmp)
        os.close(fd)

        # hack for cygwin (ranger-dtl-U cannot handle system files)
        if sys.platform == 'cygwin':
            # compare against cwd plus a separator so that a sibling such as
            # "/home/ab" is not mistaken for being inside "/home/a", and so
            # that a cwd of "/" strips exactly one character
            prefix = os.getcwd().rstrip(os.sep) + os.sep
            if self.treefile.startswith(prefix):
                # remove working path (and separator)
                self.treefile = self.treefile[len(prefix):]
            else:
                # do not leave the just-created temporary file behind
                os.remove(self.treefile)
                raise Exception("--tmp must be a relative path when using cygwin")
 def optimize_model(self, gtree, stree, gene2species):
     """Optimizes the model

     Checks that both trees are rooted and binary and that
     phylo.reconcile succeeds on them.

     Raises Exception if a tree is not rooted and binary, or if the
     reconciliation call fails.
     """
     CostModel.optimize_model(self, gtree, stree, gene2species)

     # ensure gtree and stree are both rooted and binary
     if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
         raise Exception("gene tree must be rooted and binary")
     if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
         raise Exception("species tree must be rooted and binary")

     # the reconciliation itself is discarded; only its success matters
     try:
         phylo.reconcile(gtree, stree, gene2species)
     except Exception:
         # only translate ordinary errors; let KeyboardInterrupt and
         # SystemExit propagate untouched
         raise Exception("problem mapping gene tree to species tree")
Пример #9
0
    def optimize_model(self, gtree, stree, gene2species):
        """Optimizes the model

        Replaces the option values self.align and self.params with the
        objects returned by fasta.read_fasta and spidir.read_params,
        resolves self.bgfreq to a list of four floats (parsed from the
        option or computed from the alignment), and resolves self.kappa
        (kept if >= 0, otherwise chosen by a grid search over the
        alignment).

        Calls self.parser.error if --align or --param is missing or if
        --bgfreq is not four comma-separated numbers.
        """
        CostModel.optimize_model(self, gtree, stree, gene2species)

        #=============================
        # read sequences
        if not self.align:
            self.parser.error("--align must be specified")
        self.align = fasta.read_fasta(self.align)

        #=============================
        # read SPIDIR parameters
        if not self.params:
            self.parser.error("--param must be specified")
        self.params = spidir.read_params(self.params)

        #=============================
        # determine background base frequency
        if self.bgfreq:
            # use supplied frequency; build a real list so len() works on
            # both Python 2 and 3, and report non-numeric fields through
            # the parser instead of an uncaught ValueError
            try:
                vals = [float(field) for field in self.bgfreq.split(",")]
            except ValueError:
                vals = []
            if len(vals) != 4:
                self.parser.error("invalid --bgfreq: %s" % self.bgfreq)
            self.bgfreq = vals
        else:
            # compute frequency from alignment
            self.bgfreq = alignlib.compute_bgfreq(self.align)

        #=============================
        # branch lengths
        # a supplied kappa (>= 0) is used as is; written as "not >=" so
        # that every other value takes the estimation branch, as before
        if not self.kappa >= 0:
            # compute kappa from alignment
            # from spidir.find_ml_kapp_hky
            minkappa = 0.4
            maxkappa = 5.0
            stepkappa = 0.1
            maxlk = -util.INF
            maxk = minkappa

            # keep the kappa with the highest value returned by the fit
            for k in util.frange(minkappa, maxkappa, stepkappa):
                lk = spidir.find_ml_branch_lengths_hky(
                    gtree, self.align, self.bgfreq, k, maxiter=1,
                    parsinit=(k == minkappa))
                if lk > maxlk:
                    maxlk = lk
                    maxk = k

            self.kappa = maxk
Пример #10
0
    def __init__(self, extra):
        """Initializes the model

        Registers the SpimapModel options -- alignment and parameter
        files, the --simplereroot flag, a "Sequence evolution model" group
        (kappa, background frequencies) and a "Dup/loss evolution model"
        group (duplication rate, loss rate, pre-speciation parameter) --
        then hands ``extra`` to CostModel._parse_args.
        """
        CostModel.__init__(self, extra)

        self.VERSION = "0.1.0"
        self.mincost = -util.INF

        self.parser = optparse.OptionParser(prog="SpimapModel")

        # top-level options
        self.parser.add_option(
            "-a", "--align", dest="align",
            metavar="<alignment fasta>",
            help="sequence alignment in fasta format")
        self.parser.add_option(
            "-p", "--param", dest="params",
            metavar="<params file>",
            help="substitution rate parameters file")
        self.parser.add_option(
            "--simplereroot", dest="simplereroot",
            metavar="<simple reroot>",
            action="store_true", default=False,
            help="set to reroot using dup/loss cost")

        # sequence evolution options
        seq_opts = optparse.OptionGroup(self.parser, "Sequence evolution model")
        seq_opts.add_option(
            "-k", "--kappa", dest="kappa",
            metavar="<transition/transversion estimate>",
            type="float", default=-1.0,
            help="used for HKY model (default: estimate)")
        seq_opts.add_option(
            "-f", "--bgfreq", dest="bgfreq",
            metavar="<A freq>,<C freq>,<G freq>,<T freq>",
            help="background frequencies (default: estimate)")
        self.parser.add_option_group(seq_opts)

        # duplication/loss options
        duploss_opts = optparse.OptionGroup(self.parser, "Dup/loss evolution model")
        duploss_opts.add_option(
            "-D", "--duprate", dest="duprate",
            metavar="<duplication rate>",
            type="float", default=0.1,
            help="rate of gene duplication (default: 0.1)")
        duploss_opts.add_option(
            "-L", "--lossrate", dest="lossrate",
            metavar="<loss rate>",
            type="float", default=0.1,
            help="rate of gene loss (default: 0.1)")
        duploss_opts.add_option(
            "-P", "--pretime", dest="pretime",
            metavar="<pre-speciation time parameter>",
            type="float", default=1.0,
            help="lambda param of pre-speciation distribution (default: 1.0)")
        self.parser.add_option_group(duploss_opts)

        CostModel._parse_args(self, extra)
Пример #11
0
    def optimize_model(self, gtree, stree, gene2species):
        """Optimizes the model

        Checks that the duplication and loss costs are non-negative, that
        both trees are rooted and binary, and that phylo.reconcile
        succeeds on them.

        Calls self.parser.error for a negative cost.  Raises Exception if
        a tree is not rooted and binary, or if the reconciliation call
        fails.
        """
        CostModel.optimize_model(self, gtree, stree, gene2species)

        if self.dupcost < 0:
            self.parser.error("-D/--dupcost must be >= 0")
        if self.losscost < 0:
            self.parser.error("-L/--losscost must be >= 0")

        # ensure gtree and stree are both rooted and binary
        if not (treelib.is_rooted(gtree) and treelib.is_binary(gtree)):
            raise Exception("gene tree must be rooted and binary")
        if not (treelib.is_rooted(stree) and treelib.is_binary(stree)):
            raise Exception("species tree must be rooted and binary")

        # the reconciliation itself is discarded; only its success matters
        try:
            phylo.reconcile(gtree, stree, gene2species)
        except Exception:
            # only translate ordinary errors; let KeyboardInterrupt and
            # SystemExit propagate untouched
            raise Exception("problem mapping gene tree to species tree")
Пример #12
0
    def __init__(self, extra):
        """Initializes the model

        Registers the DLCModel options -- duplication, loss and deep
        coalescence costs (floats, default 1.0), the locus tree output
        file, and a "Search Options" group with the DCS threshold
        (default 0.25) and the --switch flag -- then hands ``extra`` to
        CostModel._parse_args and resets self.locustree to None.
        """
        CostModel.__init__(self, extra)

        self.VERSION = "0.1.0"
        self.mincost = 0

        parser = optparse.OptionParser(prog="DLCModel")
        parser.add_option("-D", "--dupcost", dest="dupcost",
                          metavar="<dup cost>",
                          default=1.0, type="float",
                          help="duplication cost (default: 1.0)")
        parser.add_option("-L", "--losscost", dest="losscost",
                          metavar="<loss cost>",
                          default=1.0, type="float",
                          help="loss cost (default: 1.0)")
        parser.add_option("-C", "--coalcost", dest="coalcost",
                          metavar="<(deep) coalescence cost>",
                          default=1.0, type="float",
                          help="deep coalescence cost (default: 1.0)")
        parser.add_option("-o", "--output", dest="output",
                          metavar="<locus tree output file>",
                          help="locus tree output file")

        grp_search = optparse.OptionGroup(parser, "Search Options")
        # the help text states the default actually configured below (0.25)
        grp_search.add_option("--dcs", dest="dcs",
                              metavar="<DCS threshold>",
                              default=0.25, type="float",
                              help="duplication consistency score threshold: " +
                              "DCS >/<= thr resolves to DUP/ILS (default: 0.25)")
        grp_search.add_option("--switch", dest="switch",
                              default=False, action="store_true",
                              help="switches equality assignment for DCS threshold, " +
                              "i.e. DCS >=/< thr resolves to DUP/ILS")
        parser.add_option_group(grp_search)

        self.parser = parser

        CostModel._parse_args(self, extra)

        self.locustree = None
Пример #13
0
    def optimize_model(self, gtree, stree, gene2species):
        """Optimizes the model

        Validates the cost, output and DCS options, stores a topology-only
        copy of the gene tree in self.locustree, and sets self.search to a
        TreeSearchMix with NNI and SPR proposers weighted 0.5 each.

        Calls self.parser.error for an invalid option.  Raises Exception
        if the gene tree has no root.
        """
        CostModel.optimize_model(self, gtree, stree, gene2species)

        # check arguments
        nonneg_options = (("-D/--dupcost", "dupcost"),
                          ("-L/--losscost", "losscost"),
                          ("-C/--coalcost", "coalcost"))
        for flag, attr in nonneg_options:
            value = getattr(self, attr)
            if value < 0:
                self.parser.error(flag + " must be >= 0: " + str(value))
        if self.output is None:
            self.parser.error("-o/--output must be specified")

        if self.dcs < 0 or self.dcs > 1:
            self.parser.error("--dcs must be in [0,1]: " + str(self.dcs))

        # copy over tree topology
        locus = treelib.Tree(nextname=gtree.nextname)

        def clone(src):
            # new node carrying only the name of the source node
            dup = treelib.TreeNode(src.name)
            for kid in src.children:
                locus.add_child(dup, clone(kid))
            return dup

        if not gtree.root:
            raise Exception("input gene tree must be rooted")
        clone(gtree.root)
        locus.root = locus.nodes[gtree.root.name]
        self.locustree = locus

        # search functions
        self.search = phylo.TreeSearchMix(None)
        for proposer in (phylo.TreeSearchNni, phylo.TreeSearchSpr):
            self.search.add_proposer(proposer(None), 0.5)
Пример #14
0
 def recon_root(self, gtree, newCopy=True, returnCost=False):
     """
     Reroots the gene tree.

     With self.simplereroot set, the root is chosen by phylo.recon_root
     (duplication/loss based), which may NOT minimize this model's cost
     function; the cost, when requested, comes from self.compute_cost.
     Otherwise the call is delegated to CostModel.recon_root.

     Returns the tree, or a (tree, cost) pair when returnCost is true in
     the simplereroot case.
     """
     if not self.simplereroot:
         return CostModel.recon_root(self, gtree, newCopy, returnCost)

     rerooted = phylo.recon_root(gtree, self.stree, self.gene2species,
                                 newCopy=newCopy, keepName=True,
                                 returnCost=False)
     if not returnCost:
         return rerooted
     return rerooted, self.compute_cost(rerooted)