def main():
    """Draw the outgroup DAG of a cactus project as a graphviz .dot file.

    Command line: ``<project> <output graphviz .dot file>`` plus the
    optional flags ``--justLeaves``, ``--threshold``, ``--numOutgroups``
    and ``--dynamic``.  Outgroups are assigned with GreedyOutgroup unless
    ``--dynamic`` is given, in which case DynamicOutgroup is used.

    If writing the dot file through NetworkX fails for any reason, the
    outgroup map is written to the same output path as plain
    ``node -> outgroups`` lines instead.

    Returns:
        0 on completion.

    Raises:
        RuntimeError: if the number of positional arguments is not two.
    """
    usage = "usage: %prog <project> <output graphviz .dot file>"
    description = "TEST: draw the outgroup DAG"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--justLeaves", dest="justLeaves",
                      action="store_true", default=False,
                      help="Assign only leaves as outgroups")
    parser.add_option("--threshold", dest="threshold", type='int',
                      default=None, help="greedy threshold")
    parser.add_option("--numOutgroups", dest="maxNumOutgroups",
                      help="Maximum number of outgroups to provide",
                      type=int)
    parser.add_option("--dynamic", help="Use new dynamic programming"
                      " algorithm", action="store_true", default=False)
    options, args = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        raise RuntimeError("Wrong number of arguments")

    proj = MultiCactusProject()
    proj.readXML(args[0])
    if not options.dynamic:
        outgroup = GreedyOutgroup()
        outgroup.importTree(proj.mcTree)
        if options.justLeaves:
            # Restrict the candidate outgroups to the names of the leaves.
            candidates = set(proj.mcTree.getName(x)
                             for x in proj.mcTree.getLeaves())
        else:
            candidates = None
        outgroup.greedy(threshold=options.threshold,
                        candidateSet=candidates,
                        candidateChildFrac=1.1,
                        maxNumOutgroups=options.maxNumOutgroups)
    else:
        outgroup = DynamicOutgroup()
        outgroup.importTree(proj.mcTree, proj.getInputSequenceMap())
        outgroup.compute(options.maxNumOutgroups)

    try:
        NX.drawing.nx_agraph.write_dot(outgroup.dag, args[1])
    except Exception as e:
        # Deliberate best-effort fallback: any failure while writing the
        # dot file downgrades the output to a plain-text dump of ogMap.
        # Single-argument parenthesised print produces the same output
        # under Python 2's print statement and Python 3's print function.
        print("NetworkX failed: %s" % str(e))
        print("Writing ogMap in non-graphviz format")
        with open(args[1], "w") as f:
            for node, ogs in outgroup.ogMap.items():
                f.write("%s -> %s\n" % (node, str(ogs)))
    return 0
def main():
    """Command-line entry point: write the outgroup DAG as a graphviz file.

    Expects exactly two positional arguments, the project XML to read and
    the output path.  ``--dynamic`` selects DynamicOutgroup; otherwise
    GreedyOutgroup is used, honouring ``--threshold``, ``--numOutgroups``
    and ``--justLeaves`` (which limits the candidates to leaf names).

    When NetworkX cannot write the dot file, a message is printed and the
    outgroup map is written to the output path as ``node -> outgroups``
    text lines instead.

    Returns:
        0 on completion.

    Raises:
        RuntimeError: if the number of positional arguments is not two.
    """
    usage = "usage: %prog <project> <output graphviz .dot file>"
    description = "TEST: draw the outgroup DAG"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--justLeaves", dest="justLeaves",
                      action="store_true", default=False,
                      help="Assign only leaves as outgroups")
    parser.add_option("--threshold", dest="threshold", type='int',
                      default=None, help="greedy threshold")
    parser.add_option("--numOutgroups", dest="maxNumOutgroups",
                      help="Maximum number of outgroups to provide",
                      type=int)
    parser.add_option("--dynamic", help="Use new dynamic programming"
                      " algorithm", action="store_true", default=False)
    options, args = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        raise RuntimeError("Wrong number of arguments")
    projectPath, outputPath = args

    proj = MultiCactusProject()
    proj.readXML(projectPath)
    if options.dynamic:
        outgroup = DynamicOutgroup()
        outgroup.importTree(proj.mcTree, proj.getInputSequenceMap())
        outgroup.compute(options.maxNumOutgroups)
    else:
        outgroup = GreedyOutgroup()
        outgroup.importTree(proj.mcTree)
        candidates = None
        if options.justLeaves:
            # Only leaf names may be chosen as outgroups.
            candidates = set(proj.mcTree.getName(x)
                             for x in proj.mcTree.getLeaves())
        outgroup.greedy(threshold=options.threshold,
                        candidateSet=candidates,
                        candidateChildFrac=1.1,
                        maxNumOutgroups=options.maxNumOutgroups)

    try:
        NX.drawing.nx_agraph.write_dot(outgroup.dag, outputPath)
    except Exception as e:
        # Best-effort fallback kept on purpose: whatever goes wrong while
        # writing the dot file, still leave a readable dump of ogMap.
        # print(...) with one argument is valid and equivalent on both
        # Python 2 and Python 3, unlike the bare print statement.
        print("NetworkX failed: %s" % str(e))
        print("Writing ogMap in non-graphviz format")
        with open(outputPath, "w") as f:
            for node, ogs in outgroup.ogMap.items():
                f.write("%s -> %s\n" % (node, str(ogs)))
    return 0
def createMCProject(tree, experiment, config, options):
    """Build a MultiCactusProject and assign its outgroups.

    Args:
        tree: passed to the MultiCactusTree constructor.
        experiment: supplies the input sequences via getSequences().
        config: supplies the subtree size, default internal node prefix,
            self-alignment flag, outgroup strategy and outgroup
            parameters through its getters.
        options: object whose ``path``, ``root`` and ``outgroupNames``
            attributes are read.

    Returns:
        The populated MultiCactusProject, after specifyAlignmentRoot has
        been applied to it.

    Raises:
        RuntimeError: if ``options.root`` is given but cannot be resolved
            to a node id, or if the outgroup strategy is not one of
            'greedy', 'greedyLeaves', 'greedyPreference', 'dynamic' or
            'none'.
    """
    mcTree = MultiCactusTree(tree, config.getSubtreeSize())
    mcTree.nameUnlabeledInternalNodes(config.getDefaultInternalNodePrefix())
    mcTree.computeSubtreeRoots()
    mcProj = MultiCactusProject()
    mcProj.mcTree = mcTree
    # Slice so the project holds its own copy of the sequence list.
    mcProj.inputSequences = experiment.getSequences()[:]
    if config.getDoSelfAlignment():
        mcTree.addSelfEdges()
    for name in mcProj.mcTree.getSubtreeRootNames():
        expPath = "%s/%s/%s_experiment.xml" % (options.path, name, name)
        mcProj.expMap[name] = os.path.abspath(expPath)

    alignmentRootId = mcProj.mcTree.getRootId()
    if options.root is not None:
        try:
            alignmentRootId = mcProj.mcTree.getNodeId(options.root)
        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must not
            # be reported as a missing root name.
            raise RuntimeError("Specified root name %s not found in tree"
                               % options.root)

    mcProj.outgroup = None
    strategy = config.getOutgroupStrategy()
    if strategy == 'greedy':
        # use the provided outgroup candidates, or use all outgroups
        # as candidates if none are given
        mcProj.outgroup = GreedyOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree, alignmentRootId)
        mcProj.outgroup.greedy(
            threshold=config.getOutgroupThreshold(),
            candidateSet=options.outgroupNames,
            candidateChildFrac=config.getOutgroupAncestorQualityFraction(),
            maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy == 'greedyLeaves':
        # use all leaves as outgroups, unless outgroup candidates are given
        mcProj.outgroup = GreedyOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree, alignmentRootId)
        ogSet = options.outgroupNames
        if ogSet is None:
            ogSet = set(mcProj.mcTree.getName(x)
                        for x in mcProj.mcTree.getLeaves())
        mcProj.outgroup.greedy(threshold=config.getOutgroupThreshold(),
                               candidateSet=ogSet,
                               candidateChildFrac=2.0,
                               maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy == 'greedyPreference':
        # prefer the provided outgroup candidates, if any, but use
        # other nodes as "filler" if we can't find enough.
        mcProj.outgroup = GreedyOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree, alignmentRootId)
        mcProj.outgroup.greedy(
            threshold=config.getOutgroupThreshold(),
            candidateSet=options.outgroupNames,
            candidateChildFrac=config.getOutgroupAncestorQualityFraction(),
            maxNumOutgroups=config.getMaxNumOutgroups())
        mcProj.outgroup.greedy(
            threshold=config.getOutgroupThreshold(),
            candidateSet=None,
            candidateChildFrac=config.getOutgroupAncestorQualityFraction(),
            maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy == 'dynamic':
        # dynamic programming algorithm that exactly optimizes probability
        # that base in target node aligns to at least one base in the
        # outgroup set.  Caveats are that it only returns leaves, and
        # the model used for optimization is super naive. Still, it does
        # some things better than greedy approaches such as properly account
        # for phylogenetic redundancy, as well as try to factor assembly
        # size/quality automatically.
        mcProj.outgroup = DynamicOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree,
                                   mcProj.getInputSequenceMap(),
                                   alignmentRootId,
                                   candidateSet=options.outgroupNames)
        mcProj.outgroup.compute(maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy != 'none':
        raise RuntimeError("Could not understand outgroup strategy %s"
                           % strategy)

    # if necessary, we reroot the tree at the specified alignment root id.
    # all leaf genomes that are no longer in the tree, but still used as
    # outgroups, are moved into special fields so that we can remember to,
    # say, get their paths for preprocessing.
    specifyAlignmentRoot(mcProj, alignmentRootId)
    return mcProj
def createMCProject(tree, experiment, config, options):
    """Build a MultiCactusProject and assign its outgroups.

    Args:
        tree: passed to the MultiCactusTree constructor.
        experiment: supplies the input sequences via getSequences() and
            the output sequence directory via getOutputSequenceDir().
        config: supplies the subtree size, default internal node prefix,
            self-alignment flag, outgroup strategy and outgroup
            parameters through its getters.
        options: object whose ``path``, ``root`` and ``outgroupNames``
            attributes are read.

    Returns:
        The populated MultiCactusProject, after specifyAlignmentRoot has
        been applied to it.

    Raises:
        RuntimeError: if ``options.root`` is given but cannot be resolved
            to a node id, or if the outgroup strategy is not one of
            'greedy', 'greedyLeaves', 'greedyPreference', 'dynamic' or
            'none'.
    """
    mcTree = MultiCactusTree(tree, config.getSubtreeSize())
    mcTree.nameUnlabeledInternalNodes(config.getDefaultInternalNodePrefix())
    mcTree.computeSubtreeRoots()
    mcProj = MultiCactusProject()
    mcProj.mcTree = mcTree
    # Slice so the project holds its own copy of the sequence list.
    mcProj.inputSequences = experiment.getSequences()[:]
    mcProj.outputSequenceDir = experiment.getOutputSequenceDir()
    if config.getDoSelfAlignment():
        mcTree.addSelfEdges()
    for name in mcProj.mcTree.getSubtreeRootNames():
        expPath = "%s/%s/%s_experiment.xml" % (options.path, name, name)
        mcProj.expMap[name] = os.path.abspath(expPath)

    alignmentRootId = mcProj.mcTree.getRootId()
    if options.root is not None:
        try:
            alignmentRootId = mcProj.mcTree.getNodeId(options.root)
        except Exception:
            # The caught exception is not used; any lookup failure is
            # reported uniformly as an unknown root name.
            raise RuntimeError("Specified root name %s not found in tree"
                               % options.root)

    mcProj.outgroup = None
    strategy = config.getOutgroupStrategy()
    if strategy == 'greedy':
        # use the provided outgroup candidates, or use all outgroups
        # as candidates if none are given
        mcProj.outgroup = GreedyOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree, alignmentRootId)
        mcProj.outgroup.greedy(
            threshold=config.getOutgroupThreshold(),
            candidateSet=options.outgroupNames,
            candidateChildFrac=config.getOutgroupAncestorQualityFraction(),
            maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy == 'greedyLeaves':
        # use all leaves as outgroups, unless outgroup candidates are given
        mcProj.outgroup = GreedyOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree, alignmentRootId)
        ogSet = options.outgroupNames
        if ogSet is None:
            ogSet = set(mcProj.mcTree.getName(x)
                        for x in mcProj.mcTree.getLeaves())
        mcProj.outgroup.greedy(threshold=config.getOutgroupThreshold(),
                               candidateSet=ogSet,
                               candidateChildFrac=2.0,
                               maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy == 'greedyPreference':
        # prefer the provided outgroup candidates, if any, but use
        # other nodes as "filler" if we can't find enough.
        mcProj.outgroup = GreedyOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree, alignmentRootId)
        # First pass is restricted to the preferred candidates; the second
        # pass (candidateSet=None) runs with no candidate restriction.
        for candidates in (options.outgroupNames, None):
            mcProj.outgroup.greedy(
                threshold=config.getOutgroupThreshold(),
                candidateSet=candidates,
                candidateChildFrac=config.getOutgroupAncestorQualityFraction(),
                maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy == 'dynamic':
        # dynamic programming algorithm that exactly optimizes probability
        # that base in target node aligns to at least one base in the
        # outgroup set.  Caveats are that it only returns leaves, and
        # the model used for optimization is super naive. Still, it does
        # some things better than greedy approaches such as properly account
        # for phylogenetic redundancy, as well as try to factor assembly
        # size/quality automatically.
        mcProj.outgroup = DynamicOutgroup()
        mcProj.outgroup.importTree(mcProj.mcTree,
                                   mcProj.getInputSequenceMap(),
                                   alignmentRootId,
                                   candidateSet=options.outgroupNames)
        mcProj.outgroup.compute(maxNumOutgroups=config.getMaxNumOutgroups())
    elif strategy != 'none':
        raise RuntimeError("Could not understand outgroup strategy %s"
                           % strategy)

    # if necessary, we reroot the tree at the specified alignment root id.
    # all leaf genomes that are no longer in the tree, but still used as
    # outgroups, are moved into special fields so that we can remember to,
    # say, get their paths for preprocessing.
    specifyAlignmentRoot(mcProj, alignmentRootId)
    return mcProj