def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Re-estimate branch lengths on a fixed topology by running PhyML.

    The topology is first duplicated to ``topology_tree.phy`` beside the
    original file, and that copy is what is handed to PhyML with ``-u``.
    The run uses the (unpartitioned) alignment: the GTR+I+G command for
    "DNA" data and the LG command for "protein" data. Any other datatype
    skips the PhyML run entirely.

    Args:
        alignment_path: alignment file passed to PhyML with ``-i``.
        topology_path: tree file that is copied and used as the input tree.
        datatype: "DNA" or "protein".
        cmdline_extras: extra command-line text; passed through
            ``check_defaults`` and appended to the PhyML command.

    Returns:
        The tree path that ``make_tree_path`` derives from
        ``alignment_path``.
    """
    cmdline_extras = check_defaults(cmdline_extras)

    # PhyML works on a copy of the topology, stored next to the original.
    working_tree = os.path.join(os.path.dirname(topology_path),
                                'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, working_tree)
    util.dupfile(topology_path, working_tree)

    # Pick the command template for this datatype; None means "do nothing".
    template = None
    if datatype == "DNA":
        log.info("Estimating GTR+I+G branch lengths on tree")
        template = "-i '%s' -u '%s' -m GTR -c 4 -a e -v e -o lr -b 0 %s"
    elif datatype == "protein":
        log.info("Estimating LG+F branch lengths on tree")
        template = "-i '%s' -u '%s' -m LG -c 1 -v 0 -f m -d aa -o lr -b 0 %s"

    if template is not None:
        run_phyml(template % (alignment_path, working_tree, cmdline_extras))

    final_tree = make_tree_path(alignment_path)
    log.info("Branchlength estimation finished")

    # Hand back the path of the final tree
    return final_tree
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Re-estimate branch lengths on a fixed topology by running RAxML.

    The topology file is duplicated to ``topology_tree.phy`` in its own
    directory and the original is then deleted. RAxML is run on the
    (unpartitioned) alignment with a command chosen by ``datatype``, and
    the resulting ``RAxML_result.BLTREE`` file is located and logged.

    Args:
        alignment_path: alignment file passed to RAxML with ``-s``.
        topology_path: tree file to copy; it is removed after copying.
        datatype: one of "DNA", "protein" or "morphology".
        cmdline_extras: extra command-line text; passed through
            ``check_defaults`` and appended to the RAxML command.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        util.PartitionFinderError: if ``datatype`` is not recognised, or
            if the expected RAxML output file does not exist after the run.
    """
    cmdline_extras = check_defaults(cmdline_extras)

    dir_path = os.path.dirname(topology_path)
    tree_path = os.path.join(dir_path, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, tree_path)
    util.dupfile(topology_path, tree_path)
    os.remove(topology_path)  # saves headaches later...

    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s " % (
            alignment_path, tree_path, os.path.abspath(dir_path),
            cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s " % (
            alignment_path, tree_path, os.path.abspath(dir_path),
            cmdline_extras)
    elif datatype == "morphology":
        # LOOK OUT: this relies on the assumption that we can only specify
        # a single model for morphology analyses...
        # choose a model for the data - necessary for RAxML to load the
        # data properly
        model = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML",
                 the_config.models[0])
        # The paths are single-quoted here exactly as in the DNA/protein
        # commands, so a path containing spaces stays one argument.
        # NOTE(review): assumes run_raxml tokenises the command
        # shell-style, as the quoted commands above already rely on.
        command = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s" % (
            alignment_path, tree_path, model, os.path.abspath(dir_path),
            cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml(command)

    # The result is looked up beside the alignment file.
    alignment_dir = os.path.dirname(alignment_path)
    tree_path = os.path.join(alignment_dir, "RAxML_result.BLTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'"
            % (tree_path))
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
              tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("Branchlength estimation finished")

    # Now return the path of the final tree with branch lengths
    return tree_path
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Run RAxML to put branch lengths on an existing topology.

    The topology is copied to ``topology_tree.phy`` in the same directory
    and the original file is removed. A RAxML command is then assembled
    for the given datatype and run against the (unpartitioned) alignment.

    Args:
        alignment_path: alignment file passed to RAxML with ``-s``.
        topology_path: tree file to copy; deleted once the copy exists.
        datatype: "DNA", "protein" or "morphology".
        cmdline_extras: extra command-line text; passed through
            ``check_defaults`` and appended to the RAxML command.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        util.PartitionFinderError: for an unrecognised ``datatype`` or
            when the RAxML result file is missing after the run.
    """
    cmdline_extras = check_defaults(cmdline_extras)

    topology_dir = os.path.dirname(topology_path)
    start_tree = os.path.join(topology_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, start_tree)
    util.dupfile(topology_path, start_tree)
    os.remove(topology_path)  # saves headaches later...

    # Each branch selects a command template and the values to fill it
    # with; the command itself is formatted once, below.
    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        template = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s "
        fields = (alignment_path, start_tree,
                  os.path.abspath(topology_dir), cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        template = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s "
        fields = (alignment_path, start_tree,
                  os.path.abspath(topology_dir), cmdline_extras)
    elif datatype == "morphology":
        # LOOK OUT: only the first configured model is used, on the
        # assumption that morphology analyses specify a single model.
        # RAxML needs a model on the command line to load the data.
        model = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML",
                 the_config.models[0])
        template = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s"
        fields = (alignment_path, start_tree, model,
                  os.path.abspath(topology_dir), cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml(template % fields)

    # The result is looked up beside the alignment file.
    result_tree = os.path.join(os.path.dirname(alignment_path),
                               "RAxML_result.BLTREE")

    if os.path.exists(result_tree):
        log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
                  result_tree)
        with open(result_tree, 'r') as tree_file:
            log.debug('%s', tree_file.read())
    else:
        log.error("RAxML tree topology should be here but can't be be found: '%s'" % (result_tree))
        raise util.PartitionFinderError

    log.info("Branchlength estimation finished")

    # Hand back the path of the final tree with branch lengths
    return result_tree
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Run RAxML to put branch lengths on an existing topology.

    The topology is copied to ``topology_tree.phy`` in the same directory
    and the original file is removed. For "DNA" or "protein" data a RAxML
    command is built and run on the (unpartitioned) alignment; for any
    other datatype no RAxML run is made. Either way the function then
    expects ``RAxML_result.BLTREE`` to exist beside the alignment.

    Args:
        alignment_path: alignment file passed to RAxML with ``-s``.
        topology_path: tree file to copy; deleted once the copy exists.
        datatype: "DNA" or "protein".
        cmdline_extras: extra command-line text; passed through
            ``check_defaults`` and appended to the RAxML command.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        RaxmlError: if the RAxML result file cannot be found.
    """
    cmdline_extras = check_defaults(cmdline_extras)

    topology_dir = os.path.dirname(topology_path)
    fixed_tree = os.path.join(topology_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, fixed_tree)
    util.dupfile(topology_path, fixed_tree)
    os.remove(topology_path)  # saves headaches later...

    # command stays None for datatypes that have no RAxML recipe here.
    command = None
    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s " % (
            alignment_path, fixed_tree, os.path.abspath(topology_dir),
            cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s " % (
            alignment_path, fixed_tree, os.path.abspath(topology_dir),
            cmdline_extras)

    if command is not None:
        run_raxml(command)

    # The result is looked up beside the alignment file.
    result_tree = os.path.join(os.path.dirname(alignment_path),
                               "RAxML_result.BLTREE")

    if os.path.exists(result_tree):
        log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
                  result_tree)
        with open(result_tree, 'r') as tree_file:
            log.debug('%s', tree_file.read())
    else:
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'"
            % (result_tree))
        raise RaxmlError

    log.info("Branchlength estimation finished")

    # Hand back the path of the final tree with branch lengths
    return result_tree
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Estimate branch lengths for a fixed topology using RAxML.

    A copy of the topology is written to ``topology_tree.phy`` alongside
    the original, which is then deleted. When ``datatype`` is "DNA" or
    "protein" the matching RAxML command is run on the (unpartitioned)
    alignment; other datatypes trigger no run. The function finally
    requires ``RAxML_result.BLTREE`` to be present next to the alignment.

    Args:
        alignment_path: alignment file passed to RAxML with ``-s``.
        topology_path: tree file to copy; removed after copying.
        datatype: "DNA" or "protein".
        cmdline_extras: extra command-line text; passed through
            ``check_defaults`` and appended to the RAxML command.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        RaxmlError: if the RAxML result file cannot be found.
    """
    cmdline_extras = check_defaults(cmdline_extras)

    tree_dir = os.path.dirname(topology_path)
    input_tree = os.path.join(tree_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, input_tree)
    util.dupfile(topology_path, input_tree)
    os.remove(topology_path)  # saves headaches later...

    # (datatype, log message, command template) for each supported type.
    recipes = (
        ("DNA",
         "Estimating GTR+G branch lengths on tree using RAxML",
         "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s "),
        ("protein",
         "Estimating LG+G branch lengths on tree using RAxML",
         "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s "),
    )
    for kind, message, template in recipes:
        if datatype == kind:
            log.info(message)
            run_raxml(template % (alignment_path, input_tree,
                                  os.path.abspath(tree_dir), cmdline_extras))
            break

    # The result is looked up beside the alignment file.
    output_tree = os.path.join(os.path.dirname(alignment_path),
                               "RAxML_result.BLTREE")

    if not os.path.exists(output_tree):
        log.error("RAxML tree topology should be here but can't be be found: '%s'" % (output_tree))
        raise RaxmlError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
              output_tree)
    with open(output_tree, 'r') as handle:
        log.debug('%s', handle.read())

    log.info("Branchlength estimation finished")

    # Hand back the path of the final tree with branch lengths
    return output_tree
def make_tree(self, user_path):
    """Make sure a starting tree exists and record its path on ``self``.

    Steps:
      1. Merge all user subsets, write the resulting filtered alignment to
         ``filtered_source.phy`` in the start-tree folder, and check the
         merged subset against the full alignment.
      2. Ask the configured processor for the tree path belonging to that
         alignment. If ``need_new_tree`` reports that a tree is required,
         clean out the folder and obtain a topology from the user-supplied
         file, from the processor's ``make_topology``, or from
         ``raxml.make_ml_topology``.
      3. For the first two sources, estimate branch lengths with the
         processor's ``make_branch_lengths``.

    Args:
        user_path: path to a user-supplied topology; ``None`` or ``""``
            means there is no user tree.

    Side effects:
        Sets ``self.filtered_alignment``, ``self.filtered_alignment_path``,
        ``self.alignment_path`` and ``self.tree_path``.
    """
    # The filtered alignment holds ONLY the columns the subsets define.
    everything = subset_ops.merge_subsets(the_config.user_subsets)
    self.filtered_alignment = SubsetAlignment(self.alignment, everything)

    start_dir = the_config.start_tree_path
    self.filtered_alignment_path = os.path.join(start_dir,
                                                'filtered_source.phy')
    self.filtered_alignment.write(self.filtered_alignment_path)

    # Check the full subset against the alignment
    subset_ops.check_against_alignment(everything, self.alignment, the_config)

    # Path for the source alignment (only the path is set here)
    self.alignment_path = os.path.join(start_dir, 'source.phy')

    # Now check for the tree
    tree_path = the_config.processor.make_tree_path(
        self.filtered_alignment_path)

    if self.need_new_tree(tree_path):
        log.debug("Estimating new starting tree, no old tree found")

        # Start from a clean folder, keeping only the alignments
        util.clean_out_folder(start_dir,
                              keep=["filtered_source.phy", "source.phy"])

        # A user tree wins; otherwise a topology has to be created.
        # The comparisons against True/False are kept explicit: a value
        # that equals neither takes neither branch.
        if not (user_path is None or user_path == ""):
            # Copy it into the start tree folder
            log.info("Using user supplied topology at %s" % user_path)
            topology_path = os.path.join(start_dir, 'user_topology.phy')
            util.dupfile(user_path, topology_path)
            need_bl = True
        elif the_config.no_ml_tree == True:
            log.debug("didn't find tree at %s, making a new one" % tree_path)
            topology_path = the_config.processor.make_topology(
                self.filtered_alignment_path,
                the_config.datatype,
                the_config.cmdline_extras)
            need_bl = True
        elif the_config.no_ml_tree == False:
            log.debug(
                "didn't find tree at %s, making an ML tree with RAxML"
                % tree_path)

            subset_indices = range(len(the_config.user_subsets))
            ml_scheme = scheme.create_scheme(
                the_config, "tree_scheme", subset_indices)

            topology_path = raxml.make_ml_topology(
                self.filtered_alignment_path,
                the_config.datatype,
                the_config.cmdline_extras,
                ml_scheme,
                self.threads)

            # Copy the ML tree topology so it can be used with PhyML too.
            # TODO: this is a hack, and it would be better to decide on a
            # universal name for the different types of tree we might have.
            phyml_copy = os.path.join(os.path.dirname(topology_path),
                                      "filtered_source.phy_phyml_tree.txt")
            copyfile(topology_path, phyml_copy)

            need_bl = False

        if need_bl:
            # Now estimate branch lengths
            tree_path = the_config.processor.make_branch_lengths(
                self.filtered_alignment_path,
                topology_path,
                the_config.datatype,
                the_config.cmdline_extras)

    self.tree_path = tree_path
    log.debug("Starting tree with branch lengths is here: %s" % self.tree_path)
def make_tree(self, user_path):
    """Prepare the starting tree and store its location in ``self.tree_path``.

    A filtered alignment (only the columns covered by the user's subsets)
    is written to ``filtered_source.phy`` in the start-tree folder and the
    merged subset is checked against the full alignment. The configured
    processor then names the tree path for that alignment. If
    ``need_new_tree`` says a tree must be built, the folder is cleaned and
    a topology is taken from the user's file, built by the processor's
    ``make_topology``, or built by ``raxml.make_ml_topology``. Branch
    lengths are estimated via the processor's ``make_branch_lengths`` for
    the first two cases only.

    Args:
        user_path: path to a user-supplied topology; ``None`` or ``""``
            means there is no user tree.

    Side effects:
        Sets ``self.filtered_alignment``, ``self.filtered_alignment_path``,
        ``self.alignment_path`` and ``self.tree_path``.
    """
    # Build and save an alignment restricted to the subset-defined columns
    merged_subset = subset_ops.merge_subsets(the_config.user_subsets)
    self.filtered_alignment = SubsetAlignment(self.alignment, merged_subset)
    self.filtered_alignment_path = os.path.join(
        the_config.start_tree_path, 'filtered_source.phy')
    self.filtered_alignment.write(self.filtered_alignment_path)

    # Check the full subset against the alignment
    subset_ops.check_against_alignment(
        merged_subset, self.alignment, the_config)

    # Path for the source alignment (only the path is set here)
    self.alignment_path = os.path.join(
        the_config.start_tree_path, 'source.phy')

    # Now check for the tree
    processor = the_config.processor
    tree_path = processor.make_tree_path(self.filtered_alignment_path)

    if self.need_new_tree(tree_path):
        log.debug("Estimating new starting tree, no old tree found")

        # Clear the folder, keeping only the two alignment files
        kept_files = ["filtered_source.phy", "source.phy"]
        util.clean_out_folder(the_config.start_tree_path, keep=kept_files)

        # If we have a user tree, use that; otherwise create a topology.
        have_user_tree = user_path is not None and user_path != ""

        if have_user_tree:
            # Copy it into the start tree folder
            log.info("Using user supplied topology at %s" % user_path)
            topology_path = os.path.join(
                the_config.start_tree_path, 'user_topology.phy')
            util.dupfile(user_path, topology_path)
            need_bl = True
        # no_ml_tree is compared explicitly against True and False; any
        # other value falls through both branches.
        elif the_config.no_ml_tree == True:
            log.debug("didn't find tree at %s, making a new one" % tree_path)
            topology_path = the_config.processor.make_topology(
                self.filtered_alignment_path, the_config.datatype,
                the_config.cmdline_extras)
            need_bl = True
        elif the_config.no_ml_tree == False:
            log.debug(
                "didn't find tree at %s, making an ML tree with RAxML"
                % tree_path)

            whole_scheme = scheme.create_scheme(
                the_config, "tree_scheme",
                range(len(the_config.user_subsets)))

            topology_path = raxml.make_ml_topology(
                self.filtered_alignment_path, the_config.datatype,
                the_config.cmdline_extras, whole_scheme, self.threads)

            # Duplicate the ML topology so it can be used with PhyML too.
            # TODO: this is a hack, and it would be better to decide on a
            # universal name for the different types of tree we might have.
            topology_folder = os.path.dirname(topology_path)
            phyml_named_tree = os.path.join(
                topology_folder, "filtered_source.phy_phyml_tree.txt")
            copyfile(topology_path, phyml_named_tree)

            need_bl = False

        if need_bl:
            # Now estimate branch lengths
            tree_path = the_config.processor.make_branch_lengths(
                self.filtered_alignment_path, topology_path,
                the_config.datatype, the_config.cmdline_extras)

    self.tree_path = tree_path
    log.debug("Starting tree with branch lengths is here: %s" % self.tree_path)