Exemplo n.º 1
0
def analyse(model, alignment_path, tree_path, branchlengths, cmdline_extras):
    """Run RAxML for a single model on one alignment, using a given tree.

    RAxML is told (via -w) to write into the directory that contains the
    alignment; existing output there may be overwritten.

    Args:
        model: model identifier; turned into RAxML options by
            models.get_model_commandline.
        alignment_path: alignment file handed to RAxML with -s.
        tree_path: tree file handed to RAxML with -t.
        branchlengths: either 'linked' or 'unlinked'.
        cmdline_extras: extra command line options; passed through
            check_defaults before use.

    Raises:
        util.PartitionFinderError: if branchlengths is not one of the two
            recognised values.
    """
    model_params = models.get_model_commandline(model)

    if branchlengths not in ('linked', 'unlinked'):
        log.error("Unknown option for branchlengths: %s", branchlengths)
        raise util.PartitionFinderError

    # 'linked' constrains all branch lengths to be equal (-f B), while
    # 'unlinked' lets branch lengths vary among subsets (-f e).
    bl_option = ' -f B ' if branchlengths == 'linked' else ' -f e '

    extras = check_defaults(cmdline_extras)

    # RAxML doesn't append alignment names automatically (unlike PhyML),
    # so the run name is built here.
    run_name = raxml_analysis_ID(alignment_path, model)

    # Force RAxML to write to the directory with the alignment in it.
    output_dir = os.path.abspath(os.path.dirname(alignment_path))
    command_args = (bl_option, alignment_path, tree_path, model_params,
                    run_name, output_dir, extras)
    run_raxml(" %s -s '%s' -t '%s' %s -n %s -w '%s' %s" % command_args)
Exemplo n.º 2
0
def analyse(model, alignment_path, tree_path, branchlengths, cmdline_extras):
    """Build and run the RAxML command for one model on one alignment.

    Output goes to the alignment's own directory (RAxML -w option), so
    earlier results stored there may be overwritten.

    Args:
        model: model identifier, translated to command line options by
            models.get_model_commandline.
        alignment_path: path given to RAxML as the alignment (-s).
        tree_path: path given to RAxML as the tree (-t).
        branchlengths: 'linked' or 'unlinked'.
        cmdline_extras: additional options, normalised by check_defaults.

    Raises:
        util.PartitionFinderError: for any other branchlengths value.
    """
    model_options = models.get_model_commandline(model)

    algorithm_flag = None
    if branchlengths == 'linked':
        # Constrain all branch lengths to be equal.
        algorithm_flag = ' -f B '
    elif branchlengths == 'unlinked':
        # Let branch lengths vary among subsets.
        algorithm_flag = ' -f e '

    if algorithm_flag is None:
        log.error("Unknown option for branchlengths: %s", branchlengths)
        raise util.PartitionFinderError

    extra_options = check_defaults(cmdline_extras)

    # Unlike PhyML, RAxML does not add the alignment name to its output
    # files, so an explicit analysis ID is generated.
    analysis_id = raxml_analysis_ID(alignment_path, model)

    # RAxML is forced to write next to the alignment.
    alignment_dir = os.path.dirname(alignment_path)
    template = " %s -s '%s' -t '%s' %s -n %s -w '%s' %s"
    command = template % (
        algorithm_flag,
        alignment_path,
        tree_path,
        model_options,
        analysis_id,
        os.path.abspath(alignment_dir),
        extra_options,
    )
    run_raxml(command)
Exemplo n.º 3
0
def make_branch_lengths(alignment_path, topology_path, datatype,
                        cmdline_extras):
    """Re-estimate branch lengths on a topology with RAxML.

    The topology file is copied to 'topology_tree.phy' in the same
    directory and the original is removed. RAxML is then run with '-f e'
    on the (unpartitioned) alignment, using a model picked from datatype.

    Args:
        alignment_path: alignment file handed to RAxML with -s.
        topology_path: tree file holding the topology; it is removed once
            it has been copied.
        datatype: "DNA", "protein" or "morphology".
        cmdline_extras: extra command line options; passed through
            check_defaults before use.

    Returns:
        Path of 'RAxML_result.BLTREE' in the alignment's directory.

    Raises:
        util.PartitionFinderError: if datatype is not recognised, or if
            the result tree does not exist after RAxML has run.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    dir_path = os.path.dirname(topology_path)
    tree_path = os.path.join(dir_path, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, tree_path)
    util.dupfile(topology_path, tree_path)
    os.remove(topology_path)  # saves headaches later...

    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s  " % (
            alignment_path, tree_path, os.path.abspath(dir_path),
            cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s " % (
            alignment_path, tree_path, os.path.abspath(dir_path),
            cmdline_extras)
    elif datatype == "morphology":
        # LOOK OUT: this relies on the assumption that we can only specify
        # a single model for morphology analyses...
        # Choose a model for the data - necessary for RAxML to load the
        # data properly.
        model = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML",
                 the_config.models[0])
        # Paths are quoted exactly as in the DNA/protein branches so that
        # paths containing spaces survive command line splitting.
        command = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s" % (
            alignment_path, tree_path, model, os.path.abspath(dir_path),
            cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml(command)

    # NOTE(review): RAxML writes to the topology's directory (-w) but the
    # result is looked up next to the alignment; presumably both files
    # live in the same directory - confirm against callers.
    aln_dir = os.path.dirname(alignment_path)
    tree_path = os.path.join(aln_dir, "RAxML_result.BLTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'" %
            (tree_path))
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
              tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("Branchlength estimation finished")

    # Now return the path of the final tree with branch lengths
    return tree_path
Exemplo n.º 4
0
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Estimate branch lengths for a fixed topology by running RAxML.

    The topology is first duplicated as 'topology_tree.phy' beside the
    original, which is then deleted. RAxML ('-f e') is run on the
    unpartitioned alignment with a model that depends on datatype.

    Args:
        alignment_path: alignment file (RAxML -s).
        topology_path: topology file; deleted after being copied.
        datatype: one of "DNA", "protein", "morphology".
        cmdline_extras: extra options, normalised by check_defaults.

    Returns:
        The path to 'RAxML_result.BLTREE' in the alignment's directory.

    Raises:
        util.PartitionFinderError: on an unrecognised datatype or when the
            expected result file is missing.
    """
    extras = check_defaults(cmdline_extras)

    topology_dir = os.path.dirname(topology_path)
    start_tree = os.path.join(topology_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, start_tree)
    util.dupfile(topology_path, start_tree)
    os.remove(topology_path)  # saves headaches later...

    if datatype not in ("DNA", "protein", "morphology"):
        log.error("Unrecognised datatype: '%s'" % datatype)
        raise util.PartitionFinderError

    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        template = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s  "
        command = template % (
            alignment_path, start_tree, os.path.abspath(topology_dir), extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        template = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s "
        command = template % (
            alignment_path, start_tree, os.path.abspath(topology_dir), extras)
    else:
        # Morphology. LOOK OUT: this relies on the assumption that only a
        # single model can be specified for morphology analyses. A model
        # has to be chosen for RAxML to load the data properly.
        model_options = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML", the_config.models[0])
        template = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s"
        command = template % (
            alignment_path, start_tree, model_options,
            os.path.abspath(topology_dir), extras)

    run_raxml(command)

    result_path = os.path.join(os.path.dirname(alignment_path), "RAxML_result.BLTREE")

    if not os.path.exists(result_path):
        message = "RAxML tree topology should be here but can't be be found: '%s'" % result_path
        log.error(message)
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", result_path)
    with open(result_path, 'r') as tree_file:
        log.debug('%s', tree_file.read())

    log.info("Branchlength estimation finished")

    # The final tree, now with branch lengths.
    return result_path
Exemplo n.º 5
0
def make_topology(alignment_path, datatype, cmdline_extras):
    """Make a maximum parsimony (MP) tree to start the analysis.

    Runs RAxML with '-y' and a hard-coded random number seed, writing the
    output into the directory that contains the alignment.

    Args:
        alignment_path: alignment file handed to RAxML with -s.
        datatype: "DNA", "protein" or "morphology".
        cmdline_extras: extra command line options; passed through
            check_defaults before use.

    Returns:
        Path of 'RAxML_parsimonyTree.MPTREE' in the alignment's directory.

    Raises:
        util.PartitionFinderError: if datatype is not recognised.
        RaxmlError: if the parsimony tree does not exist after the run.
    """
    log.info("Making MP tree for %s", alignment_path)

    cmdline_extras = check_defaults(cmdline_extras)

    # First get the MP topology like this (-p is a hard-coded random
    # number seed):
    if datatype == "DNA":
        command = "-y -s '%s' -m GTRGAMMA -n MPTREE -p 123456789 %s" % (
            alignment_path, cmdline_extras)
    elif datatype == "protein":
        command = "-y -s '%s' -m PROTGAMMALG -n MPTREE -p 123456789 %s" % (
            alignment_path, cmdline_extras)
    elif datatype == "morphology":
        # LOOK OUT: this relies on the assumption that we can only specify
        # a single model for morphology analyses...
        # Choose a model for the data - necessary for RAxML to load the
        # data properly.
        model = models.get_model_commandline(the_config.models[0])
        # The alignment path is quoted exactly as in the DNA/protein
        # branches so that paths containing spaces are handled.
        command = "-y -s '%s' %s -K MK -n MPTREE -p 123456789 %s" % (
            alignment_path, model, cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    # Force raxml to write to the dir with the alignment in it
    aln_dir = os.path.dirname(alignment_path)
    command = ''.join([command, " -w '%s'" % os.path.abspath(aln_dir)])

    run_raxml(command)
    tree_path = os.path.join(aln_dir, "RAxML_parsimonyTree.MPTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'" %
            (tree_path))
        raise RaxmlError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
              tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("Topology estimation finished")

    return tree_path
Exemplo n.º 6
0
def make_topology(alignment_path, datatype, cmdline_extras):
    """Build a maximum parsimony starting tree with RAxML.

    RAxML is invoked with '-y' and a fixed random seed, and is told to
    write into the directory that holds the alignment.

    Args:
        alignment_path: alignment file (RAxML -s).
        datatype: one of "DNA", "protein", "morphology".
        cmdline_extras: extra options, normalised by check_defaults.

    Returns:
        The path to 'RAxML_parsimonyTree.MPTREE' beside the alignment.

    Raises:
        util.PartitionFinderError: on an unrecognised datatype.
        RaxmlError: when the expected tree file is missing after the run.
    """
    log.info("Making MP tree for %s", alignment_path)

    extras = check_defaults(cmdline_extras)

    if datatype not in ("DNA", "protein", "morphology"):
        log.error("Unrecognised datatype: '%s'" % datatype)
        raise util.PartitionFinderError

    # -p is a hard-coded random number seed.
    if datatype == "DNA":
        template = "-y -s '%s' -m GTRGAMMA -n MPTREE -p 123456789 %s"
        command = template % (alignment_path, extras)
    elif datatype == "protein":
        template = "-y -s '%s' -m PROTGAMMALG -n MPTREE -p 123456789 %s"
        command = template % (alignment_path, extras)
    else:
        # Morphology. LOOK OUT: this relies on the assumption that only a
        # single model can be specified for morphology analyses. A model
        # has to be chosen for RAxML to load the data properly.
        model_options = models.get_model_commandline(the_config.models[0])
        template = "-y -s '%s' %s -K MK -n MPTREE -p 123456789 %s"
        command = template % (alignment_path, model_options, extras)

    # Make RAxML write into the alignment's directory.
    alignment_dir = os.path.dirname(alignment_path)
    command += " -w '%s'" % os.path.abspath(alignment_dir)

    run_raxml(command)

    mp_tree_path = os.path.join(alignment_dir, "RAxML_parsimonyTree.MPTREE")

    if not os.path.exists(mp_tree_path):
        message = "RAxML tree topology should be here but can't be be found: '%s'" % mp_tree_path
        log.error(message)
        raise RaxmlError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", mp_tree_path)
    with open(mp_tree_path, 'r') as tree_file:
        log.debug('%s', tree_file.read())

    log.info("Topology estimation finished")

    return mp_tree_path
Exemplo n.º 7
0
def make_ml_topology(alignment_path, datatype, cmdline_extras, scheme, cpus):
    """Make an ML tree from a given partitioning scheme.

    Two RAxML runs are made through run_raxml_pthreads: a fast tree search
    ('-f E', output name fastTREE), followed by branch length estimation
    on that tree ('-f e', output name BLTREE). Both write into the
    directory that contains the alignment.

    Args:
        alignment_path: alignment file handed to RAxML with -s.
        datatype: "DNA", "protein" or "morphology".
        cmdline_extras: extra command line options; only used for the
            morphology tree search.
        scheme: partitioning scheme handed to write_partition_file
            (skipped when the_config.datatype is "morphology").
        cpus: passed straight to run_raxml_pthreads.

    Returns:
        Path of 'RAxML_result.BLTREE' in the alignment's directory.

    Raises:
        util.PartitionFinderError: if datatype is not recognised, or if
            the result tree does not exist after the second run.
    """
    log.info(
        "Estimating Maximum Likelihood tree with RAxML fast experimental tree search for %s",
        alignment_path)

    # NOTE(review): this tests the_config.datatype while the branches
    # below test the datatype argument; presumably they always agree -
    # confirm against callers.
    if the_config.datatype != "morphology":
        partition_file = write_partition_file(scheme, alignment_path)

    # First get the ML topology like this (-p is a hard-coded random
    # number seed):
    # we do this to an accuracy of 10 log likelihood units with -e 10
    # we use the rapid ML option in RAxML -f E
    if datatype == "DNA":
        log.info("Using a separate GTR+G model for each data block")
        command = " -f E -s '%s' -m GTRGAMMA -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 " % (
            alignment_path, partition_file)
    elif datatype == "protein":
        log.info("Using a separate LG+G model for each data block")
        command = " -f E -s '%s' -m PROTGAMMALG -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 " % (
            alignment_path, partition_file)
    elif datatype == "morphology":
        model = models.get_model_commandline(the_config.models[0])
        log.info("Using the model specified in the .cfg file")
        # The alignment path is quoted like every other command built in
        # this function so that paths containing spaces are handled.
        command = "-f E -s '%s' %s -n fastTREE -p 123456789 %s" % (
            alignment_path, model, cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    # Force raxml to write to the dir with the alignment in it
    alndir = os.path.dirname(alignment_path)
    command = ''.join([command, " -w '%s'" % os.path.abspath(alndir)])

    run_raxml_pthreads(command, cpus)

    fast_tree_path = os.path.join(alndir, "RAxML_fastTree.fastTREE")

    # now we make the branch lengths with a partitioned model without
    # rate multipliers
    if datatype == "DNA":
        log.info(
            "Estimating GTR+G branch lengths on ML tree using all partitions")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1  " % (
            alignment_path, fast_tree_path, partition_file,
            os.path.abspath(alndir))
    elif datatype == "protein":
        log.info(
            "Estimating LG+G branch lengths on ML tree using all partitions")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1  " % (
            alignment_path, fast_tree_path, partition_file,
            os.path.abspath(alndir))
    elif datatype == "morphology":
        log.info("Estimating branch lengths on ML tree")
        # 'model' was set in the morphology branch of the tree search.
        command = "-f e -s '%s' -t '%s' %s -O -n BLTREE -p 123456789 -w '%s' -e 1  " % (
            alignment_path, fast_tree_path, model, os.path.abspath(alndir))
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml_pthreads(command, cpus)
    tree_path = os.path.join(alndir, "RAxML_result.BLTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'" %
            (tree_path))
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
              tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("ML topology estimation finished")

    return tree_path
Exemplo n.º 8
0
def make_ml_topology(alignment_path, datatype, cmdline_extras, scheme, cpus):
    """Estimate an ML tree for a partitioning scheme using RAxML.

    Runs RAxML twice via run_raxml_pthreads: a fast tree search ('-f E',
    run name fastTREE), then branch length estimation on the resulting
    tree ('-f e', run name BLTREE). Output is written to the directory
    that contains the alignment.

    Args:
        alignment_path: alignment file (RAxML -s).
        datatype: one of "DNA", "protein", "morphology".
        cmdline_extras: extra options; only appended to the morphology
            tree search command.
        scheme: scheme given to write_partition_file (not called when
            the_config.datatype is "morphology").
        cpus: forwarded to run_raxml_pthreads.

    Returns:
        The path to 'RAxML_result.BLTREE' beside the alignment.

    Raises:
        util.PartitionFinderError: on an unrecognised datatype or when the
            expected result file is missing.
    """
    log.info("Estimating Maximum Likelihood tree with RAxML fast experimental tree search for %s", alignment_path)

    if the_config.datatype != "morphology":
        partition_path = write_partition_file(scheme, alignment_path)

    # Step 1: the ML topology. -p is a hard-coded random number seed,
    # -e 10 asks for an accuracy of 10 log likelihood units, and -f E is
    # RAxML's rapid ML option.
    if datatype == "DNA":
        log.info("Using a separate GTR+G model for each data block")
        search_template = " -f E -s '%s' -m GTRGAMMA -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 "
        command = search_template % (alignment_path, partition_path)
    elif datatype == "protein":
        log.info("Using a separate LG+G model for each data block")
        search_template = " -f E -s '%s' -m PROTGAMMALG -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 "
        command = search_template % (alignment_path, partition_path)
    elif datatype == "morphology":
        model_options = models.get_model_commandline(the_config.models[0])
        log.info("Using the model specified in the .cfg file")
        search_template = "-f E -s '%s' %s -n fastTREE -p 123456789 %s"
        command = search_template % (alignment_path, model_options, cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % datatype)
        raise util.PartitionFinderError

    # Make RAxML write into the alignment's directory.
    alignment_dir = os.path.dirname(alignment_path)
    command += " -w '%s'" % os.path.abspath(alignment_dir)

    run_raxml_pthreads(command, cpus)

    fast_tree = os.path.join(alignment_dir, "RAxML_fastTree.fastTREE")

    # Step 2: branch lengths with a partitioned model without rate
    # multipliers.
    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on ML tree using all partitions")
        bl_template = "-f e -s '%s' -t '%s' -m GTRGAMMA -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1  "
        command = bl_template % (
            alignment_path, fast_tree, partition_path,
            os.path.abspath(alignment_dir))
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on ML tree using all partitions")
        bl_template = "-f e -s '%s' -t '%s' -m PROTGAMMALG -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1  "
        command = bl_template % (
            alignment_path, fast_tree, partition_path,
            os.path.abspath(alignment_dir))
    elif datatype == "morphology":
        log.info("Estimating branch lengths on ML tree")
        bl_template = "-f e -s '%s' -t '%s' %s -O -n BLTREE -p 123456789 -w '%s' -e 1  "
        command = bl_template % (
            alignment_path, fast_tree, model_options,
            os.path.abspath(alignment_dir))
    else:
        log.error("Unrecognised datatype: '%s'" % datatype)
        raise util.PartitionFinderError

    run_raxml_pthreads(command, cpus)

    result_path = os.path.join(alignment_dir, "RAxML_result.BLTREE")

    if not os.path.exists(result_path):
        message = "RAxML tree topology should be here but can't be be found: '%s'" % result_path
        log.error(message)
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", result_path)
    with open(result_path, 'r') as tree_file:
        log.debug('%s', tree_file.read())

    log.info("ML topology estimation finished")

    return result_path