Example #1
def main():
    """Restarts a toil workflow.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = OptionParser()
    addOptions(parser)
    
    options, args = parser.parse_args()
    
    if len(args) > 1:
        parser.error("Unrecognised input arguments: %s" % " ".join(args))
        
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    assert len(args) <= 1  # Only the toil job store may be specified as an argument
    if len(args) == 1:  # Allow the toil directory as an argument
        options.jobStore = args[0]
        
    ##########################################
    #Now run the toil construction/leader
    ##########################################  
        
    setLoggingFromOptions(options)
    options.restart = True
    with setupToil(options) as (config, batchSystem, jobStore):
        jobStore.clean()
        mainLoop(config, batchSystem, jobStore, Job._loadRootJob(jobStore))
Example #2
def main():
    parser = getBasicOptionParser()
    parser.add_argument("--version", action='version', version=version)
    parser.add_argument(
        '-p',
        "--provisioner",
        dest='provisioner',
        choices=['aws'],
        required=True,
        help="The provisioner for cluster auto-scaling. Only aws is currently"
        "supported")
    parser.add_argument(
        "clusterName",
        help="The name that the cluster will be identifiable by")
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    provisioner = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError(
                'The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner
    else:
        assert False

    provisioner.sshLeader(clusterName=config.clusterName)
Example #3
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    cluster = Cluster(provisioner=config.provisioner, clusterName=config.clusterName)
    cluster.destroyCluster()
Example #4
def main():
    """Restarts a toil workflow.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  

    parser = getBasicOptionParser()

    parser.add_argument("--version", action='version', version=version)

    parser.add_argument("jobStore", type=str,
          help=("Store in which to place job management files \
          and the global accessed temporary files"
          "(If this is a file path this needs to be globally accessible "
          "by all machines running jobs).\n"
          "If the store already exists and restart is false an"
          " ExistingJobStoreException exception will be thrown."))

    options = parseBasicOptions(parser)
        
    ##########################################
    #Now run the toil construction/leader
    ##########################################  
        
    setLoggingFromOptions(options)
    options.restart = True
    with setupToil(options) as (config, batchSystem, jobStore):
        rootJob = Job._loadRootJob(jobStore)
        jobStore.clean(rootJob)
        mainLoop(config, batchSystem, jobStore, rootJob)
Example #5
def main():
    """Restarts a toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = OptionParser()
    addOptions(parser)
    
    options, args = parser.parse_args()
    
    if len(args) > 1:
        parser.error("Unrecognised input arguments: %s" % " ".join(args))
        
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    assert len(args) <= 1  # Only the toil directory may be specified as an argument
    if len(args) == 1:  # Allow the toil directory as an argument
        options.toil = args[0]
        
    ##########################################
    #Now run the toil construction/leader
    ##########################################  
        
    setLoggingFromOptions(options)
    with setupToil(options) as (config, batchSystem, jobStore):
        jobStore.clean()
        if "rootJob" not in config.attrib:
            print "There is no root batchjob in the toil from which to start, exiting"
            sys.exit(0)
        return mainLoop(config, batchSystem, jobStore, jobStore.load(config.attrib["rootJob"]))
Example #6
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
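    # argparse.REMAINDER collects every remaining command-line argument so it
    # can be forwarded verbatim to the cluster's ssh invocation.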
    parser.add_argument('args', nargs=argparse.REMAINDER)
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    cluster = Cluster(provisioner=config.provisioner,
                      clusterName=config.clusterName)
    cluster.sshCluster(args=config.args)
Example #7
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--nodeType", dest='nodeType', required=True,
                        help="Node type for {non-|}preemptable nodes. The syntax depends on the "
                             "provisioner used. For the aws provisioner this is the name of an "
                             "EC2 instance type followed by a colon and the price in dollar to "
                             "bid for a spot instance, for example 'c3.8xlarge:0.42'.")
    parser.add_argument("--keyPairName", dest='keyPairName', required=True,
                        help="The name of the AWS key pair to include on the instance")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags', default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its"
                             "children. Tags are of the form: "
                             " -t key1=value1 --tag key2=value2 "
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {"
                             "      \"Name\": clusterName,"
                             "      \"Owner\": IAM username"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not specified."
                        "This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("-w", "--workers", dest='workers', default=0, type=int,
                        help="Specify a number of workers to launch alongside the leader when the "
                             "cluster is created. This can be useful if running toil without "
                             "auto-scaling but with need of more hardware support")
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    spotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError('The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner()
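        # config.nodeType may carry a spot bid suffix, e.g. 'c3.8xlarge:0.42'.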
        parsedBid = config.nodeType.split(':', 1)
        if len(config.nodeType) != len(parsedBid[0]):
            # there is a bid
            spotBid = float(parsedBid[1])
            config.nodeType = parsedBid[0]
    else:
        assert False

    provisioner.launchCluster(instanceType=config.nodeType,
                              keyName=config.keyPairName,
                              clusterName=config.clusterName,
                              workers=config.workers,
                              spotBid=spotBid,
                              userTags=tagsDict,
                              zone=config.zone,
                              vpcSubnet=config.vpcSubnet)
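The createTagsDict helper used above is not shown on this page; a minimal sketch, assuming each --tag value is a NAME=VALUE string as the help text describes:

def createTagsDict(tagList):
    # Assumes each tag is of the form NAME=VALUE, per the --tag help text above.
    tagsDict = {}
    for tag in tagList:
        key, value = tag.split('=', 1)
        tagsDict[key] = value
    return tagsDict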
Example #8
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help = "Seq file")
    parser.add_argument("outputHal", type=str, help = "Output HAL file")

    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--root", dest="root", help="Name of ancestral node (which"
                      " must appear in NEWICK tree in <seqfile>) to use as a "
                      "root for the alignment.  Any genomes not below this node "
                      "in the tree may be used as outgroups but will never appear"
                      " in the output.  If no root is specifed then the root"
                      " of the tree is used. ", default=None)
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)
    parser.add_argument("--database", choices=["kyoto_tycoon", "redis"],
                        help="The type of database", default="kyoto_tycoon")

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    # cactus doesn't run with 1 core
    if options.batchSystem == 'singleMachine':
        if options.maxCores is not None:
            if int(options.maxCores) < 2:
                raise RuntimeError('Cactus requires --maxCores > 1')
        else:
            # is there a way to get this out of Toil?  That would be more consistent
            if cpu_count() < 2:
                raise RuntimeError('Only 1 CPU detected.  Cactus requires at least 2')

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusProgressive(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("Cactus has finished after {} seconds".format(run_time))
Example #9
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("outputSequenceDir", help='Directory where the processed sequences will be placed')
    parser.add_argument("--configFile", default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("inputSequences", nargs='+', help='input FASTA file(s)')

    options = parser.parse_args()
    setLoggingFromOptions(options)

    with Toil(options) as toil:
        stageWorkflow(outputSequenceDir=options.outputSequenceDir, configFile=options.configFile, inputSequences=options.inputSequences, toil=toil, restart=options.restart)
Example #10
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help = "Seq file (will be modified if necessary to include graph Fasta sequence)")
    parser.add_argument("minigraphGFA", help = "Minigraph-compatible reference graph in GFA format (can be gzipped)")
    parser.add_argument("outputPAF", type=str, help = "Output pairwise alignment file in PAF format")
    parser.add_argument("--outputFasta", type=str, help = "Output graph sequence file in FASTA format (required if not present in seqFile)")
    parser.add_argument("--maskFilter", type=int, help = "Ignore softmasked sequence intervals > Nbp (overrides config option of same name)")    
    parser.add_argument("--outputGAFDir", type=str, help = "Output GAF alignments (raw minigraph output before PAF conversion) to this directory")
    parser.add_argument("--refFromGFA", type=str, help = "Do not align given genome from seqfile, and instead extract its alignment from the rGFA tags (must have been used as reference for minigraph GFA construction)")

    #WDL hacks
    parser.add_argument("--pathOverrides", nargs="*", help="paths (multiple allowed) to override from seqFile")
    parser.add_argument("--pathOverrideNames", nargs="*", help="names (must be same number as --pathOverrides) of path overrides")

    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if options.outputGAFDir:
        if not os.path.isdir(options.outputGAFDir):
            os.makedirs(options.outputGAFDir)

    if (options.pathOverrides or options.pathOverrideNames):
        if not options.pathOverrides or not options.pathOverrideNames or \
           len(options.pathOverrideNames) != len(options.pathOverrides):
            raise RuntimeError('same number of values must be passed to --pathOverrides and --pathOverrideNames')

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusGraphMap(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-graphmap has finished after {} seconds".format(run_time))
Example #11
def main():
    parser = getBasicOptionParser()
    parser.add_argument("--version", action='version', version=version)
    parser.add_argument(
        "--nodeType",
        dest='nodeType',
        required=True,
        help="Node type for {non-|}preemptable nodes. The syntax depends on the "
        "provisioner used. For the aws provisioner this is the name of an "
        "EC2 instance type followed by a colon and the price in dollar to "
        "bid for a spot instance, for example 'c3.8xlarge:0.42'.")
    parser.add_argument(
        '-p',
        "--provisioner",
        dest='provisioner',
        choices=['aws'],
        required=True,
        help="The provisioner for cluster auto-scaling. Only aws is currently"
        "supported")
    parser.add_argument(
        "clusterName",
        help="The name that the cluster will be identifiable by")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        required=True,
        help="The name of the AWS key pair to include on the instance")
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    spotBid = None
    provisioner = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError(
                'The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner
        parsedBid = config.nodeType.split(':', 1)
        if len(config.nodeType) != len(parsedBid[0]):
            # there is a bid
            spotBid = float(parsedBid[1])
            config.nodeType = parsedBid[0]
    else:
        assert False

    provisioner.launchCluster(instanceType=config.nodeType,
                              clusterName=config.clusterName,
                              keyName=config.keyPairName,
                              spotBid=spotBid)
Example #12
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("--vg", required=True, nargs='+',  help = "Input vg files (PackedGraph or HashGraph format)")
    parser.add_argument("--outDir", required=True, type=str, help = "Output directory")
    parser.add_argument("--outName", required=True, type=str, help = "Basename of all output files")
    parser.add_argument("--reference", required=True, type=str, help = "Reference event name")
    parser.add_argument("--vcfReference", type=str, help = "Reference event for VCF (if different from --reference)")
    parser.add_argument("--rename", nargs='+', default = [], help = "Path renaming, each of form src>dest (see clip-vg -r)")
    parser.add_argument("--clipLength", type=int, default=None, help = "clip out unaligned sequences longer than this")
    parser.add_argument("--wlineSep", type=str, help = "wline separator for vg convert")
    parser.add_argument("--indexCores", type=int, default=1, help = "cores for indexing processes")
    parser.add_argument("--decoyGraph", help= "decoy sequences vg graph to add (PackedGraph or HashGraph format)")
    parser.add_argument("--hal", nargs='+', default = [], help = "Input hal files (for merging)")
    
    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if options.outDir and not options.outDir.startswith('s3://'):
        if not os.path.isdir(options.outDir):
            os.makedirs(options.outDir)

    if options.hal and len(options.hal) != len(options.vg):
        raise RuntimeError("--hal and --vg must specify the same number of files")
        
    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusGraphMapJoin(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-graphmap-join has finished after {} seconds".format(run_time))
Example #13
def startToil(job, options):
    """
    Runs the toil using the given options (see Job.Runner.getDefaultOptions
    and Job.Runner.addToilOptions) starting with this job.

    Raises an exception if the given toil already exists.
    """
    setLoggingFromOptions(options)
    with setupToil(options) as (config, batchSystem, jobStore):
        jobStore.clean()
        if "rootJob" not in config.attrib:  # No jobs have yet been run
            # Set up the first batchjob.
            rootJob = job._serialiseFirstJob(jobStore)
        else:
            rootJob = jobStore.load(config.attrib["rootJob"])
        return mainLoop(config, batchSystem, jobStore, rootJob)
Example #14
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help = "Seq file (gzipped fastas supported)")
    parser.add_argument("minigraphGFA", help = "Minigraph-compatible reference graph in GFA format (can be gzipped)")
    parser.add_argument("graphmapPAF", type=str, help = "Output pairwise alignment file in PAF format (can be gzipped)")
    parser.add_argument("--outDir", required=True, type=str, help = "Output directory")
    parser.add_argument("--refContigs", nargs="*", help = "Subset to these reference contigs (multiple allowed)", default=[])
    parser.add_argument("--refContigsFile", type=str, help = "Subset to (newline-separated) reference contigs in this file")
    parser.add_argument("--otherContig", type=str, help = "Lump all reference contigs unselected by above options into single one with this name")
    parser.add_argument("--reference", type=str, help = "Name of reference (in seqFile).  Ambiguity filters will not be applied to it")
    parser.add_argument("--maskFilter", type=int, help = "Ignore softmasked sequence intervals > Nbp")
    
    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if options.outDir and not options.outDir.startswith('s3://'):
        if not os.path.isdir(options.outDir):
            os.makedirs(options.outDir)
        
    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusGraphMapSplit(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-graphmap-split has finished after {} seconds".format(run_time))
Example #15
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Arguments to pass to"
        "`rsync`. Takes any arguments that rsync accepts. Specify the"
        " remote with a colon. For example, to upload `example.py`,"
        " specify `toil rsync-cluster -p aws test-cluster example.py :`."
        "\nOr, to download a file from the remote:, `toil rsync-cluster"
        " -p aws test-cluster :example.py .`")
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    cluster = Cluster(provisioner=config.provisioner,
                      clusterName=config.clusterName,
                      zone=config.zone)
    cluster.rsyncCluster(args=config.args)
Example #16
def startToil(job, options):
    """
    Runs the toil workflow using the given options
    (see Job.Runner.getDefaultOptions and Job.Runner.addToilOptions)
    starting with this job.

    :raises: toil.leader.FailedJobsException if at the end of the function
        there remain failed jobs
    """
    setLoggingFromOptions(options)
    with setupToil(options, userScript=job.getUserScript()) as (config, batchSystem, jobStore):
        if options.restart:
            jobStore.clean()  # This cleans up any half-written jobs after a restart
            rootJob = job._loadRootJob(jobStore)
        else:
            # Set up the first wrapper.
            rootJob = job._serialiseFirstJob(jobStore)
        return mainLoop(config, batchSystem, jobStore, rootJob)
Example #17
def main():
    """Restarts a toil workflow.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument("--version", action='version', version=version)

    parser.add_argument(
        "jobStore",
        type=str,
        help=("Store in which to place job management files "
              "and the globally accessible temporary files "
              "(if this is a file path it needs to be globally accessible "
              "by all machines running jobs).\n"
              "If the store already exists and restart is false, an "
              "ExistingJobStoreException will be thrown."))

    options = parseBasicOptions(parser)

    ##########################################
    #Now run the toil construction/leader
    ##########################################

    setLoggingFromOptions(options)
    options.restart = True
    with setupToil(options) as (config, batchSystem, jobStore):
        # Load the whole jobstore into memory in a batch
        logger.info("Downloading entire JobStore")
        jobCache = {
            jobWrapper.jobStoreID: jobWrapper
            for jobWrapper in jobStore.jobs()
        }
        logger.info("{} jobs downloaded.".format(len(jobCache)))
        rootJob = Job._loadRootJob(jobStore)
        jobStore.clean(rootJob, jobCache=jobCache)
        mainLoop(config,
                 batchSystem,
                 jobStore,
                 rootJob,
                 jobCache=jobCache)
Example #18
    def __enter__(self):
        """
        Derive configuration from the command line options, load the job store and, on restart,
        consolidate the derived configuration with the one from the previous invocation of the
        workflow.
        """
        setLoggingFromOptions(self.options)
        self._inContextManager = True
        self.config = Config()
        self.config.setOptions(self.options)
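        # On restart, pass config=None so the existing job store is loaded
        # rather than re-created with a fresh configuration.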
        self._jobStore = self.loadOrCreateJobStore(self.config.jobStore,
                                                   config=None if self.config.restart else self.config)
        if self.config.restart:
            # Reload configuration from job store
            self.config = self._jobStore.config
            self.config.setOptions(self.options)
            self._jobStore.writeConfigToStore()

        return self
Example #19
    def __enter__(self):
        """
        Derive configuration from the command line options, load the job store and, on restart,
        consolidate the derived configuration with the one from the previous invocation of the
        workflow.
        """
        setLoggingFromOptions(self.options)
        self._inContextManager = True
        self.config = Config()
        self.config.setOptions(self.options)
        self._jobStore = self.loadOrCreateJobStore(self.config.jobStore,
                                                   config=None if self.config.restart else self.config)
        if self.config.restart:
            # Reload configuration from job store
            self.config = self._jobStore.config
            self.config.setOptions(self.options)
            self.config.workflowAttemptNumber += 1
            self._jobStore.writeConfigToStore()

        return self
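These __enter__ methods are what the "with Toil(options) as toil:" pattern in the other examples relies on; a minimal usage sketch (rootJob here is a hypothetical user-defined first job):

with Toil(options) as toil:
    if options.restart:
        result = toil.restart()
    else:
        result = toil.start(rootJob)  # rootJob: hypothetical first job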
Example #20
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("--outputSequenceDir",
                        dest="outputSequenceDir",
                        type=str)
    parser.add_argument("--configFile", dest="configFile", type=str)
    parser.add_argument("--inputSequences",
                        dest="inputSequences",
                        type=str,
                        nargs='+')

    options = parser.parse_args()
    setLoggingFromOptions(options)

    if not (options.outputSequenceDir and options.configFile
            and options.inputSequences):
        raise RuntimeError("Too few input arguments")
    with Toil(options) as toil:
        stageWorkflow(outputSequenceDir=options.outputSequenceDir,
                      configFile=options.configFile,
                      inputSequences=options.inputSequences,
                      toil=toil,
                      restart=options.restart)
Example #21
def __enter__(self):
    """
    Derive configuration from the command line options, load the job store and, on restart,
    consolidate the derived configuration with the one from the previous invocation of the
    workflow.
    """
    setLoggingFromOptions(self.options)
    config = Config()
    config.setOptions(self.options)
    jobStore = self.getJobStore(config.jobStore)
    if not config.restart:
        config.workflowAttemptNumber = 0
        jobStore.initialize(config)
    else:
        jobStore.resume()
        # Merge configuration from job store with command line options
        config = jobStore.config
        config.setOptions(self.options)
        config.workflowAttemptNumber += 1
        jobStore.writeConfig()
    self.config = config
    self._jobStore = jobStore
    self._inContextManager = True
    return self
Example #22
def main():
    """Restarts a toil workflow.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  

    parser = getBasicOptionParser()

    parser.add_argument("--version", action='version', version=version)

    parser.add_argument("jobStore", type=str,
          help=("Store in which to place job management files \
          and the global accessed temporary files"
          "(If this is a file path this needs to be globally accessible "
          "by all machines running jobs).\n"
          "If the store already exists and restart is false an"
          " ExistingJobStoreException exception will be thrown."))

    options = parseBasicOptions(parser)
        
    ##########################################
    #Now run the toil construction/leader
    ##########################################  
        
    setLoggingFromOptions(options)
    options.restart = True
    with setupToil(options) as (config, batchSystem, jobStore):
        # Load the whole jobstore into memory in a batch
        logger.info("Downloading entire JobStore")
        jobCache = {jobWrapper.jobStoreID: jobWrapper
            for jobWrapper in jobStore.jobs()}
        logger.info("{} jobs downloaded.".format(len(jobCache)))
        rootJob = Job._loadRootJob(jobStore)
        jobStore.clean(rootJob, jobCache=jobCache)
        mainLoop(config, batchSystem, jobStore, rootJob, jobCache=jobCache)
Example #23
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument("outputHal", type=str, help="Output HAL file")

    #Progressive Cactus Options
    parser.add_argument("--database",
                        dest="database",
                        help="Database type: tokyo_cabinet or kyoto_tycoon"
                        " [default: %(default)s]",
                        default="kyoto_tycoon")
    parser.add_argument("--configFile",
                        dest="configFile",
                        help="Specify cactus configuration file",
                        default=None)
    parser.add_argument(
        "--root",
        dest="root",
        help="Name of ancestral node (which"
        " must appear in NEWICK tree in <seqfile>) to use as a "
        "root for the alignment.  Any genomes not below this node "
        "in the tree may be used as outgroups but will never appear"
        " in the output.  If no root is specified then the root"
        " of the tree is used.",
        default=None)
    parser.add_argument(
        "--latest",
        dest="latest",
        action="store_true",
        help="Use the latest version of the docker container "
        "rather than pulling one matching this version of cactus")
    parser.add_argument(
        "--containerImage",
        dest="containerImage",
        default=None,
        help="Use the specified pre-built container image "
        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode",
                        choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries",
                        default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)

    # Mess with some toil options to create useful defaults.

    # Caching generally slows down the cactus workflow, plus some
    # methods like readGlobalFileStream don't support forced
    # reads directly from the job store rather than from cache.
    options.disableCaching = True
    # Job chaining breaks service termination timing, causing unused
    # databases to accumulate and waste memory for no reason.
    options.disableChaining = True
    # The default deadlockWait is currently 60 seconds. This can cause
    # issues if the database processes take a while to actually begin
    # after they're issued. Change it to at least an hour so that we
    # don't preemptively declare a deadlock.
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600
    if options.retryCount is None:
        # If the user didn't specify a retryCount value, make it 5
        # instead of Toil's default (1).
        options.retryCount = 5
    runCactusProgressive(options)
Example #24
def main(args=None, stdout=sys.stdout):
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", type=str, nargs="?", default=None)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--conformance-test", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet",
                        dest="logLevel",
                        action="store_const",
                        const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=version)
    parser.add_argument(
        "--preserve-environment",
        type=str,
        nargs='+',
        help=
        "Preserve specified environment variables when running CommandLineTools",
        metavar=("VAR1,VAR2"),
        default=("PATH", ),
        dest="preserve_environment")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    options = parser.parse_args([workdir] + args)

    use_container = not options.no_container

    setLoggingFromOptions(options)
    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    try:
        t = cwltool.load_tool.load_tool(options.cwltool,
                                        cwltool.workflow.defaultMakeTool)
    except cwltool.process.UnsupportedRequirement as e:
        logging.error(e)
        return 33

    if options.conformance_test:
        loader = schema_salad.ref_resolver.Loader({})
    else:
        jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = schema_salad.ref_resolver.Loader(jobloaderctx)

    if options.cwljob:
        uri = (options.cwljob if urlparse.urlparse(options.cwljob).scheme else
               "file://" + os.path.abspath(options.cwljob))
        job, _ = loader.resolve_ref(uri, checklinks=False)
    else:
        job = {}

    def unsupportedCheck(p):
        """Check for file inputs we don't currently support in Toil:

        - Directories
        - File literals
        """
        if p.get("class") == "Directory":
            raise cwltool.process.UnsupportedRequirement(
                "CWL Directory inputs not yet supported in Toil")
        if p.get("contents") and (not p.get("path") and not p.get("location")):
            raise cwltool.process.UnsupportedRequirement(
                "CWL File literals not yet supported in Toil")

    try:
        cwltool.builder.adjustDirObjs(job, unsupportedCheck)
        cwltool.builder.adjustFileObjs(job, unsupportedCheck)
    except cwltool.process.UnsupportedRequirement as e:
        logging.error(e)
        return 33

    cwltool.builder.adjustDirObjs(job, pathToLoc)
    cwltool.builder.adjustFileObjs(job, pathToLoc)

    if type(t) == int:
        return t

    fillInDefaults(t.tool["inputs"], job)

    if options.conformance_test:
        adjustFiles(job, lambda x: x.replace("file://", ""))
        stdout.write(
            json.dumps(cwltool.main.single_job_executor(
                t,
                job,
                basedir=options.basedir,
                tmpdir_prefix="tmp",
                conformance_test=True,
                use_container=use_container,
                preserve_environment=options.preserve_environment),
                       indent=4))
        return 0

    if not options.basedir:
        options.basedir = os.path.dirname(
            os.path.abspath(options.cwljob or options.cwltool))

    outdir = options.outdir

    with Toil(options) as toil:

        def importDefault(tool):
            cwltool.builder.adjustDirObjs(tool, locToPath)
            cwltool.builder.adjustFileObjs(tool, locToPath)
            adjustFiles(
                tool, lambda x: "file://%s" % x
                if not urlparse.urlparse(x).scheme else x)
            adjustFiles(tool, functools.partial(writeFile, toil.importFile,
                                                {}))

        t.visit(importDefault)

        basedir = os.path.dirname(
            os.path.abspath(options.cwljob or options.cwltool))
        builder = t._init_job(job, basedir=basedir)
        (wf1, wf2) = makeJob(t, {},
                             use_container=use_container,
                             preserve_environment=options.preserve_environment,
                             tmpdir=os.path.realpath(outdir))
        cwltool.builder.adjustDirObjs(builder.job, locToPath)
        cwltool.builder.adjustFileObjs(builder.job, locToPath)
        adjustFiles(
            builder.job,
            lambda x: "file://%s" % os.path.abspath(os.path.join(basedir, x))
            if not urlparse.urlparse(x).scheme else x)
        cwltool.builder.adjustDirObjs(builder.job, pathToLoc)
        cwltool.builder.adjustFileObjs(builder.job, pathToLoc)
        cwltool.builder.adjustFileObjs(builder.job, addFilePartRefs)
        adjustFiles(builder.job,
                    functools.partial(writeFile, toil.importFile, {}))
        wf1.cwljob = builder.job

        outobj = toil.start(wf1)
        outobj = resolve_indirect(outobj)

        adjustFilesWithSecondary(
            outobj,
            functools.partial(getFile,
                              toil,
                              outdir,
                              index={},
                              export=True,
                              rename_collision=True))

        stdout.write(json.dumps(outobj, indent=4))

    return 0
Example #25
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("seqFile", help="Input Seq file")
    parser.add_argument(
        "outSeqFile",
        help="Output Seq file (e.g. generated with cactus-prepare)")
    parser.add_argument("--configFile",
                        default=os.path.join(cactusRootPath(),
                                             "cactus_progressive_config.xml"))
    parser.add_argument(
        "--inputNames",
        nargs='*',
        help=
        'input genome names (not paths) to preprocess (all leaves from Input Seq file if none specified)'
    )
    parser.add_argument(
        "--latest",
        dest="latest",
        action="store_true",
        help="Use the latest version of the docker container "
        "rather than pulling one matching this version of cactus")
    parser.add_argument(
        "--containerImage",
        dest="containerImage",
        default=None,
        help="Use the specified pre-built container image "
        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode",
                        choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries",
                        default=None)

    options = parser.parse_args()
    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    inSeqFile = SeqFile(options.seqFile)
    outSeqFile = SeqFile(options.outSeqFile)

    inNames = options.inputNames
    if not inNames:
        inNames = [
            inSeqFile.tree.getName(node)
            for node in inSeqFile.tree.getLeaves()
        ]

    inSeqPaths = []
    outSeqPaths = []

    for inName in inNames:
        if inName not in inSeqFile.pathMap or inName not in outSeqFile.pathMap:
            raise RuntimeError(
                '{} not present in both input and output Seq files'.format(inName))
        inPath = inSeqFile.pathMap[inName]
        outPath = outSeqFile.pathMap[inName]
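        # A Seq file entry may be a directory of FASTA files; mirror its
        # contents in the corresponding output directory.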
        if os.path.isdir(inPath):
            try:
                os.makedirs(outPath)
            except OSError:
                # The output directory may already exist.
                pass
            assert os.path.isdir(inPath) == os.path.isdir(outPath)
            inSeqPaths += [
                os.path.join(inPath, seqPath) for seqPath in os.listdir(inPath)
            ]
            outSeqPaths += [
                os.path.join(outPath, seqPath)
                for seqPath in os.listdir(inPath)
            ]
        else:
            inSeqPaths += [inPath]
            outSeqPaths += [outPath]

    with Toil(options) as toil:
        stageWorkflow(outputSequenceDir=None,
                      configFile=options.configFile,
                      inputSequences=inSeqPaths,
                      toil=toil,
                      restart=options.restart,
                      outputSequences=outSeqPaths)
Example #26
def main(args=None, stdout=sys.stdout):
    parser = argparse.ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet",
                        dest="logLevel",
                        action="store_const",
                        const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    parser.add_argument("--user-space-docker-cmd",
                        help="(Linux/OS X only) Specify a user space docker "
                        "command (like udocker or dx-docker) that will be "
                        "used to call 'pull' and 'run'")
    parser.add_argument(
        "--preserve-environment",
        type=str,
        nargs='+',
        help=
        "Preserve specified environment variables when running CommandLineTools",
        metavar=("VAR1 VAR2"),
        default=("PATH", ),
        dest="preserve_environment")
    # help="Dependency resolver configuration file describing how to adapt 'SoftwareRequirement' packages to current system."
    parser.add_argument("--beta-dependency-resolvers-configuration",
                        default=None)
    # help="Defaut root directory used by dependency resolvers configuration."
    parser.add_argument("--beta-dependencies-directory", default=None)
    # help="Use biocontainers for tools without an explicitly annotated Docker container."
    parser.add_argument("--beta-use-biocontainers",
                        default=None,
                        action="store_true")
    # help="Short cut to use Conda to resolve 'SoftwareRequirement' packages."
    parser.add_argument("--beta-conda-dependencies",
                        default=None,
                        action="store_true")
    parser.add_argument("--tmpdir-prefix",
                        type=Text,
                        help="Path prefix for temporary directories",
                        default="tmp")
    parser.add_argument("--tmp-outdir-prefix",
                        type=Text,
                        help="Path prefix for intermediate output directories",
                        default="tmp")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    options = parser.parse_args([workdir] + args)

    use_container = not options.no_container

    setLoggingFromOptions(options)
    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    fileindex = {}
    existing = {}
    make_tool_kwargs = {}
    conf_file = getattr(options, "beta_dependency_resolvers_configuration",
                        None)  # Text
    use_conda_dependencies = getattr(options, "beta_conda_dependencies",
                                     None)  # Text
    job_script_provider = None
    if conf_file or use_conda_dependencies:
        dependencies_configuration = DependenciesConfiguration(
            options)  # type: DependenciesConfiguration
        job_script_provider = dependencies_configuration

    options.default_container = None
    make_tool_kwargs["find_default_container"] = functools.partial(
        find_default_container, options)

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            toil.config.linkImports = False
            useStrict = not options.not_strict
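            # Advertise Toil's default resources to the CWL tool as a
            # ResourceRequirement hint; defaultMemory and defaultDisk are in
            # bytes, so convert them to MiB.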
            make_tool_kwargs["hints"] = [{
                "class":
                "ResourceRequirement",
                "coresMin":
                toil.config.defaultCores,
                "ramMin":
                toil.config.defaultMemory / (2**20),
                "outdirMin":
                toil.config.defaultDisk / (2**20),
                "tmpdirMin":
                0
            }]
            try:
                t = cwltool.load_tool.load_tool(
                    options.cwltool,
                    toilMakeTool,
                    kwargs=make_tool_kwargs,
                    resolver=cwltool.resolver.tool_resolver,
                    strict=useStrict)
                unsupportedRequirementsCheck(t.requirements)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            if type(t) == int:
                return t

            options.workflow = options.cwltool
            options.job_order = options.cwljob
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job, options.basedir, loader = cwltool.main.load_job_order(
                options, sys.stdin, None, [], options.job_order)
            job = cwltool.main.init_job_order(job, options, t, loader=loader)

            fillInDefaults(t.tool["inputs"], job)

            def pathToLoc(p):
                if "location" not in p and "path" in p:
                    p["location"] = p["path"]
                    del p["path"]

            def importFiles(tool):
                visit_class(tool, ("File", "Directory"), pathToLoc)
                normalizeFilesDirs(tool)
                adjustDirObjs(
                    tool,
                    functools.partial(get_listing,
                                      cwltool.stdfsaccess.StdFsAccess(""),
                                      recursive=True))
                adjustFileObjs(
                    tool,
                    functools.partial(uploadFile,
                                      toil.importFile,
                                      fileindex,
                                      existing,
                                      skip_broken=True))

            t.visit(importFiles)

            for inp in t.tool["inputs"]:

                def setSecondary(fileobj):
                    if isinstance(fileobj,
                                  dict) and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [{
                                "location":
                                cwltool.builder.substitute(
                                    fileobj["location"], sf),
                                "class":
                                "File"
                            } for sf in inp["secondaryFiles"]]

                    if isinstance(fileobj, list):
                        for e in fileobj:
                            setSecondary(e)

                if shortname(inp["id"]) in job and inp.get("secondaryFiles"):
                    setSecondary(job[shortname(inp["id"])])

            importFiles(job)
            visitSteps(t, importFiles)

            try:
                make_opts = copy.deepcopy(vars(options))
                make_opts.update({
                    'tool': t,
                    'jobobj': {},
                    'use_container': use_container,
                    'tmpdir': os.path.realpath(outdir),
                    'job_script_provider': job_script_provider
                })

                (wf1, wf2) = makeJob(**make_opts)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            wf1.cwljob = job
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        toilStageFiles(toil, outobj, outdir, fileindex, existing, True)

        visit_class(
            outobj, ("File", ),
            functools.partial(compute_checksums,
                              cwltool.stdfsaccess.StdFsAccess("")))

        stdout.write(json.dumps(outobj, indent=4))

    return 0
Example #27
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help = "Seq file")
    parser.add_argument("outputHal", type=str, help = "Output HAL file")

    #Progressive Cactus Options
    parser.add_argument("--database", dest="database",
                      help="Database type: tokyo_cabinet or kyoto_tycoon"
                      " [default: %(default)s]",
                      default="kyoto_tycoon")
    parser.add_argument("--configFile", dest="configFile",
                      help="Specify cactus configuration file",
                      default=None)
    parser.add_argument("--root", dest="root", help="Name of ancestral node (which"
                      " must appear in NEWICK tree in <seqfile>) to use as a "
                      "root for the alignment.  Any genomes not below this node "
                      "in the tree may be used as outgroups but will never appear"
                      " in the output.  If no root is specifed then the root"
                      " of the tree is used. ", default=None)   
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)

    # Mess with some toil options to create useful defaults.

    # Caching generally slows down the cactus workflow, plus some
    # methods like readGlobalFileStream don't support forced
    # reads directly from the job store rather than from cache.
    options.disableCaching = True
    # Job chaining breaks service termination timing, causing unused
    # databases to accumulate and waste memory for no reason.
    options.disableChaining = True
    # The default deadlockWait is currently 60 seconds. This can cause
    # issues if the database processes take a while to actually begin
    # after they're issued. Change it to at least an hour so that we
    # don't preemptively declare a deadlock.
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600
    if options.retryCount is None:
        # If the user didn't specify a retryCount value, make it 5
        # instead of Toil's default (1).
        options.retryCount = 5

    with Toil(options) as toil:
        importSingularityImage()
        #Run the workflow
        if options.restart:
            halID = toil.restart()
        else:
            options.cactusDir = getTempDirectory()
            #Create the progressive cactus project 
            projWrapper = ProjectWrapper(options)
            projWrapper.writeXml()

            pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                                  '%s_project.xml' % ProjectWrapper.alignmentDirName)
            assert os.path.exists(pjPath)

            project = MultiCactusProject()

            if not os.path.isdir(options.cactusDir):
                os.makedirs(options.cactusDir)

            project.readXML(pjPath)
            #import the sequences
            seqIDs = []
            print "Importing %s sequences" % (len(project.getInputSequencePaths()))
            for seq in project.getInputSequencePaths():
                if os.path.isdir(seq):
                    tmpSeq = getTempFile()
                    catFiles([os.path.join(seq, subSeq) for subSeq in os.listdir(seq)], tmpSeq)
                    seq = tmpSeq
                seq = makeURL(seq)
                seqIDs.append(toil.importFile(seq))
            project.setInputSequenceIDs(seqIDs)

            #import cactus config
            if options.configFile:
                cactusConfigID = toil.importFile(makeURL(options.configFile))
            else:
                cactusConfigID = toil.importFile(makeURL(project.getConfigPath()))
            project.setConfigID(cactusConfigID)

            project.syncToFileStore(toil)
            configNode = ET.parse(project.getConfigPath()).getroot()
            configWrapper = ConfigWrapper(configNode)
            configWrapper.substituteAllPredefinedConstantsWithLiterals()


            project.writeXML(pjPath)
            halID = toil.start(RunCactusPreprocessorThenProgressiveDown(options, project, memory=configWrapper.getDefaultMemory()))

        toil.exportFile(halID, makeURL(options.outputHal))
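
The Toil option tweaks above (disableCaching, disableChaining, deadlockWait, retryCount) are written inline in this example, while later examples call cactus_override_toil_options(options) instead. A hedged sketch of how such a helper could consolidate the same defaults; the name and body below are illustrative, not cactus's actual implementation:

# Illustrative consolidation of the inline Toil option tweaks above; the
# real cactus_override_toil_options used by later examples may differ.
def override_toil_options_sketch(options):
    options.disableCaching = True    # caching slows down the cactus workflow
    options.disableChaining = True   # chaining breaks service termination timing
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600  # give database services time to start
    if options.retryCount is None:
        options.retryCount = 5       # more forgiving than Toil's default of 1
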
Example #34
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument("outputHal", type=str, help="Output HAL file")

    #Progressive Cactus Options
    parser.add_argument("--configFile",
                        dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(),
                                             "cactus_progressive_config.xml"))
    parser.add_argument(
        "--root",
        dest="root",
        help="Name of ancestral node (which"
        " must appear in NEWICK tree in <seqfile>) to use as a "
        "root for the alignment.  Any genomes not below this node "
        "in the tree may be used as outgroups but will never appear"
        " in the output.  If no root is specifed then the root"
        " of the tree is used. ",
        default=None)
    parser.add_argument(
        "--latest",
        dest="latest",
        action="store_true",
        help="Use the latest version of the docker container "
        "rather than pulling one matching this version of cactus")
    parser.add_argument(
        "--containerImage",
        dest="containerImage",
        default=None,
        help="Use the the specified pre-built containter image "
        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode",
                        choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries",
                        default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    # cactus doesn't run with 1 core
    if options.batchSystem == 'singleMachine':
        if options.maxCores is not None:
            if int(options.maxCores) < 2:
                raise RuntimeError('Cactus requires --maxCores > 1')
        else:
            # is there a way to get this out of Toil?  That would be more consistent
            if cpu_count() < 2:
                raise RuntimeError(
                    'Only 1 CPU detected.  Cactus requires at least 2')

    # tokyo_cabinet is no longer supported
    options.database = "kyoto_tycoon"

    # Mess with some toil options to create useful defaults.

    # Caching generally slows down the cactus workflow, plus some
    # methods like readGlobalFileStream don't support forced
    # reads directly from the job store rather than from cache.
    options.disableCaching = True
    # Job chaining breaks service termination timing, causing unused
    # databases to accumulate and waste memory for no reason.
    options.disableChaining = True
    # The default deadlockWait is currently 60 seconds. This can cause
    # issues if the database processes take a while to actually begin
    # after they're issued. Change it to at least an hour so that we
    # don't preemptively declare a deadlock.
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600
    if options.retryCount is None and options.batchSystem != 'singleMachine':
        # If the user didn't specify a retryCount value, make it 5
        # instead of Toil's default (1).
        options.retryCount = 5

    start_time = timeit.default_timer()
    runCactusProgressive(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("Cactus has finished after {} seconds".format(run_time))
Example #35
def main(args=None, stdout=sys.stdout):
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", type=str, nargs="?", default=None)

    # Will override the "jobStore" positional argument, enabling the
    # user to select a jobStore or get a default from the logic below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--conformance-test", action="store_true")
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const", const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    parser.add_argument("--preserve-environment", type=str, nargs='+',
                    help="Preserve specified environment variables when running CommandLineTools",
                    metavar=("VAR1 VAR2"),
                    default=("PATH",),
                    dest="preserve_environment")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    options = parser.parse_args([workdir] + args)

    use_container = not options.no_container

    setLoggingFromOptions(options)
    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    useStrict = not options.not_strict
    try:
        t = cwltool.load_tool.load_tool(options.cwltool, cwltool.workflow.defaultMakeTool,
                                        resolver=cwltool.resolver.tool_resolver, strict=useStrict)
        unsupportedRequirementsCheck(t.requirements)
    except cwltool.process.UnsupportedRequirement as e:
        logging.error(e)
        return 33

    if options.conformance_test:
        loader = schema_salad.ref_resolver.Loader({})
    else:
        jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = schema_salad.ref_resolver.Loader(jobloaderctx)

    if options.cwljob:
        uri = (options.cwljob if urlparse.urlparse(options.cwljob).scheme
               else "file://" + os.path.abspath(options.cwljob))
        job, _ = loader.resolve_ref(uri, checklinks=False)
    else:
        job = {}

    try:
        cwltool.pathmapper.adjustDirObjs(job, unsupportedInputCheck)
        cwltool.pathmapper.adjustFileObjs(job, unsupportedInputCheck)
    except cwltool.process.UnsupportedRequirement as e:
        logging.error(e)
        return 33

    cwltool.pathmapper.adjustDirObjs(job, pathToLoc)
    cwltool.pathmapper.adjustFileObjs(job, pathToLoc)

    if type(t) == int:
        return t

    fillInDefaults(t.tool["inputs"], job)

    if options.conformance_test:
        adjustFiles(job, lambda x: x.replace("file://", ""))
        stdout.write(json.dumps(
            cwltool.main.single_job_executor(t, job, basedir=options.basedir,
                                             tmpdir_prefix="tmp",
                                             conformance_test=True, use_container=use_container,
                                             preserve_environment=options.preserve_environment), indent=4))
        return 0

    if not options.basedir:
        options.basedir = os.path.dirname(os.path.abspath(options.cwljob or options.cwltool))

    outdir = options.outdir

    with Toil(options) as toil:
        def importDefault(tool):
            cwltool.pathmapper.adjustDirObjs(tool, locToPath)
            cwltool.pathmapper.adjustFileObjs(tool, locToPath)
            adjustFiles(tool, lambda x: "file://%s" % x if not urlparse.urlparse(x).scheme else x)
            adjustFiles(tool, functools.partial(writeFile, toil.importFile, {}, {}))
        t.visit(importDefault)

        if options.restart:
            outobj = toil.restart()
        else:
            basedir = os.path.dirname(os.path.abspath(options.cwljob or options.cwltool))
            builder = t._init_job(job, basedir=basedir, use_container=use_container)
            (wf1, wf2) = makeJob(t, {}, use_container=use_container,
                    preserve_environment=options.preserve_environment,
                    tmpdir=os.path.realpath(outdir), builder=builder)
            try:
                if isinstance(wf1, CWLWorkflow):
                    for step in wf1.cwlwf.steps:
                        unsupportedDefaultCheck(step.tool)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            cwltool.pathmapper.adjustDirObjs(builder.job, locToPath)
            cwltool.pathmapper.adjustFileObjs(builder.job, locToPath)
            adjustFiles(builder.job, lambda x: "file://%s" % os.path.abspath(os.path.join(basedir, x))
                        if not urlparse.urlparse(x).scheme else x)
            cwltool.pathmapper.adjustDirObjs(builder.job, pathToLoc)
            cwltool.pathmapper.adjustFileObjs(builder.job, pathToLoc)
            cwltool.pathmapper.adjustFileObjs(builder.job, addFilePartRefs)
            adjustFiles(builder.job, functools.partial(writeFile, toil.importFile, {}, {}))
            wf1.cwljob = builder.job
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        try:
            adjustFilesWithSecondary(outobj, functools.partial(getFile, toil, outdir, index={}, existing={},
                                                               export=True, rename_collision=True))
            cwltool.pathmapper.adjustFileObjs(outobj, pathToLoc)
        except cwltool.process.UnsupportedRequirement as e:
            logging.error(e)
            return 33

        stdout.write(json.dumps(outobj, indent=4))

    return 0
Example #36
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help = "Seq file")
    parser.add_argument("blastOutput", nargs="+", help = "Blast output (from cactus-blast)")
    parser.add_argument("outputHal", type=str, help = "Output HAL file")
    parser.add_argument("--pathOverrides", nargs="*", help="paths (multiple allowd) to override from seqFile")
    parser.add_argument("--pathOverrideNames", nargs="*", help="names (must be same number as --paths) of path overrides")

    #Progressive Cactus Options
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--root", dest="root", help="Name of ancestral node (which"
                        " must appear in NEWICK tree in <seqfile>) to use as a "
                        "root for the alignment.  Any genomes not below this node "
                        "in the tree may be used as outgroups but will never appear"
                        " in the output.  If no root is specifed then the root"
                        " of the tree is used. ", default=None, required=True)
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)
    parser.add_argument("--nonBlastInput", action="store_true",
                        help="Input does not come from cactus-blast: Do not append ids to fasta names")
    parser.add_argument("--nonBlastMegablockFilter", action="store_true",
                        help="By default, the megablock filter is off for --nonBlastInput, as it does not play"
                        "nicely with reference-based alignments.  This flag will turn it back on")
    parser.add_argument("--pafInput", action="store_true",
                        help="'blastOutput' input is in paf format, rather than lastz cigars.")    

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if (options.pathOverrides or options.pathOverrideNames):
        if not options.pathOverrides or not options.pathOverrideNames or \
           len(options.pathOverrideNames) != len(options.pathOverrides):
            raise RuntimeError('same number of values must be passed to --pathOverrides and --pathOverrideNames')

    # cactus doesn't run with 1 core
    if options.batchSystem == 'singleMachine':
        if options.maxCores is not None:
            if int(options.maxCores) < 2:
                raise RuntimeError('Cactus requires --maxCores > 1')
        else:
            # is there a way to get this out of Toil?  That would be more consistent
            if cpu_count() < 2:
                raise RuntimeError('Only 1 CPU detected.  Cactus requires at least 2')

    # tokyo_cabinet is no longer supported
    options.database = "kyoto_tycoon"

    options.buildHal = True
    options.buildFasta = True

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusAfterBlastOnly(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-align has finished after {} seconds".format(run_time))
Example #37
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument(
        "cigarsFile",
        nargs="*",
        help=
        "Pairiwse aliginments (from cactus-blast, cactus-refmap or cactus-graphmap)"
    )
    parser.add_argument("outHal",
                        type=str,
                        help="Output HAL file (or directory in --batch mode)")
    parser.add_argument(
        "--pathOverrides",
        nargs="*",
        help="paths (multiple allowd) to override from seqFile")
    parser.add_argument(
        "--pathOverrideNames",
        nargs="*",
        help="names (must be same number as --paths) of path overrides")

    #Pangenome Options
    parser.add_argument(
        "--pangenome",
        action="store_true",
        help=
        "Activate pangenome mode (suitable for star trees of closely related samples) by overriding several configuration settings."
        " The overridden configuration will be saved in <outHal>.pg-conf.xml")
    parser.add_argument(
        "--pafInput",
        action="store_true",
        help="'cigarsFile' arugment is in PAF format, rather than lastz cigars."
    )
    parser.add_argument(
        "--usePafSecondaries",
        action="store_true",
        help=
        "use the secondary alignments from the PAF input.  They are ignored by default."
    )
    parser.add_argument("--singleCopySpecies",
                        type=str,
                        help="Filter out all self-alignments in given species")
    parser.add_argument(
        "--barMaskFilter",
        type=int,
        default=None,
        help=
        "BAR's POA aligner will ignore softmasked regions greater than this length. (overrides partialOrderAlignmentMaskFilter in config)"
    )
    parser.add_argument(
        "--outVG",
        action="store_true",
        help="export pangenome graph in VG (.vg) in addition to HAL")
    parser.add_argument(
        "--outGFA",
        action="store_true",
        help="export pangenome grpah in GFA (.gfa.gz) in addition to HAL")
    parser.add_argument(
        "--batch",
        action="store_true",
        help=
        "Launch batch of alignments.  Input seqfile is expected to be chromfile as generated by cactus-graphmap-slit"
    )
    parser.add_argument(
        "--stagger",
        type=int,
        help=
        "Stagger alignment jobs in batch mode by this many seconds (to avoid starting all at once)",
        default=0)
    parser.add_argument(
        "--acyclic",
        type=str,
        help=
        "Ensure that given genome is cyclic by deleting all paralogy edges in postprocessing"
    )

    #Progressive Cactus Options
    parser.add_argument("--configFile",
                        dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(),
                                             "cactus_progressive_config.xml"))
    parser.add_argument(
        "--root",
        dest="root",
        help="Name of ancestral node (which"
        " must appear in NEWICK tree in <seqfile>) to use as a "
        "root for the alignment.  Any genomes not below this node "
        "in the tree may be used as outgroups but will never appear"
        " in the output.  If no root is specifed then the root"
        " of the tree is used. ",
        default=None)
    parser.add_argument(
        "--latest",
        dest="latest",
        action="store_true",
        help="Use the latest version of the docker container "
        "rather than pulling one matching this version of cactus")
    parser.add_argument(
        "--containerImage",
        dest="containerImage",
        default=None,
        help="Use the the specified pre-built containter image "
        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode",
                        choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries",
                        default=None)
    parser.add_argument(
        "--nonCactusInput",
        action="store_true",
        help=
        "Input lastz cigars do not come from cactus-blast or cactus-refmap: Prepend ids in cigars"
    )
    parser.add_argument("--database",
                        choices=["kyoto_tycoon", "redis"],
                        help="The type of database",
                        default="kyoto_tycoon")

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if (options.pathOverrides or options.pathOverrideNames):
        if not options.pathOverrides or not options.pathOverrideNames or \
           len(options.pathOverrideNames) != len(options.pathOverrides):
            raise RuntimeError(
                'same number of values must be passed to --pathOverrides and --pathOverrideNames'
            )

    # cactus doesn't run with 1 core
    if options.batchSystem == 'singleMachine':
        if options.maxCores is not None:
            if int(options.maxCores) < 2:
                raise RuntimeError('Cactus requires --maxCores > 1')
        else:
            # is there a way to get this out of Toil?  That would be more consistent
            if cpu_count() < 2:
                raise RuntimeError(
                    'Only 1 CPU detected.  Cactus requires at least 2')

    options.buildHal = True
    options.buildFasta = True

    if options.outHal.startswith('s3://'):
        if not has_s3:
            raise RuntimeError(
                "S3 support requires toil to be installed with [aws]")
        # write a little something to the bucket now to catch any glaring problems asap
        test_file = os.path.join(getTempDirectory(), 'check')
        with open(test_file, 'w') as test_o:
            test_o.write("\n")
        region = get_aws_region(
            options.jobStore) if options.jobStore.startswith('aws:') else None
        write_s3(test_file,
                 options.outHal if options.outHal.endswith('.hal') else
                 os.path.join(options.outHal, 'test'),
                 region=region)
        options.checkpointInfo = (get_aws_region(options.jobStore),
                                  options.outHal)
    else:
        options.checkpointInfo = None

    if options.batch:
        # the output hal is a directory, make sure it's there
        if not os.path.isdir(options.outHal):
            os.makedirs(options.outHal)
        assert len(options.cigarsFile) == 0
    else:
        assert len(options.cigarsFile) > 0

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    # We set which type of unique ids to expect: numeric (from cactus-blast) or event name (cactus-refmap or cactus-graphmap).
    # This is a bit ugly, since we don't have a good way to differentiate refmap from blast, so we use --pangenome as a proxy,
    # but I don't think there's a real use case yet for making a separate parameter.
    eventName = os.environ.get('CACTUS_EVENT_NAME_AS_UNIQUE_ID')
    if eventName is not None:
        options.eventNameAsID = bool(eventName) and eventName != '0'
    else:
        options.eventNameAsID = options.pangenome or options.pafInput
    os.environ['CACTUS_EVENT_NAME_AS_UNIQUE_ID'] = str(
        int(options.eventNameAsID))

    start_time = timeit.default_timer()
    with Toil(options) as toil:
        importSingularityImage(options)
        if options.restart:
            results_dict = toil.restart()
        else:
            align_jobs = make_batch_align_jobs(options, toil)
            results_dict = toil.start(
                Job.wrapJobFn(run_batch_align_jobs, align_jobs))

        # when using s3 output urls, things get checkpointed as they're made, so there is no reason to export
        # todo: make a more unified interface throughout cactus for this
        # (see toil-vg's outstore logic which, while not perfect, would be an improvement)
        if not options.outHal.startswith('s3://'):
            if options.batch:
                for chrom, results in results_dict.items():
                    toil.exportFile(
                        results[0],
                        makeURL(
                            os.path.join(options.outHal,
                                         '{}.hal'.format(chrom))))
                    if options.outVG:
                        toil.exportFile(
                            results[1],
                            makeURL(
                                os.path.join(options.outHal,
                                             '{}.vg'.format(chrom))))
                    if options.outGFA:
                        toil.exportFile(
                            results[2],
                            makeURL(
                                os.path.join(options.outHal,
                                             '{}.gfa.gz'.format(chrom))))
            else:
                assert len(results_dict) == 1 and None in results_dict
                halID, vgID, gfaID = results_dict[None][0], results_dict[None][
                    1], results_dict[None][2]
                # export the hal
                toil.exportFile(halID, makeURL(options.outHal))
                # export the vg
                if options.outVG:
                    toil.exportFile(
                        vgID,
                        makeURL(os.path.splitext(options.outHal)[0] + '.vg'))
                if options.outGFA:
                    toil.exportFile(
                        gfaID,
                        makeURL(
                            os.path.splitext(options.outHal)[0] + '.gfa.gz'))

    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-align has finished after {} seconds".format(run_time))
Example #38
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument(
        "cigarsFile",
        nargs="+",
        help=
        "Pairiwse aliginments (from cactus-blast, cactus-refmap or cactus-graphmap)"
    )
    parser.add_argument("outputHal", type=str, help="Output HAL file")
    parser.add_argument(
        "--pathOverrides",
        nargs="*",
        help="paths (multiple allowd) to override from seqFile")
    parser.add_argument(
        "--pathOverrideNames",
        nargs="*",
        help="names (must be same number as --paths) of path overrides")

    #Progressive Cactus Options
    parser.add_argument("--configFile",
                        dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(),
                                             "cactus_progressive_config.xml"))
    parser.add_argument(
        "--root",
        dest="root",
        help="Name of ancestral node (which"
        " must appear in NEWICK tree in <seqfile>) to use as a "
        "root for the alignment.  Any genomes not below this node "
        "in the tree may be used as outgroups but will never appear"
        " in the output.  If no root is specifed then the root"
        " of the tree is used. ",
        default=None,
        required=True)
    parser.add_argument(
        "--latest",
        dest="latest",
        action="store_true",
        help="Use the latest version of the docker container "
        "rather than pulling one matching this version of cactus")
    parser.add_argument(
        "--containerImage",
        dest="containerImage",
        default=None,
        help="Use the the specified pre-built containter image "
        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode",
                        choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries",
                        default=None)
    parser.add_argument(
        "--nonCactusInput",
        action="store_true",
        help=
        "Input lastz cigars do not come from cactus-blast or cactus-refmap: Prepend ids in cigars"
    )
    parser.add_argument(
        "--pangenome",
        action="store_true",
        help=
        "Override some CAF settings whose defaults are not suited to star trees"
    )
    parser.add_argument(
        "--pafInput",
        action="store_true",
        help="'cigarsFile' arugment is in PAF format, rather than lastz cigars."
    )
    parser.add_argument("--database",
                        choices=["kyoto_tycoon", "redis"],
                        help="The type of database",
                        default="kyoto_tycoon")

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    if (options.pathOverrides or options.pathOverrideNames):
        if not options.pathOverrides or not options.pathOverrideNames or \
           len(options.pathOverrideNames) != len(options.pathOverrides):
            raise RuntimeError(
                'same number of values must be passed to --pathOverrides and --pathOverrideNames'
            )

    # cactus doesn't run with 1 core
    if options.batchSystem == 'singleMachine':
        if options.maxCores is not None:
            if int(options.maxCores) < 2:
                raise RuntimeError('Cactus requires --maxCores > 1')
        else:
            # is there a way to get this out of Toil?  That would be more consistent
            if cpu_count() < 2:
                raise RuntimeError(
                    'Only 1 CPU detected.  Cactus requires at least 2')

    if options.pafInput:
        # cactus-graphmap does not do any prepending to simplify interface with minigraph node names
        # so it must be done here
        options.nonCactusInput = True

    options.buildHal = True
    options.buildFasta = True

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    start_time = timeit.default_timer()
    runCactusAfterBlastOnly(options)
    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info("cactus-align has finished after {} seconds".format(run_time))
Example #39
def main(args=None, stdout=sys.stdout):
    parser = argparse.ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enabling the
    # user to select a jobStore or get a default from the logic below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const", const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    parser.add_argument("--preserve-environment", type=str, nargs='+',
                    help="Preserve specified environment variables when running CommandLineTools",
                    metavar=("VAR1 VAR2"),
                    default=("PATH",),
                    dest="preserve_environment")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    options = parser.parse_args([workdir] + args)

    use_container = not options.no_container

    setLoggingFromOptions(options)
    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    fileindex = {}
    existing = {}

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            useStrict = not options.not_strict
            try:
                t = cwltool.load_tool.load_tool(options.cwltool, toilMakeTool,
                                                kwargs={
                                                    "hints": [{
                                                        "class": "ResourceRequirement",
                                                        "coresMin": toil.config.defaultCores,
                                                        "ramMin": toil.config.defaultMemory / (2**20),
                                                        "outdirMin": toil.config.defaultDisk / (2**20),
                                                        "tmpdirMin": 0
                                                    }]},
                                                resolver=cwltool.resolver.tool_resolver,
                                                strict=useStrict)
                unsupportedRequirementsCheck(t.requirements)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            if type(t) == int:
                return t

            options.workflow = options.cwltool
            options.job_order = options.cwljob
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job = cwltool.main.load_job_order(options, t, sys.stdin)

            if type(job) == int:
                return job

            job, options.basedir = job

            fillInDefaults(t.tool["inputs"], job)

            def pathToLoc(p):
                if "location" not in p and "path" in p:
                    p["location"] = p["path"]
                    del p["path"]

            def importFiles(tool):
                visit_class(tool, ("File", "Directory"), pathToLoc)
                normalizeFilesDirs(tool)
                adjustDirObjs(tool, functools.partial(get_listing,
                                                      cwltool.stdfsaccess.StdFsAccess(""),
                                                      recursive=True))
                adjustFileObjs(tool, functools.partial(uploadFile,
                                                       toil.importFile,
                                                       fileindex, existing, skip_broken=True))

            t.visit(importFiles)

            for inp in t.tool["inputs"]:
                def setSecondary(fileobj):
                    if isinstance(fileobj, dict) and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [{
                                "location": cwltool.builder.substitute(fileobj["location"], sf), "class": "File"}
                                                         for sf in inp["secondaryFiles"]]

                    if isinstance(fileobj, list):
                        for e in fileobj:
                            setSecondary(e)

                if shortname(inp["id"]) in job and inp.get("secondaryFiles"):
                    setSecondary(job[shortname(inp["id"])])

            importFiles(job)
            visitSteps(t, importFiles)

            make_fs_access = functools.partial(ToilFsAccess, fileStore=toil)
            try:
                (wf1, wf2) = makeJob(t, {}, use_container=use_container,
                                     preserve_environment=options.preserve_environment,
                                     tmpdir=os.path.realpath(outdir), workdir=options.workDir)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            wf1.cwljob = job
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        toilStageFiles(toil, outobj, outdir, fileindex, existing, True)

        visit_class(outobj, ("File",), functools.partial(compute_checksums, cwltool.stdfsaccess.StdFsAccess("")))

        stdout.write(json.dumps(outobj, indent=4))

    return 0
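
The ResourceRequirement hint passed to load_tool above divides Toil's byte-denominated defaults by 2**20 because CWL ResourceRequirement sizes (ramMin, outdirMin, tmpdirMin) are expressed in mebibytes. A one-line helper (the name is illustrative) makes the conversion explicit:

# CWL ResourceRequirement sizes are in mebibytes while Toil config sizes
# are in bytes, hence the 2**20 divisor above.
def bytes_to_mib(n_bytes):
    return n_bytes // (2 ** 20)
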
Example #40
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("inSeqFile", type=str, nargs='?', default=None, help = "Input Seq file")
    parser.add_argument("outSeqFile", type=str, nargs='?', default=None, help = "Output Seq file (ex generated with cactus-prepare)")
    parser.add_argument("--configFile", default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--inputNames", nargs='*', help='input genome names (not paths) to preprocess (all leaves from Input Seq file if none specified)')
    parser.add_argument("--inPaths", nargs='*', help='Space-separated list of input fasta paths (to be used in place of --inSeqFile')
    parser.add_argument("--outPaths", nargs='*', help='Space-separated list of output fasta paths (one for each inPath, used in place of --outSeqFile)')
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()
    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    # we have two modes: operate directly on paths or rely on the seqfiles.  they cannot be mixed
    if options.inSeqFile or options.outSeqFile:
        if not options.inSeqFile or not options.outSeqFile or options.inPaths or options.outPaths:
            raise RuntimeError('--inSeqFile must be used in conjunction with --outSeqFile and not with --inPaths nor --outPaths')
    elif options.inPaths or options.outPaths:
        if not options.inPaths or not options.outPaths or options.inSeqFile or options.outSeqFile or options.inputNames:
            raise RuntimeError('--inPaths must be used in conjunction with --outPaths and not with --inSeqFile, --outSeqFile nor --inputNames')
        if len(options.inPaths) != len(options.outPaths):
            raise RuntimeError('--inPaths and --outPaths must have the same number of arguments')
    else:
        raise RuntimeError('--inSeqFile/--outSeqFile/--inputNames or --inPaths/--outPaths required to specify input')


    inSeqPaths = []
    outSeqPaths = []

    # mine the paths out of the seqfiles
    if options.inSeqFile:
        inSeqFile = SeqFile(options.inSeqFile)
        outSeqFile = SeqFile(options.outSeqFile)

        inNames = options.inputNames
        if not inNames:
            inNames = [inSeqFile.tree.getName(node) for node in inSeqFile.tree.getLeaves()]


        for inName in inNames:
            if inName not in inSeqFile.pathMap or inName not in outSeqFile.pathMap:
                raise RuntimeError('{} not present in input and output Seq files'.format(inName))
            inPath = inSeqFile.pathMap[inName]
            outPath = outSeqFile.pathMap[inName]
            if os.path.isdir(inPath):
                try:
                    os.makedirs(outPath)
                except OSError:
                    # the output directory may already exist
                    pass
                assert os.path.isdir(inPath) == os.path.isdir(outPath)
                inSeqPaths += [os.path.join(inPath, seqPath) for seqPath in os.listdir(inPath)]
                outSeqPaths += [os.path.join(outPath, seqPath) for seqPath in os.listdir(inPath)]
            else:
                inSeqPaths += [inPath]
                outSeqPaths += [outPath]

    # we got path names directly from the command line
    else:
        inSeqPaths = options.inPaths
        outSeqPaths = options.outPaths

    with Toil(options) as toil:
        stageWorkflow(outputSequenceDir=None, configFile=options.configFile, inputSequences=inSeqPaths, toil=toil, restart=options.restart, outputSequences=outSeqPaths)
Example #41
def main(args=None, stdout=sys.stdout):
    parser = argparse.ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enabling the
    # user to select a jobStore or get a default from the logic below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet",
                        dest="logLevel",
                        action="store_const",
                        const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    parser.add_argument(
        "--preserve-environment",
        type=str,
        nargs='+',
        help=
        "Preserve specified environment variables when running CommandLineTools",
        metavar=("VAR1 VAR2"),
        default=("PATH", ),
        dest="preserve_environment")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    options = parser.parse_args([workdir] + args)

    use_container = not options.no_container

    setLoggingFromOptions(options)
    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    fileindex = {}
    existing = {}

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            useStrict = not options.not_strict
            try:
                t = cwltool.load_tool.load_tool(
                    options.cwltool,
                    toilMakeTool,
                    kwargs={
                        "hints": [{
                            "class":
                            "ResourceRequirement",
                            "coresMin":
                            toil.config.defaultCores,
                            "ramMin":
                            toil.config.defaultMemory / (2**20),
                            "outdirMin":
                            toil.config.defaultDisk / (2**20),
                            "tmpdirMin":
                            0
                        }]
                    },
                    resolver=cwltool.resolver.tool_resolver,
                    strict=useStrict)
                unsupportedRequirementsCheck(t.requirements)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            if type(t) == int:
                return t

            options.workflow = options.cwltool
            options.job_order = options.cwljob
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job = cwltool.main.load_job_order(options, t, sys.stdin)

            if type(job) == int:
                return job

            job, options.basedir = job

            fillInDefaults(t.tool["inputs"], job)

            def pathToLoc(p):
                if "location" not in p and "path" in p:
                    p["location"] = p["path"]
                    del p["path"]

            def importFiles(tool):
                visit_class(tool, ("File", "Directory"), pathToLoc)
                normalizeFilesDirs(tool)
                adjustDirObjs(
                    tool,
                    functools.partial(get_listing,
                                      cwltool.stdfsaccess.StdFsAccess(""),
                                      recursive=True))
                adjustFileObjs(
                    tool,
                    functools.partial(uploadFile,
                                      toil.importFile,
                                      fileindex,
                                      existing,
                                      skip_broken=True))

            t.visit(importFiles)

            for inp in t.tool["inputs"]:

                def setSecondary(fileobj):
                    if isinstance(fileobj,
                                  dict) and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [{
                                "location":
                                cwltool.builder.substitute(
                                    fileobj["location"], sf),
                                "class":
                                "File"
                            } for sf in inp["secondaryFiles"]]

                    if isinstance(fileobj, list):
                        for e in fileobj:
                            setSecondary(e)

                if shortname(inp["id"]) in job and inp.get("secondaryFiles"):
                    setSecondary(job[shortname(inp["id"])])

            importFiles(job)
            visitSteps(t, importFiles)

            make_fs_access = functools.partial(ToilFsAccess, fileStore=toil)
            try:
                (wf1, wf2) = makeJob(
                    t, {},
                    use_container=use_container,
                    preserve_environment=options.preserve_environment,
                    tmpdir=os.path.realpath(outdir),
                    workdir=options.workDir)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            wf1.cwljob = job
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        toilStageFiles(toil, outobj, outdir, fileindex, existing, True)

        visit_class(
            outobj, ("File", ),
            functools.partial(compute_checksums,
                              cwltool.stdfsaccess.StdFsAccess("")))

        stdout.write(json.dumps(outobj, indent=4))

    return 0
Example #42
def main_batch():
    """ this is a bit like cactus-align --batch except it will use toil-in-toil to assign each chromosome to a machine.
    pros: much less chance of a problem with one chromosome affecting anything else
          more forgiving for inexact resource specs
          could be ported to Terra
    cons: less efficient use of resources
    """
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("chromFile", help="chroms file")
    parser.add_argument("outHal",
                        type=str,
                        help="Output directory (can be s3://)")
    parser.add_argument(
        "--alignOptions",
        type=str,
        help=
        "Options to pass through to cactus-align (don't forget to wrap in quotes)"
    )
    parser.add_argument("--alignCores",
                        type=int,
                        help="Number of cores per align job")
    parser.add_argument(
        "--alignCoresOverrides",
        nargs="*",
        help=
        "Override align job cores for a chromosome. Space-separated list of chrom,cores pairse epxected"
    )

    parser.add_argument("--configFile",
                        dest="configFile",
                        help="Specify cactus configuration file",
                        default=os.path.join(cactusRootPath(),
                                             "cactus_progressive_config.xml"))

    options = parser.parse_args()

    options.containerImage = None
    options.binariesMode = None
    options.root = None
    options.latest = None
    options.database = "kyoto_tycoon"

    setupBinaries(options)
    setLoggingFromOptions(options)
    enableDumpStack()

    # Mess with some toil options to create useful defaults.
    cactus_override_toil_options(options)

    # Turn the overrides into a dict
    cores_overrides = {}
    if options.alignCoresOverrides:
        for o in options.alignCoresOverrides:
            try:
                chrom, cores = o.split(',')
                cores_overrides[chrom] = int(cores)
            except ValueError:
                raise RuntimeError(
                    "Error parsing alignCoresOverrides \"{}\"".format(o))
    options.alignCoresOverrides = cores_overrides

    start_time = timeit.default_timer()
    with Toil(options) as toil:
        importSingularityImage(options)
        if options.restart:
            results_dict = toil.restart()
        else:
            config_id = toil.importFile(makeURL(options.configFile))
            # load the chromfile into memory
            chrom_dict = {}
            with open(options.chromFile, 'r') as chrom_file:
                for line in chrom_file:
                    toks = line.strip().split()
                    if len(toks):
                        assert len(toks) == 3
                        chrom, seqfile, alnFile = toks[0], toks[1], toks[2]
                        chrom_dict[chrom] = toil.importFile(
                            makeURL(seqfile)), toil.importFile(
                                makeURL(alnFile))
            results_dict = toil.start(
                Job.wrapJobFn(align_toil_batch, chrom_dict, config_id,
                              options))

        # when using s3 output urls, things get checkpointed as they're made, so there is no reason to export
        # todo: make a more unified interface throughout cactus for this
        # (see toil-vg's outstore logic which, while not perfect, would be an improvement)
        if not options.outHal.startswith('s3://'):
            if options.batch:
                for chrom, results in results_dict.items():
                    toil.exportFile(
                        results[0],
                        makeURL(
                            os.path.join(options.outHal,
                                         '{}.hal'.format(chrom))))
                    if options.outVG:
                        toil.exportFile(
                            results[1],
                            makeURL(
                                os.path.join(options.outHal,
                                             '{}.vg'.format(chrom))))
                    if options.outGFA:
                        toil.exportFile(
                            results[2],
                            makeURL(
                                os.path.join(options.outHal,
                                             '{}.gfa.gz'.format(chrom))))
                    toil.exportFile(
                        results[3],
                        makeURL(
                            os.path.join(options.outHal,
                                         '{}.hal.log'.format(chrom))))

    end_time = timeit.default_timer()
    run_time = end_time - start_time
    logger.info(
        "cactus-align-batch has finished after {} seconds".format(run_time))