Example #1
def issueJob(self, jobNode):
    """
    Add a job to the queue of jobs
    """
    jobNode.command = ' '.join((resolveEntryPoint('_toil_worker'),
                                self.jobStoreLocator, jobNode.jobStoreID))
    jobBatchSystemID = self.batchSystem.issueBatchJob(jobNode)
    self.jobBatchSystemIDToIssuedJob[jobBatchSystemID] = jobNode
    if jobNode.preemptable:
        # len(jobBatchSystemIDToIssuedJob) should always be greater than or equal to preemptableJobsIssued,
        # so increment this value after the job is added to the issuedJob dict
        self.preemptableJobsIssued += 1
    cur_logger = (logger.debug if jobNode.jobName.startswith(self.debugJobNames)
                  else logger.info)
    cur_logger("Issued job %s with job batch system ID: "
               "%s and cores: %s, disk: %s, and memory: %s",
               jobNode, str(jobBatchSystemID), int(jobNode.cores),
               bytes2human(jobNode.disk), bytes2human(jobNode.memory))
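
Note: the logger selection above relies on str.startswith accepting a tuple of prefixes, so self.debugJobNames can hold several job-name prefixes at once. A minimal standalone sketch of the idiom (the prefixes and job names below are hypothetical, not Toil's):

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Hypothetical tuple standing in for self.debugJobNames:
debugJobNames = ('CWLJob', 'CWLWorkflow')

for jobName in ('CWLJobStep1', 'AlignReads'):
    # str.startswith accepts a tuple and matches if any prefix matches.
    cur_logger = logger.debug if jobName.startswith(debugJobNames) else logger.info
    cur_logger("Issued job %s", jobName)  # CWLJobStep1 -> DEBUG, AlignReads -> INFO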
Example #2
def issueJob(self, jobNode):
    """
    Add a job to the queue of jobs
    """
    jobNode.command = ' '.join((resolveEntryPoint('_toil_worker'),
                                self.jobStoreLocator, jobNode.jobStoreID))
    jobBatchSystemID = self.batchSystem.issueBatchJob(jobNode)
    self.jobBatchSystemIDToIssuedJob[jobBatchSystemID] = jobNode
    if jobNode.preemptable:
        # len(jobBatchSystemIDToIssuedJob) should always be greater than or equal to preemptableJobsIssued,
        # so increment this value after the job is added to the issuedJob dict
        self.preemptableJobsIssued += 1
    cur_logger = (logger.debug if jobNode.jobName.startswith(CWL_INTERNAL_JOBS)
                  else logger.info)
    cur_logger("Issued job %s with job batch system ID: "
               "%s and cores: %s, disk: %s, and memory: %s",
               jobNode, str(jobBatchSystemID), int(jobNode.cores),
               bytes2human(jobNode.disk), bytes2human(jobNode.memory))
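
This variant differs from the first only in checking job names against the module-level CWL_INTERNAL_JOBS constant rather than self.debugJobNames. For reference, the worker command assembled at the top of the method is a plain space-joined string; a sketch with hypothetical values (resolveEntryPoint is assumed to return the path of the named entry-point script):

# Hypothetical values; the real ones come from the running workflow.
entry_point = '/usr/local/bin/_toil_worker'   # resolveEntryPoint('_toil_worker')
job_store_locator = 'aws:us-west-2:my-store'  # self.jobStoreLocator
job_store_id = 'jobs/abc123'                  # jobNode.jobStoreID

command = ' '.join((entry_point, job_store_locator, job_store_id))
# -> '/usr/local/bin/_toil_worker aws:us-west-2:my-store jobs/abc123'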
Example #3
def _addOptions(addGroupFn, config):
    #
    #Core options
    #
    addOptionFn = addGroupFn("toil core options", "Options to specify the \
    location of the toil workflow and turn on stats collation about the performance of jobs.")
    addOptionFn('jobStore', type=str,
                help=("Store in which to place job management files and the globally accessed "
                      "temporary files. Job store locator strings should be formatted as follows\n"
                      "aws:<AWS region>:<name prefix>\n"
                      "azure:<account>:<name prefix>\n"
                      "google:<project id>:<name prefix>\n"
                      "file:<file path>\n"
                      "Note that for backwards compatibility ./foo is equivalent to file:/foo and "
                      "/bar is equivalent to file:/bar.\n"
                      "(If this is a file path it needs to be globally accessible by all machines"
                      " running jobs.)\n"
                      "If the store already exists and restart is false, a JobStoreCreationException"
                      " will be thrown."))
    addOptionFn("--workDir", dest="workDir", default=None,
                help="Absolute path to directory where temporary files generated during the Toil "
                     "run should be placed. Temp files and folders will be placed in a directory "
                     "toil-<workflowID> within workDir (The workflowID is generated by Toil and "
                     "will be reported in the workflow logs. Default is determined by the "
                     "user-defined environmental variable TOIL_TEMPDIR, or the environment "
                     "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
                     "all machines running jobs.")
    addOptionFn("--stats", dest="stats", action="store_true", default=None,
                      help="Records statistics about the toil workflow to be used by 'toil stats'.")
    addOptionFn("--clean", dest="clean", choices=['always', 'onError','never', 'onSuccess'], default=None,
                      help=("Determines the deletion of the jobStore upon completion of the program. "
                            "Choices: 'always', 'onError','never', 'onSuccess'. The --stats option requires "
                            "information from the jobStore upon completion so the jobStore will never be deleted with"
                            "that flag. If you wish to be able to restart the run, choose \'never\' or \'onSuccess\'. "
                            "Default is \'never\' if stats is enabled, and \'onSuccess\' otherwise"))
    addOptionFn("--cleanWorkDir", dest="cleanWorkDir",
                choices=['always', 'never', 'onSuccess', 'onError'], default='always',
                help=("Determines deletion of temporary worker directory upon completion of a job. Choices: 'always', "
                      "'never', 'onSuccess'. Default = always. WARNING: This option should be changed for debugging "
                      "only. Running a full pipeline with this option could fill your disk with intermediate data."))
    #
    #Restarting the workflow options
    #
    addOptionFn = addGroupFn("toil options for restarting an existing workflow",
                             "Allows the restart of an existing workflow")
    addOptionFn("--restart", dest="restart", default=None, action="store_true",
                help="If --restart is specified then will attempt to restart existing workflow "
                "at the location pointed to by the --jobStore option. Will raise an exception if the workflow does not exist")

    #
    #Batch system options
    #

    addOptionFn = addGroupFn("toil options for specifying the batch system",
                             "Allows the specification of the batch system, and arguments to the batch system/big batch system (see below).")
    addOptionFn("--batchSystem", dest="batchSystem", default=None,
                      help=("The type of batch system to run the job(s) with, currently can be one "
                            "of singleMachine, parasol, gridEngine, lsf or mesos'. default=%s" % config.batchSystem))
    addOptionFn("--scale", dest="scale", default=None,
                help=("A scaling factor to change the value of all submitted tasks's submitted cores. "
                      "Used in singleMachine batch system. default=%s" % config.scale))
    addOptionFn("--mesosMaster", dest="mesosMasterAddress", default=None,
                help=("The host and port of the Mesos master separated by colon. default=%s" % config.mesosMasterAddress))
    addOptionFn("--parasolCommand", dest="parasolCommand", default=None,
                      help="The name or path of the parasol program. Will be looked up on PATH "
                           "unless it starts with a slashdefault=%s" % config.parasolCommand)
    addOptionFn("--parasolMaxBatches", dest="parasolMaxBatches", default=None,
                help="Maximum number of job batches the Parasol batch is allowed to create. One "
                     "batch is created for jobs with a a unique set of resource requirements. "
                     "default=%i" % config.parasolMaxBatches)

    #
    #Auto scaling options
    #
    addOptionFn = addGroupFn("toil options for autoscaling the cluster of worker nodes",
                             "Allows the specification of the minimum and maximum number of nodes "
                             "in an autoscaled cluster, as well as parameters to control the "
                             "level of provisioning.")

    addOptionFn("--provisioner", dest="provisioner", choices=['cgcloud'],
                help="The provisioner for cluster auto-scaling. Currently only the cgcloud "
                     "provisioner exists. The default is %s." % config.provisioner)

    for preemptable in (False, True):
        def _addOptionFn(*name, **kwargs):
            name = list(name)
            if preemptable:
                name.insert(-1, 'preemptable' )
            name = ''.join((s[0].upper() + s[1:]) if i else s for i, s in enumerate(name))
            terms = re.compile(r'\{([^{}]+)\}')
            _help = kwargs.pop('help')
            _help = ''.join((term.split('|') * 2)[int(preemptable)] for term in terms.split(_help))
            addOptionFn('--' + name, dest=name,
                        help=_help + ' The default is %s.' % getattr(config, name),
                        **kwargs)

        _addOptionFn('nodeType', metavar='TYPE',
                     help="Node type for {non-|}preemptable nodes. The syntax depends on the "
                          "provisioner used. For the cgcloud provisioner this is the name of an "
                          "EC2 instance type{|, followed by a colon and the price in dollars to "
                          "bid for a spot instance}, for example 'c3.8xlarge{|:0.42}'.")
        _addOptionFn('nodeOptions', metavar='OPTIONS',
                     help="Provisioning options for the {non-|}preemptable node type. The syntax "
                          "depends on the provisioner used. For the cgcloud provisioner this is a "
                          "space-separated list of options to cgcloud's grow-cluster command (run "
                          "'cgcloud grow-cluster --help' for details).")
        for p, q in [('min', 'Minimum'), ('max', 'Maximum')]:
            _addOptionFn(p, 'nodes', default=None, metavar='NUM',
                         help=q + " number of {non-|}preemptable nodes in the cluster, if using "
                                  "auto-scaling.")

    # TODO: DESCRIBE THE FOLLOWING TWO PARAMETERS
    addOptionFn("--alphaPacking", dest="alphaPacking", default=None,
                help=(" default=%s" % config.alphaPacking))
    addOptionFn("--betaInertia", dest="betaInertia", default=None,
                help=(" default=%s" % config.betaInertia))
    addOptionFn("--scaleInterval", dest="scaleInterval", default=None,
                help=("The interval (seconds) between assessing if the scale of"
                      " the cluster needs to change. default=%s" % config.scaleInterval))

    #
    #Resource requirements
    #
    addOptionFn = addGroupFn("toil options for cores/memory requirements",
                             "The options to specify default cores/memory requirements (if not "
                             "specified by the jobs themselves), and to limit the total amount of "
                             "memory/cores requested from the batch system.")
    addOptionFn('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
                help='The default amount of memory to request for a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
                     bytes2human( config.defaultMemory, symbols='iec' ))
    addOptionFn('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
                help='The default number of CPU cores to dedicate to a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Fractions of a '
                     'core (for example 0.1) are supported on some batch systems, namely Mesos '
                     'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
                help='The default amount of disk space to dedicate to a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
                     bytes2human( config.defaultDisk, symbols='iec' ))
    addOptionFn("--readGlobalFileMutableByDefault", dest="readGlobalFileMutableByDefault",
                action='store_true', default=None, help='Toil disallows modification of read '
                                                        'global files by default. This flag makes '
                                                        'it makes read file mutable by default, '
                                                        'however it also defeats the purpose of '
                                                        'shared caching via hard links to save '
                                                        'space. Default is False')
    addOptionFn('--maxCores', dest='maxCores', default=None, metavar='INT',
                help='The maximum number of CPU cores to request from the batch system at any one '
                     'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
                     'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn('--maxMemory', dest='maxMemory', default=None, metavar='INT',
                help="The maximum amount of memory to request from the batch system at any one "
                     "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
                     "is %s" % bytes2human( config.maxMemory, symbols='iec'))
    addOptionFn('--maxDisk', dest='maxDisk', default=None, metavar='INT',
                help='The maximum amount of disk space to request from the batch system at any '
                     'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
                     'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))

    #
    #Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs", \
            "The options for jobs that either run too long/fail or get lost \
            (some batch systems have issues!)")
    addOptionFn("--retryCount", dest="retryCount", default=None,
                      help=("Number of times to retry a failing job before giving up and "
                            "labeling job failed. default=%s" % config.retryCount))
    addOptionFn("--maxJobDuration", dest="maxJobDuration", default=None,
                      help=("Maximum runtime of a job (in seconds) before we kill it "
                            "(this is a lower bound, and the actual time before killing "
                            "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
                      help=("Period of time to wait (in seconds) between checking for "
                            "missing/overlong jobs, that is jobs which get lost by the batch system. Expert parameter. default=%s" % config.rescueJobsFrequency))

    #
    #Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options", "Miscellaneous options")
    addOptionFn("--maxLogFileSize", dest="maxLogFileSize", default=None,
                      help=("The maximum size of a job log file to keep (in bytes), log files larger "
                            "than this will be truncated to the last X bytes. Default is 50 "
                            "kilobytes, default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
                help="Enable real-time logging from workers to masters")

    addOptionFn("--sseKey", dest="sseKey", default=None,
            help="Path to file containing 32 character key to be used for server-side encryption on awsJobStore. SSE will "
                 "not be used if this flag is not passed.")
    addOptionFn("--cseKey", dest="cseKey", default=None,
                help="Path to file containing 256-bit key to be used for client-side encryption on "
                "azureJobStore. By default, no encryption is used.")
    addOptionFn("--setEnv", '-e', metavar='NAME=VALUE or NAME',
                dest="environment", default=[], action="append",
                help="Set an environment variable early on in the worker. If VALUE is omitted, "
                     "it will be looked up in the current environment. Independently of this "
                     "option, the worker will try to emulate the leader's environment before "
                     "running a job. Using this option, a variable can be injected into the "
                     "worker process itself before it is started.")
    addOptionFn("--servicePollingInterval", dest="servicePollingInterval", default=None,
                help="Interval of time service jobs wait between polling for the existence"
                " of the keep-alive flag (defailt=%s)" % config.servicePollingInterval)
    #
    #Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn("--badWorker", dest="badWorker", default=None,
                      help=("For testing purposes randomly kill 'badWorker' proportion of jobs using SIGKILL, default=%s" % config.badWorker))
    addOptionFn("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
                      help=("When killing the job pick uniformly within the interval from 0.0 to "
                            "'badWorkerFailInterval' seconds after the worker starts, default=%s" % config.badWorkerFailInterval))
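
The nested _addOptionFn helper above generates a non-preemptable and a preemptable flag from each template: name.insert(-1, 'preemptable') plus camel-casing turns ('min', 'nodes') into --minNodes and --minPreemptableNodes, and each {a|b} placeholder in the help text keeps the part before '|' on the non-preemptable pass and the part after it on the preemptable pass. A standalone sketch of that splitting logic, using the same regex and expression outside the option machinery:

import re

terms = re.compile(r'\{([^{}]+)\}')

def render(template, preemptable):
    # re.split with a capturing group alternates literal chunks and captured
    # terms; literal chunks contain no '|', so doubling the split result with
    # `* 2` makes index 0 or 1 safe for every chunk.
    return ''.join((term.split('|') * 2)[int(preemptable)]
                   for term in terms.split(template))

template = "Node type for {non-|}preemptable nodes, e.g. 'c3.8xlarge{|:0.42}'."
print(render(template, False))  # Node type for non-preemptable nodes, e.g. 'c3.8xlarge'.
print(render(template, True))   # Node type for preemptable nodes, e.g. 'c3.8xlarge:0.42'.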
Example #4
def _addOptions(addGroupFn, config):
    #
    #Core options
    #
    addOptionFn = addGroupFn(
        "toil core options",
        "Options to specify the location of the Toil workflow and turn on "
        "stats collation about the performance of jobs.")
    addOptionFn('jobStore',
                type=str,
                help="The location of the job store for the workflow. " +
                jobStoreLocatorHelp)
    addOptionFn(
        "--workDir",
        dest="workDir",
        default=None,
        help=
        "Absolute path to the directory where temporary files generated during the Toil "
        "run should be placed. Temp files and folders will be placed in a directory "
        "toil-<workflowID> within workDir (the workflowID is generated by Toil and "
        "will be reported in the workflow logs). Default is determined by the "
        "user-defined environment variable TOIL_TEMPDIR, or the environment "
        "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
        "all machines running jobs.")
    addOptionFn(
        "--stats",
        dest="stats",
        action="store_true",
        default=None,
        help=
        "Records statistics about the toil workflow to be used by 'toil stats'."
    )
    addOptionFn(
        "--clean",
        dest="clean",
        choices=['always', 'onError', 'never', 'onSuccess'],
        default=None,
        help=
        ("Determines the deletion of the jobStore upon completion of the program. "
         "Choices: 'always', 'onError', 'never', 'onSuccess'. The --stats option requires "
         "information from the jobStore upon completion, so the jobStore will never be "
         "deleted with that flag. If you wish to be able to restart the run, choose "
         "'never' or 'onSuccess'. Default is 'never' if stats is enabled, and 'onSuccess' "
         "otherwise."))
    addOptionFn(
        "--cleanWorkDir",
        dest="cleanWorkDir",
        choices=['always', 'never', 'onSuccess', 'onError'],
        default='always',
        help=
        ("Determines deletion of the temporary worker directory upon completion of a job. Choices: 'always', "
         "'never', 'onSuccess', 'onError'. Default = always. WARNING: This option should be changed for debugging "
         "only. Running a full pipeline with this option could fill your disk with intermediate data."
         ))
    #
    #Restarting the workflow options
    #
    addOptionFn = addGroupFn(
        "toil options for restarting an existing workflow",
        "Allows the restart of an existing workflow")
    addOptionFn(
        "--restart",
        dest="restart",
        default=None,
        action="store_true",
        help=
        "If --restart is specified then Toil will attempt to restart the existing workflow "
        "at the location pointed to by the --jobStore option. An exception will be raised if the workflow does not exist."
    )

    #
    #Batch system options
    #

    addOptionFn = addGroupFn(
        "toil options for specifying the batch system",
        "Allows the specification of the batch system, and arguments to the batch system/big batch system (see below)."
    )
    addOptionFn(
        "--batchSystem",
        dest="batchSystem",
        default=None,
        help=
        ("The type of batch system to run the job(s) with; currently this can be one "
         "of singleMachine, parasol, gridEngine, lsf or mesos. default=%s" %
         config.batchSystem))
    addOptionFn(
        "--scale",
        dest="scale",
        default=None,
        help=
        ("A scaling factor to change the value of all submitted tasks' submitted cores. "
         "Used in the singleMachine batch system. default=%s" % config.scale))
    addOptionFn(
        "--mesosMaster",
        dest="mesosMasterAddress",
        default=None,
        help=
        ("The host and port of the Mesos master, separated by a colon. default=%s"
         % config.mesosMasterAddress))
    addOptionFn(
        "--parasolCommand",
        dest="parasolCommand",
        default=None,
        help=
        "The name or path of the parasol program. Will be looked up on PATH "
        "unless it starts with a slash. default=%s" % config.parasolCommand)
    addOptionFn(
        "--parasolMaxBatches",
        dest="parasolMaxBatches",
        default=None,
        help=
        "Maximum number of job batches the Parasol batch system is allowed to create. One "
        "batch is created for jobs with a unique set of resource requirements. "
        "default=%i" % config.parasolMaxBatches)

    #
    #Auto scaling options
    #
    addOptionFn = addGroupFn(
        "toil options for autoscaling the cluster of worker nodes",
        "Allows the specification of the minimum and maximum number of nodes "
        "in an autoscaled cluster, as well as parameters to control the "
        "level of provisioning.")

    addOptionFn(
        "--provisioner",
        dest="provisioner",
        choices=['cgcloud', 'aws'],
        help=
        "The provisioner for cluster auto-scaling. The currently supported choices are "
        "'cgcloud' or 'aws'. The default is %s." % config.provisioner)

    for preemptable in (False, True):

        def _addOptionFn(*name, **kwargs):
            name = list(name)
            if preemptable:
                name.insert(-1, 'preemptable')
            name = ''.join(
                (s[0].upper() + s[1:]) if i else s for i, s in enumerate(name))
            terms = re.compile(r'\{([^{}]+)\}')
            _help = kwargs.pop('help')
            _help = ''.join((term.split('|') * 2)[int(preemptable)]
                            for term in terms.split(_help))
            addOptionFn('--' + name,
                        dest=name,
                        help=_help +
                        ' The default is %s.' % getattr(config, name),
                        **kwargs)

        _addOptionFn(
            'nodeType',
            metavar='TYPE',
            help=
            "Node type for {non-|}preemptable nodes. The syntax depends on the "
            "provisioner used. For the cgcloud provisioner this is the name of an "
            "EC2 instance type{|, followed by a colon and the price in dollars to "
            "bid for a spot instance}, for example 'c3.8xlarge{|:0.42}'. For the AWS "
            "provisioner this is the name of the EC2 instance type followed by a colon "
            "and the price in dollars, for example 'm3.medium:0.10'.")
        _addOptionFn(
            'nodeOptions',
            metavar='OPTIONS',
            help=
            "Provisioning options for the {non-|}preemptable node type. The syntax "
            "depends on the provisioner used. The CGCloud provisioner doesn't "
            "currently support any node options.")
        for p, q in [('min', 'Minimum'), ('max', 'Maximum')]:
            _addOptionFn(
                p,
                'nodes',
                default=None,
                metavar='NUM',
                help=q +
                " number of {non-|}preemptable nodes in the cluster, if using "
                "auto-scaling.")

    # TODO: DESCRIBE THE FOLLOWING TWO PARAMETERS
    addOptionFn("--alphaPacking",
                dest="alphaPacking",
                default=None,
                help=(" default=%s" % config.alphaPacking))
    addOptionFn("--betaInertia",
                dest="betaInertia",
                default=None,
                help=(" default=%s" % config.betaInertia))
    addOptionFn(
        "--scaleInterval",
        dest="scaleInterval",
        default=None,
        help=("The interval (seconds) between assessing if the scale of"
              " the cluster needs to change. default=%s" %
              config.scaleInterval))
    addOptionFn(
        "--preemptableCompensation",
        dest="preemptableCompensation",
        default=None,
        help=
        ("The preference of the autoscaler to replace preemptable nodes with "
         "non-preemptable nodes, when preemptable nodes cannot be started for some "
         "reason. Defaults to %s. This value must be between 0.0 and 1.0, inclusive. "
         "A value of 0.0 disables such compensation, a value of 0.5 compensates two "
         "missing preemptable nodes with a non-preemptable one. A value of 1.0 "
         "replaces every missing pre-emptable node with a non-preemptable one."
         % config.preemptableCompensation))

    #
    #Resource requirements
    #
    addOptionFn = addGroupFn(
        "toil options for cores/memory requirements",
        "The options to specify default cores/memory requirements (if not "
        "specified by the jobs themselves), and to limit the total amount of "
        "memory/cores requested from the batch system.")
    addOptionFn(
        '--defaultMemory',
        dest='defaultMemory',
        default=None,
        metavar='INT',
        help=
        'The default amount of memory to request for a job. Only applicable to jobs '
        'that do not specify an explicit value for this requirement. Standard '
        'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
        bytes2human(config.defaultMemory, symbols='iec'))
    addOptionFn(
        '--defaultCores',
        dest='defaultCores',
        default=None,
        metavar='FLOAT',
        help=
        'The default number of CPU cores to dedicate to a job. Only applicable to jobs '
        'that do not specify an explicit value for this requirement. Fractions of a '
        'core (for example 0.1) are supported on some batch systems, namely Mesos '
        'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn(
        '--defaultDisk',
        dest='defaultDisk',
        default=None,
        metavar='INT',
        help=
        'The default amount of disk space to dedicate to a job. Only applicable to jobs '
        'that do not specify an explicit value for this requirement. Standard '
        'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
        bytes2human(config.defaultDisk, symbols='iec'))
    assert not config.defaultPreemptable, 'User would be unable to reset config.defaultPreemptable'
    addOptionFn('--defaultPreemptable',
                dest='defaultPreemptable',
                action='store_true')
    addOptionFn("--readGlobalFileMutableByDefault",
                dest="readGlobalFileMutableByDefault",
                action='store_true',
                default=None,
                help='Toil disallows modification of read '
                'global files by default. This flag makes '
                'read global files mutable by default; '
                'however, it also defeats the purpose of '
                'shared caching via hard links to save '
                'space. Default is False.')
    addOptionFn(
        '--maxCores',
        dest='maxCores',
        default=None,
        metavar='INT',
        help=
        'The maximum number of CPU cores to request from the batch system at any one '
        'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
        'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn(
        '--maxMemory',
        dest='maxMemory',
        default=None,
        metavar='INT',
        help=
        "The maximum amount of memory to request from the batch system at any one "
        "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
        "is %s" % bytes2human(config.maxMemory, symbols='iec'))
    addOptionFn(
        '--maxDisk',
        dest='maxDisk',
        default=None,
        metavar='INT',
        help=
        'The maximum amount of disk space to request from the batch system at any '
        'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
        'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))

    #
    #Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs", \
            "The options for jobs that either run too long/fail or get lost \
            (some batch systems have issues!)"                                              )
    addOptionFn(
        "--retryCount",
        dest="retryCount",
        default=None,
        help=("Number of times to retry a failing job before giving up and "
              "labeling job failed. default=%s" % config.retryCount))
    addOptionFn(
        "--maxJobDuration",
        dest="maxJobDuration",
        default=None,
        help=("Maximum runtime of a job (in seconds) before we kill it "
              "(this is a lower bound, and the actual time before killing "
              "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn(
        "--rescueJobsFrequency",
        dest="rescueJobsFrequency",
        default=None,
        help=
        ("Period of time to wait (in seconds) between checking for "
         "missing/overlong jobs, that is, jobs which get lost by the batch system. "
         "Expert parameter. default=%s" % config.rescueJobsFrequency))

    #
    #Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options",
                             "Miscellaneous options")
    addOptionFn(
        '--disableCaching',
        dest='disableCaching',
        action='store_true',
        default=False,
        help='Disables caching in the file store. This flag must be set to use '
        'a batch system that does not support caching such as Grid Engine, Parasol, '
        'LSF, or Slurm')
    addOptionFn(
        "--maxLogFileSize",
        dest="maxLogFileSize",
        default=None,
        help=
        ("The maximum size of a job log file to keep (in bytes); log files larger "
         "than this will be truncated to keep only their last maxLogFileSize bytes. "
         "default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging",
                dest="realTimeLogging",
                action="store_true",
                default=False,
                help="Enable real-time logging from workers to masters")

    addOptionFn(
        "--sseKey",
        dest="sseKey",
        default=None,
        help=
        "Path to a file containing a 32-character key to be used for server-side encryption on awsJobStore. SSE will "
        "not be used if this flag is not passed.")
    addOptionFn(
        "--cseKey",
        dest="cseKey",
        default=None,
        help=
        "Path to file containing 256-bit key to be used for client-side encryption on "
        "azureJobStore. By default, no encryption is used.")
    addOptionFn(
        "--setEnv",
        '-e',
        metavar='NAME=VALUE or NAME',
        dest="environment",
        default=[],
        action="append",
        help=
        "Set an environment variable early on in the worker. If VALUE is omitted, "
        "it will be looked up in the current environment. Independently of this "
        "option, the worker will try to emulate the leader's environment before "
        "running a job. Using this option, a variable can be injected into the "
        "worker process itself before it is started.")
    addOptionFn(
        "--servicePollingInterval",
        dest="servicePollingInterval",
        default=None,
        help=
        "Interval of time service jobs wait between polling for the existence"
        " of the keep-alive flag (default=%s)" % config.servicePollingInterval)
    #
    #Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn(
        "--badWorker",
        dest="badWorker",
        default=None,
        help=
        ("For testing purposes, randomly kill a 'badWorker' proportion of jobs using SIGKILL. default=%s"
         % config.badWorker))
    addOptionFn(
        "--badWorkerFailInterval",
        dest="badWorkerFailInterval",
        default=None,
        help=
        ("When killing the job, pick uniformly within the interval from 0.0 to "
         "'badWorkerFailInterval' seconds after the worker starts. default=%s"
         % config.badWorkerFailInterval))
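
For context, each addGroupFn call is expected to return a callable with the signature of argparse's add_argument. A hedged sketch of how _addOptions might be wired to an argparse parser (the adapter below is an illustrative assumption, not Toil's actual plumbing):

import argparse

def make_add_group_fn(parser):
    # Each call creates an argument group and hands back its add_argument,
    # which plays the role of addOptionFn inside _addOptions.
    def addGroupFn(title, description):
        return parser.add_argument_group(title, description).add_argument
    return addGroupFn

parser = argparse.ArgumentParser(description='Toil-style options (sketch)')
_addOptions(make_add_group_fn(parser), config)  # config: a loaded Toil Config (assumed)
options = parser.parse_args(['file:/tmp/my-job-store', '--stats', '--retryCount', '3'])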
Example #5
def _addOptions(addGroupFn, config):
    #
    #Core options
    #
    addOptionFn = addGroupFn("toil core options", "Options to specify the \
    location of the toil and turn on stats collation about the performance of jobs.")
    #TODO - specify how this works when path is AWS
    addOptionFn('jobStore', type=str,
                      help=("Store in which to place job management files and the globally "
                      "accessed temporary files "
                      "(If this is a file path it needs to be globally accessible "
                      "by all machines running jobs).\n"
                      "If the store already exists and restart is false, an"
                      " ExistingJobStoreException will be thrown."))
    addOptionFn("--workDir", dest="workDir", default=None,
                help="Absolute path to directory where temporary files generated during the Toil "
                     "run should be placed. Temp files and folders will be placed in a directory "
                     "toil-<workflowID> within workDir (The workflowID is generated by Toil and "
                     "will be reported in the workflow logs. Default is determined by the "
                     "user-defined environmental variable TOIL_TEMPDIR, or the environment "
                     "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
                     "all machines running jobs.")
    addOptionFn("--stats", dest="stats", action="store_true", default=None,
                      help="Records statistics about the toil workflow to be used by 'toil stats'.")
    addOptionFn("--clean", dest="clean", choices=['always', 'onError','never', 'onSuccess'], default=None,
                      help=("Determines the deletion of the jobStore upon completion of the program. "
                            "Choices: 'always', 'onError','never', 'onSuccess'. The --stats option requires "
                            "information from the jobStore upon completion so the jobStore will never be deleted with"
                            "that flag. If you wish to be able to restart the run, choose \'never\' or \'onSuccess\'. "
                            "Default is \'never\' if stats is enabled, and \'onSuccess\' otherwise"))
    addOptionFn("--cleanWorkDir", dest="cleanWorkDir",
                choices=['always', 'never', 'onSuccess', 'onError'], default='always',
                help=("Determines deletion of temporary worker directory upon completion of a job. Choices: 'always', "
                      "'never', 'onSuccess'. Default = always. WARNING: This option should be changed for debugging "
                      "only. Running a full pipeline with this option could fill your disk with intermediate data."))
    #
    #Restarting the workflow options
    #
    addOptionFn = addGroupFn("toil options for restarting an existing workflow",
                             "Allows the restart of an existing workflow")
    addOptionFn("--restart", dest="restart", default=None, action="store_true",
                help="If --restart is specified then will attempt to restart existing workflow "
                "at the location pointed to by the --jobStore option. Will raise an exception if the workflow does not exist")

    #
    #Batch system options
    #
    addOptionFn = addGroupFn("toil options for specifying the batch system",
                             "Allows the specification of the batch system, and arguments to the batch system/big batch system (see below).")
    addOptionFn("--batchSystem", dest="batchSystem", default=None,
                      help=("The type of batch system to run the job(s) with, currently can be one "
                            "of singleMachine, parasol, gridEngine, lsf or mesos'. default=%s" % config.batchSystem))
    addOptionFn("--scale", dest="scale", default=None,
                help=("A scaling factor to change the value of all submitted tasks's submitted cores. "
                      "Used in singleMachine batch system. default=%s" % config.scale))
    addOptionFn("--mesosMaster", dest="mesosMasterAddress", default=None,
                help=("The host and port of the Mesos master separated by colon. default=%s" % config.mesosMasterAddress))
    addOptionFn("--parasolCommand", dest="parasolCommand", default=None,
                      help="The name or path of the parasol program. Will be looked up on PATH "
                           "unless it starts with a slashdefault=%s" % config.parasolCommand)
    addOptionFn("--parasolMaxBatches", dest="parasolMaxBatches", default=None,
                help="Maximum number of job batches the Parasol batch is allowed to create. One "
                     "batch is created for jobs with a a unique set of resource requirements. "
                     "default=%i" % config.parasolMaxBatches)

    #
    #Resource requirements
    #
    addOptionFn = addGroupFn("toil options for cores/memory requirements",
                             "The options to specify default cores/memory requirements (if not "
                             "specified by the jobs themselves), and to limit the total amount of "
                             "memory/cores requested from the batch system.")
    addOptionFn('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
                help='The default amount of memory to request for a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
                     bytes2human( config.defaultMemory, symbols='iec' ))
    addOptionFn('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
                help='The default number of CPU cores to dedicate to a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Fractions of a '
                     'core (for example 0.1) are supported on some batch systems, namely Mesos '
                     'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
                help='The default amount of disk space to dedicate to a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
                     bytes2human( config.defaultDisk, symbols='iec' ))
    addOptionFn('--defaultCache', dest='defaultCache', default=None, metavar='INT',
                help='The default amount of disk space to use for caching files shared between '
                     'jobs. Only applicable to jobs that do not specify an explicit value for '
                     'this requirement. Standard suffixes like K, Ki, M, Mi, G or Gi are '
                     'supported. Default is %s' % bytes2human( config.defaultCache, symbols='iec' ))
    addOptionFn('--maxCores', dest='maxCores', default=None, metavar='INT',
                help='The maximum number of CPU cores to request from the batch system at any one '
                     'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
                     'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn('--maxMemory', dest='maxMemory', default=None, metavar='INT',
                help="The maximum amount of memory to request from the batch system at any one "
                     "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
                     "is %s" % bytes2human( config.maxMemory, symbols='iec'))
    addOptionFn('--maxDisk', dest='maxDisk', default=None, metavar='INT',
                help='The maximum amount of disk space to request from the batch system at any '
                     'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
                     'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))

    #
    #Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs", \
            "The options for jobs that either run too long/fail or get lost \
            (some batch systems have issues!)")
    addOptionFn("--retryCount", dest="retryCount", default=None,
                      help=("Number of times to retry a failing job before giving up and "
                            "labeling job failed. default=%s" % config.retryCount))
    addOptionFn("--maxJobDuration", dest="maxJobDuration", default=None,
                      help=("Maximum runtime of a job (in seconds) before we kill it "
                            "(this is a lower bound, and the actual time before killing "
                            "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
                      help=("Period of time to wait (in seconds) between checking for "
                            "missing/overlong jobs, that is jobs which get lost by the batch system. Expert parameter. default=%s" % config.rescueJobsFrequency))

    #
    #Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options", "Miscellaneous options")
    addOptionFn("--maxLogFileSize", dest="maxLogFileSize", default=None,
                      help=("The maximum size of a job log file to keep (in bytes), log files larger "
                            "than this will be truncated to the last X bytes. Default is 50 "
                            "kilobytes, default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
                help="Enable real-time logging from workers to masters")

    addOptionFn("--sseKey", dest="sseKey", default=None,
            help="Path to file containing 32 character key to be used for server-side encryption on awsJobStore. SSE will "
                 "not be used if this flag is not passed.")
    addOptionFn("--cseKey", dest="cseKey", default=None,
                help="Path to file containing 256-bit key to be used for client-side encryption on "
                "azureJobStore. By default, no encryption is used.")
    addOptionFn("--setEnv", '-e', metavar='NAME=VALUE or NAME',
                dest="environment", default=[], action="append",
                help="Set an environment variable early on in the worker. If VALUE is omitted, "
                     "it will be looked up in the current environment. Independently of this "
                     "option, the worker will try to emulate the leader's environment before "
                     "running a job. Using this option, a variable can be injected into the "
                     "worker process itself before it is started.")
    addOptionFn("--servicePollingInterval", dest="servicePollingInterval", default=None,
                help="Interval of time service jobs wait between polling for the existence"
                " of the keep-alive flag (defailt=%s)" % config.servicePollingInterval)
    #
    #Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn("--badWorker", dest="badWorker", default=None,
                      help=("For testing purposes randomly kill 'badWorker' proportion of jobs using SIGKILL, default=%s" % config.badWorker))
    addOptionFn("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
                      help=("When killing the job pick uniformly within the interval from 0.0 to "
                            "'badWorkerFailInterval' seconds after the worker starts, default=%s" % config.badWorkerFailInterval))
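
All of these help strings format their defaults with bytes2human(n, symbols='iec'), which renders byte counts as strings like '2.0Gi'. A rough stand-in illustrating the behavior assumed here (not Toil's actual implementation, whose exact formatting may differ):

def bytes2human(n, symbols='iec'):
    # Rough stand-in: walk the IEC suffixes from largest to smallest and
    # format n against the first unit it reaches.
    units = [('Ei', 2 ** 60), ('Pi', 2 ** 50), ('Ti', 2 ** 40),
             ('Gi', 2 ** 30), ('Mi', 2 ** 20), ('Ki', 2 ** 10)]
    for suffix, factor in units:
        if n >= factor:
            return '%.1f%s' % (n / float(factor), suffix)
    return '%dB' % n

print(bytes2human(2 * 2 ** 30))  # 2.0Gi
print(bytes2human(512))          # 512B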
Example #6
def _addOptions(addGroupFn, config):
    #
    #Core options
    #
    addOptionFn = addGroupFn(
        "toil core options",
        "Options to specify the location of the toil workflow and turn on "
        "stats collation about the performance of jobs.")
    #TODO - specify how this works when path is AWS
    addOptionFn(
        'jobStore',
        type=str,
        help=("Store in which to place job management files and the globally "
              "accessed temporary files "
              "(If this is a file path it needs to be globally accessible "
              "by all machines running jobs).\n"
              "If the store already exists and restart is false, an"
              " ExistingJobStoreException will be thrown."))
    addOptionFn(
        "--workDir",
        dest="workDir",
        default=None,
        help=
        "Absolute path to the directory where temporary files generated during the Toil run should be placed. "
        "Default is determined by environment variables (TMPDIR, TEMP, TMP) via mkdtemp"
    )
    addOptionFn(
        "--stats",
        dest="stats",
        action="store_true",
        default=None,
        help=
        "Records statistics about the toil workflow to be used by 'toil stats'."
    )
    addOptionFn(
        "--clean",
        dest="clean",
        choices=['always', 'onError', 'never', 'onSuccess'],
        default=None,
        help=
        ("Determines the deletion of the jobStore upon completion of the program. "
         "Choices: 'always', 'onError', 'never', 'onSuccess'. The --stats option requires "
         "information from the jobStore upon completion, so the jobStore will never be "
         "deleted with that flag. If you wish to be able to restart the run, choose "
         "'never' or 'onSuccess'. Default is 'never' if stats is enabled, and 'onSuccess' "
         "otherwise."
         ))

    #
    #Restarting the workflow options
    #
    addOptionFn = addGroupFn(
        "toil options for restarting an existing workflow",
        "Allows the restart of an existing workflow")
    addOptionFn(
        "--restart",
        dest="restart",
        default=None,
        action="store_true",
        help=
        "If --restart is specified then Toil will attempt to restart the existing workflow "
        "at the location pointed to by the --jobStore option. An exception will be raised if the workflow does not exist."
    )

    #
    #Batch system options
    #
    addOptionFn = addGroupFn(
        "toil options for specifying the batch system",
        "Allows the specification of the batch system, and arguments to the batch system/big batch system (see below)."
    )
    addOptionFn(
        "--batchSystem",
        dest="batchSystem",
        default=None,
        help=
        ("The type of batch system to run the job(s) with; currently this can be one "
         "of singleMachine, parasol, gridEngine, lsf or mesos. default=%s" %
         config.batchSystem))
    addOptionFn(
        "--scale",
        dest="scale",
        default=None,
        help=
        ("A scaling factor to change the value of all submitted tasks' submitted cores. "
         "Used in the singleMachine batch system. default=%s" % config.scale))
    addOptionFn(
        "--mesosMaster",
        dest="mesosMasterAddress",
        default=None,
        help=
        ("The host and port of the Mesos master, separated by a colon. default=%s"
         % config.mesosMasterAddress))
    addOptionFn(
        "--parasolCommand",
        dest="parasolCommand",
        default=None,
        help=
        "The name or path of the parasol program. Will be looked up on PATH "
        "unless it starts with a slash. default=%s" % config.parasolCommand)
    addOptionFn(
        "--parasolMaxBatches",
        dest="parasolMaxBatches",
        default=None,
        help=
        "Maximum number of job batches the Parasol batch system is allowed to create. One "
        "batch is created for jobs with a unique set of resource requirements. "
        "default=%i" % config.parasolMaxBatches)

    #
    #Resource requirements
    #
    addOptionFn = addGroupFn(
        "toil options for cores/memory requirements",
        "The options to specify default cores/memory requirements (if not "
        "specified by the jobs themselves), and to limit the total amount of "
        "memory/cores requested from the batch system.")
    addOptionFn(
        '--defaultMemory',
        dest='defaultMemory',
        default=None,
        metavar='INT',
        help=
        'The default amount of memory to request for a job. Only applicable to jobs '
        'that do not specify an explicit value for this requirement. Standard '
        'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
        bytes2human(config.defaultMemory, symbols='iec'))
    addOptionFn(
        '--defaultCores',
        dest='defaultCores',
        default=None,
        metavar='FLOAT',
        help=
        'The default number of CPU cores to dedicate to a job. Only applicable to jobs '
        'that do not specify an explicit value for this requirement. Fractions of a '
        'core (for example 0.1) are supported on some batch systems, namely Mesos '
        'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn(
        '--defaultDisk',
        dest='defaultDisk',
        default=None,
        metavar='INT',
        help=
        'The default amount of disk space to dedicate to a job. Only applicable to jobs '
        'that do not specify an explicit value for this requirement. Standard '
        'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s' %
        bytes2human(config.defaultDisk, symbols='iec'))
    addOptionFn(
        '--defaultCache',
        dest='defaultCache',
        default=None,
        metavar='INT',
        help=
        'The default amount of disk space to use for caching files shared between '
        'jobs. Only applicable to jobs that do not specify an explicit value for '
        'this requirement. Standard suffixes like K, Ki, M, Mi, G or Gi are '
        'supported. Default is %s' %
        bytes2human(config.defaultCache, symbols='iec'))
    addOptionFn(
        '--maxCores',
        dest='maxCores',
        default=None,
        metavar='INT',
        help=
        'The maximum number of CPU cores to request from the batch system at any one '
        'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
        'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn(
        '--maxMemory',
        dest='maxMemory',
        default=None,
        metavar='INT',
        help=
        "The maximum amount of memory to request from the batch system at any one "
        "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
        "is %s" % bytes2human(config.maxMemory, symbols='iec'))
    addOptionFn(
        '--maxDisk',
        dest='maxDisk',
        default=None,
        metavar='INT',
        help=
        'The maximum amount of disk space to request from the batch system at any '
        'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
        'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))

    #
    #Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs", \
            "The options for jobs that either run too long/fail or get lost \
            (some batch systems have issues!)"                                              )
    addOptionFn(
        "--retryCount",
        dest="retryCount",
        default=None,
        help=("Number of times to retry a failing job before giving up and "
              "labeling job failed. default=%s" % config.retryCount))
    addOptionFn(
        "--maxJobDuration",
        dest="maxJobDuration",
        default=None,
        help=("Maximum runtime of a job (in seconds) before we kill it "
              "(this is a lower bound, and the actual time before killing "
              "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn(
        "--rescueJobsFrequency",
        dest="rescueJobsFrequency",
        default=None,
        help=
        ("Period of time to wait (in seconds) between checking for "
         "missing/overlong jobs, that is, jobs which get lost by the batch system. "
         "Expert parameter. default=%s" % config.rescueJobsFrequency))

    #
    #Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options",
                             "Miscellaneous options")
    addOptionFn(
        "--maxLogFileSize",
        dest="maxLogFileSize",
        default=None,
        help=
        ("The maximum size of a job log file to keep (in bytes); log files larger "
         "than this will be truncated to keep only their last maxLogFileSize bytes. "
         "default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging",
                dest="realTimeLogging",
                action="store_true",
                default=False,
                help="Enable real-time logging from workers to masters")

    addOptionFn(
        "--sseKey",
        dest="sseKey",
        default=None,
        help=
        "Path to a file containing a 32-character key to be used for server-side encryption on awsJobStore. SSE will "
        "not be used if this flag is not passed.")
    addOptionFn(
        "--cseKey",
        dest="cseKey",
        default=None,
        help=
        "Path to file containing 256-bit key to be used for client-side encryption on "
        "azureJobStore. By default, no encryption is used.")
    addOptionFn(
        "--setEnv",
        '-e',
        metavar='NAME=VALUE or NAME',
        dest="environment",
        default=[],
        action="append",
        help=
        "Set an environment variable early on in the worker. If VALUE is omitted, "
        "it will be looked up in the current environment. Independently of this "
        "option, the worker will try to emulate the leader's environment before "
        "running a job. Using this option, a variable can be injected into the "
        "worker process itself before it is started.")

    #
    #Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn(
        "--badWorker",
        dest="badWorker",
        default=None,
        help=
        ("For testing purposes, randomly kill a 'badWorker' proportion of jobs using SIGKILL. default=%s"
         % config.badWorker))
    addOptionFn(
        "--badWorkerFailInterval",
        dest="badWorkerFailInterval",
        default=None,
        help=
        ("When killing the job, pick uniformly within the interval from 0.0 to "
         "'badWorkerFailInterval' seconds after the worker starts. default=%s"
         % config.badWorkerFailInterval))
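
One detail shared by every variant above: --setEnv is declared with action="append" and default=[], so repeated occurrences accumulate into a single list. A minimal argparse reproduction of just that flag:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--setEnv", '-e', metavar='NAME=VALUE or NAME',
                    dest="environment", default=[], action="append")
opts = parser.parse_args(['--setEnv', 'PATH', '-e', 'FOO=bar'])
print(opts.environment)  # ['PATH', 'FOO=bar']
# Per the help text, a bare NAME (no VALUE) is later resolved from the
# worker's current environment.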