def issueJob(self, jobNode):
    """ Add a job to the queue of jobs """
    # Build the worker invocation the batch system will execute for this job.
    worker_cmd = [resolveEntryPoint('_toil_worker'),
                  self.jobStoreLocator,
                  jobNode.jobStoreID]
    jobNode.command = ' '.join(worker_cmd)
    # The batch system assigns its own ID; index the issued job under it.
    batch_job_id = self.batchSystem.issueBatchJob(jobNode)
    self.jobBatchSystemIDToIssuedJob[batch_job_id] = jobNode
    if jobNode.preemptable:
        # len(jobBatchSystemIDToIssuedJob) should always be greater than or
        # equal to preemptableJobsIssued, so increment this value after the
        # job is added to the issuedJob dict
        self.preemptableJobsIssued += 1
    # Jobs whose names match the debug prefixes are demoted to debug-level
    # logging to keep the leader's log readable.
    if jobNode.jobName.startswith(self.debugJobNames):
        log_fn = logger.debug
    else:
        log_fn = logger.info
    log_fn("Issued job %s with job batch system ID: "
           "%s and cores: %s, disk: %s, and memory: %s",
           jobNode, str(batch_job_id), int(jobNode.cores),
           bytes2human(jobNode.disk), bytes2human(jobNode.memory))
def issueJob(self, jobNode):
    """ Add a job to the queue of jobs

    Builds the worker command line for the job, submits it to the batch
    system, and records the issued job under the batch-system-assigned ID.
    NOTE(review): assumes CWL_INTERNAL_JOBS is a tuple of job-name prefixes
    suitable for str.startswith -- confirm at module level.
    """
    jobNode.command = ' '.join((resolveEntryPoint('_toil_worker'),
                                self.jobStoreLocator, jobNode.jobStoreID))
    # The batch system assigns its own ID; index the issued job under it.
    jobBatchSystemID = self.batchSystem.issueBatchJob(jobNode)
    self.jobBatchSystemIDToIssuedJob[jobBatchSystemID] = jobNode
    if jobNode.preemptable:
        # len(jobBatchSystemIDToIssuedJob) should always be greater than or equal to preemptableJobsIssued,
        # so increment this value after the job is added to the issuedJob dict
        self.preemptableJobsIssued += 1
    # CWL-internal jobs are noisy, so they are logged at debug level only.
    cur_logger = (logger.debug if jobNode.jobName.startswith(CWL_INTERNAL_JOBS)
                  else logger.info)
    cur_logger("Issued job %s with job batch system ID: "
               "%s and cores: %s, disk: %s, and memory: %s",
               jobNode, str(jobBatchSystemID), int(jobNode.cores),
               bytes2human(jobNode.disk), bytes2human(jobNode.memory))
def _addOptions(addGroupFn, config):
    """Register all of Toil's command line options.

    :param addGroupFn: callable taking a group title and a group description
        and returning an ``addOptionFn`` used to add options to that group.
    :param config: configuration object whose attributes supply the default
        values quoted in the help strings.
    """
    #
    # Core options
    #
    addOptionFn = addGroupFn("toil core options",
                             "Options to specify the "
                             "location of the toil workflow and turn on stats collation about the performance of jobs.")
    addOptionFn('jobStore', type=str,
                help=("Store in which to place job management files and the global accessed "
                      "temporary files. Job store locator strings should be formatted as follows\n"
                      "aws:<AWS region>:<name prefix>\n"
                      # Fixed: stray apostrophe after the azure locator pattern.
                      "azure:<account>:<name prefix>\n"
                      "google:<project id>:<name prefix>\n"
                      "file:<file path>\n"
                      "Note that for backwards compatibility ./foo is equivalent to file:/foo and "
                      "/bar is equivalent to file:/bar.\n"
                      "(If this is a file path this needs to be globally accessible by all machines"
                      " running jobs).\n"
                      "If the store already exists and restart is false a JobStoreCreationException"
                      " exception will be thrown."))
    addOptionFn("--workDir", dest="workDir", default=None,
                help="Absolute path to directory where temporary files generated during the Toil "
                     "run should be placed. Temp files and folders will be placed in a directory "
                     "toil-<workflowID> within workDir (The workflowID is generated by Toil and "
                     "will be reported in the workflow logs. Default is determined by the "
                     "user-defined environmental variable TOIL_TEMPDIR, or the environment "
                     "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
                     "all machines running jobs.")
    addOptionFn("--stats", dest="stats", action="store_true", default=None,
                help="Records statistics about the toil workflow to be used by 'toil stats'.")
    addOptionFn("--clean", dest="clean",
                choices=['always', 'onError', 'never', 'onSuccess'], default=None,
                help=("Determines the deletion of the jobStore upon completion of the program. "
                      "Choices: 'always', 'onError','never', 'onSuccess'. The --stats option requires "
                      # Fixed: missing space produced "deleted withthat flag."
                      "information from the jobStore upon completion so the jobStore will never be deleted with "
                      "that flag. If you wish to be able to restart the run, choose \'never\' or \'onSuccess\'. "
                      "Default is \'never\' if stats is enabled, and \'onSuccess\' otherwise"))
    addOptionFn("--cleanWorkDir", dest="cleanWorkDir",
                choices=['always', 'never', 'onSuccess', 'onError'], default='always',
                help=("Determines deletion of temporary worker directory upon completion of a job. Choices: 'always', "
                      # Fixed: help omitted the 'onError' choice that is actually accepted.
                      "'never', 'onSuccess', 'onError'. Default = always. WARNING: This option should be changed for debugging "
                      "only. Running a full pipeline with this option could fill your disk with intermediate data."))
    #
    # Restarting the workflow options
    #
    addOptionFn = addGroupFn("toil options for restarting an existing workflow",
                             "Allows the restart of an existing workflow")
    addOptionFn("--restart", dest="restart", default=None, action="store_true",
                help="If --restart is specified then will attempt to restart existing workflow "
                     "at the location pointed to by the --jobStore option. Will raise an exception if the workflow does not exist")
    #
    # Batch system options
    #
    addOptionFn = addGroupFn("toil options for specifying the batch system",
                             "Allows the specification of the batch system, and arguments to the batch system/big batch system (see below).")
    addOptionFn("--batchSystem", dest="batchSystem", default=None,
                help=("The type of batch system to run the job(s) with, currently can be one "
                      # Fixed: stray apostrophe after "mesos".
                      "of singleMachine, parasol, gridEngine, lsf or mesos. default=%s" % config.batchSystem))
    addOptionFn("--scale", dest="scale", default=None,
                help=("A scaling factor to change the value of all submitted tasks's submitted cores. "
                      "Used in singleMachine batch system. default=%s" % config.scale))
    addOptionFn("--mesosMaster", dest="mesosMasterAddress", default=None,
                help=("The host and port of the Mesos master separated by colon. default=%s"
                      % config.mesosMasterAddress))
    addOptionFn("--parasolCommand", dest="parasolCommand", default=None,
                help="The name or path of the parasol program. Will be looked up on PATH "
                     # Fixed: "slashdefault=%s" was missing the sentence break.
                     "unless it starts with a slash. default=%s" % config.parasolCommand)
    addOptionFn("--parasolMaxBatches", dest="parasolMaxBatches", default=None,
                help="Maximum number of job batches the Parasol batch is allowed to create. One "
                     # Fixed: duplicated word "a a".
                     "batch is created for jobs with a unique set of resource requirements. "
                     "default=%i" % config.parasolMaxBatches)
    #
    # Auto scaling options
    #
    addOptionFn = addGroupFn("toil options for autoscaling the cluster of worker nodes",
                             "Allows the specification of the minimum and maximum number of nodes "
                             "in an autoscaled cluster, as well as parameters to control the "
                             "level of provisioning.")
    addOptionFn("--provisioner", dest="provisioner", choices=['cgcloud'],
                help="The provisioner for cluster auto-scaling. Currently only the cgcloud "
                     "provisioner exists. The default is %s." % config.provisioner)
    for preemptable in (False, True):
        def _addOptionFn(*name, **kwargs):
            # Build a camelCased option name, inserting 'preemptable' before
            # the last name component for the preemptable variant, e.g.
            # ('min', 'nodes') -> 'minNodes' / 'minPreemptableNodes'.
            name = list(name)
            if preemptable:
                name.insert(-1, 'preemptable')
            name = ''.join((s[0].upper() + s[1:]) if i else s for i, s in enumerate(name))
            # Help strings may contain "{non-preemptable text|preemptable text}"
            # alternatives; keep the half that matches this loop iteration.
            terms = re.compile(r'\{([^{}]+)\}')
            _help = kwargs.pop('help')
            _help = ''.join((term.split('|') * 2)[int(preemptable)] for term in terms.split(_help))
            addOptionFn('--' + name, dest=name,
                        help=_help + ' The default is %s.' % getattr(config, name),
                        **kwargs)

        _addOptionFn('nodeType', metavar='TYPE',
                     help="Node type for {non-|}preemptable nodes. The syntax depends on the "
                          "provisioner used. For the cgcloud provisioner this is the name of an "
                          "EC2 instance type{|, followed by a colon and the price in dollar to "
                          "bid for a spot instance}, for example 'c3.8xlarge{|:0.42}'.")
        _addOptionFn('nodeOptions', metavar='OPTIONS',
                     help="Provisioning options for the {non-|}preemptable node type. The syntax "
                          "depends on the provisioner used. For the cgcloud provisioner this is a "
                          "space-separated list of options to cgcloud's grow-cluster command (run "
                          # Fixed: unbalanced parenthesis after "--help' for details".
                          "'cgcloud grow-cluster --help' for details).")
        for p, q in [('min', 'Minimum'), ('max', 'Maximum')]:
            _addOptionFn(p, 'nodes', default=None, metavar='NUM',
                         help=q + " number of {non-|}preemptable nodes in the cluster, if using "
                                  "auto-scaling.")
    # TODO: DESCRIBE THE FOLLOWING TWO PARAMETERS
    addOptionFn("--alphaPacking", dest="alphaPacking", default=None,
                help=(" default=%s" % config.alphaPacking))
    addOptionFn("--betaInertia", dest="betaInertia", default=None,
                help=(" default=%s" % config.betaInertia))
    addOptionFn("--scaleInterval", dest="scaleInterval", default=None,
                help=("The interval (seconds) between assessing if the scale of"
                      " the cluster needs to change. default=%s" % config.scaleInterval))
    #
    # Resource requirements
    #
    addOptionFn = addGroupFn("toil options for cores/memory requirements",
                             "The options to specify default cores/memory requirements (if not "
                             "specified by the jobs themselves), and to limit the total amount of "
                             "memory/cores requested from the batch system.")
    addOptionFn('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
                help='The default amount of memory to request for a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultMemory, symbols='iec'))
    addOptionFn('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
                help='The default number of CPU cores to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Fractions of a '
                     'core (for example 0.1) are supported on some batch systems, namely Mesos '
                     'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
                help='The default amount of disk space to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultDisk, symbols='iec'))
    addOptionFn("--readGlobalFileMutableByDefault", dest="readGlobalFileMutableByDefault",
                action='store_true', default=None,
                help='Toil disallows modification of read '
                     'global files by default. This flag makes '
                     # Fixed: garbled "it makes it makes read file mutable by default".
                     'read global files mutable by default, '
                     'however it also defeats the purpose of '
                     'shared caching via hard links to save '
                     'space. Default is False')
    addOptionFn('--maxCores', dest='maxCores', default=None, metavar='INT',
                help='The maximum number of CPU cores to request from the batch system at any one '
                     'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
                     'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn('--maxMemory', dest='maxMemory', default=None, metavar='INT',
                help="The maximum amount of memory to request from the batch system at any one "
                     "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
                     "is %s" % bytes2human(config.maxMemory, symbols='iec'))
    addOptionFn('--maxDisk', dest='maxDisk', default=None, metavar='INT',
                help='The maximum amount of disk space to request from the batch system at any '
                     'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
                     'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))
    #
    # Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs",
                             "The options for jobs that either run too long/fail or get lost "
                             "(some batch systems have issues!)")
    addOptionFn("--retryCount", dest="retryCount", default=None,
                help=("Number of times to retry a failing job before giving up and "
                      "labeling job failed. default=%s" % config.retryCount))
    addOptionFn("--maxJobDuration", dest="maxJobDuration", default=None,
                help=("Maximum runtime of a job (in seconds) before we kill it "
                      "(this is a lower bound, and the actual time before killing "
                      "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
                help=("Period of time to wait (in seconds) between checking for "
                      "missing/overlong jobs, that is jobs which get lost by the batch system. Expert parameter. default=%s"
                      % config.rescueJobsFrequency))
    #
    # Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options", "Miscellaneous options")
    addOptionFn("--maxLogFileSize", dest="maxLogFileSize", default=None,
                help=("The maximum size of a job log file to keep (in bytes), log files larger "
                      "than this will be truncated to the last X bytes. Default is 50 "
                      "kilobytes, default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
                help="Enable real-time logging from workers to masters")
    addOptionFn("--sseKey", dest="sseKey", default=None,
                help="Path to file containing 32 character key to be used for server-side encryption on awsJobStore. SSE will "
                     "not be used if this flag is not passed.")
    addOptionFn("--cseKey", dest="cseKey", default=None,
                help="Path to file containing 256-bit key to be used for client-side encryption on "
                     "azureJobStore. By default, no encryption is used.")
    addOptionFn("--setEnv", '-e', metavar='NAME=VALUE or NAME',
                dest="environment", default=[], action="append",
                help="Set an environment variable early on in the worker. If VALUE is omitted, "
                     "it will be looked up in the current environment. Independently of this "
                     "option, the worker will try to emulate the leader's environment before "
                     "running a job. Using this option, a variable can be injected into the "
                     "worker process itself before it is started.")
    addOptionFn("--servicePollingInterval", dest="servicePollingInterval", default=None,
                help="Interval of time service jobs wait between polling for the existence"
                     # Fixed: "defailt" typo.
                     " of the keep-alive flag (default=%s)" % config.servicePollingInterval)
    #
    # Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn("--badWorker", dest="badWorker", default=None,
                help=("For testing purposes randomly kill 'badWorker' proportion of jobs using SIGKILL, default=%s"
                      % config.badWorker))
    addOptionFn("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
                help=("When killing the job pick uniformly within the interval from 0.0 to "
                      "'badWorkerFailInterval' seconds after the worker starts, default=%s"
                      % config.badWorkerFailInterval))
def _addOptions(addGroupFn, config):
    """Register all of Toil's command line options.

    :param addGroupFn: callable taking a group title and a group description
        and returning an ``addOptionFn`` used to add options to that group.
    :param config: configuration object whose attributes supply the default
        values quoted in the help strings.
    """
    #
    # Core options
    #
    addOptionFn = addGroupFn(
        "toil core options",
        "Options to specify the location of the Toil workflow and turn on "
        "stats collation about the performance of jobs.")
    addOptionFn('jobStore', type=str,
                help="The location of the job store for the workflow. " + jobStoreLocatorHelp)
    addOptionFn("--workDir", dest="workDir", default=None,
                help="Absolute path to directory where temporary files generated during the Toil "
                     "run should be placed. Temp files and folders will be placed in a directory "
                     "toil-<workflowID> within workDir (The workflowID is generated by Toil and "
                     "will be reported in the workflow logs. Default is determined by the "
                     "user-defined environmental variable TOIL_TEMPDIR, or the environment "
                     "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
                     "all machines running jobs.")
    addOptionFn("--stats", dest="stats", action="store_true", default=None,
                help="Records statistics about the toil workflow to be used by 'toil stats'.")
    addOptionFn("--clean", dest="clean",
                choices=['always', 'onError', 'never', 'onSuccess'], default=None,
                help=("Determines the deletion of the jobStore upon completion of the program. "
                      "Choices: 'always', 'onError','never', 'onSuccess'. The --stats option requires "
                      # Fixed: missing space produced "deleted withthat flag."
                      "information from the jobStore upon completion so the jobStore will never be deleted with "
                      "that flag. If you wish to be able to restart the run, choose \'never\' or \'onSuccess\'. "
                      "Default is \'never\' if stats is enabled, and \'onSuccess\' otherwise"))
    addOptionFn("--cleanWorkDir", dest="cleanWorkDir",
                choices=['always', 'never', 'onSuccess', 'onError'], default='always',
                help=("Determines deletion of temporary worker directory upon completion of a job. Choices: 'always', "
                      # Fixed: help omitted the 'onError' choice that is actually accepted.
                      "'never', 'onSuccess', 'onError'. Default = always. WARNING: This option should be changed for debugging "
                      "only. Running a full pipeline with this option could fill your disk with intermediate data."))
    #
    # Restarting the workflow options
    #
    addOptionFn = addGroupFn("toil options for restarting an existing workflow",
                             "Allows the restart of an existing workflow")
    addOptionFn("--restart", dest="restart", default=None, action="store_true",
                help="If --restart is specified then will attempt to restart existing workflow "
                     "at the location pointed to by the --jobStore option. Will raise an exception if the workflow does not exist")
    #
    # Batch system options
    #
    addOptionFn = addGroupFn("toil options for specifying the batch system",
                             "Allows the specification of the batch system, and arguments to the batch system/big batch system (see below).")
    addOptionFn("--batchSystem", dest="batchSystem", default=None,
                help=("The type of batch system to run the job(s) with, currently can be one "
                      # Fixed: stray apostrophe after "mesos".
                      "of singleMachine, parasol, gridEngine, lsf or mesos. default=%s" % config.batchSystem))
    addOptionFn("--scale", dest="scale", default=None,
                help=("A scaling factor to change the value of all submitted tasks's submitted cores. "
                      "Used in singleMachine batch system. default=%s" % config.scale))
    addOptionFn("--mesosMaster", dest="mesosMasterAddress", default=None,
                help=("The host and port of the Mesos master separated by colon. default=%s"
                      % config.mesosMasterAddress))
    addOptionFn("--parasolCommand", dest="parasolCommand", default=None,
                help="The name or path of the parasol program. Will be looked up on PATH "
                     # Fixed: "slashdefault=%s" was missing the sentence break.
                     "unless it starts with a slash. default=%s" % config.parasolCommand)
    addOptionFn("--parasolMaxBatches", dest="parasolMaxBatches", default=None,
                help="Maximum number of job batches the Parasol batch is allowed to create. One "
                     # Fixed: duplicated word "a a".
                     "batch is created for jobs with a unique set of resource requirements. "
                     "default=%i" % config.parasolMaxBatches)
    #
    # Auto scaling options
    #
    addOptionFn = addGroupFn("toil options for autoscaling the cluster of worker nodes",
                             "Allows the specification of the minimum and maximum number of nodes "
                             "in an autoscaled cluster, as well as parameters to control the "
                             "level of provisioning.")
    addOptionFn("--provisioner", dest="provisioner", choices=['cgcloud', 'aws'],
                # Fixed: missing space produced "choices are'cgcloud'".
                help="The provisioner for cluster auto-scaling. The currently supported choices are "
                     "'cgcloud' or 'aws'. The default is %s." % config.provisioner)
    for preemptable in (False, True):
        def _addOptionFn(*name, **kwargs):
            # Build a camelCased option name, inserting 'preemptable' before
            # the last name component for the preemptable variant, e.g.
            # ('min', 'nodes') -> 'minNodes' / 'minPreemptableNodes'.
            name = list(name)
            if preemptable:
                name.insert(-1, 'preemptable')
            name = ''.join((s[0].upper() + s[1:]) if i else s for i, s in enumerate(name))
            # Help strings may contain "{non-preemptable text|preemptable text}"
            # alternatives; keep the half that matches this loop iteration.
            terms = re.compile(r'\{([^{}]+)\}')
            _help = kwargs.pop('help')
            _help = ''.join((term.split('|') * 2)[int(preemptable)] for term in terms.split(_help))
            addOptionFn('--' + name, dest=name,
                        help=_help + ' The default is %s.' % getattr(config, name),
                        **kwargs)

        _addOptionFn('nodeType', metavar='TYPE',
                     help="Node type for {non-|}preemptable nodes. The syntax depends on the "
                          "provisioner used. For the cgcloud provisioner this is the name of an "
                          "EC2 instance type{|, followed by a colon and the price in dollar to "
                          "bid for a spot instance}, for example 'c3.8xlarge{|:0.42}'. "
                          # Fixed: garbled "The AWS provisioner is the name of ..." wording.
                          "For the AWS provisioner this "
                          "is the name of the EC2 instance type followed by a colon and the price "
                          "in dollars, for example: 'm3.medium:0.10'")
        _addOptionFn('nodeOptions', metavar='OPTIONS',
                     help="Provisioning options for the {non-|}preemptable node type. The syntax "
                          "depends on the provisioner used. The CGCloud provisioner doesn't "
                          "currently support any node options.")
        for p, q in [('min', 'Minimum'), ('max', 'Maximum')]:
            _addOptionFn(p, 'nodes', default=None, metavar='NUM',
                         help=q + " number of {non-|}preemptable nodes in the cluster, if using "
                                  "auto-scaling.")
    # TODO: DESCRIBE THE FOLLOWING TWO PARAMETERS
    addOptionFn("--alphaPacking", dest="alphaPacking", default=None,
                help=(" default=%s" % config.alphaPacking))
    addOptionFn("--betaInertia", dest="betaInertia", default=None,
                help=(" default=%s" % config.betaInertia))
    addOptionFn("--scaleInterval", dest="scaleInterval", default=None,
                help=("The interval (seconds) between assessing if the scale of"
                      " the cluster needs to change. default=%s" % config.scaleInterval))
    addOptionFn("--preemptableCompensation", dest="preemptableCompensation", default=None,
                help=("The preference of the autoscaler to replace preemptable nodes with "
                      "non-preemptable nodes, when preemptable nodes cannot be started for some "
                      "reason. Defaults to %s. This value must be between 0.0 and 1.0, inclusive. "
                      "A value of 0.0 disables such compensation, a value of 0.5 compensates two "
                      "missing preemptable nodes with a non-preemptable one. A value of 1.0 "
                      "replaces every missing pre-emptable node with a non-preemptable one."
                      % config.preemptableCompensation))
    #
    # Resource requirements
    #
    addOptionFn = addGroupFn("toil options for cores/memory requirements",
                             "The options to specify default cores/memory requirements (if not "
                             "specified by the jobs themselves), and to limit the total amount of "
                             "memory/cores requested from the batch system.")
    addOptionFn('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
                help='The default amount of memory to request for a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultMemory, symbols='iec'))
    addOptionFn('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
                help='The default number of CPU cores to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Fractions of a '
                     'core (for example 0.1) are supported on some batch systems, namely Mesos '
                     'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
                help='The default amount of disk space to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultDisk, symbols='iec'))
    # --defaultPreemptable is only meaningful as an opt-in flag; a True default
    # could not be overridden from the command line, hence the invariant below.
    assert not config.defaultPreemptable, 'User would be unable to reset config.defaultPreemptable'
    addOptionFn('--defaultPreemptable', dest='defaultPreemptable', action='store_true')
    addOptionFn("--readGlobalFileMutableByDefault", dest="readGlobalFileMutableByDefault",
                action='store_true', default=None,
                help='Toil disallows modification of read '
                     'global files by default. This flag makes '
                     # Fixed: garbled "it makes it makes read file mutable by default".
                     'read global files mutable by default, '
                     'however it also defeats the purpose of '
                     'shared caching via hard links to save '
                     'space. Default is False')
    addOptionFn('--maxCores', dest='maxCores', default=None, metavar='INT',
                help='The maximum number of CPU cores to request from the batch system at any one '
                     'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
                     'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn('--maxMemory', dest='maxMemory', default=None, metavar='INT',
                help="The maximum amount of memory to request from the batch system at any one "
                     "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
                     "is %s" % bytes2human(config.maxMemory, symbols='iec'))
    addOptionFn('--maxDisk', dest='maxDisk', default=None, metavar='INT',
                help='The maximum amount of disk space to request from the batch system at any '
                     'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
                     'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))
    #
    # Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs",
                             "The options for jobs that either run too long/fail or get lost "
                             "(some batch systems have issues!)")
    addOptionFn("--retryCount", dest="retryCount", default=None,
                help=("Number of times to retry a failing job before giving up and "
                      "labeling job failed. default=%s" % config.retryCount))
    addOptionFn("--maxJobDuration", dest="maxJobDuration", default=None,
                help=("Maximum runtime of a job (in seconds) before we kill it "
                      "(this is a lower bound, and the actual time before killing "
                      "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
                help=("Period of time to wait (in seconds) between checking for "
                      "missing/overlong jobs, that is jobs which get lost by the batch system. Expert parameter. default=%s"
                      % config.rescueJobsFrequency))
    #
    # Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options", "Miscellaneous options")
    addOptionFn('--disableCaching', dest='disableCaching', action='store_true', default=False,
                help='Disables caching in the file store. This flag must be set to use '
                     'a batch system that does not support caching such as Grid Engine, Parasol, '
                     'LSF, or Slurm')
    addOptionFn("--maxLogFileSize", dest="maxLogFileSize", default=None,
                help=("The maximum size of a job log file to keep (in bytes), log files larger "
                      "than this will be truncated to the last X bytes. Default is 50 "
                      "kilobytes, default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
                help="Enable real-time logging from workers to masters")
    addOptionFn("--sseKey", dest="sseKey", default=None,
                help="Path to file containing 32 character key to be used for server-side encryption on awsJobStore. SSE will "
                     "not be used if this flag is not passed.")
    addOptionFn("--cseKey", dest="cseKey", default=None,
                help="Path to file containing 256-bit key to be used for client-side encryption on "
                     "azureJobStore. By default, no encryption is used.")
    addOptionFn("--setEnv", '-e', metavar='NAME=VALUE or NAME',
                dest="environment", default=[], action="append",
                help="Set an environment variable early on in the worker. If VALUE is omitted, "
                     "it will be looked up in the current environment. Independently of this "
                     "option, the worker will try to emulate the leader's environment before "
                     "running a job. Using this option, a variable can be injected into the "
                     "worker process itself before it is started.")
    addOptionFn("--servicePollingInterval", dest="servicePollingInterval", default=None,
                help="Interval of time service jobs wait between polling for the existence"
                     # Fixed: "defailt" typo.
                     " of the keep-alive flag (default=%s)" % config.servicePollingInterval)
    #
    # Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn("--badWorker", dest="badWorker", default=None,
                help=("For testing purposes randomly kill 'badWorker' proportion of jobs using SIGKILL, default=%s"
                      % config.badWorker))
    addOptionFn("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
                help=("When killing the job pick uniformly within the interval from 0.0 to "
                      "'badWorkerFailInterval' seconds after the worker starts, default=%s"
                      % config.badWorkerFailInterval))
def _addOptions(addGroupFn, config):
    """
    Register all Toil command-line options.

    :param addGroupFn: callable(title, description) that creates an option group and
        returns an ``add_argument``-like callable for registering options in that group.
    :param config: Config object supplying the default values quoted in help strings.
    """
    #
    # Core options
    #
    addOptionFn = addGroupFn(
        "toil core options",
        "Options to specify the location of the toil workflow and turn on stats "
        "collation about the performance of jobs.")
    # TODO - specify how this works when path is AWS
    addOptionFn('jobStore', type=str,
                help=("Store in which to place job management files "
                      "and the global accessed temporary files "
                      "(If this is a file path this needs to be globally accessible "
                      "by all machines running jobs).\n"
                      "If the store already exists and restart is false an"
                      " ExistingJobStoreException exception will be thrown."))
    addOptionFn("--workDir", dest="workDir", default=None,
                help="Absolute path to directory where temporary files generated during the Toil "
                     "run should be placed. Temp files and folders will be placed in a directory "
                     "toil-<workflowID> within workDir (The workflowID is generated by Toil and "
                     "will be reported in the workflow logs. Default is determined by the "
                     "user-defined environmental variable TOIL_TEMPDIR, or the environment "
                     "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
                     "all machines running jobs.")
    addOptionFn("--stats", dest="stats", action="store_true", default=None,
                help="Records statistics about the toil workflow to be used by 'toil stats'.")
    addOptionFn("--clean", dest="clean", choices=['always', 'onError', 'never', 'onSuccess'],
                default=None,
                # NOTE: missing space between "with" and "that" fixed in the help text below.
                help=("Determines the deletion of the jobStore upon completion of the program. "
                      "Choices: 'always', 'onError', 'never', 'onSuccess'. The --stats option "
                      "requires information from the jobStore upon completion so the jobStore "
                      "will never be deleted with that flag. If you wish to be able to restart "
                      "the run, choose 'never' or 'onSuccess'. Default is 'never' if stats is "
                      "enabled, and 'onSuccess' otherwise"))
    addOptionFn("--cleanWorkDir", dest="cleanWorkDir",
                choices=['always', 'never', 'onSuccess', 'onError'], default='always',
                # Help now lists all four accepted choices (previously 'onError' was omitted).
                help=("Determines deletion of temporary worker directory upon completion of a job. "
                      "Choices: 'always', 'never', 'onSuccess', 'onError'. Default = always. "
                      "WARNING: This option should be changed for debugging only. Running a full "
                      "pipeline with this option could fill your disk with intermediate data."))
    #
    # Restarting the workflow options
    #
    addOptionFn = addGroupFn("toil options for restarting an existing workflow",
                             "Allows the restart of an existing workflow")
    addOptionFn("--restart", dest="restart", default=None, action="store_true",
                help="If --restart is specified then will attempt to restart existing workflow "
                     "at the location pointed to by the --jobStore option. Will raise an "
                     "exception if the workflow does not exist")
    #
    # Batch system options
    #
    addOptionFn = addGroupFn("toil options for specifying the batch system",
                             "Allows the specification of the batch system, and arguments to the "
                             "batch system/big batch system (see below).")
    addOptionFn("--batchSystem", dest="batchSystem", default=None,
                help=("The type of batch system to run the job(s) with, currently can be one "
                      "of singleMachine, parasol, gridEngine, lsf or mesos. default=%s"
                      % config.batchSystem))
    addOptionFn("--scale", dest="scale", default=None,
                help=("A scaling factor to change the value of all submitted tasks' submitted "
                      "cores. Used in singleMachine batch system. default=%s" % config.scale))
    addOptionFn("--mesosMaster", dest="mesosMasterAddress", default=None,
                help=("The host and port of the Mesos master separated by colon. default=%s"
                      % config.mesosMasterAddress))
    addOptionFn("--parasolCommand", dest="parasolCommand", default=None,
                # Fixed the run-together "slashdefault" in the help text.
                help="The name or path of the parasol program. Will be looked up on PATH "
                     "unless it starts with a slash. default=%s" % config.parasolCommand)
    addOptionFn("--parasolMaxBatches", dest="parasolMaxBatches", default=None,
                help="Maximum number of job batches the Parasol batch is allowed to create. One "
                     "batch is created for jobs with a unique set of resource requirements. "
                     "default=%i" % config.parasolMaxBatches)
    #
    # Resource requirements
    #
    addOptionFn = addGroupFn("toil options for cores/memory requirements",
                             "The options to specify default cores/memory requirements (if not "
                             "specified by the jobs themselves), and to limit the total amount of "
                             "memory/cores requested from the batch system.")
    addOptionFn('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
                help='The default amount of memory to request for a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultMemory, symbols='iec'))
    addOptionFn('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
                help='The default number of CPU cores to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Fractions of a '
                     'core (for example 0.1) are supported on some batch systems, namely Mesos '
                     'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
                help='The default amount of disk space to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultDisk, symbols='iec'))
    addOptionFn('--defaultCache', dest='defaultCache', default=None, metavar='INT',
                help='The default amount of disk space to use for caching files shared between '
                     'jobs. Only applicable to jobs that do not specify an explicit value for '
                     'this requirement. Standard suffixes like K, Ki, M, Mi, G or Gi are '
                     'supported. Default is %s' % bytes2human(config.defaultCache, symbols='iec'))
    addOptionFn('--maxCores', dest='maxCores', default=None, metavar='INT',
                help='The maximum number of CPU cores to request from the batch system at any one '
                     'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
                     'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn('--maxMemory', dest='maxMemory', default=None, metavar='INT',
                help="The maximum amount of memory to request from the batch system at any one "
                     "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
                     "is %s" % bytes2human(config.maxMemory, symbols='iec'))
    addOptionFn('--maxDisk', dest='maxDisk', default=None, metavar='INT',
                help='The maximum amount of disk space to request from the batch system at any '
                     'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
                     'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))
    #
    # Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs",
                             "The options for jobs that either run too long/fail or get lost "
                             "(some batch systems have issues!)")
    addOptionFn("--retryCount", dest="retryCount", default=None,
                help=("Number of times to retry a failing job before giving up and "
                      "labeling job failed. default=%s" % config.retryCount))
    addOptionFn("--maxJobDuration", dest="maxJobDuration", default=None,
                help=("Maximum runtime of a job (in seconds) before we kill it "
                      "(this is a lower bound, and the actual time before killing "
                      "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
                help=("Period of time to wait (in seconds) between checking for "
                      "missing/overlong jobs, that is jobs which get lost by the batch system. "
                      "Expert parameter. default=%s" % config.rescueJobsFrequency))
    #
    # Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options", "Miscellaneous options")
    addOptionFn("--maxLogFileSize", dest="maxLogFileSize", default=None,
                help=("The maximum size of a job log file to keep (in bytes), log files larger "
                      "than this will be truncated to the last X bytes. Default is 50 "
                      "kilobytes, default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
                help="Enable real-time logging from workers to masters")
    addOptionFn("--sseKey", dest="sseKey", default=None,
                help="Path to file containing 32 character key to be used for server-side "
                     "encryption on awsJobStore. SSE will not be used if this flag is not passed.")
    addOptionFn("--cseKey", dest="cseKey", default=None,
                help="Path to file containing 256-bit key to be used for client-side encryption "
                     "on azureJobStore. By default, no encryption is used.")
    addOptionFn("--setEnv", '-e', metavar='NAME=VALUE or NAME', dest="environment",
                default=[], action="append",
                help="Set an environment variable early on in the worker. If VALUE is omitted, "
                     "it will be looked up in the current environment. Independently of this "
                     "option, the worker will try to emulate the leader's environment before "
                     "running a job. Using this option, a variable can be injected into the "
                     "worker process itself before it is started.")
    addOptionFn("--servicePollingInterval", dest="servicePollingInterval", default=None,
                # Typo "defailt" fixed below.
                help="Interval of time service jobs wait between polling for the existence"
                     " of the keep-alive flag (default=%s)" % config.servicePollingInterval)
    #
    # Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn("--badWorker", dest="badWorker", default=None,
                help=("For testing purposes randomly kill 'badWorker' proportion of jobs using "
                      "SIGKILL, default=%s" % config.badWorker))
    addOptionFn("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
                help=("When killing the job pick uniformly within the interval from 0.0 to "
                      "'badWorkerFailInterval' seconds after the worker starts, default=%s"
                      % config.badWorkerFailInterval))
def _addOptions(addGroupFn, config):
    """
    Register all Toil command-line options.

    :param addGroupFn: callable(title, description) that creates an option group and
        returns an ``add_argument``-like callable for registering options in that group.
    :param config: Config object supplying the default values quoted in help strings.
    """
    #
    # Core options
    #
    addOptionFn = addGroupFn(
        "toil core options",
        "Options to specify the location of the toil workflow and turn on stats "
        "collation about the performance of jobs.")
    # TODO - specify how this works when path is AWS
    addOptionFn('jobStore', type=str,
                help=("Store in which to place job management files "
                      "and the global accessed temporary files "
                      "(If this is a file path this needs to be globally accessible "
                      "by all machines running jobs).\n"
                      "If the store already exists and restart is false an"
                      " ExistingJobStoreException exception will be thrown."))
    addOptionFn("--workDir", dest="workDir", default=None,
                help="Absolute path to directory where temporary files generated during the Toil "
                     "run should be placed. Default is determined by environmental variables "
                     "(TMPDIR, TEMP, TMP) via mkdtemp")
    addOptionFn("--stats", dest="stats", action="store_true", default=None,
                help="Records statistics about the toil workflow to be used by 'toil stats'.")
    addOptionFn("--clean", dest="clean", choices=['always', 'onError', 'never', 'onSuccess'],
                default=None,
                # NOTE: missing space between "with" and "that" fixed in the help text below.
                help=("Determines the deletion of the jobStore upon completion of the program. "
                      "Choices: 'always', 'onError', 'never', 'onSuccess'. The --stats option "
                      "requires information from the jobStore upon completion so the jobStore "
                      "will never be deleted with that flag. If you wish to be able to restart "
                      "the run, choose 'never' or 'onSuccess'. Default is 'never' if stats is "
                      "enabled, and 'onSuccess' otherwise"))
    #
    # Restarting the workflow options
    #
    addOptionFn = addGroupFn("toil options for restarting an existing workflow",
                             "Allows the restart of an existing workflow")
    addOptionFn("--restart", dest="restart", default=None, action="store_true",
                help="If --restart is specified then will attempt to restart existing workflow "
                     "at the location pointed to by the --jobStore option. Will raise an "
                     "exception if the workflow does not exist")
    #
    # Batch system options
    #
    addOptionFn = addGroupFn("toil options for specifying the batch system",
                             "Allows the specification of the batch system, and arguments to the "
                             "batch system/big batch system (see below).")
    addOptionFn("--batchSystem", dest="batchSystem", default=None,
                help=("The type of batch system to run the job(s) with, currently can be one "
                      "of singleMachine, parasol, gridEngine, lsf or mesos. default=%s"
                      % config.batchSystem))
    addOptionFn("--scale", dest="scale", default=None,
                help=("A scaling factor to change the value of all submitted tasks' submitted "
                      "cores. Used in singleMachine batch system. default=%s" % config.scale))
    addOptionFn("--mesosMaster", dest="mesosMasterAddress", default=None,
                help=("The host and port of the Mesos master separated by colon. default=%s"
                      % config.mesosMasterAddress))
    addOptionFn("--parasolCommand", dest="parasolCommand", default=None,
                # Fixed the run-together "slashdefault" in the help text.
                help="The name or path of the parasol program. Will be looked up on PATH "
                     "unless it starts with a slash. default=%s" % config.parasolCommand)
    addOptionFn("--parasolMaxBatches", dest="parasolMaxBatches", default=None,
                help="Maximum number of job batches the Parasol batch is allowed to create. One "
                     "batch is created for jobs with a unique set of resource requirements. "
                     "default=%i" % config.parasolMaxBatches)
    #
    # Resource requirements
    #
    addOptionFn = addGroupFn("toil options for cores/memory requirements",
                             "The options to specify default cores/memory requirements (if not "
                             "specified by the jobs themselves), and to limit the total amount of "
                             "memory/cores requested from the batch system.")
    addOptionFn('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
                help='The default amount of memory to request for a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultMemory, symbols='iec'))
    addOptionFn('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
                help='The default number of CPU cores to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Fractions of a '
                     'core (for example 0.1) are supported on some batch systems, namely Mesos '
                     'and singleMachine. Default is %.1f ' % config.defaultCores)
    addOptionFn('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
                help='The default amount of disk space to dedicate a job. Only applicable to jobs '
                     'that do not specify an explicit value for this requirement. Standard '
                     'suffixes like K, Ki, M, Mi, G or Gi are supported. Default is %s'
                     % bytes2human(config.defaultDisk, symbols='iec'))
    addOptionFn('--defaultCache', dest='defaultCache', default=None, metavar='INT',
                help='The default amount of disk space to use for caching files shared between '
                     'jobs. Only applicable to jobs that do not specify an explicit value for '
                     'this requirement. Standard suffixes like K, Ki, M, Mi, G or Gi are '
                     'supported. Default is %s' % bytes2human(config.defaultCache, symbols='iec'))
    addOptionFn('--maxCores', dest='maxCores', default=None, metavar='INT',
                help='The maximum number of CPU cores to request from the batch system at any one '
                     'time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default '
                     'is %s' % bytes2human(config.maxCores, symbols='iec'))
    addOptionFn('--maxMemory', dest='maxMemory', default=None, metavar='INT',
                help="The maximum amount of memory to request from the batch system at any one "
                     "time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. Default "
                     "is %s" % bytes2human(config.maxMemory, symbols='iec'))
    addOptionFn('--maxDisk', dest='maxDisk', default=None, metavar='INT',
                help='The maximum amount of disk space to request from the batch system at any '
                     'one time. Standard suffixes like K, Ki, M, Mi, G or Gi are supported. '
                     'Default is %s' % bytes2human(config.maxDisk, symbols='iec'))
    #
    # Retrying/rescuing jobs
    #
    addOptionFn = addGroupFn("toil options for rescuing/killing/restarting jobs",
                             "The options for jobs that either run too long/fail or get lost "
                             "(some batch systems have issues!)")
    addOptionFn("--retryCount", dest="retryCount", default=None,
                help=("Number of times to retry a failing job before giving up and "
                      "labeling job failed. default=%s" % config.retryCount))
    addOptionFn("--maxJobDuration", dest="maxJobDuration", default=None,
                help=("Maximum runtime of a job (in seconds) before we kill it "
                      "(this is a lower bound, and the actual time before killing "
                      "the job may be longer). default=%s" % config.maxJobDuration))
    addOptionFn("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
                help=("Period of time to wait (in seconds) between checking for "
                      "missing/overlong jobs, that is jobs which get lost by the batch system. "
                      "Expert parameter. default=%s" % config.rescueJobsFrequency))
    #
    # Misc options
    #
    addOptionFn = addGroupFn("toil miscellaneous options", "Miscellaneous options")
    addOptionFn("--maxLogFileSize", dest="maxLogFileSize", default=None,
                help=("The maximum size of a job log file to keep (in bytes), log files larger "
                      "than this will be truncated to the last X bytes. Default is 50 "
                      "kilobytes, default=%s" % config.maxLogFileSize))
    addOptionFn("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
                help="Enable real-time logging from workers to masters")
    addOptionFn("--sseKey", dest="sseKey", default=None,
                help="Path to file containing 32 character key to be used for server-side "
                     "encryption on awsJobStore. SSE will not be used if this flag is not passed.")
    addOptionFn("--cseKey", dest="cseKey", default=None,
                help="Path to file containing 256-bit key to be used for client-side encryption "
                     "on azureJobStore. By default, no encryption is used.")
    addOptionFn("--setEnv", '-e', metavar='NAME=VALUE or NAME', dest="environment",
                default=[], action="append",
                help="Set an environment variable early on in the worker. If VALUE is omitted, "
                     "it will be looked up in the current environment. Independently of this "
                     "option, the worker will try to emulate the leader's environment before "
                     "running a job. Using this option, a variable can be injected into the "
                     "worker process itself before it is started.")
    #
    # Debug options
    #
    addOptionFn = addGroupFn("toil debug options", "Debug options")
    addOptionFn("--badWorker", dest="badWorker", default=None,
                help=("For testing purposes randomly kill 'badWorker' proportion of jobs using "
                      "SIGKILL, default=%s" % config.badWorker))
    addOptionFn("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
                help=("When killing the job pick uniformly within the interval from 0.0 to "
                      "'badWorkerFailInterval' seconds after the worker starts, default=%s"
                      % config.badWorkerFailInterval))