Example #1
def main():
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    config.jobStore = config.jobStore[5:] if config.jobStore.startswith('file:') else config.jobStore

    # ':' means an aws/google jobstore; use the old (broken?) method
    if ':' in config.jobStore:
        jobStore = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        batchSystem = Toil.createBatchSystem(jobStore.config)  # Should automatically kill existing jobs, so we're good.
        for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
            batchSystem.killBatchJobs([jobID])  # killBatchJobs expects a list of job IDs
        logger.info("All jobs SHOULD have been killed")
    # otherwise, kill the pid recorded in the jobstore
    else:
        pid_log = os.path.join(os.path.abspath(config.jobStore), 'pid.log')
        with open(pid_log, 'r') as f:
            pid2kill = f.read().strip()
        try:
            os.kill(int(pid2kill), signal.SIGKILL)
            logger.info("Toil process %s successfully terminated." % str(pid2kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid2kill))
            raise
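The job-store locator handling above is easy to misread: a leading 'file:' prefix is sliced off with [5:], and any colon left in the string is treated as marking a remote (AWS/Google) job store. A minimal standalone sketch of that decision, using a hypothetical helper name that is not part of Toil's API:

def classify_job_store(locator: str):
    """Mirror the prefix handling above: strip a leading 'file:' and
    report whether the remainder still looks like a remote locator."""
    path = locator[5:] if locator.startswith('file:') else locator
    return path, ':' in path

assert classify_job_store('file:/tmp/my-store') == ('/tmp/my-store', False)
assert classify_job_store('aws:us-west-2:my-store') == ('aws:us-west-2:my-store', True)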
Example #2
def main():
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    options = parser.parse_args()
    set_logging_from_options(options)
    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    cluster.destroyCluster()
Example #3
def main() -> None:
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    options = parser.parse_args()
    set_logging_from_options(options)

    logger.info('Destroying cluster %s', options.clusterName)

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    cluster.destroyCluster()

    logger.info('Cluster %s is now gone.', options.clusterName)
Example #4
def main() -> None:
    parser = parser_with_common_options(jobstore_option=True)

    options = parser.parse_args()
    set_logging_from_options(options)
    try:
        jobstore = Toil.getJobStore(options.jobStore)
        jobstore.resume()
        jobstore.destroy()
        logger.info(f"Successfully deleted the job store: {options.jobStore}")
    except NoSuchJobStoreException:
        logger.info(f"Failed to delete the job store: {options.jobStore} is non-existent.")
    except:
        logger.info(f"Failed to delete the job store: {options.jobStore}")
        raise
Example #5
def main() -> None:
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument("--insecure",
                        action='store_true',
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("--sshOption",
                        dest='sshOptions',
                        default=[],
                        action='append',
                        help="Pass an additional option to the SSH command.")
    parser.add_argument(
        "--grafana_port",
        dest='grafana_port',
        default=3000,
        help="Assign a local port to be used for the Grafana dashboard.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    options = parser.parse_args()
    set_logging_from_options(options)

    # Since we collect all the remaining arguments at the end for a command to
    # run, it's easy to lose options.
    if len(options.args) > 0 and options.args[0].startswith('-'):
        logger.warning(
            'Argument \'%s\' interpreted as a command to run '
            'despite looking like an option.', options.args[0])

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    command = options.args if options.args else ['bash']
    sshOptions: List[str] = options.sshOptions

    # Forward ports:
    # 3000 for Grafana dashboard
    # 9090 for Prometheus dashboard
    sshOptions.extend([
        '-L', f'{options.grafana_port}:localhost:3000', '-L',
        '9090:localhost:9090'
    ])

    cluster.getLeader().sshAppliance(*command,
                                     strict=not options.insecure,
                                     tty=sys.stdin.isatty(),
                                     sshOptions=sshOptions)
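The two '-L' options appended above are plain SSH port forwards: one maps a chosen local port to Grafana's port 3000 on the leader, the other maps local 9090 to Prometheus. The real connection is made by sshAppliance; the sketch below only illustrates what those flags would look like in a raw ssh command (the host name and helper are hypothetical):

import shlex

def ssh_forward_command(host: str, grafana_port: int = 3000) -> list:
    """Build an ssh argv with the same '-L' forwards as above:
    local grafana_port -> leader's 3000, local 9090 -> leader's 9090."""
    return [
        'ssh',
        '-L', f'{grafana_port}:localhost:3000',  # Grafana dashboard
        '-L', '9090:localhost:9090',             # Prometheus dashboard
        host,
    ]

print(shlex.join(ssh_forward_command('leader.example.com', grafana_port=4000)))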
Example #6
def main() -> None:
    parser = parser_with_common_options(jobstore_option=True)
    parser.add_argument("--localFilePath",
                        nargs=1,
                        help="Location to which to copy job store files.")
    parser.add_argument("--fetch",
                        nargs="+",
                        help="List of job-store files to be copied locally."
                        "Use either explicit names (i.e. 'data.txt'), or "
                        "specify glob patterns (i.e. '*.txt')")
    parser.add_argument(
        "--listFilesInJobStore",
        help="Prints a list of the current files in the jobStore.")
    parser.add_argument(
        "--fetchEntireJobStore",
        help="Copy all job store files into a local directory.")
    parser.add_argument(
        "--useSymlinks",
        help="Creates symlink 'shortcuts' of files in the localFilePath"
        " instead of hardlinking or copying, where possible.  If this is"
        " not possible, it will copy the files (shutil.copyfile()).")

    # Load the jobStore
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    logger.debug("Connected to job store: %s", config.jobStore)

    if options.fetch:
        # Copy only the listed files locally
        logger.debug("Fetching local files: %s", options.fetch)
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    elif options.fetchEntireJobStore:
        # Copy all jobStore files locally
        logger.debug("Fetching all local files.")
        options.fetch = "*"
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    if options.listFilesInJobStore:
        # Log filenames and create a file containing these names in cwd
        printContentsOfJobStore(jobStorePath=options.jobStore)
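The --fetch help above allows both explicit file names and glob patterns; the actual copying is handled by fetchJobStoreFiles. A small, hypothetical illustration of the matching idea using only the standard library:

import fnmatch

def select_files(names, patterns):
    """Keep the names that match any requested pattern, whether it is an
    explicit file name like 'data.txt' or a glob like '*.txt'."""
    return [n for n in names if any(fnmatch.fnmatch(n, p) for p in patterns)]

files = ['data.txt', 'stats.json', 'notes.txt']
assert select_files(files, ['data.txt']) == ['data.txt']
assert select_files(files, ['*.txt']) == ['data.txt', 'notes.txt']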
Example #7
def main() -> None:
    """Reports stats on the workflow, use with --stats option to toil."""
    parser = parser_with_common_options()
    add_stats_options(parser)
    options = parser.parse_args()

    for c in options.categories.split(","):
        if c.strip() not in category_choices:
            raise ValueError(f'{c} not in {category_choices}!')
    options.categories = [
        x.strip().lower() for x in options.categories.split(",")
    ]

    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
Example #8
def main():
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument("--insecure",
                        action='store_true',
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("--sshOption",
                        dest='sshOptions',
                        default=[],
                        action='append',
                        help="Pass an additional option to the SSH command.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    options = parser.parse_args()
    set_logging_from_options(options)
    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    command = options.args if options.args else ['bash']
    cluster.getLeader().sshAppliance(*command,
                                     strict=not options.insecure,
                                     tty=sys.stdin.isatty(),
                                     sshOptions=options.sshOptions)
Example #9
def main():
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    parser.add_argument("--insecure", action='store_true',
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("--sshOption", dest='sshOptions', default=[], action='append',
                        help="Pass an additional option to the SSH command.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    options = parser.parse_args()
    set_logging_from_options(options)

    # Since we collect all the remaining arguments at the end for a command to
    # run, it's easy to lose options.
    if len(options.args) > 0 and options.args[0].startswith('-'):
        logger.warning('Argument \'%s\' interpreted as a command to run '
                       'despite looking like an option.', options.args[0])

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    command = options.args if options.args else ['bash']
    cluster.getLeader().sshAppliance(*command, strict=not options.insecure, tty=sys.stdin.isatty(),
                                     sshOptions=options.sshOptions)
Example #10
def main() -> None:
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)

    job_store_type, _ = Toil.parseLocator(config.jobStore)

    if job_store_type != 'file':
        # Remote (aws/google) jobstore; use the old (broken?) method
        job_store = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        # There's no guarantee that the batch system in use can enumerate
        # running jobs belonging to the job store we've attached to. And
        # moreover we don't even bother trying to kill the leader at its
        # recorded PID, even if it is a local process.
        batch_system = Toil.createBatchSystem(job_store.config)  # Should automatically kill existing jobs, so we're good.
        for job_id in batch_system.getIssuedBatchJobIDs():  # Just in case we do it again.
            batch_system.killBatchJobs([job_id])
        logger.info("All jobs SHOULD have been killed")
    else:
        # otherwise, kill the pid recorded in the jobstore.
        # TODO: We assume this is a local PID.
        job_store = Toil.resumeJobStore(config.jobStore)
        assert isinstance(job_store, FileJobStore), "Need a FileJobStore which has a sharedFilesDir"
        pid_log = os.path.join(job_store.sharedFilesDir, 'pid.log')
        with open(pid_log) as f:
            pid_to_kill = f.read().strip()
        try:
            os.kill(int(pid_to_kill), signal.SIGTERM)
            logger.info("Toil process %s successfully terminated." % str(pid_to_kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid_to_kill))
            raise
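Note the difference in signals: Example #1 sends SIGKILL outright, while this version sends SIGTERM and gives the leader a chance to shut down cleanly. A common middle ground, shown only as a sketch and not as what either utility does, is to try SIGTERM first and escalate:

import os
import signal
import time

def stop_pid(pid: int, grace_seconds: float = 10.0) -> None:
    """Ask the process to exit with SIGTERM, then escalate to SIGKILL
    if it is still alive after the grace period."""
    os.kill(pid, signal.SIGTERM)
    deadline = time.monotonic() + grace_seconds
    while time.monotonic() < deadline:
        try:
            os.kill(pid, 0)          # signal 0 only probes for existence
        except ProcessLookupError:
            return                   # process exited on its own
        time.sleep(0.5)
    os.kill(pid, signal.SIGKILL)     # still running: force termination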
Example #11
def main() -> None:
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument("--insecure",
                        dest='insecure',
                        action='store_true',
                        required=False,
                        help="Temporarily disable strict host key checking.")
    parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Arguments to pass to"
        "`rsync`. Takes any arguments that rsync accepts. Specify the"
        " remote with a colon. For example, to upload `example.py`,"
        " specify `toil rsync-cluster -p aws test-cluster example.py :`."
        "\nOr, to download a file from the remote:, `toil rsync-cluster"
        " -p aws test-cluster :example.py .`")
    options = parser.parse_args()
    set_logging_from_options(options)
    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    cluster.getLeader().coreRsync(args=options.args,
                                  strict=not options.insecure)
Example #12
def main():
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    parser.add_argument("-T", "--clusterType", dest="clusterType",
                        choices=['mesos', 'kubernetes'], default='mesos',
                        help="Cluster scheduler to use.")
    parser.add_argument("--leaderNodeType", dest="leaderNodeType", required=True,
                        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument("--keyPairName", dest='keyPairName',
                        help="On AWS, the name of the AWS key pair to include on the instance."
                        " On Google/GCE, this is the ssh key pair.")
    parser.add_argument("--owner", dest='owner',
                        help="The owner tag for all instances. If not given, the value in"
                        " --keyPairName will be used if given.")
    parser.add_argument("--boto", dest='botoPath',
                        help="The path to the boto credentials directory. This is transferred "
                        "to all nodes in order to access the AWS jobStore from non-AWS instances.")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags',
                        default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its "
                             "children. Tags are of the form:\n"
                             " -t key1=value1 --tag key2=value2\n"
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {\n"
                             "      \"Name\": clusterName,\n"
                             "      \"Owner\": IAM username\n"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not "
                        "specified. This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("--nodeTypes", dest='nodeTypes', default=None, type=str,
                        help="Specifies a list of comma-separated node types, each of which is "
                             "composed of slash-separated instance types, and an optional spot "
                             "bid set off by a colon, making the node type preemptable. Instance "
                             "types may appear in multiple node types, and the same node type "
                             "may appear as both preemptable and non-preemptable.\n"
                             "Valid argument specifying two node types:\n"
                             "\tc5.4xlarge/c5a.4xlarge:0.42,t2.large\n"
                             "Node types:\n"
                             "\tc5.4xlarge/c5a.4xlarge:0.42 and t2.large\n"
                             "Instance types:\n"
                             "\tc5.4xlarge, c5a.4xlarge, and t2.large\n"
                             "Semantics:\n"
                             "\tBid $0.42/hour for either c5.4xlarge or c5a.4xlarge instances,\n"
                             "\ttreated interchangeably, while they are available at that price,\n"
                             "\tand buy t2.large instances at full price\n"
                             "Must also provide the --workers argument to specify how many "
                             "workers of each node type to create.")
    parser.add_argument("-w", "--workers", dest='workers', default=None, type=str,
                        help="Comma-separated list of the ranges of numbers of workers of each "
                             "node type to launch, such as '0-2,5,1-3'. If a range is given, "
                             "workers will automatically be launched and terminated by the cluster "
                             "to auto-scale to the workload.")
    parser.add_argument("--leaderStorage", dest='leaderStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for the leader "
                             "instance. This is an EBS volume.")
    parser.add_argument("--nodeStorage", dest='nodeStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for any worker "
                             "instances created when using the -w flag. This is an EBS volume.")
    parser.add_argument('--forceDockerAppliance', dest='forceDockerAppliance', action='store_true',
                        default=False,
                        help="Disables sanity checking the existence of the docker image specified "
                             "by TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for "
                             "autoscaling.")
    parser.add_argument('--awsEc2ProfileArn', dest='awsEc2ProfileArn', default=None, type=str,
                        help="If provided, the specified ARN is used as the instance profile for EC2 instances."
                             "Useful for setting custom IAM profiles. If not specified, a new IAM role is created "
                             "by default with sufficient access to perform basic cluster operations.")
    parser.add_argument('--awsEc2ExtraSecurityGroupId', dest='awsEc2ExtraSecurityGroupIds', default=[], action='append',
                        help="Any additional security groups to attach to EC2 instances. Note that a security group "
                             "with its name equal to the cluster name will always be created, thus ensure that "
                             "the extra security groups do not have the same name as the cluster name.")
    options = parser.parse_args()
    set_logging_from_options(options)
    tags = create_tags_dict(options.tags) if options.tags else dict()

    # Get worker node types
    worker_node_types = parse_node_types(options.nodeTypes)
    check_valid_node_types(options.provisioner, worker_node_types + [({options.leaderNodeType}, None)])

    # Holds string ranges, like "5", or "3-10"
    worker_node_ranges = options.workers.split(',') if options.workers else []

    # checks the validity of TOIL_APPLIANCE_SELF before proceeding
    applianceSelf(forceDockerAppliance=options.forceDockerAppliance)

    # This holds either ints to launch static nodes, or tuples of ints
    # specifying ranges to launch managed auto-scaling nodes, for each type.
    nodeCounts = []

    if bool(worker_node_types) != bool(worker_node_ranges):
        raise RuntimeError("The --nodeTypes option requires --workers, and vice versa.")
    if worker_node_types and worker_node_ranges:
        if len(worker_node_types) != len(worker_node_ranges):
            raise RuntimeError("List of worker count ranges must be the same length as the list of node types.")

        for spec in worker_node_ranges:
            if '-' in spec:
                # Provision via autoscaling
                parts = spec.split('-')
                if len(parts) != 2:
                    raise RuntimeError("Unacceptable range: " + spec)
                nodeCounts.append((int(parts[0]), int(parts[1])))
            else:
                # Provision fixed nodes
                nodeCounts.append(int(spec))

    owner = options.owner or options.keyPairName or 'toil'

    # Check to see if the user specified a zone. If not, see if one is stored in an environment variable.
    options.zone = options.zone or os.environ.get(f'TOIL_{options.provisioner.upper()}_ZONE')

    if not options.zone:
        raise RuntimeError(f'Please provide a value for --zone or set a default in the '
                           f'TOIL_{options.provisioner.upper()}_ZONE environment variable.')

    logger.info('Creating cluster %s...', options.clusterName)

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              clusterType=options.clusterType,
                              zone=options.zone,
                              nodeStorage=options.nodeStorage)

    cluster.launchCluster(leaderNodeType=options.leaderNodeType,
                          leaderStorage=options.leaderStorage,
                          owner=owner,
                          keyName=options.keyPairName,
                          botoPath=options.botoPath,
                          userTags=tags,
                          vpcSubnet=options.vpcSubnet,
                          awsEc2ProfileArn=options.awsEc2ProfileArn,
                          awsEc2ExtraSecurityGroupIds=options.awsEc2ExtraSecurityGroupIds)

    for typeNum, spec in enumerate(nodeCounts):
        # For each batch of workers to make
        wanted = worker_node_types[typeNum]

        if isinstance(spec, int):
            # Make static nodes

            if spec == 0:
                # Don't make anything
                continue

            if wanted[1] is None:
                # Make non-spot instances
                cluster.addNodes(nodeTypes=wanted[0], numNodes=spec, preemptable=False)
            else:
                # We have a spot bid
                cluster.addNodes(nodeTypes=wanted[0], numNodes=spec, preemptable=True,
                                 spotBid=wanted[1])

        elif isinstance(spec, tuple):
            # Make a range of auto-scaling nodes

            max_count, min_count = spec

            if max_count < min_count:
                # Flip them around
                min_count, max_count = max_count, min_count

            if max_count == 0:
                # Don't want any
                continue

            if wanted[1] is None:
                # Make non-spot instances
                cluster.addManagedNodes(nodeTypes=wanted[0], minNodes=min_count, maxNodes=max_count,
                                        preemptable=False)
            else:
                # Bid at the given price.
                cluster.addManagedNodes(nodeTypes=wanted[0], minNodes=min_count, maxNodes=max_count,
                                        preemptable=True, spotBid=wanted[1])

    logger.info('Cluster created successfully.')
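The --nodeTypes format described in the help above (comma-separated node types, slash-separated instance types, optional ':spot bid') is parsed by parse_node_types, which is imported from elsewhere in Toil. A rough, standalone approximation of that format, matching the (set of instance types, optional bid) shape used at the check_valid_node_types call:

from typing import List, Optional, Set, Tuple

def parse_node_types_sketch(spec: Optional[str]) -> List[Tuple[Set[str], Optional[float]]]:
    """Approximate parser for 'c5.4xlarge/c5a.4xlarge:0.42,t2.large'-style
    specs; the real parse_node_types may differ in validation and errors."""
    parsed = []
    for node_type in (spec or '').split(','):
        if not node_type:
            continue
        if ':' in node_type:
            types, bid = node_type.rsplit(':', 1)
            parsed.append((set(types.split('/')), float(bid)))
        else:
            parsed.append((set(node_type.split('/')), None))
    return parsed

assert parse_node_types_sketch('c5.4xlarge/c5a.4xlarge:0.42,t2.large') == [
    ({'c5.4xlarge', 'c5a.4xlarge'}, 0.42),
    ({'t2.large'}, None),
]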
Example #13
def main() -> None:
    """Reports the state of a Toil workflow."""
    parser = parser_with_common_options()
    parser.add_argument(
        "--failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--noAggStats",
        dest="stats",
        action="store_false",
        help="Do not print overall, aggregate status of workflow.",
        default=True)

    parser.add_argument(
        "--printDot",
        action="store_true",
        help=
        "Print dot formatted description of the graph. If using --jobs will "
        "restrict to subgraph including only those jobs. default=%(default)s",
        default=False)

    parser.add_argument(
        "--jobs",
        nargs='+',
        help=
        "Restrict reporting to the following jobs (allows subsetting of the report).",
        default=None)

    parser.add_argument("--printPerJobStats",
                        action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument(
        "--printLogs",
        action="store_true",
        help="Print the log files of jobs (if they exist). default=%(default)s",
        default=False)

    parser.add_argument("--printChildren",
                        action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    options = parser.parse_args()
    set_logging_from_options(options)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)

    try:
        status = ToilStatus(config.jobStore, options.jobs)
    except NoSuchJobStoreException:
        print('No job store found.')
        return
    except JobException:  # Workflow likely complete, user informed in ToilStatus()
        return

    jobStats = status.report_on_jobs()

    # Info to be reported.
    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        status.printAggregateJobStats(properties, childNumber)
    if options.printLogs:
        status.printJobLog()
    if options.printChildren:
        status.printJobChildren()
    if options.printDot:
        status.print_dot_chart()
    if options.stats:
        print('Of the %i jobs considered, '
              'there are %i jobs with children, '
              '%i jobs ready to run, '
              '%i zombie jobs, '
              '%i jobs with services, '
              '%i services, '
              'and %i jobs with log files currently in %s.' %
              (len(status.jobsToReport), len(hasChildren), len(readyToRun),
               len(zombies), len(hasServices), len(services), len(hasLogFile),
               status.jobStore))

    if len(status.jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        exit(1)
Example #14
def setLoggingFromOptions(options):
    logger.warning(
        'Deprecated toil method. Please use "toil.statsAndLogging.set_logging_from_options()" instead.'
    )
    set_logging_from_options(options)
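The shim above only logs a warning before delegating. A hedged alternative, not what Toil does here, is to route the notice through Python's warnings machinery so callers can filter or escalate it; the delegation target is assumed to be imported as in the original:

import warnings

def setLoggingFromOptions(options):
    """Deprecation shim variant: emit a DeprecationWarning instead of a
    log line, then delegate to the replacement function."""
    warnings.warn(
        'setLoggingFromOptions is deprecated; use '
        'toil.statsAndLogging.set_logging_from_options() instead.',
        DeprecationWarning,
        stacklevel=2,
    )
    set_logging_from_options(options)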
Example #15
def main():
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument(
        "--leaderNodeType",
        dest="leaderNodeType",
        required=True,
        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        help="On AWS, the name of the AWS key pair to include on the instance."
        " On Google/GCE, this is the ssh key pair.")
    parser.add_argument(
        "--owner",
        dest='owner',
        help="The owner tag for all instances. If not given, the value in"
        " --keyPairName will be used if given.")
    parser.add_argument(
        "--boto",
        dest='botoPath',
        help="The path to the boto credentials directory. This is transferred "
        "to all nodes in order to access the AWS jobStore from non-AWS instances."
    )
    parser.add_argument(
        "-t",
        "--tag",
        metavar='NAME=VALUE',
        dest='tags',
        default=[],
        action='append',
        help="Tags are added to the AWS cluster for this node and all of its "
        "children. Tags are of the form:\n"
        " -t key1=value1 --tag key2=value2\n"
        "Multiple tags are allowed and each tag needs its own flag. By "
        "default the cluster is tagged with "
        " {\n"
        "      \"Name\": clusterName,\n"
        "      \"Owner\": IAM username\n"
        " }. ")
    parser.add_argument(
        "--vpcSubnet",
        help="VPC subnet ID to launch cluster in. Uses default subnet if not "
        "specified. This subnet needs to have auto assign IPs turned on.")
    parser.add_argument(
        "--nodeTypes",
        dest='nodeTypes',
        default=None,
        type=str,
        help="Comma-separated list of node types to create while launching the "
        "leader. The syntax for each node type depends on the provisioner "
        "used. For the aws provisioner this is the name of an EC2 instance "
        "type followed by a colon and the price in dollar to bid for a spot "
        "instance, for example 'c3.8xlarge:0.42'. Must also provide the "
        "--workers argument to specify how many workers of each node type "
        "to create.")
    parser.add_argument(
        "-w",
        "--workers",
        dest='workers',
        default=None,
        type=str,
        help=
        "Comma-separated list of the number of workers of each node type to "
        "launch alongside the leader when the cluster is created. This can be "
        "useful if running toil without auto-scaling but with need of more "
        "hardware support")
    parser.add_argument(
        "--leaderStorage",
        dest='leaderStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for the leader "
        "instance.  This is an EBS volume.")
    parser.add_argument(
        "--nodeStorage",
        dest='nodeStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for any worker "
        "instances created when using the -w flag. This is an EBS volume.")
    parser.add_argument(
        '--forceDockerAppliance',
        dest='forceDockerAppliance',
        action='store_true',
        default=False,
        help=
        "Disables sanity checking the existence of the docker image specified "
        "by TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for "
        "autoscaling.")
    parser.add_argument(
        '--awsEc2ProfileArn',
        dest='awsEc2ProfileArn',
        default=None,
        type=str,
        help=
        "If provided, the specified ARN is used as the instance profile for EC2 instances."
        "Useful for setting custom IAM profiles. If not specified, a new IAM role is created "
        "by default with sufficient access to perform basic cluster operations."
    )
    parser.add_argument(
        '--awsEc2ExtraSecurityGroupId',
        dest='awsEc2ExtraSecurityGroupIds',
        default=[],
        action='append',
        help=
        "Any additional security groups to attach to EC2 instances. Note that a security group "
        "with its name equal to the cluster name will always be created, thus ensure that "
        "the extra security groups do not have the same name as the cluster name."
    )
    options = parser.parse_args()
    set_logging_from_options(options)
    tags = create_tags_dict(options.tags) if options.tags else dict()

    worker_node_types = options.nodeTypes.split(
        ',') if options.nodeTypes else []
    worker_quantities = options.workers.split(',') if options.workers else []
    check_valid_node_types(options.provisioner,
                           worker_node_types + [options.leaderNodeType])

    # checks the validity of TOIL_APPLIANCE_SELF before proceeding
    applianceSelf(forceDockerAppliance=options.forceDockerAppliance)

    owner = options.owner or options.keyPairName or 'toil'

    # Check to see if the user specified a zone. If not, see if one is stored in an environment variable.
    options.zone = options.zone or os.environ.get(
        f'TOIL_{options.provisioner.upper()}_ZONE')

    if not options.zone:
        raise RuntimeError(
            f'Please provide a value for --zone or set a default in the '
            f'TOIL_{options.provisioner.upper()}_ZONE environment variable.')

    if (options.nodeTypes or
            options.workers) and not (options.nodeTypes and options.workers):
        raise RuntimeError(
            "The --nodeTypes and --workers options must be specified together."
        )

    if not len(worker_node_types) == len(worker_quantities):
        raise RuntimeError(
            "List of node types must be the same length as the list of workers."
        )

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone,
                              nodeStorage=options.nodeStorage)

    cluster.launchCluster(
        leaderNodeType=options.leaderNodeType,
        leaderStorage=options.leaderStorage,
        owner=owner,
        keyName=options.keyPairName,
        botoPath=options.botoPath,
        userTags=tags,
        vpcSubnet=options.vpcSubnet,
        awsEc2ProfileArn=options.awsEc2ProfileArn,
        awsEc2ExtraSecurityGroupIds=options.awsEc2ExtraSecurityGroupIds)

    for worker_node_type, num_workers in zip(worker_node_types,
                                             worker_quantities):
        if ':' in worker_node_type:
            worker_node_type, bid = worker_node_type.split(':', 1)
            cluster.addNodes(nodeType=worker_node_type,
                             numNodes=int(num_workers),
                             preemptable=True,
                             spotBid=float(bid))
        else:
            cluster.addNodes(nodeType=worker_node_type,
                             numNodes=int(num_workers),
                             preemptable=False)
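Each of these main() functions backs one of the toil subcommands (for example toil kill, toil clean, toil launch-cluster, toil ssh-cluster). The sketch below is a hypothetical dispatcher, not Toil's actual CLI wiring, showing how such entry points can be selected by subcommand name while each main() still parses its own arguments:

import sys
from typing import Callable, Dict

def dispatch(subcommands: Dict[str, Callable[[], None]]) -> None:
    """Pop the subcommand name off argv and call its main(); the remaining
    arguments are left for that main()'s own parse_args() call."""
    if len(sys.argv) < 2 or sys.argv[1] not in subcommands:
        sys.exit('usage: toil <' + '|'.join(subcommands) + '> [options]')
    command = sys.argv.pop(1)
    subcommands[command]()

# e.g. dispatch({'kill': kill_main, 'destroy-cluster': destroy_main}), where
# kill_main and destroy_main stand for the main() functions shown above.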