Example #1
    def _getScript(self):
        def userScript():
            from toil.common import Toil
            from toil.job import Job

            # Because this is the only job in the pipeline and because it is preemptable,
            # there will be no non-preemptable jobs. The non-preemptable scaler will therefore
            # not request any nodes initially. And since we made it impossible for the
            # preemptable scaler to allocate any nodes (using an abnormally low spot bid),
            # we will observe a deficit of preemptable nodes that the non-preemptable scaler will
            # compensate for by spinning up non-preemptable nodes instead.
            #
            def job(job, disk='10M', cores=1, memory='10M', preemptable=True):
                pass

            if __name__ == '__main__':
                options = Job.Runner.getDefaultArgumentParser().parse_args()
                with Toil(options) as toil:
                    if toil.config.restart:
                        toil.restart()
                    else:
                        toil.start(Job.wrapJobFn(job))

        script = dedent('\n'.join(getsource(userScript).split('\n')[1:]))
        # use appliance ssh method instead of sshutil so we can specify input param
        cluster = cluster_factory(provisioner='aws',
                                  clusterName=self.clusterName)
        leader = cluster.getLeader()
        leader.sshAppliance('tee', '/home/userScript.py', input=script)
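The same three-step pattern recurs throughout these examples: build a provisioner with cluster_factory, fetch the leader node, and run a command on it through the appliance. A minimal standalone sketch of that pattern, assuming the toil.provisioners import path and placeholder zone and cluster names:

from toil.provisioners import cluster_factory  # assumed import path

# Placeholders: a real run needs an existing cluster with this name in this zone.
cluster = cluster_factory(provisioner='aws',
                          zone='us-west-2a',
                          clusterName='my-test-cluster')
leader = cluster.getLeader()
# Run a command inside the leader's Toil appliance container.
leader.sshAppliance('echo', 'hello from the leader')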
Example #2
    def launchCluster(self):
        from boto.ec2.blockdevicemapping import BlockDeviceType

        from toil.lib.ec2 import wait_instances_running
        self.createClusterUtil(args=[
            '--leaderStorage',
            str(self.requestedLeaderStorage), '--nodeTypes', ",".join(
                self.instanceTypes), '-w', ",".join(self.numWorkers),
            '--nodeStorage',
            str(self.requestedLeaderStorage)
        ])

        self.cluster = cluster_factory(provisioner='aws',
                                       zone=self.zone,
                                       clusterName=self.clusterName)
        nodes = self.cluster._getNodesInCluster(both=True)
        nodes.sort(key=lambda x: x.launch_time)
        # assuming that leader is first
        workers = nodes[1:]
        # test that two worker nodes were created
        self.assertEqual(2, len(workers))
        # test that workers have expected storage size
        # just use the first worker
        worker = workers[0]
        worker = next(wait_instances_running(self.cluster._boto2.ec2,
                                             [worker]))
        rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
        self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
        rootVolume = self.cluster._boto2.ec2.get_all_volumes(
            volume_ids=[rootBlockDevice.volume_id])[0]
        self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
Example #3
    def _getScript(self):
        def restartScript():
            import argparse
            import os

            from toil.job import Job

            def f0(job):
                if 'FAIL' in os.environ:
                    raise RuntimeError('failed on purpose')

            if __name__ == '__main__':
                parser = argparse.ArgumentParser()
                Job.Runner.addToilOptions(parser)
                options = parser.parse_args()
                rootJob = Job.wrapJobFn(f0,
                                        cores=0.5,
                                        memory='50 M',
                                        disk='50 M')
                Job.Runner.startToil(rootJob, options)

        script = dedent('\n'.join(getsource(restartScript).split('\n')[1:]))
        tempfile_path = '/tmp/temp-or-ary.txt'
        with open(tempfile_path, 'w') as f:
            # use appliance ssh method instead of sshutil so we can specify input param
            f.write(script)
        cluster = cluster_factory(provisioner='aws',
                                  clusterName=self.clusterName)
        leader = cluster.getLeader()
        self.sshUtil([
            'mkdir', '-p', self.scriptDir
        ])  # hot deploy doesn't seem to be permitted in normal /tmp or /home
        leader.injectFile(tempfile_path, self.scriptName, 'toil_leader')
        if os.path.exists(tempfile_path):
            os.remove(tempfile_path)
Example #4
def main():
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    options = parser.parse_args()
    set_logging_from_options(options)
    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    cluster.destroyCluster()
Example #5
def main() -> None:
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    options = parser.parse_args()
    set_logging_from_options(options)

    logger.info('Destroying cluster %s', options.clusterName)

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    cluster.destroyCluster()

    logger.info('Cluster %s is now gone.', options.clusterName)
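For reference, this entry point is normally reached through the toil wrapper script rather than called directly; an invocation along the lines of `toil destroy-cluster -p aws --zone us-west-2a my-test-cluster` (subcommand and flag names assumed from parser_with_common_options; the zone and cluster name are placeholders) would end up in the main() above.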
Example #6
    def launchCluster(self):
        from boto.ec2.blockdevicemapping import BlockDeviceType

        from toil.lib.ec2 import wait_instances_running
        self.createClusterUtil(args=[
            '--leaderStorage',
            str(self.requestedLeaderStorage), '--nodeTypes', ",".join(
                self.instanceTypes), '--workers', ",".join(
                    [f'0-{c}' for c in self.numWorkers]), '--nodeStorage',
            str(self.requestedLeaderStorage), '--clusterType', 'kubernetes'
        ])

        self.cluster = cluster_factory(provisioner='aws',
                                       zone=self.zone,
                                       clusterName=self.clusterName)
Example #7
    def putScript(self, content: str):
        """
        Helper method for _getScript to inject a script file at the configured script path, from text.
        """
        cluster = cluster_factory(provisioner='aws',
                                  zone=self.zone,
                                  clusterName=self.clusterName)
        leader = cluster.getLeader()

        self.sshUtil(['mkdir', '-p', self.scriptDir])

        with tempfile.NamedTemporaryFile(mode='w') as t:
            # use appliance ssh method instead of sshutil so we can specify input param
            t.write(content)
            # This works to make writes visible on non-Windows
            t.flush()
            leader.injectFile(t.name, self.script(), 'toil_leader')
Example #8
def main() -> None:
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument("--insecure",
                        action='store_true',
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("--sshOption",
                        dest='sshOptions',
                        default=[],
                        action='append',
                        help="Pass an additional option to the SSH command.")
    parser.add_argument(
        "--grafana_port",
        dest='grafana_port',
        default=3000,
        help="Assign a local port to be used for the Grafana dashboard.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    options = parser.parse_args()
    set_logging_from_options(options)

    # Since we collect all the remaining arguments at the end for a command to
    # run, it's easy to lose options.
    if len(options.args) > 0 and options.args[0].startswith('-'):
        logger.warning(
            'Argument \'%s\' interpreted as a command to run '
            'despite looking like an option.', options.args[0])

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    command = options.args if options.args else ['bash']
    sshOptions: List[str] = options.sshOptions

    # Forward ports:
    # 3000 for Grafana dashboard
    # 9090 for Prometheus dashboard
    sshOptions.extend([
        '-L', f'{options.grafana_port}:localhost:3000', '-L',
        '9090:localhost:9090'
    ])

    cluster.getLeader().sshAppliance(*command,
                                     strict=not options.insecure,
                                     tty=sys.stdin.isatty(),
                                     sshOptions=sshOptions)
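To make the option assembly above concrete, here is a small trace of the values it builds, assuming the user passed a single --sshOption -4 and left --grafana_port at its default (the flag values themselves are illustrative):

grafana_port = 3000                 # default declared above
sshOptions = ['-4']                 # collected by action='append'
sshOptions.extend(['-L', f'{grafana_port}:localhost:3000',
                   '-L', '9090:localhost:9090'])
# sshOptions == ['-4', '-L', '3000:localhost:3000', '-L', '9090:localhost:9090'],
# so Grafana (3000) and Prometheus (9090) on the leader are forwarded to local ports.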
Example #9
def main():
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument("--insecure",
                        action='store_true',
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("--sshOption",
                        dest='sshOptions',
                        default=[],
                        action='append',
                        help="Pass an additional option to the SSH command.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    options = parser.parse_args()
    set_logging_from_options(options)
    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    command = options.args if options.args else ['bash']
    cluster.getLeader().sshAppliance(*command,
                                     strict=not options.insecure,
                                     tty=sys.stdin.isatty(),
                                     sshOptions=options.sshOptions)
Example #10
def main():
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    parser.add_argument("--insecure", action='store_true',
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("--sshOption", dest='sshOptions', default=[], action='append',
                        help="Pass an additional option to the SSH command.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    options = parser.parse_args()
    set_logging_from_options(options)

    # Since we collect all the remaining arguments at the end for a command to
    # run, it's easy to lose options.
    if len(options.args) > 0 and options.args[0].startswith('-'):
        logger.warning('Argument \'%s\' interpreted as a command to run '
                       'despite looking like an option.', options.args[0])

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    command = options.args if options.args else ['bash']
    cluster.getLeader().sshAppliance(*command, strict=not options.insecure, tty=sys.stdin.isatty(),
                                     sshOptions=options.sshOptions)
Example #11
def main() -> None:
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument("--insecure",
                        dest='insecure',
                        action='store_true',
                        required=False,
                        help="Temporarily disable strict host key checking.")
    parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Arguments to pass to"
        "`rsync`. Takes any arguments that rsync accepts. Specify the"
        " remote with a colon. For example, to upload `example.py`,"
        " specify `toil rsync-cluster -p aws test-cluster example.py :`."
        "\nOr, to download a file from the remote:, `toil rsync-cluster"
        " -p aws test-cluster :example.py .`")
    options = parser.parse_args()
    set_logging_from_options(options)
    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone)
    cluster.getLeader().coreRsync(args=options.args,
                                  strict=not options.insecure)
Example #12
def main():
    parser = parser_with_common_options(provisioner_options=True, jobstore_option=False)
    parser.add_argument("-T", "--clusterType", dest="clusterType",
                        choices=['mesos', 'kubernetes'], default='mesos',
                        help="Cluster scheduler to use.")
    parser.add_argument("--leaderNodeType", dest="leaderNodeType", required=True,
                        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument("--keyPairName", dest='keyPairName',
                        help="On AWS, the name of the AWS key pair to include on the instance."
                        " On Google/GCE, this is the ssh key pair.")
    parser.add_argument("--owner", dest='owner',
                        help="The owner tag for all instances. If not given, the value in"
                        " --keyPairName will be used if given.")
    parser.add_argument("--boto", dest='botoPath',
                        help="The path to the boto credentials directory. This is transferred "
                        "to all nodes in order to access the AWS jobStore from non-AWS instances.")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags',
                        default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its "
                             "children. Tags are of the form:\n"
                             " -t key1=value1 --tag key2=value2\n"
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {\n"
                             "      \"Name\": clusterName,\n"
                             "      \"Owner\": IAM username\n"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not "
                        "specified. This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("--nodeTypes", dest='nodeTypes', default=None, type=str,
                        help="Specifies a list of comma-separated node types, each of which is "
                             "composed of slash-separated instance types, and an optional spot "
                             "bid set off by a colon, making the node type preemptable. Instance "
                             "types may appear in multiple node types, and the same node type "
                             "may appear as both preemptable and non-preemptable.\n"
                             "Valid argument specifying two node types:\n"
                             "\tc5.4xlarge/c5a.4xlarge:0.42,t2.large\n"
                             "Node types:\n"
                             "\tc5.4xlarge/c5a.4xlarge:0.42 and t2.large\n"
                             "Instance types:\n"
                             "\tc5.4xlarge, c5a.4xlarge, and t2.large\n"
                             "Semantics:\n"
                             "\tBid $0.42/hour for either c5.4xlarge or c5a.4xlarge instances,\n"
                             "\ttreated interchangeably, while they are available at that price,\n"
                             "\tand buy t2.large instances at full price\n"
                             "Must also provide the --workers argument to specify how many "
                             "workers of each node type to create.")
    parser.add_argument("-w", "--workers", dest='workers', default=None, type=str,
                        help="Comma-separated list of the ranges of numbers of workers of each "
                             "node type to launch, such as '0-2,5,1-3'. If a range is given, "
                             "workers will automatically be launched and terminated by the cluster "
                             "to auto-scale to the workload.")
    parser.add_argument("--leaderStorage", dest='leaderStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for the leader "
                             "instance. This is an EBS volume.")
    parser.add_argument("--nodeStorage", dest='nodeStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for any worker "
                             "instances created when using the -w flag. This is an EBS volume.")
    parser.add_argument('--forceDockerAppliance', dest='forceDockerAppliance', action='store_true',
                        default=False,
                        help="Disables sanity checking the existence of the docker image specified "
                             "by TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for "
                             "autoscaling.")
    parser.add_argument('--awsEc2ProfileArn', dest='awsEc2ProfileArn', default=None, type=str,
                        help="If provided, the specified ARN is used as the instance profile for EC2 instances."
                             "Useful for setting custom IAM profiles. If not specified, a new IAM role is created "
                             "by default with sufficient access to perform basic cluster operations.")
    parser.add_argument('--awsEc2ExtraSecurityGroupId', dest='awsEc2ExtraSecurityGroupIds', default=[], action='append',
                        help="Any additional security groups to attach to EC2 instances. Note that a security group "
                             "with its name equal to the cluster name will always be created, thus ensure that "
                             "the extra security groups do not have the same name as the cluster name.")
    options = parser.parse_args()
    set_logging_from_options(options)
    tags = create_tags_dict(options.tags) if options.tags else dict()

    # Get worker node types
    worker_node_types = parse_node_types(options.nodeTypes)
    check_valid_node_types(options.provisioner, worker_node_types + [({options.leaderNodeType}, None)])

    # Holds string ranges, like "5", or "3-10"
    worker_node_ranges = options.workers.split(',') if options.workers else []

    # checks the validity of TOIL_APPLIANCE_SELF before proceeding
    applianceSelf(forceDockerAppliance=options.forceDockerAppliance)

    # This holds either ints to launch static nodes, or tuples of ints
    # specifying ranges to launch managed auto-scaling nodes, for each type.
    nodeCounts = []

    if ((worker_node_types != [] or worker_node_ranges != []) and not
        (worker_node_types != [] and worker_node_ranges != [])):
        raise RuntimeError("The --nodeTypes option requires --workers, and visa versa.")
    if worker_node_types and worker_node_ranges:
        if not len(worker_node_types) == len(worker_node_ranges):
            raise RuntimeError("List of worker count ranges must be the same length as the list of node types.")

        for spec in worker_node_ranges:
            if '-' in spec:
                # Provision via autoscaling
                parts = spec.split('-')
                if len(parts) != 2:
                    raise RuntimeError("Unacceptable range: " + spec)
                nodeCounts.append((int(parts[0]), int(parts[1])))
            else:
                # Provision fixed nodes
                nodeCounts.append(int(spec))

    owner = options.owner or options.keyPairName or 'toil'

    # Check to see if the user specified a zone. If not, see if one is stored in an environment variable.
    options.zone = options.zone or os.environ.get(f'TOIL_{options.provisioner.upper()}_ZONE')

    if not options.zone:
        raise RuntimeError(f'Please provide a value for --zone or set a default in the '
                           f'TOIL_{options.provisioner.upper()}_ZONE environment variable.')

    logger.info('Creating cluster %s...', options.clusterName)

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              clusterType=options.clusterType,
                              zone=options.zone,
                              nodeStorage=options.nodeStorage)

    cluster.launchCluster(leaderNodeType=options.leaderNodeType,
                          leaderStorage=options.leaderStorage,
                          owner=owner,
                          keyName=options.keyPairName,
                          botoPath=options.botoPath,
                          userTags=tags,
                          vpcSubnet=options.vpcSubnet,
                          awsEc2ProfileArn=options.awsEc2ProfileArn,
                          awsEc2ExtraSecurityGroupIds=options.awsEc2ExtraSecurityGroupIds)

    for typeNum, spec in enumerate(nodeCounts):
        # For each batch of workers to make
        wanted = worker_node_types[typeNum]

        if isinstance(spec, int):
            # Make static nodes

            if spec == 0:
                # Don't make anything
                continue

            if wanted[1] is None:
                # Make non-spot instances
                cluster.addNodes(nodeTypes=wanted[0], numNodes=spec, preemptable=False)
            else:
                # We have a spot bid
                cluster.addNodes(nodeTypes=wanted[0], numNodes=spec, preemptable=True,
                                 spotBid=wanted[1])

        elif isinstance(spec, tuple):
            # Make a range of auto-scaling nodes

            max_count, min_count = spec

            if max_count < min_count:
                # Flip them around
                min_count, max_count = max_count, min_count

            if max_count == 0:
                # Don't want any
                continue

            if wanted[1] is None:
                # Make non-spot instances
                cluster.addManagedNodes(nodeTypes=wanted[0], minNodes=min_count, maxNodes=max_count,
                                        preemptable=False)
            else:
                # Bid at the given price.
                cluster.addManagedNodes(nodeTypes=wanted[0], minNodes=min_count, maxNodes=max_count,
                                        preemptable=True, spotBid=wanted[1])

    logger.info('Cluster created successfully.')
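As a concrete illustration of the parsing above (the exact tuple layout returned by parse_node_types is inferred from how wanted[0] and wanted[1] are used, so treat it as an assumption), consider the node types from the --nodeTypes help text combined with a mixed --workers value:

# --nodeTypes c5.4xlarge/c5a.4xlarge:0.42,t2.large  --workers 0-2,3
worker_node_types = [({'c5.4xlarge', 'c5a.4xlarge'}, 0.42), ({'t2.large'}, None)]
nodeCounts = [(0, 2), 3]
# The first type is a range, so it becomes managed auto-scaling spot nodes:
#   addManagedNodes(nodeTypes={'c5.4xlarge', 'c5a.4xlarge'}, minNodes=0,
#                   maxNodes=2, preemptable=True, spotBid=0.42)
# The second is a plain count, so it becomes static on-demand nodes:
#   addNodes(nodeTypes={'t2.large'}, numNodes=3, preemptable=False)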
Example #13
    def _test(self, preemptableJobs=False):
        """Does the work of the testing.  Many features' tests are thrown in here in no particular order."""
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since create cluster
        # already ensures the leader is running
        self.cluster = cluster_factory(provisioner='aws',
                                       zone=self.zone,
                                       clusterName=self.clusterName)
        self.leader = self.cluster.getLeader()
        self.sshUtil(['mkdir', '-p', self.scriptDir])
        self.sshUtil(['mkdir', '-p', self.dataDir])

        assert len(self.getMatchingRoles()) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = [
            'virtualenv', '--system-site-packages', '--python', exactPython,
            '--never-download', self.venvDir
        ]
        self.sshUtil(venv_command)

        upgrade_command = [
            self.pip(), 'install', 'setuptools==28.7.1', 'pyyaml==3.12'
        ]
        self.sshUtil(upgrade_command)

        log.info('Set up script...')
        self._getScript()

        toilOptions = [
            self.jobStore, '--workDir=/var/lib/toil', '--clean=always',
            '--retryCount=2', '--logDebug',
            '--logFile=' + os.path.join(self.scriptDir, 'sort.log')
        ]

        if preemptableJobs:
            toilOptions.extend(['--defaultPreemptable'])

        log.info('Run script...')
        self._runScript(toilOptions)

        assert len(self.getMatchingRoles()) == 1

        from boto.exception import EC2ResponseError
        volumeID = self.getRootVolID()
        self.cluster.destroyCluster()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                self.cluster._boto2.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' %
                      volumeID)

        assert len(self.getMatchingRoles()) == 0
Example #14
    def _test(self, preemptableJobs=False):
        """Does the work of the testing.  Many features' tests are thrown in here in no particular order."""
        self.launchCluster()
        # get the leader so we know the IP address - we don't need to wait since create cluster
        # already ensures the leader is running
        self.cluster = cluster_factory(provisioner='aws',
                                       clusterName=self.clusterName)
        self.leader = self.cluster.getLeader()
        self.sshUtil([
            'mkdir', '-p', self.scriptDir
        ])  # hot deploy doesn't seem to be permitted in normal /tmp or /home

        assert len(self.getMatchingRoles()) == 1
        # --never-download prevents silent upgrades to pip, wheel and setuptools
        venv_command = [
            'virtualenv', '--system-site-packages', '--python', exactPython,
            '--never-download', '/home/venv'
        ]
        self.sshUtil(venv_command)

        upgrade_command = [
            '/home/venv/bin/pip', 'install', 'setuptools==28.7.1',
            'pyyaml==3.12'
        ]
        self.sshUtil(upgrade_command)

        self._getScript()

        toilOptions = [
            self.jobStore, '--batchSystem=mesos', '--workDir=/var/lib/toil',
            '--clean=always', '--retryCount=2', '--clusterStats=/tmp/t/',
            '--logDebug', '--logFile=/tmp/t/sort.log', '--provisioner=aws'
        ]

        toilOptions.extend([
            '--nodeTypes=' + ",".join(self.instanceTypes),
            '--maxNodes=' + ",".join(self.numWorkers)
        ])
        if preemptableJobs:
            toilOptions.extend(['--defaultPreemptable'])

        self._runScript(toilOptions)

        assert len(self.getMatchingRoles()) == 1

        # check stats
        self.sshUtil([
            '/home/venv/bin/python', '-c', 'import json; import os; '
            'json.load(open("/home/" + [f for f in os.listdir("/tmp/t/") if f.endswith(".json")].pop()))'
        ])

        from boto.exception import EC2ResponseError
        volumeID = self.getRootVolID()
        self.cluster.destroyCluster()
        for attempt in range(6):
            # https://github.com/BD2KGenomics/toil/issues/1567
            # retry this for up to 1 minute until the volume disappears
            try:
                self.cluster._ctx.ec2.get_all_volumes(volume_ids=[volumeID])
                time.sleep(10)
            except EC2ResponseError as e:
                if e.status == 400 and 'InvalidVolume.NotFound' in e.code:
                    break
                else:
                    raise
        else:
            self.fail('Volume with ID %s was not cleaned up properly' %
                      volumeID)

        assert len(self.getMatchingRoles()) == 0
Example #15
def main():
    parser = parser_with_common_options(provisioner_options=True,
                                        jobstore_option=False)
    parser.add_argument(
        "--leaderNodeType",
        dest="leaderNodeType",
        required=True,
        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        help="On AWS, the name of the AWS key pair to include on the instance."
        " On Google/GCE, this is the ssh key pair.")
    parser.add_argument(
        "--owner",
        dest='owner',
        help="The owner tag for all instances. If not given, the value in"
        " --keyPairName will be used if given.")
    parser.add_argument(
        "--boto",
        dest='botoPath',
        help="The path to the boto credentials directory. This is transferred "
        "to all nodes in order to access the AWS jobStore from non-AWS instances."
    )
    parser.add_argument(
        "-t",
        "--tag",
        metavar='NAME=VALUE',
        dest='tags',
        default=[],
        action='append',
        help="Tags are added to the AWS cluster for this node and all of its "
        "children. Tags are of the form:\n"
        " -t key1=value1 --tag key2=value2\n"
        "Multiple tags are allowed and each tag needs its own flag. By "
        "default the cluster is tagged with "
        " {\n"
        "      \"Name\": clusterName,\n"
        "      \"Owner\": IAM username\n"
        " }. ")
    parser.add_argument(
        "--vpcSubnet",
        help="VPC subnet ID to launch cluster in. Uses default subnet if not "
        "specified. This subnet needs to have auto assign IPs turned on.")
    parser.add_argument(
        "--nodeTypes",
        dest='nodeTypes',
        default=None,
        type=str,
        help="Comma-separated list of node types to create while launching the "
        "leader. The syntax for each node type depends on the provisioner "
        "used. For the aws provisioner this is the name of an EC2 instance "
        "type followed by a colon and the price in dollar to bid for a spot "
        "instance, for example 'c3.8xlarge:0.42'. Must also provide the "
        "--workers argument to specify how many workers of each node type "
        "to create.")
    parser.add_argument(
        "-w",
        "--workers",
        dest='workers',
        default=None,
        type=str,
        help=
        "Comma-separated list of the number of workers of each node type to "
        "launch alongside the leader when the cluster is created. This can be "
        "useful if running toil without auto-scaling but with need of more "
        "hardware support")
    parser.add_argument(
        "--leaderStorage",
        dest='leaderStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for the leader "
        "instance.  This is an EBS volume.")
    parser.add_argument(
        "--nodeStorage",
        dest='nodeStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for any worker "
        "instances created when using the -w flag. This is an EBS volume.")
    parser.add_argument(
        '--forceDockerAppliance',
        dest='forceDockerAppliance',
        action='store_true',
        default=False,
        help=
        "Disables sanity checking the existence of the docker image specified "
        "by TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for "
        "autoscaling.")
    parser.add_argument(
        '--awsEc2ProfileArn',
        dest='awsEc2ProfileArn',
        default=None,
        type=str,
        help=
        "If provided, the specified ARN is used as the instance profile for EC2 instances."
        "Useful for setting custom IAM profiles. If not specified, a new IAM role is created "
        "by default with sufficient access to perform basic cluster operations."
    )
    parser.add_argument(
        '--awsEc2ExtraSecurityGroupId',
        dest='awsEc2ExtraSecurityGroupIds',
        default=[],
        action='append',
        help=
        "Any additional security groups to attach to EC2 instances. Note that a security group "
        "with its name equal to the cluster name will always be created, thus ensure that "
        "the extra security groups do not have the same name as the cluster name."
    )
    options = parser.parse_args()
    set_logging_from_options(options)
    tags = create_tags_dict(options.tags) if options.tags else dict()

    worker_node_types = options.nodeTypes.split(
        ',') if options.nodeTypes else []
    worker_quantities = options.workers.split(',') if options.workers else []
    check_valid_node_types(options.provisioner,
                           worker_node_types + [options.leaderNodeType])

    # checks the validity of TOIL_APPLIANCE_SELF before proceeding
    applianceSelf(forceDockerAppliance=options.forceDockerAppliance)

    owner = options.owner or options.keyPairName or 'toil'

    # Check to see if the user specified a zone. If not, see if one is stored in an environment variable.
    options.zone = options.zone or os.environ.get(
        f'TOIL_{options.provisioner.upper()}_ZONE')

    if not options.zone:
        raise RuntimeError(
            f'Please provide a value for --zone or set a default in the '
            f'TOIL_{options.provisioner.upper()}_ZONE environment variable.')

    if (options.nodeTypes or
            options.workers) and not (options.nodeTypes and options.workers):
        raise RuntimeError(
            "The --nodeTypes and --workers options must be specified together."
        )

    if not len(worker_node_types) == len(worker_quantities):
        raise RuntimeError(
            "List of node types must be the same length as the list of workers."
        )

    cluster = cluster_factory(provisioner=options.provisioner,
                              clusterName=options.clusterName,
                              zone=options.zone,
                              nodeStorage=options.nodeStorage)

    cluster.launchCluster(
        leaderNodeType=options.leaderNodeType,
        leaderStorage=options.leaderStorage,
        owner=owner,
        keyName=options.keyPairName,
        botoPath=options.botoPath,
        userTags=tags,
        vpcSubnet=options.vpcSubnet,
        awsEc2ProfileArn=options.awsEc2ProfileArn,
        awsEc2ExtraSecurityGroupIds=options.awsEc2ExtraSecurityGroupIds)

    for worker_node_type, num_workers in zip(worker_node_types,
                                             worker_quantities):
        if ':' in worker_node_type:
            worker_node_type, bid = worker_node_type.split(':', 1)
            cluster.addNodes(nodeType=worker_node_type,
                             numNodes=int(num_workers),
                             preemptable=True,
                             spotBid=float(bid))
        else:
            cluster.addNodes(nodeType=worker_node_type,
                             numNodes=int(num_workers),
                             preemptable=False)
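A small trace of the loop above, assuming --nodeTypes c3.8xlarge:0.42,t2.micro and --workers 2,1 (c3.8xlarge:0.42 is the example from the help text; t2.micro is a placeholder):

worker_node_types = ['c3.8xlarge:0.42', 't2.micro']
worker_quantities = ['2', '1']
# 'c3.8xlarge:0.42' contains ':', so it is split into the instance type and the
# spot bid:
#   addNodes(nodeType='c3.8xlarge', numNodes=2, preemptable=True, spotBid=0.42)
# 't2.micro' has no bid, so it is launched on demand:
#   addNodes(nodeType='t2.micro', numNodes=1, preemptable=False)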