Example #1
    def _addNodes(self, instances, numNodes, preemptable=False):
        bdm = self._getBlockDeviceMapping(self.instanceType)
        arn = self._getProfileARN(self.ctx)
        workerData = dict(role='worker',
                          image=applianceSelf(),
                          entrypoint='mesos-slave',
                          args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable))
        userData = awsUserData.format(**workerData)
        kwargs = {'key_name': self.keyName, 'security_groups': [self.clusterName],
                  'instance_type': self.instanceType.name,
                  'user_data': userData, 'block_device_map': bdm,
                  'instance_profile_arn': arn}

        instancesLaunched = []

        if not preemptable:
            logger.info('Launching %s non-preemptable nodes', numNodes)
            instancesLaunched = create_ondemand_instances(self.ctx.ec2, image_id=self._discoverAMI(self.ctx),
                                                          spec=kwargs, num_instances=numNodes)
        else:
            logger.info('Launching %s preemptable nodes', numNodes)
            # force generator to evaluate
            instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                       price=self.spotBid,
                                       image_id=self._discoverAMI(self.ctx),
                                       tags={'clusterName': self.clusterName},
                                       spec=kwargs,
                                       num_instances=numNodes))
        # force the wait_instances_running generator to evaluate so we
        # actually block until the instances are running
        list(wait_instances_running(self.ctx.ec2, instancesLaunched))
        logger.info('Launched %s new instance(s)', numNodes)
        return len(instancesLaunched)
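A minimal, self-contained sketch of the user-data templating pattern used above. The real awsUserData template and applianceSelf() live in Toil; every concrete value below is a hypothetical stand-in:

    # Stand-in template: named placeholders filled via str.format(**dict).
    awsUserData = """#cloud-config
    runcmd:
      - docker run --entrypoint={entrypoint} {image} {args}  # role: {role}
    """
    workerData = dict(role='worker',
                      image='example/toil-appliance:latest',  # hypothetical image
                      entrypoint='mesos-slave',
                      args='--master=10.0.0.1:5050')
    print(awsUserData.format(**workerData))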
Example #2
    def addNodes(self, nodeType, numNodes, preemptable):
        instanceType = ec2_instance_types[nodeType]
        bdm = self._getBlockDeviceMapping(instanceType, rootVolSize=self.nodeStorage)
        arn = self._getProfileARN(self.ctx)
        keyPath = self.config.sseKey if self.config and self.config.sseKey else ''
        entryPoint = 'waitForKey.sh' if self.config and self.config.sseKey else 'mesos-slave'
        workerData = dict(role='worker',
                          image=applianceSelf(),
                          entrypoint=entryPoint,
                          sshKey=self.masterPublicKey,
                          args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable, keyPath=keyPath))
        userData = awsUserData.format(**workerData)
        sgs = [sg for sg in self.ctx.ec2.get_all_security_groups() if sg.name == self.clusterName]
        kwargs = {'key_name': self.keyName,
                  'security_group_ids': [sg.id for sg in sgs],
                  'instance_type': instanceType.name,
                  'user_data': userData,
                  'block_device_map': bdm,
                  'instance_profile_arn': arn,
                  'placement': getCurrentAWSZone()}
        kwargs["subnet_id"] = self.subnetID if self.subnetID else self._getClusterInstance(self.instanceMetaData).subnet_id

        instancesLaunched = []

        for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
            with attempt:
                # After we start launching instances we want to ensure the full setup
                # completes. The biggest obstacle is AWS request throttling, so we retry
                # on those errors at every request in this method.
                if not preemptable:
                    logger.info('Launching %s non-preemptable nodes', numNodes)
                    instancesLaunched = create_ondemand_instances(self.ctx.ec2, image_id=self._discoverAMI(self.ctx),
                                                                  spec=kwargs, num_instances=numNodes)
                else:
                    logger.info('Launching %s preemptable nodes', numNodes)
                    kwargs['placement'] = getSpotZone(self.spotBids[nodeType], instanceType.name, self.ctx)
                    # force generator to evaluate
                    instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                                                   price=self.spotBids[nodeType],
                                                                   image_id=self._discoverAMI(self.ctx),
                                                                   tags={'clusterName': self.clusterName},
                                                                   spec=kwargs,
                                                                   num_instances=numNodes,
                                                                   tentative=True)
                                             )
                    # flatten the list
                    instancesLaunched = [item for sublist in instancesLaunched for item in sublist]

        for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
            with attempt:
                # force the wait_instances_running generator to evaluate so we
                # actually block until the instances are running
                list(wait_instances_running(self.ctx.ec2, instancesLaunched))

        # request throttling retries happen internally to these two methods to ensure proper granularity
        AWSProvisioner._addTags(instancesLaunched, self.tags)
        self._propagateKey(instancesLaunched)

        logger.info('Launched %s new instance(s)', numNodes)
        return len(instancesLaunched)
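The retry(predicate=...) / with attempt: pattern above comes from a helper in the bd2k utility library. A simplified stand-in showing only its shape, not the real implementation, followed by a usage example built around a hypothetical flaky call:

    import time
    from contextlib import contextmanager

    def retry(predicate, attempts=5, delay=2):
        # Yield one context manager per attempt; stop once a with-block
        # completes without raising. Retryable errors (per predicate) are
        # swallowed and retried after a short sleep; others propagate.
        succeeded = [False]
        while attempts and not succeeded[0]:
            attempts -= 1

            @contextmanager
            def attempt(last=attempts == 0):
                try:
                    yield
                    succeeded[0] = True
                except Exception as e:
                    if not last and predicate(e):
                        time.sleep(delay)
                    else:
                        raise

            yield attempt()

    calls = []

    def flaky():
        # Hypothetical call that fails twice with a throttling-like error.
        calls.append(1)
        if len(calls) < 3:
            raise RuntimeError('RequestLimitExceeded')
        return 'ok'

    for attempt in retry(predicate=lambda e: 'RequestLimitExceeded' in str(e)):
        with attempt:
            result = flaky()
    assert result == 'ok' and len(calls) == 3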
Example #3
    def addNodes(self, numNodes, preemptable):
        instanceType = self._getInstanceType(preemptable)
        bdm = self._getBlockDeviceMapping(instanceType, rootVolSize=self.nodeStorage)
        arn = self._getProfileARN(self.ctx)
        keyPath = self.config.sseKey if self.config and self.config.sseKey else ''
        entryPoint = 'waitForKey.sh' if self.config and self.config.sseKey else 'mesos-slave'
        workerData = dict(role='worker',
                          image=applianceSelf(),
                          entrypoint=entryPoint,
                          sshKey=self.masterPublicKey,
                          args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable, keyPath=keyPath))
        userData = awsUserData.format(**workerData)
        sgs = [sg for sg in self.ctx.ec2.get_all_security_groups() if sg.name == self.clusterName]
        kwargs = {'key_name': self.keyName,
                  'security_group_ids': [sg.id for sg in sgs],
                  'instance_type': instanceType.name,
                  'user_data': userData,
                  'block_device_map': bdm,
                  'instance_profile_arn': arn,
                  'placement': getCurrentAWSZone()}
        kwargs["subnet_id"] = self.subnetID if self.subnetID else self._getClusterInstance(self.instanceMetaData).subnet_id

        instancesLaunched = []

        for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
            with attempt:
                # After we start launching instances we want to ensure the full setup
                # completes. The biggest obstacle is AWS request throttling, so we retry
                # on those errors at every request in this method.
                if not preemptable:
                    logger.info('Launching %s non-preemptable nodes', numNodes)
                    instancesLaunched = create_ondemand_instances(self.ctx.ec2, image_id=self._discoverAMI(self.ctx),
                                                                  spec=kwargs, num_instances=numNodes)
                else:
                    logger.info('Launching %s preemptable nodes', numNodes)
                    kwargs['placement'] = getSpotZone(self.spotBid, instanceType.name, self.ctx)
                    # force generator to evaluate
                    instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                                                   price=self.spotBid,
                                                                   image_id=self._discoverAMI(self.ctx),
                                                                   tags={'clusterName': self.clusterName},
                                                                   spec=kwargs,
                                                                   num_instances=numNodes,
                                                                   tentative=True)
                                             )
                    # flatten the list
                    instancesLaunched = [item for sublist in instancesLaunched for item in sublist]

        for attempt in retry(predicate=AWSProvisioner._throttlePredicate):
            with attempt:
                # force the wait_instances_running generator to evaluate so we
                # actually block until the instances are running
                list(wait_instances_running(self.ctx.ec2, instancesLaunched))

        # request throttling retries happen internally to these two methods to ensure proper granularity
        AWSProvisioner._addTags(instancesLaunched, self.tags)
        self._propagateKey(instancesLaunched)

        logger.info('Launched %s new instance(s)', numNodes)
        return len(instancesLaunched)
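AWSProvisioner._throttlePredicate is referenced above but never shown. A plausible hedged sketch, assuming only that boto's BotoServerError exposes the AWS error code; the real predicate may differ:

    from boto.exception import BotoServerError

    def throttlePredicate(e):
        # AWS signals request throttling with error codes such as
        # 'Throttling' and 'RequestLimitExceeded'.
        return (isinstance(e, BotoServerError) and
                e.error_code in ('Throttling', 'RequestLimitExceeded'))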
Example #4
 @classmethod
 def _getLeader(cls, clusterName, wait=False, zone=None):
     ctx = cls._buildContext(clusterName=clusterName, zone=zone)
     instances = cls.__getNodesInCluster(ctx, clusterName, both=True)
     instances.sort(key=lambda x: x.launch_time)
     leader = instances[0]  # assume leader was launched first
     if wait:
         logger.info("Waiting for toil_leader to enter 'running' state...")
         wait_instances_running(ctx.ec2, [leader])
         logger.info('... toil_leader is running')
         cls._waitForNode(leader, 'toil_leader')
     return leader
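The leader lookup relies on boto reporting launch_time as an ISO-8601 string, so a plain lexicographic sort is also chronological. A minimal illustration with hypothetical stand-in instances rather than boto objects:

    from collections import namedtuple

    Instance = namedtuple('Instance', 'id launch_time')  # stand-in, not boto
    instances = [Instance('i-worker', '2017-05-01T12:05:00.000Z'),
                 Instance('i-leader', '2017-05-01T12:00:00.000Z')]
    instances.sort(key=lambda x: x.launch_time)
    assert instances[0].id == 'i-leader'  # the leader, launched first, sorts first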
Example #5
 @classmethod
 def _getLeader(cls, clusterName, wait=False, zone=None):
     ctx = cls._buildContext(clusterName=clusterName, zone=zone)
     instances = cls._getNodesInCluster(ctx, clusterName, both=True)
     instances.sort(key=lambda x: x.launch_time)
     try:
         leader = instances[0]  # assume leader was launched first
     except IndexError:
         raise NoSuchClusterException(clusterName)
     if wait:
         logger.info("Waiting for toil_leader to enter 'running' state...")
         wait_instances_running(ctx.ec2, [leader])
         logger.info('... toil_leader is running')
         cls._waitForNode(leader, 'toil_leader')
     return leader
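Toil defines NoSuchClusterException itself; a hedged stand-in showing only the shape assumed above:

    class NoSuchClusterException(Exception):
        # Hypothetical stand-in; the real class lives in Toil's provisioner code.
        def __init__(self, clusterName):
            super(NoSuchClusterException, self).__init__(
                "The cluster '%s' could not be found." % clusterName)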
Example #6
 def launchCluster(self):
     self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
                                  '-w', '2', '--nodeStorage', str(self.requestedNodeStorage)])
     ctx = AWSProvisioner._buildContext(self.clusterName)
     nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
     nodes.sort(key=lambda x: x.launch_time)
     # assuming that leader is first
     workers = nodes[1:]
     # test that two worker nodes were created
     self.assertEqual(2, len(workers))
     # test that workers have expected storage size
     # just use the first worker
     worker = workers[0]
     # wait_instances_running yields instances as they enter the 'running'
     # state; consume the generator so the wait actually happens
     worker = next(wait_instances_running(ctx.ec2, [worker]))
     rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
     self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
     rootVolume = ctx.ec2.get_all_volumes(volume_ids=[rootBlockDevice.volume_id])[0]
     self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
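The test reads the root device from block_device_mapping under '/dev/xvda'. For context, a hedged sketch of how such a mapping is built with boto on the launch side, roughly what a helper like _getBlockDeviceMapping(..., rootVolSize=...) might do; the size value is hypothetical:

    from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType

    bdm = BlockDeviceMapping()
    root = BlockDeviceType()
    root.size = 50                     # root volume size in GiB (hypothetical)
    root.delete_on_termination = True
    bdm['/dev/xvda'] = root            # same device name the test inspects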
Example #7
    def _addNodes(self, instances, numNodes, preemptable=False):
        bdm = self._getBlockDeviceMapping(self.instanceType)
        arn = self._getProfileARN(self.ctx)
        workerData = dict(role='worker',
                          image=applianceSelf(),
                          entrypoint='mesos-slave',
                          args=workerArgs.format(ip=self.leaderIP, preemptable=preemptable))
        userData = awsUserData.format(**workerData)
        kwargs = {'key_name': self.keyName,
                  'security_groups': [self.clusterName],
                  'instance_type': self.instanceType.name,
                  'user_data': userData,
                  'block_device_map': bdm,
                  'instance_profile_arn': arn}

        instancesLaunched = []

        if not preemptable:
            logger.info('Launching %s non-preemptable nodes', numNodes)
            instancesLaunched = create_ondemand_instances(self.ctx.ec2, image_id=self._discoverAMI(self.ctx),
                                                          spec=kwargs, num_instances=numNodes)
        else:
            logger.info('Launching %s preemptable nodes', numNodes)
            kwargs['placement'] = getSpotZone(self.spotBid, self.instanceType.name, self.ctx)
            # force generator to evaluate
            instancesLaunched = list(create_spot_instances(ec2=self.ctx.ec2,
                                                           price=self.spotBid,
                                                           image_id=self._discoverAMI(self.ctx),
                                                           tags={'clusterName': self.clusterName},
                                                           spec=kwargs,
                                                           num_instances=numNodes,
                                                           tentative=True)
                                     )
            # flatten the list: with tentative spot requests the helper
            # yields one list of instances per fulfilled request
            instancesLaunched = [item for sublist in instancesLaunched for item in sublist]
        # force the wait_instances_running generator to evaluate so we
        # actually block until the instances are running
        list(wait_instances_running(self.ctx.ec2, instancesLaunched))
        logger.info('Launched %s new instance(s)', numNodes)
        return len(instancesLaunched)
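Examples #2 and #3 flatten the spot-launch results because the helper appears to yield one list of instances per fulfilled spot request. The idiom in isolation, with stand-in data:

    batches = [['i-1', 'i-2'], ['i-3']]  # hypothetical per-request batches
    flat = [item for sublist in batches for item in sublist]
    assert flat == ['i-1', 'i-2', 'i-3']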
Example #8
 def launchCluster(self):
     self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
                                  '-w', '2', '--nodeStorage', str(self.requestedNodeStorage)])
     ctx = AWSProvisioner._buildContext(self.clusterName)
     nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
     nodes.sort(key=lambda x: x.launch_time)
     # assuming that leader is first
     workers = nodes[1:]
     # test that two worker nodes were created
     self.assertEqual(2, len(workers))
     # test that workers have expected storage size
     # just use the first worker
     worker = workers[0]
     worker = next(wait_instances_running(ctx.ec2, [worker]))
     rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
     self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
     rootVolume = ctx.ec2.get_all_volumes(volume_ids=[rootBlockDevice.volume_id])[0]
     self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
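The next(...) around wait_instances_running matters because the helper appears to be a generator, and a generator's body does not run until it is iterated. A minimal stand-in (not the real cgcloud helper) illustrating the pitfall:

    def wait_running(instances):
        # The real helper polls EC2 until each instance leaves 'pending',
        # yielding it once it is running; this stand-in only yields.
        for i in instances:
            yield i

    pending = ['i-abc123']
    wait_running(pending)                # no-op: the generator is never advanced
    first = next(wait_running(pending))  # drives the generator (in the real
                                         # helper, this is where the wait happens)
    assert first == 'i-abc123'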