def tryRun(self):
    while not self.stop:
        try:
            with throttle(self.scaler.config.scaleInterval):
                queuedJobs = self.scaler.leader.getJobs()
                # Estimate a Shape for each queued job, using the historical average runtime
                # for jobs of the same name.
                queuedJobShapes = [
                    Shape(wallTime=self.scaler.getAverageRuntime(
                              jobName=job.jobName,
                              service=isinstance(job, ServiceJobNode)),
                          memory=job.memory,
                          cores=job.cores,
                          disk=job.disk,
                          preemptable=job.preemptable) for job in queuedJobs]
                # Count the workers of each node shape that are currently provisioned.
                currentNodeCounts = {}
                for nodeShape in self.scaler.nodeShapes:
                    nodeType = self.scaler.nodeShapeToType[nodeShape]
                    currentNodeCounts[nodeShape] = len(
                        self.scaler.leader.provisioner.getProvisionedWorkers(
                            nodeType=nodeType,
                            preemptable=nodeShape.preemptable))
                estimatedNodeCounts = self.scaler.getEstimatedNodeCounts(
                    queuedJobShapes, currentNodeCounts)
                self.scaler.updateClusterSize(estimatedNodeCounts)
                if self.stats:
                    self.stats.checkStats()
        except:
            logger.exception("Exception encountered in scaler thread. Making a best-effort "
                             "attempt to keep going, but things may go wrong from now on.")
    self.scaler.shutDown()
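# The scaler loops in this file pace each iteration with a throttle() context manager. The
# sketch below is an illustration only: it assumes the semantics of toil.lib.throttle.throttle
# (the body of the `with` block is padded with a sleep so that each iteration takes at least
# min_interval seconds) and is not the actual Toil implementation.
import time
from contextlib import contextmanager

@contextmanager
def throttle(min_interval):
    """Make the enclosed block take at least min_interval seconds (illustrative sketch)."""
    start = time.time()
    try:
        yield
    finally:
        remainder = min_interval - (time.time() - start)
        if remainder > 0:
            time.sleep(remainder)

# Hypothetical usage: each polling iteration takes at least 60 seconds.
# while not stop:
#     with throttle(60):
#         poll_and_scale()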
def tryRun(self):
    global _preemptableNodeDeficit

    while not self.scaler.stop:
        with throttle(self.scaler.config.scaleInterval):
            self.totalNodes = len(self.scaler.leader.provisioner.getProvisionedWorkers(self.preemptable))

            # Estimate the number of nodes to run the issued jobs.

            # Number of jobs issued
            queueSize = self.scaler.leader.getNumberOfJobsIssued(preemptable=self.preemptable)

            # Job shapes of completed jobs
            recentJobShapes = self.jobShapes.get()
            assert len(recentJobShapes) > 0

            # Estimate of number of nodes needed to run recent jobs
            nodesToRunRecentJobs = binPacking(recentJobShapes, self.nodeShape)

            # Actual calculation of the estimated number of nodes required
            estimatedNodes = 0 if queueSize == 0 else max(1, int(round(
                self.scaler.config.alphaPacking
                * nodesToRunRecentJobs
                * float(queueSize) / len(recentJobShapes))))

            # Account for the case where the average historical runtime of completed jobs is
            # less than the runtime of currently running jobs. This is important to avoid a
            # deadlock where the estimated number of nodes to run the jobs is too small to
            # schedule a set of service jobs and their dependent jobs, leading to service jobs
            # running indefinitely.

            # How many jobs are currently running and their average runtime.
            numberOfRunningJobs, currentAvgRuntime = self.scaler.leader.getNumberAndAvgRuntimeOfCurrentlyRunningJobs()

            # Average runtime of recently completed jobs
            historicalAvgRuntime = sum(map(lambda jS: jS.wallTime, recentJobShapes)) / len(recentJobShapes)

            # Ratio of avg. runtime of currently running and completed jobs
            runtimeCorrection = (float(currentAvgRuntime) / historicalAvgRuntime
                                 if currentAvgRuntime > historicalAvgRuntime
                                 and numberOfRunningJobs >= estimatedNodes else 1.0)

            # Make the correction, if necessary (only do so if the cluster is busy and the
            # average runtime is higher than the historical average)
            if runtimeCorrection != 1.0:
                estimatedNodes = int(round(estimatedNodes * runtimeCorrection))
                if self.totalNodes < self.maxNodes:
                    logger.warn("Historical avg. runtime (%s) is less than current avg. runtime (%s) and cluster"
                                " is being well utilised (%s running jobs), increasing cluster requirement by: %s"
                                % (historicalAvgRuntime, currentAvgRuntime, numberOfRunningJobs, runtimeCorrection))

            # If we're the non-preemptable scaler, we need to see if we have a deficit of
            # preemptable nodes that we should compensate for.
            if not self.preemptable:
                compensation = self.scaler.config.preemptableCompensation
                assert 0.0 <= compensation <= 1.0
                # The number of nodes we provision as compensation for missing preemptable
                # nodes is the product of the deficit (the number of preemptable nodes we did
                # _not_ allocate) and configuration preference.
                compensationNodes = int(round(_preemptableNodeDeficit * compensation))
                if compensationNodes > 0:
                    logger.info('Adding %d non-preemptable nodes to compensate for a deficit of %d '
                                'preemptable ones.', compensationNodes, _preemptableNodeDeficit)
                estimatedNodes += compensationNodes

            jobsPerNode = (0 if nodesToRunRecentJobs <= 0
                           else len(recentJobShapes) / float(nodesToRunRecentJobs))
            if estimatedNodes > 0 and self.totalNodes < self.maxNodes:
                logger.info('Estimating that cluster needs %s %s of shape %s, from current '
                            'size of %s, given a queue size of %s, the number of jobs per node '
                            'estimated to be %s, an alpha parameter of %s and a run-time length correction of %s.',
                            estimatedNodes, self.nodeTypeString, self.nodeShape, self.totalNodes,
                            queueSize, jobsPerNode, self.scaler.config.alphaPacking, runtimeCorrection)

            # Use inertia parameter to stop small fluctuations
            delta = self.totalNodes * max(0.0, self.scaler.config.betaInertia - 1.0)
            if self.totalNodes - delta <= estimatedNodes <= self.totalNodes + delta:
                logger.debug('Difference in new (%s) and previous estimates in number of '
                             '%s (%s) required is within beta (%s), making no change.',
                             estimatedNodes, self.nodeTypeString, self.totalNodes,
                             self.scaler.config.betaInertia)
                estimatedNodes = self.totalNodes

            # Bound number using the max and min node parameters
            if estimatedNodes > self.maxNodes:
                logger.debug('Limiting the estimated number of necessary %s (%s) to the '
                             'configured maximum (%s).', self.nodeTypeString, estimatedNodes, self.maxNodes)
                estimatedNodes = self.maxNodes
            elif estimatedNodes < self.minNodes:
                logger.info('Raising the estimated number of necessary %s (%s) to the '
                            'configured minimum (%s).', self.nodeTypeString, estimatedNodes, self.minNodes)
                estimatedNodes = self.minNodes

            if estimatedNodes != self.totalNodes:
                logger.info('Changing the number of %s from %s to %s.',
                            self.nodeTypeString, self.totalNodes, estimatedNodes)
                self.totalNodes = self.setNodeCount(numNodes=estimatedNodes,
                                                    preemptable=self.preemptable)

                # If we were scaling up the number of preemptable nodes and failed to meet
                # our target, we need to update the slack so that non-preemptable nodes will
                # be allocated instead and we won't block. If we _did_ meet our target,
                # we need to reset the slack to 0.
                if self.preemptable:
                    if self.totalNodes < estimatedNodes:
                        deficit = estimatedNodes - self.totalNodes
                        logger.info('Preemptable scaler detected deficit of %d nodes.', deficit)
                        _preemptableNodeDeficit = deficit
                    else:
                        _preemptableNodeDeficit = 0

            if self.stats:
                self.stats.checkStats()

    self.shutDown(preemptable=self.preemptable)
    logger.info('Scaler exited normally.')
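# A small worked example (hypothetical numbers, not taken from the code above) of the
# queue-size-scaled estimate used in the variant above: the recent-job sample is bin-packed
# onto nodes, then scaled by the ratio of the queue size to the sample size and by the
# alphaPacking over-provisioning factor.
def estimate_nodes(alphaPacking, nodesToRunRecentJobs, queueSize, numRecentJobs):
    """Sketch of the queue-size-scaled node estimate from the scaler above."""
    if queueSize == 0:
        return 0
    return max(1, int(round(alphaPacking * nodesToRunRecentJobs
                            * float(queueSize) / numRecentJobs)))

# E.g. 100 recently completed jobs packed onto 10 nodes, 250 jobs queued, alphaPacking = 0.8:
# 0.8 * 10 * 250 / 100 = 20 nodes.
assert estimate_nodes(0.8, 10, 250, 100) == 20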
def tryRun(self):
    global _preemptableNodeDeficit

    if isinstance(self.scaler.jobBatcher.batchSystem, AbstractScalableBatchSystem):
        totalNodes = len(self.scaler.jobBatcher.batchSystem.getNodes(self.preemptable))
    else:
        totalNodes = 0
    logger.info('Starting with %s node(s) in the cluster.', totalNodes)

    while not self.scaler.stop:
        with throttle(self.scaler.config.scaleInterval):
            # Calculate the approx. number of nodes needed
            # TODO: Correct for jobs already running, which can be considered fractions of a job
            queueSize = self.scaler.jobBatcher.getNumberOfJobsIssued(preemptable=self.preemptable)
            recentJobShapes = self.jobShapes.get()
            assert len(recentJobShapes) > 0
            nodesToRunRecentJobs = binPacking(recentJobShapes, self.nodeShape)
            estimatedNodes = 0 if queueSize == 0 else max(1, int(round(
                self.scaler.config.alphaPacking
                * nodesToRunRecentJobs
                * float(queueSize) / len(recentJobShapes))))

            # If we're the non-preemptable scaler, we need to see if we have a deficit of
            # preemptable nodes that we should compensate for.
            if not self.preemptable:
                compensation = self.scaler.config.preemptableCompensation
                assert 0.0 <= compensation <= 1.0
                # The number of nodes we provision as compensation for missing preemptable
                # nodes is the product of the deficit (the number of preemptable nodes we did
                # _not_ allocate) and configuration preference.
                compensationNodes = int(round(_preemptableNodeDeficit * compensation))
                logger.info('Adding %d non-preemptable nodes to compensate for a deficit of %d '
                            'preemptable ones.', compensationNodes, _preemptableNodeDeficit)
                estimatedNodes += compensationNodes

            jobsPerNode = (0 if nodesToRunRecentJobs <= 0
                           else len(recentJobShapes) / float(nodesToRunRecentJobs))
            logger.debug('Estimating that cluster needs %s nodes of shape %s, from current '
                         'size of %s, given a queue size of %s, the number of jobs per node '
                         'estimated to be %s and an alpha parameter of %s.',
                         estimatedNodes, self.nodeShape, totalNodes, queueSize,
                         jobsPerNode, self.scaler.config.alphaPacking)

            # Use inertia parameter to stop small fluctuations: keep the current size if the
            # new estimate is within the betaInertia band around it.
            delta = totalNodes * max(0.0, self.scaler.config.betaInertia - 1.0)
            if totalNodes - delta <= estimatedNodes <= totalNodes + delta:
                logger.debug('Difference in new (%s) and previous estimates in number of '
                             'nodes (%s) required is within beta (%s), making no change.',
                             estimatedNodes, totalNodes, self.scaler.config.betaInertia)
                estimatedNodes = totalNodes

            # Bound number using the max and min node parameters
            if estimatedNodes > self.maxNodes:
                logger.info('Limiting the estimated number of necessary nodes (%s) to the '
                            'configured maximum (%s).', estimatedNodes, self.maxNodes)
                estimatedNodes = self.maxNodes
            elif estimatedNodes < self.minNodes:
                logger.info('Raising the estimated number of necessary nodes (%s) to the '
                            'configured minimum (%s).', estimatedNodes, self.minNodes)
                estimatedNodes = self.minNodes

            if estimatedNodes != totalNodes:
                logger.info('Changing the number of worker nodes from %s to %s.',
                            totalNodes, estimatedNodes)
                totalNodes = self.scaler.provisioner.setNodeCount(numNodes=estimatedNodes,
                                                                  preemptable=self.preemptable)

                # If we were scaling up the number of preemptable nodes and failed to meet
                # our target, we need to update the slack so that non-preemptable nodes will
                # be allocated instead and we won't block. If we _did_ meet our target,
                # we need to reset the slack to 0.
                if self.preemptable:
                    if totalNodes < estimatedNodes:
                        deficit = estimatedNodes - totalNodes
                        logger.info('Preemptable scaler detected deficit of %d nodes.', deficit)
                        _preemptableNodeDeficit = deficit
                    else:
                        _preemptableNodeDeficit = 0

    logger.info('Forcing provisioner to reduce cluster size to zero.')
    totalNodes = self.scaler.provisioner.setNodeCount(numNodes=0,
                                                      preemptable=self.preemptable,
                                                      force=True)
    if totalNodes != 0:
        raise RuntimeError('Provisioner was not able to reduce cluster size to zero.')
    else:
        logger.info('Scaler exited normally.')
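# A worked example (hypothetical numbers) of the preemptable compensation applied above: the
# non-preemptable scaler adds a fraction (preemptableCompensation, between 0.0 and 1.0) of the
# preemptable-node deficit recorded by the preemptable scaler.
def compensation_nodes(preemptableNodeDeficit, compensation):
    """Sketch of the deficit compensation step from the scaler above."""
    assert 0.0 <= compensation <= 1.0
    return int(round(preemptableNodeDeficit * compensation))

# E.g. the preemptable scaler fell 6 nodes short and preemptableCompensation is 0.5, so the
# non-preemptable estimate is bumped by 3 nodes.
assert compensation_nodes(6, 0.5) == 3
assert compensation_nodes(6, 0.0) == 0   # compensation disabled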
def _addNodes(self, instances, numNodes, preemptable=False):
    deadline = time.time() + provisioning_timeout
    spec = dict(key_name=self._keyName,
                user_data=self._userData(),
                instance_type=self.instanceType[preemptable].name,
                instance_profile_arn=self._instanceProfileArn,
                security_group_ids=self._securityGroupIds,
                ebs_optimized=self.ebsOptimized,
                dry_run=False)
    # Offset the ordinals of the preemptable nodes to be disjunct from the non-preemptable
    # ones. Without this, the two scaler threads would inevitably allocate colliding ordinals.
    offset = 1000 if preemptable else 0
    used_ordinals = {int(i.tags['cluster_ordinal']) - offset for i in instances}
    # Since the leader is absent from the instances iterable, we need to explicitly reserve its
    # ordinal unless we're allocating offset ordinals reserved for preemptable instances:
    assert len(used_ordinals) == len(instances)  # check for collisions
    if not preemptable:
        used_ordinals.add(0)
    ordinals = (ordinal + offset for ordinal in allocate_cluster_ordinals(num=numNodes,
                                                                          used=used_ordinals))

    def createInstances():
        """
        :rtype: Iterable[list[Instance]]
        """
        if preemptable:
            for batch in create_spot_instances(self._ec2, self.spotBid, self.imageId, spec,
                                               # Don't insist on spot requests and don't raise
                                               # if no requests were fulfilled:
                                               tentative=True,
                                               num_instances=numNodes,
                                               timeout=deadline - time.time()):
                yield batch
        else:
            yield create_ondemand_instances(self._ec2, self.imageId, spec,
                                            num_instances=numNodes)

    instancesByAddress = {}

    def handleInstance(instance):
        log.debug('Tagging instance %s.', instance.id)
        leader_tags = self._instance.tags
        name = leader_tags['Name'].replace('toil-leader', 'toil-worker')
        tag_object_persistently(instance, dict(leader_tags,
                                               Name=name,
                                               cluster_ordinal=next(ordinals)))
        assert instance.private_ip_address
        instancesByAddress[instance.private_ip_address] = instance

    # Each instance gets a different ordinal so we can't tag an entire batch at once but have
    # to tag each instance individually. It needs to be done quickly because the tags are
    # crucial for the boot code running inside the instance to join the cluster. Hence we do
    # it in a thread pool. If the pool is too large, we'll hit the EC2 limit on the number
    # of concurrent requests. If it is too small, we won't be able to tag all instances in
    # time.
    with thread_pool(min(numNodes, 32)) as pool:
        for batch in createInstances():
            log.debug('Got a batch of %i instance(s).', len(batch))
            for instance in batch:
                log.debug('Submitting instance %s to thread pool for tagging.', instance.id)
                pool.apply_async(handleInstance, (instance,))
    numInstancesAdded = len(instancesByAddress)
    log.info('Created and tagged %i instance(s).', numInstancesAdded)

    if preemptable:
        # Reset deadline such that slow spot creation does not take away from instance boot-up
        deadline = time.time() + provisioning_timeout
    if isinstance(self.batchSystem, AbstractScalableBatchSystem):
        while instancesByAddress and time.time() < deadline:
            with throttle(10):
                log.debug('Waiting for batch system to report back %i node(s).',
                          len(instancesByAddress))
                # Get all nodes to be safe, not just the ones whose preemptability matches,
                # in case there's a problem with a node determining its own preemptability.
                nodes = self.batchSystem.getNodes()
                for nodeAddress in nodes.iterkeys():
                    instancesByAddress.pop(nodeAddress, None)
        if instancesByAddress:
            log.warn('%i instance(s) out of %i did not join the cluster as worker nodes. They '
                     'will be terminated.', len(instancesByAddress), numInstancesAdded)
            instanceIds = [i.id for i in instancesByAddress.itervalues()]
            self._logAndTerminate(instanceIds)
            numInstancesAdded -= len(instanceIds)
        else:
            log.info('All %i node(s) joined the cluster.', numInstancesAdded)
    else:
        log.warn('Batch system is not scalable. Assuming all instances joined the cluster.')
    return numInstancesAdded
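# The tagging code above relies on allocate_cluster_ordinals(num, used) from cgcloud-lib and
# offsets preemptable ordinals by 1000 so the two scaler threads never hand out colliding
# values (non-preemptable workers and the leader use 0..999, preemptable workers 1000 and up).
# The sketch below is an assumption about that allocator's behaviour (reuse gaps first, then
# count upward past the maximum), shown only to illustrate how the offset keeps the two ranges
# disjoint; the real implementation may differ.
def allocate_ordinals_sketch(num, used):
    """Yield num ordinals not present in used: gaps first, then fresh values (illustrative)."""
    candidate = 0
    produced = 0
    while produced < num:
        if candidate not in used:
            yield candidate
            produced += 1
        candidate += 1

# Hypothetical example: the leader holds ordinal 0, two non-preemptable workers hold 1 and 3.
used_non_preemptable = {0, 1, 3}
new_non_preemptable = list(allocate_ordinals_sketch(2, used_non_preemptable))   # [2, 4]
new_preemptable = [o + 1000 for o in allocate_ordinals_sketch(2, set())]        # [1000, 1001]
assert not set(new_non_preemptable) & set(new_preemptable)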
def tryRun(self):
    while not self.scaler.stop:
        with throttle(self.scaler.config.scaleInterval):
            # Estimate of number of nodes needed to run recent jobs
            recentJobShapes = self.jobShapes.get()
            queuedJobs = self.scaler.leader.getJobs()
            logger.info("Detected %i queued jobs." % len(queuedJobs))
            queuedJobShapes = [
                Shape(wallTime=self.scaler.getAverageRuntime(jobName=job.jobName),
                      memory=job.memory,
                      cores=job.cores,
                      disk=job.disk,
                      preemptable=job.preemptable) for job in queuedJobs]
            nodesToRunRecentJobs = binPacking(jobShapes=recentJobShapes,
                                              nodeShapes=self.nodeShapes)
            nodesToRunQueuedJobs = binPacking(jobShapes=queuedJobShapes,
                                              nodeShapes=self.nodeShapes)
            for nodeShape in self.nodeShapes:
                nodeType = self.nodeShapeToType[nodeShape]
                self.totalNodes[nodeShape] = len(
                    self.scaler.leader.provisioner.getProvisionedWorkers(
                        nodeType=nodeType,
                        preemptable=nodeShape.preemptable))
                logger.info("Nodes of type %s to run recent jobs: %s"
                            % (nodeType, nodesToRunRecentJobs[nodeShape]))
                logger.info("Nodes of type %s to run queued jobs = %s"
                            % (nodeType, nodesToRunQueuedJobs[nodeShape]))

                # Actual calculation of the estimated number of nodes required
                estimatedNodes = 0 if nodesToRunQueuedJobs[nodeShape] == 0 else max(1, int(round(
                    self.scaler.config.alphaPacking * nodesToRunRecentJobs[nodeShape]
                    + (1 - self.scaler.config.alphaPacking) * nodesToRunQueuedJobs[nodeShape])))
                logger.info("Estimating %i nodes of shape %s" % (estimatedNodes, nodeShape))

                # If we're scaling a non-preemptable node type, we need to see if we have a
                # deficit of preemptable nodes of this type that we should compensate for.
                if not nodeShape.preemptable:
                    compensation = self.scaler.config.preemptableCompensation
                    assert 0.0 <= compensation <= 1.0
                    # The number of nodes we provision as compensation for missing preemptable
                    # nodes is the product of the deficit (the number of preemptable nodes we did
                    # _not_ allocate) and configuration preference.
                    compensationNodes = int(round(self.preemptableNodeDeficit[nodeType] * compensation))
                    if compensationNodes > 0:
                        logger.info('Adding %d non-preemptable nodes of type %s to compensate for a deficit of %d '
                                    'preemptable ones.',
                                    compensationNodes, nodeType, self.preemptableNodeDeficit[nodeType])
                    estimatedNodes += compensationNodes

                jobsPerNode = (0 if nodesToRunRecentJobs[nodeShape] <= 0
                               else old_div(len(recentJobShapes),
                                            float(nodesToRunRecentJobs[nodeShape])))
                if estimatedNodes > 0 and self.totalNodes[nodeShape] < self.maxNodes[nodeShape]:
                    logger.info('Estimating that cluster needs %s of shape %s, from current '
                                'size of %s, given a queue size of %s, the number of jobs per node '
                                'estimated to be %s, an alpha parameter of %s.',
                                estimatedNodes, nodeShape, self.totalNodes[nodeShape],
                                len(queuedJobs), jobsPerNode, self.scaler.config.alphaPacking)

                logger.info("Currently %i nodes of type %s in cluster"
                            % (self.totalNodes[nodeShape], nodeType))
                if self.scaler.leader.toilMetrics:
                    self.scaler.leader.toilMetrics.logClusterSize(
                        nodeType=nodeType,
                        currentSize=self.totalNodes[nodeShape],
                        desiredSize=estimatedNodes)

                # Use inertia parameter to stop small fluctuations
                delta = self.totalNodes[nodeShape] * max(0.0, self.scaler.config.betaInertia - 1.0)
                if self.totalNodes[nodeShape] - delta <= estimatedNodes <= self.totalNodes[nodeShape] + delta:
                    logger.debug('Difference in new (%s) and previous estimates in number of '
                                 '%s (%s) required is within beta (%s), making no change.',
                                 estimatedNodes, nodeType, self.totalNodes[nodeShape],
                                 self.scaler.config.betaInertia)
                    estimatedNodes = self.totalNodes[nodeShape]

                # Bound number using the max and min node parameters
                if estimatedNodes > self.maxNodes[nodeShape]:
                    logger.debug('Limiting the estimated number of necessary %s (%s) to the '
                                 'configured maximum (%s).',
                                 nodeType, estimatedNodes, self.maxNodes[nodeShape])
                    estimatedNodes = self.maxNodes[nodeShape]
                elif estimatedNodes < self.minNodes[nodeShape]:
                    logger.info('Raising the estimated number of necessary %s (%s) to the '
                                'configured minimum (%s).',
                                nodeType, estimatedNodes, self.minNodes[nodeShape])
                    estimatedNodes = self.minNodes[nodeShape]

                if estimatedNodes != self.totalNodes[nodeShape]:
                    logger.info('Changing the number of %s from %s to %s.',
                                nodeType, self.totalNodes[nodeShape], estimatedNodes)
                    self.totalNodes[nodeShape] = self.setNodeCount(
                        nodeType=nodeType,
                        numNodes=estimatedNodes,
                        preemptable=nodeShape.preemptable)

                    # If we were scaling up a preemptable node type and failed to meet
                    # our target, we will attempt to compensate for the deficit while scaling
                    # non-preemptable nodes of this type.
                    if nodeShape.preemptable:
                        if self.totalNodes[nodeShape] < estimatedNodes:
                            deficit = estimatedNodes - self.totalNodes[nodeShape]
                            logger.info('Preemptable scaler detected deficit of %d nodes of type %s.'
                                        % (deficit, nodeType))
                            self.preemptableNodeDeficit[nodeType] = deficit
                        else:
                            self.preemptableNodeDeficit[nodeType] = 0

            # Attempt to terminate any nodes that we previously designated for
            # termination, but which still had workers running.
            self._terminateIgnoredNodes()
            if self.stats:
                self.stats.checkStats()

    self.shutDown()
    logger.info('Scaler exited normally.')
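# A worked example (hypothetical numbers) of the blended estimate used in the per-node-type
# variant above: here alphaPacking weights the bin-packed recent-job estimate against the
# bin-packed queued-job estimate, instead of scaling a single estimate by queue size.
def estimate_nodes_blended(alphaPacking, nodesForRecentJobs, nodesForQueuedJobs):
    """Sketch of the weighted node estimate from the scaler above."""
    if nodesForQueuedJobs == 0:
        return 0
    return max(1, int(round(alphaPacking * nodesForRecentJobs
                            + (1 - alphaPacking) * nodesForQueuedJobs)))

# E.g. recent jobs would need 4 nodes, queued jobs would need 10, alphaPacking = 0.8:
# 0.8 * 4 + 0.2 * 10 = 5.2, rounded to 5 nodes of this shape.
assert estimate_nodes_blended(0.8, 4, 10) == 5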
def tryRun(self):
    global _preemptableNodeDeficit

    while not self.scaler.stop:
        with throttle(self.scaler.config.scaleInterval):
            self.totalNodes = len(self.scaler.leader.provisioner.getProvisionedWorkers(self.preemptable))

            # Estimate the number of nodes to run the issued jobs.

            # Number of jobs issued
            queueSize = self.scaler.leader.getNumberOfJobsIssued(preemptable=self.preemptable)

            # Job shapes of completed jobs
            recentJobShapes = self.jobShapes.get()
            assert len(recentJobShapes) > 0

            # Estimate of number of nodes needed to run recent jobs
            nodesToRunRecentJobs = binPacking(recentJobShapes, self.nodeShape)

            # Actual calculation of the estimated number of nodes required
            estimatedNodes = 0 if queueSize == 0 else max(1, int(round(
                self.scaler.config.alphaPacking
                * nodesToRunRecentJobs
                * float(queueSize) / len(recentJobShapes))))

            # Account for the case where the average historical runtime of completed jobs is
            # less than the runtime of currently running jobs. This is important to avoid a
            # deadlock where the estimated number of nodes to run the jobs is too small to
            # schedule a set of service jobs and their dependent jobs, leading to service jobs
            # running indefinitely.

            # How many jobs are currently running and their average runtime.
            numberOfRunningJobs, currentAvgRuntime = self.scaler.leader.getNumberAndAvgRuntimeOfCurrentlyRunningJobs()

            # Average runtime of recently completed jobs
            historicalAvgRuntime = old_div(sum([jS.wallTime for jS in recentJobShapes]),
                                           len(recentJobShapes))

            # Ratio of avg. runtime of currently running and completed jobs
            runtimeCorrection = (old_div(float(currentAvgRuntime), historicalAvgRuntime)
                                 if currentAvgRuntime > historicalAvgRuntime
                                 and numberOfRunningJobs >= estimatedNodes else 1.0)

            # Make the correction, if necessary (only do so if the cluster is busy and the
            # average runtime is higher than the historical average)
            if runtimeCorrection != 1.0:
                estimatedNodes = int(round(estimatedNodes * runtimeCorrection))
                if self.totalNodes < self.maxNodes:
                    logger.warn("Historical avg. runtime (%s) is less than current avg. runtime (%s) and cluster"
                                " is being well utilised (%s running jobs), increasing cluster requirement by: %s"
                                % (historicalAvgRuntime, currentAvgRuntime, numberOfRunningJobs, runtimeCorrection))

            # If we're the non-preemptable scaler, we need to see if we have a deficit of
            # preemptable nodes that we should compensate for.
            if not self.preemptable:
                compensation = self.scaler.config.preemptableCompensation
                assert 0.0 <= compensation <= 1.0
                # The number of nodes we provision as compensation for missing preemptable
                # nodes is the product of the deficit (the number of preemptable nodes we did
                # _not_ allocate) and configuration preference.
                compensationNodes = int(round(_preemptableNodeDeficit * compensation))
                if compensationNodes > 0:
                    logger.info('Adding %d non-preemptable nodes to compensate for a deficit of %d '
                                'preemptable ones.', compensationNodes, _preemptableNodeDeficit)
                estimatedNodes += compensationNodes

            jobsPerNode = (0 if nodesToRunRecentJobs <= 0
                           else old_div(len(recentJobShapes), float(nodesToRunRecentJobs)))
            if estimatedNodes > 0 and self.totalNodes < self.maxNodes:
                logger.info('Estimating that cluster needs %s %s of shape %s, from current '
                            'size of %s, given a queue size of %s, the number of jobs per node '
                            'estimated to be %s, an alpha parameter of %s and a run-time length correction of %s.',
                            estimatedNodes, self.nodeTypeString, self.nodeShape, self.totalNodes,
                            queueSize, jobsPerNode, self.scaler.config.alphaPacking, runtimeCorrection)

            # Use inertia parameter to stop small fluctuations
            delta = self.totalNodes * max(0.0, self.scaler.config.betaInertia - 1.0)
            if self.totalNodes - delta <= estimatedNodes <= self.totalNodes + delta:
                logger.debug('Difference in new (%s) and previous estimates in number of '
                             '%s (%s) required is within beta (%s), making no change.',
                             estimatedNodes, self.nodeTypeString, self.totalNodes,
                             self.scaler.config.betaInertia)
                estimatedNodes = self.totalNodes

            # Bound number using the max and min node parameters
            if estimatedNodes > self.maxNodes:
                logger.debug('Limiting the estimated number of necessary %s (%s) to the '
                             'configured maximum (%s).', self.nodeTypeString, estimatedNodes, self.maxNodes)
                estimatedNodes = self.maxNodes
            elif estimatedNodes < self.minNodes:
                logger.info('Raising the estimated number of necessary %s (%s) to the '
                            'configured minimum (%s).', self.nodeTypeString, estimatedNodes, self.minNodes)
                estimatedNodes = self.minNodes

            if estimatedNodes != self.totalNodes:
                logger.info('Changing the number of %s from %s to %s.',
                            self.nodeTypeString, self.totalNodes, estimatedNodes)
                self.totalNodes = self.setNodeCount(numNodes=estimatedNodes,
                                                    preemptable=self.preemptable)

                # If we were scaling up the number of preemptable nodes and failed to meet
                # our target, we need to update the slack so that non-preemptable nodes will
                # be allocated instead and we won't block. If we _did_ meet our target,
                # we need to reset the slack to 0.
                if self.preemptable:
                    if self.totalNodes < estimatedNodes:
                        deficit = estimatedNodes - self.totalNodes
                        logger.info('Preemptable scaler detected deficit of %d nodes.', deficit)
                        _preemptableNodeDeficit = deficit
                    else:
                        _preemptableNodeDeficit = 0

            if self.stats:
                self.stats.checkStats()

    self.shutDown(preemptable=self.preemptable)
    logger.info('Scaler exited normally.')
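# A worked example (hypothetical numbers) of the run-time correction applied above: when the
# jobs currently running have a higher average runtime than the recently completed sample and
# the cluster is already busy, the estimate is inflated by their ratio so that long-running
# (e.g. service) jobs do not starve their dependent jobs of nodes.
def apply_runtime_correction(estimatedNodes, numberOfRunningJobs,
                             currentAvgRuntime, historicalAvgRuntime):
    """Sketch of the run-time correction step from the scaler above."""
    if currentAvgRuntime > historicalAvgRuntime and numberOfRunningJobs >= estimatedNodes:
        correction = float(currentAvgRuntime) / historicalAvgRuntime
        return int(round(estimatedNodes * correction))
    return estimatedNodes

# E.g. 8 nodes estimated, 20 jobs running, running jobs average 600 s versus a 400 s
# historical average: the correction is 1.5, raising the estimate to 12 nodes.
assert apply_runtime_correction(8, 20, 600.0, 400.0) == 12
assert apply_runtime_correction(8, 4, 600.0, 400.0) == 8    # lightly loaded: no correction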
def tryRun(self):
    if isinstance(self.scaler.jobBatcher.batchSystem, AbstractScalableBatchSystem):
        totalNodes = len(self.scaler.jobBatcher.batchSystem.getNodes(self.preemptable))
    else:
        totalNodes = 0
    logger.info('Starting with %s node(s) in the cluster.', totalNodes)

    while not self.scaler.stop:
        with throttle(self.scaler.config.scaleInterval):
            # Calculate the approx. number of nodes needed
            # TODO: Correct for jobs already running, which can be considered fractions of a job
            queueSize = self.scaler.jobBatcher.getNumberOfJobsIssued()
            recentJobShapes = self.jobShapes.get()
            assert len(recentJobShapes) > 0
            nodesToRunRecentJobs = binPacking(recentJobShapes, self.nodeShape)
            estimatedNodes = 0 if queueSize == 0 else max(1, int(round(
                self.scaler.config.alphaPacking
                * nodesToRunRecentJobs
                * float(queueSize) / len(recentJobShapes))))

            jobsPerNode = (0 if nodesToRunRecentJobs <= 0
                           else len(recentJobShapes) / float(nodesToRunRecentJobs))
            logger.debug('Estimating that cluster needs %s nodes of shape %s, from current '
                         'size of %s, given a queue size of %s, the number of jobs per node '
                         'estimated to be %s and an alpha parameter of %s.',
                         estimatedNodes, self.nodeShape, totalNodes, queueSize,
                         jobsPerNode, self.scaler.config.alphaPacking)

            # Use inertia parameter to stop small fluctuations: keep the current size if the
            # new estimate is within the betaInertia band around it.
            delta = totalNodes * max(0.0, self.scaler.config.betaInertia - 1.0)
            if totalNodes - delta <= estimatedNodes <= totalNodes + delta:
                logger.debug('Difference in new (%s) and previous estimates in number of '
                             'nodes (%s) required is within beta (%s), making no change.',
                             estimatedNodes, totalNodes, self.scaler.config.betaInertia)
                estimatedNodes = totalNodes

            # Bound number using the max and min node parameters
            if estimatedNodes > self.maxNodes:
                logger.info('Limiting the estimated number of necessary nodes (%s) to the '
                            'configured maximum (%s).', estimatedNodes, self.maxNodes)
                estimatedNodes = self.maxNodes
            elif estimatedNodes < self.minNodes:
                logger.info('Raising the estimated number of necessary nodes (%s) to the '
                            'configured minimum (%s).', estimatedNodes, self.minNodes)
                estimatedNodes = self.minNodes

            if estimatedNodes != totalNodes:
                logger.info('Changing the number of worker nodes from %s to %s.',
                            totalNodes, estimatedNodes)
                totalNodes = self.scaler.provisioner.setNodeCount(numNodes=estimatedNodes,
                                                                  preemptable=self.preemptable)

    logger.info('Forcing provisioner to reduce cluster size to zero.')
    totalNodes = self.scaler.provisioner.setNodeCount(numNodes=0,
                                                      preemptable=self.preemptable,
                                                      force=True)
    if totalNodes != 0:
        raise RuntimeError('Provisioner was not able to reduce cluster size to zero.')
    else:
        logger.info('Scaler exited normally.')
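# A worked example (hypothetical numbers) of the betaInertia dead band used by the scalers
# above: with betaInertia = 1.2 and 10 nodes currently provisioned, delta = 10 * 0.2 = 2, so
# any new estimate between 8 and 12 nodes is treated as noise and the cluster size is left
# unchanged.
def apply_inertia(estimatedNodes, totalNodes, betaInertia):
    """Sketch of the inertia dead band from the scalers above."""
    delta = totalNodes * max(0.0, betaInertia - 1.0)
    if totalNodes - delta <= estimatedNodes <= totalNodes + delta:
        return totalNodes
    return estimatedNodes

assert apply_inertia(11, 10, 1.2) == 10   # within the band: keep the current size
assert apply_inertia(15, 10, 1.2) == 15   # outside the band: accept the new estimate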