def test_over_filter_limits(value_numbers):
    """Verify describe_instances calls are paginated into MAX_PAGE_SIZE chunks."""
    ids = list(range(value_numbers))
    with mock.patch('clusterman.aws.client.ec2.describe_instances') as mock_describe:
        ec2_describe_instances(ids)

    expected_pages = math.ceil(value_numbers / MAX_PAGE_SIZE)
    assert mock_describe.call_count == expected_pages

    expected_calls = []
    for page in range(expected_pages):
        start = page * MAX_PAGE_SIZE
        expected_calls.append(call(InstanceIds=ids[start:start + MAX_PAGE_SIZE]))
    assert mock_describe.call_args_list == expected_calls
def get_instance_metadatas(
    self, state_filter: Optional[Collection[str]] = None
) -> Sequence[InstanceMetadata]:
    """Build an InstanceMetadata record for each instance in this group.

    :param state_filter: if given, only instances whose AWS state name is in
        this collection are included
    :returns: a list of InstanceMetadata objects
    """
    metadatas = []
    for instance in ec2_describe_instances(instance_ids=self.instance_ids):
        state = instance['State']['Name']
        if state_filter and state not in state_filter:
            continue

        market = get_instance_market(instance)
        ip_address = instance.get('PrivateIpAddress')
        # Instances without a private IP (e.g. still launching) get no hostname
        hostname = gethostbyaddr(ip_address)[0] if ip_address else None
        metadatas.append(InstanceMetadata(
            group_id=self.id,
            hostname=hostname,
            instance_id=instance['InstanceId'],
            ip_address=ip_address,
            is_stale=(instance['InstanceId'] in self.stale_instance_ids),
            market=market,
            state=state,
            uptime=(arrow.now() - arrow.get(instance['LaunchTime'])),
            weight=self.market_weight(market),
        ))
    return metadatas
def _instances_by_market(self):
    """Group this resource group's instances by their instance market.

    Responses from this API call are cached to prevent hitting any AWS
    request limits.
    """
    by_market: Mapping[InstanceMarket, List[Mapping]] = defaultdict(list)
    for instance in ec2_describe_instances(self.instance_ids):
        market = get_instance_market(instance)
        by_market[market].append(instance)
    return by_market
def terminate_instances_by_id(self, instance_ids: List[str], batch_size: int = 500) -> Sequence[str]:
    """ Terminate instances in this resource group

    :param instance_ids: a list of instance IDs to terminate
    :param batch_size: number of instances to terminate at one time
    :returns: a list of terminated instance IDs
    """
    if not instance_ids:
        logger.warning(f'No instances to terminate in {self.group_id}')
        return []

    # Work on a copy so we never mutate the caller's list in place
    ids_to_terminate = list(instance_ids)
    instance_weights = {}
    for instance in ec2_describe_instances(ids_to_terminate):
        instance_market = get_instance_market(instance)
        if not instance_market.az:
            logger.warning(
                f"Instance {instance['InstanceId']} missing AZ info, likely already terminated so skipping",
            )
            ids_to_terminate.remove(instance['InstanceId'])
            continue
        instance_weights[instance['InstanceId']] = self.market_weight(instance_market)

    # AWS API recommends not terminating more than 1000 instances at a time, and to
    # terminate larger numbers in batches
    terminated_instance_ids = []
    for batch_start in range(0, len(ids_to_terminate), batch_size):
        response = ec2.terminate_instances(
            InstanceIds=ids_to_terminate[batch_start:batch_start + batch_size])
        terminated_instance_ids.extend(
            instance['InstanceId'] for instance in response['TerminatingInstances']
        )

    # It's possible that not every instance is terminated.  The most likely cause for this
    # is that AWS terminated the instance in between getting its status and the terminate_instances
    # request.  This is probably fine but let's log a warning just in case.
    missing_instances = set(ids_to_terminate) - set(terminated_instance_ids)
    if missing_instances:
        logger.warning(
            'Some instances could not be terminated; they were probably killed previously'
        )
        logger.warning(f'Missing instances: {list(missing_instances)}')

    # Count capacity only for instances AWS actually reported as terminating; an
    # instance absent from the describe response has no recorded weight, so
    # default to 0 instead of raising KeyError.
    terminated_capacity = sum(instance_weights.get(i, 0) for i in terminated_instance_ids)
    logger.info(
        f'{self.id} terminated weight: {terminated_capacity}; instances: {terminated_instance_ids}'
    )
    return terminated_instance_ids
def get_tasks_and_frameworks():
    """Return (tasks, frameworks) fixture data keyed to the group's first instance."""
    group = context.pool_manager.resource_groups[context.rg_ids[0]]
    described = ec2_describe_instances(instance_ids=group.instance_ids[:1])
    task = {
        'slave_id': described[0]['InstanceId'],
        'state': 'TASK_RUNNING',
        'framework_id': 'framework_a',
    }
    frameworks = {'framework_a': {'name': 'framework_a_name'}}
    return [task] * int(tasks), frameworks
def host_from_instance_id(
    sender: str,
    receipt_handle: str,
    instance_id: str,
) -> Optional[Host]:
    """Build a Host object from an EC2 instance ID.

    Returns None if the instance cannot be described, has no spot-fleet-request
    tag, has no primary IP, or its IP cannot be resolved via DNS.
    """
    described = ec2_describe_instances(instance_ids=[instance_id])
    if not described:
        logger.warning(f'No instance data found for {instance_id}')
        return None

    scheduler = 'mesos'
    try:
        tags = described[0]['Tags']
        sfr_ids = [
            tag['Value']
            for tag in tags
            if tag['Key'] == 'aws:ec2spot:fleet-request-id'
        ]
        # A KubernetesCluster tag marks the instance as kubernetes-scheduled
        if any(tag['Key'] == 'KubernetesCluster' for tag in tags):
            scheduler = 'kubernetes'
    except KeyError as e:
        logger.warning(f'SFR tag key not found: {e}')
        sfr_ids = []

    if not sfr_ids:
        logger.warning(f'No SFR ID found for {instance_id}')
        return None

    try:
        ip = described[0]['PrivateIpAddress']
    except KeyError:
        logger.warning(f'No primary IP found for {instance_id}')
        return None

    try:
        resolved = socket.gethostbyaddr(ip)
    except socket.error:
        logger.warning(f"Couldn't derive hostname from IP via DNS for {ip}")
        return None

    return Host(
        sender=sender,
        receipt_handle=receipt_handle,
        instance_id=instance_id,
        hostname=resolved[0],
        group_id=sfr_ids[0],
        ip=ip,
        scheduler=scheduler,
    )
def test_empty_instance_ids():
    """Describing an empty or missing ID list yields an empty result."""
    for empty_ids in (None, []):
        assert ec2_describe_instances(instance_ids=empty_ids) == []