def wait_until(state):
    """Block until `instance_count` instances of the service report *state*.

    Re-fetches the service on every poll and gives up with
    OperationTimedOut after RETRY_COUNT polls.
    """
    asg = self.get(network, service_name)

    def in_state(svc, desired):
        # Flatten instances across all subnetworks, keeping those whose
        # state matches *desired*.
        return [
            inst
            for subnet in svc.subnetworks
            for inst in subnet.instances
            if inst.state == desired
        ]

    attempts = 0
    while len(in_state(asg, state)) < instance_count:
        logger.info(
            "Waiting for instance creation for service %s. %s of %s running",
            service_name, len(in_state(asg, state)), instance_count)
        logger.debug("Waiting for instance creation in asg: %s", asg)
        asg = self.get(network, service_name)
        attempts += 1
        if attempts > RETRY_COUNT:
            raise OperationTimedOut(
                "Timed out waiting for ASG to be created")
        time.sleep(RETRY_DELAY)
    logger.info("Success! %s of %s instances running.",
                len(in_state(asg, state)), instance_count)
def delete_with_retries(self, security_group_id, retries, retry_delay):
    """Delete a security group, retrying on transient dependency errors.

    Args:
        security_group_id: ID of the EC2 security group to delete.
        retries: Maximum number of delete attempts.
        retry_delay: Seconds to sleep between attempts.

    Raises:
        OperationTimedOut: If the group could not be deleted within
            *retries* attempts.
        ClientError: For any AWS error other than DependencyViolation.
    """
    ec2 = self.driver.client("ec2")

    def attempt_delete_security_group(group_id):
        # True on success; False when the failure is a (possibly
        # transient) DependencyViolation worth retrying; re-raise for
        # anything else.
        try:
            ec2.delete_security_group(GroupId=group_id)
            return True
        except ClientError as client_error:
            # Fixed typo in log message: "Recieved" -> "Received".
            logger.info("Received exception destroying security group: %s",
                        client_error)
            if (client_error.response["Error"]["Code"] ==
                    "DependencyViolation"):
                return False
            # Bare raise preserves the original traceback.
            raise

    deletion_retries = 0
    while deletion_retries < retries:
        if attempt_delete_security_group(security_group_id):
            break
        deletion_retries = deletion_retries + 1
        if deletion_retries >= retries:
            raise OperationTimedOut(
                "Exceeded max retries while deleting security group: %s"
                % security_group_id)
        time.sleep(float(retry_delay))
def wait_for_available(image_id):
    """Raise OperationTimedOut unless the image is already 'available'.

    Despite the name this performs a single check; it is expected to be
    driven by an external retry mechanism.
    """
    image = get_image(image_id)
    logger.debug("Current image state: %s", image)
    if image["State"] == "available":
        return
    raise OperationTimedOut(
        "Timed out waiting for image %s to be available." % image_id)
def add_instances_to_subnetwork_list(network, service_name, subnetworks): """ Add the instances for this service to the list of subnetworks. Returns the same subnetwork list with the instances added. """ # 1. Get List Of Instances. discovery_retries = 0 discovery_complete = False while discovery_retries < RETRY_COUNT: try: asg = self._discover_asg(network.name, service_name) if not asg: return subnetworks instance_ids = [ instance["InstanceId"] for instance in asg["Instances"] ] instances = discover_instances(instance_ids) logger.debug("Discovered instances: %s", instances) discovery_complete = True except ClientError as client_error: # There is a race between when I discover the autoscaling group # itself and when I try to search for the instances inside it, # so just retry if this happens. logger.debug("Recieved exception discovering instance: %s", client_error) if client_error.response["Error"][ "Code"] == "InvalidInstanceID.NotFound": pass else: raise if discovery_complete: break discovery_retries = discovery_retries + 1 logger.debug("Instance discovery retry number: %s", discovery_retries) if discovery_retries >= RETRY_COUNT: raise OperationTimedOut( "Exceeded retries while discovering %s, in network %s" % (service_name, network)) time.sleep(RETRY_DELAY) # 2. Add instances to subnets. # NOTE: In moto instance objects do not include a "SubnetId" and the IP addresses are # assigned randomly in the VPC, so for now just stripe instances across subnets. if self.mock: for instance, subnetwork, in zip(instances, itertools.cycle(subnetworks)): subnetwork.instances.append( canonicalize_instance_info(instance)) return subnetworks for subnetwork in subnetworks: for instance in instances: if "SubnetId" in instance and subnetwork.subnetwork_id == instance[ "SubnetId"]: subnetwork.instances.append( canonicalize_instance_info(instance)) return subnetworks
def create(self, name, blueprint):
    """
    Create new network named "name" with blueprint file at "blueprint".

    Args:
        name: Name to tag the new VPC with.  Creation fails with
            DisallowedOperationException if a VPC with this name exists.
        blueprint: Path to a NetworkBlueprint file, or falsy for defaults.

    Returns:
        Canonicalized network info for the created VPC.

    Raises:
        DisallowedOperationException: If a VPC named *name* already exists.
        NotEnoughIPSpaceException: If no CIDR of the requested size fits.
        OperationTimedOut: If the created VPC never becomes discoverable
            by name (the VPC is deleted before raising).
    """
    ec2 = self.driver.client("ec2")
    if self.get(name):
        raise DisallowedOperationException("Found existing VPC named: %s" %
                                           name)
    if blueprint:
        network_blueprint = NetworkBlueprint.from_file(blueprint)
    else:
        network_blueprint = NetworkBlueprint("")
    allocation_blocks = network_blueprint.get_allowed_private_cidr()

    def get_cidr(prefix, address_range_includes, address_range_excludes):
        # Return the first subnet of size *prefix* that fits inside the
        # include ranges without overlapping any exclude range.
        for address_range_include in address_range_includes:
            for cidr in generate_subnets(address_range_include,
                                         address_range_excludes, prefix,
                                         count=1):
                return str(cidr)
        # BUG FIX: the "excluding" clause previously interpolated the
        # include ranges a second time instead of the exclude ranges.
        raise NotEnoughIPSpaceException(
            "Could not allocate network of size "
            "%s in %s, excluding %s" % (prefix, address_range_includes,
                                        address_range_excludes))

    vpc = ec2.create_vpc(CidrBlock=get_cidr(network_blueprint.get_prefix(),
                                            [allocation_blocks], []))
    vpc_id = vpc["Vpc"]["VpcId"]
    try:
        # Tagging/lookup is eventually consistent, so keep tagging and
        # re-checking until the VPC is discoverable by name.
        creation_retries = 0
        while creation_retries < RETRY_COUNT:
            try:
                ec2.create_tags(Resources=[vpc_id],
                                Tags=[{
                                    "Key": "Name",
                                    "Value": name
                                }])
                if not self.get(name):
                    time.sleep(float(RETRY_DELAY))
                else:
                    break
            except ec2.exceptions.ClientError as client_error:
                logger.debug("Received exception tagging VPC: %s",
                             client_error)
                time.sleep(float(RETRY_DELAY))
            creation_retries = creation_retries + 1
            if creation_retries >= RETRY_COUNT:
                raise OperationTimedOut("Cannot find created VPC: %s" %
                                        vpc_id)
    except OperationTimedOut as exception:
        # Clean up the orphaned VPC before propagating the timeout.
        ec2.delete_vpc(VpcId=vpc_id)
        raise exception
    return canonicalize_network_info(
        name, vpc["Vpc"], self.driver.session.Session().region_name)
def create(self, subnetwork_name, subnet_cidr, availability_zone, dc_id,
           retry_count, retry_delay):
    """
    Provision a single subnet with a route table and the proper tags.
    """
    ec2 = self.driver.client("ec2")

    # Create the subnet, then a dedicated route table, and associate them.
    created_subnet = ec2.create_subnet(CidrBlock=subnet_cidr,
                                       AvailabilityZone=availability_zone,
                                       VpcId=dc_id)
    subnet_id = created_subnet["Subnet"]["SubnetId"]
    route_table = ec2.create_route_table(VpcId=dc_id)
    ec2.associate_route_table(
        RouteTableId=route_table["RouteTable"]["RouteTableId"],
        SubnetId=subnet_id)

    # Tag visibility is eventually consistent, so keep tagging and
    # re-querying until the Name tag shows up or attempts run out.
    for _ in range(retry_count):
        try:
            ec2.create_tags(Resources=[subnet_id],
                            Tags=[{
                                "Key": "Name",
                                "Value": subnetwork_name
                            }])
            described = ec2.describe_subnets(
                Filters=[{
                    'Name': "vpc-id",
                    'Values': [dc_id]
                }, {
                    'Name': "tag:Name",
                    'Values': [subnetwork_name]
                }])
            visible_ids = [
                subnet["SubnetId"] for subnet in described["Subnets"]
            ]
            if subnet_id in visible_ids:
                break
            time.sleep(float(retry_delay))
        except ec2.exceptions.ClientError as client_error:
            logger.info("Caught exception creating tags: %s", client_error)
            time.sleep(float(retry_delay))
    else:
        # Loop exhausted without finding the tagged subnet.
        raise OperationTimedOut("Cannot find created Subnet: %s" % subnet_id)
    return created_subnet["Subnet"]
def delete(self, subnet_id, retry_count, retry_delay):
    """Delete a subnet, retrying on transient dependency violations.

    Args:
        subnet_id: ID of the subnet to delete.
        retry_count: Maximum number of delete attempts.
        retry_delay: Seconds to sleep between attempts.

    Raises:
        OperationTimedOut: If the subnet could not be deleted within
            *retry_count* attempts.
        ClientError: For any AWS error other than DependencyViolation or
            InvalidSubnetID.NotFound.
    """
    ec2 = self.driver.client("ec2")
    deletion_retries = 0
    while deletion_retries < retry_count:
        try:
            ec2.delete_subnet(SubnetId=subnet_id)
            # BUG FIX: the original fell through and re-attempted the
            # delete after success, relying on InvalidSubnetID.NotFound
            # on the next iteration to return.  Return immediately.
            return
        except ec2.exceptions.ClientError as client_error:
            error_code = client_error.response['Error']['Code']
            if error_code == 'DependencyViolation':
                # A dependency violation might be transient if
                # something is being actively deleted by AWS, so sleep
                # and retry if we get this specific error.
                time.sleep(float(retry_delay))
            elif error_code == 'InvalidSubnetID.NotFound':
                # Just return successfully if the subnet is already gone
                # for some reason.
                return
            else:
                # Bare raise preserves the original traceback.
                raise
        deletion_retries = deletion_retries + 1
    # BUG FIX: the original message interpolated ``client_error``, which
    # is unbound here in Python 3 (except-clause targets are cleared on
    # exit), so reaching this line raised NameError instead.
    raise OperationTimedOut("Failed to delete subnet: %s" % subnet_id)
def destroy(self, service):
    """
    Destroy a group of instances described by "service".

    Teardown is ordered: scale the ASG's instances down and wait for
    termination, delete the ASG and wait for it to disappear, delete the
    launch configuration, delete the associated security group, and
    finally destroy the service's subnetworks.  Raises OperationTimedOut
    if any wait exceeds RETRY_COUNT polls.
    """
    logger.debug("Attempting to destroy: %s", service)
    asg_name = AsgName(network=service.network.name,
                       subnetwork=service.name)
    asg = self._discover_asg(service.network.name, service.name)
    if asg:
        self.asg.destroy_auto_scaling_group_instances(asg_name)

    # Wait for instances to be gone.  Need to do this before we can delete
    # the actual ASG otherwise it will error.
    def instance_list(service, state):
        # NOTE: despite the name, this returns instances NOT in *state* —
        # i.e. the ones still pending termination when state="terminated".
        return [
            instance for subnetwork in service.subnetworks
            for instance in subnetwork.instances
            if instance.state != state
        ]

    service = self.get(service.network, service.name)
    logger.debug("Found service: %s", service)
    retries = 0
    while service and instance_list(service, "terminated"):
        logger.info(
            "Waiting for instance termination in service %s. %s still terminating",
            service.name, len(instance_list(service, "terminated")))
        logger.debug("Waiting for instance termination in asg: %s",
                     service)
        service = self.get(service.network, service.name)
        retries = retries + 1
        if retries > RETRY_COUNT:
            raise OperationTimedOut("Timed out waiting for ASG scale down")
        time.sleep(RETRY_DELAY)
    logger.info("Success! All instances terminated.")

    asg = self._discover_asg(service.network.name, service.name)
    if asg:
        self.asg.destroy_auto_scaling_group(asg_name)

    # Wait for ASG to be gone.  Need to wait for this because it's a
    # dependency of the launch configuration.
    asg = self._discover_asg(service.network.name, service.name)
    retries = 0
    while asg:
        logger.debug("Waiting for asg deletion: %s", asg)
        asg = self._discover_asg(service.network.name, service.name)
        retries = retries + 1
        if retries > RETRY_COUNT:
            raise OperationTimedOut("Timed out waiting for ASG deletion")
        time.sleep(RETRY_DELAY)

    # NOTE(review): ``service`` was re-fetched above and the wait loop can
    # exit with service == None; the attribute access below would then
    # raise AttributeError — confirm self.get never returns None here.
    vpc_id = service.network.network_id
    lc_security_group = self.asg.get_launch_configuration_security_group(
        service.network.name, service.name)
    self.asg.destroy_launch_configuration(asg_name)
    if lc_security_group:
        # The launch configuration referenced a security group directly:
        # remove rules in other groups that reference it, then delete it.
        logger.debug("Deleting referencing rules of sg: %s",
                     lc_security_group)
        self.security_groups.delete_referencing_rules(
            vpc_id, lc_security_group)
        logger.debug("Attempting to delete sg: %s", lc_security_group)
        self.security_groups.delete_with_retries(lc_security_group,
                                                 RETRY_COUNT, RETRY_DELAY)
    else:
        # Fall back to deleting the security group by its conventional
        # name (the stringified ASG name).
        logger.debug("Attempting to delete sg by name: %s", str(asg_name))
        self.security_groups.delete_by_name(vpc_id, str(asg_name),
                                            RETRY_COUNT, RETRY_DELAY)
    self.subnetwork.destroy(service.network, service.name)
def wait_for_stopped(instance_id):
    """Raise OperationTimedOut unless the instance is already stopped.

    Despite the name this performs a single check; it is expected to be
    driven by an external retry mechanism.
    """
    raw_instance = get_instance(instance_id)
    logger.debug("Current state: %s", raw_instance)
    if raw_instance["State"]["Name"] == "stopped":
        return
    raise OperationTimedOut(
        "Timed out waiting for instance: %s to stop" % instance_id)
def wait_for_destroyed(image_name):
    """Raise OperationTimedOut if the image named *image_name* still exists.

    Despite the name this performs a single check; it is expected to be
    driven by an external retry mechanism.
    """
    logger.info("Waiting for image: %s to be destroyed", image_name)
    # BUG FIX: this previously checked ``self.get(image.name)``, but
    # ``image`` is not defined in this function — every sibling waiter
    # uses its own parameter, so look up by the image_name argument.
    if self.get(image_name):
        raise OperationTimedOut(
            "Timed out waiting for image %s to be gone." % image_name)
    logger.info("Success, did not find image: %s", image_name)