def create_instance(name, config, region, key_name, ssh_key, instance_data,
                    deploypass, loaned_to, loan_bug, create_ami,
                    ignore_subnet_check, max_attempts):
    """Launch an EC2 instance from ``config["ami"]``, tag it and assimilate it.

    The instance is started with ``conn.run_instances``, tagged, and then
    handed to ``assimilate_instance``. When ``create_ami`` is true, the root
    volume of the assimilated instance is turned into a new AMI and the
    instance is terminated afterwards.

    Args:
        name: Short host name. Used for the ``Name`` tag and joined with
            ``config['domain']`` to build the FQDN.
        config: Instance configuration dict. Reads ``domain``, ``ami``,
            ``type`` and ``instance_type``; optionally ``device_map``,
            ``subnet_ids``, ``security_group_ids``, ``use_public_ip``,
            ``dns_search_domain``, ``disable_api_termination`` and
            ``instance_profile_name``; and ``distro`` and ``tags`` when
            ``create_ami`` is true.
        region: AWS region name used for the connection, the network
            interfaces and the region DNS atom.
        key_name: Key pair name passed to ``run_instances``.
        ssh_key: Passed through to ``assimilate_instance``.
        instance_data: Dict of extra instance data. It is copied, so the
            caller's dict is not modified; ``puppet_masters`` is read from it.
        deploypass: Passed to the user-data template as ``password`` and to
            ``assimilate_instance``.
        loaned_to: If truthy, stored in the ``moz-loaned-to`` tag.
        loan_bug: If truthy, stored in the ``moz-bug`` tag.
        create_ami: If truthy, do not reboot after assimilation; create an
            AMI from the root volume and terminate the instance.
        ignore_subnet_check: Passed through to ``make_instance_interfaces``.
        max_attempts: Maximum number of tries for each retry loop (launch
            and assimilation). A falsy value means retry forever.

    Raises:
        RuntimeError: If the instance could not be started within
            ``max_attempts`` attempts.
    """
    conn = get_aws_connection(region)
    # Make sure we don't request the same things twice
    token = str(uuid.uuid4())[:16]

    # Work on a copy so the caller's dict is left untouched.
    instance_data = instance_data.copy()
    instance_data['name'] = name
    instance_data['domain'] = config['domain']
    instance_data['hostname'] = '{name}.{domain}'.format(
        name=name, domain=config['domain'])

    ami = conn.get_all_images(image_ids=[config["ami"]])[0]
    bdm = None
    if 'device_map' in config:
        bdm = BlockDeviceMapping()
        for device, device_info in config['device_map'].items():
            bd = BlockDeviceType()
            if device_info.get('size'):
                bd.size = device_info['size']
            # Overwrite root device size for HVM instances, since they cannot
            # be resized online
            if ami.virtualization_type == "hvm" and \
                    ami.root_device_name == device:
                bd.size = ami.block_device_mapping[ami.root_device_name].size
            # Volumes are deleted on termination unless explicitly disabled.
            if device_info.get("delete_on_termination") is not False:
                bd.delete_on_termination = True
            if device_info.get("ephemeral_name"):
                bd.ephemeral_name = device_info["ephemeral_name"]
            if device_info.get("volume_type"):
                bd.volume_type = device_info["volume_type"]
                # IOPS is only applied to "io1" volumes.
                if device_info["volume_type"] == "io1" \
                        and device_info.get("iops"):
                    bd.iops = device_info["iops"]

            bdm[device] = bd

    interfaces = make_instance_interfaces(
        region, instance_data['hostname'], ignore_subnet_check,
        config.get('subnet_ids'), config.get('security_group_ids', []),
        config.get("use_public_ip"))

    keep_going, attempt = True, 1
    while keep_going:
        try:
            puppet_master = pick_puppet_master(
                instance_data.get('puppet_masters'))
            user_data = user_data_from_template(config['type'], {
                "puppet_server": puppet_master,
                "fqdn": instance_data['hostname'],
                "hostname": instance_data['name'],
                "domain": instance_data['domain'],
                "dns_search_domain": config.get('dns_search_domain'),
                "password": deploypass,
                "moz_instance_type": config['type'],
                "region_dns_atom": get_region_dns_atom(region)})

            reservation = conn.run_instances(
                image_id=config['ami'],
                key_name=key_name,
                instance_type=config['instance_type'],
                block_device_map=bdm,
                client_token=token,
                disable_api_termination=config.get('disable_api_termination'),
                user_data=user_data,
                instance_profile_name=config.get('instance_profile_name'),
                network_interfaces=interfaces,
            )
            break
        except boto.exception.BotoServerError:
            log.exception("Cannot start an instance")
        time.sleep(10)
        if max_attempts:
            attempt += 1
            keep_going = max_attempts >= attempt
    else:
        # Only reached when every attempt failed (a successful launch leaves
        # the loop through ``break``). Fail with a clear error instead of
        # hitting an unbound ``reservation`` below.
        raise RuntimeError(
            "Cannot start instance %s after %s attempts" % (
                instance_data['hostname'], max_attempts))

    instance = reservation.instances[0]
    log.info("instance %s created, waiting to come up", instance)
    # Wait for the instance to come up
    wait_for_status(instance, "state", "running", "update")
    instance.add_tag('Name', name)
    instance.add_tag('FQDN', instance_data['hostname'])
    instance.add_tag('created', time.strftime("%Y-%m-%d %H:%M:%S %Z",
                                              time.gmtime()))
    instance.add_tag('moz-type', config['type'])
    if loaned_to:
        instance.add_tag("moz-loaned-to", loaned_to)
    if loan_bug:
        instance.add_tag("moz-bug", loan_bug)

    log.info("assimilating %s", instance)
    instance.add_tag('moz-state', 'pending')

    keep_going, attempt = True, 1
    while keep_going:
        try:
            # Don't reboot if need to create ami
            reboot = not create_ami
            assimilate_instance(instance=instance, config=config,
                                ssh_key=ssh_key, instance_data=instance_data,
                                deploypass=deploypass, reboot=reboot)
            break
        except NetworkError as e:
            # it takes a while for the machine to start/reboot so the
            # NetworkError exception is quite common, just log the error,
            # without the full stack trace
            log.warning(
                "cannot connect; instance may still be starting %s "
                "(%s, %s) - %s, retrying in %d sec ...",
                instance_data['hostname'], instance.id,
                instance.private_ip_address, e, FAILURE_TIMEOUT)
            time.sleep(FAILURE_TIMEOUT)
        except:  # noqa: E722
            # any other exception
            # NOTE(review): the bare except is kept on purpose; it presumably
            # also needs to catch non-Exception errors (e.g. SystemExit)
            # raised by the remote-execution layer -- confirm before narrowing.
            log.warning("problem assimilating %s (%s, %s), retrying in "
                        "%d sec ...", instance_data['hostname'], instance.id,
                        instance.private_ip_address, FAILURE_TIMEOUT,
                        exc_info=True)
            time.sleep(FAILURE_TIMEOUT)
        if max_attempts:
            attempt += 1
            keep_going = max_attempts >= attempt

    # NOTE(review): when max_attempts is exhausted without a successful
    # assimilation, execution still falls through to here and the instance
    # is tagged "ready" -- confirm this is intended.
    instance.add_tag('moz-state', 'ready')
    if create_ami:
        ami_name = "spot-%s-%s" % (
            config['type'], time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))
        log.info("Generating AMI %s", ami_name)
        ami_cleanup(mount_point="/", distro=config["distro"])
        root_bd = instance.block_device_mapping[instance.root_device_name]
        volume = instance.connection.get_all_volumes(
            volume_ids=[root_bd.volume_id])[0]
        # The instance has to be stopped to flush EBS caches
        instance.stop()
        wait_for_status(instance, 'state', 'stopped', 'update')
        ami = volume_to_ami(volume=volume, ami_name=ami_name,
                            arch=instance.architecture,
                            virtualization_type=instance.virtualization_type,
                            kernel_id=instance.kernel,
                            root_device_name=instance.root_device_name,
                            tags=config["tags"])
        log.info("AMI %s (%s) is ready", ami_name, ami.id)
        log.warning("Terminating %s", instance)
        instance.terminate()
def create_instance(name, config, region, key_name, ssh_key, instance_data,
                    deploypass, loaned_to, loan_bug, create_ami,
                    ignore_subnet_check, max_attempts):
    """Launch an EC2 instance from ``config["ami"]``, tag it and assimilate it.

    The instance is started with ``conn.run_instances``, tagged, and then
    handed to ``assimilate_instance``. When ``create_ami`` is true, the root
    volume of the assimilated instance is turned into a new AMI and the
    instance is terminated afterwards.

    Args:
        name: Short host name. Used for the ``Name`` tag and joined with
            ``config['domain']`` to build the FQDN.
        config: Instance configuration dict. Reads ``domain``, ``ami``,
            ``type`` and ``instance_type``; optionally ``device_map``,
            ``subnet_ids``, ``security_group_ids``, ``use_public_ip``,
            ``dns_search_domain``, ``disable_api_termination`` and
            ``instance_profile_name``; and ``distro`` and ``tags`` when
            ``create_ami`` is true.
        region: AWS region name used for the connection, the network
            interfaces and the region DNS atom.
        key_name: Key pair name passed to ``run_instances``.
        ssh_key: Passed through to ``assimilate_instance``.
        instance_data: Dict of extra instance data. It is copied, so the
            caller's dict is not modified; ``puppet_masters`` is read from it.
        deploypass: Passed to the user-data template as ``password`` and to
            ``assimilate_instance``.
        loaned_to: If truthy, stored in the ``moz-loaned-to`` tag.
        loan_bug: If truthy, stored in the ``moz-bug`` tag.
        create_ami: If truthy, do not reboot after assimilation; create an
            AMI from the root volume and terminate the instance.
        ignore_subnet_check: Passed through to ``make_instance_interfaces``.
        max_attempts: Maximum number of tries for each retry loop (launch
            and assimilation). A falsy value means retry forever.

    Raises:
        RuntimeError: If the instance could not be started within
            ``max_attempts`` attempts.
    """
    conn = get_aws_connection(region)
    # Make sure we don't request the same things twice
    token = str(uuid.uuid4())[:16]

    # Work on a copy so the caller's dict is left untouched.
    instance_data = instance_data.copy()
    instance_data['name'] = name
    instance_data['domain'] = config['domain']
    instance_data['hostname'] = '{name}.{domain}'.format(
        name=name, domain=config['domain'])

    ami = conn.get_all_images(image_ids=[config["ami"]])[0]
    bdm = None
    if 'device_map' in config:
        bdm = BlockDeviceMapping()
        for device, device_info in config['device_map'].items():
            bd = BlockDeviceType()
            if device_info.get('size'):
                bd.size = device_info['size']
            # Overwrite root device size for HVM instances, since they cannot
            # be resized online
            if ami.virtualization_type == "hvm" and \
                    ami.root_device_name == device:
                bd.size = ami.block_device_mapping[ami.root_device_name].size
            # Volumes are deleted on termination unless explicitly disabled.
            if device_info.get("delete_on_termination") is not False:
                bd.delete_on_termination = True
            if device_info.get("ephemeral_name"):
                bd.ephemeral_name = device_info["ephemeral_name"]
            if device_info.get("volume_type"):
                bd.volume_type = device_info["volume_type"]
                # IOPS is only applied to "io1" volumes.
                if device_info["volume_type"] == "io1" \
                        and device_info.get("iops"):
                    bd.iops = device_info["iops"]

            bdm[device] = bd

    interfaces = make_instance_interfaces(
        region, instance_data['hostname'], ignore_subnet_check,
        config.get('subnet_ids'), config.get('security_group_ids', []),
        config.get("use_public_ip"))

    keep_going, attempt = True, 1
    while keep_going:
        try:
            puppet_master = pick_puppet_master(
                instance_data.get('puppet_masters'))
            user_data = user_data_from_template(
                config['type'], {
                    "puppet_server": puppet_master,
                    "fqdn": instance_data['hostname'],
                    "hostname": instance_data['name'],
                    "domain": instance_data['domain'],
                    "dns_search_domain": config.get('dns_search_domain'),
                    "password": deploypass,
                    "moz_instance_type": config['type'],
                    "region_dns_atom": get_region_dns_atom(region)
                })

            reservation = conn.run_instances(
                image_id=config['ami'],
                key_name=key_name,
                instance_type=config['instance_type'],
                block_device_map=bdm,
                client_token=token,
                disable_api_termination=config.get('disable_api_termination'),
                user_data=user_data,
                instance_profile_name=config.get('instance_profile_name'),
                network_interfaces=interfaces,
            )
            break
        except boto.exception.BotoServerError:
            log.exception("Cannot start an instance")
        time.sleep(10)
        if max_attempts:
            attempt += 1
            keep_going = max_attempts >= attempt
    else:
        # Only reached when every attempt failed (a successful launch leaves
        # the loop through ``break``). Fail with a clear error instead of
        # hitting an unbound ``reservation`` below.
        raise RuntimeError(
            "Cannot start instance %s after %s attempts" % (
                instance_data['hostname'], max_attempts))

    instance = reservation.instances[0]
    log.info("instance %s created, waiting to come up", instance)
    # Wait for the instance to come up
    wait_for_status(instance, "state", "running", "update")
    instance.add_tag('Name', name)
    instance.add_tag('FQDN', instance_data['hostname'])
    instance.add_tag('created', time.strftime("%Y-%m-%d %H:%M:%S %Z",
                                              time.gmtime()))
    instance.add_tag('moz-type', config['type'])
    if loaned_to:
        instance.add_tag("moz-loaned-to", loaned_to)
    if loan_bug:
        instance.add_tag("moz-bug", loan_bug)

    log.info("assimilating %s", instance)
    instance.add_tag('moz-state', 'pending')

    keep_going, attempt = True, 1
    while keep_going:
        try:
            # Don't reboot if need to create ami
            reboot = not create_ami
            assimilate_instance(instance=instance, config=config,
                                ssh_key=ssh_key, instance_data=instance_data,
                                deploypass=deploypass, reboot=reboot)
            break
        except NetworkError as e:
            # it takes a while for the machine to start/reboot so the
            # NetworkError exception is quite common, just log the error,
            # without the full stack trace
            log.warning(
                "cannot connect; instance may still be starting %s "
                "(%s, %s) - %s, retrying in %d sec ...",
                instance_data['hostname'], instance.id,
                instance.private_ip_address, e, FAILURE_TIMEOUT)
            time.sleep(FAILURE_TIMEOUT)
        except:  # noqa: E722
            # any other exception
            # NOTE(review): the bare except is kept on purpose; it presumably
            # also needs to catch non-Exception errors (e.g. SystemExit)
            # raised by the remote-execution layer -- confirm before narrowing.
            log.warning(
                "problem assimilating %s (%s, %s), retrying in "
                "%d sec ...", instance_data['hostname'], instance.id,
                instance.private_ip_address, FAILURE_TIMEOUT, exc_info=True)
            time.sleep(FAILURE_TIMEOUT)
        if max_attempts:
            attempt += 1
            keep_going = max_attempts >= attempt

    # NOTE(review): when max_attempts is exhausted without a successful
    # assimilation, execution still falls through to here and the instance
    # is tagged "ready" -- confirm this is intended.
    instance.add_tag('moz-state', 'ready')
    if create_ami:
        ami_name = "spot-%s-%s" % (
            config['type'], time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))
        log.info("Generating AMI %s", ami_name)
        ami_cleanup(mount_point="/", distro=config["distro"])
        root_bd = instance.block_device_mapping[instance.root_device_name]
        volume = instance.connection.get_all_volumes(
            volume_ids=[root_bd.volume_id])[0]
        # The instance has to be stopped to flush EBS caches
        # The sleep is to prevent the occasional interference of the shutdown
        # with the capture of Windows AMIs
        time.sleep(15)
        instance.stop()
        wait_for_status(instance, 'state', 'stopped', 'update')
        ami = volume_to_ami(volume=volume, ami_name=ami_name,
                            arch=instance.architecture,
                            virtualization_type=instance.virtualization_type,
                            kernel_id=instance.kernel,
                            root_device_name=instance.root_device_name,
                            tags=config["tags"])
        log.info("AMI %s (%s) is ready", ami_name, ami.id)
        log.warning("Terminating %s", instance)
        instance.terminate()
def do_request_instance(region, moz_instance_type, price, ami, instance_config,
                        instance_type, availability_zone, slaveset, is_spot,
                        all_instances, dryrun):
    """Request one spot or on-demand instance and record a "started" metric.

    Args:
        region: AWS region name; also the key into ``instance_config``.
        moz_instance_type: Instance type name used for slave naming, the
            user-data template and the metric name.
        price: Passed through to the request function; logged for spot
            requests.
        ami: Image object; ``id``, ``virtualization_type`` and
            ``root_device_type`` are read from it.
        instance_config: Dict keyed by region. The per-region dict must
            provide ``subnet_ids``, ``domain``, ``device_map`` and
            ``ssh_key``; ``security_group_ids`` and
            ``instance_profile_name`` are optional.
        instance_type: EC2 instance type string (dots are replaced with
            dashes in the metric name).
        availability_zone: Zone in which a subnet is looked up.
        slaveset: Passed to ``get_available_slave_name`` and
            ``jacuzzi_suffix``.
        is_spot: Truthy for a spot request, falsy for an on-demand one.
        all_instances: Passed through to ``get_available_slave_name``.
        dryrun: If truthy, stop before any request is made.

    Returns:
        ``False`` when no slave name or no subnet is available, ``True`` on
        a dry run, otherwise the result of the spot/on-demand request
        function.
    """
    name = get_available_slave_name(region, moz_instance_type, slaveset,
                                    is_spot=is_spot,
                                    all_instances=all_instances)
    if not name:
        # Lazy %-args, consistent with the other logging calls in this block.
        log.debug("No slave name available for %s, %s, %s",
                  region, moz_instance_type, slaveset)
        return False

    subnet_id = get_avail_subnet(region, instance_config[region]["subnet_ids"],
                                 availability_zone)
    if not subnet_id:
        log.debug("No free IP available for %s in %s", moz_instance_type,
                  availability_zone)
        return False

    fqdn = "{}.{}".format(name, instance_config[region]["domain"])
    if is_spot:
        log.debug("Spot request for %s (%s)", fqdn, price)
    else:
        log.debug("Starting %s", fqdn)

    if dryrun:
        log.info("Dry run. skipping")
        return True

    spec = NetworkInterfaceSpecification(
        associate_public_ip_address=True, subnet_id=subnet_id,
        delete_on_termination=True,
        groups=instance_config[region].get("security_group_ids"))
    nc = NetworkInterfaceCollection(spec)

    user_data = user_data_from_template(moz_instance_type, fqdn, region)
    bdm = create_block_device_mapping(ami,
                                      instance_config[region]['device_map'])

    # Both request functions take the same arguments; only the callee differs.
    if is_spot:
        request_instance = do_request_spot_instance
    else:
        request_instance = do_request_ondemand_instance
    rv = request_instance(
        region, price, ami.id, instance_type,
        instance_config[region]["ssh_key"], user_data, bdm, nc,
        instance_config[region].get("instance_profile_name"),
        moz_instance_type, name, fqdn)

    if rv:
        template_values = dict(
            region=region,
            moz_instance_type=moz_instance_type,
            instance_type=instance_type.replace(".", "-"),
            life_cycle_type="spot" if is_spot else "ondemand",
            virtualization=ami.virtualization_type,
            root_device_type=ami.root_device_type,
            jacuzzi_type=jacuzzi_suffix(slaveset),
        )
        # Kept separate from ``name`` (the slave host name) to avoid
        # rebinding it to an unrelated value.
        metric_name = "started.{region}.{moz_instance_type}.{instance_type}" \
            ".{life_cycle_type}.{virtualization}.{root_device_type}" \
            ".{jacuzzi_type}"
        gr_log.add(metric_name.format(**template_values), 1, collect=True)
    return rv
def do_request_instance(region, moz_instance_type, price, ami, instance_config,
                        instance_type, availability_zone, is_spot,
                        all_instances, dryrun):
    """Request one spot or on-demand instance and record a "started" metric.

    Returns ``False`` when no slave name or no subnet is available, ``True``
    on a dry run, and otherwise whatever the spot/on-demand request function
    returned. A truthy result also bumps a ``started.*`` counter in
    ``gr_log``.
    """
    slave_name = get_available_slave_name(
        region, moz_instance_type, is_spot=is_spot,
        all_instances=all_instances)
    if not slave_name:
        log.debug("No slave name available for %s, %s", region,
                  moz_instance_type)
        return False

    # Per-region settings; looked up only once a slave name is available.
    region_config = instance_config[region]

    subnet_id = get_avail_subnet(
        region, region_config["subnet_ids"], availability_zone)
    if not subnet_id:
        log.debug("No free IP available for %s in %s", moz_instance_type,
                  availability_zone)
        return False

    domain = region_config["domain"]
    fqdn = "{}.{}".format(slave_name, domain)
    if is_spot:
        log.debug("Spot request for %s (%s)", fqdn, price)
    else:
        log.debug("Starting %s", fqdn)

    if dryrun:
        log.info("Dry run. skipping")
        return True

    interfaces = NetworkInterfaceCollection(
        NetworkInterfaceSpecification(
            associate_public_ip_address=True,
            subnet_id=subnet_id,
            delete_on_termination=True,
            groups=region_config.get("security_group_ids")))

    template_context = {
        "moz_instance_type": moz_instance_type,
        "hostname": slave_name,
        "domain": domain,
        "fqdn": fqdn,
        "region_dns_atom": get_region_dns_atom(region),
        "puppet_server": "",  # intentionally empty
        "password": "",  # intentionally empty
    }
    user_data = user_data_from_template(moz_instance_type, template_context)
    bdm = create_block_device_mapping(ami, region_config['device_map'])

    # Spot and on-demand requests share one argument list.
    if is_spot:
        submit_request = do_request_spot_instance
    else:
        submit_request = do_request_ondemand_instance
    rv = submit_request(
        region, price, ami.id, instance_type, region_config["ssh_key"],
        user_data, bdm, interfaces,
        region_config.get("instance_profile_name"),
        moz_instance_type, slave_name, fqdn)

    if not rv:
        return rv

    metric_fields = {
        "region": region,
        "moz_instance_type": moz_instance_type,
        "instance_type": instance_type.replace(".", "-"),
        "life_cycle_type": "spot" if is_spot else "ondemand",
        "virtualization": ami.virtualization_type,
        "root_device_type": ami.root_device_type,
    }
    metric_template = (
        "started.{region}.{moz_instance_type}.{instance_type}"
        ".{life_cycle_type}.{virtualization}.{root_device_type}")
    gr_log.add(metric_template.format(**metric_fields), 1, collect=True)
    return rv