def attach_disk(self, parameters, disk_name, instance_id):
    """ Attaches the Elastic Block Store volume specified in 'disk_name' to this
    virtual machine.

    After requesting the attachment, the volume's status is polled (with a
    utils.sleep(1) between polls) until it reports 'in-use'. There is no upper
    bound on how long this waits.

    Args:
      parameters: A dict with keys for each parameter needed to connect to AWS.
      disk_name: A str naming the EBS mount to attach to this machine.
      instance_id: A str naming the id of the instance that the disk should be
        attached to. In practice, callers add disks to their own instances.
    Returns:
      The str '/dev/sdc', which is the device location requested for the
      volume. If an EC2ResponseError is caught, control passes to
      self.handle_failure instead (presumably it raises - confirm against its
      definition).
    """
    try:
        conn = self.open_connection(parameters)
        utils.log('Attaching volume {0} to instance {1}, at /dev/sdc'.format(
            disk_name, instance_id))
        conn.attach_volume(disk_name, instance_id, '/dev/sdc')

        while True:
            utils.log('Waiting for disk to finish attaching.')
            status = conn.get_all_volumes(disk_name)[0].status
            utils.log('Volume {0} reports its status as {1}'.format(
                disk_name, status))
            if status == 'in-use':
                break
            utils.sleep(1)

        # The volume name must be interpolated here; previously the literal
        # text '{0}' was written to the log.
        utils.log('Volume {0} is attached and ready for use.'.format(disk_name))
        return '/dev/sdc'
    except EC2ResponseError as exception:
        utils.log('An error occurred when trying to attach volume {0} to '
                  'instance {1} at /dev/sdc'.format(disk_name, instance_id))
        self.handle_failure('EC2 response error while attaching volume:' +
                            exception.error_message)
def attach_disk(self, parameters, disk_name, instance_id):
    """ Attaches the Elastic Block Store (EBS) volume specified in 'disk_name'
    to this virtual machine.

    Unlike the EC2 version, the device location is not taken from the attach
    request. Instead, the contents of /dev are listed before and after the
    attachment, and the single newly-appeared entry is reported as the place
    where the disk landed.

    Args:
      parameters: A dict with keys for each parameter needed to connect to AWS.
      disk_name: A str naming the EBS volume to attach to this machine.
      instance_id: A str naming the id of the instance that the disk should be
        attached to. In practice, callers add disks to their own instances.
    Returns:
      The location on the local filesystem where the disk has been attached.
    """
    known_devices = glob.glob('/dev/*')
    EC2Agent.attach_disk(self, parameters, disk_name, instance_id)

    while True:
        current_devices = glob.glob('/dev/*')
        added = utils.diff(current_devices, known_devices)

        # Nothing new yet: wait a moment and look again.
        if not added:
            utils.log("Still waiting for attached device to appear.")
            utils.sleep(1)
            continue

        utils.log("Found new attached devices: {0}".format(added))

        # More than one new entry means we cannot tell which one is ours.
        if len(added) != 1:
            self.handle_failure(
                "Found too many new attached devices - not sure"
                " which one is the device we attached. New devices are {0}".format(
                    added))
            continue

        utils.log("Found exactly one new attached device at {0}".format(
            added[0]))
        return added[0]
def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, ssl=True):
    """ Initialize a new instance of the EAGER service.

    Blocks until the secret key file can be read and, when SSL is requested,
    until the certificate and private key files exist. It then creates the
    SOAP server and registers the Eager operations on it. The server is only
    constructed here, not started.

    Args:
      host  Hostname to which the service should bind (Optional). Defaults
            to DEFAULT_HOST.
      port  Port of the service (Optional). Defaults to DEFAULT_PORT.
      ssl   True if SSL should be engaged or False otherwise (Optional).
            Defaults to True. When engaged, this implementation expects to
            find 'certs/mycert.pem' and 'certs/mykey.pem' under
            self.APPSCALE_DIR.
    """
    self.host = host
    self.port = port

    # The secret's value is not needed here; fetching it only serves to wait
    # until the key file has been put in place.
    while True:
        try:
            utils.get_secret(self.APPSCALE_DIR + 'secret.key')
            break
        except Exception:
            utils.log('Waiting for the secret key to become available')
            utils.sleep(5)
    # Deliberately do not write the secret's value to the log.
    utils.log('Found the secret key')

    SOAPpy.Config.simplify_objects = True

    if ssl:
        utils.log('Checking for the certificate and private key')
        cert = self.APPSCALE_DIR + 'certs/mycert.pem'
        key = self.APPSCALE_DIR + 'certs/mykey.pem'
        while not (os.path.exists(cert) and os.path.exists(key)):
            utils.log('Waiting for certificates')
            utils.sleep(5)

        ssl_context = SSL.Context()
        ssl_context.load_cert(cert, key)
        self.server = SOAPpy.SOAPServer((host, port), ssl_context=ssl_context)
    else:
        self.server = SOAPpy.SOAPServer((host, port))

    # Expose the Eager operations over SOAP, in the same order as before.
    e = Eager()
    for operation in (e.ping,
                      e.validate_application_for_deployment,
                      e.publish_api_list,
                      e.add_policy,
                      e.remove_policy,
                      e.enable_policy,
                      e.disable_policy,
                      e.list_policy,
                      e.info_policy):
        self.server.registerFunction(operation)

    self.started = False
def attach_disk(self, parameters, disk_name, instance_id):
    """ Attaches the Elastic Block Store (EBS) volume specified in 'disk_name'
    to this virtual machine.

    The attachment itself is delegated to EC2Agent.attach_disk. Because the
    cloud chooses where the disk is placed, this method snapshots /dev before
    attaching and then keeps re-listing it until a new entry shows up; that
    entry is returned as the disk's location.

    Args:
      parameters: A dict with keys for each parameter needed to connect to AWS.
      disk_name: A str naming the EBS volume to attach to this machine.
      instance_id: A str naming the id of the instance that the disk should be
        attached to. In practice, callers add disks to their own instances.
    Returns:
      The location on the local filesystem where the disk has been attached.
    """
    baseline = glob.glob('/dev/*')
    EC2Agent.attach_disk(self, parameters, disk_name, instance_id)

    while True:
        fresh = utils.diff(glob.glob('/dev/*'), baseline)
        if fresh:
            utils.log("Found new attached devices: {0}".format(fresh))
            ambiguous = len(fresh) != 1
            if ambiguous:
                # Several devices appeared at once, so ours can't be singled out.
                message = ("Found too many new attached devices - not sure"
                           " which one is the device we attached. New devices are {0}")
                self.handle_failure(message.format(fresh))
            else:
                location = fresh[0]
                utils.log("Found exactly one new attached device at {0}".format(
                    location))
                return location
        else:
            utils.log("Still waiting for attached device to appear.")
            utils.sleep(1)
def run_instances(self, count, parameters, security_configured):
    """ Spawns the specified number of EC2 instances using the parameters
    provided, passing each one a generated user-data startup script.

    The startup script exports the AWS credentials and StochKit paths, sets up
    either the RabbitMQ user (queue head) or the celery configuration
    (workers), and starts a celery worker. A copy of the script is also
    written to the file 'userfile' in the current working directory.

    This method is blocking in that it waits until the requested VMs report
    public IPs, for at most self.MAX_VM_CREATION_TIME seconds. If no new
    public IP shows up in that time, self.handle_failure is invoked.
    (Also see documentation for the BaseAgent class)

    Args:
      count               No. of VMs to spawn
      parameters          A dictionary of parameters. The keys named by
                          PARAM_IMAGE_ID, PARAM_INSTANCE_TYPE, PARAM_KEYNAME,
                          PARAM_GROUP, PARAM_SPOT and PARAM_CREDENTIALS are
                          required (the credentials entry must hold
                          'EC2_ACCESS_KEY' and 'EC2_SECRET_KEY').
                          PARAM_SPOT_PRICE is required for spot requests.
                          PARAM_QUEUE_HEAD and PARAM_WORKER_QUEUE are optional.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.

    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    spot = parameters[self.PARAM_SPOT]
    # The spot flag is compared against the string 'True'. Testing the raw
    # value for truthiness would treat the string 'False' as a spot request.
    use_spot = spot == 'True'

    utils.log('[{0}] [{1}] [{2}] [{3}] [ec2] [{4}] [{5}]'.format(
        count, image_id, instance_type, keyname, group, spot))

    credentials = parameters[self.PARAM_CREDENTIALS]
    access_key = str(credentials['EC2_ACCESS_KEY'])
    secret_key = str(credentials['EC2_SECRET_KEY'])

    userstr = """#!/bin/bash \nset -x\nexec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1\ntouch anand3.txt\necho "testing logfile"\necho BEGIN\ndate '+%Y-%m-%d %H:%M:%S'\necho END\ntouch anand2.txt\n"""
    userstr += 'export AWS_ACCESS_KEY_ID={0}\n'.format(access_key)
    userstr += 'export AWS_SECRET_ACCESS_KEY={0}\n'.format(secret_key)
    userstr += 'echo export AWS_ACCESS_KEY_ID={0} >> ~/.bashrc\n'.format(access_key)
    userstr += 'echo export AWS_SECRET_ACCESS_KEY={0} >> ~/.bashrc\n'.format(secret_key)
    userstr += 'echo export AWS_ACCESS_KEY_ID={0} >> /home/ubuntu/.bashrc\n'.format(access_key)
    userstr += 'echo export AWS_SECRET_ACCESS_KEY={0} >> /home/ubuntu/.bashrc\n'.format(secret_key)
    userstr += 'export STOCHKIT_HOME={0}\n'.format('/home/ubuntu/StochKit/')
    userstr += 'export STOCHKIT_ODE={0}\n'.format('/home/ubuntu/ode/')
    userstr += 'echo export STOCHKIT_HOME={0} >> ~/.bashrc\n'.format("/home/ubuntu/StochKit/")
    userstr += 'echo export STOCHKIT_HOME={0} >> /home/ubuntu/.bashrc\n'.format("/home/ubuntu/StochKit/")
    userstr += 'echo export STOCHKIT_ODE={0} >> ~/.bashrc\n'.format("/home/ubuntu/ode/")
    userstr += 'echo export STOCHKIT_ODE={0} >> /home/ubuntu/.bashrc\n'.format("/home/ubuntu/ode/")
    userstr += 'source ~/.bashrc \n'
    userstr += 'source /home/ubuntu/.bashrc \n'

    # Workers need an alarm...
    skip_alarm = False
    if self.PARAM_QUEUE_HEAD in parameters and parameters[self.PARAM_QUEUE_HEAD]:
        # ...but the queue head doesnt
        skip_alarm = True
        # Queue head, needs to have at least two cores
        insufficient_cores = ['t1.micro', 'm1.small', 'm1.medium', 'm3.medium']
        if instance_type in insufficient_cores:
            instance_type = 'c3.large'
        # Create the user that we want to use to connect to the broker
        # and configure its permissions on the default vhost.
        userstr += "rabbitmqctl add_user stochss ucsb\n"
        userstr += 'rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"\n'
    else:
        # Update celery config file...it should have the correct IP
        # of the Queue head node, which should already be running.
        celery_config_filename = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "../celeryconfig.py"
        )
        # Pass it line by line so theres no weird formatting errors from
        # trying to echo a multi-line file directly on the command line
        with open(celery_config_filename, 'r') as celery_config_file:
            lines = celery_config_file.readlines()
        # Make sure we overwrite the file with our first write
        userstr += "echo '{0}' > /home/ubuntu/celeryconfig.py\n".format(lines[0])
        for line in lines[1:]:
            userstr += "echo '{0}' >> /home/ubuntu/celeryconfig.py\n".format(line)

    # Even the queue head gets a celery worker
    # NOTE: We only need to use the -n argument to celery command if we are
    # starting multiple workers on the same machine. Instead, we are starting
    # one worker per machine and letting that one worker execute one task per
    # core, using the configuration in celeryconfig.py to ensure that Celery
    # detects the number of cores and enforces this desired behavior.
    if self.PARAM_WORKER_QUEUE in parameters:
        userstr += "nohup celery -A tasks worker --autoreload --loglevel=info -Q {0} --workdir /home/ubuntu > /home/ubuntu/nohup.log 2>&1 & \n".format(
            parameters[self.PARAM_WORKER_QUEUE]
        )
    else:
        userstr += "nohup celery -A tasks worker --autoreload --loglevel=info --workdir /home/ubuntu > /home/ubuntu/nohup.log 2>&1 & \n"

    # Write the script only once it is complete, and let the context manager
    # close the handle even if the write fails.
    with open('userfile', 'w') as user_file:
        user_file.write(userstr)

    start_time = datetime.datetime.now()
    active_public_ips = []
    active_private_ips = []
    active_instances = []

    try:
        attempts = 1
        while True:
            instance_info = self.describe_instances_old(parameters)
            active_public_ips = instance_info[0]
            active_private_ips = instance_info[1]
            active_instances = instance_info[2]

            # If security has been configured on this agent just now,
            # that's an indication that this is a fresh cloud deployment.
            # As such it's not expected to have any running VMs.
            if len(active_instances) > 0 or security_configured:
                break
            elif attempts == self.DESCRIBE_INSTANCES_RETRY_COUNT:
                self.handle_failure('Failed to invoke describe_instances')
            attempts += 1

        conn = self.open_connection(parameters)
        if use_spot:
            price = parameters[self.PARAM_SPOT_PRICE]
            conn.request_spot_instances(str(price), image_id, key_name=keyname,
                                        security_groups=[group],
                                        instance_type=instance_type,
                                        count=count, user_data=userstr)
        else:
            conn.run_instances(image_id, count, count, key_name=keyname,
                               security_groups=[group],
                               instance_type=instance_type,
                               user_data=userstr)

        instance_ids = []
        public_ips = []
        private_ips = []
        utils.sleep(10)
        end_time = datetime.datetime.now() + datetime.timedelta(
            0, self.MAX_VM_CREATION_TIME)
        now = datetime.datetime.now()

        while now < end_time:
            time_left = (end_time - now).seconds
            utils.log('[{0}] {1} seconds left...'.format(now, time_left))
            instance_info = self.describe_instances_old(parameters)
            # Only the instances that were not present before the launch
            # request count as newly spawned.
            public_ips = utils.diff(instance_info[0], active_public_ips)
            private_ips = utils.diff(instance_info[1], active_private_ips)
            instance_ids = utils.diff(instance_info[2], active_instances)
            if count == len(public_ips):
                break
            time.sleep(self.SLEEP_TIME)
            now = datetime.datetime.now()

        if not public_ips:
            self.handle_failure('No public IPs were able to be procured '
                                'within the time limit')

        if len(public_ips) != count:
            for index, public_ip in enumerate(public_ips):
                if public_ip == '0.0.0.0':
                    instance_to_term = instance_ids[index]
                    utils.log('Instance {0} failed to get a public IP address and'
                              ' is being terminated'.format(instance_to_term))
                    conn.terminate_instances([instance_to_term])

        end_time = datetime.datetime.now()
        total_time = end_time - start_time
        if use_spot:
            utils.log('TIMING: It took {0} seconds to spawn {1} spot '
                      'instances'.format(total_time.seconds, count))
        else:
            utils.log('TIMING: It took {0} seconds to spawn {1} '
                      'regular instances'.format(total_time.seconds, count))

        if not skip_alarm:
            utils.log('Creating Alarms for the instances')
            for machineid in instance_ids:
                self.make_sleepy(parameters, machineid)

        return instance_ids, public_ips, private_ips
    except EC2ResponseError as exception:
        self.handle_failure('EC2 response error while starting VMs: ' +
                            exception.error_message)
    except AgentRuntimeException:
        # Already an agent-level failure; re-raise it with its traceback.
        raise
    except Exception as exception:
        self.handle_failure('Error while starting VMs: ' + exception.message)
def run_instances(self, count, parameters, security_configured):
    """ Spawns the specified number of EC2 instances using the parameters
    provided.

    This method is blocking in that it waits until the requested VMs report
    public IPs, for at most self.MAX_VM_CREATION_TIME seconds. If no new
    public IP shows up in that time, self.handle_failure is invoked.
    (Also see documentation for the BaseAgent class)

    Args:
      count               No. of VMs to spawn
      parameters          A dictionary of parameters. The keys named by
                          PARAM_IMAGE_ID, PARAM_INSTANCE_TYPE, PARAM_KEYNAME,
                          PARAM_GROUP and PARAM_SPOT are required;
                          PARAM_SPOT_PRICE is required for spot requests.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.

    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    spot = parameters[self.PARAM_SPOT]
    # The spot flag is compared against the string 'True'. Testing the raw
    # value for truthiness would treat the string 'False' as a spot request.
    use_spot = spot == 'True'

    utils.log('[{0}] [{1}] [{2}] [{3}] [ec2] [{4}] [{5}]'.format(
        count, image_id, instance_type, keyname, group, spot))

    start_time = datetime.datetime.now()
    active_public_ips = []
    active_private_ips = []
    active_instances = []

    try:
        attempts = 1
        while True:
            instance_info = self.describe_instances(parameters)
            active_public_ips = instance_info[0]
            active_private_ips = instance_info[1]
            active_instances = instance_info[2]

            # If security has been configured on this agent just now,
            # that's an indication that this is a fresh cloud deployment.
            # As such it's not expected to have any running VMs.
            if len(active_instances) > 0 or security_configured:
                break
            elif attempts == self.DESCRIBE_INSTANCES_RETRY_COUNT:
                self.handle_failure('Failed to invoke describe_instances')
            attempts += 1

        conn = self.open_connection(parameters)
        if use_spot:
            price = parameters[self.PARAM_SPOT_PRICE]
            conn.request_spot_instances(str(price), image_id, key_name=keyname,
                                        security_groups=[group],
                                        instance_type=instance_type,
                                        count=count)
        else:
            conn.run_instances(image_id, count, count, key_name=keyname,
                               security_groups=[group],
                               instance_type=instance_type)

        instance_ids = []
        public_ips = []
        private_ips = []
        utils.sleep(10)
        end_time = datetime.datetime.now() + datetime.timedelta(
            0, self.MAX_VM_CREATION_TIME)
        now = datetime.datetime.now()

        while now < end_time:
            time_left = (end_time - now).seconds
            utils.log('[{0}] {1} seconds left...'.format(now, time_left))
            instance_info = self.describe_instances(parameters)
            # Only the instances that were not present before the launch
            # request count as newly spawned.
            public_ips = utils.diff(instance_info[0], active_public_ips)
            private_ips = utils.diff(instance_info[1], active_private_ips)
            instance_ids = utils.diff(instance_info[2], active_instances)
            if count == len(public_ips):
                break
            time.sleep(self.SLEEP_TIME)
            now = datetime.datetime.now()

        if not public_ips:
            self.handle_failure('No public IPs were able to be procured '
                                'within the time limit')

        if len(public_ips) != count:
            for index, public_ip in enumerate(public_ips):
                if public_ip == '0.0.0.0':
                    instance_to_term = instance_ids[index]
                    utils.log('Instance {0} failed to get a public IP address and'
                              ' is being terminated'.format(instance_to_term))
                    conn.terminate_instances([instance_to_term])

        end_time = datetime.datetime.now()
        total_time = end_time - start_time
        if use_spot:
            utils.log('TIMING: It took {0} seconds to spawn {1} spot '
                      'instances'.format(total_time.seconds, count))
        else:
            utils.log('TIMING: It took {0} seconds to spawn {1} '
                      'regular instances'.format(total_time.seconds, count))
        return instance_ids, public_ips, private_ips
    except EC2ResponseError as exception:
        self.handle_failure('EC2 response error while starting VMs: ' +
                            exception.error_message)
    except AgentRuntimeException:
        # Already an agent-level failure; re-raise it with its traceback.
        raise
    except Exception as exception:
        self.handle_failure('Error while starting VMs: ' + exception.message)
def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, ssl=True):
    """ Initialize a new instance of the infrastructure manager service.

    Blocks until the secret key file can be read and, when SSL is requested,
    until the certificate and private key files exist. It then creates the
    SOAP server and registers the InfrastructureManager and SystemManager
    operations on it. The server is only constructed here, not started.

    Args:
      host  Hostname to which the service should bind (Optional). Defaults
            to DEFAULT_HOST.
      port  Port of the service (Optional). Defaults to DEFAULT_PORT.
      ssl   True if SSL should be engaged or False otherwise (Optional).
            Defaults to True. When engaged, this implementation expects to
            find 'certs/mycert.pem' and 'certs/mykey.pem' under
            self.APPSCALE_DIR.
    """
    self.host = host
    self.port = port

    # The secret's value is not needed here; fetching it only serves to wait
    # until the key file has been put in place.
    while True:
        try:
            utils.get_secret(self.APPSCALE_DIR + 'secret.key')
            break
        except Exception:
            logging.info('Waiting for the secret key to become available')
            utils.sleep(5)
    # Deliberately do not write the secret's value to the log.
    logging.info('Found the secret key')

    SOAPpy.Config.simplify_objects = True

    if ssl:
        logging.info('Checking for the certificate and private key')
        cert = self.APPSCALE_DIR + 'certs/mycert.pem'
        key = self.APPSCALE_DIR + 'certs/mykey.pem'
        while not (os.path.exists(cert) and os.path.exists(key)):
            logging.info('Waiting for certificates')
            utils.sleep(5)

        ssl_context = SSL.Context()
        ssl_context.load_cert(cert, key)
        self.server = SOAPpy.SOAPServer((host, port), ssl_context=ssl_context)
    else:
        self.server = SOAPpy.SOAPServer((host, port))

    # An optional JSON config file next to the running script supplies the
    # manager's parameters, but only if it names a persistent store type.
    parent_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    config_file = os.path.join(parent_dir, self.CONFIG_FILE)
    i = None
    if os.path.exists(config_file):
        with open(config_file) as file_handle:
            params = json.load(file_handle)
        if PersistentStoreFactory.PARAM_STORE_TYPE in params:
            logging.info(
                'Loading infrastructure manager configuration from ' +
                config_file)
            i = InfrastructureManager(params)
    if i is None:
        i = InfrastructureManager()

    self.server.registerFunction(i.describe_operation)
    self.server.registerFunction(i.run_instances)
    self.server.registerFunction(i.terminate_instances)
    self.server.registerFunction(i.attach_disk)

    system_manager = SystemManager()
    self.server.registerFunction(system_manager.get_cpu_usage)
    self.server.registerFunction(system_manager.get_disk_usage)
    self.server.registerFunction(system_manager.get_memory_usage)
    self.server.registerFunction(system_manager.get_service_summary)
    self.server.registerFunction(system_manager.get_swap_usage)
    self.server.registerFunction(system_manager.get_loadavg)

    self.started = False
def run_instances(self, count, parameters, security_configured):
    """ Spawns the specified number of EC2 instances using the parameters
    provided, in the requested availability zone.

    This method is blocking in that it waits until the requested VMs report
    public IPs, for at most self.MAX_VM_CREATION_TIME seconds. If no new
    public IP shows up in that time, or if any instance started with the given
    keyname is found in the 'terminated' state, self.handle_failure is
    invoked. A failed regular launch request is retried, with a
    utils.sleep(10) between attempts, until self.RUN_INSTANCES_RETRY_COUNT
    attempts have been used.
    (Also see documentation for the BaseAgent class)

    Args:
      count               No. of VMs to spawn
      parameters          A dictionary of parameters. The keys named by
                          PARAM_IMAGE_ID, PARAM_INSTANCE_TYPE, PARAM_KEYNAME,
                          PARAM_GROUP, PARAM_SPOT and PARAM_ZONE are required;
                          PARAM_SPOT_PRICE is required for spot requests.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.

    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    spot = parameters[self.PARAM_SPOT]
    zone = parameters[self.PARAM_ZONE]
    # The spot flag is compared against the string 'True'. Testing the raw
    # value for truthiness would treat the string 'False' as a spot request.
    use_spot = spot == 'True'

    utils.log("Starting {0} machines with machine id {1}, with "
              "instance type {2}, keyname {3}, in security group {4}, in zone {5}"
              .format(count, image_id, instance_type, keyname, group, zone))

    start_time = datetime.datetime.now()
    active_public_ips = []
    active_private_ips = []
    active_instances = []

    try:
        attempts = 1
        while True:
            instances = self.__describe_instances(parameters)
            term_instance_info = self.__get_instance_info(
                instances, 'terminated', keyname)
            if len(term_instance_info[2]):
                self.handle_failure('One or more nodes started with key {0} have '
                                    'been terminated'.format(keyname))
            instance_info = self.__get_instance_info(
                instances, 'running', keyname)
            active_public_ips = instance_info[0]
            active_private_ips = instance_info[1]
            active_instances = instance_info[2]

            # If security has been configured on this agent just now,
            # that's an indication that this is a fresh cloud deployment.
            # As such it's not expected to have any running VMs.
            if len(active_instances) > 0 or security_configured:
                break
            elif attempts == self.DESCRIBE_INSTANCES_RETRY_COUNT:
                self.handle_failure('Failed to invoke describe_instances')
            attempts += 1

        conn = self.open_connection(parameters)
        if use_spot:
            price = parameters[self.PARAM_SPOT_PRICE]
            conn.request_spot_instances(str(price), image_id, key_name=keyname,
                                        security_groups=[group],
                                        instance_type=instance_type,
                                        count=count, placement=zone)
        else:
            retries_left = self.RUN_INSTANCES_RETRY_COUNT
            while True:
                try:
                    conn.run_instances(image_id, count, count, key_name=keyname,
                                       security_groups=[group],
                                       instance_type=instance_type,
                                       placement=zone)
                    break
                except EC2ResponseError as exception:
                    # Count this failed attempt before reporting, so the
                    # logged number is what actually remains. (Assigning -1
                    # here instead of decrementing meant no retry ever ran.)
                    retries_left -= 1
                    utils.log("Couldn't start {0} instances because of error: {1}. "
                              "{2} retries left.".format(
                                  count, exception.error_message, retries_left))
                    if retries_left <= 0:
                        self.handle_failure(exception.error_message)
                    utils.sleep(10)

        instance_ids = []
        public_ips = []
        private_ips = []
        utils.sleep(10)
        end_time = datetime.datetime.now() + datetime.timedelta(
            0, self.MAX_VM_CREATION_TIME)
        now = datetime.datetime.now()

        while now < end_time:
            time_left = (end_time - now).seconds
            utils.log('[{0}] {1} seconds left...'.format(now, time_left))
            instances = self.__describe_instances(parameters)
            term_instance_info = self.__get_instance_info(
                instances, 'terminated', keyname)
            if len(term_instance_info[2]):
                self.handle_failure('One or more nodes started with key {0} have '
                                    'been terminated'.format(keyname))
            instance_info = self.__get_instance_info(
                instances, 'running', keyname)
            # Only the instances that were not present before the launch
            # request count as newly spawned.
            public_ips = utils.diff(instance_info[0], active_public_ips)
            private_ips = utils.diff(instance_info[1], active_private_ips)
            instance_ids = utils.diff(instance_info[2], active_instances)
            if count == len(public_ips):
                break
            time.sleep(self.SLEEP_TIME)
            now = datetime.datetime.now()

        if not public_ips:
            self.handle_failure('No public IPs were able to be procured '
                                'within the time limit')

        if len(public_ips) != count:
            for index, public_ip in enumerate(public_ips):
                if public_ip == '0.0.0.0':
                    instance_to_term = instance_ids[index]
                    utils.log('Instance {0} failed to get a public IP address and'
                              ' is being terminated'.format(instance_to_term))
                    conn.terminate_instances([instance_to_term])

        end_time = datetime.datetime.now()
        total_time = end_time - start_time
        if use_spot:
            utils.log('TIMING: It took {0} seconds to spawn {1} spot '
                      'instances'.format(total_time.seconds, count))
        else:
            utils.log('TIMING: It took {0} seconds to spawn {1} '
                      'regular instances'.format(total_time.seconds, count))
        return instance_ids, public_ips, private_ips
    except EC2ResponseError as exception:
        self.handle_failure('EC2 response error while starting VMs: ' +
                            exception.error_message)
    except AgentRuntimeException:
        # Already an agent-level failure; re-raise it with its traceback.
        raise
    except Exception as exception:
        self.handle_failure('Error while starting VMs: ' + exception.message)
def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, ssl=True):
    """ Initialize a new instance of the infrastructure manager service.

    Blocks until the secret key file can be read and, when SSL is requested,
    until the certificate and private key files exist. It then creates the
    SOAP server and registers the InfrastructureManager and SystemManager
    operations on it. The server is only constructed here, not started.

    Args:
      host  Hostname to which the service should bind (Optional). Defaults
            to DEFAULT_HOST.
      port  Port of the service (Optional). Defaults to DEFAULT_PORT.
      ssl   True if SSL should be engaged or False otherwise (Optional).
            Defaults to True. When engaged, this implementation expects to
            find 'certs/mycert.pem' and 'certs/mykey.pem' under
            self.APPSCALE_DIR.
    """
    self.host = host
    self.port = port

    # The secret's value is not needed here; fetching it only serves to wait
    # until the key file has been put in place.
    while True:
        try:
            utils.get_secret(self.APPSCALE_DIR + 'secret.key')
            break
        except Exception:
            logging.info('Waiting for the secret key to become available')
            utils.sleep(5)
    # Deliberately do not write the secret's value to the log.
    logging.info('Found the secret key')

    SOAPpy.Config.simplify_objects = True

    if ssl:
        logging.info('Checking for the certificate and private key')
        cert = self.APPSCALE_DIR + 'certs/mycert.pem'
        key = self.APPSCALE_DIR + 'certs/mykey.pem'
        while not (os.path.exists(cert) and os.path.exists(key)):
            logging.info('Waiting for certificates')
            utils.sleep(5)

        ssl_context = SSL.Context()
        ssl_context.load_cert(cert, key)
        self.server = SOAPpy.SOAPServer((host, port), ssl_context=ssl_context)
    else:
        self.server = SOAPpy.SOAPServer((host, port))

    # An optional JSON config file next to the running script supplies the
    # manager's parameters, but only if it names a persistent store type.
    parent_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    config_file = os.path.join(parent_dir, self.CONFIG_FILE)
    i = None
    if os.path.exists(config_file):
        with open(config_file) as file_handle:
            params = json.load(file_handle)
        if PersistentStoreFactory.PARAM_STORE_TYPE in params:
            logging.info('Loading infrastructure manager configuration from ' +
                         config_file)
            i = InfrastructureManager(params)
    if i is None:
        i = InfrastructureManager()

    self.server.registerFunction(i.describe_instances)
    self.server.registerFunction(i.run_instances)
    self.server.registerFunction(i.terminate_instances)
    self.server.registerFunction(i.attach_disk)

    system_manager = SystemManager()
    self.server.registerFunction(system_manager.get_cpu_usage)
    self.server.registerFunction(system_manager.get_disk_usage)
    self.server.registerFunction(system_manager.get_memory_usage)
    self.server.registerFunction(system_manager.get_service_summary)
    self.server.registerFunction(system_manager.get_swap_usage)
    self.server.registerFunction(system_manager.get_loadavg)

    self.started = False
def run_instances(self, count, parameters, security_configured):
    """ Spawn the specified number of EC2 instances using the parameters
    provided. This method relies on the '<prefix>-run-instances' command
    (prefix taken from self.prefix) to spawn the actual VMs in the cloud.

    This method is blocking in that it waits until the requested VMs report
    public IPs, for at most self.MAX_VM_CREATION_TIME seconds. If no new
    public IP shows up in that time, the process exits through sys.exit.
    (Also see documentation for the BaseAgent class)

    Args:
      count               No. of VMs to spawn
      parameters          A dictionary of parameters. The keys named by
                          PARAM_IMAGE_ID, PARAM_INSTANCE_TYPE, PARAM_KEYNAME
                          and PARAM_GROUP are required.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.

    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    # Spot requests are switched off in this agent; the spot branches below
    # are kept but never taken.
    spot = False

    utils.log('[{0}] [{1}] [{2}] [{3}] [ec2] [{4}] [{5}]'.format(
        count, image_id, instance_type, keyname, group, spot))

    start_time = datetime.datetime.now()
    active_public_ips = []
    active_private_ips = []
    active_instances = []

    if 'EC2_URL' in os.environ:
        utils.log('EC2_URL = [{0}]'.format(os.environ['EC2_URL']))
    else:
        utils.log('Warning: EC2_URL environment not found in the process runtime!')

    while True:
        active_public_ips, active_private_ips, active_instances = \
            self.describe_instances(parameters)
        # If security has been configured on this agent just now,
        # that's an indication that this is a fresh cloud deployment.
        # As such it's not expected to have any running VMs.
        if len(active_instances) > 0 or security_configured:
            break

    args = '-k {0} -n {1} --instance-type {2} --group {3} {4}'.format(
        keyname, count, instance_type, group, image_id)
    if spot:
        price = self.get_optimal_spot_price(instance_type)
        command_to_run = '{0}-request-spot-instances -p {1} {2}'.format(
            self.prefix, price, args)
    else:
        command_to_run = '{0}-run-instances {1}'.format(self.prefix, args)

    # Keep issuing the launch command until its output is accepted; the
    # response handler may hand back an adjusted command for the next try.
    while True:
        launch_output = utils.shell(command_to_run)
        utils.log('Run instances says {0}'.format(launch_output))
        status, command_to_run = self.run_instances_response(
            command_to_run, launch_output)
        if status:
            break
        utils.log('sleepy time')
        utils.sleep(5)

    instances = []
    public_ips = []
    private_ips = []
    utils.sleep(10)
    end_time = datetime.datetime.now() + datetime.timedelta(
        0, self.MAX_VM_CREATION_TIME)
    now = datetime.datetime.now()

    # Neither pattern changes between polls, so compile them once up front.
    fqdn_regex = re.compile(r'\s+({0})\s+({0})\s+running\s+{1}\s'.format(
        self.FQDN_REGEX, keyname))
    instance_regex = re.compile(r'INSTANCE\s+(i-\w+)')

    while now < end_time:
        describe_instances = utils.shell(self.prefix + '-describe-instances 2>&1')
        utils.log('[{0}] {1} seconds left...'.format(
            now, (end_time - now).seconds))
        utils.log(describe_instances)
        all_ip_addresses = utils.flatten(fqdn_regex.findall(describe_instances))
        instances = utils.flatten(instance_regex.findall(describe_instances))
        public_ips, private_ips = self.get_ip_addresses(all_ip_addresses)
        # Only the instances that were not present before the launch request
        # count as newly spawned.
        public_ips = utils.diff(public_ips, active_public_ips)
        private_ips = utils.diff(private_ips, active_private_ips)
        instances = utils.diff(instances, active_instances)
        if count == len(public_ips):
            break
        time.sleep(self.SLEEP_TIME)
        now = datetime.datetime.now()

    if not public_ips:
        sys.exit('No public IPs were able to be procured within the time limit')

    if len(public_ips) != count:
        for index, public_ip in enumerate(public_ips):
            if public_ip == '0.0.0.0':
                instance_to_term = instances[index]
                utils.log('Instance {0} failed to get a public IP address and is being terminated'.format(
                    instance_to_term))
                utils.shell(self.prefix + '-terminate-instances ' + instance_to_term)

    end_time = datetime.datetime.now()
    total_time = end_time - start_time
    if spot:
        utils.log('TIMING: It took {0} seconds to spawn {1} spot instances'.format(
            total_time.seconds, count))
    else:
        utils.log('TIMING: It took {0} seconds to spawn {1} regular instances'.format(
            total_time.seconds, count))
    return instances, public_ips, private_ips