def scale_up(ec2_pool, extra_nodes, instance_type, spot, spot_bid_price):
    """Add extra worker nodes to an existing EC2 condor pool.

    Launches either fixed-price instances or persistent spot requests of
    the given instance type, records each launched instance/request in
    the database, and returns a list of error entries (tuples of
    (title, message) plus the raised exception); empty on success.

    Fixes vs. original: Python 2 ``except E, e`` syntax replaced with
    ``except E as e``; the collected ``errors`` list is now returned
    (it was silently dropped); launched workers are recorded with the
    *instance_type* actually requested rather than the pool's
    ``initial_instance_type``.
    """
    log.debug('Scaling condor pool %s with %d extra nodes' % (ec2_pool.id, extra_nodes))
    errors = []
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)
    try:
        if not spot:
            # Fixed-price launch. This is easy.
            log.debug('Launching fixed price instances')
            worker_reservation = ec2_connection.run_instances(
                ami.id,
                key_name=ec2_pool.key_pair.name,
                instance_type=instance_type,
                subnet_id=ec2_pool.vpc.subnet_id,
                security_group_ids=[ec2_pool.vpc.worker_group_id],
                user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                min_count=extra_nodes,
                max_count=extra_nodes,
            )
            # Give AWS a moment to register the reservation before reading it.
            sleep(3)
            for instance in worker_reservation.instances:
                ec2_instance = EC2Instance()
                ec2_instance.ec2_pool = ec2_pool
                ec2_instance.instance_id = instance.id
                # Record the type we actually launched (was initial_instance_type).
                ec2_instance.instance_type = instance_type
                ec2_instance.instance_role = 'worker'
                ec2_instance.save()
        else:
            # We're launching a spot request pool instead.
            log.debug('Launching spot requests')
            worker_requests = ec2_connection.request_spot_instances(
                str(spot_bid_price),
                ami.id,
                type='persistent',
                count=extra_nodes,
                key_name=ec2_pool.key_pair.name,
                instance_type=instance_type,
                subnet_id=ec2_pool.vpc.subnet_id,
                security_group_ids=[ec2_pool.vpc.worker_group_id],
                user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
            )
            for request in worker_requests:
                spot_request = SpotRequest(
                    ec2_pool=ec2_pool,
                    request_id=request.id,
                    price=request.price,
                    status_code=request.status.code,
                    status_message=request.status.message,
                    state=request.state,
                    # Record the type we actually requested (was initial_instance_type).
                    instance_type=instance_type,
                )
                spot_request.save()
    except EC2ResponseError as e:
        errors.append(('Error launching worker instances',
                       'An error occurred when launching the worker instances, '
                       'however a master instance was launched successfully. '
                       'Check your AWS usage limit to ensure you are not trying '
                       'to exceed it. You should either try again to scale the '
                       'pool up, or terminate it.'))
        errors.append(e)
    return errors
def scale_up(ec2_pool, extra_nodes, instance_type, spot, spot_bid_price):
    """Add extra worker nodes to an existing EC2 condor pool.

    Launches either fixed-price instances (``spot`` falsy) or persistent
    spot requests at ``spot_bid_price``, persists a database record for
    each, and returns the list of accumulated errors (empty on success).

    Fixes vs. original: the worker-launch error message was a string
    literal broken by a raw line break (a syntax error) — rebuilt as a
    valid literal; launched workers/requests are now recorded with the
    *instance_type* actually used rather than ``initial_instance_type``.
    """
    log.debug('Scaling condor pool %s with %d extra nodes' % (ec2_pool.id, extra_nodes))
    errors = []
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)
    try:
        if not spot:
            # Fixed-price launch. This is easy.
            log.debug('Launching fixed price instances')
            worker_reservation = ec2_connection.run_instances(
                ami.id,
                key_name=ec2_pool.key_pair.name,
                instance_type=instance_type,
                subnet_id=ec2_pool.vpc.subnet_id,
                security_group_ids=[ec2_pool.vpc.worker_group_id],
                user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                min_count=extra_nodes,
                max_count=extra_nodes,
            )
            # Brief pause so the reservation's instance list is populated.
            sleep(3)
            for instance in worker_reservation.instances:
                ec2_instance = EC2Instance()
                ec2_instance.ec2_pool = ec2_pool
                ec2_instance.instance_id = instance.id
                # Record the type we actually launched (was initial_instance_type).
                ec2_instance.instance_type = instance_type
                ec2_instance.instance_role = 'worker'
                ec2_instance.save()
        else:
            # We're launching a spot request pool instead.
            log.debug('Launching spot requests')
            worker_requests = ec2_connection.request_spot_instances(
                str(spot_bid_price),
                ami.id,
                type='persistent',
                count=extra_nodes,
                key_name=ec2_pool.key_pair.name,
                instance_type=instance_type,
                subnet_id=ec2_pool.vpc.subnet_id,
                security_group_ids=[ec2_pool.vpc.worker_group_id],
                user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
            )
            for request in worker_requests:
                spot_request = SpotRequest(
                    ec2_pool=ec2_pool,
                    request_id=request.id,
                    price=request.price,
                    status_code=request.status.code,
                    status_message=request.status.message,
                    state=request.state,
                    # Record the type we actually requested (was initial_instance_type).
                    instance_type=instance_type,
                )
                spot_request.save()
    except EC2ResponseError as e:
        errors.append(('Error launching worker instances',
                       'An error occurred when launching the worker instances, '
                       'however a master instance was launched successfully. '
                       'Check your AWS usage limit to ensure you are not trying '
                       'to exceed it. You should either try again to scale the '
                       'pool up, or terminate it.'))
        errors.append(e)
    return errors
def launch_pool(ec2_pool):
    """Launch an EC2 pool with the definitions provided by *ec2_pool*.

    Launches the master node, waits for it to be assigned a private IP,
    then launches the worker nodes (fixed price or spot depending on
    ``ec2_pool.spot_request``).  Returns the list of accumulated error
    entries; empty on success.

    Fixes vs. original: Python 2 ``except E, e`` syntax replaced with
    ``except E as e``; ``errors`` is now returned (it was collected but
    dropped); ``== None`` replaced with ``is None``; commented-out
    ``sleep(2)`` removed.
    """
    log.debug('Launching EC2 pool')
    assert isinstance(ec2_pool, EC2Pool)
    errors = []

    # Initiate the AWS connections.
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    # Launch the master instance with the pool details in its launch string.
    master_launch_string = ec2_config.MASTER_LAUNCH_STRING
    log.debug('Launching Master node')
    master_reservation = ec2_connection.run_instances(
        ami.id,
        key_name=ec2_pool.key_pair.name,
        instance_type=settings.MASTER_NODE_TYPE,
        subnet_id=ec2_pool.vpc.subnet_id,
        security_group_ids=[ec2_pool.vpc.master_group_id],
        user_data=master_launch_string,
        min_count=1,  # Only 1 master instance needed
        max_count=1,
    )
    ec2_instances = []
    master_instance = master_reservation.instances[0]
    master_ec2_instance = EC2Instance()
    master_ec2_instance.ec2_pool = ec2_pool
    master_ec2_instance.instance_id = master_instance.id
    master_ec2_instance.instance_type = settings.MASTER_NODE_TYPE
    master_ec2_instance.instance_role = 'master'
    master_ec2_instance.save()
    ec2_instances.append(master_ec2_instance)
    ec2_pool.master = master_ec2_instance
    ec2_pool.last_update_time = now()
    ec2_pool.save()

    # Poll until the master has been assigned a private IP address
    # (up to max_retrys attempts, sleeping in between).
    log.debug('Waiting for private IP to be assigned to master node')
    sleep_time = 5
    max_retrys = 20
    current_try = 0
    while master_ec2_instance.get_private_ip() is None and current_try < max_retrys:
        sleep(sleep_time)
        current_try += 1
    sleep(2)

    if ec2_pool.size > 0:
        log.debug('Launching worker nodes')
        # Are we launching fixed price or spot instances?
        try:
            if not ec2_pool.spot_request:
                # Fixed-price launch. This is easy.
                worker_reservation = ec2_connection.run_instances(
                    ami.id,
                    key_name=ec2_pool.key_pair.name,
                    instance_type=ec2_pool.initial_instance_type,
                    subnet_id=ec2_pool.vpc.subnet_id,
                    security_group_ids=[ec2_pool.vpc.worker_group_id],
                    user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                    min_count=ec2_pool.size,
                    max_count=ec2_pool.size,
                )
                sleep(3)
                for instance in worker_reservation.instances:
                    ec2_instance = EC2Instance()
                    ec2_instance.ec2_pool = ec2_pool
                    ec2_instance.instance_id = instance.id
                    ec2_instance.instance_type = ec2_pool.initial_instance_type
                    ec2_instance.instance_role = 'worker'
                    ec2_instance.save()
                    ec2_instances.append(ec2_instance)
            else:
                # We're launching a spot request pool instead.
                worker_requests = ec2_connection.request_spot_instances(
                    str(ec2_pool.spot_price),
                    ami.id,
                    type='persistent',
                    count=ec2_pool.size,
                    key_name=ec2_pool.key_pair.name,
                    instance_type=ec2_pool.initial_instance_type,
                    subnet_id=ec2_pool.vpc.subnet_id,
                    security_group_ids=[ec2_pool.vpc.worker_group_id],
                    user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                )
                for request in worker_requests:
                    spot_request = SpotRequest(
                        ec2_pool=ec2_pool,
                        request_id=request.id,
                        price=request.price,
                        status_code=request.status.code,
                        status_message=request.status.message,
                        state=request.state,
                        instance_type=ec2_pool.initial_instance_type,
                    )
                    spot_request.save()
        except EC2ResponseError as e:
            errors.append(('Error launching worker instances',
                           'An error occurred when launching the worker instances, '
                           'however a master instance was launched successfully. '
                           'Check your AWS usage limit to ensure you are not '
                           'trying to exceed it. You should either try again to '
                           'scale the pool up, or terminate it.'))
            errors.append(e)
    return errors
def launch_pool(ec2_pool):
    """Launch a complete EC2 pool with the definitions provided by *ec2_pool*.

    Steps: launch the master node and wait for its private IP; launch
    the worker nodes (fixed price or spot); create the pool's SQS queue
    and SNS alarm-notification topic; assign an elastic IP to the master
    and verify SSH access.  Returns the list of accumulated error
    entries; empty on success.

    Fixes vs. original: the worker-launch error message was a string
    literal broken by a raw line break (a syntax error) — rebuilt;
    ``queue != None`` -> ``is not None``; bare ``except: pass`` in the
    SNS cleanup now catches ``Exception`` and logs (still best-effort);
    ``raise e`` -> bare ``raise`` to preserve the original traceback.
    """
    log.debug('Launching EC2 pool')
    assert isinstance(ec2_pool, EC2Pool)
    errors = []

    # Initiate the AWS connections.
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    # Launch the master instance with the pool details in its launch string.
    master_launch_string = ec2_config.MASTER_LAUNCH_STRING
    log.debug('Launching Master node')
    master_reservation = ec2_connection.run_instances(
        ami.id,
        key_name=ec2_pool.key_pair.name,
        instance_type=settings.MASTER_NODE_TYPE,
        subnet_id=ec2_pool.vpc.subnet_id,
        security_group_ids=[ec2_pool.vpc.master_group_id],
        user_data=master_launch_string,
        min_count=1,  # Only 1 master instance needed
        max_count=1,
    )
    ec2_instances = []
    master_instance = master_reservation.instances[0]
    master_ec2_instance = EC2Instance()
    master_ec2_instance.ec2_pool = ec2_pool
    master_ec2_instance.instance_id = master_instance.id
    master_ec2_instance.instance_type = settings.MASTER_NODE_TYPE
    master_ec2_instance.instance_role = 'master'
    master_ec2_instance.save()
    ec2_instances.append(master_ec2_instance)
    ec2_pool.master = master_ec2_instance
    ec2_pool.last_update_time = now()
    ec2_pool.save()

    # Poll until the master has been assigned a private IP address
    # (up to max_retrys attempts, sleeping in between).
    log.debug('Waiting for private IP to be assigned to master node')
    sleep_time = 5
    max_retrys = 20
    current_try = 0
    while master_ec2_instance.get_private_ip() is None and current_try < max_retrys:
        sleep(sleep_time)
        current_try += 1
    sleep(2)

    if ec2_pool.size > 0:
        log.debug('Launching worker nodes')
        # Are we launching fixed price or spot instances?
        try:
            if not ec2_pool.spot_request:
                # Fixed-price launch. This is easy.
                worker_reservation = ec2_connection.run_instances(
                    ami.id,
                    key_name=ec2_pool.key_pair.name,
                    instance_type=ec2_pool.initial_instance_type,
                    subnet_id=ec2_pool.vpc.subnet_id,
                    security_group_ids=[ec2_pool.vpc.worker_group_id],
                    user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                    min_count=ec2_pool.size,
                    max_count=ec2_pool.size,
                )
                sleep(3)
                for instance in worker_reservation.instances:
                    ec2_instance = EC2Instance()
                    ec2_instance.ec2_pool = ec2_pool
                    ec2_instance.instance_id = instance.id
                    ec2_instance.instance_type = ec2_pool.initial_instance_type
                    ec2_instance.instance_role = 'worker'
                    ec2_instance.save()
                    ec2_instances.append(ec2_instance)
            else:
                # We're launching a spot request pool instead.
                worker_requests = ec2_connection.request_spot_instances(
                    str(ec2_pool.spot_price),
                    ami.id,
                    type='persistent',
                    count=ec2_pool.size,
                    key_name=ec2_pool.key_pair.name,
                    instance_type=ec2_pool.initial_instance_type,
                    subnet_id=ec2_pool.vpc.subnet_id,
                    security_group_ids=[ec2_pool.vpc.worker_group_id],
                    user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                )
                for request in worker_requests:
                    spot_request = SpotRequest(
                        ec2_pool=ec2_pool,
                        request_id=request.id,
                        price=request.price,
                        status_code=request.status.code,
                        status_message=request.status.message,
                        state=request.state,
                        instance_type=ec2_pool.initial_instance_type,
                    )
                    spot_request.save()
        except EC2ResponseError as e:
            errors.append(('Error launching worker instances',
                           'An error occurred when launching the worker instances, '
                           'however a master instance was launched successfully. '
                           'Check your AWS usage limit to ensure you are not '
                           'trying to exceed it. You should either try again to '
                           'scale the pool up, or terminate it.'))
            errors.append(e)

    # Create an SQS queue for the pool, replacing any stale one first.
    log.debug('Creating SQS for pool')
    sqs_connection = aws_tools.create_sqs_connection(ec2_pool.vpc.access_key)
    queue = sqs_connection.get_queue(ec2_pool.get_queue_name())
    if queue is not None:
        sqs_connection.delete_queue(queue)
    sqs_connection.create_queue(ec2_pool.get_queue_name())

    # Create an SNS topic for instance alarm notifications.
    log.debug('Creating SNS topic for alarms')
    sns_connection = aws_tools.create_sns_connection(ec2_pool.vpc.access_key)
    topic_data = sns_connection.create_topic(ec2_pool.get_alarm_notify_topic())
    topic_arn = topic_data['CreateTopicResponse']['CreateTopicResult']['TopicArn']
    log.debug('SNS topic created with arn %s' % topic_arn)
    ec2_pool.alarm_notify_topic_arn = topic_arn

    # Subscribe the api_terminate_instance_alarm endpoint to the topic so
    # idle-instance alarms can call back into the server.
    termination_notify_url = 'http://' + settings.HOST + str(reverse_lazy('api_terminate_instance_alarm'))
    try:
        sns_connection.subscribe(topic_arn, 'http', termination_notify_url)
    except BotoServerError as e:
        errors.append(('Error enabling smart termination',
                       'Smart termination was not successfully enabled'))
        try:
            ec2_pool.smart_terminate = False
            sns_connection.delete_topic(topic_arn)
        except Exception:
            # Best-effort cleanup only; log instead of silently swallowing.
            log.exception('Failed to clean up SNS topic after subscribe error')

    # Per-instance shutdown alarms are applied later, when the master node
    # sends a notification back to the server through the API.

    # Assign an elastic IP to the master instance.
    log.debug('Assigning elastic IP to master node')
    try:
        elastic_ip = assign_ip_address(master_ec2_instance)
        log.debug('Assigned elastic IP address to instance %s' % master_ec2_instance.instance_id)
    except Exception as e:
        log.error('Error assigning elastic ip to master instance %s' % master_ec2_instance.instance_id)
        log.exception(e)
        raise  # preserve the original traceback (was "raise e")

    ec2_pool.address = 'ubuntu@' + str(elastic_ip.public_ip)

    # Check that we can SSH in; retry while sshd on the master comes up.
    tries = 15
    for i in range(tries):
        log.debug('Testing SSH credentials')
        command = ['ssh', '-o', 'StrictHostKeyChecking=no', '-i',
                   ec2_pool.key_pair.path, ec2_pool.address, 'pwd']
        process = subprocess.Popen(command, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, env={'DISPLAY': ''})
        output = process.communicate()
        log.debug('SSH response:')
        log.debug(output)
        if process.returncode == 0:
            log.debug('SSH success')
            break
        sleep(5)
    return errors