def deployment(self, safe_deployment_strategy):
    """ Main entry point for the Host Deployment Manager process.

        :type safe_deployment_strategy: string/enum
        :return True if the operation succeeds, otherwise an Exception is raised.
    """
    app_name = self._app['name']
    app_env = self._app['env']
    app_role = self._app['role']
    app_region = self._app['region']
    app_blue_green, app_color = get_blue_green_from_app(self._app)

    # Retrieve autoscaling infos, if any
    as_conn = self._cloud_connection.get_connection(app_region, ['autoscaling'], boto_version='boto3')
    as_group, as_group_processes_to_suspend = get_autoscaling_group_and_processes_to_suspend(
        as_conn, self._app, self._log_file)
    try:
        # Suspend autoscaling
        suspend_autoscaling_group_processes(as_conn, as_group, as_group_processes_to_suspend, self._log_file)

        # Wait for pending instances to become ready
        while True:
            pending_instances = find_ec2_pending_instances(self._cloud_connection, app_name, app_env, app_role,
                                                           app_region, as_group, ghost_color=app_color)
            if not pending_instances:
                break
            log("INFO: waiting 10s for {} instance(s) to become running before proceeding with deployment: {}".format(
                len(pending_instances), pending_instances), self._log_file)
            time.sleep(10)

        running_instances = find_ec2_running_instances(self._cloud_connection, app_name, app_env, app_role,
                                                       app_region, ghost_color=app_color)
        if running_instances:
            if safe_deployment_strategy and self._safe_infos:
                self._as_name = as_group
                self._hosts_list = running_instances
                return self.safe_manager(safe_deployment_strategy)
            else:
                self._hosts_list = [host['private_ip_address'] for host in running_instances]
                self.trigger_launch(self._hosts_list)
                return True
        else:
            raise GCallException(
                "No instance found in region {region} with tags app:{app}, env:{env}, role:{role}{color}".format(
                    region=app_region, app=app_name, env=app_env, role=app_role,
                    color=', color:%s' % app_color if app_color else ''))
    finally:
        resume_autoscaling_group_processes(as_conn, as_group, as_group_processes_to_suspend, self._log_file)
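
# Illustrative sketch, not part of this module: the wait-for-pending loop in deployment() is a
# generic "poll until empty" pattern. A minimal standalone version is shown below, assuming only
# a `list_pending` callable and a `log` callable (both hypothetical stand-ins for
# find_ec2_pending_instances and the module's log helper).
import time


def wait_until_no_pending(list_pending, log, poll_interval=10):
    """Poll `list_pending()` until it returns an empty collection, logging between attempts."""
    while True:
        pending = list_pending()
        if not pending:
            return
        log("INFO: waiting {0}s for {1} instance(s) to become running: {2}".format(
            poll_interval, len(pending), pending))
        time.sleep(poll_interval)

# Example (hypothetical callables):
#   wait_until_no_pending(lambda: find_pending(), my_log)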
def execute(self):
    log(_green("STATE: Started"), self._log_file)
    online_app, offline_app = get_blue_green_apps(self._app, self._worker._db.apps, self._log_file)
    if not offline_app:
        self._worker.update_status(
            "aborted",
            message=self._get_notification_message_aborted(
                self._app, "Blue/green is not enabled on this app or is not well configured"))
        return

    running_jobs = get_running_jobs(self._db, online_app['_id'], offline_app['_id'], self._job['_id'])
    if abort_if_other_bluegreen_job(
            running_jobs,
            self._worker,
            self._get_notification_message_aborted(
                self._app,
                "Please wait until the end of the current jobs before triggering a Blue/green operation"),
            self._log_file):
        return

    # Check ASG
    if offline_app['autoscale']['name'] and online_app['autoscale']['name']:
        if not (check_autoscale_exists(self._cloud_connection, offline_app['autoscale']['name'],
                                       offline_app['region'])
                and check_autoscale_exists(self._cloud_connection, online_app['autoscale']['name'],
                                           online_app['region'])):
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    offline_app, "No AutoScale group found on the offline app to purge."))
            return
    else:
        self._worker.update_status(
            "aborted",
            message=self._get_notification_message_aborted(
                offline_app, "No AutoScale group found on the offline app to purge."))
        return

    # Check that the green and blue apps use two different AutoScale groups
    if offline_app['autoscale']['name'] == online_app['autoscale']['name']:
        self._worker.update_status(
            "aborted",
            message=self._get_notification_message_aborted(
                offline_app, "Please set a different AutoScale on green and blue app."))
        return

    # Retrieve autoscaling infos, if any
    app_region = offline_app['region']
    as_conn3 = self._cloud_connection.get_connection(app_region, ['autoscaling'], boto_version='boto3')
    as_group, as_group_processes_to_suspend = get_autoscaling_group_and_processes_to_suspend(
        as_conn3, offline_app, self._log_file)
    suspend_autoscaling_group_processes(as_conn3, as_group, as_group_processes_to_suspend, self._log_file)

    try:
        lb_mgr = load_balancing.get_lb_manager(
            self._cloud_connection, self._app['region'], online_app["safe-deployment"]["load_balancer_type"])

        # Check if instances are running
        if not get_instances_from_autoscaling(offline_app['autoscale']['name'], as_conn3):
            log(_yellow(" WARNING: Autoscaling Group [{0}] of offline app is empty. "
                        "No running instances to clean detected.".format(offline_app['autoscale']['name'])),
                self._log_file)

        temp_elb_names = lb_mgr.list_lbs_from_autoscale(
            offline_app['autoscale']['name'], self._log_file, {'bluegreen-temporary': 'true'})
        if len(temp_elb_names) > 1:
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    offline_app,
                    "There is more than one temporary ELB associated with the ASG '{0}'\n"
                    "ELBs found: {1}".format(offline_app['autoscale']['name'], str(temp_elb_names))))
            return

        # Detach temp ELB from ASG
        log(_green("Detach the current temporary ELB [{0}] from the AutoScale [{1}]".format(
            temp_elb_names, offline_app['autoscale']['name'])), self._log_file)
        lb_mgr.register_lbs_into_autoscale(offline_app['autoscale']['name'], temp_elb_names, None, self._log_file)

        # Update ASG and kill instances
        log("Update AutoScale with `0` on min, max, desired values.", self._log_file)
        log(_yellow("Destroy all instances in the AutoScale and all instances matching the `app_id` [{0}]".format(
            offline_app['_id'])), self._log_file)
        flush_instances_update_autoscale(as_conn3, self._cloud_connection, offline_app, self._log_file)

        # Destroy temp ELB
        if temp_elb_names:
            lb_mgr.destroy_lb(temp_elb_names[0], self._log_file)
        else:
            log(" INFO: No ELB to destroy", self._log_file)

        # Update App Autoscale values so the next buildimage or updateautoscaling does not set values different from 0
        self._update_app_autoscale_options(offline_app, self._log_file)

        # All good
        self._worker.update_status("done", message=self._get_notification_message_done(offline_app))
    except GCallException as e:
        self._worker.update_status("failed",
                                   message=self._get_notification_message_failed(offline_app, str(e)))
    finally:
        # Resume autoscaling group processes in any case
        resume_autoscaling_group_processes(as_conn3, as_group, as_group_processes_to_suspend, self._log_file)
def elb_rolling_update(self, instances_list):
    """ Manage the safe destroy process for the ELB.

        :param  instances_list  list: Instances to destroy (list of dicts, e.g. [{'id': XXX, 'private_ip_address': XXXX}, ...]).
        :return True if the operation succeeded, otherwise an Exception is raised.
    """
    if not self.as_name:
        raise GCallException('Cannot continue because there is no AutoScaling Group configured')

    app_region = self.app['region']
    as_conn = self.cloud_connection.get_connection(app_region, ['autoscaling'], boto_version='boto3')
    lb_mgr = load_balancing.get_lb_manager(self.cloud_connection, app_region, load_balancing.LB_TYPE_AWS_CLB)
    destroy_asg_policy = ['OldestLaunchConfiguration']

    try:
        elb_instances = lb_mgr.get_instances_status_from_autoscale(self.as_name, self.log_file)
        asg_infos = get_autoscaling_group_object(as_conn, self.as_name)
        if not len(elb_instances):
            raise GCallException('Cannot continue because there is no ELB configured in the AutoScaling Group')
        elif len([i for i in elb_instances.values() if 'outofservice' in i.values()]):
            raise GCallException('Cannot continue because one or more instances are in the out of service state')
        elif not check_autoscale_instances_lifecycle_state(asg_infos['Instances']):
            raise GCallException('Cannot continue because one or more instances are not in the InService lifecycle state')
        else:
            group_size = len(instances_list)
            original_termination_policies = asg_infos['TerminationPolicies']

            log(_green('Suspending "Terminate" process in the AutoScale and provisioning %s instance(s)' % group_size),
                self.log_file)
            suspend_autoscaling_group_processes(as_conn, self.as_name, ['Terminate'], self.log_file)
            update_auto_scaling_group_attributes(as_conn, self.as_name, asg_infos['MinSize'],
                                                 asg_infos['MaxSize'] + group_size,
                                                 asg_infos['DesiredCapacity'] + group_size)

            log(_green('Deregister old instances from the Load Balancer (%s)' %
                       str([host['id'] for host in instances_list])), self.log_file)
            lb_mgr.deregister_instances_from_lbs(elb_instances.keys(),
                                                 [host['id'] for host in instances_list], self.log_file)
            wait_con_draining = int(lb_mgr.get_lbs_max_connection_draining_value(elb_instances.keys()))
            log('Waiting {0}s: the connection draining time'.format(wait_con_draining), self.log_file)
            time.sleep(wait_con_draining)

            asg_updated_infos = get_autoscaling_group_object(as_conn, self.as_name)
            while len(asg_updated_infos['Instances']) < asg_updated_infos['DesiredCapacity']:
                log('Waiting 30s because the instance(s) are not provisioned in the AutoScale', self.log_file)
                time.sleep(30)
                asg_updated_infos = get_autoscaling_group_object(as_conn, self.as_name)
            while not check_autoscale_instances_lifecycle_state(asg_updated_infos['Instances']):
                log('Waiting 30s because the instance(s) are not in the InService state in the AutoScale',
                    self.log_file)
                time.sleep(30)
                asg_updated_infos = get_autoscaling_group_object(as_conn, self.as_name)
            while len([i for i in lb_mgr.get_instances_status_from_autoscale(self.as_name, self.log_file).values()
                       if 'outofservice' in i.values()]):
                log('Waiting 10s because the instance(s) are not in service in the ELB', self.log_file)
                time.sleep(10)

            suspend_autoscaling_group_processes(as_conn, self.as_name, ['Launch', 'Terminate'], self.log_file)
            log(_green('Restore initial AutoScale attributes and destroy old instances for this group (%s)' %
                       str([host['id'] for host in instances_list])), self.log_file)
            update_auto_scaling_group_attributes(as_conn, self.as_name, asg_infos['MinSize'], asg_infos['MaxSize'],
                                                 asg_infos['DesiredCapacity'], destroy_asg_policy)
            destroy_specific_ec2_instances(self.cloud_connection, self.app, instances_list, self.log_file)

            resume_autoscaling_group_processes(as_conn, self.as_name, ['Terminate'], self.log_file)
            asg_updated_infos = get_autoscaling_group_object(as_conn, self.as_name)
            while len(asg_updated_infos['Instances']) > asg_updated_infos['DesiredCapacity']:
                log('Waiting 20s because the old instance(s) are not removed from the AutoScale', self.log_file)
                time.sleep(20)
                asg_updated_infos = get_autoscaling_group_object(as_conn, self.as_name)

            update_auto_scaling_group_attributes(as_conn, self.as_name, asg_infos['MinSize'], asg_infos['MaxSize'],
                                                 asg_infos['DesiredCapacity'], original_termination_policies)
            log(_green('%s instance(s) have been re-generated and are registered in their ELB' % group_size),
                self.log_file)
            return True
    except Exception as e:
        raise
    finally:
        resume_autoscaling_group_processes(as_conn, self.as_name, ['Launch', 'Terminate'], self.log_file)
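
# Illustrative sketch, not part of this module: the three waiting loops in elb_rolling_update()
# share one shape, "poll a predicate with a fixed delay". A generic standalone helper is shown
# below; `predicate`, `delay`, `message` and `log` are hypothetical parameter names.
import time


def poll_until(predicate, delay, message, log):
    """Call `predicate()` repeatedly, sleeping `delay` seconds between attempts, until it is truthy."""
    while not predicate():
        log(message)
        time.sleep(delay)

# Example (hypothetical callables), equivalent to the "desired capacity" loop above:
#   poll_until(lambda: len(get_asg()['Instances']) >= get_asg()['DesiredCapacity'],
#              30, 'Waiting 30s for the AutoScale to provision instances', my_log)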
def execute(self):
    """Execute all checks and preparations."""
    log(_green("STATE: Started"), self._log_file)
    online_app, offline_app = get_blue_green_apps(self._app, self._db.apps, self._log_file)
    if not online_app:
        self._worker.update_status(
            "aborted",
            message=self._get_notification_message_aborted(
                self._app, "Blue/green is not enabled on this app or is not well configured"))
        return

    copy_ami_option = (self._job['options'][0]
                       if 'options' in self._job and len(self._job['options']) > 0
                       else get_blue_green_copy_ami_config(self._config))
    copy_ami_option = boolify(copy_ami_option)

    app_region = self._app['region']

    as_conn3 = self._cloud_connection.get_connection(app_region, ['autoscaling'], boto_version='boto3')
    as_group, as_group_processes_to_suspend = get_autoscaling_group_and_processes_to_suspend(
        as_conn3, offline_app, self._log_file)
    suspend_autoscaling_group_processes(as_conn3, as_group, as_group_processes_to_suspend, self._log_file)

    try:
        lb_mgr = load_balancing.get_lb_manager(
            self._cloud_connection, self._app['region'], online_app["safe-deployment"]["load_balancer_type"])

        # Check if the app is online
        if not online_app:
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    self._app, "Blue/green is not enabled on this app or is not well configured"))
            return

        running_jobs = get_running_jobs(self._db, online_app['_id'], offline_app['_id'], self._job['_id'])
        if abort_if_other_bluegreen_job(
                running_jobs,
                self._worker,
                self._get_notification_message_aborted(
                    self._app,
                    "Please wait until the end of the current jobs before triggering a Blue/green operation"),
                self._log_file):
            return

        # Check if the app has an up-to-date AMI
        if ((not copy_ami_option and 'ami' not in offline_app)
                or (copy_ami_option and 'ami' not in online_app)):
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    offline_app, "Please run `Buildimage` first or use the `copy_ami` option"))
            return

        # Check if the app has an AutoScale
        if offline_app['autoscale']['name'] and online_app['autoscale']['name']:
            if not (check_autoscale_exists(self._cloud_connection, offline_app['autoscale']['name'],
                                           offline_app['region'])
                    and check_autoscale_exists(self._cloud_connection, online_app['autoscale']['name'],
                                               online_app['region'])):
                self._worker.update_status(
                    "aborted",
                    message=self._get_notification_message_aborted(
                        offline_app,
                        "Please check that the configured AutoScale on both green and blue app exists."))
                return
        else:
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    offline_app, "Please set an AutoScale on both green and blue app."))
            return

        # Check that the green and blue apps use two different AutoScale groups
        if offline_app['autoscale']['name'] == online_app['autoscale']['name']:
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    offline_app, "Please set a different AutoScale on green and blue app."))
            return

        if copy_ami_option:
            log("Copy AMI option activated. AMI used by [{0}] will be reused by [{1}]".format(
                online_app['autoscale']['name'], offline_app['autoscale']['name']), self._log_file)

        # Check if modules have been deployed
        if get_blue_green_config(self._config, 'preparebluegreen', 'module_deploy_required', False):
            if not check_app_manifest(offline_app, self._config, self._log_file,
                                      get_path_from_app_with_color(offline_app)):
                self._worker.update_status(
                    "aborted",
                    message=self._get_notification_message_aborted(
                        offline_app, "Please deploy your app's modules"))
                return

        # Check if instances are already running
        if get_instances_from_autoscaling(offline_app['autoscale']['name'], as_conn3):
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    offline_app, "Autoscaling Group of offline app should be empty."))
            return

        # Get the online ELB
        online_elbs = lb_mgr.list_lbs_from_autoscale(online_app['autoscale']['name'], self._log_file)
        if len(online_elbs) == 0:
            self._worker.update_status(
                "aborted",
                message=self._get_notification_message_aborted(
                    offline_app, "Online app AutoScale is not attached to a valid Elastic Load Balancer"))
            return

        # Create the temporary ELB: ghost-bluegreentemp-{original ELB name}, duplicated from the online ELB
        temp_elb_name, new_elb_dns = (None, None)
        create_temporary_elb_option = (self._job['options'][1]
                                       if 'options' in self._job and len(self._job['options']) > 1
                                       else get_blue_green_create_temporary_elb_config(self._config))
        if boolify(create_temporary_elb_option):
            online_elb = online_elbs[0]
            temp_elb_name = "bgtmp-{0}".format(offline_app['_id'])[:31]  # ELB name is 32 characters long max
            log(_green("Creating the temporary ELB [{0}] by copying parameters from [{1}]".format(
                temp_elb_name, online_elb)), self._log_file)
            new_elb_dns = lb_mgr.copy_lb(temp_elb_name, online_elb,
                                         {'app_id': str(offline_app['_id']), 'bluegreen-temporary': 'true'},
                                         self._log_file)

            # Register the temporary ELB into the AutoScale
            log(_green("Attaching ELB [{0}] to the AutoScale [{1}]".format(
                temp_elb_name, offline_app['autoscale']['name'])), self._log_file)
            lb_mgr.register_lbs_into_autoscale(offline_app['autoscale']['name'], [], [temp_elb_name],
                                               self._log_file)

        offline_app['autoscale']['min'] = online_app['autoscale']['min']
        offline_app['autoscale']['max'] = online_app['autoscale']['max']
        if copy_ami_option:
            offline_app['ami'] = online_app['ami']
            offline_app['build_infos']['ami_name'] = online_app['build_infos']['ami_name']
            log("Copying AMI [{0}]({1}) into offline app [{2}]".format(
                offline_app['ami'], offline_app['build_infos']['ami_name'], str(offline_app['_id'])),
                self._log_file)
            self._update_app_ami(offline_app)

        # Update AutoScale properties in the App DB document
        self._update_app_autoscale_options(offline_app, online_app, self._log_file)

        # Update AutoScale properties and start instances
        if copy_ami_option:
            try:
                if not create_userdata_launchconfig_update_asg(offline_app['ami'], self._cloud_connection,
                                                               offline_app, self._config, self._log_file,
                                                               update_as_params=True):
                    self._worker.update_status(
                        "failed", message=self._get_notification_message_failed(online_app, offline_app, ""))
                    return
            except:
                traceback.print_exc(self._log_file)
                self._worker.update_status(
                    "failed", message=self._get_notification_message_failed(online_app, offline_app, ""))
                return
        else:
            update_auto_scale(self._cloud_connection, offline_app, None, self._log_file, update_as_params=True)

        log(_green("Starting at least [{0}] instance(s) into the AutoScale [{1}]".format(
            offline_app['autoscale']['min'], offline_app['autoscale']['name'])), self._log_file)

        self._worker.update_status(
            "done", message=self._get_notification_message_done(offline_app, temp_elb_name, new_elb_dns))
    except GCallException as e:
        self._worker.update_status(
            "failed", message=self._get_notification_message_failed(online_app, offline_app, e))
    finally:
        resume_autoscaling_group_processes(as_conn3, as_group, as_group_processes_to_suspend, self._log_file)
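
# Illustrative sketch, not part of this module: copy_ami_option and create_temporary_elb_option
# above both follow "take the nth job option if present, otherwise fall back to the global
# config value". The helper below captures that pattern; `job`, `index` and `config_fallback`
# are hypothetical names, not existing APIs of this codebase.
def resolve_job_option(job, index, config_fallback):
    """Return job['options'][index] when it exists, otherwise the configured default."""
    options = job.get('options', [])
    return options[index] if len(options) > index else config_fallback

# Example (hypothetical arguments):
#   copy_ami = boolify(resolve_job_option(job, 0, get_blue_green_copy_ami_config(config)))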
def _swap_asg(self, lb_mgr, swap_execution_strategy, online_app, to_deploy_app, log_file):
    """ Swap the group of instances attached to the main ELB from the A app to the B app.

        :param  swap_execution_strategy: string: The swap strategy, either 'isolated' or 'overlap'
        :param  online_app: object: Ghost app - ASG instances to de-register
        :param  to_deploy_app: object: Ghost app - ASG instances to register
        :param  log_file: str:
        :return tuple (Main ELB name, Main ELB dns)
    """
    app_region = self._app['region']
    as_conn3 = self._cloud_connection.get_connection(app_region, ['autoscaling'], boto_version='boto3')

    # Retrieve autoscaling infos, if any
    as_group_old, as_group_old_processes_to_suspend = get_autoscaling_group_and_processes_to_suspend(
        as_conn3, online_app, log_file)
    as_group_new, as_group_new_processes_to_suspend = get_autoscaling_group_and_processes_to_suspend(
        as_conn3, to_deploy_app, log_file)

    # Retrieve ELB instances
    elb_online_instances = lb_mgr.get_instances_status_from_autoscale(online_app['autoscale']['name'], log_file)
    log(_green('Online configuration : {0}'.format(str(elb_online_instances))), self._log_file)
    elb_tempwarm_instances = lb_mgr.get_instances_status_from_autoscale(to_deploy_app['autoscale']['name'], log_file)
    log(_green('Offline configuration : {0}'.format(str(elb_tempwarm_instances))), self._log_file)

    elb_online, health_check_config = (None, None)
    try:
        log("Swapping using strategy '{0}'".format(swap_execution_strategy), self._log_file)

        # Suspend autoscaling groups
        suspend_autoscaling_group_processes(as_conn3, as_group_old, as_group_old_processes_to_suspend, log_file)
        suspend_autoscaling_group_processes(as_conn3, as_group_new, as_group_new_processes_to_suspend, log_file)

        # Retrieve online ELB object
        elb_online = lb_mgr.get_by_name(elb_online_instances.keys()[0])
        health_check_config = lb_mgr.get_health_check(elb_online.name)

        log(_green('Changing HealthCheck to be "minimal" on online ELB "{0}"'.format(elb_online)), self._log_file)
        lb_mgr.configure_health_check(
            elb_online.name,
            interval=get_blue_green_config(self._config, 'swapbluegreen', 'healthcheck_interval', 5),
            timeout=get_blue_green_config(self._config, 'swapbluegreen', 'healthcheck_timeout', 2),
            healthy_threshold=get_blue_green_config(self._config, 'swapbluegreen',
                                                    'healthcheck_healthy_threshold', 2))

        if swap_execution_strategy == 'isolated':
            log(_green('De-register all online instances from ELB {0}'.format(
                ', '.join(elb_online_instances.keys()))), self._log_file)
            lb_mgr.deregister_all_instances_from_lbs(elb_online_instances, self._log_file)
            self._wait_draining_connection(lb_mgr, elb_online_instances.keys())
            log(_green('Register and put online new instances to online ELB {0}'.format(
                ', '.join(elb_online_instances.keys()))), self._log_file)
            lb_mgr.register_all_instances_to_lbs(elb_online_instances.keys(), elb_tempwarm_instances,
                                                 self._log_file)
        elif swap_execution_strategy == 'overlap':
            log(_green('De-register old instances from ELB {0}'.format(
                ', '.join(elb_online_instances.keys()))), self._log_file)
            lb_mgr.deregister_all_instances_from_lbs(elb_online_instances, self._log_file)
            log(_green('Register new instances in the ELB: {0}'.format(
                elb_online['LoadBalancerName'])), self._log_file)
            lb_mgr.register_all_instances_to_lbs(elb_online_instances.keys(), elb_tempwarm_instances,
                                                 self._log_file)
        else:
            log("Invalid swap execution strategy selected : '{0}'. "
                "Please choose between 'isolated' and 'overlap'".format(swap_execution_strategy), self._log_file)
            return None, None

        if not self._wait_until_instances_registered(
                lb_mgr, elb_online_instances.keys(),
                get_blue_green_config(self._config, 'swapbluegreen', 'registreation_timeout', 45)):
            log(_red("Timeout reached while waiting for the instances registration. Rollback process launched"),
                self._log_file)
            lb_mgr.deregister_instances_from_lbs(
                elb_online_instances.keys(),
                elb_tempwarm_instances[elb_tempwarm_instances.keys()[0]].keys(),
                self._log_file)
            lb_mgr.register_all_instances_to_lbs(elb_online_instances.keys(), elb_online_instances, self._log_file)
            lb_mgr.register_all_instances_to_lbs(elb_tempwarm_instances.keys(), elb_tempwarm_instances,
                                                 self._log_file)
            log(_yellow("Rollback completed."), self._log_file)
            return None, None

        log(_green('De-register all instances from temp (warm) ELB {0}'.format(
            ', '.join(elb_tempwarm_instances.keys()))), self._log_file)
        lb_mgr.deregister_all_instances_from_lbs(elb_tempwarm_instances, self._log_file)

        log(_green('Register old instances to Temp ELB {0} (useful for another Rollback Swap)'.format(
            ', '.join(elb_tempwarm_instances.keys()))), self._log_file)
        lb_mgr.register_all_instances_to_lbs(elb_tempwarm_instances.keys(), elb_online_instances, self._log_file)

        log(_green('Update autoscale groups with their new ELB'), self._log_file)
        lb_mgr.register_lbs_into_autoscale(to_deploy_app['autoscale']['name'], elb_tempwarm_instances.keys(),
                                           elb_online_instances.keys(), self._log_file)
        lb_mgr.register_lbs_into_autoscale(online_app['autoscale']['name'], elb_online_instances.keys(),
                                           elb_tempwarm_instances.keys(), self._log_file)

        # Update the _is_online field in DB on both apps
        self._update_app_is_online(online_app, False)  # not online anymore
        self._update_app_is_online(to_deploy_app, True)  # promotion!

        online_elb_name = elb_online_instances.keys()[0]
        return str(online_elb_name), lb_mgr.get_dns_name(online_elb_name)
    finally:
        if elb_online and health_check_config:
            log(_green('Restoring original HealthCheck config on online ELB "{0}"'.format(
                elb_online['LoadBalancerName'])), self._log_file)
            lb_mgr.configure_health_check(elb_online['LoadBalancerName'], **health_check_config)
        resume_autoscaling_group_processes(as_conn3, as_group_old, as_group_old_processes_to_suspend, log_file)
        resume_autoscaling_group_processes(as_conn3, as_group_new, as_group_new_processes_to_suspend, log_file)