def wait_for_healthok(instance):
    """Block until ``is_everything_awesome(instance)`` reports success.

    Progress is shown on stdout: the word ``Waiting`` followed by one dot per
    unsuccessful poll, with ten seconds between polls. There is no timeout;
    the call only returns once the check passes.

    Args:
        instance: The instance to poll; passed unchanged to
            ``is_everything_awesome`` and to the log message.
    """
    logger.info("Waiting for instance to be Online %s", instance)
    sys.stdout.write('Waiting')
    while True:
        if is_everything_awesome(instance):
            break
        sys.stdout.write('.')
        # Push the dot out immediately so terminal multiplexers such as tmux
        # show progress instead of buffering it.
        sys.stdout.flush()
        time.sleep(10)
    print("Ok, Everything is awesome!")
def wait_for_elb_state(instance, state):
    """Poll ``instance`` until its ``elb_health`` equals ``state``.

    The instance is refreshed with ``instance.update()`` before every check,
    and polls are five seconds apart. There is no timeout.

    Args:
        instance: Object exposing ``update()``, ``elb_health`` and
            ``instance.state['Name']``.
        state: The ELB health value to wait for.

    Raises:
        RuntimeError: If the refreshed instance state name is anything other
            than ``'running'``.
    """
    logger.info("Waiting for %s to reach ELB state '%s'...", instance, state)
    reached = False
    while not reached:
        instance.update()
        current = instance.instance.state['Name']
        if current != 'running':
            # No point waiting on the load balancer for a machine that has
            # stopped or is being terminated.
            raise RuntimeError(
                'Instance no longer running (state {})'.format(current))
        logger.debug("State is %s", instance.elb_health)
        reached = instance.elb_health == state
        if not reached:
            time.sleep(5)
    logger.info("...done")
def instances_restart(cfg: Config, motd: str):
    """Restart the instances one by one, picking up new code.

    After confirmation via ``are_you_sure``, the message of the day is
    replaced with ``motd`` and every instance returned by ``pick_instances``
    is restarted through ``restart_one_instance``. Instances that are no
    longer in an auto-scaling group, or whose lifecycle state is not
    ``InService``, are skipped with a warning.

    Cleanup always runs, even if an unexpected exception (or Ctrl-C)
    interrupts the rollout: groups whose desired capacity was recorded in
    ``modified_groups`` are set back to that value, and the previous message
    of the day is restored.

    Args:
        cfg: Configuration passed through to the helper functions.
        motd: Message of the day to display while the restart is running.

    Returns:
        ``None`` if the user declines the confirmation. Otherwise the
        function does not return; it terminates the process via ``sys.exit``.

    Raises:
        SystemExit: With status 1 if any ``restart_one_instance`` call raised
            ``RuntimeError``, else status 0.
    """
    if not are_you_sure(
            'restart all instances with version {}'.format(
                describe_current_release(cfg)), cfg):
        return

    begin_time = datetime.datetime.now()
    # Keep the previous MOTD so it can be put back once we are done.
    old_motd = update_motd(cfg, motd)
    modified_groups: Dict[str, int] = {}
    failed = False

    try:
        to_restart = pick_instances(cfg)
        for index, instance in enumerate(to_restart):
            logger.info("Restarting %s (%d of %d)...", instance, index + 1,
                        len(to_restart))
            as_instance_status = instance.describe_autoscale()
            if not as_instance_status:
                logger.warning("Skipping %s as it is no longer in the ASG",
                               instance)
                continue
            as_group_name = as_instance_status['AutoScalingGroupName']
            if as_instance_status['LifecycleState'] != 'InService':
                logger.warning("Skipping %s as it is not InService (%s)",
                               instance, as_instance_status)
                continue

            try:
                restart_one_instance(as_group_name, instance, modified_groups)
            except RuntimeError as e:
                # One bad instance must not abort the whole rollout: record
                # the failure for the exit status and move on.
                logger.error("Failed restarting %s - skipping: %s", instance,
                             e)
                failed = True
    finally:
        # Undo the temporary changes no matter how the loop ended, so an
        # unexpected error cannot leave capacity raised or the MOTD stale.
        try:
            for group, desired in modified_groups.items():
                logger.info("Putting desired instances for %s back to %d",
                            group, desired)
                as_client.update_auto_scaling_group(
                    AutoScalingGroupName=group, DesiredCapacity=desired)
        finally:
            # Restore the MOTD even if resetting a group's capacity failed.
            update_motd(cfg, old_motd)

    end_time = datetime.datetime.now()
    delta_time = end_time - begin_time
    print(f'Instances restarted in {delta_time.total_seconds()} seconds')
    sys.exit(1 if failed else 0)
def restart_one_instance(as_group_name: str, instance: Instance,
                         modified_groups: Dict[str, int]):
    """Restart the service on one instance while it is in ASG standby.

    Steps, in order: enable scale-in protection, move the instance into
    standby, restart the ``compiler-explorer`` systemd unit remotely, wait for
    the health check, move the instance out of standby, wait for it to be
    ``InService`` and ``healthy`` on the ELB, then disable scale-in
    protection.

    Args:
        as_group_name: Name of the auto-scaling group the instance is in.
        instance: The instance to restart.
        modified_groups: Mutated in place. If the group's desired capacity
            equals its minimum size, that desired capacity is stored under
            the group's name so the caller can restore it later.

    Raises:
        RuntimeError: Propagated from ``wait_for_elb_state`` if the instance
            stops running; other helper errors propagate unchanged. Scale-in
            protection is left enabled in that case.
    """
    ec2_id = instance.instance.instance_id

    logger.info("Enabling instance protection for %s", instance)
    as_client.set_instance_protection(
        AutoScalingGroupName=as_group_name,
        InstanceIds=[ec2_id],
        ProtectedFromScaleIn=True,
    )

    group_info = get_autoscaling_group(as_group_name)
    at_minimum = group_info['DesiredCapacity'] == group_info['MinSize']
    if at_minimum:
        # The group is already at its minimum size, so desired capacity is
        # not decremented below; remember the current value for the caller.
        logger.info("Group '%s' needs to be adjusted to keep enough nodes",
                    as_group_name)
        group_key = group_info['AutoScalingGroupName']
        modified_groups[group_key] = group_info['DesiredCapacity']

    logger.info("Putting %s into standby", instance)
    decrement_capacity = not at_minimum
    as_client.enter_standby(
        InstanceIds=[ec2_id],
        AutoScalingGroupName=as_group_name,
        ShouldDecrementDesiredCapacity=decrement_capacity,
    )
    wait_for_autoscale_state(instance, 'Standby')

    logger.info("Restarting service on %s", instance)
    restart_cmd = ['sudo', 'systemctl', 'restart', 'compiler-explorer']
    output = exec_remote(instance, restart_cmd)
    if output:
        logger.warning("Restart gave some output: %s", output)
    wait_for_healthok(instance)

    logger.info("Moving %s out of standby", instance)
    as_client.exit_standby(
        InstanceIds=[ec2_id],
        AutoScalingGroupName=as_group_name,
    )
    wait_for_autoscale_state(instance, 'InService')
    wait_for_elb_state(instance, 'healthy')

    logger.info("Disabling instance protection for %s", instance)
    as_client.set_instance_protection(
        AutoScalingGroupName=as_group_name,
        InstanceIds=[ec2_id],
        ProtectedFromScaleIn=False,
    )
    logger.info("Instance restarted ok")