Exemplo n.º 1
0
def wait_for_healthok(instance):
    """Block until ``is_everything_awesome(instance)`` returns a truthy value.

    Writes 'Waiting' to stdout once, then one '.' before each 10-second
    pause between checks, and prints a confirmation line when the check
    finally passes.
    """
    logger.info("Waiting for instance to be Online %s", instance)
    sys.stdout.write('Waiting')
    while True:
        if is_everything_awesome(instance):
            break
        sys.stdout.write('.')
        # Flush stdout so tmux updates
        sys.stdout.flush()
        time.sleep(10)
    print("Ok, Everything is awesome!")
Exemplo n.º 2
0
def wait_for_elb_state(instance, state):
    """Poll ``instance`` until its ``elb_health`` equals ``state``.

    Each iteration refreshes the instance via ``instance.update()`` and
    sleeps five seconds before retrying when the target state has not been
    reached yet. There is no timeout: the loop only ends on success or on
    the error below.

    Raises:
        RuntimeError: if, after a refresh, the instance's state name is
            anything other than 'running'.
    """
    logger.info("Waiting for %s to reach ELB state '%s'...", instance, state)
    reached = False
    while not reached:
        instance.update()
        current_state = instance.instance.state['Name']
        if current_state != 'running':
            raise RuntimeError(
                'Instance no longer running (state {})'.format(current_state))
        logger.debug("State is %s", instance.elb_health)
        reached = instance.elb_health == state
        if not reached:
            time.sleep(5)
    logger.info("...done")
Exemplo n.º 3
0
def instances_restart(cfg: Config, motd: str):
    """Restart the instances, picking up new code.

    After confirmation via ``are_you_sure``, the MOTD is replaced with
    ``motd`` and every instance returned by ``pick_instances`` is restarted
    in turn through ``restart_one_instance``. Instances that are no longer
    in an auto-scaling group, or whose lifecycle state is not 'InService',
    are skipped with a warning. A ``RuntimeError`` from a single restart is
    logged and marks the run as failed, but the remaining instances are
    still processed.

    Whatever happens during the restarts (including unexpected exceptions
    or a keyboard interrupt), the desired capacity of every group recorded
    in ``modified_groups`` is put back and the previous MOTD is restored.

    Unless the user declines the confirmation (in which case the function
    simply returns), the process exits via ``sys.exit``: status 1 when any
    restart failed, 0 otherwise.
    """
    if not are_you_sure(
            'restart all instances with version {}'.format(
                describe_current_release(cfg)), cfg):
        return
    begin_time = datetime.datetime.now()
    # Store old motd
    old_motd = update_motd(cfg, motd)
    modified_groups: Dict[str, int] = {}
    failed = False

    try:
        to_restart = pick_instances(cfg)

        for index, instance in enumerate(to_restart):
            logger.info("Restarting %s (%d of %d)...", instance, index + 1,
                        len(to_restart))
            as_instance_status = instance.describe_autoscale()
            if not as_instance_status:
                logger.warning("Skipping %s as it is no longer in the ASG",
                               instance)
                continue
            as_group_name = as_instance_status['AutoScalingGroupName']
            if as_instance_status['LifecycleState'] != 'InService':
                logger.warning("Skipping %s as it is not InService (%s)",
                               instance, as_instance_status)
                continue

            try:
                restart_one_instance(as_group_name, instance, modified_groups)
            except RuntimeError as e:
                logger.error("Failed restarting %s - skipping: %s", instance,
                             e)
                failed = True
                # TODO, what here?
    finally:
        # Always undo our temporary changes, even if something unexpected
        # (a non-RuntimeError exception, Ctrl-C, ...) aborted the loop.
        try:
            for group, desired in modified_groups.items():
                logger.info("Putting desired instances for %s back to %d",
                            group, desired)
                as_client.update_auto_scaling_group(
                    AutoScalingGroupName=group, DesiredCapacity=desired)
        finally:
            # Restore the MOTD even if resetting a group's capacity failed.
            update_motd(cfg, old_motd)

    end_time = datetime.datetime.now()
    delta_time = end_time - begin_time
    print(f'Instances restarted in {delta_time.total_seconds()} seconds')
    # Kept outside the ``finally`` blocks so an in-flight exception is never
    # replaced by SystemExit.
    sys.exit(1 if failed else 0)
Exemplo n.º 4
0
def restart_one_instance(as_group_name: str, instance: Instance,
                         modified_groups: Dict[str, int]):
    """Restart the compiler-explorer service on one instance, out of rotation.

    Steps, in order: enable scale-in protection, move the instance into
    standby, restart the service remotely, wait for it to report healthy,
    move it out of standby, wait for 'InService' and a 'healthy' ELB state,
    then disable scale-in protection again.

    When the group's desired capacity equals its minimum size, the standby
    request does not decrement the desired capacity; the group's current
    desired capacity is recorded in ``modified_groups`` (keyed by group
    name) so the caller can reset it afterwards.

    Nothing is rolled back here if a step fails: the exception propagates
    and the instance is left in whatever state it had reached.
    """
    target_ids = [instance.instance.instance_id]

    def set_scale_in_protection(enabled: bool):
        # Single place for the two protection toggles below.
        as_client.set_instance_protection(
            AutoScalingGroupName=as_group_name,
            InstanceIds=target_ids,
            ProtectedFromScaleIn=enabled)

    logger.info("Enabling instance protection for %s", instance)
    set_scale_in_protection(True)

    group_info = get_autoscaling_group(as_group_name)
    at_minimum_size = group_info['DesiredCapacity'] == group_info['MinSize']
    if at_minimum_size:
        logger.info("Group '%s' needs to be adjusted to keep enough nodes",
                    as_group_name)
        group_key = group_info['AutoScalingGroupName']
        modified_groups[group_key] = group_info['DesiredCapacity']

    logger.info("Putting %s into standby", instance)
    # Only let AWS shrink the desired capacity when the group has headroom
    # above its minimum size.
    as_client.enter_standby(
        AutoScalingGroupName=as_group_name,
        InstanceIds=target_ids,
        ShouldDecrementDesiredCapacity=not at_minimum_size)
    wait_for_autoscale_state(instance, 'Standby')

    logger.info("Restarting service on %s", instance)
    restart_command = ['sudo', 'systemctl', 'restart', 'compiler-explorer']
    command_output = exec_remote(instance, restart_command)
    if command_output:
        logger.warning("Restart gave some output: %s", command_output)
    wait_for_healthok(instance)

    logger.info("Moving %s out of standby", instance)
    as_client.exit_standby(AutoScalingGroupName=as_group_name,
                           InstanceIds=target_ids)
    wait_for_autoscale_state(instance, 'InService')
    wait_for_elb_state(instance, 'healthy')

    logger.info("Disabling instance protection for %s", instance)
    set_scale_in_protection(False)
    logger.info("Instance restarted ok")