Пример #1
0
def tasks_are_healthy(ecs_client, cluster_name, service_name):
    """Report whether every task of an ECS service is HEALTHY.

    Pages through ``list_tasks`` for the service (up to 100 ARNs per page)
    and inspects each page of tasks with ``describe_tasks``.

    Args:
        ecs_client: ECS client exposing ``list_tasks`` and ``describe_tasks``.
        cluster_name: Value passed as ``cluster`` to both calls.
        service_name: Value passed as ``serviceName`` to ``list_tasks``.

    Returns:
        False as soon as a task whose ``healthStatus`` is not ``'HEALTHY'``
        is seen (a warning is printed for it); True otherwise, including
        when the service currently lists no tasks.
    """
    request = {
        'cluster': cluster_name,
        'serviceName': service_name,
        'maxResults': 100,
    }
    healthy = 0
    while True:
        task_response = ecs_client.list_tasks(**request)
        tasks = task_response.get('taskArns')

        # describe_tasks rejects an empty task list, so only call it when
        # this page actually returned ARNs.
        if tasks:
            described = ecs_client.describe_tasks(cluster=cluster_name,
                                                  tasks=tasks)
            for task in described.get('tasks'):
                task_arn = task.get('taskArn')
                status = task.get('healthStatus')
                if status != 'HEALTHY':
                    utils.print_warning(f'task {task_arn} status: {status}')
                    return False
                healthy += 1

        next_token = task_response.get('nextToken')
        if not next_token:
            break
        # Only send nextToken when the previous page supplied one.
        request['nextToken'] = next_token

    utils.print_info(f'{service_name} {healthy} tasks are healthy')
    return True
Пример #2
0
def delete_param(name, region):
    """Remove SSM parameter.

    Args:
        name: Name of the parameter to delete.
        region: Region passed to ``boto3.client('ssm', ...)``.

    Raises:
        SystemExit: With status 1 when SSM reports ``ParameterNotFound``
            (an error message is printed first).
        botocore.exceptions.ClientError: Any other client error is
            propagated to the caller instead of being swallowed.
    """
    ssm = boto3.client('ssm', region)
    try:
        utils.print_info(json.dumps(ssm.delete_parameter(Name=name)))
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == 'ParameterNotFound':
            utils.print_error(f'Cannot find {name}')
            sys.exit(1)
        # Re-raise at the except level: unexpected errors must not be
        # silently dropped (matches the handling in put_param).
        raise
Пример #3
0
def poll_cluster_state(ecs_client,
                       cluster_name,
                       service_names,
                       polling_timeout,
                       stale_s=None):
    """
    Poll services in an ECS cluster for service stability.

    Repeatedly sleeps ``SLEEP_TIME_S`` and calls ``describe_services`` for
    the services still being waited on. A service is dropped from the wait
    list once ``service_is_stable`` accepts it and, when its
    ``desiredCount`` is above zero, ``tasks_are_healthy`` also passes.
    Returns (None) when no services remain; an empty ``service_names``
    returns immediately.

    Args:
        ecs_client: ECS client exposing ``describe_services`` (and the
            calls used by ``tasks_are_healthy``).
        cluster_name: Cluster to query.
        service_names: List of service names; not mutated (a copy is used).
        polling_timeout: Maximum number of seconds to keep polling.
        stale_s: When truthy, services for which ``has_recent_event``
            returns false are skipped for that polling round.

    Raises:
        TimeoutException: When ``polling_timeout`` seconds have elapsed
            with services still pending.
    """

    utils.print_info(
        f'Polling cluster services: {service_names} in cluster: {cluster_name} with timeout: {polling_timeout}s'
    )
    start_time = time.time()
    services = service_names.copy()
    # Guard the index: an empty list has nothing to poll and must not raise.
    is_2019_arn_format = bool(services) and services[0].startswith(
        f'{cluster_name}/')
    last_response = None
    while services:
        time.sleep(SLEEP_TIME_S)
        elapsed = time.time() - start_time
        if elapsed > polling_timeout:
            # Only dump events when at least one response was received
            # (same guard as poll_deployment_state).
            if last_response:
                print_events(last_response)
            raise TimeoutException(
                f'Polling timed out! Check {service_names} status.')

        response = ecs_client.describe_services(cluster=cluster_name,
                                                services=services)
        last_response = response
        if not response.get('services'):
            utils.print_warning(
                'describe_services got an empty services response')
            continue
        for service_response in response.get('services'):
            if stale_s:
                # check that the service has started to change based on events
                if not has_recent_event(service_response, start_time, stale_s):
                    continue
            service_name = service_response.get('serviceName')
            is_active = service_response.get('desiredCount') > 0

            if service_is_stable(service_response):
                # only check services that are active (desiredCount > 0)
                if is_active and not tasks_are_healthy(
                        ecs_client, cluster_name, service_name):
                    utils.print_warning(
                        f'{service_name} tasks are still not healthy')
                    continue
                # The wait list holds names in the form the caller gave
                # them, so remove using the same "<cluster>/<service>" prefix.
                if is_2019_arn_format:
                    services.remove(f'{cluster_name}/{service_name}')
                else:
                    services.remove(service_name)
                elapsed = int(time.time() - start_time)
                utils.print_success(
                    f'{service_name} tasks are healthy. Elapsed: {elapsed}s')
Пример #4
0
def encrypt(data, alias, context, region):
    """Encrypt ``data`` with KMS and return the ciphertext as base64 text.

    Args:
        data: Payload to encrypt; a ``str`` is ASCII-encoded first, any
            other value is handed to KMS unchanged.
        alias: Key alias resolved to a key id via ``get_kms_key_id``.
        context: Encryption context forwarded to the KMS ``encrypt`` call.
        region: Region used for the KMS client and the alias lookup.

    Returns:
        The ``CiphertextBlob`` from the KMS response, base64-encoded and
        decoded to an ASCII ``str``.
    """
    plaintext = str.encode(data, 'ascii') if isinstance(data, str) else data

    kms_client = boto3.client('kms', region)
    key_id = get_kms_key_id(alias, region)
    encrypted = kms_client.encrypt(
        KeyId=key_id,
        Plaintext=plaintext,
        EncryptionContext=context,
    )
    utils.print_info(
        f'Encryption using keyId {key_id} with context: {context}')

    ciphertext_b64 = base64.b64encode(encrypted['CiphertextBlob'])
    return ciphertext_b64.decode('ascii')
Пример #5
0
def get_already_updated_instances(ecs_response, ami_id):
    """Collect EC2 ids of container instances that already run ``ami_id``.

    Args:
        ecs_response: Mapping with a ``containerInstances`` list, each
            entry providing ``ec2InstanceId`` and ``status``.
        ami_id: AMI id compared against ``get_ami_id`` of each entry.

    Returns:
        List of ``ec2InstanceId`` values whose AMI equals ``ami_id``.
        Entries whose status is ``DRAINING`` are never included.
    """
    up_to_date = []

    for entry in ecs_response.get('containerInstances'):
        ec2_id = entry.get('ec2InstanceId')

        if entry.get('status') == 'DRAINING':
            # Unexpected, but the caller should still go on to terminate it:
            # the services were already verified to be in a steady state.
            utils.print_warning(f'{ec2_id} was already draining')
            continue

        current_ami = get_ami_id(entry)
        utils.print_info(f'Instance to drain: {ec2_id}/{current_ami}')
        if current_ami != ami_id:
            continue

        utils.print_warning(
            f'{ec2_id} already uses ami_id {ami_id}. Skipping.')
        up_to_date.append(ec2_id)

    return up_to_date
Пример #6
0
def poll_deployment_state(ecs_client,
                          cluster_name,
                          service_name,
                          polling_timeout,
                          stale_s=None):
    """
    Wait until the first listed deployment of an ECS service is complete.

    Each round sleeps ``SLEEP_TIME_S``, describes the service, and returns
    once ``deployment_is_stable`` accepts the first entry of
    ``deployments`` and ``tasks_are_healthy`` passes for the service.
    Raises ``TimeoutException`` when ``polling_timeout`` seconds have
    passed; events from the last response are printed first if any
    response was received.
    """

    utils.print_info(
        f'Polling for deploy state service: {service_name} in cluster: {cluster_name}'
    )
    began_at = time.time()
    latest = []
    while True:
        time.sleep(SLEEP_TIME_S)

        timed_out = (time.time() - began_at) > polling_timeout
        if timed_out:
            if latest:
                print_events(latest)
            raise TimeoutException(
                f'Polling timed out! Check {service_name} status.')

        latest = ecs_client.describe_services(cluster=cluster_name,
                                              services=[service_name])
        described = latest.get('services')
        if not described:
            utils.print_warning(
                'describe_services got an empty services response')
            continue

        first_deployment = described[0].get('deployments')[0]
        if not deployment_is_stable(first_deployment, began_at, stale_s):
            continue

        # a stable deployment still has to have healthy tasks
        if not tasks_are_healthy(ecs_client, cluster_name, service_name):
            utils.print_warning(
                f'{service_name} tasks are still not healthy')
            continue

        elapsed = int(time.time() - began_at)
        utils.print_success(
            f'{service_name} deploy is complete. Elapsed: {elapsed}s')
        return
Пример #7
0
def put_param(name,
              value,
              region,
              kms_key_alias=None,
              overwrite=False,
              plaintext=True):
    """Write ``value`` to the SSM parameter ``name``.

    With ``kms_key_alias`` the parameter is stored as a ``SecureString``
    under the key the alias resolves to; without it a warning is printed
    and a plain ``String`` is stored. The ``plaintext`` argument is
    accepted but not used here.

    Raises:
        ParamException: The alias did not resolve to a key.
        SystemExit: Status 1 when SSM reports ``ParameterAlreadyExists``.
        botocore.exceptions.ClientError: Any other client error.
    """
    ssm = boto3.client('ssm', region)
    request = {
        'Name': name,
        'Description': name,
        'Value': value,
        'Overwrite': overwrite,
    }

    try:
        if kms_key_alias:
            kms_key = kms.get_kms_key_id(kms_key_alias, region)
            if not kms_key:
                raise ParamException(
                    f'No key found for alias {kms_key_alias} {region}')
            request.update(Type='SecureString', KeyId=kms_key)
        else:
            utils.print_warning('Creating without encryption')
            request['Type'] = 'String'

        result = ssm.put_parameter(**request)
        utils.print_info(json.dumps(result))
    except botocore.exceptions.ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code != 'ParameterAlreadyExists':
            raise e
        utils.print_error(
            f'setting "{name}" already exists, use -f to overwrite.')
        sys.exit(1)
Пример #8
0
def rolling_replace_instances(ecs, ec2, cluster_name, batches, ami_id, force, drain_timeout_s):
    """Drain and terminate the cluster's container instances batch by batch.

    The cluster's services are first polled for stability (120s timeout).
    For every batch, instances not already on ``ami_id`` are set to
    DRAINING, terminated once they report zero running tasks, and the
    cluster is polled for stability again before the next batch.

    Args:
        ecs: ECS client.
        ec2: EC2 client used for ``terminate_instances``.
        cluster_name: Cluster whose instances are replaced.
        batches: Number of batches to split the instances into; each batch
            holds ``ceil(instance_count / batches)`` instances.
        ami_id: Instances reported by ``get_already_updated_instances`` as
            already using this AMI are neither drained nor terminated.
        force: Proceed even when one batch would cover every instance.
        drain_timeout_s: Timeout in seconds for draining a batch, and for
            the stability poll that follows it.

    Raises:
        RollingException: No services, invalid ``batches``, a batch that
            covers all instances without ``force``, a batch larger than
            100, or no container instances returned.
        RollingTimeoutException: A batch did not finish draining in time.
    """
    # Fail with the tool's own exception instead of a ZeroDivisionError.
    if batches < 1:
        raise RollingException(f'batches must be at least 1, got {batches}.')

    replace_start_time = time.time()
    services = get_services(ecs, cluster_name)
    if not services:
        raise RollingException('No services found in cluster. exiting.')
    utils.print_info(
        f'Checking cluster {cluster_name}, services {str(services)} are stable'
    )
    ecs_utils.poll_cluster_state(
        ecs, cluster_name, services, polling_timeout=120
    )
    instances = get_container_instance_arns(ecs, cluster_name)
    # batches determines the number of instances you want to replace at once.
    # Choose conservatively, as this process temporarily reduces your capacity.
    # But note each batch can be time consuming (up to 10m per batch)

    # NOTE: despite its name, batch_count is the size of each batch.
    batch_count = math.ceil(len(instances) / batches)
    utils.print_info(f'You have {len(instances)} instances.')
    utils.print_info(f'Terminating in batches of {batch_count}')
    if len(instances) <= batch_count:
        utils.print_warning(
            f'Terminating {batch_count} instances will cause downtime.'
        )
        if not force:
            raise RollingException('Quitting, use --force to over-ride.')
    instance_batches = batch_instances(instances, batch_count)
    for to_drain in instance_batches:
        if len(to_drain) > 100:
            utils.print_error('Batch size exceeded 100, try using more batches.')
            raise RollingException(
                f'Quitting, batch size exceeded 100: {batch_count}.'
            )
        response = ecs.describe_container_instances(
            cluster=cluster_name, containerInstances=to_drain)

        if not response.get('containerInstances'):
            raise RollingException('No containerInstances found.')

        # don't drain or terminate any instances that are already up to date
        # (if the user provided the --ami-id flag)
        done_instances = get_already_updated_instances(response, ami_id)
        if len(done_instances) == len(to_drain):
            # move on if the whole batch is already up to date
            continue

        # Drain only the instances that still need replacing. Draining the
        # whole batch would leave up-to-date instances stuck in DRAINING,
        # because they are never terminated below.
        pending_arns = [
            container_instance.get('containerInstanceArn')
            for container_instance in response.get('containerInstances')
            if container_instance.get('ec2InstanceId') not in done_instances
        ]
        ecs.update_container_instances_state(cluster=cluster_name,
                                             status='DRAINING',
                                             containerInstances=pending_arns)
        utils.print_info(f'Wait for drain to complete with {drain_timeout_s}s timeout...')
        start_time = time.time()
        while len(done_instances) < len(to_drain):
            if (time.time() - start_time) > drain_timeout_s:
                raise RollingTimeoutException('Waiting for instance to complete draining. Giving up.')
            time.sleep(SLEEP_TIME_S)
            response = ecs.describe_container_instances(
                cluster=cluster_name, containerInstances=to_drain)
            for container_instance in response.get('containerInstances'):
                instance_id = container_instance.get('ec2InstanceId')
                running_tasks = container_instance.get('runningTasksCount')
                if running_tasks > 0:
                    PRINT_PROGRESS()
                    continue
                # done_instances doubles as the "already handled" set, so
                # each instance is terminated at most once.
                if instance_id not in done_instances:
                    utils.print_info(f'{instance_id} is drained, terminate!')
                    ec2.terminate_instances(InstanceIds=[instance_id])
                    done_instances.append(instance_id)
        # new instance will take as much as 10m to go into service
        # then we wait for ECS to resume a steady state before moving on
        ecs_utils.poll_cluster_state(ecs, cluster_name,
                                     services, polling_timeout=drain_timeout_s)
    utils.print_success(f'EC2 instance replacement process complete! {int(time.time() - replace_start_time)}s elapsed')
Пример #9
0
    for blockuid in blocks:
        block = blocks[blockuid]
        if block["@type"] == "image":
            if "@@images" in block["url"]:
                if block["url"].split("/")[-1] == "large":
                    block["size"] = "l"
                block["url"] = block["url"].split("/@@images")[0]
    return blocks


if __name__ == "__main__":
    # Walk every catalogued object providing IBlocks under PATH and rewrite
    # its image blocks, then commit all changes in a single transaction.
    catalog = api.portal.get_tool("portal_catalog")
    matches = catalog.unrestrictedSearchResults(
        object_provides=IBlocks.__identifier__, path=PATH)

    for brain in matches:
        try:
            content = brain.getObject()
        except KeyError:
            # catalog entry whose object cannot be loaded; nothing to do
            continue
        if not content:
            continue

        current_blocks = content.blocks
        utils.print_info(f"Processing: {content.absolute_url()}")

        # Search for any image block and replace its scale suffix
        content.blocks = remove_image_scales(current_blocks)

    transaction.commit()