Example #1
def iterate_to_track_progress_of_inventory_jobs():
    """ Iterator for monitoring progress all of inventory jobs

    Returns:
        json -- Lambda event input for the next iteration
    """
    index = ddb.get_iterator_index(
        config.ServiceParameters.inventory_monitor_iterator_name)
    new_index = index - 1
    should_continue_loop = True
    input_parameters = ddb.get_input_parameters()
    wait_time_in_seconds = config.ServiceParameters.wait_time_in_seconds
    if input_parameters.is_smoke_test:
        should_continue_loop = False
    elif are_inventories_ready(input_parameters):
        should_continue_loop = False
    elif new_index < 1:
        logging.warning('inventory jobs did not finish in time. moving forward with the existing inventory files.')
        should_continue_loop = False
    logging.info(
        f'new_index:{new_index} should_continue_loop:{should_continue_loop} wait_time_in_seconds:{wait_time_in_seconds}')
    ddb.store_iterator_index(
        config.ServiceParameters.inventory_monitor_iterator_name,
        new_index)
    return {
        config.ServiceParameters.iterator_index_key_name: new_index,
        config.ServiceParameters.iterator_continue_key_name: should_continue_loop,
        config.ServiceParameters.iterator_wait_time_in_seconds_key_name: wait_time_in_seconds,
        config.ServiceParameters.iterator_step_key_name: config.ServiceParameters.iterator_step
    }
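
# A minimal, self-contained sketch of the polling-loop pattern used above: an
# iterator index kept in DynamoDB is decremented on every pass until the work
# is ready or the index reaches zero. The stand-ins below for the DynamoDB
# index and the readiness check are illustrative assumptions only.
def _simulate_polling_loop(initial_index, is_work_done):
    index = initial_index
    while True:
        new_index = index - 1
        should_continue_loop = not is_work_done() and new_index >= 1
        if not should_continue_loop:
            return new_index
        index = new_index

# Example: work that never becomes ready exhausts the counter and returns 0.
assert _simulate_polling_loop(3, lambda: False) == 0
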
def update_helper(account_id):
    """ Helper function for updating inventory status for a given account

    Arguments:
        account_id {string} -- AWS account id
    """
    input_parameters = ddb.get_input_parameters()
    s3.update_source_bucket_inventory_status(input_parameters, account_id)
def delete_helper(account_id):
    """ Helper function for deleting inventory configurations from source buckets

    Arguments:
        account_id {string} -- AWS account id
    """
    source_buckets_ddb = ddb.get_source_buckets(account_id)
    input_parameters = ddb.get_input_parameters()
    s3.remove_bucket_inventory_configurations(input_parameters,
                                              source_buckets_ddb)
Example #4
def return_wait_time():
    """ Return wait time for Athena query execution

    Returns:
        json -- Lambda event input containing the wait time in seconds
    """
    input_parameters = ddb.get_input_parameters()
    wait_time_in_seconds = (
        config.ServiceParameters.wait_time_in_seconds_smoke_test
        if input_parameters.is_smoke_test
        else config.ServiceParameters.wait_time_in_seconds)
    return {
        config.ServiceParameters.iterator_wait_time_in_seconds_key_name:
        wait_time_in_seconds
    }
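
# The dictionary returned above is typically consumed by a Step Functions Wait
# state that reads the delay through a SecondsPath expression. The state
# definition below is an assumed example; 'wait_time_in_seconds' and the
# 'Next' state name are placeholders for the actual configured values.
example_wait_state = {
    "Type": "Wait",
    "SecondsPath": "$.wait_time_in_seconds",
    "Next": "CheckInventoryStatus"
}
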
Example #5
def create_inventory_configuration_helper(account_id):
    """ Helper function to create inventory configurations

    Arguments:
        account_id {string} -- AWS account id
    """
    input_parameters = ddb.get_input_parameters()
    source_buckets = get_source_buckets(input_parameters, account_id)
    ddb.store_source_buckets(source_buckets)
    create_bucket_inventory_configurations(
        input_parameters.run_id,
        source_buckets)
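
# create_bucket_inventory_configurations is not shown in this listing. As a
# rough sketch under stated assumptions, a single per-bucket configuration
# call with boto3 could look like the helper below; the destination ARN,
# configuration id, format, and field list are illustrative values, not the
# project's actual ones.
import boto3

def _put_inventory_configuration_sketch(bucket_name, destination_bucket_arn, config_id):
    s3_client = boto3.client('s3')
    s3_client.put_bucket_inventory_configuration(
        Bucket=bucket_name,
        Id=config_id,
        InventoryConfiguration={
            'Id': config_id,
            'IsEnabled': True,
            'IncludedObjectVersions': 'All',
            'Schedule': {'Frequency': 'Daily'},
            'Destination': {
                'S3BucketDestination': {
                    'Bucket': destination_bucket_arn,
                    'Format': 'ORC',
                }
            },
            'OptionalFields': ['Size', 'LastModifiedDate', 'StorageClass'],
        })
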
Example #6
def initialize():
    """ Initialize the iterator for repeating a task for multiple AWS accounts

    Returns:
        json -- Lambda execution event
    """
    input_parameters = ddb.get_input_parameters()
    account_ids = [account_config.id for account_config in input_parameters.accounts]
    return {
        config.ServiceParameters.iterator_account_ids_key_name: account_ids,
        config.ServiceParameters.iterator_index_key_name: 0,
        config.ServiceParameters.iterator_continue_key_name: True
    }
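
# The event returned above seeds a per-account loop in the state machine. As a
# self-contained sketch, a downstream iterator step might advance through the
# account list roughly as below; the plain key names stand in for the
# config.ServiceParameters values used above, and the account ids are dummies.
def _advance_account_iterator_sketch(event):
    account_ids = event['account_ids']
    index = event['index'] + 1
    return {
        'account_ids': account_ids,
        'index': index,
        'continue': index < len(account_ids),
    }

assert _advance_account_iterator_sketch(
    {'account_ids': ['111111111111', '222222222222'], 'index': 0})['continue'] is True
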
Example #7
def configure_iterator():
    """ Configure iterator for deleting destination buckets

    Returns:
        json -- Lambda input for the next Step Function task
    """
    input_parameters = ddb.get_input_parameters()
    regions = input_parameters.supported_regions

    return {
        config.ServiceParameters.iterator_regions_key_name: regions,
        config.ServiceParameters.iterator_index_key_name: 0,
        config.ServiceParameters.iterator_current_attempt_count_key_ame: 0,
        config.ServiceParameters.iterator_continue_key_name: True
    }
Example #8
def process_inventory_object(event):
    """ Process an inventory object once it has been stored in a staging destination bucket

    Arguments:
        event {json} -- S3 notification event
    """
    if 'Records' in event:
        input_parameters = ddb.get_input_parameters()
        for record in event['Records']:
            if 'body' in record:
                body_json = json.loads(record['body'])
                if 'Records' in body_json:
                    for s3_record in body_json['Records']:
                        if 'eventName' in s3_record and s3_record['eventName'] == 'ObjectCreated:Put':
                            source_bucket_name = s3_record['s3']['bucket']['name']
                            if is_destination_bucket_name(input_parameters, source_bucket_name):
                                source_object_key = s3_record['s3']['object']['key']
                                object_key_parts = source_object_key.split('/')
                                object_key_parts_len = len(object_key_parts)

                                bucket_account_id = object_key_parts[0].lower()
                                bucket_region = object_key_parts[1].lower()
                                bucket_name = object_key_parts[2].lower()
                                logging.info(f'source_object_key:{source_object_key} bucket_account_id:{bucket_account_id} bucket_name:{bucket_name}')
                                if ddb.is_inprogress_inventory_job(bucket_account_id, bucket_name):
                                    if object_key_parts_len > 4:
                                        if utility.compare_strings(object_key_parts[object_key_parts_len - 1], 'manifest.checksum'):
                                            ddb.update_source_bucket_inventory_status(object_key_parts[2], ddb.BucketInventoryStatus.done)
                                            remove_bucket_inventory_configuration(
                                                input_parameters.run_id,
                                                bucket_account_id,
                                                bucket_region, bucket_name
                                            )
                                        elif utility.compare_strings(object_key_parts[object_key_parts_len - 2], 'data'):
                                            copy_inventory_object_into_consolidation_bucket(
                                                input_parameters.run_id,
                                                source_bucket_name,
                                                source_object_key,
                                                config.DeploymentDetails.consolidated_inventory_bucket_name
                                            )
                                else:
                                    logging.warning('Received an unexpected SQS notification')
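
# The handler above expects an SQS event whose record bodies are JSON-encoded
# S3 notifications. The sample payload below illustrates the assumed shape;
# the bucket names, account id, region, and object key are placeholders, and
# the <account>/<region>/<bucket>/... key layout is inferred from how the key
# is split above.
import json

_example_sqs_event = {
    'Records': [
        {
            'body': json.dumps({
                'Records': [
                    {
                        'eventName': 'ObjectCreated:Put',
                        's3': {
                            'bucket': {'name': 'example-staging-destination-bucket'},
                            'object': {
                                'key': '111111111111/us-east-1/example-source-bucket/'
                                       'inventory-config/data/part-00000.orc'
                            },
                        },
                    }
                ]
            })
        }
    ]
}
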
def cleanup(event):
    """ Delete staging destination CloudFormation stacks

    Arguments:
        event {json} -- Lambda execution event

    Returns:
        json -- Lambda input for the next Step Function task
    """
    iterator_info = event
    if config.ServiceParameters.iterator_key_name in event:
        iterator_info = event[config.ServiceParameters.iterator_key_name]
    should_continue = True
    current_attempt_count = iterator_info[config.ServiceParameters.iterator_current_attempt_count_key_ame]
    regions = iterator_info[config.ServiceParameters.iterator_regions_key_name]
    index = iterator_info[config.ServiceParameters.iterator_index_key_name]
    if (current_attempt_count >= config.ServiceParameters.max_attempt_count) or (len(regions) <= index):
        should_continue = False
    else:
        region = regions[index]
        input_parameters = ddb.get_input_parameters()
        worker_process = Process(
            target=s3.delete_regional_s3_inventory_bucket,
            args=(input_parameters, region,))

        worker_process.start()
        timeout_in_seconds = (config.ServiceParameters.lambda_timeout_in_minutes - 2) * 60
        worker_process.join(timeout_in_seconds)
        if not worker_process.is_alive(): # Cleanup finished
            index = index + 1
            current_attempt_count = 0
        else: # Need more time to cleanup regional inventory objects
            worker_process.terminate()
            current_attempt_count = current_attempt_count + 1

    return {
        config.ServiceParameters.iterator_regions_key_name: regions,
        config.ServiceParameters.iterator_index_key_name: index,
        config.ServiceParameters.iterator_current_attempt_count_key_ame: current_attempt_count,
        config.ServiceParameters.iterator_continue_key_name: should_continue
    }
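
# The cleanup step above bounds a long-running delete with multiprocessing:
# run the work in a child process, join with a timeout that leaves head-room
# before the Lambda deadline, and terminate plus retry on a later iteration if
# the child is still alive. A minimal, self-contained sketch of the same
# pattern, using a toy work function:
from multiprocessing import Process
import time

def _toy_work(duration_in_seconds):
    time.sleep(duration_in_seconds)

def _run_with_timeout_sketch(duration_in_seconds, timeout_in_seconds):
    worker = Process(target=_toy_work, args=(duration_in_seconds,))
    worker.start()
    worker.join(timeout_in_seconds)
    if worker.is_alive():
        worker.terminate()  # did not finish in time; caller retries later
        return False
    return True
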
Example #10
def cleanup_and_verify():
    """ Cleanup smoke test resources and verify test resules
    """
    input_parameters = ddb.get_input_parameters()
    if input_parameters.is_smoke_test:
        smoketest.cleanup_and_verify()
Example #11
def send_welcome_email():
    """ Send welcome email
    """
    queries = ddb.get_athena_queries()
    query_details_html = '''
    <html>
        <head>
            <style>
                table, th, td {
                    border: 1px solid black;
                    border-collapse: collapse;
                }
                .success {
                    background-color: rgba(0, 255, 0, 0.2);
                }
                .failed {
                    background-color: rgba(255, 0, 0, 0.2);
                }
                .neutral {
                    background-color:white;
                }
            </style>
        </head>
    <body>
        <p>
            Your latest <a href="https://github.com/kurmiashish/S3Insights/blob/master/docs/user_guide.md#how-to-initiate-a-state-machine-execution">S3Insights Harvester execution</a> generated this welcome email. You can learn more about the platform <a href="https://github.com/kurmiashish/S3Insights">here</a>.
        </p>
    '''
    intro_html = 'In this run, the following Athena queries were executed. You can run additional Athena queries manually by following <a href="https://github.com/kurmiashish/S3Insights/blob/master/docs/user_guide.md#running-athena-analysis-queries-manually">these instructions</a>. Please refer to the <a href="https://github.com/kurmiashish/S3Insights/blob/master/docs/troubleshooting.md#athena-failures">Athena troubleshooting document</a> if any of the following Athena queries have failed.'
    input_parameters = ddb.get_input_parameters()
    if input_parameters.is_smoke_test:
        intro_html = intro_html + ' <b>As this is a smoke test run, the following links may not work as the platform may have deleted the Athena resources.</b>'
    query_details_html = query_details_html + "<h4>Analysis Queries</h4><p>" + intro_html + "</p>"

    query_details_html = query_details_html + '''
    <table>
            <tr>
                <th>Name</th>
                <th>Query</th>
                <th>Status</th>
                <th>Execution Details</th>
            </tr>
    '''
    succeeded_status_value = 'succeeded'
    done_status_value = 'done'
    bucket_is_empty_status_value = 'bucket_is_empty'
    everything_else_status_value = 'everything_else'

    success_css_class_name = 'success'
    failed_css_class_name = 'failed'
    neutral_css_class_name = 'neutral'

    css_mappings = {
        succeeded_status_value: success_css_class_name,
        done_status_value: success_css_class_name,
        everything_else_status_value: failed_css_class_name,
        bucket_is_empty_status_value: neutral_css_class_name
    }

    for state in [succeeded_status_value, everything_else_status_value]:
        for query in queries:
            should_include = False
            if not utility.compare_strings(state,
                                           everything_else_status_value):
                should_include = utility.compare_strings(state, query.state)
            else:
                should_include = not utility.compare_strings(
                    succeeded_status_value, query.state)

            if should_include:
                css_class_name = css_mappings[state]
                query_web_console_link = 'https://console.aws.amazon.com/athena/home?region={0}#query/history/{1}'.format(
                    config.DeploymentDetails.region, query.query_execution_id)
                query_web_console_link_html = '<a href={0}> Web Console Link </a>'.format(
                    query_web_console_link)
                query_details_html = query_details_html + f'<tr class="{css_class_name}"><td>' + ' </td><td>'.join(
                    [
                        query.query_name, query.actual_query, query.state,
                        query_web_console_link_html
                    ]) + '</td></tr>'

    query_details_html = query_details_html + '</table><br>'
    bucket_html_table = '''
            <h4>Source buckets</h4>
            <p>
                The following buckets are included in the analysis. If the platform failed to generate inventory for any of the buckets (i.e., if any entry in the following table is highlighted in Red), please consult the <a href="https://github.com/kurmiashish/S3Insights/blob/master/docs/troubleshooting.md#inventory-generation-failures">inventory generation troubleshooting document</a>.
            </p>
            <table>
            <tr>
                <th>Account</th>
                <th>Region</th>
                <th>Bucket</th>
                <th>Inventory Status</th>
            </tr>
    '''

    source_buckets = ddb.get_source_buckets()
    for account_id in source_buckets:
        # Let's calculate the value for rowspan
        account_row_span = sum([
            len(source_buckets[account_id][region])
            for region in source_buckets[account_id]
        ])
        inserted_account_row = False
        for region in source_buckets[account_id]:
            region_row_span = len(source_buckets[account_id][region])
            inserted_region_row = False
            for inventory_status in [
                    done_status_value, bucket_is_empty_status_value,
                    everything_else_status_value
            ]:
                for bucket in source_buckets[account_id][region]:
                    should_include = False
                    if not utility.compare_strings(
                            inventory_status, everything_else_status_value):
                        should_include = utility.compare_strings(
                            inventory_status, bucket.inventory_status)
                    else:
                        already_included = utility.compare_strings(
                            done_status_value, bucket.inventory_status
                        ) or utility.compare_strings(
                            bucket_is_empty_status_value,
                            bucket.inventory_status)
                        should_include = not already_included

                    if should_include:
                        css_class_name = css_mappings[inventory_status]
                        row = "<tr>"
                        if not inserted_account_row:
                            inserted_account_row = True
                            row = row + "<td rowspan={0}>{1}</td>".format(
                                account_row_span, account_id)
                        if not inserted_region_row:
                            inserted_region_row = True
                            row = row + "<td rowspan={0}>{1}</td>".format(
                                region_row_span, region)
                        row = row + f'<td class="{css_class_name}">{bucket.name}</td>'
                        row = row + f'<td class="{css_class_name}">{bucket.inventory_status}</td></tr>'
                        bucket_html_table = bucket_html_table + row
    bucket_html_table = bucket_html_table + "</table>"
    query_details_html = query_details_html + bucket_html_table

    input_parameters_str = json.dumps(
        input_parameters,
        default=lambda input_parameters: input_parameters.__dict__,
        sort_keys=True,
        indent=4,
        separators=(',', ': '))

    input_parameters_section = '''
<br>
<h4>Input Parameters</h4>
<p>
<div style="white-space: pre-wrap;">
The execution parameters used for this run are given below.
{0}
</div>
</p>
    '''.format(input_parameters_str)
    query_details_html = query_details_html + input_parameters_section + '</body></html>'
    logging.info(f'welcome email content:{query_details_html}')

    input_parameters = ddb.get_input_parameters()
    ses_client = awshelper.get_client(awshelper.ServiceName.ses)
    response = ses_client.send_email(
        Destination={
            'ToAddresses': input_parameters.recipient_email_addresses,
        },
        Message={
            'Body': {
                'Html': {
                    'Charset': 'UTF-8',
                    'Data': query_details_html,
                },
                'Text': {
                    'Charset': 'UTF-8',
                    'Data': query_details_html,
                },
            },
            'Subject': {
                'Charset': 'UTF-8',
                'Data': 'Your S3Insights snapshot is ready',
            },
        },
        Source=input_parameters.sender_email_address,
    )
    logging.info(f'send email api response:{response}')
Example #12
def cleanup_and_verify():
    """ Cleanup smoke test resources and verify that smoke test worked as expected

    Raises:
        utility.S3InsightsException: If the test fails
    """
    input_parameters = ddb.get_input_parameters()
    database_name = input_parameters.athena_database_name
    run_id = input_parameters.run_id
    is_manual_cleanup = ddb.is_manual_cleanup()
    athena_client = awshelper.get_client(awshelper.ServiceName.athena)
    try:
        athena.run_query(
            run_id, athena_client,
            'DROP TABLE {0}'.format(input_parameters.athena_table_name),
            database_name, True)
    except utility.S3InsightsException as e:
        logging.info(f'received exception while deleting athena table: {e}')
        if is_manual_cleanup:
            logging.info(
                'ignoring the exception as this is a manual cleanup operation')
        else:
            raise
    try:
        athena.run_query(
            run_id, athena_client,
            'DROP DATABASE {0}'.format(database_name),
            None, True)
    except utility.S3InsightsException as e:
        logging.info(f'received exception while deleting athena database: {e}')
        if is_manual_cleanup:
            logging.info(
                'ignoring the exception as this is a manual cleanup operation')
        else:
            raise

    s3_resource = awshelper.get_resource(awshelper.ServiceName.s3)
    s3_athena_output_prefix = athena.get_s3_output_location_prefix(run_id)
    consolidated_bucket = s3_resource.Bucket(
        config.DeploymentDetails.consolidated_inventory_bucket_name)
    athena_output_objects = consolidated_bucket.objects.filter(
        Prefix=s3_athena_output_prefix)
    athena_output_objects.delete()

    did_smoke_test_fail = False
    if len(run_id) > 0:
        s3_inventory_prefix = s3.get_inventory_prefix_at_consolidated_bucket(
            run_id)

        objects = consolidated_bucket.objects.filter(
            Prefix=s3_inventory_prefix)
        objects_count = 0
        for obj in objects:
            objects_count += 1
        logging.info(
            f'Number of objects that were created in the consolidation bucket:{objects_count}'
        )
        objects = consolidated_bucket.objects.filter(
            Prefix=s3_inventory_prefix)
        objects.delete()
        if objects_count == 0:
            did_smoke_test_fail = True
    else:
        did_smoke_test_fail = True
    if is_manual_cleanup is not True and did_smoke_test_fail:
        raise utility.S3InsightsException(
            'smoke test failed. Clean up operation itself might have succeeded.'
        )
def create_if_required():
    """ Create smoke test resources if required
    """
    input_parameters = ddb.get_input_parameters()
    if input_parameters.is_smoke_test:
        smoketest.simulate(input_parameters)
Example #14
def create_resources():
    """ Create Athena resources once all inventory objects have been partitioned
        and stored in the consolidation bucket.
    """

    input_parameters = ddb.get_input_parameters()
    database_name = input_parameters.athena_database_name
    table_name = input_parameters.athena_table_name
    athena_client = awshelper.get_client(awshelper.ServiceName.athena)
    create_database_query = 'CREATE DATABASE IF NOT EXISTS {0}'.format(
        database_name)
    logging.info(f'create database query={create_database_query}')
    run_id = input_parameters.run_id
    run_query(run_id, athena_client, create_database_query, None, True)

    athena_table_format = """
        CREATE EXTERNAL TABLE {0}(
        bucket string,
        key string,
        version_id string,
        is_latest boolean,
        is_delete_marker boolean,
        size bigint,
        last_modified_date timestamp,
        e_tag string,
        storage_class string,
        is_multipart_uploaded boolean,
        replication_status string,
        encryption_status string,
        object_lock_retain_until_date timestamp,
        object_lock_mode string,
        object_lock_legal_hold_status string)
        PARTITIONED BY (
        account string,
        region string,
        bucketname string)
        ROW FORMAT SERDE
        'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
        STORED AS INPUTFORMAT
        'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
        OUTPUTFORMAT
        'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
        LOCATION
        's3://{1}/{2}'
    """

    create_table_query = athena_table_format.format(
        table_name,
        config.DeploymentDetails.consolidated_inventory_bucket_name,
        s3.get_inventory_prefix_at_consolidated_bucket(run_id))
    logging.info(f'create table query={create_table_query}')

    run_query(run_id, athena_client, create_table_query, database_name, True)

    run_query(run_id, athena_client,
              'MSCK REPAIR TABLE {0}'.format(table_name), database_name, True)

    query_execution_details = {}
    for athena_query in input_parameters.athena_queries:
        execution_id = run_query(
            run_id, athena_client,
            athena_query.query.replace("{ATHENA_TABLE}", table_name),
            database_name, False)

        query_execution_details[execution_id] = athena_query
        logging.info('Execution Id: {0} Name: {1} Query:{2}'.format(
            execution_id, athena_query.name, athena_query.query))
    ddb.store_athena_queries(query_execution_details)
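
# run_query itself is not included in this listing. As a rough sketch under
# stated assumptions, such a helper could drive the standard Athena APIs as
# below; the output location, polling interval, and error handling are
# illustrative and not the project's actual implementation.
import time

def _run_athena_query_sketch(athena_client, query, database_name, output_location, wait=True):
    context = {'Database': database_name} if database_name else {}
    response = athena_client.start_query_execution(
        QueryString=query,
        QueryExecutionContext=context,
        ResultConfiguration={'OutputLocation': output_location})
    execution_id = response['QueryExecutionId']
    if wait:
        while True:
            state = athena_client.get_query_execution(
                QueryExecutionId=execution_id)['QueryExecution']['Status']['State']
            if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
                break
            time.sleep(5)
    return execution_id
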