Example #1
def test_collector(historical_role, buckets, mock_lambda_environment,
                   swag_accounts, current_s3_table):
    from historical.s3.models import CurrentS3Model
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    # Verify that the tags are duplicated in the top level and configuration:
    assert len(result[0].Tags.attribute_values) == len(
        result[0].configuration.attribute_values["Tags"]) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == \
           result[0].configuration.attribute_values["Tags"]["theBucketName"] == "testbucket1"  # noqa

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={
            "bucketName": "testbucket1",
            "creationDate": now
        },
                             source="aws.s3",
                             eventName="DescribeBucket",
                             eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)
    handler(data, None)
    assert CurrentS3Model.count() == 0
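
All of these tests build their payloads with json.dumps(..., default=serialize). The serialize helper lives in the repository's test utilities and is not shown on this page; the sketch below is a minimal illustration of what such a default= hook typically does, assuming it only needs to handle datetime values and plain factory objects (the real helper may cover more cases):

# Minimal sketch, not the repository's actual helper: a json.dumps default=
# hook that renders datetimes as ISO-8601 strings and falls back to an
# object's attribute dict so factory objects become plain JSON objects.
from datetime import datetime


def serialize(obj):
    if isinstance(obj, datetime):
        return obj.isoformat()
    if hasattr(obj, '__dict__'):
        return vars(obj)
    raise TypeError(f"Cannot serialize object of type {type(obj).__name__}")
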
Example #2
def test_collector(historical_role, buckets, mock_lambda_environment,
                   swag_accounts, current_s3_table):
    """Test the Collector."""
    from historical.s3.models import CurrentS3Model
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == "testbucket1"
    assert result[0].eventSource == "aws.s3"

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={
            "bucketName": "testbucket1",
            "creationDate": now
        },
                             eventSource="historical.s3.poller",
                             eventName="PollS3",
                             eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"
    assert result[0].eventSource == "historical.s3.poller"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)
    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 0
Example #3
def test_current_table(current_s3_table):
    from historical.s3.models import CurrentS3Model

    CurrentS3Model(**S3_BUCKET).save()

    items = list(CurrentS3Model.query('arn:aws:s3:::testbucket1'))

    assert len(items) == 1
    assert isinstance(items[0].ttl, int)
    assert items[0].ttl > 0
Example #4
def test_current_table(current_s3_table):  # pylint: disable=W0613
    """Tests for the Current PynamoDB model."""
    from historical.s3.models import CurrentS3Model

    CurrentS3Model(**S3_BUCKET).save()

    items = list(CurrentS3Model.query('arn:aws:s3:::testbucket1'))

    assert len(items) == 1
    assert isinstance(items[0].ttl, int)
    assert items[0].ttl > 0
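
Both versions of test_current_table rely on CurrentS3Model filling in a numeric ttl automatically. The sketch below shows the general PynamoDB pattern that yields such a value; it is an illustrative model, not the project's actual definition, and TTL_EXPIRY is a placeholder:

# Illustrative only -- a generic PynamoDB model with a computed ttl default;
# CurrentS3Model's real attributes and table settings will differ.
import time

from pynamodb.attributes import NumberAttribute, UnicodeAttribute
from pynamodb.models import Model

TTL_EXPIRY = 86400  # placeholder: expire items one day out


class ExampleCurrentModel(Model):
    """Minimal stand-in showing how a default ttl ends up as an int > 0."""
    class Meta:
        table_name = 'example-current-table'

    arn = UnicodeAttribute(hash_key=True)
    ttl = NumberAttribute(default=lambda: int(time.time() + TTL_EXPIRY))
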
Example #5
def test_collector_on_deleted_bucket(historical_role, buckets, mock_lambda_environment, swag_accounts,
                                     current_s3_table):
    from historical.s3.models import CurrentS3Model
    from historical.s3.collector import handler

    # If an event arrives for a bucket that has already been deleted, the collector
    # should skip it and wait for the corresponding Deletion event to arrive.
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(
            requestParameters={
                "bucketName": "not-a-bucket"
            },
            source="aws.s3",
            eventName="PutBucketPolicy",
        )
    )
    create_event_data = json.dumps(create_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[
            KinesisRecordFactory(
                kinesis=KinesisDataFactory(data=create_event_data))
        ]
    )
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 0
Example #6
def test_lite_bucket_schema_for_events(historical_table, bucket_event):
    old_fields = CONFIG.exclude_fields
    CONFIG.exclude_fields = "Name,_version,Grants,LifecycleRules,Logging,Policy,Tags,Versioning,Website,Cors," \
                            "Notifications,Acceleration,Replication,CreationDate,AnalyticsConfigurations," \
                            "MetricsConfigurations,InventoryConfigurations".split(",")

    all_buckets = CurrentS3Model.scan()
    generated_report = S3ReportSchema(strict=True).dump({"all_buckets": all_buckets}).data

    generated_report["all_buckets"] = []
    process_dynamodb_record(bucket_event["Records"][0], generated_report)

    lite_report = S3ReportSchema(strict=True).dump(generated_report).data

    assert lite_report["generated_date"]
    assert lite_report["s3_report_version"] == CONFIG.s3_reports_version
    assert not lite_report.get("all_buckets")

    assert lite_report["buckets"]["testbucketNEWBUCKET"]
    assert len(lite_report["buckets"]) == 11

    for bucket in lite_report["buckets"].values():
        keys = bucket.keys()
        for excluded in CONFIG.exclude_fields:
            assert excluded not in keys

        assert bucket["AccountId"] == "123456789012"
        assert bucket["Region"] == "us-east-1"

    # Clean-up:
    CONFIG.exclude_fields = old_fields
Example #7
def test_light_bucket_schema(historical_table):
    old_fields = CONFIG.exclude_fields
    CONFIG.exclude_fields = "Name,_version,Grants,LifecycleRules,Logging,Policy,Tags,Versioning,Website,Cors," \
                            "Notifications,Acceleration,Replication,CreationDate,AnalyticsConfigurations," \
                            "MetricsConfigurations,InventoryConfigurations".split(",")

    all_buckets = CurrentS3Model.scan()
    generated_file = S3ReportSchema(strict=True).dump({"all_buckets": all_buckets}).data

    assert generated_file["generated_date"]
    assert generated_file["s3_report_version"] == CONFIG.s3_reports_version
    assert len(generated_file["buckets"]) == 10
    assert not generated_file.get("all_buckets")

    for bucket in generated_file["buckets"].values():
        keys = bucket.keys()
        for excluded in CONFIG.exclude_fields:
            assert excluded not in keys

        assert bucket["AccountId"] == "123456789012"
        assert bucket["Region"] == "us-east-1"

    # Clean-up:
    CONFIG.exclude_fields = old_fields
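
The two report tests above only assert that the keys listed in CONFIG.exclude_fields never show up in the per-bucket output. One way a marshmallow schema can honour such a list is the exclude= option; the sketch below is purely illustrative and assumes the marshmallow 2.x API implied by the .dump(...).data calls above, not the project's actual S3ReportSchema:

# Illustrative only -- not the real S3ReportSchema; shows marshmallow's
# exclude= option dropping fields from the dumped output.
from marshmallow import Schema, fields


class LiteBucketSchema(Schema):
    AccountId = fields.Str()
    Region = fields.Str()
    Tags = fields.Dict()


bucket = {"AccountId": "123456789012", "Region": "us-east-1",
          "Tags": {"theBucketName": "testbucket0"}}

# Fields named in exclude= are omitted when dumping; with marshmallow 2.x the
# rendered dict is available on `.data`.
lite = LiteBucketSchema(strict=True, exclude=("Tags",)).dump(bucket)
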
Example #8
def test_bucket_schema(historical_table):
    all_buckets = CurrentS3Model.scan()
    generated_file = S3ReportSchema(strict=True).dump({"all_buckets": all_buckets}).data

    assert generated_file["generated_date"]
    assert generated_file["s3_report_version"] == CONFIG.s3_reports_version
    assert not generated_file.get("all_buckets")

    for name, value in generated_file["buckets"].items():
        assert value["AccountId"] == "123456789012"
        assert value["Region"] == "us-east-1"
        assert value["Tags"]["theBucketName"] == name
        assert not value.get("_version")
        assert not value.get("Name")
Example #9
def test_serialization():
    """Tests that the dictionary serialization for PynamoDB objects works properly."""
    from historical.s3.models import CurrentS3Model

    bucket = S3_BUCKET.copy()
    bucket['eventTime'] = datetime(
        year=2017, month=5, day=12, hour=10, minute=30,
        second=0).isoformat() + 'Z'

    bucket = CurrentS3Model(**bucket)
    dictionary = dict(bucket)

    assert dictionary['version'] == VERSION
    assert dictionary['configuration']['LifecycleRules'][0]['Prefix'] is None
Example #10
def process_durable_event(record, s3_report):
    """Processes a group of Historical Durable Table events."""
    if record.get(EVENT_TOO_BIG_FLAG):
        result = list(CurrentS3Model.query(record['arn']))

        # Is the record too big and also not found in the Current Table? Then delete it:
        if not result:
            record['item'] = {'configuration': {}, 'BucketName': record['arn'].split('arn:aws:s3:::')[1]}

        else:
            record['item'] = dict(result[0])

    if not record['item']['configuration']:
        log.debug(f"[ ] Processing deletion for: {record['item']['BucketName']}")
        s3_report["buckets"].pop(record['item']['BucketName'], None)
    else:
        log.debug(f"[ ] Processing: {record['item']['BucketName']}")
        s3_report["all_buckets"].append(record['item'])
Example #11
def create_delete_model(record):
    """Create an S3 model from a record."""
    arn = "arn:aws:s3:::{}".format(cloudwatch.filter_request_parameters('bucketName', record))
    log.debug('[-] Deleting Dynamodb Records. Hash Key: {arn}'.format(arn=arn))

    data = {
        'arn': arn,
        'principalId': cloudwatch.get_principal(record),
        'userIdentity': cloudwatch.get_user_identity(record),
        'accountId': record['account'],
        'eventTime': record['detail']['eventTime'],
        'BucketName': cloudwatch.filter_request_parameters('bucketName', record),
        'Region': cloudwatch.get_region(record),
        'Tags': {},
        'configuration': {},
        'eventSource': record["detail"]["eventSource"]
    }

    return CurrentS3Model(**data)
Example #12
def dump_report(commit=True):
    # Get all the data from DynamoDB:
    log.debug("Starting... Beginning scan.")
    all_buckets = CurrentS3Model.scan()

    generated_file = S3ReportSchema(strict=True).dump({"all_buckets": all_buckets}).data

    # Dump to S3:
    if commit:
        log.debug("Saving to S3.")

        # Replace <empty> with "" <-- Due to Pynamo/Dynamo issues...
        dump_to_s3(
            json.dumps(generated_file, indent=4).replace('"<empty>"', '""').encode("utf-8"))
    else:
        log.debug("Commit flag not set, not saving.")

    log.debug("Completed S3 report generation.")
Example #13
def current_s3_table(dynamodb):
    yield CurrentS3Model.create_table(read_capacity_units=1,
                                      write_capacity_units=1,
                                      wait=True)
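
This fixture (and its duplicate in Example #19) depends on a dynamodb fixture that is not shown on this page. A plausible sketch using moto, assuming an older moto release that still exposes mock_dynamodb2 (the project's real conftest may differ):

# Plausible sketch of the unshown `dynamodb` fixture -- assumes an older moto
# release providing mock_dynamodb2; newer moto versions name the mock differently.
import pytest
from moto import mock_dynamodb2


@pytest.fixture()
def dynamodb():
    with mock_dynamodb2():
        yield
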
Example #14
def generated_file(historical_table):
    all_buckets = CurrentS3Model.scan()
    return S3ReportSchema(strict=True).dumps({
        "all_buckets": all_buckets
    }).data.encode("utf-8")
Example #15
def historical_table(current_s3_table):
    for x in range(0, 10):
        bucket = json.loads(S3_BUCKET.replace("{number}", "{}".format(x)))
        CurrentS3Model(**bucket).save()
Example #16
def process_update_records(update_records):
    """Process the requests for S3 bucket update requests"""
    events = sorted(update_records, key=lambda x: x['account'])

    # Group records by account for more efficient processing
    for account_id, events in groupby(events, lambda x: x['account']):
        events = list(events)

        # Grab the bucket names (de-dupe events):
        buckets = {}
        for event in events:
            # If the creation date is present, then use it:
            bucket_event = buckets.get(event['detail']['requestParameters']['bucketName'], {
                'creationDate': event['detail']['requestParameters'].get('creationDate')
            })
            bucket_event.update(event['detail']['requestParameters'])

            buckets[event['detail']['requestParameters']['bucketName']] = bucket_event
            buckets[event['detail']['requestParameters']['bucketName']]['eventDetails'] = event

        # Query AWS for current configuration
        for b_name, item in buckets.items():
            LOG.debug(f'[~] Processing Create/Update for: {b_name}')
            # If the bucket does not exist, then simply drop the request --
            # If this happens, there is likely a Delete event that has occurred and will be processed soon.
            try:
                bucket_details = get_bucket(
                    b_name,
                    account_number=account_id,
                    include_created=(item.get('creationDate') is None),
                    assume_role=HISTORICAL_ROLE,
                    region=CURRENT_REGION)
                if bucket_details.get('Error'):
                    LOG.error(
                        f"[X] Unable to fetch details about bucket: {b_name}. "
                        f"The error details are: {bucket_details['Error']}")
                    continue

            except ClientError as cerr:
                if cerr.response['Error']['Code'] == 'NoSuchBucket':
                    LOG.warning(
                        f'[?] Received update request for bucket: {b_name} that does not '
                        'currently exist. Skipping.')
                    continue

                # Catch Access Denied exceptions as well:
                if cerr.response['Error']['Code'] == 'AccessDenied':
                    LOG.error(
                        f'[X] Unable to fetch details for S3 Bucket: {b_name} in {account_id}. Access is Denied. '
                        'Skipping...')
                    continue
                raise Exception(cerr)

            # Pull out the fields we want:
            data = {
                'arn': f'arn:aws:s3:::{b_name}',
                'principalId': cloudwatch.get_principal(item['eventDetails']),
                'userIdentity': cloudwatch.get_user_identity(item['eventDetails']),
                'userAgent': item['eventDetails']['detail'].get('userAgent'),
                'sourceIpAddress': item['eventDetails']['detail'].get('sourceIPAddress'),
                'requestParameters': item['eventDetails']['detail'].get('requestParameters'),
                'accountId': account_id,
                'eventTime': item['eventDetails']['detail']['eventTime'],
                'BucketName': b_name,
                'Region': bucket_details.pop('Region'),
                # Duplicated in top level and configuration for secondary index
                'Tags': bucket_details.pop('Tags', {}) or {},
                'eventSource': item['eventDetails']['detail']['eventSource'],
                'eventName': item['eventDetails']['detail']['eventName'],
                'version': VERSION
            }

            # Remove the fields we don't care about:
            del bucket_details['Arn']
            del bucket_details['GrantReferences']
            del bucket_details['_version']
            del bucket_details['Name']

            if not bucket_details.get('CreationDate'):
                bucket_details['CreationDate'] = item['creationDate']

            data['configuration'] = bucket_details

            current_revision = CurrentS3Model(**data)
            current_revision.save()
Example #17
def test_historical_table_fixture(historical_table):
    assert CurrentS3Model.count() == 10
Example #18
def test_snsproxy_dynamodb_differ(historical_role, current_s3_table,
                                  durable_s3_table, mock_lambda_environment,
                                  buckets):
    """
    This mostly checks that the differ is able to properly load the reduced dataset from the SNSProxy.
    """
    # Create the item in the current table:
    from historical.s3.collector import handler as current_handler
    from historical.s3.differ import handler as diff_handler
    from historical.s3.models import CurrentS3Model, DurableS3Model
    from historical.common.sns import shrink_sns_blob

    # Mock out the loggers:
    import historical.common.dynamodb
    old_logger = historical.common.dynamodb.log
    mocked_logger = MagicMock()
    historical.common.dynamodb.log = mocked_logger

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    current_handler(data, mock_lambda_environment)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1

    # Mock out the DDB Stream for this creation and for an item that is NOT in the current table:
    ttl = int(time.time() + TTL_EXPIRY)
    new_bucket = S3_BUCKET.copy()
    new_bucket['eventTime'] = datetime(
        year=2017, month=5, day=12, hour=10, minute=30,
        second=0).isoformat() + 'Z'
    new_bucket['ttl'] = ttl
    ddb_existing_item = DynamoDBRecordFactory(
        dynamodb=DynamoDBDataFactory(
            NewImage=new_bucket,
            Keys={'arn': new_bucket['arn']},
            OldImage=new_bucket),
        eventName='INSERT')

    missing_bucket = S3_BUCKET.copy()
    missing_bucket['eventTime'] = datetime(
        year=2017, month=5, day=12, hour=10, minute=30,
        second=0).isoformat() + 'Z'
    missing_bucket['ttl'] = ttl
    missing_bucket['BucketName'] = 'notinthecurrenttable'
    missing_bucket['arn'] = 'arn:aws:s3:::notinthecurrenttable'
    missing_bucket['configuration']['Name'] = 'notinthecurrenttable'
    ddb_missing_item = DynamoDBRecordFactory(
        dynamodb=DynamoDBDataFactory(
            NewImage=missing_bucket,
            Keys={'arn': 'arn:aws:s3:::notinthecurrenttable'},
            OldImage=new_bucket),
        eventName='INSERT')

    # Get the shrunken blob:
    shrunken_existing = json.dumps(
        shrink_sns_blob(
            json.loads(json.dumps(ddb_existing_item, default=serialize))))
    shrunken_missing = json.dumps(
        shrink_sns_blob(
            json.loads(json.dumps(ddb_missing_item, default=serialize))))

    records = RecordsFactory(records=[
        SQSDataFactory(body=json.dumps(
            SnsDataFactory(Message=shrunken_existing), default=serialize)),
        SQSDataFactory(body=json.dumps(
            SnsDataFactory(Message=shrunken_missing), default=serialize))
    ])
    records_event = json.loads(json.dumps(records, default=serialize))

    # Run the differ:
    diff_handler(records_event, mock_lambda_environment)

    # Verify that the existing bucket in the Current table is in the Durable table with the correct configuration:
    result = list(DurableS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    assert result[0].configuration.attribute_values['Name'] == 'testbucket1'

    # Verify that the missing bucket is ignored -- as it will be processed presumably later:
    result = list(DurableS3Model.query("arn:aws:s3:::notinthecurrenttable"))
    assert not result

    # Verify that the proper log statements were reached:
    assert mocked_logger.debug.called
    assert mocked_logger.error.called
    debug_calls = [
        '[-->] Item with ARN: arn:aws:s3:::notinthecurrenttable was too big for SNS '
        '-- fetching it from the Current table...',
        '[+] Saving new revision to durable table.',
        '[-->] Item with ARN: arn:aws:s3:::testbucket1 was too big for SNS -- fetching it from the Current table...'
    ]
    for dc in debug_calls:
        mocked_logger.debug.assert_any_call(dc)

    mocked_logger.error.assert_called_once_with(
        '[?] Received item too big for SNS, and was not able to '
        'find the original item with ARN: arn:aws:s3:::notinthecurrenttable')

    # Unmock the logger:
    historical.common.dynamodb.log = old_logger
Example #19
def current_s3_table(dynamodb):
    from historical.s3.models import CurrentS3Model
    yield CurrentS3Model.create_table(read_capacity_units=1,
                                      write_capacity_units=1,
                                      wait=True)
Example #20
def process_update_records(update_records):
    """Process the requests for S3 bucket update requests"""
    events = sorted(update_records, key=lambda x: x['account'])

    # Group records by account for more efficient processing
    for account_id, events in groupby(events, lambda x: x['account']):
        events = list(events)

        # Grab the bucket names (de-dupe events):
        buckets = {}
        for e in events:
            # If the creation date is present, then use it:
            bucket_event = buckets.get(e["detail"]["requestParameters"]["bucketName"], {
                "creationDate": e["detail"]["requestParameters"].get("creationDate")
            })
            bucket_event.update(e["detail"]["requestParameters"])

            buckets[e["detail"]["requestParameters"]["bucketName"]] = bucket_event
            buckets[e["detail"]["requestParameters"]["bucketName"]]["eventDetails"] = e

        # Query AWS for current configuration
        for b, item in buckets.items():
            log.debug("[~] Processing Create/Update for: {}".format(b))
            # If the bucket does not exist, then simply drop the request --
            # If this happens, there is likely a Delete event that has occurred and will be processed soon.
            try:
                bucket_details = get_bucket(b,
                                            account_number=account_id,
                                            include_created=(item.get("creationDate") is None),
                                            assume_role=HISTORICAL_ROLE,
                                            region=CURRENT_REGION)
                if bucket_details.get("Error"):
                    log.error("[X] Unable to fetch details about bucket: {}. "
                              "The error details are: {}".format(b, bucket_details["Error"]))
                    continue

            except ClientError as ce:
                if ce.response["Error"]["Code"] == "NoSuchBucket":
                    log.warning("[?] Received update request for bucket: {} that does not "
                                "currently exist. Skipping.".format(b))
                    continue

                # Catch Access Denied exceptions as well:
                if ce.response["Error"]["Code"] == "AccessDenied":
                    log.error("[X] Unable to fetch details for S3 Bucket: {} in {}. Access is Denied. Skipping...".format(
                        b, account_id
                    ))
                    continue
                raise Exception(ce)

            # Pull out the fields we want:
            data = {
                "arn": "arn:aws:s3:::{}".format(b),
                "principalId": cloudwatch.get_principal(item["eventDetails"]),
                "userIdentity": cloudwatch.get_user_identity(item["eventDetails"]),
                "accountId": account_id,
                "eventTime": item["eventDetails"]["detail"]["eventTime"],
                "BucketName": b,
                "Region": bucket_details["Region"],
                # Duplicated in top level and configuration for secondary index
                "Tags": bucket_details["Tags"] or {},
                "eventSource": item["eventDetails"]["detail"]["eventSource"]
            }

            # Remove the fields we don't care about:
            del bucket_details["Arn"]
            del bucket_details["GrantReferences"]
            del bucket_details["Region"]

            if not bucket_details.get("CreationDate"):
                bucket_details["CreationDate"] = item["creationDate"]

            data["configuration"] = bucket_details

            current_revision = CurrentS3Model(**data)
            current_revision.save()