def test_collector_on_deleted_bucket(historical_role, buckets, mock_lambda_environment, swag_accounts,
                                     current_s3_table):
    from historical.s3.collector import handler

    # If an event arrives for a bucket that has been deleted, the collector should
    # skip it and wait until the deletion event arrives.
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(
            requestParameters={"bucketName": "not-a-bucket"},
            source="aws.s3",
            eventName="PutBucketPolicy",
        )
    )
    create_event_data = json.dumps(create_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=create_event_data))]
    )
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 0
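
# Every test here round-trips factory objects through json.dumps(..., default=serialize).
# The imported `serialize` comes from the test factories module; purely for orientation,
# a minimal sketch of what such a `default` hook has to handle in these tests (datetimes
# and factory instances) might look like the following. This is an illustrative
# assumption, not the repo's actual implementation, and is named `_serialize_sketch`
# to avoid shadowing the real helper:
def _serialize_sketch(obj):
    """JSON `default` hook: render datetimes and factory stubs as plain JSON types."""
    if isinstance(obj, datetime):
        # Match the timestamp shape the tests assert on, e.g. 2017-05-12T10:30:00Z
        return obj.replace(tzinfo=None, microsecond=0).isoformat() + 'Z'
    if hasattr(obj, '__dict__'):
        return vars(obj)  # Flatten factory objects into their attribute dicts.
    raise TypeError("Cannot serialize {}".format(type(obj)))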
def test_poller_sqs(historical_role, buckets, mock_lambda_environment, historical_sqs, swag_accounts):
    from historical.s3.poller import handler

    handler({}, None)

    # Need to ensure that 51 total buckets were added into SQS:
    sqs = boto3.client("sqs", region_name="us-east-1")
    queue_url = get_queue_url(os.environ['POLLER_QUEUE_NAME'])

    all_buckets = {"SWAG": True}
    for i in range(0, 50):
        all_buckets["testbucket{}".format(i)] = True

    # Loop through the queue and make sure all buckets are accounted for:
    for _ in range(0, 6):
        messages = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=10)['Messages']
        message_ids = []

        for m in messages:
            message_ids.append({"Id": m['MessageId'], "ReceiptHandle": m['ReceiptHandle']})

            data = s3_polling_schema.loads(m['Body']).data

            assert all_buckets[data["detail"]["request_parameters"]["bucket_name"]]
            assert datetime.strptime(data["detail"]["request_parameters"]["creation_date"],
                                     '%Y-%m-%dT%H:%M:%SZ')
            assert data["detail"]["event_source"] == "historical.s3.poller"

            # Remove from the dict (at the end, there should be 0 items left):
            del all_buckets[data["detail"]["request_parameters"]["bucket_name"]]

        sqs.delete_message_batch(QueueUrl=queue_url, Entries=message_ids)

    assert len(all_buckets) == 0

    # Check that an exception raised doesn't break things:
    import historical.s3.poller

    def mocked_poller(account, stream):
        raise ClientError({"Error": {"Message": "", "Code": "AccessDenied"}}, "sts:AssumeRole")

    old_method = historical.s3.poller.produce_events  # For pytest inter-test issues...
    historical.s3.poller.produce_events = mocked_poller
    handler({}, None)
    historical.s3.poller.produce_events = old_method
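
# For orientation, the polling message that the schema assertions above (and in the
# Kinesis variant below) expect looks roughly like this. The shape is inferred only
# from the fields the tests assert on; the real s3_polling_schema output may carry
# additional keys:
_POLLING_EVENT_SKETCH = {
    "detail": {
        "event_source": "historical.s3.poller",
        "request_parameters": {
            "bucket_name": "testbucket0",
            "creation_date": "2017-05-12T10:30:00Z",
        },
    },
}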
def test_poller_kinesis(historical_role, buckets, mock_lambda_environment, historical_kinesis, swag_accounts):
    from historical.s3.poller import handler

    os.environ["MAX_BUCKET_BATCH"] = "4"
    handler({}, None)

    # Need to ensure that 51 total buckets were added to the stream:
    kinesis = boto3.client("kinesis", region_name="us-east-1")

    all_buckets = {"SWAG": True}
    for i in range(0, 50):
        all_buckets["testbucket{}".format(i)] = True

    # Loop through the stream and make sure all buckets are accounted for:
    shard_id = kinesis.describe_stream(
        StreamName="historicalstream")["StreamDescription"]["Shards"][0]["ShardId"]
    iterator = kinesis.get_shard_iterator(
        StreamName="historicalstream",
        ShardId=shard_id,
        ShardIteratorType="AT_SEQUENCE_NUMBER",
        StartingSequenceNumber="0")

    records = kinesis.get_records(ShardIterator=iterator["ShardIterator"])
    for r in records["Records"]:
        data = s3_polling_schema.loads(r["Data"]).data

        assert all_buckets[data["detail"]["request_parameters"]["bucket_name"]]
        assert datetime.strptime(data["detail"]["request_parameters"]["creation_date"],
                                 '%Y-%m-%dT%H:%M:%SZ')

        # Remove from the dict (at the end, there should be 0 items left):
        del all_buckets[data["detail"]["request_parameters"]["bucket_name"]]

    assert len(all_buckets) == 0

    # Check that an exception raised doesn't break things:
    import historical.s3.poller

    def mocked_poller(account, stream):
        raise ClientError({"Error": {"Message": "", "Code": "AccessDenied"}}, "sts:AssumeRole")

    old_method = historical.s3.poller.create_polling_event  # For pytest inter-test issues...
    historical.s3.poller.create_polling_event = mocked_poller
    handler({}, None)
    historical.s3.poller.create_polling_event = old_method
def make_poller_events():
    """A sort-of fixture to make polling events for tests."""
    from historical.s3.poller import poller_tasker_handler as handler
    handler({}, None)

    # Need to ensure that all of the accounts and regions were properly tasked (only 1 region for S3):
    sqs = boto3.client("sqs", region_name="us-east-1")
    queue_url = get_queue_url(os.environ['POLLER_TASKER_QUEUE_NAME'])
    messages = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=10)['Messages']

    # 'Body' needs to be made into 'body' for proper parsing later:
    for m in messages:
        m['body'] = m.pop('Body')

    return messages
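
# `get_queue_url` above and below is assumed to be a thin wrapper around boto3's
# get_queue_url call; a minimal sketch under that assumption (illustrative stand-in,
# not the repo's actual helper):
def _get_queue_url_sketch(queue_name):
    """Resolve an SQS queue name to its URL."""
    sqs = boto3.client("sqs", region_name="us-east-1")
    return sqs.get_queue_url(QueueName=queue_name)['QueueUrl']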
def test_collector_kinesis(historical_role, buckets, mock_lambda_environment, swag_accounts, current_s3_table):
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1

    # Verify that the tags are duplicated in the top level and configuration:
    assert len(result[0].Tags.attribute_values) == len(
        result[0].configuration.attribute_values["Tags"]) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == \
        result[0].configuration.attribute_values["Tags"]["theBucketName"] == "testbucket1"  # noqa

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1", "creationDate": now},
                             source="aws.s3",
                             eventName="DescribeBucket",
                             eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 0
def test_collector_sqs(historical_role, buckets, mock_lambda_environment, swag_accounts, current_s3_table):
    """Test the Collector (SQS event path)."""
    from historical.s3.models import CurrentS3Model
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == "testbucket1"
    assert result[0].eventSource == "aws.s3"

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1", "creationDate": now},
                             eventSource="historical.s3.poller",
                             eventName="PollS3",
                             eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"
    assert result[0].eventSource == "historical.s3.poller"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 0
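
# The differ test below copies and mutates a module-level S3_BUCKET fixture. Inferring
# only from the fields it touches (arn, eventTime, ttl, Tags, configuration.Tags, and
# the ephemeral configuration._version), a minimal illustrative shape could be the
# following; the real fixture almost certainly carries more bucket configuration:
_S3_BUCKET_SKETCH = {
    "arn": "arn:aws:s3:::testbucket1",
    "eventTime": "2017-05-12T10:30:00Z",
    "Tags": {"theBucketName": "testbucket1"},
    "configuration": {
        "Tags": {"theBucketName": "testbucket1"},
        "_version": 1,  # Ephemeral field: the differ ignores changes to it.
    },
}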
def test_differ(durable_s3_table, mock_lambda_environment):
    from historical.s3.models import DurableS3Model
    from historical.s3.differ import handler
    from historical.models import TTL_EXPIRY

    ttl = int(time.time() + TTL_EXPIRY)
    new_bucket = S3_BUCKET.copy()
    new_bucket['eventTime'] = datetime(
        year=2017, month=5, day=12, hour=10, minute=30, second=0).isoformat() + 'Z'
    new_bucket["ttl"] = ttl
    new_item = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
            NewImage=new_bucket,
            Keys={'arn': new_bucket['arn']}),
            eventName='INSERT')
    ])
    data = json.loads(json.dumps(new_item, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 1

    # Test duplicates don't change anything:
    data = json.loads(json.dumps(new_item, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 1

    # Test ephemeral changes don't add new models:
    ephemeral_changes = S3_BUCKET.copy()
    ephemeral_changes["eventTime"] = datetime(
        year=2017, month=5, day=12, hour=11, minute=30, second=0).isoformat() + 'Z'
    ephemeral_changes["configuration"]["_version"] = 99999
    ephemeral_changes["ttl"] = ttl
    data = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
            NewImage=ephemeral_changes,
            Keys={'arn': ephemeral_changes['arn']}),
            eventName='MODIFY')
    ])
    data = json.loads(json.dumps(data, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 1

    # Add an update:
    new_changes = S3_BUCKET.copy()
    new_date = datetime(
        year=2017, month=5, day=12, hour=11, minute=30, second=0).isoformat() + 'Z'
    new_changes["eventTime"] = new_date
    new_changes["Tags"] = {"ANew": "Tag"}
    new_changes["configuration"]["Tags"] = {"ANew": "Tag"}
    new_changes["ttl"] = ttl
    data = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
            NewImage=new_changes,
            Keys={'arn': new_changes['arn']}),
            eventName='MODIFY')
    ])
    data = json.loads(json.dumps(data, default=serialize))
    handler(data, None)
    results = list(DurableS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(results) == 2
    assert results[1].Tags["ANew"] == \
        results[1].configuration.attribute_values["Tags"]["ANew"] == "Tag"
    assert results[1].eventTime == new_date

    # And deletion (ensure new record -- testing TTL):
    delete_bucket = S3_BUCKET.copy()
    delete_bucket["eventTime"] = datetime(
        year=2017, month=5, day=12, hour=12, minute=30, second=0).isoformat() + 'Z'
    delete_bucket["ttl"] = ttl
    data = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
            OldImage=delete_bucket,
            Keys={'arn': delete_bucket['arn']}),
            eventName='REMOVE',
            userIdentity=UserIdentityFactory(
                type='Service',
                principalId='dynamodb.amazonaws.com'))
    ])
    data = json.loads(json.dumps(data, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 3
def test_poller_processor_handler(historical_role, buckets, mock_lambda_environment, historical_sqs,
                                  swag_accounts):
    """Test the Poller's processing component that tasks the collector."""
    from historical.s3.poller import poller_processor_handler as handler

    # Create the events and SQS records:
    messages = make_poller_events()
    event = json.loads(json.dumps(RecordsFactory(records=messages), default=serialize))

    # Run the collector:
    handler(event, None)

    # Need to ensure that 51 total buckets were added into SQS:
    sqs = boto3.client("sqs", region_name="us-east-1")
    queue_url = get_queue_url(os.environ['POLLER_QUEUE_NAME'])

    all_buckets = {"SWAG": True}
    for i in range(0, 50):
        all_buckets[f"testbucket{i}"] = True

    # Loop through the queue and make sure all buckets are accounted for:
    for _ in range(0, 6):
        messages = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=10)['Messages']
        message_ids = []

        for msg in messages:
            message_ids.append({"Id": msg['MessageId'], "ReceiptHandle": msg['ReceiptHandle']})

            data = S3_POLLING_SCHEMA.loads(msg['Body']).data

            assert all_buckets[data["detail"]["request_parameters"]["bucket_name"]]
            assert datetime.strptime(data["detail"]["request_parameters"]["creation_date"],
                                     '%Y-%m-%dT%H:%M:%SZ')
            assert data["detail"]["event_source"] == "historical.s3.poller"

            # Remove from the dict (at the end, there should be 0 items left):
            del all_buckets[data["detail"]["request_parameters"]["bucket_name"]]

        sqs.delete_message_batch(QueueUrl=queue_url, Entries=message_ids)

    assert not all_buckets

    # Check that an exception raised doesn't break things:
    import historical.s3.poller

    def mocked_poller(account, stream, randomize_delay=0):  # pylint: disable=W0613
        raise ClientError({"Error": {"Message": "", "Code": "AccessDenied"}}, "sts:AssumeRole")

    old_method = historical.s3.poller.produce_events  # For pytest inter-test issues...
    historical.s3.poller.produce_events = mocked_poller
    handler(event, None)
    historical.s3.poller.produce_events = old_method
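
# The manual save/patch/restore of produce_events above works, but it leaves the module
# patched if the handler raises before the restore line runs. A sketch of the same
# AccessDenied check using pytest's built-in monkeypatch fixture, which undoes the patch
# automatically on test teardown (hypothetical test name, same fixtures as above):
def test_poller_processor_handler_access_denied(monkeypatch, historical_role, buckets,
                                                mock_lambda_environment, historical_sqs,
                                                swag_accounts):
    import historical.s3.poller
    from historical.s3.poller import poller_processor_handler as handler

    def mocked_poller(account, stream, randomize_delay=0):  # pylint: disable=W0613
        raise ClientError({"Error": {"Message": "", "Code": "AccessDenied"}}, "sts:AssumeRole")

    # monkeypatch restores the original attribute even if the handler call below raises:
    monkeypatch.setattr(historical.s3.poller, "produce_events", mocked_poller)

    messages = make_poller_events()
    event = json.loads(json.dumps(RecordsFactory(records=messages), default=serialize))
    handler(event, None)  # Should swallow the ClientError rather than propagate it.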