def test_process_durable_event_deletion(delete_bucket_event, generated_report):
    generated_report["all_buckets"] = []
    records = deserialize_records(delete_bucket_event["Records"])
    process_durable_event(records[0], generated_report)

    # Should not do anything -- since not present in the list:
    assert not generated_report["all_buckets"]

    # Check if removal logic works:
    generated_report["buckets"]["testbucketNEWBUCKET"] = {"some configuration": "this should be deleted"}
    records = deserialize_records(delete_bucket_event["Records"])
    process_durable_event(records[0], generated_report)
    assert not generated_report["buckets"].get("testbucketNEWBUCKET")

def poller_processor_handler(event, context):
    """
    Historical S3 Poller Processor.

    This will receive events from the Poller Tasker, and will list all objects of a given technology for an
    account/region pair. This will generate `polling events` which simulate changes. These polling events contain
    configuration data such as the account/region defining where the collector should attempt to gather data from.
    """
    log.debug('[@] Running Poller...')

    queue_url = get_queue_url(os.environ.get('POLLER_QUEUE_NAME', 'HistoricalS3Poller'))

    records = deserialize_records(event['Records'])

    for record in records:
        # Skip accounts that have role assumption errors:
        try:
            # List all buckets in the account:
            all_buckets = list_buckets(account_number=record['account_id'],
                                       assume_role=HISTORICAL_ROLE,
                                       session_name="historical-cloudwatch-s3list",
                                       region=record['region'])["Buckets"]

            events = [s3_polling_schema.serialize_me(record['account_id'], bucket) for bucket in all_buckets]
            produce_events(events, queue_url)

            log.debug('[@] Finished generating polling events. Account: {}/{} Events Created: {}'.format(
                record['account_id'], record['region'], len(events)))
        except ClientError as e:
            log.error('[X] Unable to generate events for account. Account Id: {account_id} Reason: {reason}'.format(
                account_id=record['account_id'], reason=e))

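# Illustrative note (not part of the original module): the poller processor above consumes SQS messages produced
# by the Poller Tasker. Based on the fields the handler accesses, each deserialized record is assumed to carry at
# least an account and a region, e.g. a hypothetical minimal payload for local testing:
#
#     {"account_id": "123456789012", "region": "us-east-1"}
#
# The exact payload shape is defined by the tasker's schema; the example above is only a sketch.
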
def test_lite_bucket_schema_for_events(historical_table, bucket_event):
    old_fields = CONFIG.exclude_fields
    CONFIG.exclude_fields = "Name,Owner,_version,Grants,LifecycleRules,Logging,Policy,Tags,Versioning,Website,Cors," \
                            "Notifications,Acceleration,Replication,CreationDate,AnalyticsConfigurations," \
                            "MetricsConfigurations,InventoryConfigurations".split(",")

    all_buckets = CurrentS3Model.scan()
    generated_report = S3ReportSchema(strict=True).dump({"all_buckets": all_buckets}).data

    generated_report["all_buckets"] = []

    records = deserialize_records(bucket_event["Records"])
    process_durable_event(records[0], generated_report)

    lite_report = S3ReportSchema(strict=True).dump(generated_report).data

    assert lite_report["generated_date"]
    assert lite_report["s3_report_version"] == CONFIG.s3_reports_version
    assert not lite_report.get("all_buckets")

    assert lite_report["buckets"]["testbucketNEWBUCKET"]
    assert len(lite_report["buckets"]) == 11

    for bucket in lite_report["buckets"].values():
        keys = bucket.keys()
        for excluded in CONFIG.exclude_fields:
            assert excluded not in keys

        assert bucket["AccountId"] == "123456789012"
        assert bucket["Region"] == "us-east-1"

    # Clean-up:
    CONFIG.exclude_fields = old_fields

def poller_processor_handler(event, context):  # pylint: disable=W0613
    """
    Historical VPC Poller Processor.

    This will receive events from the Poller Tasker, and will list all objects of a given technology for an
    account/region pair. This will generate `polling events` which simulate changes. These polling events contain
    configuration data such as the account/region defining where the collector should attempt to gather data from.
    """
    LOG.debug('[@] Running Poller...')

    queue_url = get_queue_url(os.environ.get('POLLER_QUEUE_NAME', 'HistoricalVPCPoller'))

    records = deserialize_records(event['Records'])

    for record in records:
        # Skip accounts that have role assumption errors:
        try:
            vpcs = describe_vpcs(
                account_number=record['account_id'],
                assume_role=HISTORICAL_ROLE,
                region=record['region']
            )

            events = [VPC_POLLING_SCHEMA.serialize(record['account_id'], v) for v in vpcs]
            produce_events(events, queue_url, randomize_delay=RANDOMIZE_POLLER)

            LOG.debug(f"[@] Finished generating polling events. Account: {record['account_id']}/{record['region']} "
                      f"Events Created: {len(events)}")
        except ClientError as exc:
            LOG.error(f"[X] Unable to generate events for account/region. Account Id/Region: {record['account_id']}"
                      f"/{record['region']} Reason: {exc}")

def test_process_durable_event(bucket_event, generated_report):
    generated_report["all_buckets"] = []
    records = deserialize_records(bucket_event["Records"])
    process_durable_event(records[0], generated_report)

    assert len(generated_report["all_buckets"]) == 1
    assert generated_report["all_buckets"][0]['Region'] == "us-east-1"

def test_update_records(existing_s3_report, historical_table, bucket_event, delete_bucket_event, dump_buckets,
                        lambda_entry):
    old_dump_to_buckets = CONFIG.dump_to_buckets
    old_import_bucket = CONFIG.import_bucket
    CONFIG.import_bucket = "dump0"
    CONFIG.dump_to_buckets = ["dump0"]

    # Add a bucket:
    if lambda_entry:
        handler(bucket_event, MockContext())
    else:
        records = deserialize_records(bucket_event["Records"])
        update_records(records)

    new_report = json.loads(
        dump_buckets.get_object(Bucket="dump0", Key="historical-s3-report.json")["Body"].read().decode("utf-8"))

    assert len(new_report["buckets"]) == 11

    existing_json = json.loads(existing_s3_report.decode("utf-8"))
    assert len(new_report["buckets"]) != len(existing_json["buckets"])
    assert new_report["buckets"]["testbucketNEWBUCKET"]

    # Delete a bucket:
    if lambda_entry:
        handler(delete_bucket_event, MockContext())
    else:
        records = deserialize_records(delete_bucket_event["Records"])
        update_records(records)

    delete_report = json.loads(
        dump_buckets.get_object(Bucket="dump0", Key="historical-s3-report.json")["Body"].read().decode("utf-8"))

    assert len(delete_report["buckets"]) == len(existing_json["buckets"]) == 10
    assert not delete_report["buckets"].get("testbucketNEWBUCKET")

    # Clean-up:
    CONFIG.dump_to_buckets = old_dump_to_buckets
    CONFIG.import_bucket = old_import_bucket

def handler(event, context):  # pylint: disable=W0613
    """
    Historical S3 event differ.

    Listens to the Historical current table and determines if there are differences that need to be persisted in the
    historical record.
    """
    # De-serialize the records:
    records = deserialize_records(event['Records'])

    for record in records:
        process_dynamodb_differ_record(record, CurrentS3Model, DurableS3Model)

def handler(event, context):
    """
    Historical VPC event differ.

    Listens to the Historical current table and determines if there are differences that need to be persisted in the
    historical record.
    """
    # De-serialize the records:
    records = deserialize_records(event['Records'])

    for record in records:
        process_dynamodb_differ_record(record, CurrentVPCModel, DurableVPCModel)

def test_process_big_durable_event(bucket_event, generated_report):
    generated_report["all_buckets"] = []
    record = deserialize_records(bucket_event["Records"])[0]
    record.pop('item')
    record[EVENT_TOO_BIG_FLAG] = True

    # Bucket does not exist:
    process_durable_event(record, generated_report)
    assert len(generated_report['all_buckets']) == 0

    # Bucket that does exist:
    record['arn'] = 'arn:aws:s3:::testbucket0'
    process_durable_event(record, generated_report)
    assert len(generated_report["all_buckets"]) == 1
    assert generated_report["all_buckets"][0]['BucketName'] == "testbucket0"

def handler(event, context):
    """
    Historical security group event collector.

    This collector is responsible for processing CloudWatch events and polling events.
    """
    records = deserialize_records(event['Records'])

    # Split records into two groups, update and delete.
    # We don't want to query for deleted records.
    update_records, delete_records = group_records_by_type(records, UPDATE_EVENTS)
    capture_delete_records(delete_records)

    # Filter out error events:
    update_records = [e for e in update_records if not e['detail'].get('errorCode')]

    # Group records by account for more efficient processing:
    log.debug('[@] Update Records: {records}'.format(records=update_records))

    capture_update_records(update_records)

def poller_processor_handler(event, context):
    """
    Historical Security Group Poller Processor.

    This will receive events from the Poller Tasker, and will list all objects of a given technology for an
    account/region pair. This will generate `polling events` which simulate changes. These polling events contain
    configuration data such as the account/region defining where the collector should attempt to gather data from.
    """
    log.debug('[@] Running Poller...')

    queue_url = get_queue_url(os.environ.get('POLLER_QUEUE_NAME', 'HistoricalSecurityGroupPoller'))

    records = deserialize_records(event['Records'])

    for record in records:
        # Skip accounts that have role assumption errors:
        try:
            groups = describe_security_groups(
                account_number=record['account_id'],
                assume_role=HISTORICAL_ROLE,
                region=record['region'])

            events = [security_group_polling_schema.serialize(record['account_id'], g)
                      for g in groups['SecurityGroups']]
            produce_events(events, queue_url)

            log.debug('[@] Finished generating polling events. Account: {}/{} Events Created: {}'.format(
                record['account_id'], record['region'], len(events)))
        except ClientError as e:
            log.error('[X] Unable to generate events for account/region. Account Id/Region: {account_id}/{region}'
                      ' Reason: {reason}'.format(account_id=record['account_id'], region=record['region'], reason=e))

def handler(event, context):
    """
    Historical S3 event collector.

    This collector is responsible for processing CloudWatch events and polling events.
    """
    records = deserialize_records(event['Records'])

    # Split records into two groups, update and delete.
    # We don't want to query for deleted records.
    update_records, delete_records = group_records_by_type(records, UPDATE_EVENTS)

    log.debug("[@] Processing update records...")
    process_update_records(update_records)
    log.debug("[@] Completed processing of update records.")

    log.debug("[@] Processing delete records...")
    process_delete_records(delete_records)
    log.debug("[@] Completed processing of delete records.")

    log.debug('[@] Successfully updated current Historical table')

def test_bucket_schema_for_events(historical_table, generated_report, bucket_event):
    generated_report["all_buckets"] = []

    records = deserialize_records(bucket_event["Records"])
    process_durable_event(records[0], generated_report)

    full_report = S3ReportSchema(strict=True).dump(generated_report).data

    assert full_report["generated_date"]
    assert full_report["s3_report_version"] == CONFIG.s3_reports_version
    assert not full_report.get("all_buckets")

    assert full_report["buckets"]["testbucketNEWBUCKET"]
    assert len(full_report["buckets"]) == 11

    for name, value in full_report["buckets"].items():
        assert value["AccountId"] == "123456789012"
        assert value["Region"] == "us-east-1"
        assert value["Tags"]["theBucketName"] == name
        assert not value.get("_version")
        assert not value.get("Name")

def handler(event, context):
    """
    Historical S3 report generator lambda handler.

    This will handle both scheduled events as well as DynamoDB stream events.
    """
    set_config_from_input(event)

    if event.get("Records"):
        log.debug('[@] Received update event with records.')

        # Deserialize the records:
        records = deserialize_records(event["Records"])
        log.debug('[ ] Received the (deserialized) records: {}'.format(records))

        # Update event:
        update_records(records)
    else:
        log.debug('[@] Received a scheduled event for a full report.')

        # Generate event:
        dump_report()

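# Illustrative note (not part of the original module): the report handler dispatches on the presence of the
# "Records" key. A scheduled CloudWatch event (no "Records") triggers a full report via dump_report(), while a
# DynamoDB stream payload with "Records" triggers an incremental update via update_records(). A hypothetical
# local invocation might look like:
#
#     handler({}, None)                  # scheduled event -> full report
#     handler({"Records": [...]}, None)  # stream event -> incremental update
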
def poller_processor_handler(event, context):  # pylint: disable=W0613
    """
    Historical Security Group Poller Processor.

    This will receive events from the Poller Tasker, and will list all objects of a given technology for an
    account/region pair. This will generate `polling events` which simulate changes. These polling events contain
    configuration data such as the account/region defining where the collector should attempt to gather data from.
    """
    LOG.debug('[@] Running Poller...')

    collector_poller_queue_url = get_queue_url(os.environ.get('POLLER_QUEUE_NAME', 'HistoricalSecurityGroupPoller'))
    tasker_queue_url = get_queue_url(os.environ.get('POLLER_TASKER_QUEUE_NAME', 'HistoricalSecurityGroupPollerTasker'))

    poller_task_schema = HistoricalPollerTaskEventModel()
    records = deserialize_records(event['Records'])

    for record in records:
        # Skip accounts that have role assumption errors:
        try:
            # Did we get a NextToken?
            if record.get('NextToken'):
                LOG.debug(f"[@] Received pagination token: {record['NextToken']}")
                groups = describe_security_groups(
                    account_number=record['account_id'],
                    assume_role=HISTORICAL_ROLE,
                    region=record['region'],
                    MaxResults=200,
                    NextToken=record['NextToken'])
            else:
                groups = describe_security_groups(
                    account_number=record['account_id'],
                    assume_role=HISTORICAL_ROLE,
                    region=record['region'],
                    MaxResults=200)

            # FIRST THINGS FIRST: Did we get a `NextToken`? If so, we need to enqueue that ASAP because
            # `NextToken`s expire in 60 seconds!
            if groups.get('NextToken'):
                LOG.debug(f"[-->] Pagination required {groups['NextToken']}. Tasking continuation.")
                produce_events(
                    [poller_task_schema.serialize_me(record['account_id'], record['region'],
                                                     next_token=groups['NextToken'])],
                    tasker_queue_url)

            # Task the collector to perform all the DDB logic -- this will pass in the collected data to the
            # collector in very small batches.
            events = [SECURITY_GROUP_POLLING_SCHEMA.serialize(record['account_id'], g, record['region'])
                      for g in groups['SecurityGroups']]
            produce_events(events, collector_poller_queue_url, batch_size=3)

            LOG.debug(f"[@] Finished generating polling events. Account: {record['account_id']}/{record['region']} "
                      f"Events Created: {len(events)}")
        except ClientError as exc:
            LOG.error(f"[X] Unable to generate events for account/region. Account Id/Region: {record['account_id']}"
                      f"/{record['region']} Reason: {exc}")
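
# Illustrative note (not part of the original module): when describe_security_groups returns a NextToken, the
# handler re-enqueues a continuation task onto the poller tasker queue before producing collector events, since
# the token is only valid for a short window (roughly 60 seconds, per the comment above). The continuation record
# is assumed to look like the original tasker record plus the pagination token, e.g.:
#
#     {"account_id": "123456789012", "region": "us-east-1", "NextToken": "<token>"}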