def test_exception_on_error_response(post_mock):
    queue_ingestor.ssm_client.get_parameters.return_value = ssm_return_vals()

    # mock response contains error
    post_mock.return_value = response_mock = MagicMock()
    response_mock.json.return_value = {
        "error": "I'm Aled Jones, it's all gone wrong for me"
    }
    response_mock.text = "Air"

    test_event = {
        "Records": [{
            "body": dumps({
                "Subject": "my_scan",
                "Message": dumps({"some_field": "some_value"})
            })
        }]
    }

    with pytest.raises(RuntimeError,
                       match="I'm Aled Jones, it's all gone wrong for me"):
        queue_ingestor.ingest(test_event, MagicMock())

    post_mock.assert_called_with("https://elastic.url.com/my_scan/_doc",
                                 auth=auth_mock,
                                 data=dumps({"some_field": "some_value"}),
                                 headers={"content-type": "application/json"})
async def report_letters(event, _):
    es_queue = event['ssm_params'][ES_SQS]
    writes = []
    for record in event["Records"]:
        s3_object = objectify(record["s3"])
        bucket = s3_object.bucket.name
        key = unquote_plus(s3_object.object.key)
        print(f"Loading new dead letter file: {(bucket, key)}")
        obj = await s3_client.get_object(Bucket=bucket, Key=key)
        dead_letter_details = obj["Metadata"]
        print(f"Writing new dead letter with metadata: {dead_letter_details}")
        ensure_essential_metadata(dead_letter_details, [
            ("deadletterqueuename", "Metadata missing"),
            ("deadletterkey", "Metadata missing"),
            ("deadlettersenttime",
             str(iso_date_string_from_timestamp(datetime.now().timestamp())))
        ])
        writes.append(
            sqs_client.send_message(QueueUrl=es_queue,
                                    MessageBody=dumps({
                                        "Subject": "dead_letter:data:write",
                                        "Message": dumps(dead_letter_details)
                                    })))
    print("Gathering writes")
    await gather(*writes)
    print("Written successfully")
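# A minimal sketch of the ensure_essential_metadata helper assumed above; the
# real implementation isn't shown in this section. It illustrates the
# (key, default) contract the call site relies on: back-fill any missing keys.
def ensure_essential_metadata(metadata, defaults):
    for key, default in defaults:
        metadata.setdefault(key, default)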
def test_exception_on_no_parent_key(post_mock):
    queue_ingestor.ssm_client.get_parameters.return_value = ssm_return_vals()

    # mock response contains no error; the failure here is the missing ParentKey
    post_mock.return_value = response_mock = MagicMock()
    response_mock.json.return_value = {}
    response_mock.text = "Walk"

    test_event = {
        "Records": [{
            "body": dumps({
                "Subject": "my_scan",
                "Message": dumps({"some_field": "some_value"}),
                "MessageAttributes": {
                    "TemporalKey": {
                        "Value": ResultsContext._hash_of(
                            iso_date_string_from_timestamp(5)),
                        "DataType": "String"
                    }
                }
            })
        }]
    }

    with pytest.raises(
            ValueError,
            match="Analytics ingestor requires the ParentKey message attribute be present"):
        queue_ingestor.ingest(test_event, MagicMock())
def _delete_old_snapshots(endpoint, data_source, doc_type, parent_key):
    es_url = f"https://{endpoint}/{data_source}:{doc_type}_snapshot:write/_doc/_delete_by_query?conflicts=proceed"
    print(f"Deleting {doc_type} snapshots for {parent_key} using {es_url}")
    delete_query = {"query": {"term": {"__ParentKey": parent_key}}}
    r = requests.post(es_url, auth=awsauth, data=dumps(delete_query), headers=HEADERS)
    print(f"Delete completed {r.text}")
    response_json = r.json()
    if "error" in response_json:
        raise RuntimeError(dumps(response_json["error"]))
async def publish_results(self):
    result_docs = {}
    msg_for_analytics_ingestor = {
        "scan_id": self.scan_id,
        "scan_start_time": self.start,
        "scan_end_time": self.end,
        "__docs": result_docs
    }
    for doc_type, docs in self.docs.items():
        docs_for_type = []
        for key, doc in docs.items():
            docs_for_type.append({
                "NonTemporalKey": self._hash_of(key),
                "Data": doc
            })
        result_docs[doc_type] = docs_for_type
    r = await self.sns_client.publish(
        TopicArn=self.topic,
        Subject=f"{self.task_name}",
        Message=dumps(msg_for_analytics_ingestor),
        MessageAttributes={
            "ParentKey": {
                "StringValue": self._hash_of(self._parent_key()),
                "DataType": "String"
            },
            "TemporalKey": {
                "StringValue": self._hash_of(self.end),
                "DataType": "String"
            }
        })
    print(f"Published message {r['MessageId']}")
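# A hedged sketch of the ResultsContext._hash_of helper relied on above and
# throughout the tests. The real implementation isn't shown in this section;
# a stable digest over the JSON form is assumed, since the same input must
# always yield the same doc id.
from hashlib import sha256

def _hash_of(value):  # sketched standalone; the real one is a staticmethod
    return sha256(dumps(value).encode("utf-8")).hexdigest()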
async def __aenter__(self):
    queue = await self.sqs_client.create_queue(QueueName=self.queue_name)
    self.queue_url = queue["QueueUrl"]
    response = await self.sqs_client.get_queue_attributes(
        QueueUrl=self.queue_url, AttributeNames=["QueueArn"])
    self.queue_arn = response["Attributes"]["QueueArn"]

    # ensure we can subscribe too
    await self.sqs_client.set_queue_attributes(
        QueueUrl=self.queue_url,
        Attributes={
            'Policy': dumps({
                "Version": "2008-10-17",
                "Id": f"{self.queue_arn}/SQSDefaultPolicy",
                "Statement": [{
                    "Effect": "Allow",
                    "Principal": "*",
                    "Action": "SQS:SendMessage",
                    "Resource": self.queue_arn
                }]
            })
        })

    self.subscription = (await self.sns_client.subscribe(
        TopicArn=self.sns_output_notifier_arn,
        Protocol="sqs",
        Endpoint=self.queue_arn))["SubscriptionArn"]
    print(f"Subscribed {self.queue_name} to {self.task_name}", flush=True)
    return self
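# A hedged sketch of the matching __aexit__. The real teardown isn't shown in
# this section, but a context manager that subscribes and creates a queue on
# entry would conventionally unsubscribe and delete the queue on exit:
async def __aexit__(self, exc_type, exc_value, tb):
    await self.sns_client.unsubscribe(SubscriptionArn=self.subscription)
    await self.sqs_client.delete_queue(QueueUrl=self.queue_url)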
def wrapper(*args, **kwargs):
    try:
        return handler(*args, **kwargs)
    except Exception as e:
        event, context = _get_event_and_context(args)
        context.loop = get_event_loop()
        msg_attrs = {
            "RequestID": {
                "StringValue": str(context.aws_request_id),
                "DataType": "String"
            },
            "ErrorCode": {
                "StringValue": "500",
                "DataType": "Number"
            },
            "ErrorMessage": {
                "StringValue": traceback.format_exc(),
                "DataType": "String"
            }
        }
        context.loop.run_until_complete(
            sqs_client.send_message(QueueUrl=sqs_queue_url,
                                    MessageBody=dumps(event),
                                    MessageAttributes=msg_attrs))
        # re-raise the exception, suppressing it would make the lambda appear to succeed
        raise e
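# Illustrative usage only: the wrapper above is presumably returned by a
# decorator factory whose name is not shown in this section. Assuming a
# hypothetical `dead_letter` decorator, a handler would be wrapped like so,
# forwarding any unhandled exception to the dead letter queue before re-raising:
@dead_letter  # hypothetical decorator name
def my_handler(event, context):
    raise RuntimeError("boom")  # would be forwarded, then re-raised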
async def test_publish_no_data():
    mock_sns_client = MagicMock()
    mock_sns_client.publish.return_value = coroutine_of({"MessageId": "Msg32"})
    context = ResultsContext(
        "PubTopic",
        {"address": "123.123.123.123"},
        "scan_12",
        iso_date_string_from_timestamp(123456),
        iso_date_string_from_timestamp(789123),
        "scan_name",
        mock_sns_client
    )
    await context.publish_results()

    # it should publish the top level info, parent key and temporal key
    mock_sns_client.publish.assert_called_with(
        TopicArn="PubTopic",
        Subject="scan_name",
        Message=dumps({
            "scan_id": "scan_12",
            "scan_start_time": iso_date_string_from_timestamp(123456),
            "scan_end_time": iso_date_string_from_timestamp(789123),
            "__docs": {}
        }),
        MessageAttributes={
            "ParentKey": {
                "StringValue": ResultsContext._hash_of({"address": "123.123.123.123"}),
                "DataType": "String"
            },
            "TemporalKey": {
                "StringValue": ResultsContext._hash_of(iso_date_string_from_timestamp(789123)),
                "DataType": "String"
            }
        })
def wrapper(*args, **kwargs):
    response = handler(*args, **kwargs)
    if 'body' in response:
        try:
            response['body'] = dumps(response['body'])
        except Exception as exception:
            return {'statusCode': 500, 'body': str(exception)}
    return response
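# Illustrative usage only, with a hypothetical decorator name: the wrapper
# above lets a handler return a plain dict body, serialising it to JSON and
# degrading to a 500 response if serialisation fails.
@json_body  # hypothetical decorator name
def api_handler(event, context):
    return {'statusCode': 200, 'body': {'status': 'ok'}}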
def _expected_snapshot_write(doc_type, non_temp_key, auth_mock, data):
    return call(
        f"https://elastic.url.com/scan_name:{doc_type}_snapshot:write/_doc/"
        # Doc id for snapshots uses the non temporal key only
        f"{ResultsContext._hash_of(non_temp_key)}",
        auth=auth_mock,
        data=dumps(data),
        headers={"content-type": "application/json"})
def _expected_history_write(doc_type, non_temp_key, temporal_key, auth_mock, data):
    return call(
        f"https://elastic.url.com/scan_name:{doc_type}_history:write/_doc/"
        # Doc id for history combines the hash of the non temporal key with the temporal key
        f"{ResultsContext._hash_of(non_temp_key)}@{temporal_key}",
        auth=auth_mock,
        data=dumps(data),
        headers={"content-type": "application/json"})
def _post_to_es(endpoint, index, message, doc_id=None):
    doc_id = f"/{doc_id}" if doc_id else ""
    es_url = f"https://{endpoint}/{index}/_doc{doc_id}"
    print(f"Posting {message} to {es_url}")
    r = requests.post(es_url, auth=awsauth, data=message, headers=HEADERS)
    print(f"Post completed {r.text}")
    response_json = r.json()
    if "error" in response_json:
        raise RuntimeError(dumps(response_json["error"]))
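# What the module-level names used above are assumed to be. The tests in this
# section assert headers={"content-type": "application/json"} verbatim, so
# HEADERS is presumably the constant below. awsauth is taken to be a SigV4
# signer for the Elasticsearch domain (e.g. requests_aws4auth.AWS4Auth); its
# exact construction is not shown in this section.
HEADERS = {"content-type": "application/json"}
# awsauth = AWS4Auth(access_key, secret_key, region, "es")  # assumed signer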
async def test_summary_info_published():
    mock_sns_client = MagicMock()
    mock_sns_client.publish.return_value = coroutine_of({"MessageId": "Msg32"})
    context = ResultsContext(
        "PubTopic",
        {"address": "123.456.123.456"},
        "scan_9",
        iso_date_string_from_timestamp(4),
        iso_date_string_from_timestamp(5),
        "scan_name",
        mock_sns_client
    )
    context.add_summaries({"foo": "bar", "boo": "baz"})
    context.add_summary("banana", "man")
    context.post_results("host_info", {"uptime": "1234567"}, include_summaries=True)
    await context.publish_results()

    mock_sns_client.publish.assert_called_with(
        TopicArn="PubTopic",
        Subject="scan_name",
        Message=dumps({
            "scan_id": "scan_9",
            "scan_start_time": iso_date_string_from_timestamp(4),
            "scan_end_time": iso_date_string_from_timestamp(5),
            "__docs": {
                "host_info": [{
                    "NonTemporalKey": ResultsContext._hash_of({
                        "address": "123.456.123.456",
                    }),
                    "Data": {
                        "address": "123.456.123.456",
                        "uptime": "1234567",
                        "summary_foo": "bar",
                        "summary_boo": "baz",
                        "summary_banana": "man",
                        "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
                    }
                }]
            }
        }),
        MessageAttributes={
            "ParentKey": {
                "StringValue": ResultsContext._hash_of({"address": "123.456.123.456"}),
                "DataType": "String"
            },
            "TemporalKey": {
                "StringValue": ResultsContext._hash_of(iso_date_string_from_timestamp(5)),
                "DataType": "String"
            }
        })
async def forward_record(self, event, _):
    es_queue = event['ssm_params'][ES_SQS]
    writes = []
    for record in event["Records"]:
        dynamo_data = record["dynamodb"]
        new_record = self._deserialise_image(dynamo_data, "NewImage")
        old_record = self._deserialise_image(dynamo_data, "OldImage")
        transformed_data = self.transform_record(new_record, old_record)
        msg_attributes = self.construct_msg_attributes(transformed_data)

        # Uses the format that the ResultsContext uses
        # TODO https://dsdmoj.atlassian.net/browse/SA-170 - reuse the ResultsContext
        non_temp_key_val = transformed_data[non_temp_key]
        payload = dumps({
            "__docs": {
                "data": [{
                    "Data": transformed_data,
                    "NonTemporalKey": non_temp_key_val
                }]
            }
        })
        print(f"Forwarding {payload} to {index_name}")

        # N.B. Normally the SNS notifiers that are the output of a scan feed the SQS queue.
        # When Amazon copies the metadata from SNS to SQS, it moves the message attributes
        # into the message body. We replicate that here.
        message_like_from_sns = {
            "Subject": f"{index_name}",
            "Message": payload,
            "MessageAttributes": msg_attributes
        }
        writes.append(
            self.sqs_client.send_message(
                QueueUrl=es_queue,
                MessageBody=dumps(message_like_from_sns)))
    await gather(*writes)
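# A hedged sketch of the _deserialise_image helper used above (the real method
# is not shown in this section). Converting a DynamoDB stream image into plain
# Python values is conventionally done with boto3's TypeDeserializer.
from boto3.dynamodb.types import TypeDeserializer

def _deserialise_image(dynamo_data, image_name):
    image = dynamo_data.get(image_name)
    if image is None:  # e.g. no OldImage on an INSERT event
        return None
    deserialiser = TypeDeserializer()
    return {key: deserialiser.deserialize(value) for key, value in image.items()}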
def test_no_msg_attributes_simple_post(post_mock):
    queue_ingestor.ssm_client.get_parameters.return_value = ssm_return_vals()
    post_mock.return_value = response_mock = MagicMock()
    response_mock.json.return_value = {}

    test_event = {
        "Records": [{
            "body": dumps({
                "Subject": "my_scan",
                "Message": dumps({"some_field": "some_value"})
            })
        }]
    }

    queue_ingestor.ingest(test_event, MagicMock())

    # in the simple (no message attributes) mode, the subject is used as the
    # index name and the whole message is used as the data
    post_mock.assert_called_with("https://elastic.url.com/my_scan/_doc",
                                 auth=auth_mock,
                                 data=dumps({"some_field": "some_value"}),
                                 headers={"content-type": "application/json"})
async def send_request(self):
    db_name = self.get_ssm_param(self._dynamodb_param)
    table = self.dynamodb_resource.Table(db_name)

    # so the ssl scan can be tested we need to give it an entry in dynamo
    # TODO need to stand up our own hosts to scan, not use scanme
    await table.update_item(
        Key={
            "Address": "35.189.73.64",
            "DnsIngestTime": 1560902409
        },
        UpdateExpression="SET Hosts = :Hosts",
        ExpressionAttributeValues={
            ":Hosts": {"scottlogic.com"}
        })

    resp = await self.sqs_client.send_message(
        QueueUrl=self.sqs_input_queue_url,
        # TODO relying on an external resource like this is error prone and unreliable;
        # we should set up a host to scan as part of the test setup instead
        MessageBody=dumps({
            "Message": dumps({
                "scan_id": "Scan5",
                "port_id": "443",
                "protocol": "tcp",
                "address": "35.189.73.64",
                "address_type": "ip",
                "service": "http",
                "product": "apache",
                "version": "0.4",
            })
        }))
    self.request_msg_id = resp["MessageId"]
    print(f"Made request {self.request_msg_id}")
def _post_snap_and_history(body, endpoint, message, data_source):
    attrs = body["MessageAttributes"]
    if "ParentKey" not in attrs:
        raise ValueError(
            "Analytics ingestor requires the ParentKey message attribute be present"
        )
    parent_key = attrs["ParentKey"]["Value"]
    # the temporal key is optional, e.g. the address_info table has no history, only the latest info
    temporal_key = attrs["TemporalKey"]["Value"] if "TemporalKey" in attrs else None
    message_json = loads(message)
    all_docs = message_json.pop("__docs")
    # naming alias just to make the code more readable
    global_fields = message_json
    for doc_type, docs in all_docs.items():
        _delete_old_snapshots(endpoint, data_source, doc_type, parent_key)
        for doc in docs:
            non_temporal_key = doc["NonTemporalKey"]
            content = doc["Data"]
            doc_string = dumps({**global_fields, **content})
            if temporal_key:
                history_doc_id = f"{non_temporal_key}@{temporal_key}"
                # This post is the history, used in time series; note that the key enables re-ingestion
                _post_to_es(endpoint, f"{data_source}:{doc_type}_history:write",
                            doc_string, history_doc_id)
            # This post updates the latest doc for this non temporal key,
            # i.e. it produces an index where we can access the latest version of each scan.
            _post_to_es(endpoint, f"{data_source}:{doc_type}_snapshot:write",
                        doc_string, non_temporal_key)
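# A minimal, hypothetical invocation of _post_snap_and_history, mirroring the
# message shape the tests in this section construct (all values illustrative):
_post_snap_and_history(
    body={"MessageAttributes": {"ParentKey": {"Value": "abc123", "DataType": "String"}}},
    endpoint="elastic.url.com",
    message=dumps({"scan_id": "scan_1", "__docs": {"host_info": []}}),
    data_source="my_scan")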
def expected_pub(doc_type, doc):
    return {
        "TopicArn": "test_topic_arn",
        "Subject": doc_type,
        "Message": dumps(doc)
    }
async def test_context_push_and_pop():
    mock_sns_client = MagicMock()
    mock_sns_client.publish.return_value = coroutine_of({"MessageId": "Msg32"})
    context = ResultsContext(
        "PubTopic",
        {"address": "123.456.123.456"},
        "scan_2",
        iso_date_string_from_timestamp(4),
        iso_date_string_from_timestamp(5),
        "scan_name",
        mock_sns_client
    )
    context.push_context({"port": "22"})
    context.post_results("port_info", {"open": "false"})
    context.push_context({"vulnerability": "cve4"})
    context.post_results("vuln_info", {"severity": "5"})
    context.pop_context()
    context.push_context({"vulnerability": "cve5"})
    context.post_results("vuln_info", {"severity": "2"})
    context.pop_context()
    context.pop_context()
    context.push_context({"port": "80"})
    context.post_results("port_info", {"open": "true"})
    context.pop_context()
    context.post_results("host_info", {"uptime": "1234567"})
    await context.publish_results()

    # it should publish the top level info, parent key and temporal key
    mock_sns_client.publish.assert_called_with(
        TopicArn="PubTopic",
        Subject="scan_name",
        Message=dumps({
            "scan_id": "scan_2",
            "scan_start_time": iso_date_string_from_timestamp(4),
            "scan_end_time": iso_date_string_from_timestamp(5),
            "__docs": {
                "port_info": [{
                    "NonTemporalKey": ResultsContext._hash_of({
                        "address": "123.456.123.456",
                        "port": "22"
                    }),
                    "Data": {
                        "address": "123.456.123.456",
                        "port": "22",
                        "open": "false",
                        "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
                    }
                }, {
                    "NonTemporalKey": ResultsContext._hash_of({
                        "address": "123.456.123.456",
                        "port": "80"
                    }),
                    "Data": {
                        "address": "123.456.123.456",
                        "port": "80",
                        "open": "true",
                        "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
                    }
                }],
                "vuln_info": [{
                    "NonTemporalKey": ResultsContext._hash_of({
                        "address": "123.456.123.456",
                        "port": "22",
                        "vulnerability": "cve4"
                    }),
                    "Data": {
                        "address": "123.456.123.456",
                        "port": "22",
                        "vulnerability": "cve4",
                        "severity": "5",
                        "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
                    }
                }, {
                    "NonTemporalKey": ResultsContext._hash_of({
                        "address": "123.456.123.456",
                        "port": "22",
                        "vulnerability": "cve5"
                    }),
                    "Data": {
                        "address": "123.456.123.456",
                        "port": "22",
                        "vulnerability": "cve5",
                        "severity": "2",
                        "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
                    }
                }],
                "host_info": [{
                    "NonTemporalKey": ResultsContext._hash_of({
                        "address": "123.456.123.456",
                    }),
                    "Data": {
                        "address": "123.456.123.456",
                        "uptime": "1234567",
                        "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
                    }
                }]
            }
        }),
        MessageAttributes={
            "ParentKey": {
                "StringValue": ResultsContext._hash_of({"address": "123.456.123.456"}),
                "DataType": "String"
            },
            "TemporalKey": {
                "StringValue": ResultsContext._hash_of(iso_date_string_from_timestamp(5)),
                "DataType": "String"
            }
        })
SAMPLE_DOC_COLLECTION = dumps({
    "scan_id": "scan_2",
    "scan_start_time": iso_date_string_from_timestamp(4),
    "scan_end_time": iso_date_string_from_timestamp(5),
    "__docs": {
        "port_info": [{
            "NonTemporalKey": ResultsContext._hash_of({
                "address": "123.456.123.456",
                "port": "22"
            }),
            "Data": {
                "address": "123.456.123.456",
                "port": "22",
                "open": "false",
                "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
            }
        }, {
            "NonTemporalKey": ResultsContext._hash_of({
                "address": "123.456.123.456",
                "port": "80"
            }),
            "Data": {
                "address": "123.456.123.456",
                "port": "80",
                "open": "true",
                "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
            }
        }],
        "vuln_info": [{
            "NonTemporalKey": ResultsContext._hash_of({
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve4"
            }),
            "Data": {
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve4",
                "severity": "5",
                "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
            }
        }, {
            "NonTemporalKey": ResultsContext._hash_of({
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve5"
            }),
            "Data": {
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve5",
                "severity": "2",
                "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
            }
        }],
        "host_info": [{
            "NonTemporalKey": ResultsContext._hash_of({
                "address": "123.456.123.456",
            }),
            "Data": {
                "address": "123.456.123.456",
                "uptime": "1234567",
                "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"}),
            }
        }]
    }
})
def test_cant_serialise_mock_normally():
    with pytest.raises(TypeError):
        dumps(MagicMock())
def test_can_serialise_mock_with_decorator():
    assert isinstance(dumps(MagicMock()), str)
def test_snapshot_only_mode(post_mock):
    queue_ingestor.ssm_client.get_parameters.return_value = ssm_return_vals()
    post_mock.return_value = post_response_mock = MagicMock()
    post_response_mock.json.return_value = {}

    # Using as a sample event the expected output of the test_scan_results.py test
    test_event = {
        "Records": [{
            "body": dumps({
                "Subject": "scan_name",
                "Message": SAMPLE_DOC_COLLECTION,
                "MessageAttributes": {
                    # N.B. No TemporalKey here
                    "ParentKey": {
                        "Value": ResultsContext._hash_of({"address": "123.456.123.456"}),
                        "DataType": "String"
                    }
                }
            })
        }]
    }

    queue_ingestor.ingest(test_event, MagicMock())

    # Two port_info, two vuln_info, and one host_info doc are posted, but only to the
    # snapshot collections. Three delete-old-snapshot requests are also made, one per
    # doc_type, each using the parent key.
    assert post_mock.call_count == 5 + 3

    expected_deletes = {
        doc_type: call(
            f"https://elastic.url.com/scan_name:{doc_type}_snapshot:write/_doc/_delete_by_query?conflicts=proceed",
            auth=auth_mock,
            data=dumps({
                "query": {
                    "term": {
                        "__ParentKey": ResultsContext._hash_of({"address": "123.456.123.456"})
                    }
                }
            }),
            headers={"content-type": "application/json"})
        for doc_type in ["port_info", "vuln_info", "host_info"]
    }

    parent_key = ResultsContext._hash_of({"address": "123.456.123.456"})
    assert post_mock.call_args_list == [
        # info for ports 22 and 80
        expected_deletes["port_info"],
        _expected_snapshot_write(
            "port_info", {
                'address': '123.456.123.456',
                'port': '22'
            }, auth_mock, {
                "scan_id": "scan_2",
                "scan_start_time": iso_date_string_from_timestamp(4),
                "scan_end_time": iso_date_string_from_timestamp(5),
                "address": "123.456.123.456",
                "port": "22",
                "open": "false",
                "__ParentKey": parent_key,
            }),
        _expected_snapshot_write(
            "port_info", {
                'address': '123.456.123.456',
                'port': '80'
            }, auth_mock, {
                "scan_id": "scan_2",
                "scan_start_time": iso_date_string_from_timestamp(4),
                "scan_end_time": iso_date_string_from_timestamp(5),
                "address": "123.456.123.456",
                "port": "80",
                "open": "true",
                "__ParentKey": parent_key,
            }),
        # info for the two cves
        expected_deletes["vuln_info"],
        _expected_snapshot_write(
            "vuln_info", {
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve4"
            }, auth_mock, {
                "scan_id": "scan_2",
                "scan_start_time": iso_date_string_from_timestamp(4),
                "scan_end_time": iso_date_string_from_timestamp(5),
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve4",
                "severity": "5",
                "__ParentKey": parent_key,
            }),
        _expected_snapshot_write(
            "vuln_info", {
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve5"
            }, auth_mock, {
                "scan_id": "scan_2",
                "scan_start_time": iso_date_string_from_timestamp(4),
                "scan_end_time": iso_date_string_from_timestamp(5),
                "address": "123.456.123.456",
                "port": "22",
                "vulnerability": "cve5",
                "severity": "2",
                "__ParentKey": parent_key,
            }),
        # host info
        expected_deletes["host_info"],
        _expected_snapshot_write(
            "host_info", {"address": "123.456.123.456"}, auth_mock, {
                "scan_id": "scan_2",
                "scan_start_time": iso_date_string_from_timestamp(4),
                "scan_end_time": iso_date_string_from_timestamp(5),
                "address": "123.456.123.456",
                "uptime": "1234567",
                "__ParentKey": parent_key,
            }),
    ]
def test_serialisation():
    object_to_serialise = {
        'date': date.fromtimestamp(1234567),
        'datetime': datetime.utcfromtimestamp(1234567)
    }
    assert json_serialisation.dumps(object_to_serialise) == \
        '{"date": "1970-01-15", "datetime": "1970-01-15T06:56:07"}'
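# A hedged sketch of what json_serialisation.dumps is assumed to do, inferred
# from the tests above: date/datetime values become ISO strings, while other
# unserialisable types (e.g. a bare MagicMock) still raise TypeError. The real
# module may differ in detail.
from datetime import date, datetime
from json import dumps as json_dumps

def dumps(obj):
    def _default(value):
        if isinstance(value, (date, datetime)):
            return value.isoformat()
        raise TypeError(f"{type(value)} is not JSON serialisable")
    return json_dumps(obj, default=_default)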