def testPublishMetricDataWithDuplicateKeys(self, connectDynamoDB,
                                           _gracefulCreateTable):
  """ Test for elimination of rows with duplicate keys by _publishMetricData
  """
  metricId = "3b035a5916994f2bb950f5717138f94b"
  rowTemplate = dict(
    rowid=99,
    ts=epochFromNaiveUTCDatetime(datetime(2015, 3, 20, 0, 46, 28)),
    value=10305.0,
    rawAnomaly=0.275,
    anomaly=0.999840891)
  row1 = dict(rowTemplate)
  row2 = dict(rowTemplate)
  row2["rowid"] = row1["rowid"] + 1
  rows = [row1, row2]

  service = DynamoDBService()
  service._publishMetricData(metricId, rows)

  data = dynamodb_service.convertInferenceResultRowToMetricDataItem(metricId,
                                                                    row1)
  mockPutItem = (service._metric_data.batch_write.return_value.__enter__
                 .return_value.put_item)
  mockPutItem.assert_called_once_with(data=data._asdict(), overwrite=True)

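# A note on the duplicate-keys test above: it feeds _publishMetricData two rows
# that share the same "ts" and expects exactly one put_item() call, i.e. rows
# that would map to the same DynamoDB key are collapsed before the batch write.
# The sketch below illustrates that keep-one-row-per-timestamp step under that
# assumption; it is illustrative only (the helper name is hypothetical and this
# is not the service's actual implementation).
def _exampleDedupRowsByTimestamp(rows):
  """Return rows with duplicate "ts" values removed, keeping the last seen."""
  rowsByTimestamp = {}
  for row in rows:
    rowsByTimestamp[row["ts"]] = row
  return list(rowsByTimestamp.values())
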
def testModelResultHandlerSkipsStaleBatch(self,
                                          _amqpUtilsMock,
                                          deserializeModelResult,
                                          connectDynamoDB,
                                          _gracefulCreateTable):
  """ Given a stale batch of model inference results, verify that it isn't
  saved to DynamoDB
  """
  # We're going to mostly mock out all of the arguments to
  # DynamoDBService.messageHandler() since it is normally called by amqp lib.
  # Then simulate the process of handling an inbound batch of model inference
  # results and assert that the appropriate put_item() calls are made at the
  # other end.

  message = amqp.messages.ConsumerMessage(
    body=Mock(),
    properties=Mock(headers=dict()),
    methodInfo=amqp.messages.MessageDeliveryInfo(consumerTag=Mock(),
                                                 deliveryTag=Mock(),
                                                 redelivered=False,
                                                 exchange=Mock(),
                                                 routingKey=""),
    ackImpl=Mock(),
    nackImpl=Mock())

  # We will have to bypass the normal serialize/deserialize phases to avoid
  # dependency on sqlalchemy rowproxy. Instead, we'll just mock out the
  # AnomalyService.deserializeModelResult() call, returning an object that
  # approximates a batch of model inference results as much as possible

  ts = epochFromNaiveUTCDatetime(
    datetime.utcnow().replace(microsecond=0) -
    timedelta(days=DynamoDBService._FRESH_DATA_THRESHOLD_DAYS + 1))

  resultRow = dict(
    rowid=4790,
    ts=ts,
    value=9305.0,
    rawAnomaly=0.775,
    anomaly=0.999840891)

  metricId = "3b035a5916994f2bb950f5717138f94b"

  deserializeModelResult.return_value = dict(
    metric=dict(
      uid=metricId,
      name="XIGNITE.AGN.VOLUME",
      description="XIGNITE.AGN.VOLUME",
      resource="Resource-of-XIGNITE.AGN.VOLUME",
      location="",
      datasource="custom",
      spec=dict(userInfo=dict(symbol="AGN",
                              metricType="StockVolume",
                              metricTypeName="Stock Volume"))),
    results=[resultRow])

  service = DynamoDBService()

  publishMetricDataPatch = patch.object(
    service, "_publishMetricData",
    spec_set=service._publishMetricData)
  publishInstancePatch = patch.object(
    service, "_publishInstanceDataHourly",
    spec_set=service._publishInstanceDataHourly)

  with publishMetricDataPatch as publishMetricDataMock, \
      publishInstancePatch as publishInstanceMock:
    service.messageHandler(message)

    deserializeModelResult.assert_called_once_with(message.body)
    self.assertEqual(publishMetricDataMock.call_count, 0)
    self.assertEqual(publishInstanceMock.call_count, 0)

def testMessageHandlerRoutesTweetDataToDynamoDB(self,
                                                _amqpUtilsMock,
                                                connectDynamoDB,
                                                _gracefulCreateTable):
  """ Simple test for twitter interface
  """
  ## channel = Mock()
  ## method = Mock(routing_key="taurus.data.non-metric.twitter")
  ## properties = Mock()

  tweetData = [
    {
      "metric_name": "Metric Name",
      "tweet_uid": "3b035a5916994f2bb950f5717138f94b",
      "created_at": "2015-02-19T19:43:24.870109",
      "agg_ts": "2015-02-19T19:43:24.870118",
      "text": "Tweet text",
      "userid": "10",
      "username": "Tweet username",
      "retweet_count": "0"
    }
  ]

  message = amqp.messages.ConsumerMessage(
    body=json.dumps(tweetData),
    properties=Mock(),
    methodInfo=amqp.messages.MessageDeliveryInfo(
      consumerTag=Mock(),
      deliveryTag=Mock(),
      redelivered=False,
      exchange=Mock(),
      routingKey="taurus.data.non-metric.twitter"),
    ackImpl=Mock(),
    nackImpl=Mock())

  service = DynamoDBService()
  service.messageHandler(message)

  (service._metric_tweets.batch_write.return_value.__enter__.return_value
   .put_item.assert_called_once_with(
     data=OrderedDict([
       ("metric_name_tweet_uid",
        "Metric Name-3b035a5916994f2bb950f5717138f94b"),
       ("metric_name", "Metric Name"),
       ("tweet_uid", "3b035a5916994f2bb950f5717138f94b"),
       ("created_at", "2015-02-19T19:43:24.870109"),
       ("agg_ts", "2015-02-19T19:43:24.870118"),
       ("text", "Tweet text"),
       ("userid", "10"),
       ("username", "Tweet username"),
       ("retweet_count", "0")]),
     overwrite=True))

def testDynamoDBServiceRun(self, amqpClientClassMock, connectDynamoDB,
                           _gracefulCreateTable):
  """ Very basic test to validate that the service follows AMQP protocol.

  Upon `run()`, it should:

  1. Connect to RabbitMQ
  2. Open a channel
  3. Declare two exchanges; one for model results, and one for non-metric
     data
  4. Declare a durable "dynamodb" queue
  5. Bind the "dynamodb" queue to the two exchanges
  6. Start consuming.
  """

  amqpClientMock = MagicMock(
    spec_set=(
      dynamodb_service.amqp.synchronous_amqp_client.SynchronousAmqpClient))
  amqpClientMock.__enter__.return_value = amqpClientMock

  amqpClientClassMock.return_value = amqpClientMock

  DynamoDBService().run()

  self.assertTrue(amqpClientClassMock.called,
                  "Service did not connect to rabbitmq")

  self.assertTrue(amqpClientMock.declareExchange.called)

  amqpClientMock.declareExchange.assert_any_call(
    durable=True,
    exchangeType="fanout",
    exchange=taurus.engine.config.get("metric_streamer",
                                      "results_exchange_name"))
  amqpClientMock.declareExchange.assert_any_call(
    durable=True,
    exchangeType="topic",
    exchange=taurus.engine.config.get("non_metric_data", "exchange_name"))

  amqpClientMock.declareQueue.assert_called_once_with(ANY, durable=True)

  amqpClientMock.bindQueue.assert_any_call(
    queue=amqpClientMock.declareQueue.return_value.queue,
    exchange=taurus.engine.config.get("metric_streamer",
                                      "results_exchange_name"),
    routingKey="")
  amqpClientMock.bindQueue.assert_any_call(
    exchange=taurus.engine.config.get("non_metric_data", "exchange_name"),
    queue=amqpClientMock.declareQueue.return_value.queue,
    routingKey="#")

  self.assertTrue(amqpClientMock.readEvents.called)

def testDynamoDBServiceInit(self, connectDynamoDB, _gracefulCreateTable):
  service = DynamoDBService()

  self.assertTrue(hasattr(service, "run"))

  self.assertTrue(connectDynamoDB.called,
                  "Service did not attempt to authenticate with DynamoDB "
                  "API during initialization")

  self.assertTrue(_gracefulCreateTable.called,
                  "Service did not attempt to create any dynamodb tables")

  for callArgs, _ in _gracefulCreateTable.call_args_list:
    self.assertIsInstance(
      callArgs[0], DynamoDBDefinition,
      "Service attempted to create a table using something that isn't a "
      "subclass of DynamoDBDefinition")

def testPublishInstanceDataHourly(self, connectDynamoDB,
                                  _gracefulCreateTable):
  connectionMock = Mock(spec_set=DynamoDBConnection)
  connectionMock.update_item.side_effect = ResourceNotFoundException(
    400, "item not found")
  connectDynamoDB.return_value = connectionMock

  tableName = InstanceDataHourlyDynamoDBDefinition().tableName
  instanceName = "testName"
  condition = "attribute_not_exists(instance_id)"
  rows = [
    dict(rowid=99,
         ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 46, 28)),
         value=10305.0,
         rawAnomaly=0.275,
         anomaly=0.999840891),
    dict(rowid=100,
         ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 51, 28)),
         value=9305.0,
         rawAnomaly=0.975,
         anomaly=0.999990891),
    dict(rowid=101,
         ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 56, 20)),
         value=6111.0,
         rawAnomaly=0.775,
         anomaly=0.999940891),
    dict(rowid=102,
         ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 1, 1, 38)),
         value=7092.0,
         rawAnomaly=0.775,
         anomaly=0.999640891)
  ]

  service = DynamoDBService()

  # Run the function under test
  service._publishInstanceDataHourly(instanceName, "TwitterVolume", rows)

  # Validate results
  self.assertEqual(connectionMock.update_item.call_count, 2)
  self.assertEqual(connectionMock.put_item.call_count, 2)
  calls = connectionMock.put_item.call_args_list

  kwargs0 = calls[0][1]
  item0 = kwargs0["item"]
  self.assertDictEqual(item0["instance_id"], {"S": instanceName})
  self.assertEqual(item0["date_hour"], {"S": "2015-02-20T00"})
  self.assertEqual(item0["date"], {"S": "2015-02-20"})
  self.assertEqual(item0["hour"], {"S": "00"})
  self.assertDictEqual(item0["anomaly_score"]["M"]["TwitterVolume"],
                       {"N": "0.99999"})
  self.assertEqual(kwargs0["condition_expression"], condition)

  kwargs1 = calls[1][1]
  item1 = kwargs1["item"]
  self.assertEqual(item1["instance_id"], {"S": instanceName})
  self.assertEqual(item1["date_hour"], {"S": "2015-02-20T01"})
  self.assertEqual(item1["date"], {"S": "2015-02-20"})
  self.assertEqual(item1["hour"], {"S": "01"})
  self.assertDictEqual(item1["anomaly_score"]["M"]["TwitterVolume"],
                       {"N": "0.99964"})
  self.assertEqual(kwargs1["condition_expression"], condition)

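# The expected put_item() payloads above imply how the hourly rollup is keyed:
# each row's epoch timestamp falls into a UTC "YYYY-MM-DDTHH" date_hour bucket,
# and each bucket keeps the maximum anomaly score, rendered with five decimal
# places (0.999990891 -> "0.99999", 0.999640891 -> "0.99964"). A minimal sketch
# of that bucketing, assuming rows shaped like the fixtures above (illustrative
# only; the helper name is hypothetical, not the service's implementation).
def _exampleHourlyMaxAnomaly(rows):
  """Map UTC date_hour strings ("YYYY-MM-DDTHH") to the max anomaly per hour."""
  buckets = {}
  for row in rows:
    dateHour = datetime.utcfromtimestamp(row["ts"]).strftime("%Y-%m-%dT%H")
    buckets[dateHour] = max(buckets.get(dateHour, 0.0), row["anomaly"])
  return dict((dateHour, "%.5f" % score)
              for dateHour, score in buckets.items())
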
def testMessageHandlerRoutesMetricDataToDynamoDB(self,
                                                 _amqpUtilsMock,
                                                 deserializeModelResult,
                                                 connectDynamoDB,
                                                 _gracefulCreateTable):
  """ Given a batch of model inference results, send the appropriate data to
  DynamoDB tables according to design in an environment where both rabbitmq
  and dynamodb are mocked out
  """
  # We're going to mostly mock out all of the arguments to
  # DynamoDBService.messageHandler() since it is normally called by amqp lib.
  # Then simulate the process of handling an inbound batch of model inference
  # results and assert that the appropriate put_item() calls are made at the
  # other end.

  message = amqp.messages.ConsumerMessage(
    body=Mock(),
    properties=Mock(headers=dict()),
    methodInfo=amqp.messages.MessageDeliveryInfo(consumerTag=Mock(),
                                                 deliveryTag=Mock(),
                                                 redelivered=False,
                                                 exchange=Mock(),
                                                 routingKey=""),
    ackImpl=Mock(),
    nackImpl=Mock())

  # We will have to bypass the normal serialize/deserialize phases to avoid
  # dependency on sqlalchemy rowproxy. Instead, we'll just mock out the
  # AnomalyService.deserializeModelResult() call, returning an object that
  # approximates a batch of model inference results as much as possible

  now = int(time.time())

  resultRow = dict(
    rowid=4790,
    ts=now,
    value=9305.0,
    rawAnomaly=0.775,
    anomaly=0.999840891)

  metricId = "3b035a5916994f2bb950f5717138f94b"

  deserializeModelResult.return_value = dict(
    metric=dict(
      uid=metricId,
      name="XIGNITE.AGN.VOLUME",
      description="XIGNITE.AGN.VOLUME",
      resource="Resource-of-XIGNITE.AGN.VOLUME",
      location="",
      datasource="custom",
      spec=dict(userInfo=dict(symbol="AGN",
                              metricType="StockVolume",
                              metricTypeName="Stock Volume"))),
    results=[resultRow])

  service = DynamoDBService()
  service.messageHandler(message)

  deserializeModelResult.assert_called_once_with(message.body)

  mockMetricDataPutItem = (
    service._metric_data.batch_write.return_value.__enter__
    .return_value.put_item)
  data = dynamodb_service.convertInferenceResultRowToMetricDataItem(
    metricId, resultRow)
  mockMetricDataPutItem.assert_called_once_with(data=data._asdict(),
                                                overwrite=True)

  self.assertFalse(service._metric_tweets.batch_write.called)

  # Make sure that a model command result doesn't get mistaken for an
  # inference result batch
  deserializeModelResult.return_value = Mock()
  message.properties = Mock(headers=dict(dataType="model-cmd-result"))
  message.body = Mock()
  service = DynamoDBService()
  with patch.object(service,
                    "_handleModelCommandResult",
                    spec_set=service._handleModelCommandResult):
    service.messageHandler(message)
    service._handleModelCommandResult.assert_called_once_with(message.body)

def testPathwayToDynamoDB(self):
  """ Test metric data pathway to dynamodb
  """

  metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16))

  nativeMetric = {
    "modelParams": {
      "minResolution": 0.2,
      "min": 0.0,
      "max": 10000.0,
    },
    "datasource": "custom",
    "metricSpec": {
      "metric": metricName,
      "resource": "Test",
      "userInfo": {
        "symbol": "TEST",
        "metricType": "TwitterVolume",
        "metricTypeName": "Twitter Volume",
      }
    }
  }
  metricName = nativeMetric["metricSpec"]["metric"]
  instanceName = nativeMetric["metricSpec"]["resource"]
  userInfo = nativeMetric["metricSpec"]["userInfo"]

  now = datetime.datetime.utcnow().replace(minute=0, second=0, microsecond=0)

  data = [
    (5000.0, now - datetime.timedelta(minutes=10)),
    (6000.0, now - datetime.timedelta(minutes=5)),
    (7000.0, now),
  ]

  # We'll be explicitly deleting the metric below, but we need to add a
  # cleanup step that runs in case there is some other failure that prevents
  # that part of the test from being reached.
  def gracefulDelete():
    try:
      self._deleteMetric(metricName)
    except ObjectNotFoundError:
      pass

  self.addCleanup(gracefulDelete)

  # Add custom metric data
  sock = socket.socket()
  sock.connect(("localhost", self.plaintextPort))
  for metricValue, ts in data:
    sock.sendall("%s %r %s\n" % (metricName,
                                 metricValue,
                                 epochFromNaiveUTCDatetime(ts)))
  self.gracefullyCloseSocket(sock)

  uid = self.checkMetricCreated(metricName)

  # Save the uid for later
  LOGGER.info("Metric %s has uid: %s", metricName, uid)

  # Send model creation request
  model = self._createModel(nativeMetric)
  parameters = json.loads(model.parameters)
  self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo)

  for _ in xrange(60):
    with self.engine.begin() as conn:
      metric = repository.getMetric(conn, uid)

    if metric.status == MetricStatus.ACTIVE:
      break
    LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid)
    time.sleep(1)
  else:
    self.fail("Model did not become ready (ACTIVE) within 60 seconds")

  # Check that the data all got processed
  self.checkModelResultsSize(uid, 3)

  # Now check that the data was published to dynamodb...
  dynamodb = DynamoDBService.connectDynamoDB()

  metricTable = Table(MetricDynamoDBDefinition().tableName,
                      connection=dynamodb)
  metricItem = metricTable.lookup(uid)
  self.assertEqual(metricItem["uid"], uid)
  self.assertEqual(metricItem["name"], metricName)
  self.assertEqual(metricItem["metricType"], "TwitterVolume")
  self.assertEqual(metricItem["metricTypeName"], "Twitter Volume")
  self.assertEqual(metricItem["symbol"], "TEST")

  metricDataTable = Table(MetricDataDynamoDBDefinition().tableName,
                          connection=dynamodb)
  instanceDataAnomalyScores = {}
  for metricValue, ts in data:
    metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
      metricDataTable.lookup)(uid, ts.isoformat())
    # There is no server-side cleanup for metric data, so remove it here for
    # now to avoid accumulating test data
    self.addCleanup(metricDataItem.delete)
    self.assertEqual(metricValue, metricDataItem["metric_value"])
    dt = datetime.datetime.strptime(metricDataItem["timestamp"],
                                    "%Y-%m-%dT%H:%M:%S")
    self.assertEqual(ts, dt)
    ts = ts.replace(minute=0, second=0, microsecond=0)
    date = ts.strftime("%Y-%m-%d")
    hour = ts.strftime("%H")
    key = (date, hour)
    maxVal = instanceDataAnomalyScores.get(key, 0.0)
    instanceDataAnomalyScores[key] = max(maxVal,
                                         metricDataItem["anomaly_score"])

  # And check that the aggregated instance data is updated
  instanceDataHourlyTable = Table(
    InstanceDataHourlyDynamoDBDefinition().tableName, connection=dynamodb)
  for key, anomalyScore in instanceDataAnomalyScores.iteritems():
    date, hour = key
    instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
      instanceDataHourlyTable.lookup)(instanceName, "%sT%s" % (date, hour))
    self.addCleanup(instanceDataHourlyItem.delete)
    self.assertAlmostEqual(
      anomalyScore,
      float(instanceDataHourlyItem["anomaly_score"]["TwitterVolume"]))
    self.assertEqual(date, instanceDataHourlyItem["date"])
    self.assertEqual(hour, instanceDataHourlyItem["hour"])

  # Now send some twitter data and validate that it made it to dynamodb

  twitterData = [
    {
      "metric_name": metricName,
      "tweet_uid": uid,
      "created_at": "2015-02-19T19:43:24.870109",
      "agg_ts": "2015-02-19T19:43:24.870118",
      "text": "Tweet text",
      "userid": "10",
      "username": "******",
      "retweet_count": "0"
    }
  ]

  with MessageBusConnector() as messageBus:
    messageBus.publishExg(
      exchange=self.config.get("non_metric_data", "exchange_name"),
      routingKey=(
        self.config.get("non_metric_data", "exchange_name") + ".twitter"),
      body=json.dumps(twitterData))

  metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName,
                            connection=dynamodb)

  for _ in range(30):
    try:
      metricTweetItem = metricTweetsTable.lookup(
        twitterData[0]["text"],
        twitterData[0]["agg_ts"])
      break
    except ItemNotFound:
      # LOL eventual consistency
      time.sleep(1)
      continue

  # There is no server-side cleanup for tweet data, so remove it here for
  # now to avoid accumulating test data
  self.addCleanup(metricTweetItem.delete)

  self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
  self.assertEqual(metricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
  self.assertEqual(metricTweetItem["created_at"],
                   twitterData[0]["created_at"])
  self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
  self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
  self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
  self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
  self.assertEqual(metricTweetItem["retweet_count"],
                   twitterData[0]["retweet_count"])
  self.assertEqual(metricTweetItem["copy_count"], 0)

  sort_key = twitterData[0]["agg_ts"]

  ts = (epochFromNaiveUTCDatetime(
    datetime.datetime.strptime(twitterData[0]["agg_ts"].partition(".")[0],
                               "%Y-%m-%dT%H:%M:%S")) * 1e5)

  queryResult = metricTweetsTable.query_2(
    metric_name__eq=metricName,
    sort_key__gte=ts,
    index="taurus.metric_data-metric_name_index")

  queriedMetricTweetItem = next(queryResult)

  self.assertEqual(queriedMetricTweetItem["username"],
                   twitterData[0]["username"])
  self.assertEqual(queriedMetricTweetItem["tweet_uid"],
                   twitterData[0]["tweet_uid"])
  self.assertEqual(queriedMetricTweetItem["created_at"],
                   twitterData[0]["created_at"])
  self.assertEqual(queriedMetricTweetItem["agg_ts"],
                   twitterData[0]["agg_ts"])
  self.assertEqual(queriedMetricTweetItem["text"], twitterData[0]["text"])
  self.assertEqual(queriedMetricTweetItem["userid"],
                   twitterData[0]["userid"])
  self.assertEqual(queriedMetricTweetItem["username"],
                   twitterData[0]["username"])
  self.assertEqual(queriedMetricTweetItem["retweet_count"],
                   twitterData[0]["retweet_count"])
  self.assertEqual(queriedMetricTweetItem["copy_count"], 0)
  self.assertEqual(queriedMetricTweetItem["sort_key"], ts)

  duplicatedTwitterData = [
    {
      "metric_name": "copy of " + metricName,
      "tweet_uid": "copy of " + uid,
      "created_at": "2015-02-19T19:45:24.870109",
      "agg_ts": "2015-02-19T19:43:24.870118",  # Same agg_ts!
      "text": "Tweet text",  # Same text!
      "userid": "20",
      "username": "******",
      "retweet_count": "0"
    }
  ]

  with MessageBusConnector() as messageBus:
    messageBus.publishExg(
      exchange=self.config.get("non_metric_data", "exchange_name"),
      routingKey=(
        self.config.get("non_metric_data", "exchange_name") + ".twitter"),
      body=json.dumps(duplicatedTwitterData))

  for _ in range(30):
    metricTweetItem = metricTweetsTable.lookup(
      twitterData[0]["text"],
      twitterData[0]["agg_ts"])

    if metricTweetItem["copy_count"] != 1:
      time.sleep(1)
      continue

    # Assert same as original, except for copy_count, which should be 1
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["tweet_uid"],
                     twitterData[0]["tweet_uid"])
    self.assertEqual(metricTweetItem["created_at"],
                     twitterData[0]["created_at"])
    self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
    self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
    self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["retweet_count"],
                     twitterData[0]["retweet_count"])
    self.assertEqual(metricTweetItem["sort_key"], ts + 1)
    break
  else:
    self.fail("copy_count of original tweet not updated within reasonable"
              " amount of time (~30s) for duplicated tweet.")

  # Delete metric and ensure metric is deleted from dynamodb, too
  self._deleteMetric(metricName)
  for _ in xrange(60):
    time.sleep(1)
    try:
      metricItem = metricTable.lookup(uid)
    except ItemNotFound as err:
      break
  else:
    self.fail("Metric not deleted from dynamodb")

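# The assertions above pin down how the tweet items are ordered: the numeric
# sort_key stored with each tweet is the agg_ts (seconds since epoch) scaled by
# 1e5, and a duplicated tweet (same text and agg_ts) bumps copy_count and the
# stored sort_key by one rather than creating a new item. A small illustration
# of that arithmetic as implied by the test, assuming agg_ts strings like the
# fixtures above (the helper name is hypothetical, not the service's code):
def _exampleTweetSortKey(aggTs, copyCount=0):
  """Return the sort key the assertions above expect for a given agg_ts."""
  naive = datetime.datetime.strptime(aggTs.partition(".")[0],
                                     "%Y-%m-%dT%H:%M:%S")
  return epochFromNaiveUTCDatetime(naive) * 1e5 + copyCount
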