示例#1
0
 def createDynamoDBSchema(self):
     """ Apply the full set of dynamodb table schema definitions.

     Each definition is instantiated and handed to
     `self._gracefulCreateTable`; the returned value is stored on the private
     attribute paired with it below.  Tables are processed strictly in the
     listed order, and each attribute is assigned before the next definition
     is instantiated.
     """
     tableSpecs = (
         ("_metric", MetricDynamoDBDefinition),
         ("_metric_data", MetricDataDynamoDBDefinition),
         ("_metric_tweets", MetricTweetsDynamoDBDefinition),
         ("_instance_data_hourly", InstanceDataHourlyDynamoDBDefinition),
     )
     for attrName, definitionClass in tableSpecs:
         definition = definitionClass()
         setattr(self, attrName, self._gracefulCreateTable(definition))
示例#2
0
    def _publishInstanceDataHourly(self, instanceName, metricType, rows):
        """ Specific handler for instance data rows.  Publishes to the
        `taurus.data.instance_data_hourly` dynamodb table.

        Rows are grouped by hour (derived from `row["ts"]`) and only the
        largest `row["anomaly"]` value of each hour is published.  For each
        hour, in ascending order:

        1. a conditional UpdateItem sets the per-metric score when it is
           missing or smaller than the new value;
        2. if that update raised ResourceNotFoundException or
           ValidationException, a conditional PutItem writes the whole row,
           provided no row exists yet;
        3. if the PutItem condition failed (the row appeared in the
           meantime), the conditional UpdateItem is attempted once more.

        Any other exception is logged and re-raised.

        :param instanceName: name of the instance
        :type instanceName: str

        :param metricType: the metric type identifier
        :type metricType: str

        :param rows: model inference result rows per "results" property of
          htmengine/runtime/json_schema/model_inference_results_msg_schema.json
        :type rows: Sequence of dicts
        """
        # Shared by both update attempts below
        updateFailureFormat = ("update_item failed: table=%s; updateKey=%s; "
                               "update=%s; condition=%s; values=%s")

        maxScoreByHour = {}
        for row in rows:
            # row["ts"] is an epoch timestamp; truncate it to the top of the
            # hour
            moment = datetime.utcfromtimestamp(row["ts"])
            hourStart = moment.replace(minute=0, second=0, microsecond=0)
            # Keep the max anomaly likelihood seen for the period
            bestSoFar = maxScoreByHour.get(hourStart, 0.0)
            maxScoreByHour[hourStart] = max(bestSoFar, row["anomaly"])

        # Hour keys are unique, so ordering by key alone yields the same
        # sequence as ordering the (key, value) pairs
        for hourStart in sorted(maxScoreByHour):
            score = FIXED_DYNAMODB_CONTEXT.create_decimal_from_float(
                maxScoreByHour[hourStart])

            instanceIdAttr = {"S": instanceName}
            dateHourAttr = {"S": hourStart.strftime("%Y-%m-%dT%H")}
            hourlyItem = {
                "instance_id": instanceIdAttr,
                "date_hour": dateHourAttr,
                "date": {"S": hourStart.strftime("%Y-%m-%d")},
                "hour": {"S": hourStart.strftime("%H")},
                "anomaly_score": {"M": {metricType: {"N": str(score)}}},
            }
            # Validate the data fields against the schema
            InstanceDataHourlyDynamoDBDefinition().Item(**hourlyItem)

            # Pieces of the conditional update of this metric's anomaly score
            updateKey = {
                "instance_id": instanceIdAttr,
                "date_hour": dateHourAttr,
            }
            anomalyScoreMetric = "anomaly_score.%s" % metricType
            conditionArgs = {"asm": anomalyScoreMetric}
            updateCondition = ("attribute_not_exists(%(asm)s) or "
                               "%(asm)s < :value" % conditionArgs)
            updateValues = {":value": {"N": str(score)}}
            updateExpression = "SET %s = :value" % anomalyScoreMetric

            @retryOnTransientDynamoDBError(g_log)
            def updateItemWithRetries():
                self.dynamodb.update_item(
                    self._instance_data_hourly.table_name,
                    key=updateKey,
                    update_expression=updateExpression,
                    condition_expression=updateCondition,
                    expression_attribute_values=updateValues)

            # Step 1: try the conditional update first
            try:
                updateItemWithRetries()
            except (ResourceNotFoundException, ValidationException):
                # No row to update yet (or the update could not be applied);
                # fall through to the PutItem attempt
                pass
            except ConditionalCheckFailedException:
                # The existing value is larger so we are done
                continue
            except Exception:
                g_log.exception(
                    updateFailureFormat,
                    self._instance_data_hourly.table_name, updateKey,
                    updateExpression, updateCondition, updateValues)
                raise
            else:
                # The update went through; nothing more to do for this hour
                continue

            # Step 2: the update could not be applied, so put the whole row,
            # but only if no row exists for this key
            putCondition = "attribute_not_exists(instance_id)"

            @retryOnTransientDynamoDBError(g_log)
            def putItemWithRetries(item, condition):
                self.dynamodb.put_item(self._instance_data_hourly.table_name,
                                       item=item,
                                       condition_expression=condition)

            try:
                putItemWithRetries(hourlyItem, putCondition)
            except ConditionalCheckFailedException:
                # No problem, row already exists!
                pass
            except Exception:
                g_log.exception(
                    "put_item failed: table=%s; condition=%s; item=%s",
                    self._instance_data_hourly.table_name, putCondition,
                    hourlyItem)
                raise
            else:
                # The put went through; nothing more to do for this hour
                continue

            # Step 3: a parallel process created the row before us, so repeat
            # the conditional update against the now-existing row
            try:
                updateItemWithRetries()
            except ConditionalCheckFailedException:
                # The existing value is larger so we are done
                continue
            except Exception:
                g_log.exception(
                    updateFailureFormat,
                    self._instance_data_hourly.table_name, updateKey,
                    updateExpression, updateCondition, updateValues)
                raise
    def testPathwayToDynamoDB(self):
        """ Test metric data pathway to dynamodb

        Steps exercised, in order:

        1. send three samples of a randomly-named custom metric over the
           plaintext socket and wait for the metric to be created;
        2. create a model for it and poll until the metric status is ACTIVE;
        3. verify the metric, metric data and hourly instance data items in
           dynamodb;
        4. publish one tweet on the non-metric-data exchange and verify the
           tweet item, both by direct lookup and through the index query;
        5. delete the metric and poll until its dynamodb item is gone.

        Items that the test looks up in the metric data, instance data and
        tweet tables are registered for deletion via `addCleanup`.
        """
        # Upper bound, in one-second polls, for each of the two wait loops
        maxWaitSeconds = 60

        metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16))

        nativeMetric = {
            "modelParams": {
                "minResolution": 0.2,
                "min": 0.0,
                "max": 10000.0,
            },
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName,
                "resource": "Test",
                "userInfo": {
                    "symbol": "TEST",
                    "metricType": "TwitterVolume",
                    "metricTypeName": "Twitter Volume",
                }
            }
        }
        instanceName = nativeMetric["metricSpec"]["resource"]
        userInfo = nativeMetric["metricSpec"]["userInfo"]

        now = datetime.datetime.utcnow().replace(minute=0,
                                                 second=0,
                                                 microsecond=0)

        data = [
            (5000.0, now - datetime.timedelta(minutes=10)),
            (6000.0, now - datetime.timedelta(minutes=5)),
            (7000.0, now),
        ]

        # We'll be explicitly deleting the metric below, but we need to add a
        # cleanup step that runs in case there is some other failure that prevents
        # that part of the test from being reached.

        def gracefulDelete():
            try:
                self._deleteMetric(metricName)
            except ObjectNotFoundError:
                pass

        self.addCleanup(gracefulDelete)

        # Add custom metric data
        sock = socket.socket()
        # Make sure the descriptor is released even when connect/sendall
        # raises before the graceful close below; closing a socket twice is
        # harmless.
        self.addCleanup(sock.close)
        sock.connect(("localhost", self.plaintextPort))
        for metricValue, ts in data:
            sock.sendall(
                "%s %r %s\n" %
                (metricName, metricValue, epochFromNaiveUTCDatetime(ts)))

        self.gracefullyCloseSocket(sock)

        uid = self.checkMetricCreated(metricName)

        # Save the uid for later
        LOGGER.info("Metric %s has uid: %s", metricName, uid)

        # Send model creation request
        model = self._createModel(nativeMetric)
        parameters = json.loads(model.parameters)
        self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo)

        # Poll once per second until the model becomes active
        for _ in xrange(maxWaitSeconds):
            with self.engine.begin() as conn:
                metric = repository.getMetric(conn, uid)

            if metric.status == MetricStatus.ACTIVE:
                break
            LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid)
            time.sleep(1)
        else:
            # The message reports the wait that was actually performed
            self.fail("Model results not available within %d seconds" %
                      maxWaitSeconds)

        # Check that the data all got processed
        self.checkModelResultsSize(uid, 3)

        # Now check that the data was published to dynamodb...
        dynamodb = DynamoDBService.connectDynamoDB()

        metricTable = Table(MetricDynamoDBDefinition().tableName,
                            connection=dynamodb)
        metricItem = metricTable.lookup(uid)
        self.assertEqual(metricItem["uid"], uid)
        self.assertEqual(metricItem["name"], metricName)
        self.assertEqual(metricItem["metricType"], "TwitterVolume")
        self.assertEqual(metricItem["metricTypeName"], "Twitter Volume")
        self.assertEqual(metricItem["symbol"], "TEST")

        metricDataTable = Table(MetricDataDynamoDBDefinition().tableName,
                                connection=dynamodb)
        # Expected hourly maxima, keyed by (date, hour) strings and derived
        # from the per-sample anomaly scores read back below
        instanceDataAnomalyScores = {}
        for metricValue, ts in data:
            metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
                metricDataTable.lookup)(uid, ts.isoformat())
            # There is no server-side cleanup for metric data, so remove it here for
            # now to avoid accumulating test data
            self.addCleanup(metricDataItem.delete)
            self.assertEqual(metricValue, metricDataItem["metric_value"])
            dt = datetime.datetime.strptime(metricDataItem["timestamp"],
                                            "%Y-%m-%dT%H:%M:%S")
            self.assertEqual(ts, dt)
            hourTs = ts.replace(minute=0, second=0, microsecond=0)
            key = (hourTs.strftime("%Y-%m-%d"), hourTs.strftime("%H"))
            maxVal = instanceDataAnomalyScores.get(key, 0.0)
            instanceDataAnomalyScores[key] = max(
                maxVal, metricDataItem["anomaly_score"])

        # And check that the aggregated instance data is updated
        instanceDataHourlyTable = Table(
            InstanceDataHourlyDynamoDBDefinition().tableName,
            connection=dynamodb)
        for key, anomalyScore in instanceDataAnomalyScores.iteritems():
            date, hour = key
            instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
                instanceDataHourlyTable.lookup)(instanceName,
                                                "%sT%s" % (date, hour))
            self.addCleanup(instanceDataHourlyItem.delete)
            self.assertAlmostEqual(
                anomalyScore,
                float(
                    instanceDataHourlyItem["anomaly_score"]["TwitterVolume"]))
            self.assertEqual(date, instanceDataHourlyItem["date"])
            self.assertEqual(hour, instanceDataHourlyItem["hour"])

        # Now send some twitter data and validate that it made it to dynamodb

        twitterData = [{
            "metric_name": metricName,
            "tweet_uid": uid,
            "created_at": "2015-02-19T19:43:24.870109",
            "agg_ts": "2015-02-19T19:43:24.870118",
            "text": "Tweet text",
            "userid": "10",
            "username": "******",
            "retweet_count": "0"
        }]

        with MessageBusConnector() as messageBus:
            messageBus.publishExg(
                exchange=self.config.get("non_metric_data", "exchange_name"),
                routingKey=(
                    self.config.get("non_metric_data", "exchange_name") +
                    ".twitter"),
                body=json.dumps(twitterData))

        metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName,
                                  connection=dynamodb)
        # The tweet was only just published on the message bus, so look it up
        # through the same item-not-found retry wrapper used for the other
        # tables instead of failing on the first miss.
        metricTweetItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
            metricTweetsTable.lookup)("-".join((metricName, uid)),
                                      "2015-02-19T19:43:24.870118")
        # There is no server-side cleanup for tweet data, so remove it here for
        # now to avoid accumulating test data
        self.addCleanup(metricTweetItem.delete)
        self.assertEqual(metricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(metricTweetItem["tweet_uid"],
                         twitterData[0]["tweet_uid"])
        self.assertEqual(metricTweetItem["created_at"],
                         twitterData[0]["created_at"])
        self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
        self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
        self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
        self.assertEqual(metricTweetItem["retweet_count"],
                         twitterData[0]["retweet_count"])

        # The same tweet must also be reachable through the index query
        queryResult = metricTweetsTable.query_2(
            metric_name__eq=metricName,
            agg_ts__eq=twitterData[0]["agg_ts"],
            index="taurus.metric_data-metric_name_index")
        queriedMetricTweetItem = next(queryResult)

        self.assertEqual(queriedMetricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(queriedMetricTweetItem["tweet_uid"],
                         twitterData[0]["tweet_uid"])
        self.assertEqual(queriedMetricTweetItem["created_at"],
                         twitterData[0]["created_at"])
        self.assertEqual(queriedMetricTweetItem["agg_ts"],
                         twitterData[0]["agg_ts"])
        self.assertEqual(queriedMetricTweetItem["text"],
                         twitterData[0]["text"])
        self.assertEqual(queriedMetricTweetItem["userid"],
                         twitterData[0]["userid"])
        self.assertEqual(queriedMetricTweetItem["retweet_count"],
                         twitterData[0]["retweet_count"])

        # Delete metric and ensure metric is deleted from dynamodb, too
        self._deleteMetric(metricName)

        # Poll once per second until the lookup reports the item as missing
        for _ in xrange(maxWaitSeconds):
            time.sleep(1)
            try:
                metricTable.lookup(uid)
            except ItemNotFound:
                break
        else:
            self.fail("Metric not deleted from dynamodb")