def testMetricDataForRandomRowID(self, uid):
    """Verify GET /_models/<uid>/data is consistent with the database.

    Checks that the anomaly_score (and timestamp) returned by the API for a
    metric uid matches what repository.getMetricData() returns directly.
    The check is repeated for 5 random sample rows of the metric.

    Algorithm:
      - Query the MetricDataHandler GET call for a certain uid
      - Check if response is OK
      - Find the last row id for the uid
      - Select a random row between 1 and last row id
      - Find the anomaly score for that row id
      - Assert on the anomaly score and timestamp

    BUG FIX: the original signature was ``(uid)`` even though the body uses
    ``self.app`` / ``self.headers`` / ``self.assertEqual`` — calling it would
    raise NameError; ``self`` restored as the first parameter.

    :param uid: uid of the metric under test
    """
    response = self.app.get("/%s/data" % uid, headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)

    with repository.engineFactory().connect() as conn:
        lastRowID = repository.getMetric(conn, uid).last_rowid

    for _ in range(5):
        randomRowID = randrange(1, lastRowID)
        with repository.engineFactory().connect() as conn:
            singleMetricData = repository.getMetricData(
                conn, uid, rowid=randomRowID).first()

        metricData = getMetricDataWithRowID(getAllModelsResult['data'],
                                            randomRowID)
        self.assertEqual(metricData[2], singleMetricData.anomaly_score)
        self.assertEqual(
            datetime.strptime(metricData[0], '%Y-%m-%d %H:%M:%S'),
            singleMetricData.timestamp)
def testMetricDataForRandomRowID(self, uid):
    """Cross-check API metric data against the repository for random rows.

    For the given metric uid, fetches all rows via the
    ``GET /_models/<uid>/data`` endpoint, then for 5 randomly selected row
    ids asserts that the API's anomaly_score and timestamp equal the values
    read directly from the database via repository.getMetricData().

    BUG FIX: ``self`` was missing from the parameter list even though the
    body references ``self.app``, ``self.headers`` and ``self.assertEqual``
    (which would raise NameError); it has been restored.

    :param uid: uid of the metric under test
    """
    response = self.app.get("/%s/data" % uid, headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)

    # Upper bound for the random row selection
    with repository.engineFactory().connect() as conn:
        lastRowID = repository.getMetric(conn, uid).last_rowid

    for _ in range(5):
        randomRowID = randrange(1, lastRowID)
        with repository.engineFactory().connect() as conn:
            singleMetricData = repository.getMetricData(
                conn, uid, rowid=randomRowID).first()

        metricData = getMetricDataWithRowID(getAllModelsResult['data'],
                                            randomRowID)
        self.assertEqual(metricData[2], singleMetricData.anomaly_score)
        self.assertEqual(
            datetime.strptime(metricData[0], '%Y-%m-%d %H:%M:%S'),
            singleMetricData.timestamp)
def POST(cls):
    """Upload the metric info and metric data as a compressed tarfile to S3.

    The request must include the uid of the metric and may include other
    JSON keys as well. For instance, it is likely that a request from the
    mobile application will include information about the current view and
    data being displayed when the feedback request is sent. Any fields in
    addition to uid will be stored with the feedback archive file that is
    uploaded to S3.
    """
    inputData = json.loads(web.data())

    # Pull out the metric uid; every remaining key rides along as metadata
    uid = inputData["uid"]
    del inputData["uid"]

    inputData["server_id"] = _MACHINE_ID

    # Stage everything in a temporary directory before uploading
    stagingDir = tempfile.mkdtemp()
    try:
        # Retrieve the metric table record and add it to the other input
        # parameters
        metricFields = [schema.metric.c.uid,
                        schema.metric.c.datasource,
                        schema.metric.c.name,
                        schema.metric.c.description,
                        schema.metric.c.server,
                        schema.metric.c.location,
                        schema.metric.c.parameters,
                        schema.metric.c.status,
                        schema.metric.c.message,
                        schema.metric.c.last_timestamp,
                        schema.metric.c.poll_interval,
                        schema.metric.c.tag_name,
                        schema.metric.c.last_rowid]
        with repository.engineFactory().connect() as conn:
            metricRow = repository.getMetric(conn, uid, metricFields)

        metric = {}
        for col in metricFields:
            value = getattr(metricRow, col.name)
            # "parameters" is stored as a JSON string; decode it
            if col.name == "parameters":
                value = utils.jsonDecode(value)
            metric[col.name] = value

        if metric["tag_name"]:
            metric["display_name"] = "%s (%s)" % (metric["tag_name"],
                                                  metric["server"])
        else:
            metric["display_name"] = metric["server"]

        inputData["metric"] = utils.jsonEncode(metric)

        metricPath = os.path.join(stagingDir, "metric.json")
        with open(metricPath, "w") as outFile:
            json.dump(inputData, outFile)

        # Retrieve the metric data
        with repository.engineFactory().connect() as conn:
            metricDataRows = repository.getMetricData(conn, uid)
        metricData = [dict((col.name, getattr(row, col.name))
                           for col in schema.metric_data.columns)
                      for row in metricDataRows]

        metricDataPath = os.path.join(stagingDir, "metric_data.csv")
        with open(metricDataPath, "w") as outFile:
            writer = csv.writer(outFile)
            if len(metricData) > 0:
                # Field names first, then one CSV row per record
                header = metricData[0].keys()
                writer.writerow(header)
                for dataDict in metricData:
                    writer.writerow([dataDict[h] for h in header])

        # Bundle both files into a tarball for upload
        ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
        filename = "metric_dump_%s_%s.tar.gz" % (uid, ts)
        tfPath = os.path.join(stagingDir, filename)
        with tarfile.open(tfPath, "w:gz") as tf:
            tf.add(metricPath, arcname=os.path.basename(metricPath))
            tf.add(metricDataPath,
                   arcname=os.path.basename(metricDataPath))

        # Upload the tarfile
        return cls._uploadTarfile(filename, tfPath)
    finally:
        shutil.rmtree(stagingDir)
def GET(self, metricId=None):
    """
    Get Model Data

    ::

        GET /_models/{model-id}/data?from={fromTimestamp}&to={toTimestamp}&anomaly={anomalyScore}&limit={numOfRows}

    Parameters:

      :param limit: (optional) max number of records to return
      :type limit: int
      :param from: (optional) return records from this timestamp
      :type from: timestamp
      :param to: (optional) return records up to this timestamp
      :type to: timestamp
      :param anomaly: anomaly score to filter
      :type anomaly: float

    Returns:

    ::

        {
            "data": [
                ["2013-08-15 21:34:00", 222, 0.025, 125],
                ["2013-08-15 21:32:00", 202, 0, 124],
                ["2013-08-15 21:30:00", 202, 0, 123],
                ...
            ],
            "names": ["timestamp", "value", "anomaly_score", "rowid"]
        }

    This is a generator: results are yielded either as msgpack frames (when
    the client accepts application/octet-stream) or as a single JSON body.
    """
    queryParams = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    fromTimestamp = queryParams.get("from")
    toTimestamp = queryParams.get("to")
    anomaly = float(queryParams.get("anomaly") or 0.0)
    limit = int(queryParams.get("limit") or 0)

    with web.ctx.connFactory() as conn:
        fields = (schema.metric_data.c.uid,
                  schema.metric_data.c.timestamp,
                  schema.metric_data.c.metric_value,
                  schema.metric_data.c.anomaly_score,
                  schema.metric_data.c.rowid)
        # "metric_value" is surfaced to clients as "value"
        names = ("names",) + tuple(["value" if col.name == "metric_value"
                                    else col.name
                                    for col in fields])
        if fromTimestamp:
            sort = schema.metric_data.c.timestamp.asc()
        else:
            sort = schema.metric_data.c.timestamp.desc()
        result = repository.getMetricData(conn,
                                          metricId=metricId,
                                          fields=fields,
                                          fromTimestamp=fromTimestamp,
                                          toTimestamp=toTimestamp,
                                          score=anomaly,
                                          sort=sort)

        if "application/octet-stream" in web.ctx.env.get('HTTP_ACCEPT', ""):
            # Per-uid counts of rows already emitted (for limit enforcement)
            results_per_uid = defaultdict(int)
            packer = msgpack.Packer()
            self.addStandardHeaders(content_type='application/octet-stream')
            web.header('X-Accel-Buffering', 'no')

            yield packer.pack(names)
            for row in result:
                # BUG FIX: results_per_uid values are plain ints, so compare
                # directly; the original len(results_per_uid[row.uid]) raised
                # TypeError whenever a limit was given.
                if not limit or results_per_uid[row.uid] < limit:
                    resultTuple = (
                        row.uid,
                        calendar.timegm(row.timestamp.timetuple()),
                        row.metric_value,
                        row.anomaly_score,
                        row.rowid,
                    )
                    yield packer.pack(resultTuple)
                    results_per_uid[row.uid] += 1
        else:
            if metricId is None:
                # Group records per metric uid, honoring the per-uid limit
                output = {}
                for row in result:
                    uid = row.uid
                    default = {"uid": uid, "data": []}
                    recordTuple = (
                        row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                        row.metric_value,
                        row.anomaly_score,
                        row.rowid
                    )
                    metricDataRecord = output.setdefault(uid, default)
                    if not limit or len(metricDataRecord["data"]) < limit:
                        metricDataRecord["data"].append(recordTuple)
                results = {"metrics": output.values(),
                           "names": names[2:]}
            else:
                if limit:
                    results = {
                        "names": names[2:],
                        "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                                  row.metric_value,
                                  row.anomaly_score,
                                  row.rowid)
                                 for row in itertools.islice(result, 0, limit)]
                    }
                else:
                    results = {
                        "names": names[2:],
                        "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                                  row.metric_value,
                                  row.anomaly_score,
                                  row.rowid)
                                 for row in result]
                    }

            self.addStandardHeaders()
            yield utils.jsonEncode(results)
def POST(cls):
    """Upload the metric info and metric data as a compressed tarfile to S3.

    The request must include the uid of the metric and may include other
    JSON keys as well. For instance, it is likely that a request from the
    mobile application will include information about the current view and
    data being displayed when the feedback request is sent. Any fields in
    addition to uid will be stored with the feedback archive file that is
    uploaded to S3.
    """
    inputData = json.loads(web.data())

    # Get the metric uid; remaining keys are archived alongside it
    uid = inputData["uid"]
    del inputData["uid"]

    inputData["server_id"] = _MACHINE_ID

    # Data is written to a temporary directory before uploading
    workDir = tempfile.mkdtemp()
    try:
        # Retrieve the metric table record and merge it into the payload
        metricFields = [schema.metric.c.uid,
                        schema.metric.c.datasource,
                        schema.metric.c.name,
                        schema.metric.c.description,
                        schema.metric.c.server,
                        schema.metric.c.location,
                        schema.metric.c.parameters,
                        schema.metric.c.status,
                        schema.metric.c.message,
                        schema.metric.c.last_timestamp,
                        schema.metric.c.poll_interval,
                        schema.metric.c.tag_name,
                        schema.metric.c.last_rowid]
        with repository.engineFactory().connect() as conn:
            metricRow = repository.getMetric(conn, uid, metricFields)

        metric = {}
        for col in metricFields:
            attr = getattr(metricRow, col.name)
            # The "parameters" column holds a JSON-encoded string
            metric[col.name] = (utils.jsonDecode(attr)
                                if col.name == "parameters" else attr)

        if metric["tag_name"]:
            metric["display_name"] = "%s (%s)" % (metric["tag_name"],
                                                  metric["server"])
        else:
            metric["display_name"] = metric["server"]

        inputData["metric"] = utils.jsonEncode(metric)

        metricPath = os.path.join(workDir, "metric.json")
        with open(metricPath, "w") as fp:
            json.dump(inputData, fp)

        # Retrieve the metric data and dump it to CSV
        with repository.engineFactory().connect() as conn:
            metricDataRows = repository.getMetricData(conn, uid)
        metricData = [dict((col.name, getattr(row, col.name))
                           for col in schema.metric_data.columns)
                      for row in metricDataRows]

        metricDataPath = os.path.join(workDir, "metric_data.csv")
        with open(metricDataPath, "w") as fp:
            writer = csv.writer(fp)
            if len(metricData) > 0:
                header = metricData[0].keys()
                # Write the field names first, then the data rows
                writer.writerow(header)
                for dataDict in metricData:
                    writer.writerow([dataDict[h] for h in header])

        # Create a tarfile to upload
        ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
        filename = "metric_dump_%s_%s.tar.gz" % (uid, ts)
        tfPath = os.path.join(workDir, filename)
        with tarfile.open(tfPath, "w:gz") as tf:
            tf.add(metricPath, arcname=os.path.basename(metricPath))
            tf.add(metricDataPath,
                   arcname=os.path.basename(metricDataPath))

        # Upload the tarfile
        return cls._uploadTarfile(filename, tfPath)
    finally:
        shutil.rmtree(workDir)
def setUpClass(cls):
    """
    Setup steps for all test cases.

    Focus for these is to cover all API checks for ModelDataHandler.
    Hence, this does all setup creating metric, waiting for metricData
    across all testcases; all API calls for querying metricData will be
    against the single metric created in setup.

    Setup Process:
      1) Update conf with aws credentials; ManagedTempRepository will not
         work in this test
      2) Select a test instance that has been running for a long time
         (older than 15 days)
      3) Create metric, wait for min metricData rows to become available
         (set to 100, configurable)
      4) Pick testRowId; a low value makes it likely the anomaly_score
         field is non-NULL for that row when invoking GET with conditions
         (set to 5)
      5) Decide queryParams for anomalyScore, to and from timestamp
    """
    cls.headers = getDefaultHTTPHeaders(grok.app.config)

    # All other services need AWS credentials to work
    # Set AWS credentials
    grok.app.config.loadConfig()

    # Select a test instance that has been running for a long time
    g_logger.info("Getting long-running EC2 Instances")
    instances = aws_utils.getLongRunningEC2Instances(
        "us-west-2",
        grok.app.config.get("aws", "aws_access_key_id"),
        grok.app.config.get("aws", "aws_secret_access_key"),
        15)
    # BUG FIX: randrange(1, len(instances)) could never select index 0 and
    # raised ValueError when exactly one instance was returned; use the
    # full index range instead.
    testInstance = instances[randrange(len(instances))]

    createModelData = {
        "region": "us-west-2",
        "namespace": "AWS/EC2",
        "datasource": "cloudwatch",
        "metric": "CPUUtilization",
        "dimensions": {
            "InstanceId": testInstance.id
        }
    }

    # Number of minimum rows
    cls.minDataRows = 100

    cls.app = TestApp(models_api.app.wsgifunc())

    # create test metric
    g_logger.info("Creating test metric; modelSpec=%s", createModelData)
    response = cls.app.put("/", utils.jsonEncode(createModelData),
                           headers=cls.headers)
    postResult = utils.jsonDecode(response.body)

    maxWaitTime = 600
    waitTimeMetricData = 0
    waitAnomalyScore = 0

    # Wait for enough metric data to be available
    cls.uid = postResult[0]["uid"]
    engine = repository.engineFactory()
    with engine.connect() as conn:
        cls.metricData = [row for row
                          in repository.getMetricData(conn, cls.uid)]
    with engine.connect() as conn:
        cls.testMetric = repository.getMetric(conn, cls.uid)

    # Confirm that we have enough metricData
    g_logger.info("Waiting for metric data")
    while (len(cls.metricData) < cls.minDataRows and
           waitTimeMetricData < maxWaitTime):
        g_logger.info("not ready, waiting for metric data: got %d of %d ...",
                      len(cls.metricData), cls.minDataRows)
        time.sleep(5)
        waitTimeMetricData += 5
        with engine.connect() as conn:
            cls.metricData = [row for row
                              in repository.getMetricData(conn, cls.uid)]

    # taking lower value for testRowId, this will make sure to have
    # Non NULL value for anomaly_score field for given row
    cls.testRowId = 5

    with engine.connect() as conn:
        cls.testMetricRow = (repository.getMetricData(
            conn, cls.uid, rowid=cls.testRowId).fetchone())

    # Make sure we did not receive None etc for anomaly score
    g_logger.info("cls.testMetricRow.anomaly_score=%r",
                  cls.testMetricRow.anomaly_score)
    g_logger.info("waitAnomalyScore=%r", waitAnomalyScore)
    while (cls.testMetricRow.anomaly_score is None and
           waitAnomalyScore < maxWaitTime):
        g_logger.info("anomaly_score not ready, sleeping...")
        time.sleep(5)
        waitAnomalyScore += 5
        with engine.connect() as conn:
            cls.testMetricRow = (repository.getMetricData(
                conn, cls.uid, rowid=cls.testRowId).fetchone())

    # Decide queryParams for anomalyScore, to and from timestamp
    cls.testAnomalyScore = cls.testMetricRow.anomaly_score
    cls.testTimeStamp = cls.testMetricRow.timestamp
def GET(self, metricId=None):
    """
    Get Model Data

    ::

        GET /_models/{model-id}/data?from={fromTimestamp}&to={toTimestamp}&anomaly={anomalyScore}&limit={numOfRows}

    Parameters:

      :param limit: (optional) max number of records to return
      :type limit: int
      :param from: (optional) return records from this timestamp
      :type from: timestamp
      :param to: (optional) return records up to this timestamp
      :type to: timestamp
      :param anomaly: anomaly score to filter
      :type anomaly: float

    Returns:

    ::

        {
            "data": [
                ["2013-08-15 21:34:00", 222, 0.025, 125],
                ["2013-08-15 21:32:00", 202, 0, 124],
                ["2013-08-15 21:30:00", 202, 0, 123],
                ...
            ],
            "names": ["timestamp", "value", "anomaly_score", "rowid"]
        }

    Generator method: yields msgpack frames when the client accepts
    application/octet-stream, otherwise a single JSON-encoded body.
    """
    queryParams = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    fromTimestamp = queryParams.get("from")
    toTimestamp = queryParams.get("to")
    anomaly = float(queryParams.get("anomaly") or 0.0)
    limit = int(queryParams.get("limit") or 0)

    with web.ctx.connFactory() as conn:
        fields = (schema.metric_data.c.uid,
                  schema.metric_data.c.timestamp,
                  schema.metric_data.c.metric_value,
                  schema.metric_data.c.anomaly_score,
                  schema.metric_data.c.rowid)
        # Expose "metric_value" to clients under the name "value"
        names = ("names",) + tuple(["value" if col.name == "metric_value"
                                    else col.name
                                    for col in fields])
        if fromTimestamp:
            sort = schema.metric_data.c.timestamp.asc()
        else:
            sort = schema.metric_data.c.timestamp.desc()
        result = repository.getMetricData(conn,
                                          metricId=metricId,
                                          fields=fields,
                                          fromTimestamp=fromTimestamp,
                                          toTimestamp=toTimestamp,
                                          score=anomaly,
                                          sort=sort)

        if "application/octet-stream" in web.ctx.env.get('HTTP_ACCEPT', ""):
            # Number of rows already streamed per metric uid
            results_per_uid = defaultdict(int)
            packer = msgpack.Packer()
            self.addStandardHeaders(content_type='application/octet-stream')
            web.header('X-Accel-Buffering', 'no')

            yield packer.pack(names)
            for row in result:
                # BUG FIX: the counter values are ints; the original called
                # len(results_per_uid[row.uid]), which raises TypeError as
                # soon as a limit is requested on this path.
                if not limit or results_per_uid[row.uid] < limit:
                    resultTuple = (
                        row.uid,
                        calendar.timegm(row.timestamp.timetuple()),
                        row.metric_value,
                        row.anomaly_score,
                        row.rowid,
                    )
                    yield packer.pack(resultTuple)
                    results_per_uid[row.uid] += 1
        else:
            if metricId is None:
                # No metric specified: group rows per uid with per-uid limit
                output = {}
                for row in result:
                    uid = row.uid
                    default = {"uid": uid, "data": []}
                    recordTuple = (
                        row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                        row.metric_value,
                        row.anomaly_score,
                        row.rowid
                    )
                    metricDataRecord = output.setdefault(uid, default)
                    if not limit or len(metricDataRecord["data"]) < limit:
                        metricDataRecord["data"].append(recordTuple)
                results = {
                    "metrics": output.values(),
                    "names": names[2:]
                }
            else:
                if limit:
                    results = {
                        "names": names[2:],
                        "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                                  row.metric_value,
                                  row.anomaly_score,
                                  row.rowid)
                                 for row in itertools.islice(result, 0, limit)]
                    }
                else:
                    results = {
                        "names": names[2:],
                        "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                                  row.metric_value,
                                  row.anomaly_score,
                                  row.rowid)
                                 for row in result]
                    }

            self.addStandardHeaders()
            yield utils.jsonEncode(results)
def setUpClass(cls):
    """
    Setup steps for all test cases.

    Focus for these is to cover all API checks for ModelDataHandler.
    Hence, this does all setup creating metric, waiting for metricData
    across all testcases; all API calls for querying metricData will be
    against the single metric created in setup.

    Setup Process:
      1) Update conf with aws credentials; ManagedTempRepository will not
         work in this test
      2) Select a test instance that has been running for a long time
         (older than 15 days)
      3) Create metric, wait for min metricData rows to become available
         (set to 100, configurable)
      4) Pick testRowId; a low value makes it likely the anomaly_score
         field is non-NULL for that row when invoking GET with conditions
         (set to 5)
      5) Decide queryParams for anomalyScore, to and from timestamp
    """
    cls.headers = getDefaultHTTPHeaders(grok.app.config)

    # All other services need AWS credentials to work
    # Set AWS credentials
    grok.app.config.loadConfig()

    # Select a test instance that has been running for a long time
    g_logger.info("Getting long-running EC2 Instances")
    instances = aws_utils.getLongRunningEC2Instances(
        "us-west-2",
        grok.app.config.get("aws", "aws_access_key_id"),
        grok.app.config.get("aws", "aws_secret_access_key"),
        15)
    # BUG FIX: the original randrange(1, len(instances)) skipped index 0
    # and raised ValueError whenever only a single instance qualified.
    testInstance = instances[randrange(len(instances))]

    createModelData = {
        "region": "us-west-2",
        "namespace": "AWS/EC2",
        "datasource": "cloudwatch",
        "metric": "CPUUtilization",
        "dimensions": {
            "InstanceId": testInstance.id
        }
    }

    # Number of minimum rows
    cls.minDataRows = 100

    cls.app = TestApp(models_api.app.wsgifunc())

    # create test metric
    g_logger.info("Creating test metric; modelSpec=%s", createModelData)
    response = cls.app.put("/", utils.jsonEncode(createModelData),
                           headers=cls.headers)
    postResult = utils.jsonDecode(response.body)

    maxWaitTime = 600
    waitTimeMetricData = 0
    waitAnomalyScore = 0

    # Wait for enough metric data to be available
    cls.uid = postResult[0]["uid"]
    engine = repository.engineFactory()
    with engine.connect() as conn:
        cls.metricData = [
            row for row in repository.getMetricData(conn, cls.uid)
        ]
    with engine.connect() as conn:
        cls.testMetric = repository.getMetric(conn, cls.uid)

    # Confirm that we have enough metricData
    g_logger.info("Waiting for metric data")
    while (len(cls.metricData) < cls.minDataRows and
           waitTimeMetricData < maxWaitTime):
        g_logger.info(
            "not ready, waiting for metric data: got %d of %d ...",
            len(cls.metricData), cls.minDataRows)
        time.sleep(5)
        waitTimeMetricData += 5
        with engine.connect() as conn:
            cls.metricData = [
                row for row in repository.getMetricData(conn, cls.uid)
            ]

    # taking lower value for testRowId, this will make sure to have
    # Non NULL value for anomaly_score field for given row
    cls.testRowId = 5

    with engine.connect() as conn:
        cls.testMetricRow = (repository.getMetricData(
            conn, cls.uid, rowid=cls.testRowId).fetchone())

    # Make sure we did not receive None etc for anomaly score
    g_logger.info("cls.testMetricRow.anomaly_score=%r",
                  cls.testMetricRow.anomaly_score)
    g_logger.info("waitAnomalyScore=%r", waitAnomalyScore)
    while (cls.testMetricRow.anomaly_score is None and
           waitAnomalyScore < maxWaitTime):
        g_logger.info("anomaly_score not ready, sleeping...")
        time.sleep(5)
        waitAnomalyScore += 5
        with engine.connect() as conn:
            cls.testMetricRow = (repository.getMetricData(
                conn, cls.uid, rowid=cls.testRowId).fetchone())

    # Decide queryParams for anomalyScore, to and from timestamp
    cls.testAnomalyScore = cls.testMetricRow.anomaly_score
    cls.testTimeStamp = cls.testMetricRow.timestamp