class TestPrometheusConnectWithMockedNetwork(BaseMockedNetworkTestcase):
    """Network is blocked in this testcase, see base class."""

    def setUp(self):
        # URL is irrelevant: every request is intercepted by the mock layer.
        self.pc = PrometheusConnect(url='http://doesnt_matter.xyz', disable_ssl=True)

    def test_network_is_blocked(self):
        # Sanity check: the base class turns every real HTTP call into a 403.
        resp = requests.get('https://google.com')
        self.assertEqual(resp.status_code, 403)
        self.assertEqual(resp.text, 'BOOM!')

    def test_how_mock_prop_works(self):
        # Demonstrates the mock_response context manager: it records every
        # request it serves and replays the canned body/status.
        with self.mock_response('kekekeke', status_code=500) as handler:
            self.assertEqual(len(handler.requests), 0)
            resp = requests.get('https://redhat.com')
            self.assertEqual(resp.status_code, 500)
            self.assertEqual(resp.text, 'kekekeke')
            self.assertEqual(len(handler.requests), 1)
            request = handler.requests[0]
            self.assertEqual(request.url, 'https://redhat.com/')

    def test_unauthorized(self):
        # A 403 from the API must surface as PrometheusApiClientException.
        with self.mock_response("Unauthorized", status_code=403):
            with self.assertRaises(PrometheusApiClientException) as exc:
                self.pc.all_metrics()
        self.assertEqual("HTTP Status Code 403 (b'Unauthorized')", str(exc.exception))

    def test_broken_responses(self):
        # Every public query entry point must translate the blocked-network
        # 403 into the same PrometheusApiClientException.  subTest keeps one
        # failing entry point from hiding failures in the others.
        entry_points = (
            ("all_metrics", lambda: self.pc.all_metrics()),
            ("get_current_metric_value",
             lambda: self.pc.get_current_metric_value("metric")),
            ("get_metric_range_data",
             lambda: self.pc.get_metric_range_data("metric")),
            ("custom_query_range",
             lambda: self.pc.custom_query_range(
                 "query", datetime.now(), datetime.now(), "1")),
            ("custom_query", lambda: self.pc.custom_query("query")),
        )
        for name, call in entry_points:
            with self.subTest(entry_point=name):
                with self.assertRaises(PrometheusApiClientException) as exc:
                    call()
                self.assertEqual("HTTP Status Code 403 (b'BOOM!')",
                                 str(exc.exception))

    def test_all_metrics_method(self):
        all_metrics_payload = {"status": "success", "data": ["up", "alerts"]}
        with self.mock_response(all_metrics_payload) as handler:
            self.assertTrue(len(self.pc.all_metrics()))
            self.assertEqual(handler.call_count, 1)
            request = handler.requests[0]
            self.assertEqual(request.path_url, "/api/v1/label/__name__/values")
def collect_metrics(configuration: Configuration, sli_report: SLIReport):
    """Collect metrics from Prometheus/Thanos.

    Iterates every SLI context in ``sli_report`` and runs each configured
    query against Thanos (skipped entirely when ``_DRY_RUN`` is set, in
    which case every metric resolves to 0).

    Returns:
        dict: ``{sli_name: {query_name: value}}`` where ``value`` is a float
        on success or the sentinel string ``"ErrorMetricRetrieval"`` when a
        query failed.  Failures are logged and never abort the collection.
    """
    if not _DRY_RUN:
        pc = PrometheusConnect(
            url=configuration.thanos_url,
            headers={"Authorization": f"bearer {configuration.thanos_token}"},
            disable_ssl=True,
        )

    collected_info = {}
    for sli_name, sli_methods in sli_report.report_sli_context.items():
        _LOGGER.info("Retrieving data for... %s", sli_name)
        collected_info[sli_name] = {}

        for query_name, query_inputs in sli_methods["query"].items():
            # A query entry is either a plain query string or a dict that
            # additionally requests a range query and a post-processing action.
            requires_range = False
            action_type = None  # only meaningful when requires_range is True
            if isinstance(query_inputs, dict):
                query = query_inputs["query"]
                requires_range = query_inputs.get("requires_range", False)
                action_type = query_inputs.get("type")
            else:
                query = query_inputs

            _LOGGER.info("Querying... %s", query_name)
            _LOGGER.info("Using query... %s", query)

            try:
                if not _DRY_RUN:
                    if requires_range:
                        metric_data = pc.custom_query_range(
                            query=query,
                            start_time=configuration.start_time,
                            end_time=configuration.end_time,
                            step=configuration.step,
                        )
                    else:
                        metric_data = pc.custom_query(query=query)

                    _LOGGER.info("Metric obtained... %s", metric_data)

                    if requires_range:
                        # Keep only strictly-positive samples, then reduce the
                        # vector with the configured action (e.g. avg/max).
                        metrics_vector = [
                            float(v[1])
                            for v in metric_data[0]["values"]
                            if float(v[1]) > 0
                        ]
                        result = manipulate_retrieved_metrics_vector(
                            metrics_vector=metrics_vector, action=action_type)
                        collected_info[sli_name][query_name] = result
                    else:
                        collected_info[sli_name][query_name] = float(
                            metric_data[0]["value"][1])
                else:
                    # Dry run: fabricate a zero-valued sample.
                    metric_data = [{
                        "metric": "dry run",
                        "value": [datetime.datetime.utcnow(), 0],
                    }]
                    collected_info[sli_name][query_name] = float(
                        metric_data[0]["value"][1])
            except Exception as e:
                # Best-effort collection: record the failure and move on.
                _LOGGER.exception(
                    f"Could not gather metric for {sli_name}-{query_name}...{e}"
                )
                collected_info[sli_name][query_name] = "ErrorMetricRetrieval"

    return collected_info
class get_prometheus_data:
    """Fetch Prometheus metrics for a finished benchmark sample.

    ``action`` is the sample-info dict; it must carry ``uuid``, ``user``,
    ``cluster_name``, ``test_config``, ``tool`` and epoch-second strings
    ``starttime``/``endtime``.  Connection details come from the
    ``prom_token``/``prom_url`` environment variables; without them the
    instance is inert and ``get_all_metrics`` yields nothing.
    """

    def __init__(self, action):
        self.sample_info_dict = action
        self.uuid = action["uuid"]
        self.user = action["user"]
        self.cluster_name = action["cluster_name"]
        self.test_config = action["test_config"]

        # Convert epoch-second strings into datetime objects.
        self.start = datetime.fromtimestamp(int(self.sample_info_dict["starttime"]))
        self.end = datetime.fromtimestamp(int(self.sample_info_dict["endtime"]))

        # Step value used in the Prometheus query.  Default is 30 seconds
        # (the OpenShift default scraping interval) but it can be
        # overridden through the environment.
        if "prom_step" in os.environ:
            self.T_Delta = os.environ["prom_step"]
        else:
            self.T_Delta = 30

        self.get_data = False
        if "prom_token" in os.environ and "prom_url" in os.environ:
            self.get_data = True
            token = os.environ["prom_token"]
            self.url = os.environ["prom_url"]
            self.headers = {"Authorization": "Bearer " + token}
            self.pc = PrometheusConnect(url=self.url,
                                        headers=self.headers,
                                        disable_ssl=True)
        else:
            # logger.warn is the deprecated alias of warning.
            logger.warning(
                """snafu service account token and prometheus url not set \n
                No Prometheus data will be indexed""")

    def get_all_metrics(self):
        """Yield one flat document per (metric, sample) for indexing.

        Generator; yields nothing when credentials were not configured.
        """
        # check get_data bool, if false by-pass all processing
        if self.get_data:
            start_time = time.time()

            # Resolve the tool's include file relative to this module.
            dirname = os.path.dirname(os.path.realpath(__file__))
            include_file_dir = os.path.join(dirname, "prometheus_labels/")
            tool_include_file = os.path.join(
                include_file_dir,
                self.sample_info_dict["tool"] + "_included_labels.json")

            # Fall back to the default include file when the tool-specific
            # one does not exist.
            if os.path.isfile(tool_include_file):
                filename = tool_include_file
            else:
                filename = os.path.join(include_file_dir, "included_labels.json")

            logger.info("using prometheus metric include file %s", filename)

            # Open the include file and loop through every listed metric.
            with open(filename, "r") as f:
                datastore = json.load(f)

                for metric_name in datastore["data"]:
                    query_item = datastore["data"][metric_name]
                    query = query_item["query"]
                    label = query_item["label"]
                    step = str(self.T_Delta) + "s"

                    try:
                        # Pull the desired labels between start and end time.
                        response = self.pc.custom_query_range(
                            query, self.start, self.end, step, None)
                    except Exception as e:
                        # Ensure response is defined so the loop below is a no-op.
                        response = []
                        logger.info(query)
                        logger.warning("failure to get metric results %s", e)

                    for result in response:
                        # Normalize the "__name__" key to "name".
                        result["metric"]["name"] = ""
                        if "__name__" in result["metric"]:
                            result["metric"]["name"] = result["metric"]["__name__"]
                            del result["metric"]["__name__"]
                        else:
                            result["metric"]["name"] = label

                        # Each result holds a list of samples; flatten them so
                        # each sample becomes its own ES document.
                        for value in result["values"]:
                            # First index is the timestamp.
                            timestamp = datetime.utcfromtimestamp(
                                value[0]).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
                            # Second index is the metric value; Prometheus can
                            # emit the non-finite strings NaN/Inf/-Inf which
                            # must be zeroed before indexing.
                            if value[1] in ("NaN", "Inf", "+Inf", "-Inf"):
                                metric_value = 0
                            else:
                                metric_value = float(value[1])

                            flat_doc = {
                                "metric": result["metric"],
                                "Date": timestamp,
                                "value": metric_value,
                                "metric_name": metric_name,
                            }
                            flat_doc.update(self.sample_info_dict)
                            yield flat_doc

            logger.debug("Total Time --- %s seconds ---" %
                         (time.time() - start_time))
def query_prom_data_range(svc_names,
                          query_fn,
                          start_time,
                          end_time,
                          sampling_rate=1,
                          is_summary=False,
                          url="http://vmhost1.local:9090"):
    """Query Prometheus metric data for customized services during customized time range.

    Params:
        svc_names: service metric names
        query_fn: function to construct the Prometheus query string from the service name.
        start_time: start time. A datetime.datetime object.
        end_time: same as start. A datetime.datetime object.
        sampling_rate: float, in seconds.
        is_summary: Boolean to represent whether the query is a summary with quantiles.
    Returns:
        all_metric_data: A dict of all metric data. Keys are service names.
            Values are dict containing timestamps and values (If is_summary is True,
            there are multiple timestamp and value items).
            NOTE(review): a service with no data in the whole range maps to None.
    """
    # Small helper: append l to the list at d[key], creating the list on
    # first use (equivalent to defaultdict(list) behaviour).
    def append_data(d, key, l):
        if key in d:
            d[key].append(l)
        else:
            d[key] = [l]

    prom = PrometheusConnect(url=url, disable_ssl=True)
    all_metric_data = {}
    for n in svc_names:
        query = query_fn(n)
        # Split into 3-hour batch and get one batch at a time.
        batch_len = datetime.timedelta(hours=3)
        batch_start = start_time
        batch_end = start_time + batch_len
        # Per-batch arrays accumulated here, concatenated after the loop.
        timestamps_dict = {}
        values_dict = {}
        # None acts as a sentinel for "no data seen yet for this service";
        # it is replaced by a dict on the first non-empty batch.
        metric_info = None
        while batch_start < end_time:
            # Clamp the final batch to the requested end time.
            if batch_end >= end_time:
                batch_end = end_time
            metric_data = prom.custom_query_range(query=query,
                                                  start_time=batch_start,
                                                  end_time=batch_end,
                                                  step=sampling_rate)
            # Sometimes there are no metric data within the range. Skip processing.
            if len(metric_data) > 0:
                if metric_info is None:
                    metric_info = {}
                    # Label set of the first series; assumed representative
                    # for the whole range.
                    metric_info['metric'] = metric_data[0]['metric'].copy()
                for one_data in metric_data:
                    # Columns: [timestamp, value] per sample.
                    raw_values = np.array(one_data['values'], dtype=np.float64)
                    # Retrive multiple time series data for different quantiles.
                    if is_summary is True:
                        # Remove quantile from metric info (idempotent; runs
                        # every iteration so the shared label dict never
                        # carries a single series' quantile).
                        metric_info['metric'].pop('quantile', None)
                        # Key each quantile's series separately, e.g. 'q0.99'.
                        key = 'q' + one_data['metric']['quantile']
                    else:
                        # Only one time series
                        key = 'data'
                    append_data(timestamps_dict, key, raw_values[:, 0])
                    append_data(values_dict, key, raw_values[:, 1])
            # Because the previous range [batch_start, batch_end] is inclusive at both ends.
            # We move to the next timestamp here.
            batch_start = batch_end + datetime.timedelta(seconds=sampling_rate)
            batch_end = batch_start + batch_len

        # Merge the per-batch arrays into one array per key, writing the
        # result into the enclosing metric_info (closure mutation).
        def concat(d, name, conv_type=np.float64):
            for k, v in d.items():
                merged_v = np.concatenate(v).astype(conv_type)
                metric_info[f'{name}_{k}'] = merged_v

        concat(timestamps_dict, 'timestamps', conv_type=np.int64)
        concat(values_dict, 'values')
        all_metric_data[n] = metric_info
    return all_metric_data