def pushToPrometheus(c, g, h, executionTime, serviceName, address, job, registry):
    """Record one unary call on three labelled metrics and push the registry.

    ``c`` is incremented, ``g`` is set to the current time and ``h`` observes
    ``executionTime``. All three share the same label set
    (``grpc_type='unary'``, ``grpc_service=serviceName``, ``grpc_method=job``).
    ``registry`` is then pushed to ``address`` under the job name ``job``.
    """
    label_set = {
        'grpc_type': 'unary',
        'grpc_service': serviceName,
        'grpc_method': job,
    }

    c.labels(**label_set).inc()
    g.labels(**label_set).set_to_current_time()
    h.labels(**label_set).observe(executionTime)

    prometheus.push_to_gateway(address, job=job, registry=registry)
    logger.info("Succesfully pushed metrics")
def cli(cleanup_namespace: str, verbose: bool = False):
    """Run the Thoth cleanup job in a namespace and push its metrics.

    Args:
        cleanup_namespace: namespace handed to ``_do_cleanup``.
        verbose: when true, the module logger is switched to DEBUG.

    Any exception raised by ``_do_cleanup`` still propagates to the caller;
    the metrics push is attempted either way, and a failing push is only
    logged.
    """
    if verbose:
        _LOGGER.setLevel(logging.DEBUG)

    _LOGGER.info("Thoth Cleanup Job v%s starting...", __version__)
    _LOGGER.info("Cleanup will be performed in namespace %r", cleanup_namespace)

    _METRIC_INFO.labels(__version__).inc()

    try:
        # The timer context manager records its measurement when the ``with``
        # block exits, so it has to be closed *before* the registry is pushed;
        # otherwise the pushed data never contains this run's runtime.
        with _METRIC_RUNTIME.time():
            _do_cleanup(cleanup_namespace)
    finally:
        if _THOTH_METRICS_PUSHGATEWAY_URL:
            try:
                _LOGGER.info(
                    "Submitting metrics to Prometheus pushgateway %r",
                    _THOTH_METRICS_PUSHGATEWAY_URL)
                push_to_gateway(_THOTH_METRICS_PUSHGATEWAY_URL,
                                job="cleanup",
                                registry=_PROMETHEUS_REGISTRY)
            except Exception as exc:
                # Best effort: a failed push must not mask the cleanup result.
                _LOGGER.exception(
                    "An error occurred pushing the metrics: %s", exc)
def notes_detail(key):
    """
    Update (PUT) or delete (DELETE) the note stored under ``key``.

    After the change, the most recently updated ``testRunId`` is read from
    ``testresultparent``, ``getMetrics()`` is called and the registry is
    pushed to the gateway with that id as the job name. Any other HTTP
    method does nothing and returns ``None``.
    """
    method = request.method
    if method not in ('PUT', 'DELETE'):
        return None

    if method == 'PUT':
        notes[key] = str(request.data.get('text', ''))
    else:
        notes.pop(key, None)

    # Use the latest test run id as the Pushgateway job name.
    cursor = mysql.connection.cursor()
    cursor.execute('''SELECT testRunId FROM testresultparent ORDER BY updatedAt DESC LIMIT 1;''')
    latest_row = cursor.fetchall()[0]
    testrunid = str(latest_row[0])

    getMetrics()
    push_to_gateway(prome, job=testrunid, registry=registry)

    if method == 'PUT':
        return note_repr(key)
    return 'Deleted successfully'
def push_job_finished_metric(endpoint, namespace, job, runtime):
    """Mark the backup job as finished and push the registry to ``endpoint``.

    Sets the last-success gauge to the current time and the runtime gauge to
    ``runtime``, then pushes under the job name ``"<namespace>/<job>"``.
    Any exception is caught and reported on stdout.
    """
    try:
        bigtable_backup_job_last_success_seconds.set_to_current_time()
        bigtable_backup_job_runtime_seconds.set(runtime)
        job_name = "{}/{}".format(namespace, job)
        push_to_gateway(endpoint, job=job_name, registry=registry)
    except Exception as e:
        print("failed to push metrics with error {}".format(e))
def push_date_gateway():
    """Push a ``job_last_success_unixtime`` gauge with a fixed value.

    A fresh registry is created for the single gauge, which is set to the
    hard-coded value ``"1005521"`` and pushed to ``GATEWAY`` as job
    ``batchA``.
    """
    batch_registry = CollectorRegistry()
    last_success = Gauge(
        'job_last_success_unixtime',
        'Last time a batch job successfully finished',
        registry=batch_registry,
    )
    last_success.set("1005521")
    push_to_gateway(GATEWAY, job='batchA', registry=batch_registry)
def push_metrics(endpoint, namespace, job):
    """Push the module registry to ``endpoint`` as job ``"<namespace>/<job>"``.

    Any exception is caught and reported on stdout.
    """
    try:
        job_name = "{}/{}".format(namespace, job)
        push_to_gateway(endpoint, job=job_name, registry=registry)
    except Exception as e:
        print("failed to push metrics with error {}".format(e))
def close():
    """Push the registry to ``PUSH_GATEWAY``, or warn when none is configured."""
    if not PUSH_GATEWAY:
        print(
            "WARNING! No Push Gateway set, so no metrics have been published.")
        return

    # Push the results somewhere useful...
    push_to_gateway(PUSH_GATEWAY, job=JOB_NAME, registry=registry)
# Gauges already registered by push_data, keyed by metric name. A registry
# rejects a second collector with the same name, so each gauge is created
# once and reused on later calls.
_PUSH_DATA_GAUGES = {}


def push_data(instance, class_, host, value: float):
    """Set one VM statistic gauge and push the registry to the gateway.

    Args:
        instance: value for the ``instance_name`` label.
        class_: metric name of the gauge.
        host: value for the ``instance`` label.
        value: value the gauge child is set to.

    The ``class`` label is always ``"usage"``.
    """
    gauge = _PUSH_DATA_GAUGES.get(class_)
    if gauge is None:
        gauge = Gauge(class_, 'virtual machine statistic data',
                      labelnames=("instance_name", "class", "instance"),
                      registry=registry)
        _PUSH_DATA_GAUGES[class_] = gauge

    gauge.labels(instance, "usage", host).set(value)
    push_to_gateway('172.18.0.21:9091', job='vm_metrics', registry=registry)
def _send_metrics():
    """Send metrics to pushgateway.

    Increments the schema-revision counter when a deployment name is set,
    then pushes the registry when both the gateway URL and the deployment
    name are available. Push failures are logged, not raised.
    """
    component_name = "graph-backup-job"
    deployment_name = THOTH_DEPLOYMENT_NAME
    pushgateway_url = THOTH_METRICS_PUSHGATEWAY_URL

    if not deployment_name:
        _LOGGER.warning("THOTH_DEPLOYMENT_NAME env variable is not set.")
    else:
        database_schema_revision_script.labels(
            component_name,
            GraphDatabase().get_script_alembic_version_head(),
            deployment_name,
        ).inc()

    if not (pushgateway_url and deployment_name):
        # NOTE(review): this message is also emitted when only the
        # deployment name is missing.
        _LOGGER.warning("PROMETHEUS_PUSHGATEWAY_URL env variable is not set.")
        return

    try:
        _LOGGER.debug(
            f"Submitting metrics to Prometheus pushgateway {pushgateway_url}"
        )
        push_to_gateway(
            pushgateway_url,
            job=component_name,
            registry=prometheus_registry,
        )
    except Exception as e:
        _LOGGER.exception(
            f"An error occurred pushing the metrics: {str(e)}")
def main():
    """Perform graph refresh job.

    Runs ``graph_refresh_solver()`` under the runtime timer unless the
    ``GRAPH_REFRESH_NO_SOLVERS`` environment variable is a non-zero integer,
    then pushes the registry when a pushgateway URL is configured.
    """
    _LOGGER.info(f"Thoth graph-refresh-job v{__service_version__}")
    _LOGGER.debug("Debug mode is on")

    with _METRIC_RUNTIME.time():
        solvers_disabled = bool(int(os.getenv("GRAPH_REFRESH_NO_SOLVERS", 0)))
        if solvers_disabled:
            _LOGGER.warning(
                "Skipping scheduling of solvers based on user configuration"
            )
        else:
            graph_refresh_solver()

    if not _THOTH_METRICS_PUSHGATEWAY_URL:
        return

    try:
        _LOGGER.debug(
            f"Submitting metrics to Prometheus pushgateway {_THOTH_METRICS_PUSHGATEWAY_URL}"
        )
        push_to_gateway(
            _THOTH_METRICS_PUSHGATEWAY_URL,
            job="graph-refresh",
            registry=prometheus_registry,
        )
    except Exception as e:
        _LOGGER.exception(f"An error occurred pushing the metrics: {str(e)}")
def push_prometheus(self):
    """Push ``self.registry`` to the gateway in an endless loop.

    Each iteration increments ``self.requests_total`` for the
    ``('female', 'pushgateway')`` label pair, pushes with a 3 second
    timeout and, on success, resets every child of ``self.requests_total``
    to 0. Errors are logged and the loop continues. This method never
    returns.
    """
    logging.info('check push')
    while True:
        try:
            logging.info('begin push to %s' % self.promethus_url)
            self.requests_total.labels('female', 'pushgateway').inc()
            prometheus_client.push_to_gateway(
                self.promethus_url,
                job=self.job,
                registry=self.registry,
                timeout=3,  # 3 second timeout
            )
            # Clear the accumulated totals for every label combination
            # (i.e. all error-code statistics) after a successful push.
            for child in self.requests_total._metrics.values():
                child.set(0)
            logging.info('push success')
            print('push success')
            # Unregister all collectors
            # for register in list(self.registry._collector_to_names):
            #    self.registry.unregister(register)
        except Exception as e:
            logging.error('push_to_gateway error %s' % e)

        self.push_time = time.time()
        # NOTE(review): the original comment said "push once every 30
        # seconds", but the sleep is 0.3 s; confirm the intended interval.
        time.sleep(0.3)
def push_metrics(self):
    """Push ``self.registry`` under a randomised job name and return that name.

    The job name is ``"<self.job_key>_<random integer>"``.
    """
    suffix = random.randint(1000000000000, 100000000000000000)
    job = '{job_key}_{random}'.format(job_key=self.job_key, random=suffix)
    push_to_gateway(self.pushgateway_url, job=job, registry=self.registry)
    return job
def gauge_example():
    """Push a single gauge set to 500, using the lower-cased class name as job."""
    example_registry = CollectorRegistry()
    gauge = Gauge('test_gauge', 'Description of gauge', registry=example_registry)
    gauge.set(500)

    job_name = Gauge.__name__.lower()
    push_to_gateway('192.168.10.3:9091', job=job_name, registry=example_registry)
def push_metrics(self, stat_keys=()):
    """Push a timestamp gauge and selected event stats to the Pushgateway.

    Args:
        stat_keys: iterable of keys to look up in ``self.event``; each key
            found becomes a ``ukwa_task_<key>`` gauge. Missing keys are
            logged as warnings. Defaults to an empty (immutable) tuple.

    Every gauge is labelled with ``task_namespace=self.event_name`` and
    ``status=self.status``. The push only happens when the ``PUSH_GATEWAY``
    environment variable is set; otherwise an error is logged.
    """
    registry = CollectorRegistry()
    labels = {'task_namespace': self.event_name, 'status': self.status}

    # Record the task completion timestamp, good for checking all is well:
    g = Gauge('ukwa_task_event_timestamp',
              'Timestamp of this task event, labelled by task status.',
              labelnames=['task_namespace', 'status'], registry=registry)
    g.labels(**labels).set_to_current_time()

    # For every key passed in above, pick it out of the event stats and add it:
    for key in stat_keys:
        if key in self.event:
            g = Gauge(f"ukwa_task_{key}",
                      'Outcome of this task, labelled by task status.',
                      labelnames=['task_namespace', 'status'], registry=registry)
            g.labels(**labels).set(self.event[key])
        else:
            logger.warning(f"Could not find metrics key '{key}'")

    # Read the environment once so the check and the push use the same value.
    gateway = os.environ.get("PUSH_GATEWAY")
    if gateway:
        push_to_gateway(gateway, job=self.event_name, registry=registry)
    else:
        logger.error("No metrics PUSH_GATEWAY configured!")
def get_user_count():
    """Fetch the active-user count, record metrics about it and push them.

    Returns:
        The ``activeUsers`` value from the JSON response, or ``-1`` when the
        response body is not valid JSON or is not an object with an
        ``activeUsers`` key.

    The response-time gauge is always set. On success the "green" timestamp
    and the user-count gauge are updated; on failure the "red" timestamp and
    the error counter are updated. The registry is pushed in both cases.
    """
    token = get_token()
    auth = JWTAuth(token)
    url = "https://lackadaisical-tip.glitch.me/active-users"

    start = time.time()
    response = requests.get(url, auth=auth)
    logger.info("op={:4s}, status={}, url={}".format('GET', response.status_code, url))
    duration = time.time() - start
    respTime.set(duration)

    # Expected payload: {u'activeUsers': 8596}
    # Parse the body once; a body that is not JSON counts as a failed check
    # instead of aborting before the metrics are pushed.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    result = -1
    if isinstance(payload, dict) and 'activeUsers' in payload:
        green.set_to_current_time()
        result = payload.get('activeUsers', -1)
        userCount.set(result)
    else:
        red.set_to_current_time()
        errCount.inc()

    prom.push_to_gateway('localhost:9091', job='api_active_users', registry=registry)
    return result
def push_report(self):
    """Export the tested endpoints as a labelled gauge and push it.

    Returns ``None`` when there are no failed and no successful requests.
    Otherwise returns a tuple ``(total_count, success_requests,
    failed_requests)``.
    """
    registry = CollectorRegistry()
    print("\nStarting the exporter...")
    failed_requests, success_requests = self.get_tested_endpoints()
    print("\nCollected list of all requests...", failed_requests, success_requests)

    if not (failed_requests or success_requests):
        return

    test_results_gauge = Gauge(
        'API_Automation_requests_results',
        'Requests that were made in testing grouped by labels',
        ['test_suite', 'method', 'endpoint', 'result'],
        registry=registry)

    # Failed requests are counted first, then the successful ones.
    outcomes = (("failed", failed_requests), ("success", success_requests))
    for outcome, outcome_requests in outcomes:
        for request in outcome_requests:
            test_results_gauge.labels(
                test_suite=request['test_suite'],
                method=request['method'],
                endpoint=request['endpoint'],
                result=outcome,
            ).inc()

    print("\nPushing to Gateway...")
    push_to_gateway(PUSH_GATE_WAY, job=SERVICE_NAME, registry=registry)

    total = len(failed_requests) + len(success_requests)
    return total, success_requests, failed_requests
def notes_list():
    """
    Create a note (POST) or list all notes (GET).

    In both cases the most recently updated ``testRunId`` is read from
    ``testresultparent``, ``getMetrics()`` is called and the registry is
    pushed with that id as the job name. Any other HTTP method does nothing
    and returns ``None``.
    """
    method = request.method
    if method not in ('POST', 'GET'):
        return None

    if method == 'POST':
        note = str(request.data.get('text', ''))
        new_id = max(notes.keys()) + 1
        notes[new_id] = note

    # Use the latest test run id as the Pushgateway job name.
    cursor = mysql.connection.cursor()
    cursor.execute('''SELECT testRunId FROM testresultparent ORDER BY updatedAt DESC LIMIT 1;''')
    latest_row = cursor.fetchall()[0]
    testrunid = str(latest_row[0])

    getMetrics()
    push_to_gateway(prome, job=testrunid, registry=registry)

    if method == 'POST':
        return "successful"
    return [note_repr(note_id) for note_id in sorted(notes.keys())]
def _publish(self):
    """Push ``self.registry`` to the gateway so Prometheus can scrape it.

    The gateway address (``push_to_gateway:9091``) and the job name
    (``BenchmarksHook``) are fixed.
    """
    gateway_address = "push_to_gateway:9091"
    job_name = "BenchmarksHook"
    prometheus_client.push_to_gateway(
        gateway_address,
        job=job_name,
        registry=self.registry,
    )
def push(self, job):
    """Push ``self.registry`` to ``self.url`` as ``job``, grouped by instance."""
    grouping = {'instance': self.instance}
    push_to_gateway(
        self.url,
        job=job,
        registry=self.registry,
        grouping_key=grouping,
    )
def counter_example():
    """Push a counter incremented by a random amount between 0 and 20.

    The job name is the lower-cased ``Counter`` class name.
    """
    example_registry = CollectorRegistry()
    counter = Counter('test_counter', 'Description of counter', registry=example_registry)
    counter.inc(random.randint(0, 20))

    job_name = Counter.__name__.lower()
    push_to_gateway('192.168.10.3:9091', job=job_name, registry=example_registry)
def print_hello():
    """Simulate a flaky task, record its outcome metrics and push them.

    Runs between 1 and 10 iterations. Each one sleeps for a random time of up
    to 10 seconds, randomly counts as a success or a failure, and observes
    the elapsed time on a summary. The registry is pushed once after the
    loop.

    Returns:
        The string ``'Hello world!'``.
    """
    registry = CollectorRegistry()
    c = Counter('count_exceptions', 'counts number of successes and failures',
                labelnames=['type'], registry=registry)
    s = Summary('time_delta', 'execution time of print_hello function',
                registry=registry)

    for _ in range(randint(1, 10)):
        start = timer()
        time.sleep(random()*10)
        try:
            if randint(0, 1) == 1:
                raise Exception
            c.labels(type='success').inc()
        except Exception:
            # Only ordinary errors count as failures; KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            c.labels(type='failure').inc()
        end = timer()
        # NOTE(review): ``.seconds`` truncates to whole seconds; confirm
        # whether sub-second precision is wanted.
        s.observe(timedelta(seconds=end - start).seconds)

    push_to_gateway('%s:9091' % GATEWAY, job='print_hello', registry=registry)
    return 'Hello world!'
def summary_example():
    """Push a summary with one random observation between 0 and 20.

    The job name is the lower-cased ``Summary`` class name.
    """
    example_registry = CollectorRegistry()
    summary = Summary('test_summary', 'Description of summary', registry=example_registry)
    summary.observe(random.randint(0, 20))

    job_name = Summary.__name__.lower()
    push_to_gateway('192.168.10.3:9091', job=job_name, registry=example_registry)
def on_task_retried(self, event):  # TODO
    """Handle a task-retried event: update state, record metrics, push them.

    The event is fed into ``self.state``, the task is looked up by the
    event's ``uuid``, the retry counter is incremented and the task details
    are stored on the info metric. Both registries are then pushed using
    ``self.auth_handler``.
    """
    self.state.event(event)
    task = self.state.tasks.get(event['uuid'])
    logger.warning('Task {}[{}] retried'.format(task.name, task.uuid))

    retried_counter = self.tasks_counter.labels(
        worker=task.hostname, task=task.name, result='retried')
    retried_counter.inc()

    task_details = {
        'name': task.name,
        'uuid': task.uuid,
        'result': 'retried',
        'exception': task.exception,
        'traceback': task.traceback,
        'hostname': task.hostname,
        'timestamp': str(task.timestamp)
    }
    self.tasks_info.info(task_details)

    push_to_gateway(self.gateway, job='pushgateway',
                    registry=self.tasks_counter_c,
                    handler=self.auth_handler)
    push_to_gateway(self.gateway, job='pushgateway',
                    registry=self.tasks_info_c,
                    handler=self.auth_handler)
def save_data(text):
    """Print and publish the two particle readings found in ``text``.

    ``text[10]`` is used for the "> 2.5" reading and ``text[12]`` for the
    "> 10" reading. Both gauges are set and the registry is pushed.
    """
    print('Particles > 2.5: {}'.format(text[10]))
    print('Particles > 10: {}'.format(text[12]))

    pm2_5_reading = text[10]
    pm10_reading = text[12]
    gpm2_5.set(pm2_5_reading)
    gpm10.set(pm10_reading)

    push_to_gateway('localhost:9091', job='pms5003', registry=registry)
def test_push_with_redirect_handler(self):
    """A push through the redirect handler is replayed unchanged at the redirect target."""
    def my_redirect_handler(url, method, timeout, headers, data):
        return passthrough_redirect_handler(url, method, timeout, headers, data)

    push_to_gateway(self.address, "my_job_with_redirect", self.registry,
                    handler=my_redirect_handler)

    initial = self.requests[0]
    initial_request = initial[0]
    initial_body = initial[1]
    self.assertEqual(initial_request.command, 'PUT')
    self.assertEqual(initial_request.path, '/metrics/job/my_job_with_redirect')
    self.assertEqual(initial_request.headers.get('content-type'), CONTENT_TYPE_LATEST)
    self.assertEqual(initial_body, b'# HELP g help\n# TYPE g gauge\ng 0.0\n')

    # ensure the redirect preserved request settings from the initial request.
    redirected = self.requests[1]
    redirected_request = redirected[0]
    self.assertEqual(initial_request.command, redirected_request.command)
    self.assertEqual(
        initial_request.headers.get('content-type'),
        redirected_request.headers.get('content-type'),
    )
    self.assertEqual(initial_body, redirected[1])

    # ensure the redirect took place at the expected redirect location.
    self.assertEqual(redirected_request.path, "/" + self.redirect_flag)
def main():
    """Parse CLI options, collect metrics from the input and publish them.

    With ``--pushgw -`` the metrics are printed to stdout; otherwise they
    are pushed to the given gateway under the ``--job`` name.
    """
    parser = argparse.ArgumentParser(
        prog="pyxbackup-prom-stats",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--pushgw", default=DEFAULT_PUSH_GATEWAY,
        help="Address of the pushgateway to publish to. If "
             "set to '-' it will print the metrics to stdout instead.")
    parser.add_argument(
        "--job", default=DEFAULT_JOB_NAME,
        help="Pushgateway job name.")
    parser.add_argument(
        "-v", action="store_true",
        help="Print some information to stdout.")
    args = parser.parse_args()

    # -v raises verbosity from WARNING to INFO.
    level = logging.INFO if args.v else logging.WARNING
    logging.basicConfig(
        format='[%(asctime)s] %(message)s',
        level=level)

    registry = setup_metrics()
    logging.info("Started")
    def_labels = {'instance': localhost}
    process_input(def_labels)
    logging.info("Finished reading output")

    if args.pushgw != "-":
        logging.info("publishing to pushgateway @ %s", args.pushgw)
        push_to_gateway(args.pushgw, job=args.job, registry=registry)
    else:
        print(generate_latest(registry).decode("utf-8"))
def run(self):
    """Periodically push ``REGISTRY`` to the configured pushgateway, forever.

    The gateway URL is read once from the app config. While no URL is set,
    or the ``TEST`` environment variable is ``true``, the loop only sleeps.
    Push failures raised as ``URLError`` are ignored unless ``DEBUGLOG`` is
    ``true``, in which case they are logged.
    """
    agg_url = self._app.config.get("PROMETHEUS_PUSHGATEWAY_URL")
    while True:
        # Practically disable this worker, if there is no pushgateway.
        worker_disabled = (
            agg_url is None or os.getenv("TEST", "false").lower() == "true"
        )
        if worker_disabled:
            time.sleep(ONE_DAY_IN_SECONDS)
            continue

        time.sleep(PROMETHEUS_PUSH_INTERVAL_SECONDS)
        try:
            push_to_gateway(
                agg_url,
                job=self._app.config.get("PROMETHEUS_NAMESPACE", "quay"),
                registry=REGISTRY,
                grouping_key=process_grouping_key(),
            )
            logger.debug(
                "pushed registry to pushgateway at %s with grouping key %s",
                agg_url,
                process_grouping_key(),
            )
        except urllib2.URLError:
            # There are many scenarios when the gateway might not be running.
            # These could be testing scenarios or simply processes racing to start.
            # Rather than try to guess all of them, keep it simple and let it fail.
            debug_logging = os.getenv("DEBUGLOG", "false").lower() == "true"
            if debug_logging:
                logger.exception(
                    "failed to push registry to pushgateway at %s with grouping key %s",
                    agg_url,
                    process_grouping_key(),
                )
def handleNotification(self, cHandle, data):
    """Decode one sensor notification, update the gauges and push them.

    Args:
        cHandle: not used by this method.
        data: bytes payload. Bytes 0-1 are a signed little-endian integer
            that is divided by 100 and converted with ``* 1.8 + 32``
            (presumably hundredths of a degree Celsius to Fahrenheit;
            confirm against the sensor spec). Byte 2 is the humidity.
            Bytes 3-4 are a little-endian integer divided by 1000 to give
            the battery voltage.

    The battery level is derived from the voltage (2.1 V -> 0 %, 3.1 V or
    above -> 100 %) and clamped to the range 0-100. A failed push is logged
    and skipped.
    """
    temp = round(
        (int.from_bytes(data[0:2], byteorder="little", signed=True) / 100) * 1.8 + 32,
        3,
    )
    print(f"Temperature: {temp}")
    # self.temperature.set_to_current_time()
    self.temperature.labels(sensor_name=self.label).set(temp)

    humidity = int.from_bytes(data[2:3], byteorder="little")
    print(f"Humidity: {humidity}")
    self.humidity.labels(sensor_name=self.label).set(humidity)

    voltage = int.from_bytes(data[3:5], byteorder="little") / 1000.0
    print(f"Battery voltage: {voltage}")
    self.battery_voltage.labels(sensor_name=self.label).set(voltage)

    # 3.1 or above --> 100%, 2.1 --> 0 %. Clamp at both ends so a voltage
    # below 2.1 V cannot produce a negative percentage.
    batteryLevel = max(0, min(int(round((voltage - 2.1), 2) * 100), 100))
    print("Battery level:", batteryLevel)
    self.battery_level.labels(sensor_name=self.label).set(batteryLevel)

    try:
        push_to_gateway(
            PROMETHEUS_URL, job="btle_sensor_poller", registry=registry
        )
    except OSError as e:
        logging.error(
            f"Couldn't connect to {PROMETHEUS_URL}. Skipping this update. ({e})"
        )
def collect_queue_metrics(self):
    """Collect queue metrics with scheduled celery task.

    Reads the length of every queue in ``QUEUES``, sets the matching gauge
    and pushes the registry. If the gateway cannot be reached the task state
    is set to FAILURE.

    Returns:
        dict mapping each queue name to its length.
    """
    queue_len = {}
    with celery_app.pool.acquire(block=True) as conn:
        for queue_name, queue_gauge in QUEUES.items():
            backlog = conn.default_channel.client.llen(queue_name)
            queue_len[queue_name] = backlog
            queue_gauge.set(backlog)

    LOG.info("Celery queue backlog info: ")
    LOG.info(queue_len)
    LOG.debug("Pushing stats to gateway: %s", settings.PROMETHEUS_PUSHGATEWAY)

    try:
        push_to_gateway(
            settings.PROMETHEUS_PUSHGATEWAY,
            job="masu.celery.tasks.collect_queue_metrics",
            registry=REGISTRY,
        )
    except OSError as exc:
        LOG.error("Problem reaching pushgateway: %s", exc)
        try:
            self.update_state(
                state="FAILURE",
                meta={
                    "result": str(exc),
                    "traceback": str(exc.__traceback__)
                },
            )
        except TypeError as err:
            LOG.error("The following error occurred: %s " % err)

    return queue_len
def main():
    """Parse CLI options, process the input and push timing metrics.

    Records the start time, end time and duration of ``process_input`` on
    the ``rsnapshot_start``, ``rsnapshot_end`` and ``rsnapshot_duration``
    gauges, labelled with this host's fully qualified name, then pushes the
    registry to the gateway given by ``--pushgw`` under the ``--job`` name.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--pushgw", default=DEFAULT_PUSH_GATEWAY,
                        help="Address of the pushgateway to publish to.")
    parser.add_argument("--job", default=DEFAULT_JOB_NAME,
                        help="Pushgateway job name.")
    parser.add_argument("-v", action="store_true",
                        help="Print some information to stdout.")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    registry = setup_metrics()

    start = time.time()
    if args.v:
        logging.info("started")
    def_labels = {'instance': socket.getfqdn()}
    process_input(def_labels)
    end = time.time()
    if args.v:
        logging.info("finished reading output.")

    # Labels are passed as keyword arguments: a dict passed positionally is
    # treated as one label *value* by current prometheus_client releases.
    gauges["rsnapshot_start"].labels(**def_labels).set(start)
    gauges["rsnapshot_end"].labels(**def_labels).set(end)
    gauges["rsnapshot_duration"].labels(**def_labels).set(end - start)

    if args.v:
        logging.info("publishing to pushgateway @ %s", args.pushgw)
    push_to_gateway(args.pushgw, job=args.job, registry=registry)
async def metric_task(self):
    """Push the registry to the gateway using the bot's HTTP session.

    A custom handler is passed to ``push_to_gateway``; instead of sending
    the request itself, it schedules a task on the bot's loop that sends it
    through ``self.bot.session`` and logs any response with status 400 or
    above. Exceptions raised by the push call are logged, not raised.
    """
    def async_handler(url, method, timeout, headers, data):
        async def handle():
            async with self.bot.session.request(
                method=method,
                url=url,
                data=data,
                headers=headers
            ) as resp:
                if resp.status < 400:
                    return
                log.error("Error pushing metrics to gateway: %s %s" % (resp.status, await resp.text()))

        def schedule():
            # Returned to the caller, which invokes it to start the request.
            return self.bot.loop.create_task(handle())

        return schedule

    log.debug("Pushing metrics to gateway")
    try:
        prometheus.push_to_gateway(
            gateway="prometheus-pushgateway.monitoring:9091",
            job=self.bot.config.identifier,
            grouping_key={"pod": self.bot.config.pod_id},
            registry=registry,
            handler=async_handler
        )
    except Exception as e:
        log.error("Error pushing metrics to gateway: %s %s" % (type(e), str(e)))
def _process_event(event: Event, sdp_state: SDPState,
                   service_states: List[ServiceState]):
    """Process a SDP state change event.

    Only events whose ``object_id`` is ``'SDP'`` are acted on. A
    ``current_state_updated`` event is just logged. A
    ``target_state_updated`` event drives the SDP (and, when turning off,
    every service in ``service_states``) to the requested target state,
    updates the ``SIP_STATE_ALARM`` gauge and pushes it to the gateway.

    Args:
        event: the event to process.
        sdp_state: SDP state object that is read and updated.
        service_states: service state objects, switched to ``'off'`` when
            the SDP target state is ``'off'``.
    """
    LOG.debug('Event detected! (id : "%s", type: "%s", data: "%s")',
              event.object_id, event.type, event.data)

    if event.object_id == 'SDP' and event.type == 'current_state_updated':
        LOG.info('SDP current state updated, no action required!')

    if event.object_id == 'SDP' and event.type == 'target_state_updated':
        LOG.info("SDP target state changed to '%s'", sdp_state.target_state)

        # If the SDP is already in the target state, do nothing.
        if sdp_state.target_state == sdp_state.current_state:
            LOG.warning('SDP already in %s state', sdp_state.current_state)
            return

        # Check that a transition to the target state is allowed in the
        # current state. If not, reset the target state to the current one.
        if not sdp_state.is_target_state_allowed(sdp_state.target_state):
            LOG.error('Transition to %s is not allowed when in state %s',
                      sdp_state.target_state, sdp_state.current_state)
            sdp_state.target_state = sdp_state.current_state
            return

        _update_services_target_state(sdp_state.target_state)

        # If asking SDP to turn off, also turn off services.
        if sdp_state.target_state == 'off':
            LOG.info('Turning off services!')
            for service_state in service_states:
                service_state.update_target_state('off')
                service_state.update_current_state('off')

        LOG.info('Processing target state change request ...')
        time.sleep(0.1)
        LOG.info('Done processing target state change request!')

        # Assuming that the SDP has responded to the target state command
        # by now, set the current state to the target state.
        sdp_state.update_current_state(sdp_state.target_state)

        # The alarm gauge is 1 while the SDP is in the 'alarm' state, else 0.
        if sdp_state.current_state == 'alarm':
            LOG.debug('raising SDP state alarm')
            SIP_STATE_ALARM.set(1)
        else:
            SIP_STATE_ALARM.set(0)
        try:
            # FIXME(BMo) the pushgateway host should not be hardcoded!
            push_to_gateway('platform_pushgateway:9091', job='SIP',
                            registry=COLLECTOR_REGISTRY)
        except urllib.error.URLError:
            LOG.warning("Unable to connect to the Alarms service!")
def notify_success(self, source, hostname, filename, stats):
    """Push the statistics of a finished backup as gauges.

    Size, dump time, upload time and retained-copy count come from
    ``stats``; ``backup_timestamp`` is set to the current time. The metrics
    are pushed to ``self.url`` under the job name ``source.id`` using HTTP
    basic auth. ``hostname`` and ``filename`` are not used.
    """
    registry = CollectorRegistry()

    gauge_specs = (
        ('backup_size', 'Size of backup file in bytes', stats.size),
        ('backup_dumptime', 'Time taken to dump and compress/encrypt backup in seconds', stats.dumptime),
        ('backup_uploadtime', 'Time taken to upload backup in seconds', stats.uploadtime),
        ('backup_retained_copies', 'Number of retained backups found on destination', stats.retained_copies),
    )
    for metric_name, description, value in gauge_specs:
        Gauge(metric_name, description, registry=registry).set(value)

    completed_at = Gauge('backup_timestamp', 'Time backup completed as seconds-since-the-epoch', registry=registry)
    completed_at.set_to_current_time()

    push_to_gateway(
        self.url,
        job=source.id,
        registry=registry,
        handler=http_basic_auth_handler,
        handler_args=self.auth_args,
    )
    logging.info("Pushed metrics for job '%s' to gateway (%s)" % (source.id, self.url))
def push_inventory_metrics():
    """Push inventory app and OS version counts to the Pushgateway.

    Does nothing when no ``prometheus_push_gateway`` is configured under
    ``apps -> zentral.contrib.inventory`` in ``settings``.

    Each row from ``osx_app_count()`` and ``os_version_count()`` is a dict;
    its ``count`` entry becomes the gauge value and the remaining entries
    are used as the label values.
    """
    ppg = settings.get('apps', {}).get('zentral.contrib.inventory', {}).get('prometheus_push_gateway', None)
    if not ppg:
        return

    registry = CollectorRegistry()

    g = Gauge('zentral_inventory_osx_apps', 'Zentral inventory OSX apps',
              ['name', 'version_str', 'source'],
              registry=registry)
    for r in osx_app_count():
        count = r.pop('count')
        # Labels go in as keyword arguments: a dict passed positionally is
        # counted as a single label value by current prometheus_client.
        g.labels(**r).set(count)

    g = Gauge('zentral_inventory_os_versions', 'Zentral inventory OS Versions',
              ['name', 'major', 'minor', 'patch', 'build', 'source'],
              registry=registry)
    for r in os_version_count():
        count = r.pop('count')
        g.labels(**r).set(count)

    push_to_gateway(ppg, job='zentral_push_inventory_metrics', registry=registry)
def notify_success(self, source, hostname, filename, stats):
    """Push the statistics of a finished backup to the Pushgateway.

    Size, dump time and upload time are observed on summaries. The
    retained-copy gauge is only created when ``stats.retained_copies`` is
    not ``None``. ``backup_timestamp`` is set to the current time. The push
    goes to ``self.url`` under the job name ``source.id`` with basic auth
    built from ``self.username`` and ``self.password``. ``hostname`` and
    ``filename`` are not used.
    """
    registry = CollectorRegistry()

    size_summary = Summary('backup_size', 'Size of backup file in bytes', registry=registry)
    size_summary.observe(stats.size)

    dump_summary = Summary('backup_dumptime', 'Time taken to dump and compress/encrypt backup in seconds', registry=registry)
    dump_summary.observe(stats.dumptime)

    upload_summary = Summary('backup_uploadtime', 'Time taken to upload backup in seconds', registry=registry)
    upload_summary.observe(stats.uploadtime)

    if stats.retained_copies is not None:
        retained_gauge = Gauge('backup_retained_copies', 'Number of retained backups found on destination', registry=registry)
        retained_gauge.set(stats.retained_copies)

    completed_at = Gauge('backup_timestamp', 'Time backup completed as seconds-since-the-epoch', registry=registry)
    completed_at.set_to_current_time()

    def auth_handler(url, method, timeout, headers, data):
        # Bind this notifier's credentials to the basic-auth handler.
        return basic_auth_handler(url, method, timeout, headers, data,
                                  self.username, self.password)

    push_to_gateway(self.url, job=source.id, registry=registry, handler=auth_handler)
    logging.info("Pushed metrics for job '%s' to gateway (%s)" % (source.id, self.url))
def test_push(self):
    """A plain push issues a PUT to ``/metrics/job/<job>`` with the exposition body."""
    push_to_gateway(self.address, "my_job", self.registry)

    captured = self.requests[0]
    http_request = captured[0]
    body = captured[1]

    self.assertEqual(http_request.command, "PUT")
    self.assertEqual(http_request.path, "/metrics/job/my_job")
    self.assertEqual(http_request.headers.get("content-type"), CONTENT_TYPE_LATEST)
    self.assertEqual(body, b"# HELP g help\n# TYPE g gauge\ng 0.0\n")
def test_push_with_complex_groupingkey(self):
    """Grouping-key values are appended to the path and URL-escaped."""
    grouping_key = {"a": 9, "b": "a/ z"}
    push_to_gateway(self.address, "my_job", self.registry, grouping_key)

    captured = self.requests[0]
    http_request = captured[0]
    body = captured[1]

    self.assertEqual(http_request.command, "PUT")
    self.assertEqual(http_request.path, "/metrics/job/my_job/a/9/b/a%2F+z")
    self.assertEqual(http_request.headers.get("content-type"), CONTENT_TYPE_LATEST)
    self.assertEqual(body, b"# HELP g help\n# TYPE g gauge\ng 0.0\n")
def main():
    """Entry point of the testbed deploy script.

    Parses the arguments, loads the JSON configuration (the ``CONFFILE``
    environment variable overrides ``args.conffile``), sets the module
    globals ``HWManager``, ``conf`` and ``tsclient``, and then runs the
    action selected on the command line: release servers, deploy the admin
    node, deploy the worker nodes, or push metrics to Prometheus.
    """
    global HWManager, conf, tsclient
    args = parse_args()
    log.info("Testbed deploy script - %s" % " ".join(sys.argv[1:]))

    # The environment variable takes precedence over the CLI option.
    if 'CONFFILE' in os.environ:
        args.conffile = os.environ['CONFFILE']
        log.info("Using conffile %r" % args.conffile)

    if args.conffile:
        with open(args.conffile) as f:
            conf = json.load(f)
        assert "bmm_api_address" in conf
        log.info("BMM address: %r" % conf["bmm_api_address"])

    # NOTE(review): these asserts validate configuration and are stripped
    # when Python runs with -O.
    assert conf
    conf["logsdir"] = args.logsdir
    HWManager = RemoteHWManager
    tsclient = TestbedServiceClient()

    # Releasing servers is a standalone action: nothing else runs after it.
    if args.release:
        tsclient.release_servers(args.testname)
        return

    args.tftpdir = handle_iso(args)
    log.info("TFTP dir: %r" % args.tftpdir)

    if args.wipe_admin:
        wipe_admin_node(args)

    if args.admin or args.poweroff:
        power_off_nodes(args)

    if args.admin:
        admin_host_ipaddr = deploy_admin_node(args)
        log.info("Admin node deployment - done")
        generate_environment_json(admin_host_ipaddr, [], use_bogus_hosts=True)

    elif args.deploy_nodes:
        # Start timestamp, passed to fetch_nodes_syslog_logs below.
        t0 = int(time())
        # discover admin_host_ipaddr
        admin_node = tsclient.fetch_servers_list(args.testname,
                                                 args.master_count,
                                                 args.worker_count,
                                                 want_admin=True,
                                                 want_nodes=False)[0]
        servername, serial, desc, ilo_ipaddr, ilo_iface_macaddr, eth0_macaddr = admin_node
        # Look for the admin node's DHCP lease in the last 4 hours of logs.
        power_up_time = datetime.now() - timedelta(hours=4)
        admin_host_ipaddr = parse_dhcp_logs(power_up_time, eth0_macaddr)
        assert admin_host_ipaddr
        fetch_and_mangle_worker_autoyast(admin_host_ipaddr)
        power_off_nodes(args)
        available_hosts = deploy_nodes(args, admin_host_ipaddr,
                                       max_failing_nodes=0)
        generate_environment_json(admin_host_ipaddr, available_hosts)
        log.info("Waiting 30s")
        sleep(30)
        fetch_nodes_syslog_logs(t0, available_hosts)
        log.info("Nodes deployment - done")

    elif args.prometheus:
        log.info("Pushing metrics to Prometheus")
        try:
            push_to_gateway(PROMETHEUS_ADDR, job=PROMETHEUS_JOB_NAME,
                            registry=prometheus_reg)
        except Exception as e:
            # Best effort: the failure is logged with its traceback.
            log.error("Error pushing to Prometheus", exc_info=True)

    # NOTE(review): this branch looks unreachable, because a truthy
    # args.release already returned above.
    elif args.release:
        tsclient.release(args.testname)
def main():
    """Export GPU memory and utilization metrics in an endless loop.

    Creates five gauges labelled by device index in the default registry,
    initialises NVML and then, once per ``args.update_period`` seconds,
    refreshes the gauges for every device. When ``args.port`` is set an HTTP
    metrics server is started; when ``args.gateway`` is set the registry is
    also pushed there after each refresh, using the host name as job name.

    A failure on a single device is logged and the loop continues. Any other
    exception ends the loop and is logged. NVML is shut down on exit.
    """
    parser = _create_parser()
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    registry = core.REGISTRY

    # NVML reports memory sizes in bytes, while the *_mb gauges are
    # documented in megabytes, so divide by 1024 * 1024 (not just 1024).
    bytes_per_mb = 1024 * 1024

    total_fb_memory = Gauge('gpu_total_fb_memory_mb',
                            'Total installed frame buffer memory (in '
                            'megabytes)',
                            ['device'], registry=registry)
    free_fb_memory = Gauge('gpu_free_fb_memory_mb',
                           'Unallocated frame buffer memory (in '
                           'megabytes)',
                           ['device'], registry=registry)
    used_fb_memory = Gauge('gpu_used_fb_memory_mb',
                           'Allocated frame buffer memory (in megabytes).'
                           ' Note that the diver/GPU will always set '
                           'a small amount of memory fore bookkeeping.',
                           ['device'], registry=registry)
    gpu_utilization = Gauge('gpu_utilization_pct',
                            'Percent of time over the past sample period '
                            'during which one or more kernels was '
                            'executing on the GPU.',
                            ['device'], registry=registry)
    memory_utilization = Gauge('gpu_mem_utilization_pct',
                               'Percent of time over the past sample '
                               'period during which global (device) memory '
                               'was being read or written',
                               ['device'], registry=registry)

    iteration = 0

    try:
        log.debug('Initializing NVML...')
        nvmlInit()

        log.info('Started with nVidia driver version = %s',
                 nvmlSystemGetDriverVersion())

        device_count = nvmlDeviceGetCount()
        log.debug('%d devices found.', device_count)

        if args.port:
            log.debug('Starting http server on port %d', args.port)
            start_http_server(args.port)
            log.info('HTTP server started on port %d', args.port)

        while True:
            iteration += 1
            log.debug('Current iteration = %d', iteration)

            for i in range(device_count):
                log.debug('Analyzing device %d...', i)
                try:
                    log.debug('Obtaining handle for device %d...', i)
                    handle = nvmlDeviceGetHandleByIndex(i)
                    log.debug('Device handle for %d is %s', i, str(handle))

                    log.debug('Querying for memory information...')
                    mem_info = nvmlDeviceGetMemoryInfo(handle)
                    log.debug('Memory information = %s', str(mem_info))

                    total_fb_memory.labels(device=i).set(mem_info.total / bytes_per_mb)
                    free_fb_memory.labels(device=i).set(mem_info.free / bytes_per_mb)
                    used_fb_memory.labels(device=i).set(mem_info.used / bytes_per_mb)

                    log.debug('Obtaining utilization statistics...')
                    utilization = nvmlDeviceGetUtilizationRates(handle)
                    log.debug('Utilization statistics = %s', str(utilization))

                    # NOTE(review): the gauges are named *_pct but are set
                    # to the reported value divided by 100; confirm the
                    # intended unit.
                    gpu_utilization.labels(device=i).set(utilization.gpu / 100.0)
                    memory_utilization.labels(device=i).set(utilization.memory / 100.0)
                except Exception as e:
                    # One failing device must not stop the other devices.
                    log.warning(e, exc_info=True)

            if args.gateway:
                log.debug('Pushing metrics to gateway at %s...', args.gateway)
                hostname = platform.node()
                push_to_gateway(args.gateway, job=hostname, registry=core.REGISTRY)
                log.debug('Push complete.')

            time.sleep(args.update_period)

    except Exception as e:
        log.error('Exception thrown - %s', e, exc_info=True)
    finally:
        nvmlShutdown()
def test_push(self):
    """A plain push issues a PUT to ``/job/<job>`` with the exposition body."""
    push_to_gateway(self.address, "my_job", self.registry)

    captured = self.requests[0]
    http_request = captured[0]
    body = captured[1]

    self.assertEqual(http_request.command, 'PUT')
    self.assertEqual(http_request.path, '/job/my_job')
    self.assertEqual(http_request.headers.get('content-type'), CONTENT_TYPE_LATEST)
    self.assertEqual(body, b'# HELP g help\n# TYPE g gauge\ng 0.0\n')
def test_push_with_complex_groupingkey(self):
    """Grouping-key values are appended to the path and URL-escaped."""
    grouping_key = {'a': 9, 'b': 'a/ z'}
    push_to_gateway(self.address, "my_job", self.registry, grouping_key)

    captured = self.requests[0]
    http_request = captured[0]
    body = captured[1]

    self.assertEqual(http_request.command, 'PUT')
    self.assertEqual(http_request.path, '/job/my_job/a/9/b/a%2F+z')
    self.assertEqual(http_request.headers.get('content-type'), CONTENT_TYPE_LATEST)
    self.assertEqual(body, b'# HELP g help\n# TYPE g gauge\ng 0.0\n')
# Render forecast entries 1-3 onto the output image, one 90-pixel-wide
# column per day, and print a one-line summary of each to stdout.
# (Python 2 syntax: ``print`` is a statement here.)
for day in range(1, 4):
    print '%s %s: %s to %s with %s%% rain' %(
        dayforecast[day]['date/day'],
        calendar.month_name[dayforecast[day]['date/month']][0:3],
        dayforecast[day]['low/celsius'],
        dayforecast[day]['high/celsius'],
        dayforecast[day]['pop'])

    # Weather icon at the top of the column.
    img = pygame.image.load(dayforecast[day]['icon'])
    outimg.blit(img, (((day - 1) * 90) + X_MARGIN, 50 + Y_MARGIN))

    # First text row: day of month and three-letter month abbreviation.
    label = small_font.render(
        '%s %s' %(dayforecast[day]['date/day'],
                  calendar.month_name[dayforecast[day]['date/month']][0:3]),
        1, (0, 0, 0))
    outimg.blit(label, (((day - 1) * 90) + X_MARGIN, 100 + Y_MARGIN))

    # Second text row, 20 pixels lower: low-high values and the 'pop'
    # percentage.
    label = small_font.render(
        '%s-%s %s%%' %(dayforecast[day]['low/celsius'],
                       dayforecast[day]['high/celsius'],
                       dayforecast[day]['pop']),
        1, (0, 0, 0))
    outimg.blit(label, (((day - 1) * 90) + X_MARGIN, 100 + Y_MARGIN + 20))

# Attribution line below the forecast columns.
label = small_font.render('Data from wunderground.com', 1, (0, 0, 0))
outimg.blit(label, (X_MARGIN, Y_MARGIN + 160))

# Write the finished image, then push the metrics registry.
pygame.image.save(outimg, conf['output_filename'])
push_to_gateway('localhost:9091', job='weather', registry=registry)
def PushMetrics():
    """Push the registry to ``keys.PROMETHEUS_ENDPOINT``; no-op when it is ``None``."""
    endpoint = keys.PROMETHEUS_ENDPOINT
    if endpoint is None:
        return

    logging.info('Pushing metrics to %s', endpoint)
    pc.push_to_gateway(endpoint, job='nest-wfh', registry=registry)
record['Contacts_LeadType_S_Data_Status1'] = 'CAMPAIGN DETAIL ERROR' else: record['Contacts_LeadType_MostRecent_Offer_PrimarySol1'] = thisCampaign['Solution_Code_Family__c'] record['Contacts_LeadType_MostRecent_Offer_ProductSer1'] = thisCampaign['Solution_Code__c'] record['Contacts_LeadType_S_Data_Status1'] = 'CAMPAIGN DETAILS RETREIVED' if (thisCampaign['Solution_Code_Family__c']==None): nullCount += 1 logging.info("Records with no Primary Solution: " + str(nullCount)) importDefName = 'Contacts.LeadType - Get Campaign Details ' + str(datetime.now()) cdoInDef = elq.CreateDef(defType='imports', entity='customObjects', cdoID=1269, fields=myFields, defName=importDefName, identifierFieldName='Email_Address1') logging.info("import definition created: " + cdoInDef['uri']) postInData = elq.PostSyncData(data = dataOut, defObject=cdoInDef, maxPost=20000) logging.info("Data successfully imported, job finished: " + str(datetime.now())) else: logging.info("No records, job finished") ### Logging for Prometheus registry = CollectorRegistry() g = Gauge('job_last_success_unixtime', 'Last time a batch job successfully finished', registry=registry) g.set_to_current_time() h = Gauge('job_total_records_success', 'Total number of records successfully processed in last batch', registry=registry) h.set(len(data)) push_to_gateway(os.environ['PUSHGATEWAY'], job='Contacts.LeadType_getOfferDetails', registry=registry)
def push(self, job):
    """Push ``self.registry`` to ``self.url`` as ``job``, grouped by instance."""
    grouping_key = dict(instance=self.instance)
    push_to_gateway(self.url, job=job, registry=self.registry,
                    grouping_key=grouping_key)