def main():
    """Load the desired master state and run one command per triggered master.

    The state comes from ``args.json_file`` when that is set, otherwise it is
    fetched through gitiles from ``args.json_gitiles``.

    Returns:
        0 when ``args.verify`` is set (the state parsed, nothing is run);
        otherwise the function falls off the end and returns None.
    """
    args = parse_args()

    # Flush immediately so the presence/up metric is sent even if the
    # program runs for less than a minute.
    ts_mon.flush()

    if not args.json_file:
        raw_state = gitiles.call_gitiles(
            args.json_gitiles, 'text', netrc_path=args.netrc)
        desired_state = desired_state_parser.parse_desired_state(raw_state)
    else:
        desired_state = desired_state_parser.load_desired_state_file(
            args.json_file)

    if args.verify:
        # File checks out, no need to continue.
        return 0

    triggered, ignored = desired_state_parser.get_masters_for_host(
        desired_state, args.build_dir, args.hostname)
    log_triggered_ignored(triggered, ignored, args.hostname)

    # First build every command, then (optionally) wrap each with a timeout.
    commands = []
    for master in triggered:
        commands.append(
            synthesize_master_manager_cmd(
                master, args.hostname, prod=args.prod))

    if args.command_timeout:
        wrapped = []
        for command in commands:
            wrapped.append(daemon.add_timeout(command, args.command_timeout))
        commands = wrapped

    multiprocess.safe_map(subprocess.call, commands, args.processes)
def single_iteration():
    """Poll every monitor once, then flush ts_mon.

    The flush happens in a ``finally`` clause, so it also runs when a
    ``poll()`` call raises; the exception then still propagates.

    Returns:
        True when every monitor was polled and the flush completed.
    """
    try:
        for monitor in monitors:
            monitor.poll()
    finally:
        # Send whatever was gathered, even after a failed poll.
        ts_mon.flush()
    return True
def main(argv):
    """Set metrics from command-line values and from files, then flush.

    Args:
        argv: Argument list handed to get_arguments().
    """
    args = get_arguments(argv)
    infra_libs.logs.process_argparse_options(args)
    ts_mon.process_argparse_options(args)

    # One metric class per option family; the value tuples below follow the
    # same order.
    metric_types = (
        ts_mon.GaugeMetric,
        ts_mon.FloatMetric,
        ts_mon.StringMetric,
        ts_mon.BooleanMetric,
        ts_mon.CounterMetric,
        ts_mon.CumulativeMetric,
    )

    inline_values = (
        args.gauge,
        args.float,
        args.string,
        args.bool,
        args.counter,
        args.cumulative,
    )
    for value, metric_type in zip(inline_values, metric_types):
        set_metrics(value, metric_type)

    file_values = (
        args.gauge_file,
        args.float_file,
        args.string_file,
        args.bool_file,
        args.counter_file,
        args.cumulative_file,
    )
    for value, metric_type in zip(file_values, metric_types):
        set_metrics_file(value, metric_type)

    ts_mon.flush()
def main():
    """Resolve the desired state for this host and run the master commands.

    Returns:
        0 if ``args.verify`` is set, in which case only loading/parsing of
        the desired state is performed. Otherwise returns None after the
        commands were handed to multiprocess.safe_map().
    """
    args = parse_args()

    # Flushing metrics here to be sure the presence/up got sent even if the
    # program runs for less than a minute.
    ts_mon.flush()

    if args.json_file:
        desired_state = desired_state_parser.load_desired_state_file(
            args.json_file)
    else:
        fetched = gitiles.call_gitiles(
            args.json_gitiles, 'text', netrc_path=args.netrc)
        desired_state = desired_state_parser.parse_desired_state(fetched)

    if args.verify:
        return 0  # File checks out, no need to continue.

    host = args.hostname
    triggered, ignored = desired_state_parser.get_masters_for_host(
        desired_state, args.build_dir, host)
    log_triggered_ignored(triggered, ignored, host)

    commands = [
        synthesize_master_manager_cmd(master, host, prod=args.prod)
        for master in triggered
    ]

    timeout = args.command_timeout
    if timeout:
        commands = [daemon.add_timeout(cmd, timeout) for cmd in commands]

    multiprocess.safe_map(subprocess.call, commands, args.processes)
def task(self):
    """Poll each monitor in ``self.monitors`` and flush ts_mon afterwards.

    The flush is in a ``finally`` clause: it runs even if a poll raises,
    and the exception is not suppressed.

    Returns:
        True when polling and flushing both completed.
    """
    try:
        for monitor in self.monitors:
            monitor.poll()
    finally:
        # Always push the collected metrics out.
        ts_mon.flush()
    return True
def main(self, opts):  # pragma: no cover
    """Send the event(s) selected by ``opts`` and record success in ts_mon.

    Args:
        opts: Parsed options; the first truthy one of ``build_event_type``,
            ``service_event_type`` and ``events_from_file`` picks the sender.

    Returns:
        2 when none of the three options is set, otherwise 0 (also when
        sending raised: the exception is printed, not propagated).
    """
    exit_code = 0
    try:
        # Choose the sender first, then call it in a single place.
        if opts.build_event_type:
            sender = common.send_build_event
        elif opts.service_event_type:
            sender = common.send_service_event
        elif opts.events_from_file:
            sender = common.send_events_from_file
        else:
            sender = None

        if sender is not None:
            success_metric.set(sender(opts))
        else:
            print >> sys.stderr, ('At least one of the --*-event-type options or '
                                  '--events-from-file should be provided. Nothing '
                                  'was sent.')
            exit_code = 2
            success_metric.set(False)
    except Exception:
        success_metric.set(False)
        traceback.print_exc()  # helps with debugging locally.
    finally:
        event_mon.close()
        # Flushing is best effort: failures are logged, never raised.
        try:
            ts_mon.flush()
        except ts_mon.MonitoringNoConfiguredMonitorError:
            logging.error("Unable to flush ts_mon because it's not configured.")
        except Exception:
            logging.exception("Flushing ts_mon metrics failed.")
    return exit_code
def single_iteration():
    """Run every system info collector once, then flush ts_mon.

    Collectors run in a fixed order; if one raises, the remaining ones are
    skipped, the flush still happens, and the exception propagates.

    Returns:
        True when all collectors and the flush completed.
    """
    collectors = (
        get_cpu_info,
        get_disk_info,
        get_mem_info,
        get_net_info,
        get_proc_info,
    )
    try:
        for collect in collectors:
            collect()
    finally:
        ts_mon.flush()
    return True
def single_iteration():
    """Collect system and puppet metrics once, then flush ts_mon.

    The flush sits in a ``finally`` clause, so it runs even when one of the
    collectors raises; the exception is not swallowed.

    Returns:
        True when every collector and the flush completed.
    """
    collectors = (
        system_metrics.get_cpu_info,
        system_metrics.get_disk_info,
        system_metrics.get_mem_info,
        system_metrics.get_net_info,
        system_metrics.get_proc_info,
        puppet_metrics.get_puppet_summary,
    )
    try:
        for collect in collectors:
            collect()
    finally:
        ts_mon.flush()
    return True
def main(argv):  # pragma: no cover
    """Set one metric family per command-line option, then flush ts_mon.

    Args:
        argv: Argument list handed to get_arguments().
    """
    args = get_arguments(argv)
    infra_libs.logs.process_argparse_options(args)
    ts_mon.process_argparse_options(args)

    # (option value, metric class) pairs, applied in this order.
    value_and_type = (
        (args.gauge, ts_mon.GaugeMetric),
        (args.float, ts_mon.FloatMetric),
        (args.string, ts_mon.StringMetric),
        (args.bool, ts_mon.BooleanMetric),
        (args.counter, ts_mon.CounterMetric),
        (args.cumulative, ts_mon.CumulativeMetric),
    )
    for value, metric_type in value_and_type:
        set_metrics(value, metric_type)

    ts_mon.flush()
def main(args):
    """Collect log state for each given master directory and flush it.

    Args:
        args: Argument list handed to argparse. The name is rebound to the
            parsed namespace inside the function.

    Returns:
        0 after every master path was processed and metrics were flushed.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-n', '--nice', type=int, metavar='VALUE',
        help='Set the nice level of the process to VALUE prior to execution.')
    arg_parser.add_argument(
        'master_paths', nargs='+',
        help='The paths to the master base directories to monitor. Consider '
             'the /path/to/build/masters/master.* wildcard to specify all of '
             'them.')
    logs.add_argparse_options(arg_parser)
    ts_mon.add_argparse_options(arg_parser)

    # Parse arguments.
    args = arg_parser.parse_args(args)
    logs.process_argparse_options(args)
    ts_mon.process_argparse_options(args)

    # Try setting the nice value; if it fails, eat the error and continue.
    nice_level = args.nice
    if nice_level:
        logging.debug('Setting process "nice" to: %d', nice_level)
        try:
            os.nice(nice_level)
        except OSError as err:
            logging.error('Failed to update "nice" to %d: %s', nice_level, err)

    # Update global state calculations.
    paths = args.master_paths
    logging.info('Pulling master state from: %s', paths)
    for path in paths:
        name = master_path_to_name(path)

        # Log to the target: buildbot/master/<master_name>
        target = ts_mon.TaskTarget(
            'buildbot/master',
            name,
            args.ts_mon_task_region,
            args.ts_mon_task_hostname,
            args.ts_mon_task_number)
        logging.info('Collecting log state for master "%s" at: %s', name, path)
        get_master_state(path, target)

    logging.info('Flushing collected information.')
    ts_mon.flush()
    return 0
def Flush(reset_after=()):
    """Flushes metrics, but warns on transient errors.

    Nothing is raised to the caller: SSL errors are logged as warnings and
    any other exception is logged as an error.

    Args:
      reset_after: A collection of metrics to reset after flushing. A
        container with a pop() method (e.g. a list) is drained in place,
        last element first, exactly as before. Any other iterable, such as
        a tuple like the default, is left untouched and its metrics are
        reset in the same last-to-first order.
    """
    if not ts_mon:
        return

    try:
        ts_mon.flush()
        if hasattr(reset_after, 'pop'):
            # Keep the original in-place draining for poppable containers.
            while reset_after:
                reset_after.pop().reset()
        else:
            # The default is a tuple, which has no pop(); previously a
            # non-empty tuple raised AttributeError here and was only
            # reported as a flush failure.
            for metric in reversed(tuple(reset_after)):
                metric.reset()
    except ssl.SSLError as e:
        logging.warning('Caught transient network error while flushing: %s', e)
    except Exception as e:
        logging.error('Caught exception while flushing: %s', e)
def RunMatchers(stream, matchers):
    """Feeds each line of |stream| to every (matcher, emitter) in |matchers|.

    Reading stops at the first empty string returned by stream.readline().

    @param stream: A file object to read from.
    @param matchers: A list of pairs of (matcher, emitter), where matcher is
                     a regex and emitter is a function called with the match
                     object when the regex matches.
    """
    read_line = stream.readline
    # The input might terminate if the log gets rotated. Make sure that
    # Monarch flushes any pending metrics before quitting.
    try:
        while True:
            line = read_line()
            if line == '':
                break
            for pattern, emit in matchers:
                match = pattern.match(line)
                if match:
                    emit(match)
    finally:
        ts_mon.close()
        ts_mon.flush()
def main(args):
    """Parse options, gather state for every master path, flush ts_mon.

    Args:
        args: Command-line argument list given to the argument parser; the
            name is reused for the parsed namespace afterwards.

    Returns:
        Always 0 once all masters were processed and the flush returned.
    """
    nice_help = 'Set the nice level of the process to VALUE prior to execution.'
    paths_help = ('The paths to the master base directories to monitor. Consider '
                  'the /path/to/build/masters/master.* wildcard to specify all of '
                  'them.')

    option_parser = argparse.ArgumentParser()
    option_parser.add_argument(
        '-n', '--nice', type=int, metavar='VALUE', help=nice_help)
    option_parser.add_argument('master_paths', nargs='+', help=paths_help)
    logs.add_argparse_options(option_parser)
    ts_mon.add_argparse_options(option_parser)

    # Parse arguments.
    args = option_parser.parse_args(args)
    logs.process_argparse_options(args)
    ts_mon.process_argparse_options(args)

    # Try setting the nice value; if it fails, eat the error and continue.
    if args.nice:
        logging.debug('Setting process "nice" to: %d', args.nice)
        try:
            os.nice(args.nice)
        except OSError as nice_error:
            logging.error(
                'Failed to update "nice" to %d: %s', args.nice, nice_error)

    # Update global state calculations.
    logging.info('Pulling master state from: %s', args.master_paths)
    for master_dir in args.master_paths:
        master = master_path_to_name(master_dir)
        # Log to the target: buildbot/master/<master_name>
        task_target = ts_mon.TaskTarget(
            'buildbot/master', master, args.ts_mon_task_region,
            args.ts_mon_task_hostname, args.ts_mon_task_number)
        logging.info(
            'Collecting log state for master "%s" at: %s', master, master_dir)
        get_master_state(master_dir, task_target)

    logging.info('Flushing collected information.')
    ts_mon.flush()
    return 0
def main(args):
    """Acquire CQ stats and publish them as ts_mon metrics.

    Stats are acquired before the ``try`` block; the flush in ``finally``
    runs whether or not setting the metrics succeeds.

    Args:
        args: Argument list handed to parse_args().

    Returns:
        0 when the metrics were set and flushed.
    """
    opts = parse_args(args)
    stats_request = StatsArgs(opts.project, opts.range)
    stats = cq_stats.acquire_stats(stats_request, add_tree_stats=False)

    try:
        # Fold every raw commit duration into one distribution.
        distribution = ts_mon.Distribution(ts_mon.GeometricBucketer())
        raw_durations = stats['patchset-committed-durations']['raw']
        for value in raw_durations:
            distribution.add(value)
        patchset_committed_durations.set(distribution)

        false_rejects = sum(
            entry['count'] for entry in stats['false-rejections'])
        attempt_false_reject_count.set(false_rejects)

        attempt_count.set(stats['attempt-count'])
    finally:
        ts_mon.flush()
    return 0
def main(args):  # pragma: no cover
    """Run the program and flush ts_mon on the way out.

    Args:
        args: Argument list handed to parse_args().

    Returns:
        Whatever run(opts) returns. If run() raises, the exception
        propagates after the flush attempt.
    """
    opts = parse_args(args)
    try:
        return run(opts)
    finally:
        # Always flush metrics before exit. A failed flush is only logged,
        # so it cannot mask the result or exception of run().
        failure_message = 'Failed to flush ts_mon data, potentially losing data'
        try:
            flushed = ts_mon.flush()
            if not flushed:
                logging.error(failure_message)
        except Exception:
            logging.exception(failure_message)
def task(self):
    """Report metrics and a status for every device listed by adb.

    For each device the CPU temperature, battery temperature and battery
    charge metrics are set. A device that is online and not listed as
    unhealthy gets status 'good'; one that is offline is recorded as
    unhealthy with its adb state as the reason; one whose probing raised
    gets status 'unknown'. Finally every unhealthy device gets its recorded
    reason as status. ts_mon is flushed in all cases.

    Returns:
        True when the pass completed and the flush returned.
    """

    def fields_for(device_id):
        # Metric fields for one device; type and OS are still placeholders.
        return {
            'device_id': device_id,
            'device_type': 'type',  # todo: get device type
            'device_os': 'os',  # todo: get os version
        }

    # todo: fetch blacklist when self.blacklist_file is set. Until then the
    # mapping (device id string -> {'reason': ...}) always starts out empty.
    unhealthy_devices = {}

    try:
        for raw_device in adb_wrapper.AdbWrapper.Devices(desired_state=None):
            device = device_utils.DeviceUtils(raw_device)
            serial = str(device)
            fields = fields_for(serial)
            try:
                device_metrics.set_cpu_temp(device, fields)
                device_metrics.set_battery_temp(device, fields)
                device_metrics.set_battery_charge(device, fields)
                # Assume the blacklist is a more accurate source of truth
                # for device health, so defer to it when determining phone
                # status. The mapping is keyed by str(device), so the lookup
                # must use that string rather than the DeviceUtils object.
                if serial not in unhealthy_devices:
                    if device.IsOnline():
                        device_metrics.set_device_status(
                            device, fields, status='good')
                    else:
                        logging.warning(
                            'Unhealthy device %s not listed in blacklist.',
                            serial)
                        unhealthy_devices[serial] = {
                            'reason': device.adb.GetState()}
            except Exception:  # todo: change this to catch only device errors
                logging.exception('Error when fetching status of %s.', serial)
                device_metrics.set_device_status(
                    device, fields, status='unknown')

        # Report each unhealthy device with its own reason and its own
        # fields. The old loop indexed the key string with ['reason']
        # (a TypeError) and reused the last polled device's fields.
        for serial, info in unhealthy_devices.items():
            device_metrics.set_device_status(
                serial, fields_for(serial), status=info['reason'])
    finally:
        ts_mon.flush()
    return True
def main(argv):  # pragma: no cover
    """Send the monitoring event(s) selected on the command line.

    Does nothing when no arguments are passed, to make it safe to import
    this module (main() is executed on import, because this file is called
    __main__).

    Args:
        argv: Command-line arguments handed to send_event.get_arguments().

    Returns:
        0 when argv is empty or an event option was handled (even if
        sending raised: the failure is recorded in the success metric and
        its traceback printed), 2 when no event option was given.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    import traceback

    status = 0

    if not argv:
        return status

    success_metric = ts_mon.BooleanMetric('send_monitoring_event/success')

    try:
        args = send_event.get_arguments(argv)
        send_event.process_argparse_options(args)

        if args.build_event_type:
            success_metric.set(send_event.send_build_event(args))
        elif args.service_event_type:
            success_metric.set(send_event.send_service_event(args))
        elif args.events_from_file:
            success_metric.set(send_event.send_events_from_file(args))
        else:
            print >> sys.stderr, (
                'At least one of the --*-event-type options or '
                '--events-from-file should be provided. Nothing '
                'was sent.')
            status = 2
            success_metric.set(False)
    except Exception:
        success_metric.set(False)
        # Do not fail silently: show what went wrong.
        traceback.print_exc()
    finally:
        event_mon.close()
        # Flushing is best effort, but every failure is now reported.
        try:
            ts_mon.flush()
        except ts_mon.MonitoringNoConfiguredMonitorError:
            logging.error("Unable to flush ts_mon because it's not configured.")
        except Exception:
            logging.exception("Flushing ts_mon metrics failed.")
    return status
def main(argv):  # pragma: no cover
    """Send the monitoring event(s) requested in ``argv``.

    Returns immediately when no arguments are passed, to make it safe to
    import this module (main() is executed on import, because this file is
    called __main__).

    Args:
        argv: Command-line arguments handed to send_event.get_arguments().

    Returns:
        2 when none of the event options was provided, otherwise 0.
    """
    if len(argv) == 0:
        return 0

    exit_code = 0
    try:
        args = send_event.get_arguments(argv)
        send_event.process_argparse_options(args)

        # Pick the matching sender, then invoke it in one place.
        if args.build_event_type:
            sender = send_event.send_build_event
        elif args.service_event_type:
            sender = send_event.send_service_event
        elif args.events_from_file:
            sender = send_event.send_events_from_file
        else:
            sender = None

        if sender is not None:
            success_metric.set(sender(args))
        else:
            print >> sys.stderr, ('At least one of the --*-event-type options or '
                                  '--events-from-file should be provided. Nothing '
                                  'was sent.')
            exit_code = 2
            success_metric.set(False)
    except Exception:
        success_metric.set(False)
        traceback.print_exc()  # helps with debugging locally.
    finally:
        event_mon.close()
        # A failing flush is logged and never raised.
        try:
            ts_mon.flush()
        except ts_mon.MonitoringNoConfiguredMonitorError:
            logging.error("Unable to flush ts_mon because it's not configured.")
        except Exception:
            logging.exception("Flushing ts_mon metrics failed.")
    return exit_code
def main(argv):  # pragma: no cover
    """Send monitoring events according to ``argv`` and record the outcome.

    Does nothing when no arguments are passed, to make it safe to import
    this module (main() is executed on import, because this file is called
    __main__).

    Args:
        argv: Command-line arguments handed to send_event.get_arguments().

    Returns:
        0 when argv is empty or an event option was handled, 2 when none of
        the event options was given. An exception while sending does not
        change the status; it sets the success metric to False and its
        traceback is printed.
    """
    # Imported locally so the block does not rely on file-level imports.
    import logging
    import traceback

    status = 0

    if not argv:
        return status

    success_metric = ts_mon.BooleanMetric('send_monitoring_event/success')

    try:
        args = send_event.get_arguments(argv)
        send_event.process_argparse_options(args)

        if args.build_event_type:
            success_metric.set(send_event.send_build_event(args))
        elif args.service_event_type:
            success_metric.set(send_event.send_service_event(args))
        elif args.events_from_file:
            success_metric.set(send_event.send_events_from_file(args))
        else:
            print >> sys.stderr, ('At least one of the --*-event-type options or '
                                  '--events-from-file should be provided. Nothing '
                                  'was sent.')
            status = 2
            success_metric.set(False)
    except Exception:
        success_metric.set(False)
        # Previously swallowed without a trace; print it for debugging.
        traceback.print_exc()
    finally:
        event_mon.close()
        # Best-effort flush: report failures instead of ignoring or raising.
        try:
            ts_mon.flush()
        except ts_mon.MonitoringNoConfiguredMonitorError:
            logging.error("Unable to flush ts_mon because it's not configured.")
        except Exception:
            logging.exception("Flushing ts_mon metrics failed.")
    return status
def run(self, args=None):
    """Main application entry point.

    Parses the command line, logs startup information, calls self.main()
    and exits the process with its status via sys.exit().

    Args:
        args: Full argument vector including the program name at index 0.
            Defaults to sys.argv.
    """
    if args is None:  # pragma: no cover
        args = sys.argv

    # Add and parse commandline args.
    program_name = self.PROG_NAME or args[0]
    self.parser = argparse.ArgumentParser(
        description=self.DESCRIPTION,
        prog=program_name,
        formatter_class=argparse.RawTextHelpFormatter)
    self.add_argparse_options(self.parser)
    self.opts = self.parser.parse_args(args[1:])
    self.process_argparse_options(self.opts)

    # Print a startup log message.
    started_at = datetime.datetime.utcfromtimestamp(
        psutil.Process().create_time())
    logging.info('Process started at %s', started_at.isoformat())
    logging.info('Command line arguments:')
    for position, value in enumerate(sys.argv):
        logging.info('argv[%d]: %s', position, value)
    logging.info('Process id %d', os.getpid())
    logging.info('Current working directory %s', os.getcwd())

    # Run the application's main function.
    try:
        exit_status = self.main(self.opts)
    except Exception:
        logging.exception('Uncaught exception, exiting:')
        if self.USES_TS_MON:
            # Flushing ts_mon to try to report the exception.
            ts_mon.flush()
        exit_status = 1

    sys.exit(exit_status)
def _flush_and_log_exceptions(self):
    """Flush ts_mon, reporting any failure through ``log.err``.

    Exceptions derived from Exception are never propagated to the caller.
    """
    try:
        ts_mon.flush()
    except Exception:
        # Passing None as the first argument: presumably log.err then
        # reports the exception being handled - TODO confirm.
        log.err(None, 'Automatic monitoring flush failed.')