Пример #1
0
def main():
  """Load the desired master state and run one command per triggered master.

  The desired state comes from ``args.json_file`` when that option is set and
  from gitiles (``args.json_gitiles``) otherwise. One command is built per
  triggered master with ``synthesize_master_manager_cmd`` and the commands are
  executed with ``subprocess.call`` through ``multiprocess.safe_map``.

  Returns:
    0 when ``args.verify`` is set (loading the state was the whole job);
    otherwise None after the commands have been dispatched.
  """
  args = parse_args()
  # Flush right away so the presence/up metrics are sent even when the
  # program finishes in under a minute.
  ts_mon.flush()

  if not args.json_file:
    raw_state = gitiles.call_gitiles(
        args.json_gitiles, 'text', netrc_path=args.netrc)
    desired_state = desired_state_parser.parse_desired_state(raw_state)
  else:
    desired_state = desired_state_parser.load_desired_state_file(
        args.json_file)

  if args.verify:
    # The state loaded without error, which is all verification needs.
    return 0

  triggered, ignored = desired_state_parser.get_masters_for_host(
      desired_state, args.build_dir, args.hostname)
  log_triggered_ignored(triggered, ignored, args.hostname)

  commands = []
  for master in triggered:
    commands.append(
        synthesize_master_manager_cmd(master, args.hostname, prod=args.prod))

  if args.command_timeout:
    commands = [
        daemon.add_timeout(cmd, args.command_timeout) for cmd in commands]

  multiprocess.safe_map(subprocess.call, commands, args.processes)
Пример #2
0
 def single_iteration():
   """Poll every monitor once, then flush ts_mon.

   The flush runs even when a poll raises; the exception then propagates
   to the caller.

   Returns:
     True when every monitor was polled without raising.
   """
   try:
     for monitor in monitors:
       monitor.poll()
   finally:
     # Send whatever was collected, even after a partial run.
     ts_mon.flush()
   return True
Пример #3
0
def main(argv):
    """Set metrics from command-line options, then flush them.

    Each metric option (``gauge``, ``float``, ...) is handed to
    ``set_metrics`` and its ``<name>_file`` counterpart to
    ``set_metrics_file``, both together with the matching ts_mon metric
    class. All inline options are processed before any file option.

    Args:
        argv: Command-line arguments passed to ``get_arguments``.
    """
    args = get_arguments(argv)
    infra_libs.logs.process_argparse_options(args)
    ts_mon.process_argparse_options(args)

    # Option name -> metric class. The "<name>_file" option shares the class.
    metric_kinds = (
        ('gauge', ts_mon.GaugeMetric),
        ('float', ts_mon.FloatMetric),
        ('string', ts_mon.StringMetric),
        ('bool', ts_mon.BooleanMetric),
        ('counter', ts_mon.CounterMetric),
        ('cumulative', ts_mon.CumulativeMetric),
    )

    inline_values = [
        (getattr(args, name), kind) for name, kind in metric_kinds
    ]
    for value, kind in inline_values:
        set_metrics(value, kind)

    file_values = [
        (getattr(args, name + '_file'), kind) for name, kind in metric_kinds
    ]
    for value, kind in file_values:
        set_metrics_file(value, kind)

    ts_mon.flush()
Пример #4
0
def main():
    """Load the desired master state and run a command for each master.

    Reads the desired state from ``args.json_file`` if given, otherwise
    fetches it through gitiles. Unless ``args.verify`` is set, it then builds
    one command per triggered master and runs them with ``subprocess.call``
    via ``multiprocess.safe_map``.

    Returns:
        0 when only verification was requested; None otherwise.
    """
    args = parse_args()
    # An early flush guarantees the presence/up metrics go out even if the
    # program runs for less than a minute.
    ts_mon.flush()

    if args.json_file:
        state = desired_state_parser.load_desired_state_file(args.json_file)
    else:
        fetched = gitiles.call_gitiles(
            args.json_gitiles, 'text', netrc_path=args.netrc)
        state = desired_state_parser.parse_desired_state(fetched)

    if args.verify:
        # File checks out, no need to continue.
        return 0

    triggered, ignored = desired_state_parser.get_masters_for_host(
        state, args.build_dir, args.hostname)
    log_triggered_ignored(triggered, ignored, args.hostname)

    commands = []
    for master in triggered:
        command = synthesize_master_manager_cmd(
            master, args.hostname, prod=args.prod)
        commands.append(command)

    if args.command_timeout:
        with_timeout = []
        for command in commands:
            with_timeout.append(
                daemon.add_timeout(command, args.command_timeout))
        commands = with_timeout

    multiprocess.safe_map(subprocess.call, commands, args.processes)
Пример #5
0
 def task(self):
   """Poll each monitor in ``self.monitors`` and flush ts_mon afterwards.

   The flush is performed whether or not polling raised.

   Returns:
     True when all monitors were polled without an exception.
   """
   try:
     for monitor in self.monitors:
       monitor.poll()
   finally:
     # Always push collected metrics, even if a poll failed part-way.
     ts_mon.flush()
   return True
Пример #6
0
 def single_iteration():
   """Run one polling pass over ``monitors`` and flush ts_mon.

   Returns:
     True if no poll raised. If one does, ts_mon is still flushed and the
     exception propagates.
   """
   try:
     for poller in monitors:
       poller.poll()
   finally:
     # Flush regardless of how the polling loop ended.
     ts_mon.flush()
   return True
Пример #7
0
  def main(self, opts):  # pragma: no cover
    """Send the event(s) selected by |opts| and record success in a metric.

    Exactly one sender is chosen, in priority order: build event, service
    event, events from file. Any exception while sending is caught, printed
    and recorded as a failure. event_mon is always closed and ts_mon flushed.

    Args:
      opts: Parsed options; ``build_event_type``, ``service_event_type`` and
        ``events_from_file`` select what is sent.

    Returns:
      2 when none of the selecting options was provided, 0 otherwise
      (including when sending raised).
    """
    exit_code = 0

    try:
      # Pick the sender first; the first option that is set wins.
      if opts.build_event_type:
        send = common.send_build_event
      elif opts.service_event_type:
        send = common.send_service_event
      elif opts.events_from_file:
        send = common.send_events_from_file
      else:
        send = None

      if send is not None:
        success_metric.set(send(opts))
      else:
        print >> sys.stderr, ('At least one of the --*-event-type options or '
                              '--events-from-file should be provided. Nothing '
                              'was sent.')
        exit_code = 2
        success_metric.set(False)
    except Exception:
      success_metric.set(False)
      traceback.print_exc()  # helps with debugging locally.
    finally:
      event_mon.close()
      # A failed flush must not mask the result computed above.
      try:
        ts_mon.flush()
      except ts_mon.MonitoringNoConfiguredMonitorError:
        logging.error("Unable to flush ts_mon because it's not configured.")
      except Exception:
        logging.exception("Flushing ts_mon metrics failed.")
    return exit_code
Пример #8
0
 def single_iteration():
     """Run every system-info collector once, then flush ts_mon.

     Collectors run in order: cpu, disk, mem, net, proc. The flush happens
     even if a collector raises, in which case the exception propagates.

     Returns:
         True when all collectors completed.
     """
     try:
         collectors = (
             get_cpu_info,
             get_disk_info,
             get_mem_info,
             get_net_info,
             get_proc_info,
         )
         for collect in collectors:
             collect()
     finally:
         ts_mon.flush()
     return True
Пример #9
0
 def single_iteration():
   """Collect cpu, disk, mem, net and proc info, then flush ts_mon.

   Returns:
     True if every collector ran without raising. ts_mon is flushed either
     way.
   """
   try:
     for gather in (get_cpu_info, get_disk_info, get_mem_info,
                    get_net_info, get_proc_info):
       gather()
   finally:
     # Report whatever was gathered, even after a failure.
     ts_mon.flush()
   return True
Пример #10
0
 def single_iteration():
   """Run the system and puppet metric collectors once, then flush ts_mon.

   The collectors run in a fixed order, ending with the puppet summary.
   ts_mon is flushed even when a collector raises.

   Returns:
     True when every collector completed without raising.
   """
   try:
     collectors = (
         system_metrics.get_cpu_info,
         system_metrics.get_disk_info,
         system_metrics.get_mem_info,
         system_metrics.get_net_info,
         system_metrics.get_proc_info,
         puppet_metrics.get_puppet_summary,
     )
     for collect in collectors:
       collect()
   finally:
     ts_mon.flush()
   return True
Пример #11
0
def main(argv):  # pragma: no cover
  """Set metrics from command-line options and flush them through ts_mon.

  Args:
    argv: Command-line arguments passed to ``get_arguments``.
  """
  args = get_arguments(argv)
  infra_libs.logs.process_argparse_options(args)
  ts_mon.process_argparse_options(args)

  # Each option is paired with the ts_mon metric class used to record it.
  for option_name, metric_type in (
      ('gauge', ts_mon.GaugeMetric),
      ('float', ts_mon.FloatMetric),
      ('string', ts_mon.StringMetric),
      ('bool', ts_mon.BooleanMetric),
      ('counter', ts_mon.CounterMetric),
      ('cumulative', ts_mon.CumulativeMetric),
  ):
    set_metrics(getattr(args, option_name), metric_type)

  ts_mon.flush()
Пример #12
0
def main(args):
    """Collect state for each given master directory and flush it to ts_mon.

    Args:
        args: Command-line arguments (without the program name) to parse.

    Returns:
        0 after the collected information has been flushed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-n', '--nice', type=int, metavar='VALUE',
        help='Set the nice level of the process to VALUE prior to execution.')
    parser.add_argument(
        'master_paths', nargs='+',
        help=('The paths to the master base directories to monitor. Consider '
              'the /path/to/build/masters/master.* wildcard to specify all of '
              'them.'))

    logs.add_argparse_options(parser)
    ts_mon.add_argparse_options(parser)

    opts = parser.parse_args(args)
    logs.process_argparse_options(opts)
    ts_mon.process_argparse_options(opts)

    # Changing the nice level is best effort: log a failure and carry on.
    nice_level = opts.nice
    if nice_level:
        logging.debug('Setting process "nice" to: %d', nice_level)
        try:
            os.nice(nice_level)
        except OSError as err:
            logging.error('Failed to update "nice" to %d: %s', nice_level, err)

    logging.info('Pulling master state from: %s', opts.master_paths)
    for path in opts.master_paths:
        name = master_path_to_name(path)

        # Metrics for this master go to the target buildbot/master/<name>.
        target = ts_mon.TaskTarget(
            'buildbot/master',
            name,
            opts.ts_mon_task_region,
            opts.ts_mon_task_hostname,
            opts.ts_mon_task_number)
        logging.info('Collecting log state for master "%s" at: %s',
                     name, path)
        get_master_state(path, target)

    logging.info('Flushing collected information.')
    ts_mon.flush()
    return 0
Пример #13
0
def Flush(reset_after=()):
    """Flush ts_mon metrics, logging (rather than raising) any failure.

    Does nothing when ``ts_mon`` is falsy.

    Args:
      reset_after: Metrics to reset once the flush has succeeded. A list is
        drained in place (metrics are popped from the end and reset), as
        before. Any other iterable, such as the default empty tuple, is
        copied first and left untouched.
    """
    if not ts_mon:
        return

    try:
        ts_mon.flush()
        # Only lists provide pop(); copy anything else so that tuples and
        # other iterables are reset instead of failing with AttributeError.
        if isinstance(reset_after, list):
            pending = reset_after
        else:
            pending = list(reset_after)
        while pending:
            pending.pop().reset()
    except ssl.SSLError as e:
        logging.warning('Caught transient network error while flushing: %s', e)
    except Exception as e:
        logging.error('Caught exception while flushing: %s', e)
def RunMatchers(stream, matchers):
    """Feed each line of |stream| to the emitters whose pattern matches it.

    Reading stops when ``stream.readline()`` returns the empty string. Every
    matcher is tried on every line, so one line may trigger several emitters.

    @param stream: A file object to read from.
    @param matchers: A list of (matcher, emitter) pairs. ``matcher`` is a
        regex; ``emitter`` is called with the match object when it matches.
    """
    # The input may end if the log gets rotated, so ts_mon is always closed
    # and flushed on the way out to avoid losing pending metrics.
    try:
        while True:
            line = stream.readline()
            if line == '':
                break
            for pattern, emit in matchers:
                match = pattern.match(line)
                if match:
                    emit(match)
    finally:
        ts_mon.close()
        ts_mon.flush()
Пример #15
0
def main(args):
  """Gather state from every listed master directory and flush it to ts_mon.

  Args:
    args: Command-line arguments (without the program name) to parse.

  Returns:
    0 once the collected information has been flushed.
  """
  nice_help = 'Set the nice level of the process to VALUE prior to execution.'
  paths_help = ('The paths to the master base directories to monitor. Consider '
                'the /path/to/build/masters/master.* wildcard to specify all of '
                'them.')

  parser = argparse.ArgumentParser()
  parser.add_argument('-n', '--nice', type=int, metavar='VALUE',
                      help=nice_help)
  parser.add_argument('master_paths', nargs='+', help=paths_help)

  logs.add_argparse_options(parser)
  ts_mon.add_argparse_options(parser)

  # Parse arguments.
  args = parser.parse_args(args)
  logs.process_argparse_options(args)
  ts_mon.process_argparse_options(args)

  # Adjusting the nice value is optional; a failure is logged and ignored.
  if args.nice:
    logging.debug('Setting process "nice" to: %d', args.nice)
    try:
      os.nice(args.nice)
    except OSError as nice_error:
      logging.error('Failed to update "nice" to %d: %s', args.nice, nice_error)

  # Update global state calculations.
  logging.info('Pulling master state from: %s', args.master_paths)
  for master_dir in args.master_paths:
    master = master_path_to_name(master_dir)

    # Log to the target: buildbot/master/<master>
    target_args = (
        'buildbot/master',
        master,
        args.ts_mon_task_region,
        args.ts_mon_task_hostname,
        args.ts_mon_task_number,
    )
    target = ts_mon.TaskTarget(*target_args)
    logging.info('Collecting log state for master "%s" at: %s',
                 master, master_dir)
    get_master_state(master_dir, target)

  logging.info('Flushing collected information.')
  ts_mon.flush()
  return 0
Пример #16
0
def main(args):
  """Publish commit-queue statistics as ts_mon metrics.

  Stats are acquired for ``opts.project`` / ``opts.range`` and three metrics
  are set from them: the distribution of patchset-committed durations, the
  false-rejection count, and the attempt count. ts_mon is flushed even if
  setting a metric raises.

  Args:
    args: Command-line arguments passed to ``parse_args``.

  Returns:
    0 when all metrics were set.
  """
  opts = parse_args(args)

  stats_args = StatsArgs(opts.project, opts.range)
  stats = cq_stats.acquire_stats(stats_args, add_tree_stats=False)

  try:
    distribution = ts_mon.Distribution(ts_mon.GeometricBucketer())
    for sample in stats['patchset-committed-durations']['raw']:
      distribution.add(sample)
    patchset_committed_durations.set(distribution)

    false_rejects = sum(
        entry['count'] for entry in stats['false-rejections'])
    attempt_false_reject_count.set(false_rejects)
    attempt_count.set(stats['attempt-count'])
  finally:
    ts_mon.flush()

  return 0
Пример #17
0
def main(args):  # pragma: no cover
    """Parse |args|, run the program, and always flush ts_mon before exit.

    A flush that returns a falsy value or raises is logged; it never replaces
    the result (or exception) of ``run``.

    Returns:
        Whatever ``run(opts)`` returns.
    """
    opts = parse_args(args)
    flush_failure = 'Failed to flush ts_mon data, potentially losing data'
    try:
        return run(opts)
    finally:
        # Always flush metrics before exit.
        try:
            flushed = ts_mon.flush()
            if not flushed:
                logging.error(flush_failure)
        except Exception:
            logging.exception(flush_failure)
Пример #18
0
  def task(self):
    """Record metrics for every attached device, then flush ts_mon.

    For each device returned by ``AdbWrapper.Devices`` the CPU temperature,
    battery temperature and battery charge metrics are set. A device that is
    not already known to be unhealthy is reported as 'good' when online;
    otherwise it is added to the unhealthy set with its adb state as the
    reason. Unhealthy devices are finally reported with that reason as their
    status. Errors for a single device are logged and reported as 'unknown'
    without stopping the loop. ts_mon is flushed even if the task raises.

    Returns:
      True when the task completed without raising.
    """
    # Maps device id (str) -> {'reason': <status string>}.
    if self.blacklist_file:
      unhealthy_devices = {} # todo: fetch blacklist
    else:
      unhealthy_devices = {}

    # Fields recorded per device id, so that each unhealthy device is reported
    # with its own fields rather than those of the last device iterated.
    fields_by_id = {}

    try:
      devices = adb_wrapper.AdbWrapper.Devices(desired_state=None)
      for device in devices:
        device = device_utils.DeviceUtils(device)
        device_id = str(device)
        fields = {
            'device_id': device_id,
            'device_type': 'type', # todo: get device type
            'device_os': 'os', # todo: get os version
        }
        fields_by_id[device_id] = fields
        try:
          device_metrics.set_cpu_temp(device, fields)
          device_metrics.set_battery_temp(device, fields)
          device_metrics.set_battery_charge(device, fields)
          # Assume the blacklist is a more accurate source of truth for device
          # health, so defer to it when determining phone status. The lookup
          # uses the string id because that is how the dict is keyed.
          if device_id not in unhealthy_devices:
            if device.IsOnline():
              device_metrics.set_device_status(device, fields, status='good')
            else:
              logging.warning('Unhealthy device %s not listed in blacklist.',
                              device_id)
              unhealthy_devices[device_id] = {'reason': device.adb.GetState()}
        except Exception: # todo: change this to catch only device errors
          logging.exception('Error when fetching status of %s.', device_id)
          device_metrics.set_device_status(device, fields, status='unknown')

      for device_id, info in unhealthy_devices.items():
        fields = fields_by_id.get(device_id)
        if fields is None:
          # Listed as unhealthy but not attached: build placeholder fields.
          fields = {
              'device_id': device_id,
              'device_type': 'type', # todo: get device type
              'device_os': 'os', # todo: get os version
          }
        # NOTE(review): the device is identified by its string id here, as in
        # the original loop over the dict keys; confirm set_device_status
        # accepts that.
        device_metrics.set_device_status(device_id, fields,
                                         status=info['reason'])

    finally:
      ts_mon.flush()

    return True
Пример #19
0
def main(argv):  # pragma: no cover
    """Send the monitoring event(s) described by |argv|.

    The outcome is recorded in the ``send_monitoring_event/success`` metric.
    Exceptions raised while parsing arguments or sending are reported on
    stderr and recorded as a failure instead of propagating. event_mon is
    always closed and ts_mon flushed; flush failures are logged.

    Args:
        argv: Command-line arguments. An empty sequence makes this a no-op.

    Returns:
        0 normally (including when sending raised or argv was empty);
        2 when no event-selecting option was provided.
    """
    # Imported locally so the block does not depend on file-level imports.
    import logging
    import traceback

    # Does nothing when no arguments are passed, to make it safe to import this
    # module (main() is executed on import, because this file is called __main__).
    status = 0

    if len(argv) == 0:
        return status

    success_metric = ts_mon.BooleanMetric('send_monitoring_event/success')

    try:
        args = send_event.get_arguments(argv)

        send_event.process_argparse_options(args)

        if args.build_event_type:
            success_metric.set(send_event.send_build_event(args))

        elif args.service_event_type:
            success_metric.set(send_event.send_service_event(args))

        elif args.events_from_file:
            success_metric.set(send_event.send_events_from_file(args))

        else:
            print >> sys.stderr, (
                'At least one of the --*-event-type options or '
                '--events-from-file should be provided. Nothing '
                'was sent.')
            status = 2
            success_metric.set(False)
    except Exception:
        success_metric.set(False)
        # Surface the error instead of discarding it silently.
        traceback.print_exc()
    finally:
        event_mon.close()
        # Flushing is best effort, but a failure should leave a trace.
        try:
            ts_mon.flush()
        except ts_mon.MonitoringNoConfiguredMonitorError:
            logging.error(
                "Unable to flush ts_mon because it's not configured.")
        except Exception:
            logging.exception("Flushing ts_mon metrics failed.")
    return status
Пример #20
0
def main(argv):  # pragma: no cover
  """Send the monitoring event(s) requested on the command line.

  The result is recorded in ``success_metric``. Exceptions raised while
  parsing or sending are printed and recorded as a failure. event_mon is
  always closed and ts_mon flushed, with flush failures logged.

  Args:
    argv: Command-line arguments. An empty sequence makes this a no-op.

  Returns:
    2 when no event-selecting option was given, 0 in every other case.
  """
  exit_status = 0

  # With no arguments there is nothing to do. This keeps importing the module
  # safe, since main() is executed on import (the file is called __main__).
  if not len(argv):
    return exit_status

  try:
    args = send_event.get_arguments(argv)
    send_event.process_argparse_options(args)

    # The first selecting option that is set decides what gets sent.
    if args.build_event_type:
      outcome = send_event.send_build_event(args)
    elif args.service_event_type:
      outcome = send_event.send_service_event(args)
    elif args.events_from_file:
      outcome = send_event.send_events_from_file(args)
    else:
      print >> sys.stderr, ('At least one of the --*-event-type options or '
                            '--events-from-file should be provided. Nothing '
                            'was sent.')
      exit_status = 2
      outcome = False
    success_metric.set(outcome)
  except Exception:
    success_metric.set(False)
    traceback.print_exc()  # helps with debugging locally.
  finally:
    event_mon.close()
    try:
      ts_mon.flush()
    except ts_mon.MonitoringNoConfiguredMonitorError:
      logging.error("Unable to flush ts_mon because it's not configured.")
    except Exception:
      logging.exception("Flushing ts_mon metrics failed.")
  return exit_status
Пример #21
0
def main(argv):  # pragma: no cover
  """Send the monitoring event(s) described by |argv|.

  The outcome is recorded in the ``send_monitoring_event/success`` metric.
  Exceptions raised while parsing arguments or sending are reported on stderr
  and recorded as a failure instead of propagating. event_mon is always closed
  and ts_mon flushed; flush failures are logged.

  Args:
    argv: Command-line arguments. An empty sequence makes this a no-op.

  Returns:
    0 normally (including when sending raised or argv was empty);
    2 when no event-selecting option was provided.
  """
  # Imported locally so the block does not depend on file-level imports.
  import logging
  import traceback

  # Does nothing when no arguments are passed, to make it safe to import this
  # module (main() is executed on import, because this file is called __main__).
  status = 0

  if len(argv) == 0:
    return status

  success_metric = ts_mon.BooleanMetric('send_monitoring_event/success')

  try:
    args = send_event.get_arguments(argv)

    send_event.process_argparse_options(args)

    if args.build_event_type:
      success_metric.set(send_event.send_build_event(args))

    elif args.service_event_type:
      success_metric.set(send_event.send_service_event(args))

    elif args.events_from_file:
      success_metric.set(send_event.send_events_from_file(args))

    else:
      print >> sys.stderr, ('At least one of the --*-event-type options or '
                            '--events-from-file should be provided. Nothing '
                            'was sent.')
      status = 2
      success_metric.set(False)
  except Exception:
    success_metric.set(False)
    # Surface the error instead of discarding it silently.
    traceback.print_exc()
  finally:
    event_mon.close()
    # Flushing is best effort, but a failure should leave a trace.
    try:
      ts_mon.flush()
    except ts_mon.MonitoringNoConfiguredMonitorError:
      logging.error("Unable to flush ts_mon because it's not configured.")
    except Exception:
      logging.exception("Flushing ts_mon metrics failed.")
  return status
Пример #22
0
    def run(self, args=None):
        """Parse options, log startup details, run ``main`` and exit.

        Args:
            args: Full argument vector including the program name at index 0.
                Defaults to ``sys.argv``.

        This method does not return: it always ends in ``sys.exit`` with the
        status returned by ``self.main``, or 1 if ``self.main`` raised.
        """
        if args is None:  # pragma: no cover
            args = sys.argv

        # Build the parser, let the application register and process options.
        self.parser = argparse.ArgumentParser(
            description=self.DESCRIPTION,
            prog=self.PROG_NAME or args[0],
            formatter_class=argparse.RawTextHelpFormatter)
        self.add_argparse_options(self.parser)
        self.opts = self.parser.parse_args(args[1:])
        self.process_argparse_options(self.opts)

        # Startup log: process start time, argv, pid and working directory.
        started_at = datetime.datetime.utcfromtimestamp(
            psutil.Process().create_time())
        logging.info('Process started at %s', started_at.isoformat())
        logging.info('Command line arguments:')
        for position, value in enumerate(sys.argv):
            logging.info('argv[%d]: %s', position, value)
        logging.info('Process id %d', os.getpid())
        logging.info('Current working directory %s', os.getcwd())

        # Run the application's main function.
        try:
            exit_status = self.main(self.opts)
        except Exception:
            logging.exception('Uncaught exception, exiting:')
            if self.USES_TS_MON:
                # Flushing ts_mon to try to report the exception.
                ts_mon.flush()
            exit_status = 1

        sys.exit(exit_status)
Пример #23
0
 def _flush_and_log_exceptions(self):
     """Flush ts_mon, reporting a failure through ``log.err`` instead of raising."""
     failure_note = 'Automatic monitoring flush failed.'
     try:
         ts_mon.flush()
     except Exception:
         # Passing None presumably makes log.err report the exception being
         # handled (Twisted convention) - confirm against the log module used.
         log.err(None, failure_note)