def cycle_results(self, overrun_secs):
    """Yield the results from a run of all the jobs.

    If overrun_secs elapse and max_overrunnable is nonzero then jobs may
    be left to run in the background. The results from these 'overrun'
    jobs will be yielded in subsequent calls to 'cycle_results' or
    'finish_results'.

    Jobs which are currently overrunning will not be started again until
    the overrun job has finished.

    :overrun_secs: seconds to wait before considering leaving jobs behind
    :yields: an (index, result) tuple

    """
    # make a timer out of the overrun_secs and pass to _cycle_results
    timer = phlsys_timer.Timer()
    timer.start()

    def overrun_condition():
        return timer.duration >= overrun_secs

    for index, result in self._cycle_results(overrun_condition):
        yield index, result

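# Illustrative usage sketch only, not part of this module: it drives
# cycle_results() over several cycles and then drains any leftover overrun
# jobs with finish_results(). The 'job_list' of callables, the worker counts
# and the 1.0s overrun budget are hypothetical; the constructor arguments
# mirror the CyclingPool usage in do() below.
def _example_drive_pool(job_list):
    pool = phlmp_cyclingpool.CyclingPool(job_list, 4, 2)
    all_results = []
    for _ in range(3):
        # slow jobs may be left overrunning; their results appear in a
        # later cycle_results() call or in finish_results()
        for index, result in pool.cycle_results(overrun_secs=1.0):
            all_results.append((index, result))
    # drain any jobs that are still overrunning
    for index, result in pool.finish_results():
        all_results.append((index, result))
    return all_results
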
@contextlib.contextmanager
def _remote_io_event_log_context(kind, identifier, detail, logger):
    _log_remote_io_event_to_logger(
        kind, 'start', identifier, detail, '', logger)

    timer = phlsys_timer.Timer()
    timer.start()

    result_list = []

    try:
        yield result_list
    finally:
        prolog = '{:.3f}s'.format(timer.duration)
        _log_remote_io_event_to_logger(
            kind, prolog, identifier, detail, result_list, logger)

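# Illustrative usage sketch only: shows how a caller might wrap a remote IO
# operation with the context manager above. The 'read' kind, the detail
# string and the appended result are hypothetical; callers append to the
# yielded list so that the 'finally' block logs what was produced alongside
# the elapsed time.
def _example_logged_fetch(identifier, logger):
    with _remote_io_event_log_context(
            'read', identifier, 'example fetch', logger) as results:
        # ... perform the remote IO operation here ...
        results.append('fetched 3 refs')
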
def do(
        repo_configs,
        sys_admin_emails,
        sleep_secs,
        is_no_loop,
        external_report_command,
        mail_sender,
        max_workers,
        overrun_secs):

    conduit_manager = _ConduitManager()

    fs_accessor = abdt_fs.make_default_accessor()
    url_watcher_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
        fs_accessor.layout.urlwatcher_cache_path)

    # decide max workers based on number of CPUs if no value is specified
    if max_workers == 0:
        max_workers = determine_max_workers_default()

    repo_list = []
    for name, config in repo_configs:
        repo_list.append(
            _ArcydManagedRepository(
                name,
                config,
                conduit_manager,
                url_watcher_wrapper,
                sys_admin_emails,
                mail_sender))

    # if we always overrun half our workers then the loop is sustainable; if
    # we overrun more than that then we'll be lagging too far behind. In the
    # event that we only have one worker then we can't overrun any.
    max_overrun_workers = max_workers // 2

    pool = phlmp_cyclingpool.CyclingPool(
        repo_list, max_workers, max_overrun_workers)

    cycle_timer = phlsys_timer.Timer()
    cycle_timer.start()
    exit_code = None
    while exit_code is None:

        # This timer needs to be separate from the cycle timer. The cycle
        # timer must be reset every time it is reported. The sleep timer
        # makes sure that each run of the loop takes a minimum amount of
        # time.
        sleep_timer = phlsys_timer.Timer()
        sleep_timer.start()

        # refresh git snoops
        with abdt_logging.remote_io_read_event_context(
                'refresh-git-snoop', ''):
            abdt_tryloop.critical_tryloop(
                url_watcher_wrapper.watcher.refresh,
                abdt_errident.GIT_SNOOP,
                '')

        with abdt_logging.remote_io_read_event_context('refresh-conduit', ''):
            conduit_manager.refresh_conduits()

        with abdt_logging.misc_operation_event_context(
                'process-repos',
                '{} workers, {} repos'.format(max_workers, len(repo_list))):
            if max_workers > 1:
                for i, res in pool.cycle_results(overrun_secs=overrun_secs):
                    repo = repo_list[i]
                    repo.merge_from_worker(res)
            else:
                for r in repo_list:
                    r()

        # important to do this before stopping arcyd and as soon as possible
        # after doing fetches
        url_watcher_wrapper.save()

        # report cycle stats
        report = {
            "cycle_time_secs": cycle_timer.restart(),
            "overrun_jobs": pool.num_active_jobs,
        }
        _LOGGER.debug("cycle-stats: {}".format(report))

        if external_report_command:
            report_json = json.dumps(report)
            full_path = os.path.abspath(external_report_command)
            with abdt_logging.misc_operation_event_context(
                    'external-report-command', external_report_command):
                try:
                    phlsys_subprocess.run(full_path, stdin=report_json)
                except phlsys_subprocess.CalledProcessError as e:
                    _LOGGER.error(
                        "External command: {} failed with exception: "
                        "{}.".format(
                            external_report_command, type(e).__name__))
                    _LOGGER.error(
                        "VERBOSE MESSAGE: CycleReportJson:{}".format(e))

        if is_no_loop:
            exit_code = abdi_processexitcodes.ExitCodes.ec_exit
        elif os.path.isfile(fs_accessor.layout.killfile):
            exit_code = abdi_processexitcodes.ExitCodes.ec_exit
            if phlsys_fs.read_text_file(fs_accessor.layout.killfile):
                _LOGGER.info("Killfile observed, reason given: {}".format(
                    phlsys_fs.read_text_file(fs_accessor.layout.killfile)))
            else:
                _LOGGER.info("Killfile observed, arcyd will stop")
            os.remove(fs_accessor.layout.killfile)
        elif os.path.isfile(fs_accessor.layout.reloadfile):
            _LOGGER.info("Reloadfile observed, arcyd will reload")
            exit_code = abdi_processexitcodes.ExitCodes.ec_reload
            os.remove(fs_accessor.layout.reloadfile)

        # sleep to pad out the cycle
        secs_to_sleep = float(sleep_secs) - float(sleep_timer.duration)
        if secs_to_sleep > 0 and exit_code is None:
            with abdt_logging.misc_operation_event_context(
                    'sleep', secs_to_sleep):
                time.sleep(secs_to_sleep)

    # finish any jobs that overran
    for i, res in pool.finish_results():
        repo = repo_list[i]
        repo.merge_from_worker(res)

    # important to do this before stopping arcyd and as soon as possible
    # after doing fetches
    url_watcher_wrapper.save()

    return exit_code

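# Illustrative sketch only, not part of arcyd: the loop in do() pipes the
# cycle report to 'external_report_command' as JSON on its stdin, so a
# consumer can decode it like this. The function name is hypothetical; the
# keys match the 'report' dict built in do() above.
def _example_parse_cycle_report(report_json):
    report = json.loads(report_json)
    return "cycle took {}s with {} overrun jobs".format(
        report["cycle_time_secs"], report["overrun_jobs"])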