def end(self):
  """This pants run is over, so stop tracking it.

  Note: If end() has been called once, subsequent calls are no-ops.
  """
  if self._background_worker_pool:
    if self._aborted:
      self.log(Report.INFO, "Aborting background workers.")
      self._background_worker_pool.abort()
    else:
      self.log(Report.INFO, "Waiting for background workers to finish.")
      self._background_worker_pool.shutdown()
    self.end_workunit(self._background_root_workunit)

  SubprocPool.shutdown(self._aborted)

  # Run a dummy work unit to write out one last timestamp.
  with self.new_workunit("complete"):
    pass

  self.end_workunit(self._main_root_workunit)

  outcome = self._main_root_workunit.outcome()
  if self._background_root_workunit:
    outcome = min(outcome, self._background_root_workunit.outcome())
  outcome_str = WorkUnit.outcome_string(outcome)
  log_level = RunTracker._log_levels[outcome]
  self.log(log_level, outcome_str)

  if self.run_info.get_info('outcome') is None:
    # If the goal is clean-all then the run info dir no longer exists, so ignore that error.
    self.run_info.add_info('outcome', outcome_str, ignore_errors=True)

  self.report.close()
  self.store_stats()
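# Illustrative sketch (not part of RunTracker): why `min` combines the outcomes above.
# This assumes WorkUnit orders its outcome constants from worst to best, so taking the minimum
# of the main and background root workunit outcomes reports the worst outcome of the whole run.
# The constant values below are assumptions for illustration only.
ABORTED, FAILURE, WARNING, SUCCESS, UNKNOWN = range(5)

main_outcome = SUCCESS
background_outcome = FAILURE
overall_outcome = min(main_outcome, background_outcome)  # FAILURE: the run is reported as failed.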
def __init__(self, *args, **kwargs):
  """
  :API: public
  """
  super(RunTracker, self).__init__(*args, **kwargs)
  self._run_timestamp = time.time()
  self._cmd_line = ' '.join(['pants'] + sys.argv[1:])

  # Initialized in `initialize()`.
  self.run_info_dir = None
  self.run_info = None
  self.cumulative_timings = None
  self.self_timings = None
  self.artifact_cache_stats = None
  self.pantsd_stats = None

  # Initialized in `start()`.
  self.report = None
  self._main_root_workunit = None

  # A lock to ensure that adding to stats at the end of a workunit
  # operates thread-safely.
  self._stats_lock = threading.Lock()

  # Log of success/failure/aborted for each workunit.
  self.outcomes = {}

  # Number of threads for foreground work.
  self._num_foreground_workers = self.get_options().num_foreground_workers

  # Number of threads for background work.
  self._num_background_workers = self.get_options().num_background_workers

  # self._threadlocal.current_workunit contains the current workunit for the calling thread.
  # Note that multiple threads may share a name (e.g., all the threads in a pool).
  self._threadlocal = threading.local()

  # For background work. Created lazily if needed.
  self._background_worker_pool = None
  self._background_root_workunit = None

  # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
  SubprocPool.set_num_processes(self._num_foreground_workers)
  SubprocPool.foreground()

  self._aborted = False

  # Data will be organized first by target and then scope.
  # Eg:
  # {
  #   'target/address:name': {
  #     'running_scope': {
  #       'run_duration': 356.09
  #     },
  #     'GLOBAL': {
  #       'target_type': 'pants.test'
  #     }
  #   }
  # }
  self._target_to_data = {}
def shutdown_worker_pool(self):
  """Shuts down the SubprocPool.

  N.B. This exists only for internal use and to afford for fork()-safe operation in pantsd.

  :API: public
  """
  SubprocPool.shutdown(self._aborted)
def __init__(self, *args, **kwargs):
  super(RunTracker, self).__init__(*args, **kwargs)
  run_timestamp = time.time()
  cmd_line = ' '.join(['pants'] + sys.argv[1:])

  # run_id is safe for use in paths.
  millis = int((run_timestamp * 1000) % 1000)
  run_id = 'pants_run_{}_{}_{}'.format(
    time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(run_timestamp)),
    millis,
    uuid.uuid4().hex)

  info_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
  self.run_info_dir = os.path.join(info_dir, run_id)
  self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
  self.run_info.add_basic_info(run_id, run_timestamp)
  self.run_info.add_info('cmd_line', cmd_line)

  # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
  link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
  relative_symlink(self.run_info_dir, link_to_latest)

  # Time spent in a workunit, including its children.
  self.cumulative_timings = AggregatedTimings(os.path.join(self.run_info_dir,
                                                           'cumulative_timings'))

  # Time spent in a workunit, not including its children.
  self.self_timings = AggregatedTimings(os.path.join(self.run_info_dir, 'self_timings'))

  # Hit/miss stats for the artifact cache.
  self.artifact_cache_stats = \
    ArtifactCacheStats(os.path.join(self.run_info_dir, 'artifact_cache_stats'))

  # Number of threads for foreground work.
  self._num_foreground_workers = self.get_options().num_foreground_workers

  # Number of threads for background work.
  self._num_background_workers = self.get_options().num_background_workers

  # We report to this Report.
  self.report = None

  # self._threadlocal.current_workunit contains the current workunit for the calling thread.
  # Note that multiple threads may share a name (e.g., all the threads in a pool).
  self._threadlocal = threading.local()

  # For main thread work. Created on start().
  self._main_root_workunit = None

  # For background work. Created lazily if needed.
  self._background_worker_pool = None
  self._background_root_workunit = None

  # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
  SubprocPool.set_num_processes(self._num_foreground_workers)
  SubprocPool.foreground()

  self._aborted = False
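# Illustrative note (example values assumed): `millis` keeps only the sub-second remainder of the
# timestamp, so a run started at 2017-06-01 10:02:03.456 with an example uuid4 hex suffix would
# produce a run_id like 'pants_run_2017_06_01_10_02_03_456_<hex>', which is then safe to use as
# the run_info_dir directory name.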
def end(self):
  """This pants run is over, so stop tracking it.

  Note: If end() has been called once, subsequent calls are no-ops.
  """
  if self._background_worker_pool:
    if self._aborted:
      self.log(Report.INFO, "Aborting background workers.")
      self._background_worker_pool.abort()
    else:
      self.log(Report.INFO, "Waiting for background workers to finish.")
      self._background_worker_pool.shutdown()
    self.report.end_workunit(self._background_root_workunit)
    self._background_root_workunit.end()

  if self._foreground_worker_pool:
    if self._aborted:
      self.log(Report.INFO, "Aborting foreground workers.")
      self._foreground_worker_pool.abort()
    else:
      self.log(Report.INFO, "Waiting for foreground workers to finish.")
      self._foreground_worker_pool.shutdown()

  SubprocPool.shutdown(self._aborted)

  self.report.end_workunit(self._main_root_workunit)
  self._main_root_workunit.end()

  outcome = self._main_root_workunit.outcome()
  if self._background_root_workunit:
    outcome = min(outcome, self._background_root_workunit.outcome())
  outcome_str = WorkUnit.outcome_string(outcome)
  log_level = WorkUnit.choose_for_outcome(outcome,
                                          Report.ERROR, Report.ERROR,
                                          Report.WARN, Report.INFO, Report.INFO)
  self.log(log_level, outcome_str)

  if self.run_info.get_info('outcome') is None:
    try:
      self.run_info.add_info('outcome', outcome_str)
    except IOError:
      pass  # If the goal is clean-all then the run info dir no longer exists...

  self.report.close()
  self.upload_stats()
def subproc_map(self, f, items):
  """Map function `f` over `items` in subprocesses and return the result.

  :param f: A multiproc-friendly (importable) work function.
  :param items: An iterable of pickleable arguments to f.
  """
  try:
    # Pool.map (and async_map().get() w/o timeout) can miss SIGINT.
    # See: http://stackoverflow.com/a/1408476, http://bugs.python.org/issue8844
    # Instead, we map_async(...), wait *with a timeout* until ready, then .get()
    # NB: in 2.x, wait() with timeout wakes up often to check, burning CPU. Oh well.
    res = SubprocPool.foreground().map_async(f, items)
    while not res.ready():
      res.wait(60)  # Repeatedly wait for up to a minute.
      if not res.ready():
        self.log.debug('subproc_map result still not ready...')
    return res.get()
  except KeyboardInterrupt:
    SubprocPool.shutdown(True)
    raise
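# Hedged usage sketch for subproc_map (the names below are hypothetical, not part of RunTracker):
# `f` must be importable at module scope and `items` must be pickleable, since both are shipped
# to worker subprocesses.

def _square(x):  # Defined at module scope so worker subprocesses can import it.
  return x * x

# Assuming `run_tracker` is an initialized RunTracker instance:
#   squares = run_tracker.subproc_map(_square, [1, 2, 3])  # -> [1, 4, 9]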
def exec_on_subproc(self, f, args):
  """Send work to a subprocess and block on it.

  This can be used by existing background Work in a ThreadPool to sidestep the GIL: the Thread
  calls this method and still blocks until the work is complete, so existing reporting and
  accounting is unchanged, but the actual work is executed in a subprocess, avoiding lock
  contention.

  :param f: A multiproc-friendly (importable) work function.
  :param args: Multiproc-friendly (pickleable) arguments to f.
  """
  return SubprocPool.background().apply(f, args)
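# Hedged usage sketch for exec_on_subproc (hypothetical names, not part of RunTracker): a thread
# running background Work stays blocked, so reporting and accounting are unchanged, while the
# CPU-bound part executes in a subprocess outside the GIL.

def _expensive_digest(path):  # Importable function with pickleable args, safe for a subprocess.
  with open(path, 'rb') as fp:
    return hash(fp.read())

# From inside Work running on a ThreadPool, assuming `run_tracker` is in scope:
#   digest = run_tracker.exec_on_subproc(_expensive_digest, ('/tmp/example_file',))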
def __init__(self, info_dir, stats_upload_url=None, stats_upload_timeout=2,
             num_foreground_workers=8, num_background_workers=8):
  self.run_timestamp = time.time()  # A double, so we get subsecond precision for ids.
  cmd_line = ' '.join(['./pants'] + sys.argv[1:])

  # run_id is safe for use in paths.
  millis = (self.run_timestamp * 1000) % 1000
  run_id = 'pants_run_%s_%d' % \
           (time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(self.run_timestamp)), millis)

  self.run_info_dir = os.path.join(info_dir, run_id)
  self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
  self.run_info.add_basic_info(run_id, self.run_timestamp)
  self.run_info.add_info('cmd_line', cmd_line)
  self.stats_url = stats_upload_url
  self.stats_timeout = stats_upload_timeout

  # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
  link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
  try:
    if os.path.lexists(link_to_latest):
      os.unlink(link_to_latest)
    os.symlink(self.run_info_dir, link_to_latest)
  except OSError as e:
    # Another run may beat us to deletion or creation.
    if not (e.errno == errno.EEXIST or e.errno == errno.ENOENT):
      raise

  # Time spent in a workunit, including its children.
  self.cumulative_timings = AggregatedTimings(os.path.join(self.run_info_dir,
                                                           'cumulative_timings'))

  # Time spent in a workunit, not including its children.
  self.self_timings = AggregatedTimings(os.path.join(self.run_info_dir, 'self_timings'))

  # Hit/miss stats for the artifact cache.
  self.artifact_cache_stats = \
    ArtifactCacheStats(os.path.join(self.run_info_dir, 'artifact_cache_stats'))

  # Number of threads for foreground work.
  self._num_foreground_workers = num_foreground_workers

  # Number of threads for background work.
  self._num_background_workers = num_background_workers

  # We report to this Report.
  self.report = None

  # self._threadlocal.current_workunit contains the current workunit for the calling thread.
  # Note that multiple threads may share a name (e.g., all the threads in a pool).
  self._threadlocal = threading.local()

  # For main thread work. Created on start().
  self._main_root_workunit = None

  # For concurrent foreground work. Created lazily if needed.
  # Associated with the main thread's root workunit.
  self._foreground_worker_pool = None

  # For background work. Created lazily if needed.
  self._background_worker_pool = None
  self._background_root_workunit = None

  # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
  SubprocPool.foreground()

  self._aborted = False
def shutdown_worker_pool(self):
  """Shuts down the SubprocPool.

  N.B. This exists only for internal use and to afford for fork()-safe operation in pantsd.
  """
  SubprocPool.shutdown(self._aborted)
def __init__(self, *args, **kwargs):
  """
  :API: public
  """
  super(RunTracker, self).__init__(*args, **kwargs)
  self._run_timestamp = time.time()
  self._cmd_line = ' '.join(['pants'] + sys.argv[1:])
  self._sorted_goal_infos = tuple()

  # Initialized in `initialize()`.
  self.run_info_dir = None
  self.run_info = None
  self.cumulative_timings = None
  self.self_timings = None
  self.artifact_cache_stats = None
  self.pantsd_stats = None

  # Initialized in `start()`.
  self.report = None
  self._main_root_workunit = None
  self._all_options = None

  # A lock to ensure that adding to stats at the end of a workunit
  # operates thread-safely.
  self._stats_lock = threading.Lock()

  # Log of success/failure/aborted for each workunit.
  self.outcomes = {}

  # Number of threads for foreground work.
  self._num_foreground_workers = self.get_options().num_foreground_workers

  # Number of threads for background work.
  self._num_background_workers = self.get_options().num_background_workers

  # self._threadlocal.current_workunit contains the current workunit for the calling thread.
  # Note that multiple threads may share a name (e.g., all the threads in a pool).
  self._threadlocal = threading.local()

  # A logger facade that logs into this RunTracker.
  self._logger = RunTrackerLogger(self)

  # For background work. Created lazily if needed.
  self._background_worker_pool = None
  self._background_root_workunit = None

  # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
  SubprocPool.set_num_processes(self._num_foreground_workers)
  SubprocPool.foreground()

  self._aborted = False

  # Data will be organized first by target and then scope.
  # Eg:
  # {
  #   'target/address:name': {
  #     'running_scope': {
  #       'run_duration': 356.09
  #     },
  #     'GLOBAL': {
  #       'target_type': 'pants.test'
  #     }
  #   }
  # }
  self._target_to_data = {}