def __init__(self, input_file, task_name, threads=1, keep=False, package_mode=False, dependency_solver=False,
             skip_check_disk=False):
    """
    Store the user-specified options and build the shared state used while dispatching jobs.

    :param input_file: Path of the input file handed to the PairCenter.
    :param task_name: Name of the task; used to construct the Config.
    :param threads: Requested number of workers. Stored as `thread_num`.
    :param keep: Stored as-is on the dispatcher.
    :param package_mode: Stored on the dispatcher and forwarded to the PairCenter.
    :param dependency_solver: Stored as-is on the dispatcher.
    :param skip_check_disk: Copied onto the Config as `skip_check_disk`.
    """
    log.info('Initializing job dispatcher.')

    # Options supplied by the caller.
    self.input_file, self.thread_num = input_file, threads
    self.keep, self.package_mode = keep, package_mode
    self.dependency_solver = dependency_solver

    # Plain bookkeeping that needs no external resources.
    self.job_time_acc = 0
    self.alive_threads = 0
    self.threads = {}
    self.error_reasons = {}
    self.travis_images = None

    # Configuration and helper objects.
    self.config = Config(task_name)
    self.config.skip_check_disk = skip_check_disk
    self.utils = Utils(self.config)

    # Shared integer counters and the start timestamp.
    self.items_processed = Value('i', 0)
    self.reproduce_err = Value('i', 0)
    self.start_time = time.time()

    # Docker wrapper and its storage path.
    self.docker = DockerWrapper(self.utils)
    self.docker_storage_path = self.docker.setup_docker_storage_path()

    # Termination flag, manager-backed dictionaries and the lock.
    self.terminate = Value('i', 0)
    self.manager = Manager()
    self.lock = Lock()
    self.workspace_locks = self.manager.dict()
    self.cloned_repos = self.manager.dict()

    # The pair center is built last, from the input file.
    self.job_center = PairCenter(self.input_file, self.utils, self.package_mode)
def __init__(self, input_file, runs, task_name):
    """
    Store the inputs and build the helper objects used to analyze reproduced results.

    :param input_file: Path of the input file handed to the PairCenter.
    :param runs: Stored as-is on the analyzer.
    :param task_name: Name of the task; used to construct the Config.
    """
    log.info('Initializing ReproducedResultsAnalyzer.')
    self.input_file, self.runs = input_file, runs

    self.config = Config(task_name)
    self.utils = Utils(self.config)
    self.analyzer = analyzer.Analyzer()

    # The pair center is created here rather than in _pre_analyze so that the pairs keep their state between
    # the analysis of consecutive runs.
    self.pair_center = PairCenter(input_file, self.utils)

    # Placeholders; the real values are assigned in _pre_analyze.
    self.start_time = None
    self.reproduced_logs = None
    self.reproduced_logs_analyzed = None
    self.error_count = None
class ReproducedResultsAnalyzer(object):
    """
    Analyzes the reproduced results of the pairs in an input file over a number of runs, logs the resulting match
    history and reproducibility, and writes the input pairs back out as JSON annotated with those results.
    """

    def __init__(self, input_file, runs, task_name):
        """
        Store the inputs and build the helper objects used during the analysis.

        :param input_file: Path of the input file; read by the PairCenter and again when writing the output JSON.
        :param runs: Number of runs to analyze; runs are numbered from 1 to `runs`.
        :param task_name: Name of the task; used to construct the Config.
        """
        log.info('Initializing ReproducedResultsAnalyzer.')
        self.input_file, self.runs = input_file, runs

        self.config = Config(task_name)
        self.utils = Utils(self.config)
        self.analyzer = analyzer.Analyzer()

        # The pair center is created here rather than in _pre_analyze so that the pairs keep their state between
        # the analysis of consecutive runs.
        self.pair_center = PairCenter(input_file, self.utils)

        # Placeholders; the real values are assigned in _pre_analyze.
        self.start_time = None
        self.reproduced_logs = None
        self.reproduced_logs_analyzed = None
        self.error_count = None

    def run(self):
        """
        Analyze every run in order, then log the reproducibility summary and write the annotated output JSON.
        """
        for run_number in range(1, self.runs + 1):
            self._pre_analyze()
            self._analyze(run_number)
            self._post_analyze(run_number)
        self._show_reproducibility()
        self._write_output_json()
        log.info('Done!')

    def _iter_buildpairs(self):
        """
        Lazily yield every build pair in the pair center, repo by repo.
        """
        repos = self.pair_center.repos
        for repo_name in repos:
            yield from repos[repo_name].buildpairs

    def _pre_analyze(self):
        """
        Reset the per-run counters and flags before the next run is analyzed.
        """
        self.start_time = time.time()
        self.reproduced_logs = {}
        self.reproduced_logs_analyzed = 0
        self.error_count = 0

        # Every build pair starts each run with its match type flag cleared.
        for buildpair in self._iter_buildpairs():
            buildpair.set_match_type.value = False

    def _analyze(self, run):
        """
        Analyze one run of reproduced results.

        Every job of every job pair is handed to analyze_and_compare. Jobs for which it returns a truthy value are
        counted as analyzed; jobs for which it raises are logged and counted as errors. Afterwards the pair center
        updates its done status, match types, match history and patch history.

        :param run: The number of the run being analyzed.
        """
        for buildpair in self._iter_buildpairs():
            for jobpair in buildpair.jobpairs:
                for job in jobpair.jobs:
                    try:
                        if analyze_and_compare(self, job, run):
                            self.reproduced_logs_analyzed += 1
                    except Exception as e:
                        log.error('Encountered an error while analyzing and comparing {}: {}'.format(job.job_name, e))
                        self.error_count += 1

        center = self.pair_center
        center.update_buildpair_done_status()
        center.assign_pair_match_types()
        center.assign_pair_match_history(run)
        center.assign_pair_patch_history(run)

    def _post_analyze(self, run):
        """
        Report on a run that has just been analyzed: log the match history so far, the number of logs analyzed and
        errors encountered, and the match type counts.

        :param run: The number of the run that was analyzed.
        """
        log.info('Done analyzing run {}.'.format(run))
        self._visualize_match_history()
        summary = '{} reproduced logs analyzed and {} errors in run {}.'
        log.info(summary.format(self.reproduced_logs_analyzed, self.error_count, run))
        # A blank line separates the output of consecutive runs.
        log.info()
        mmm = self.utils.construct_mmm_count(self.pair_center)
        aaa = self.utils.construct_aaa_count(self.pair_center)
        log.debug('Match types in run {}: m1-m2-m3: {} a1-a2-a3: {}.'.format(run, mmm, aaa))

    def _write_output_json(self):
        """
        Read the input pairs, annotate each job pair with the analysis results and write them to
        `<task>.json` inside the configured result JSON directory.

        :raises RuntimeError: If an unfiltered job pair of a matching build pair is not in the pair center.
        """
        log.info('Writing output JSON annotated with match history.')
        pairs = read_json(self.input_file)

        # Every job pair gets the default attributes first, whether or not it is found below.
        for pair in pairs:
            for jp in pair['jobpairs']:
                jp['match_history'] = {}
                for job_key in ('failed_job', 'passed_job'):
                    job_entry = jp[job_key]
                    job_entry['match_history'] = {}
                    job_entry['orig_result'] = ''
                    job_entry['mismatch_attrs'] = []
                    job_entry['pip_patch'] = False

        for pair in pairs:
            repo = pair['repo']
            if repo not in self.pair_center.repos:
                continue
            # Look for the build pair in the pair center by the build ID of its failed build.
            for buildpair in self.pair_center.repos[repo].buildpairs:
                if pair['failed_build']['build_id'] == buildpair.builds[0].build_id:
                    self._annotate_found_buildpair(pair, buildpair, repo)

        os.makedirs(self.config.result_json_dir, exist_ok=True)
        output_path = os.path.join(self.config.result_json_dir, self.config.task + '.json')
        write_json(output_path, pairs)

    def _annotate_found_buildpair(self, pair, buildpair, repo):
        """
        Annotate the job pairs of `pair` using the matching `buildpair` from the pair center.

        :raises RuntimeError: If an unfiltered job pair has no counterpart in `buildpair`.
        """
        # Optional: the build pair match type. It is not used since the switch to jobpair packaging.
        pair['match'] = buildpair.match.value
        trigger_sha = pair['failed_build']['head_sha']

        for jp in pair['jobpairs']:
            # When only some jobs of a build are filtered, the filtered ones are not in the pair center.
            if jp['is_filtered']:
                continue

            failed_job_id = str(jp['failed_job']['job_id'])
            found_in_paircenter = False
            for candidate in buildpair.jobpairs:
                if str(candidate.jobs[0].job_id) == failed_job_id:
                    found_in_paircenter = True
                    self._copy_jobpair_results(jp, candidate)

            if not found_in_paircenter:
                # The job pair is missing from the pair center. The original logs are still analyzed to get as
                # many attributes as possible before the unexpected state is reported.
                self._analyze_original_logs(jp, trigger_sha, repo)
                raise RuntimeError('Unexpected state: Jobpair not found in pair center. Exiting.')

    @staticmethod
    def _copy_jobpair_results(jp, jobpair):
        """
        Copy the match history, analyzed results, mismatched attributes and pip patch flags of `jobpair` into `jp`.
        """
        failed_entry, passed_entry = jp['failed_job'], jp['passed_job']
        jp['match_history'] = jobpair.match_history
        failed_entry['match_history'] = jobpair.failed_job_match_history
        passed_entry['match_history'] = jobpair.passed_job_match_history
        failed_entry['orig_result'] = jobpair.jobs[0].orig_result
        passed_entry['orig_result'] = jobpair.jobs[1].orig_result
        failed_entry['mismatch_attrs'] = jobpair.jobs[0].mismatch_attrs
        passed_entry['mismatch_attrs'] = jobpair.jobs[1].mismatch_attrs
        failed_entry['pip_patch'] = jobpair.jobs[0].pip_patch
        passed_entry['pip_patch'] = jobpair.jobs[1].pip_patch

    def _analyze_original_logs(self, jp, trigger_sha, repo):
        """
        Download and analyze the original log of the failed and the passed job of `jp`, storing each result as the
        job's `orig_result`. A job is skipped when its log cannot be downloaded or when the analysis result
        contains 'not_in_supported_language'.
        """
        for job_name in ('failed_job', 'passed_job'):
            job_id = jp[job_name]['job_id']
            original_log_path = self.utils.get_orig_log_path(job_id)
            if download_log(job_id, original_log_path):
                original_result = self.analyzer.analyze_single_log(original_log_path, job_id, trigger_sha, repo)
                if 'not_in_supported_language' not in original_result:
                    jp[job_name]['orig_result'] = original_result

    def _get_all_jobpairs_and_all_runs(self) -> Tuple[List[JobPair], List[str]]:
        """
        Collect every job pair in the pair center together with the sorted, de-duplicated keys of their match
        histories.

        :return: A tuple of (all job pairs, all runs).
        """
        all_jobpairs = [jobpair for buildpair in self._iter_buildpairs() for jobpair in buildpair.jobpairs]
        all_runs = sorted({run for jobpair in all_jobpairs for run in jobpair.match_history})
        return all_jobpairs, all_runs

    def _visualize_match_history(self):
        """
        Log, for every job pair, the match history of the pair, of its failed job and of its passed job across
        all known runs. A run missing from a history is shown as 'N'.
        """
        log.info('Visualizing match history:')
        log.info('N means no reproduced log exists. (An error occured in reproducer while reproducing the job.)')
        all_jobpairs, all_runs = self._get_all_jobpairs_and_all_runs()
        for jobpair in all_jobpairs:
            log.info(jobpair.full_name)
            labelled_histories = (
                ('Job pair', jobpair.match_history),
                ('Failed job', jobpair.failed_job_match_history),
                ('Passed job', jobpair.passed_job_match_history),
            )
            for history_name, match_history in labelled_histories:
                # One entry per known run, in run order.
                entries = [str(match_history.get(run, 'N')) for run in all_runs]
                if not entries:
                    log.info('No match history. (This jobpair is not reproduced.)')
                    continue
                label = '{:>24}:'.format('{} match history'.format(history_name))
                log.info(label, ' -> '.join(entries))

    def _show_reproducibility(self):
        """
        Log one reproducibility verdict per job pair: 'Unreproducible' when no run matched, 'Reproducible' when
        every run matched and 'Flaky' otherwise.
        """
        log.info('Visualizing reproducibility:')
        all_jobpairs, all_runs = self._get_all_jobpairs_and_all_runs()
        if all_jobpairs:
            width = max(len(jobpair.full_name) for jobpair in all_jobpairs)
            for jobpair in all_jobpairs:
                # A run result could be 'N', 0, or 1; anything other than 1 counts as 0.
                outcomes = [1 if jobpair.match_history.get(run) == 1 else 0 for run in all_runs]
                if not any(outcomes):
                    # No reproducing runs were successful.
                    reproducibility = 'Unreproducible'
                elif all(outcomes):
                    # Every run matched.
                    reproducibility = 'Reproducible'
                else:
                    reproducibility = 'Flaky'
                line = '{full_name: >{width}} job pair reproducibility: {result}'
                log.info(line.format(width=width, full_name=jobpair.full_name, result=reproducibility))
        else:
            log.info('Nothing to visualize since no jobs were run.')
        # Print a blank separator line.
        log.info()
class JobDispatcher(object):
    """
    JobDispatcher controls the entire reproducing workflow by dispatching tasks to a pool of worker processes
    (called "threads" throughout this class). Subclasses determine the specific task.
    """

    def __init__(self, input_file, task_name, threads=1, keep=False, package_mode=False, dependency_solver=False,
                 skip_check_disk=False):
        """
        Store the user-specified options and build the shared state used while dispatching jobs.

        :param input_file: Path of the input file handed to the PairCenter.
        :param task_name: Name of the task; used to construct the Config.
        :param threads: Upper bound on the number of workers. Stored as `thread_num`.
        :param keep: Stored as-is on the dispatcher.
        :param package_mode: Stored on the dispatcher and forwarded to the PairCenter.
        :param dependency_solver: Stored as-is on the dispatcher.
        :param skip_check_disk: Copied onto the Config as `skip_check_disk`.
        """
        log.info('Initializing job dispatcher.')
        self.input_file = input_file
        self.thread_num = threads
        self.keep = keep
        self.package_mode = package_mode
        self.dependency_solver = dependency_solver
        # -----
        self.config = Config(task_name)
        self.config.skip_check_disk = skip_check_disk
        self.utils = Utils(self.config)
        # Shared integer counters; workers update them from separate processes.
        self.items_processed = Value('i', 0)
        self.reproduce_err = Value('i', 0)
        self.job_time_acc = 0
        self.start_time = time.time()
        self.docker = DockerWrapper(self.utils)
        self.docker_storage_path = self.docker.setup_docker_storage_path()
        # Shared flag; a non-zero value asks the workers to stop before taking their next item.
        self.terminate = Value('i', 0)
        self.manager = Manager()
        self.lock = Lock()
        self.workspace_locks = self.manager.dict()
        self.cloned_repos = self.manager.dict()
        # Maps a thread ID to {'process': Process, 'exit_reason': str}.
        self.threads = {}
        self.error_reasons = {}
        self.alive_threads = 0
        self.travis_images = None
        self.job_center = PairCenter(self.input_file, self.utils, self.package_mode)

    def run(self):
        """
        The entry point for reproducing jobs. Calls post_run() after all items are processed. Subclasses must not
        override this method.

        :raises OSError: If disk space or Docker disk space is still inadequate after cleaning up.
        """
        self._base_pre_run()
        self.pre_run()
        try:
            while self.job_center.get_num_remaining_items(self.package_mode):
                log.info('Ready to initialize threads.')
                if not self.utils.check_disk_space_available():
                    self.utils.clean_disk_usage(self)
                    if not self.utils.check_disk_space_available():
                        msg = 'Still inadequate disk space after removing temporary Reproducer files. Exiting.'
                        log.error(msg)
                        raise OSError(msg)
                if not self.utils.check_docker_disk_space_available(self.docker_storage_path):
                    self.utils.clean_docker_disk_usage(self.docker)
                    if not self.utils.check_docker_disk_space_available(self.docker_storage_path):
                        msg = 'Still inadequate disk space after removing inactive Docker Images. Exiting.'
                        log.error(msg)
                        raise OSError(msg)
                self._init_threads()
        except KeyboardInterrupt:
            log.info('Caught KeyboardInterrupt. Cleaning up before terminating.')
            self.terminate.value = 1
        else:
            self.post_run()
            log.info('Done!')
        finally:
            # progress_str() returns None (or an empty string) to signal that logging should be skipped.
            final_progress = self.progress_str()
            if final_progress:
                log.info(final_progress)

    def _spawn(self, tid):
        """
        Start a new worker process for `tid` and record it, replacing any previous entry for that thread ID.

        :param tid: Thread ID
        """
        t = Process(target=self._thread_main, args=(tid,))
        thread = {'process': t, 'exit_reason': ''}
        self.threads[tid] = thread
        t.start()

    def _thread_watcher(self):
        """
        Repeatedly check whether the worker processes are alive until none of them is.

        Every 3 seconds the workers are polled; every sixth poll update_local_files() is called. A worker that is
        not alive is respawned unless it exited with code 0, in which case the terminate flag is set. Progress is
        logged whenever progress_str() changes and is not empty.
        """
        log.info('Initialized', len(self.threads), 'threads.')
        count = 0
        old_str = self.progress_str()
        while True:
            time.sleep(3)
            count += 1
            if count == 6:
                count = 0
                self.update_local_files()  # Update local files every 3*6 seconds.
            if self.terminate.value:
                log.info(colored('Waiting for threads...', 'blue'))
            # elif not self.utils.check_disk_space_available():
            #     log.warning(colored('Not enough disk space. Joining threads...', 'yellow'))
            #     self.terminate.value = 1
            alive_threads = 0
            for tid in self.threads:
                p = self.threads[tid]['process']
                if p.is_alive():
                    alive_threads += 1
                else:
                    if p.exitcode is None:
                        # Not finished and not running.
                        # Do error handling and restarting here assigning the new process to processes[n].
                        self.threads[tid]['exit_reason'] = 'not finished and not running'
                        self._spawn(tid)
                    elif p.exitcode != 0:
                        self.threads[tid]['exit_reason'] = 'errored or terminated'
                        # Handle this either by restarting or deleting the entry so it is removed from list.
                        self._spawn(tid)
                    else:
                        self.threads[tid]['exit_reason'] = 'finished'
                        self.terminate.value = 1
                    p.join()  # Allow cleanup.
            # NOTE(review): a worker respawned in this pass is not counted as alive until the next pass.
            self.alive_threads = alive_threads
            if not alive_threads:
                break
            curr_str = self.progress_str()
            if curr_str != old_str:
                old_str = curr_str
                if curr_str:
                    log.info(curr_str)

    def _init_threads(self):
        """
        Initialize min(num_threads, number of jobs to reproduce) threads and watch them until they are done.

        :return: 0 if there are no remaining items, otherwise None.
        """
        self.lock = Lock()
        self.workspace_locks = self.manager.dict()
        self.cloned_repos = self.manager.dict()
        self.threads = {}
        self.terminate.value = 0
        num_remaining_items = self.job_center.get_num_remaining_items(self.package_mode)
        if not num_remaining_items:
            log.info('No remaining items. Exiting.')
            return 0
        self.thread_num = min(self.thread_num, num_remaining_items)
        self.job_center.init_queues_for_threads(self.thread_num, self.package_mode)
        # Begin initializing threads.
        for tid in range(self.thread_num):
            self._spawn(tid)
        self._thread_watcher()

    def _thread_main(self, tid):
        """
        This is the target function for each thread. It receives the work load (a queue) for a given thread from
        job_center.thread_workloads. For each item, it calls self.process_item() to run.

        :param tid: Thread ID
        :return: 0 if the terminate flag stopped the thread early, otherwise None.
        """
        workload = self.job_center.thread_workloads[tid]
        while not workload.empty():
            # Break out of the loop if the terminate flag is set.
            if self.terminate.value:
                return 0
            item = workload.get()
            # Intentionally catch ReproduceError but allow KeyboardInterrupt to propagate.
            try:
                self.process_item(item, tid)
            except ReproduceError as e:
                log.info(colored('[THREAD {}] {} {}'.format(tid, item, e), 'red'))
                # `+=` on a shared Value is a separate read and write, so it is done under the Value's own lock
                # to avoid losing increments when several workers fail at the same time.
                with self.reproduce_err.get_lock():
                    self.reproduce_err.value += 1
                self.record_error_reason(item, str(e))
                # Optionally handle failed reproducing here.
        log.info('[THREAD {}] Workload complete. Exiting thread.'.format(tid))

    def _base_pre_run(self):
        """
        Prepare directories and inputs shared by all dispatchers. Returns early if there are no jobs to reproduce.

        :raises Exception: If Utils.is_travis_installed() reports that Travis commands do not work.
        :raises FileNotFoundError: If the Travis images JSON file named in the config does not exist.
        """
        if self.job_center.total_jobs < 1:
            log.info('No jobs to reproduce. Exiting.')
            return

        # Set up the required directories.
        os.makedirs(self.config.orig_logs_dir, exist_ok=True)
        os.makedirs(self.config.output_dir, exist_ok=True)
        self.utils.directories_setup()
        # Load previously recorded error reasons, if any, then wrap them in a manager-backed dictionary.
        if os.path.isfile(self.utils.get_error_reason_file_path()):
            self.error_reasons = read_json(self.utils.get_error_reason_file_path())
        self.error_reasons = self.manager.dict(self.error_reasons)
        # Check if commands to Travis work.
        if not Utils.is_travis_installed():
            log.error(colored('Commands to Travis are failing unexpectedly. Try restarting your shell and ensure your '
                              'environment is provisioned correctly. Also try restarting your shell.', 'red'))
            raise Exception('Unexpected state: Commands to Travis are failing unexpectedly.')
        # Read travis_images.json.
        try:
            self.travis_images = read_json(self.config.travis_images_json)
        except FileNotFoundError:
            log.error(colored(self.config.travis_images_json + ' not found. Exiting.', 'red'))
            raise

    def pre_run(self):
        """
        Called before any items have been processed.

        Overriding is optional. Defaults to no-op.
        """
        pass

    def progress_str(self) -> Optional[str]:
        """
        Subclasses should return a string, which will be logged, representing progress at the time the method is
        called. Returns None by default, which indicates to the caller that logging the progress should be skipped.

        Overriding is optional.

        :return: A string representing the dispatcher's progress or None to skip logging the progress.
        """
        return None

    def update_local_files(self):
        """
        Called periodically to allow the dispatcher to update local files as needed.

        Overriding is optional. Defaults to no-op.
        """
        pass

    def process_item(self, item, tid):
        """
        Subclasses must override this method to process each item in the workload.

        :param item: The item to process.
        :param tid: The thread ID tasked with processing the item.
        :raises NotImplementedError: Always, unless overridden.
        """
        raise NotImplementedError

    def record_error_reason(self, item, message):
        """
        Overriding is optional. Defaults to no-op.

        :param item: The item for which to record an error message.
        :param message: The error message to record.
        """
        pass

    def post_run(self):
        """
        Called after all items have been processed.

        Overriding is optional. Defaults to no-op.
        """
        pass