def get_runnables(granularity):
    past_failures_data = test_scheduling.get_past_failures(granularity)

    push_num = past_failures_data["push_num"]
    all_runnables = past_failures_data["all_runnables"]

    commit_tests = []
    for data in test_scheduling.generate_data(
        past_failures_data, commit_data, push_num, all_runnables, [], []
    ):
        if granularity == "label" and not data["name"].startswith("test-"):
            continue

        commit_test = commit_data.copy()
        commit_test["test_job"] = data
        commit_tests.append(commit_test)

    probs = MODEL_CACHE.get(f"test{granularity}select").classify(
        commit_tests, probabilities=True
    )
    selected_indexes = np.argwhere(probs[:, 1] > test_selection_threshold)[:, 0]
    return {
        commit_tests[i]["test_job"]["name"]: math.floor(probs[i, 1] * 100) / 100
        for i in selected_indexes
    }
def select_tests(self, commits, confidence=0.3, push_num=None):
    commit_data = commit_features.merge_commits(commits)

    past_failures_data = test_scheduling.get_past_failures(self.granularity)

    if push_num is None:
        push_num = past_failures_data["push_num"] + 1
    all_runnables = past_failures_data["all_runnables"]
    if self.granularity == "label":
        all_runnables = tuple(r for r in all_runnables if r.startswith("test-"))

    commit_tests = []
    for data in test_scheduling.generate_data(
        past_failures_data, commit_data, push_num, all_runnables, tuple(), tuple()
    ):
        commit_test = commit_data.copy()
        commit_test["test_job"] = data
        commit_tests.append(commit_test)

    probs = self.classify(commit_tests, probabilities=True)
    selected_indexes = np.argwhere(probs[:, 1] >= confidence)[:, 0]
    return {
        commit_tests[i]["test_job"]["name"]: math.floor(probs[i, 1] * 100) / 100
        for i in selected_indexes
    }
def classify(self, diff_id):
    self.update_commit_db()

    with hglib.open(self.repo_dir) as hg:
        self.apply_phab(hg, diff_id)

        patch_rev = hg.log(revrange="not public()")[0].node

    # Analyze patch.
    commits = repository.download_commits(
        self.repo_dir, rev_start=patch_rev.decode("utf-8"), save=False
    )

    # We use "clean" (or "dirty") commits as the background dataset for feature importance.
    # This way, we can see the features which are most important in differentiating
    # the current commit from the "clean" (or "dirty") commits.

    if not self.use_test_history:
        probs, importance = self.model.classify(
            commits[-1],
            probabilities=True,
            importances=True,
            background_dataset=lambda v: self.X[self.y != v],
            importance_cutoff=0.05,
        )

        self.generate_feature_importance_data(probs, importance)

        with open("probs.json", "w") as f:
            json.dump(probs[0].tolist(), f)

        if self.model_name == "regressor" and self.method_defect_predictor_dir:
            self.classify_methods()
    else:
        # TODO: Should we consider a merge of the commits of the stack?
        commit = commits[-1]

        push_num = self.past_failures_data["push_num"]

        # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES.
        # XXX: Consider using the runnable jobs artifact from the Gecko Decision task.
        all_tasks = self.past_failures_data["all_tasks"]

        selected_tasks = []
        # TODO: Classify multiple commit/test at the same time.
        for data in test_scheduling.generate_data(
            self.past_failures_data, commit, push_num, all_tasks, [], []
        ):
            if not data["name"].startswith("test-"):
                continue

            commit["test_job"] = data

            probs = self.model.classify(commit, probabilities=True)

            if probs[0][1] > 0.9:
                selected_tasks.append(data["name"])

        with open("selected_tasks", "w") as f:
            f.writelines(f"{selected_task}\n" for selected_task in selected_tasks)
def select_tests(
    self,
    commits: Sequence[repository.CommitDict],
    confidence: float = 0.5,
    push_num: Optional[int] = None,
) -> Dict[str, float]:
    commit_data = commit_features.merge_commits(commits)

    past_failures_data = test_scheduling.get_past_failures(self.granularity, True)

    if push_num is None:
        push_num = past_failures_data["push_num"] + 1
    all_runnables = past_failures_data["all_runnables"]

    commit_tests = []
    for data in test_scheduling.generate_data(
        self.granularity,
        past_failures_data,
        commit_data,
        push_num,
        all_runnables,
        tuple(),
        tuple(),
    ):
        commit_test = commit_data.copy()
        commit_test["test_job"] = data
        commit_tests.append(commit_test)

    probs = self.classify(commit_tests, probabilities=True)
    selected_indexes = np.argwhere(probs[:, 1] >= confidence)[:, 0]
    return {
        commit_tests[i]["test_job"]["name"]: math.floor(probs[i, 1] * 100) / 100
        for i in selected_indexes
    }
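# A minimal usage sketch for select_tests (an illustrative assumption, not taken
# from the snippets above): `model` stands for an already-trained test selection
# model exposing select_tests() as defined here, and `commits` for the mined
# commit dicts of the push being evaluated.
selected = model.select_tests(commits, confidence=0.5)

# select_tests returns {runnable name: failure probability truncated to two
# decimals} for every runnable scoring at or above the confidence threshold.
for name, prob in sorted(selected.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{prob:.2f} {name}")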
def test_generate_data(granularity): past_failures = test_scheduling.get_past_failures(granularity) commits = [ { "types": ["C/C++"], "files": ["dom/file1.cpp"], "directories": ["dom"], "components": ["DOM"], }, { "types": ["C/C++"], "files": ["dom/file1.cpp", "dom/file2.cpp"], "directories": ["dom"], "components": ["DOM"], }, { "types": ["C/C++"], "files": ["layout/file.cpp"], "directories": ["layout"], "components": ["Layout"], }, { "types": ["C/C++"], "files": ["layout/file.cpp"], "directories": ["layout"], "components": ["Layout"], }, { "types": ["JavaScript", "C/C++"], "files": ["dom/file1.cpp", "dom/file1.js"], "directories": ["dom"], "components": ["DOM"], }, ] data = list( test_scheduling.generate_data(past_failures, commits[0], 1, ["runnable1", "runnable2"], [], [])) assert len(data) == 2 assert data[0] == { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable1", "touched_together_directories": 0, "touched_together_files": 0, } assert data[1] == { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable2", "touched_together_directories": 0, "touched_together_files": 0, } data = list( test_scheduling.generate_data(past_failures, commits[1], 2, ["runnable1", "runnable2"], ["runnable1"], [])) assert len(data) == 2 assert data[0] == { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": 
False, "is_possible_regression": True, "name": "runnable1", "touched_together_directories": 0, "touched_together_files": 0, } assert data[1] == { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable2", "touched_together_directories": 0, "touched_together_files": 0, } data = list( test_scheduling.generate_data(past_failures, commits[2], 3, ["runnable1", "runnable2"], [], ["runnable2"])) assert len(data) == 2 assert data[0] == { "failures": 1, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 1, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 1, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 1, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 1, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable1", "touched_together_directories": 0, "touched_together_files": 0, } assert data[1] == { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": True, "is_possible_regression": False, "name": "runnable2", "touched_together_directories": 0, "touched_together_files": 0, } data = list( test_scheduling.generate_data(past_failures, commits[3], 4, ["runnable1"], [], [])) assert len(data) == 1 assert data[0] == { "failures": 1, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 1, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 1, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 0, 
"failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 1, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 1, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable1", "touched_together_directories": 0, "touched_together_files": 0, } data = list( test_scheduling.generate_data( past_failures, commits[4], 1500, ["runnable1", "runnable2"], ["runnable1", "runnable2"], [], )) assert len(data) == 2 assert data[0] == { "failures": 1, "failures_in_components": 1, "failures_in_directories": 1, "failures_in_files": 1, "failures_in_types": 1, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 1, "failures_past_2800_pushes_in_directories": 1, "failures_past_2800_pushes_in_files": 1, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable1", "touched_together_directories": 0, "touched_together_files": 0, } assert data[1] == { "failures": 1, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 1, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable2", "touched_together_directories": 0, "touched_together_files": 0, } data = list( test_scheduling.generate_data( past_failures, commits[4], 2400, ["runnable1", "runnable2"], ["runnable1", "runnable2"], [], )) assert len(data) == 2 assert data[0] == { "failures": 2, "failures_in_components": 2, "failures_in_directories": 2, "failures_in_files": 3, "failures_in_types": 3, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 1, "failures_past_1400_pushes_in_directories": 1, "failures_past_1400_pushes_in_files": 2, "failures_past_1400_pushes_in_types": 2, "failures_past_2800_pushes": 2, "failures_past_2800_pushes_in_components": 2, "failures_past_2800_pushes_in_directories": 2, "failures_past_2800_pushes_in_files": 3, "failures_past_2800_pushes_in_types": 3, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable1", "touched_together_directories": 0, "touched_together_files": 0, } assert data[1] == { 
"failures": 2, "failures_in_components": 1, "failures_in_directories": 1, "failures_in_files": 2, "failures_in_types": 3, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 1, "failures_past_1400_pushes_in_directories": 1, "failures_past_1400_pushes_in_files": 2, "failures_past_1400_pushes_in_types": 2, "failures_past_2800_pushes": 2, "failures_past_2800_pushes_in_components": 1, "failures_past_2800_pushes_in_directories": 1, "failures_past_2800_pushes_in_files": 2, "failures_past_2800_pushes_in_types": 3, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable2", "touched_together_directories": 0, "touched_together_files": 0, }
def generate_all_data() -> Generator[Dict[str, Any], None, None]:
    past_failures = test_scheduling.get_past_failures(granularity, False)

    push_num = past_failures["push_num"] if "push_num" in past_failures else 0

    commit_map = {}
    for commit_data in tqdm(repository.get_commits()):
        commit_map[commit_data["node"]] = commit_data

    # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
    past_failures["all_runnables"] = all_runnables
    # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
    # same as the current ones?

    saved_nodes = set()
    skipped_no_commits = 0
    skipped_too_big_commits = 0
    skipped_no_runnables = 0

    if granularity in ("group", "config_group"):
        update_touched_together_gen = test_scheduling.update_touched_together()
        next(update_touched_together_gen)

    for (
        i,
        (
            revisions,
            fix_revision,
            push_runnables,
            possible_regressions,
            likely_regressions,
        ),
    ) in enumerate(tqdm(push_data_iter(), total=push_data_count)):
        push_num += 1

        # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
        commits = tuple(
            commit_map.pop(revision)
            for revision in revisions
            if revision in commit_map
        )
        if len(commits) == 0:
            skipped_no_commits += 1
            continue

        # Skip wptsync commits, since they are not like normal pushes made by developers.
        if any(repository.is_wptsync(commit) for commit in commits):
            continue

        merged_commits = commit_features.merge_commits(commits)

        # XXX: For now, skip commits which are too large.
        # In the future we can either:
        # - Improve shelve perf and go back to consider all files;
        # - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
        # - Keep a limit of number of files.
        if len(merged_commits["files"]) > 50:
            skipped_too_big_commits += 1
            continue

        # If we considered all_runnables, we'd generate a huge amount of data.
        # We consider only the runnables which run in this push, and the possible and likely regressions
        # from this push. We can't consider all runnables because we can't be sure that a task that didn't
        # run on a push would have been successful.
        runnables_to_consider = list(
            set(push_runnables + possible_regressions + likely_regressions)
        )

        if len(runnables_to_consider) == 0:
            skipped_no_runnables += 1
            continue

        # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
        if i % 250 == 0:
            past_failures.sync()

        pushdate = dateutil.parser.parse(merged_commits["pushdate"])

        if granularity in ("group", "config_group"):
            update_touched_together_gen.send(commits[0]["node"])

        result_data = []
        for data in test_scheduling.generate_data(
            granularity,
            past_failures,
            merged_commits,
            push_num,
            runnables_to_consider,
            possible_regressions,
            likely_regressions,
        ):
            if pushdate > HISTORY_DATE_START:
                result_data.append(data)

        if pushdate > HISTORY_DATE_START:
            saved_nodes.add(i)
            yield {
                "revs": revisions,
                "data": result_data,
            }

    if granularity == "group":
        try:
            update_touched_together_gen.send(None)
        except StopIteration:
            pass

    logger.info(f"saved push data nodes: {len(saved_nodes)}")
    logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
    logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
    logger.info(f"skipped {skipped_no_runnables} (no interesting runnables)")

    past_failures["push_num"] = push_num
    past_failures.close()
def test_generate_data(granularity: str) -> None: past_failures = test_scheduling.get_past_failures(granularity, False) commits = [ CommitDict({ "types": ["C/C++"], "files": ["dom/file1.cpp"], "directories": ["dom"], "components": ["DOM"], }), CommitDict({ "types": ["C/C++"], "files": ["dom/file1.cpp", "dom/file2.cpp"], "directories": ["dom"], "components": ["DOM"], }), CommitDict({ "types": ["C/C++"], "files": ["layout/file.cpp"], "directories": ["layout"], "components": ["Layout"], }), CommitDict({ "types": ["C/C++"], "files": ["layout/file.cpp"], "directories": ["layout"], "components": ["Layout"], }), CommitDict({ "types": ["JavaScript", "C/C++"], "files": ["dom/file1.cpp", "dom/file1.js"], "directories": ["dom"], "components": ["DOM"], }), ] data = list( test_scheduling.generate_data( granularity, past_failures, commits[0], 1, ["runnable1", "runnable2"], [], [], )) assert len(data) == 2 obj = { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable1", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[0] == obj obj = { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable2", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[1] == obj data = list( test_scheduling.generate_data( granularity, past_failures, commits[1], 2, ["runnable1", "runnable2"], ["runnable1"], [], )) assert len(data) == 2 obj = { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, 
"failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable1", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[0] == obj obj = { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable2", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[1] == obj data = list( test_scheduling.generate_data( granularity, past_failures, commits[2], 3, ["runnable1", "runnable2"], [], ["runnable2"], )) assert len(data) == 2 obj = { "failures": 1, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 1, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 1, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 1, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 1, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable1", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[0] == obj obj = { "failures": 0, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 0, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 0, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 0, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": True, "is_possible_regression": False, "name": "runnable2", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[1] == obj data = list( test_scheduling.generate_data(granularity, past_failures, commits[3], 4, ["runnable1"], [], [])) assert len(data) == 1 obj = { 
"failures": 1, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 1, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 1, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 1, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 1, "is_likely_regression": False, "is_possible_regression": False, "name": "runnable1", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[0] == obj data = list( test_scheduling.generate_data( granularity, past_failures, commits[4], 1500, ["runnable1", "runnable2"], ["runnable1", "runnable2"], [], )) assert len(data) == 2 obj = { "failures": 1, "failures_in_components": 1, "failures_in_directories": 1, "failures_in_files": 1, "failures_in_types": 1, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 1, "failures_past_2800_pushes_in_directories": 1, "failures_past_2800_pushes_in_files": 1, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable1", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[0] == obj obj = { "failures": 1, "failures_in_components": 0, "failures_in_directories": 0, "failures_in_files": 0, "failures_in_types": 1, "failures_past_1400_pushes": 0, "failures_past_1400_pushes_in_components": 0, "failures_past_1400_pushes_in_directories": 0, "failures_past_1400_pushes_in_files": 0, "failures_past_1400_pushes_in_types": 0, "failures_past_2800_pushes": 1, "failures_past_2800_pushes_in_components": 0, "failures_past_2800_pushes_in_directories": 0, "failures_past_2800_pushes_in_files": 0, "failures_past_2800_pushes_in_types": 1, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable2", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[1] == obj data = list( test_scheduling.generate_data( granularity, past_failures, commits[4], 2400, ["runnable1", "runnable2"], ["runnable1", "runnable2"], [], )) assert len(data) == 2 obj = { "failures": 2, "failures_in_components": 2, "failures_in_directories": 2, "failures_in_files": 3, "failures_in_types": 3, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 1, "failures_past_1400_pushes_in_directories": 1, "failures_past_1400_pushes_in_files": 2, "failures_past_1400_pushes_in_types": 2, 
"failures_past_2800_pushes": 2, "failures_past_2800_pushes_in_components": 2, "failures_past_2800_pushes_in_directories": 2, "failures_past_2800_pushes_in_files": 3, "failures_past_2800_pushes_in_types": 3, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable1", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[0] == obj obj = { "failures": 2, "failures_in_components": 1, "failures_in_directories": 1, "failures_in_files": 2, "failures_in_types": 3, "failures_past_1400_pushes": 1, "failures_past_1400_pushes_in_components": 1, "failures_past_1400_pushes_in_directories": 1, "failures_past_1400_pushes_in_files": 2, "failures_past_1400_pushes_in_types": 2, "failures_past_2800_pushes": 2, "failures_past_2800_pushes_in_components": 1, "failures_past_2800_pushes_in_directories": 1, "failures_past_2800_pushes_in_files": 2, "failures_past_2800_pushes_in_types": 3, "failures_past_700_pushes": 0, "failures_past_700_pushes_in_components": 0, "failures_past_700_pushes_in_directories": 0, "failures_past_700_pushes_in_files": 0, "failures_past_700_pushes_in_types": 0, "is_likely_regression": False, "is_possible_regression": True, "name": "runnable2", } if granularity == "group": obj["touched_together_directories"] = 0 obj["touched_together_files"] = 0 assert data[1] == obj
def generate_all_data():
    past_failures = test_scheduling.get_past_failures(granularity)

    push_num = past_failures["push_num"] if "push_num" in past_failures else 0

    # We can start once we get to the last revision we added in the previous run.
    can_start = True if last_node is None else False

    commit_map = {}
    for commit_data in tqdm(repository.get_commits()):
        if not can_start:
            if last_node == commit_data["node"]:
                can_start = True

            continue

        commit_map[commit_data["node"]] = commit_data

    with open(push_data_path, "r") as f:
        push_data = json.load(f)

    logger.info(f"push data nodes: {len(push_data)}")

    if granularity == "label":
        push_data = [
            (
                revisions,
                rename_tasks(push_tasks),
                rename_tasks(possible_regressions),
                rename_tasks(likely_regressions),
            )
            for revisions, push_tasks, possible_regressions, likely_regressions in push_data
        ]

    # In the last 28 pushes, we definitely run all possible runnables.
    all_runnables_set = set(
        sum((push_runnables for _, push_runnables, _, _ in push_data[-28:]), [])
    )
    # Filter runnables we don't need.
    all_runnables = filter_runnables(
        list(all_runnables_set), all_runnables_set, granularity
    )
    all_runnables_set = set(all_runnables)
    logger.info(f"{len(all_runnables_set)} runnables run in the last 28 pushes")

    push_data = [
        (
            revisions,
            filter_runnables(push_tasks, all_runnables_set, granularity),
            filter_runnables(possible_regressions, all_runnables_set, granularity),
            filter_runnables(likely_regressions, all_runnables_set, granularity),
        )
        for revisions, push_tasks, possible_regressions, likely_regressions in push_data
    ]

    if granularity == "label":
        generate_failing_together_probabilities(push_data)

    # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
    past_failures["all_runnables"] = all_runnables
    # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
    # same as the current ones?

    saved_nodes = set()
    skipped_no_commits = 0
    skipped_too_big_commits = 0
    skipped_no_runnables = 0

    # We can start once we get to the last revision we added in the previous run.
    can_start = True if last_node is None else False

    if granularity == "group":
        update_touched_together_gen = test_scheduling.update_touched_together()
        next(update_touched_together_gen)

    for i in tqdm(range(len(push_data))):
        (
            revisions,
            push_runnables,
            possible_regressions,
            likely_regressions,
        ) = push_data.pop(0)

        if not can_start:
            if last_node == revisions[0]:
                can_start = True

            continue

        push_num += 1

        # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
        commits = tuple(
            commit_map.pop(revision)
            for revision in revisions
            if revision in commit_map
        )
        if len(commits) == 0:
            skipped_no_commits += 1
            continue

        merged_commits = commit_features.merge_commits(commits)

        # XXX: For now, skip commits which are too large.
        # In the future we can either:
        # - Improve shelve perf and go back to consider all files;
        # - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
        # - Keep a limit of number of files.
        if len(merged_commits["files"]) > 50:
            skipped_too_big_commits += 1
            continue

        # If we considered all_runnables, we'd generate a huge amount of data.
        # We consider only the runnables which run in this push, and the possible and likely regressions
        # from this push. We can't consider all runnables because we can't be sure that a task that didn't
        # run on a push would have been successful.
        runnables_to_consider = list(
            set(push_runnables + possible_regressions + likely_regressions)
        )

        if len(runnables_to_consider) == 0:
            skipped_no_runnables += 1
            continue

        # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
        if i % 250 == 0:
            past_failures.sync()

        pushdate = dateutil.parser.parse(merged_commits["pushdate"])

        if granularity == "group":
            update_touched_together_gen.send(commits[0]["node"])

        result = {
            "revs": revisions,
            "data": [],
        }
        for data in test_scheduling.generate_data(
            past_failures,
            merged_commits,
            push_num,
            runnables_to_consider,
            possible_regressions,
            likely_regressions,
        ):
            if pushdate > HISTORY_DATE_START:
                result["data"].append(data)

        if pushdate > HISTORY_DATE_START:
            saved_nodes.add(i)
            yield result

    if granularity == "group":
        try:
            update_touched_together_gen.send(None)
        except StopIteration:
            pass

    logger.info(f"saved push data nodes: {len(saved_nodes)}")
    logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
    logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
    logger.info(f"skipped {skipped_no_runnables} (no interesting runnables)")

    past_failures["push_num"] = push_num
    past_failures.close()
def classify_test_select(self, commits, runnable_jobs_path):
    testfailure_probs = self.testfailure_model.classify(
        commits[-1], probabilities=True
    )

    logger.info(f"Test failure risk: {testfailure_probs[0][1]}")

    commit_data = commit_features.merge_commits(commits)

    push_num = self.past_failures_data["push_num"]

    # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES.
    all_tasks = self.past_failures_data["all_runnables"]

    if not runnable_jobs_path:
        runnable_jobs = {task for task in all_tasks}
    elif runnable_jobs_path.startswith("http"):
        r = requests.get(runnable_jobs_path)
        r.raise_for_status()
        runnable_jobs = r.json()
    else:
        with open(runnable_jobs_path, "r") as f:
            runnable_jobs = json.load(f)

    # XXX: For now, only restrict to linux64 test tasks.
    all_tasks = [t for t in all_tasks if t.startswith("test-linux1804-64/")]

    # XXX: Remove tasks which are not in runnable jobs right away, so we avoid classifying them.

    commit_tests = []
    for data in test_scheduling.generate_data(
        self.past_failures_data, commit_data, push_num, all_tasks, [], []
    ):
        if not data["name"].startswith("test-"):
            continue

        commit_test = commit_data.copy()
        commit_test["test_job"] = data
        commit_tests.append(commit_test)

    probs = self.model.classify(commit_tests, probabilities=True)
    selected_indexes = np.argwhere(
        probs[:, 1] > float(get_secret("TEST_SELECTION_CONFIDENCE_THRESHOLD"))
    )[:, 0]
    selected_tasks = [
        commit_tests[i]["test_job"]["name"] for i in selected_indexes
    ]

    with open("failure_risk", "w") as f:
        f.write(
            "1"
            if testfailure_probs[0][1]
            > float(get_secret("TEST_FAILURE_CONFIDENCE_THRESHOLD"))
            else "0"
        )

    # This should be kept in sync with the test scheduling history retriever script.
    cleaned_selected_tasks = []
    for selected_task in selected_tasks:
        if (
            selected_task.startswith("test-linux64")
            and selected_task not in runnable_jobs
        ):
            selected_task = selected_task.replace(
                "test-linux64-", "test-linux1804-64-"
            )

        if (
            selected_task.startswith("test-linux1804-64-")
            and selected_task not in runnable_jobs
        ):
            selected_task = selected_task.replace(
                "test-linux1804-64-", "test-linux64-"
            )

        if selected_task in runnable_jobs:
            cleaned_selected_tasks.append(selected_task)

    # It isn't worth running the build required by the tests if fewer than three test tasks are selected.
    if len(cleaned_selected_tasks) < 3:
        cleaned_selected_tasks = []

    with open("selected_tasks", "w") as f:
        f.writelines(
            f"{selected_task}\n" for selected_task in cleaned_selected_tasks
        )
def generate_all_data():
    past_failures = test_scheduling.get_past_failures()

    push_num = past_failures["push_num"] if "push_num" in past_failures else 0

    # We can start once we get to the last revision we added in the previous run.
    can_start = True if last_node is None else False

    commit_map = {}
    for commit_data in tqdm(repository.get_commits()):
        if not can_start:
            if last_node == commit_data["node"]:
                can_start = True

            continue

        commit_map[commit_data["node"]] = commit_data

    with open("push_data.json", "r") as f:
        push_data = json.load(f)[1:]

    logger.info(f"push data nodes: {len(push_data)}")

    # In the last 28 pushes, we definitely run all possible tasks.
    all_tasks_set = set(
        sum((push_tasks for _, push_tasks, _, _ in push_data[-28:]), [])
    )
    # Filter tasks we don't need.
    all_tasks = filter_tasks(list(all_tasks_set), all_tasks_set)
    all_tasks_set = set(all_tasks)
    logger.info(f"{len(all_tasks_set)} tasks run in the last 28 pushes")

    # Store all tasks in the past_failures DB so it can be used in the evaluation phase.
    past_failures["all_tasks"] = all_tasks
    # XXX: Should we recreate the DB from scratch if the previous all_tasks are not the
    # same as the current ones?

    saved_nodes = set()
    skipped_no_commits = 0
    skipped_too_big_commits = 0
    skipped_no_tasks = 0

    # We can start once we get to the last revision we added in the previous run.
    can_start = True if last_node is None else False

    for i in tqdm(range(len(push_data))):
        (
            revisions,
            push_tasks,
            possible_regressions,
            likely_regressions,
        ) = push_data.pop(0)

        if not can_start:
            if last_node == revisions[0]:
                can_start = True

            continue

        push_num += 1

        # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
        commits = tuple(
            commit_map.pop(revision)
            for revision in revisions
            if revision in commit_map
        )
        if len(commits) == 0:
            skipped_no_commits += 1
            continue

        merged_commits = commit_features.merge_commits(commits)

        # XXX: For now, skip commits which are too large.
        # In the future we can either:
        # - Improve shelve perf and go back to consider all files;
        # - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
        # - Keep a limit of number of files.
        if len(merged_commits["files"]) > 50:
            skipped_too_big_commits += 1
            continue

        # If we considered all_tasks, we'd generate a huge amount of data.
        # So we consider only the tasks which run in this push, and the possible and likely regressions
        # from this push.
        tasks_to_consider = list(
            set(push_tasks + possible_regressions + likely_regressions)
        )
        tasks_to_consider = filter_tasks(tasks_to_consider, all_tasks_set)

        if len(tasks_to_consider) == 0:
            skipped_no_tasks += 1
            continue

        # Sync DB every 250 pushes, so we cleanup the shelve cache (we'd run OOM otherwise!).
        if i % 250 == 0:
            past_failures.sync()

        pushdate = dateutil.parser.parse(merged_commits["pushdate"])

        for data in test_scheduling.generate_data(
            past_failures,
            merged_commits,
            push_num,
            tasks_to_consider,
            possible_regressions,
            likely_regressions,
        ):
            if pushdate > HISTORY_DATE_START:
                saved_nodes.add(i)
                data["revs"] = revisions
                yield data

    logger.info(f"saved push data nodes: {len(saved_nodes)}")
    logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
    logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
    logger.info(f"skipped {skipped_no_tasks} (no interesting tasks)")

    past_failures["push_num"] = push_num
    past_failures.close()
def classify(self, diff_id):
    self.update_commit_db()

    with hglib.open(self.repo_dir) as hg:
        self.apply_phab(hg, diff_id)

        patch_rev = hg.log(revrange="not public()")[0].node

    # Analyze patch.
    commits = repository.download_commits(
        self.repo_dir, rev_start=patch_rev.decode("utf-8"), save=False
    )

    # We use "clean" (or "dirty") commits as the background dataset for feature importance.
    # This way, we can see the features which are most important in differentiating
    # the current commit from the "clean" (or "dirty") commits.

    if not self.use_test_history:
        probs, importance = self.model.classify(
            commits[-1],
            probabilities=True,
            importances=True,
            background_dataset=lambda v: self.X[self.y != v],
            importance_cutoff=0.05,
        )

        self.generate_feature_importance_data(probs, importance)

        with open("probs.json", "w") as f:
            json.dump(probs[0].tolist(), f)

        if self.model_name == "regressor" and self.method_defect_predictor_dir:
            self.classify_methods(commits[-1])
    else:
        testfailure_probs = self.testfailure_model.classify(
            commits[-1], probabilities=True
        )

        logger.info(f"Test failure risk: {testfailure_probs[0][1]}")

        commit_data = commit_features.merge_commits(commits)

        push_num = self.past_failures_data["push_num"]

        # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES.
        # XXX: Consider using the runnable jobs artifact from the Gecko Decision task.
        all_tasks = self.past_failures_data["all_tasks"]

        # XXX: For now, only restrict to test-linux64 tasks.
        all_tasks = [
            t
            for t in all_tasks
            if t.startswith("test-linux64/") and "test-verify" not in t
        ]

        commit_tests = []
        for data in test_scheduling.generate_data(
            self.past_failures_data, commit_data, push_num, all_tasks, [], []
        ):
            if not data["name"].startswith("test-"):
                continue

            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = self.model.classify(commit_tests, probabilities=True)
        selected_indexes = np.argwhere(
            probs[:, 1] > float(get_secret("TEST_SELECTION_CONFIDENCE_THRESHOLD"))
        )[:, 0]
        selected_tasks = [
            commit_tests[i]["test_job"]["name"] for i in selected_indexes
        ]

        with open("failure_risk", "w") as f:
            f.write(
                "1"
                if testfailure_probs[0][1]
                > float(get_secret("TEST_FAILURE_CONFIDENCE_THRESHOLD"))
                else "0"
            )

        # It isn't worth running the build required by the tests if fewer than three test tasks are selected.
        if len(selected_tasks) < 3:
            selected_tasks = []

        with open("selected_tasks", "w") as f:
            f.writelines(f"{selected_task}\n" for selected_task in selected_tasks)