Example No. 1
    def select_tests(self, commits, confidence=0.3, push_num=None):
        commit_data = commit_features.merge_commits(commits)

        past_failures_data = test_scheduling.get_past_failures(
            self.granularity)

        if push_num is None:
            push_num = past_failures_data["push_num"] + 1
        all_runnables = past_failures_data["all_runnables"]

        if self.granularity == "label":
            all_runnables = tuple(r for r in all_runnables
                                  if r.startswith("test-"))

        commit_tests = []
        for data in test_scheduling.generate_data(past_failures_data,
                                                  commit_data, push_num,
                                                  all_runnables, tuple(),
                                                  tuple()):
            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = self.classify(commit_tests, probabilities=True)
        selected_indexes = np.argwhere(probs[:, 1] >= confidence)[:, 0]
        return {
            commit_tests[i]["test_job"]["name"]:
            math.floor(probs[i, 1] * 100) / 100
            for i in selected_indexes
        }
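
The dictionary comprehension above can be exercised in isolation: keep the runnables whose positive-class probability meets the confidence threshold, and truncate each score to two decimal places. A minimal sketch with made-up probabilities and test names:

import math

import numpy as np

# Hypothetical classifier output; column 1 is the probability of failure.
probs = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
names = ["test-a", "test-b", "test-c"]
confidence = 0.3

selected_indexes = np.argwhere(probs[:, 1] >= confidence)[:, 0]
result = {names[i]: math.floor(probs[i, 1] * 100) / 100 for i in selected_indexes}
print(result)  # {'test-b': 0.8, 'test-c': 0.4}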
Example No. 2
    def select_tests(
        self,
        commits: Sequence[repository.CommitDict],
        confidence: float = 0.5,
        push_num: Optional[int] = None,
    ) -> Dict[str, float]:
        commit_data = commit_features.merge_commits(commits)

        past_failures_data = test_scheduling.get_past_failures(self.granularity, True)

        if push_num is None:
            push_num = past_failures_data["push_num"] + 1
        all_runnables = past_failures_data["all_runnables"]

        commit_tests = []
        for data in test_scheduling.generate_data(
            self.granularity,
            past_failures_data,
            commit_data,
            push_num,
            all_runnables,
            tuple(),
            tuple(),
        ):
            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = self.classify(commit_tests, probabilities=True)
        selected_indexes = np.argwhere(probs[:, 1] >= confidence)[:, 0]
        return {
            commit_tests[i]["test_job"]["name"]: math.floor(probs[i, 1] * 100) / 100
            for i in selected_indexes
        }
Example No. 3
    def items_gen(self, classes):
        commit_map = {}

        for commit in repository.get_commits():
            commit_map[commit["node"]] = commit

        assert len(commit_map) > 0

        for test_data in test_scheduling.get_test_scheduling_history(
                self.granularity):
            revs = test_data["revs"]
            name = test_data["name"]

            if (revs[0], name) not in classes:
                continue

            commits = tuple(commit_map[revision]
                            for revision in test_data["revs"]
                            if revision in commit_map)
            if len(commits) == 0:
                continue

            commit_data = commit_features.merge_commits(commits)
            commit_data["test_job"] = test_data
            yield commit_data, classes[(revs[0], name)]
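
The commit lookup in items_gen silently drops revisions that were skipped during repository mining; only revisions present in commit_map end up in the tuple. A stand-alone sketch with made-up revisions:

# Made-up commit map and push revisions.
commit_map = {"abc123": {"node": "abc123", "files": ["a.py"]}}
revs = ["abc123", "missing456"]

commits = tuple(commit_map[rev] for rev in revs if rev in commit_map)
assert commits == ({"node": "abc123", "files": ["a.py"]},)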
Example No. 4
    def items_gen(self, classes):
        commit_map = {}

        for commit in repository.get_commits():
            commit_map[commit["node"]] = commit

        assert len(commit_map) > 0

        done = set()
        for test_data in test_scheduling.get_test_scheduling_history("label"):
            revs = test_data["revs"]

            if revs[0] in done:
                continue

            if revs[0] not in classes:
                continue

            done.add(revs[0])

            commits = tuple(commit_map[revision] for revision in revs
                            if revision in commit_map)
            if len(commits) == 0:
                continue

            commit_data = commit_features.merge_commits(commits)
            yield commit_data, classes[revs[0]]
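
The done set keys each push by its first revision, so only the first history entry seen per push is yielded. A minimal sketch of that deduplication pattern, with made-up data:

done = set()
rows = [("rev1", "row-a"), ("rev1", "row-b"), ("rev2", "row-c")]

kept = []
for rev, row in rows:
    if rev in done:
        continue
    done.add(rev)
    kept.append(row)
print(kept)  # ['row-a', 'row-c']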
Example No. 5
    def items_gen(self, classes):
        commit_map = get_commit_map()

        for revs, test_datas in test_scheduling.get_test_scheduling_history(
                self.granularity):
            commits = tuple(commit_map[revision] for revision in revs
                            if revision in commit_map)
            if len(commits) == 0:
                continue

            for test_data in test_datas:
                name = test_data["name"]

                if (revs[0], name) not in classes:
                    continue

                commit_data = commit_features.merge_commits(commits)
                commit_data["test_job"] = test_data
                yield commit_data, classes[(revs[0], name)]
Example No. 6
    def items_gen(self, classes):
        commit_map = {}

        for commit in repository.get_commits():
            commit_map[commit["node"]] = commit

        assert len(commit_map) > 0

        for revs, test_datas in test_scheduling.get_test_scheduling_history("label"):
            if revs[0] not in classes:
                continue

            commits = tuple(
                commit_map[revision] for revision in revs if revision in commit_map
            )
            if len(commits) == 0:
                continue

            commit_data = commit_features.merge_commits(commits)
            yield commit_data, classes[revs[0]]
Example No. 7
def schedule_tests(branch, rev):
    from bugbug_http.app import JobInfo
    from bugbug_http import REPO_DIR

    job = JobInfo(schedule_tests, branch, rev)
    LOGGER.debug(f"Processing {job}")

    # Load the full stack of patches leading to that revision
    try:
        stack = get_hgmo_stack(branch, rev)
    except requests.exceptions.RequestException:
        LOGGER.warning(f"Push not found for {branch} @ {rev}!")
        return "NOK"

    # Apply the stack on the local repository
    try:
        revs = repository.apply_stack(REPO_DIR, stack, branch)
    except Exception as e:
        LOGGER.warning(f"Failed to apply stack {branch} @ {rev}: {e}")
        return "NOK"

    test_selection_threshold = float(
        os.environ.get("TEST_SELECTION_CONFIDENCE_THRESHOLD", 0.3))

    # Analyze patches.
    commits = repository.download_commits(REPO_DIR,
                                          revs=revs,
                                          save=False,
                                          use_single_process=True)

    commit_data = commit_features.merge_commits(commits)

    def get_runnables(granularity):
        past_failures_data = test_scheduling.get_past_failures(granularity)

        push_num = past_failures_data["push_num"]
        all_runnables = past_failures_data["all_runnables"]

        commit_tests = []
        for data in test_scheduling.generate_data(past_failures_data,
                                                  commit_data, push_num,
                                                  all_runnables, [], []):
            if granularity == "label" and not data["name"].startswith("test-"):
                continue

            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = MODEL_CACHE.get(f"test{granularity}select").classify(
            commit_tests, probabilities=True)
        selected_indexes = np.argwhere(
            probs[:, 1] > test_selection_threshold)[:, 0]
        return {
            commit_tests[i]["test_job"]["name"]:
            math.floor(probs[i, 1] * 100) / 100
            for i in selected_indexes
        }

    data = {
        "tasks": get_runnables("label"),
        "groups": get_runnables("group"),
    }
    setkey(job.result_key, orjson.dumps(data))

    return "OK"
Example No. 8
        def generate_all_data() -> Generator[Dict[str, Any], None, None]:
            past_failures = test_scheduling.get_past_failures(
                granularity, False)

            push_num = past_failures[
                "push_num"] if "push_num" in past_failures else 0

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                commit_map[commit_data["node"]] = commit_data

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            if granularity in ("group", "config_group"):
                update_touched_together_gen = test_scheduling.update_touched_together(
                )
                next(update_touched_together_gen)

            for (
                    i,
                (
                    revisions,
                    fix_revision,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
                ),
            ) in enumerate(tqdm(push_data_iter(), total=push_data_count)):
                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                # Skip wptsync commits, since they are not like normal pushes made by developers.
                if any(repository.is_wptsync(commit) for commit in commits):
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We consider only the runnables which run in this push, and the possible and likely regressions
                # from this push. We can't consider all runnables because we can't be sure that a task that didn't
                # run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions +
                        likely_regressions))

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync DB every 250 pushes, so we clean up the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity in ("group", "config_group"):
                    update_touched_together_gen.send(commits[0]["node"])

                result_data = []
                for data in test_scheduling.generate_data(
                        granularity,
                        past_failures,
                        merged_commits,
                        push_num,
                        runnables_to_consider,
                        possible_regressions,
                        likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result_data.append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield {
                        "revs": revisions,
                        "data": result_data,
                    }

            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(
                f"skipped {skipped_no_runnables} (no interesting runnables)")

            past_failures["push_num"] = push_num
            past_failures.close()
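
Whether a push contributes rows to the generated data depends only on its push date. A sketch of that gate, using a placeholder HISTORY_DATE_START (the real constant is defined elsewhere in the module):

from datetime import datetime

import dateutil.parser

HISTORY_DATE_START = datetime(2019, 1, 1)  # placeholder value

pushdate = dateutil.parser.parse("2019-06-01T12:00:00")
print(pushdate > HISTORY_DATE_START)  # True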
Example No. 9
        def generate_all_data():
            past_failures = test_scheduling.get_past_failures(granularity)

            push_num = past_failures["push_num"] if "push_num" in past_failures else 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                if not can_start:
                    if last_node == commit_data["node"]:
                        can_start = True

                    continue

                commit_map[commit_data["node"]] = commit_data

            with open(push_data_path, "r") as f:
                push_data = json.load(f)

            logger.info(f"push data nodes: {len(push_data)}")

            if granularity == "label":
                push_data = [
                    (
                        revisions,
                        rename_tasks(push_tasks),
                        rename_tasks(possible_regressions),
                        rename_tasks(likely_regressions),
                    )
                    for revisions, push_tasks, possible_regressions, likely_regressions in push_data
                ]

            # In the last 28 pushes, we definitely run all possible runnables.
            all_runnables_set = set(
                sum((push_runnables for _, push_runnables, _, _ in push_data[-28:]), [])
            )
            # Filter runnables we don't need.
            all_runnables = filter_runnables(
                list(all_runnables_set), all_runnables_set, granularity
            )
            all_runnables_set = set(all_runnables_set)
            logger.info(f"{len(all_runnables_set)} runnables run in the last 28 pushes")

            push_data = [
                (
                    revisions,
                    filter_runnables(push_tasks, all_runnables_set, granularity),
                    filter_runnables(
                        possible_regressions, all_runnables_set, granularity
                    ),
                    filter_runnables(
                        likely_regressions, all_runnables_set, granularity
                    ),
                )
                for revisions, push_tasks, possible_regressions, likely_regressions in push_data
            ]

            if granularity == "label":
                generate_failing_together_probabilities(push_data)

            # Store all runnables in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_runnables"] = all_runnables
            # XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_runnables = 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            if granularity == "group":
                update_touched_together_gen = test_scheduling.update_touched_together()
                next(update_touched_together_gen)

            for i in tqdm(range(len(push_data))):
                (
                    revisions,
                    push_runnables,
                    possible_regressions,
                    likely_regressions,
                ) = push_data.pop(0)

                if not can_start:
                    if last_node == revisions[0]:
                        can_start = True

                    continue

                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision)
                    for revision in revisions
                    if revision in commit_map
                )
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_runnables, we'd generate a huge amount of data.
                # We consider only the runnables which run in this push, and the possible and likely regressions
                # from this push. We can't consider all runnables because we can't be sure that a task that didn't
                # run on a push would have been successful.
                runnables_to_consider = list(
                    set(push_runnables + possible_regressions + likely_regressions)
                )

                if len(runnables_to_consider) == 0:
                    skipped_no_runnables += 1
                    continue

                # Sync DB every 250 pushes, so we clean up the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                if granularity == "group":
                    update_touched_together_gen.send(commits[0]["node"])

                result = {
                    "revs": revisions,
                    "data": [],
                }
                for data in test_scheduling.generate_data(
                    past_failures,
                    merged_commits,
                    push_num,
                    runnables_to_consider,
                    possible_regressions,
                    likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        result["data"].append(data)

                if pushdate > HISTORY_DATE_START:
                    saved_nodes.add(i)
                    yield result

            if granularity == "group":
                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(f"skipped {skipped_no_runnables} (no interesting runnables)")

            past_failures["push_num"] = push_num
            past_failures.close()
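
The all-runnables set is built by flattening the task lists of the most recent pushes with sum(..., []). A small sketch of that idiom with made-up push data:

# Each entry is (revisions, push_tasks, possible_regressions, likely_regressions).
push_data = [
    (["rev1"], ["test-a", "test-b"], [], []),
    (["rev2"], ["test-b", "test-c"], [], []),
]

all_runnables_set = set(
    sum((push_tasks for _, push_tasks, _, _ in push_data[-28:]), [])
)
print(sorted(all_runnables_set))  # ['test-a', 'test-b', 'test-c']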
Example No. 10
    def classify_test_select(self, commits, runnable_jobs_path):
        testfailure_probs = self.testfailure_model.classify(commits[-1],
                                                            probabilities=True)

        logger.info(f"Test failure risk: {testfailure_probs[0][1]}")

        commit_data = commit_features.merge_commits(commits)

        push_num = self.past_failures_data["push_num"]

        # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES.
        all_tasks = self.past_failures_data["all_runnables"]

        if not runnable_jobs_path:
            runnable_jobs = {task for task in all_tasks}
        elif runnable_jobs_path.startswith("http"):
            r = requests.get(runnable_jobs_path)
            r.raise_for_status()
            runnable_jobs = r.json()
        else:
            with open(runnable_jobs_path, "r") as f:
                runnable_jobs = json.load(f)

        # XXX: For now, only restrict to linux64 test tasks.
        all_tasks = [
            t for t in all_tasks if t.startswith("test-linux1804-64/")
        ]

        # XXX: Remove tasks which are not in runnable jobs right away, so we avoid classifying them.

        commit_tests = []
        for data in test_scheduling.generate_data(self.past_failures_data,
                                                  commit_data, push_num,
                                                  all_tasks, [], []):
            if not data["name"].startswith("test-"):
                continue

            commit_test = commit_data.copy()
            commit_test["test_job"] = data
            commit_tests.append(commit_test)

        probs = self.model.classify(commit_tests, probabilities=True)
        selected_indexes = np.argwhere(probs[:, 1] > float(
            get_secret("TEST_SELECTION_CONFIDENCE_THRESHOLD")))[:, 0]
        selected_tasks = [
            commit_tests[i]["test_job"]["name"] for i in selected_indexes
        ]

        with open("failure_risk", "w") as f:
            f.write("1" if testfailure_probs[0][1] > float(
                get_secret("TEST_FAILURE_CONFIDENCE_THRESHOLD")) else "0")

        # This should be kept in sync with the test scheduling history retriever script.
        cleaned_selected_tasks = []
        for selected_task in selected_tasks:
            if (selected_task.startswith("test-linux64")
                    and selected_task not in runnable_jobs):
                selected_task = selected_task.replace("test-linux64-",
                                                      "test-linux1804-64-")

            if (selected_task.startswith("test-linux1804-64-")
                    and selected_task not in runnable_jobs):
                selected_task = selected_task.replace("test-linux1804-64-",
                                                      "test-linux64-")

            if selected_task in runnable_jobs:
                cleaned_selected_tasks.append(selected_task)

        # It isn't worth running the build associated with the tests if we would run fewer than three test tasks.
        if len(cleaned_selected_tasks) < 3:
            cleaned_selected_tasks = []

        with open("selected_tasks", "w") as f:
            f.writelines(f"{selected_task}\n"
                         for selected_task in cleaned_selected_tasks)
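
The cleanup loop maps between the old and new Linux platform prefixes and keeps only tasks that are actually runnable. A sketch of the first fallback, with made-up task names:

runnable_jobs = {"test-linux1804-64-qr/opt-mochitest-1"}

selected_task = "test-linux64-qr/opt-mochitest-1"
if selected_task.startswith("test-linux64") and selected_task not in runnable_jobs:
    selected_task = selected_task.replace("test-linux64-", "test-linux1804-64-")
print(selected_task in runnable_jobs)  # True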
Example No. 11
        def generate_all_data():
            past_failures = test_scheduling.get_past_failures()

            push_num = past_failures[
                "push_num"] if "push_num" in past_failures else 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                if not can_start:
                    if last_node == commit_data["node"]:
                        can_start = True

                    continue

                commit_map[commit_data["node"]] = commit_data

            with open("push_data.json", "r") as f:
                push_data = json.load(f)[1:]

            logger.info(f"push data nodes: {len(push_data)}")

            # In the last 28 pushes, we definitely run all possible tasks.
            all_tasks_set = set(
                sum((push_tasks for _, push_tasks, _, _ in push_data[-28:]),
                    []))
            # Filter tasks we don't need.
            all_tasks = filter_tasks(list(all_tasks_set), all_tasks_set)
            all_tasks_set = set(all_tasks)
            logger.info(
                f"{len(all_tasks_set)} tasks run in the last 28 pushes")

            # Store all tasks in the past_failures DB so it can be used in the evaluation phase.
            past_failures["all_tasks"] = all_tasks
            # XXX: Should we recreate the DB from scratch if the previous all_tasks are not the
            # same as the current ones?

            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_tasks = 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            for i in tqdm(range(len(push_data))):
                (
                    revisions,
                    push_tasks,
                    possible_regressions,
                    likely_regressions,
                ) = push_data.pop(0)

                if not can_start:
                    if last_node == revisions[0]:
                        can_start = True

                    continue

                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 50:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_tasks, we'd generate a huge amount of data.
                # So we consider only the tasks which run in this push, and the possible and likely regressions
                # from this push.
                tasks_to_consider = list(
                    set(push_tasks + possible_regressions +
                        likely_regressions))
                tasks_to_consider = filter_tasks(tasks_to_consider,
                                                 all_tasks_set)

                if len(tasks_to_consider) == 0:
                    skipped_no_tasks += 1
                    continue

                # Sync DB every 250 pushes, so we clean up the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                for data in test_scheduling.generate_data(
                        past_failures,
                        merged_commits,
                        push_num,
                        tasks_to_consider,
                        possible_regressions,
                        likely_regressions,
                ):
                    if pushdate > HISTORY_DATE_START:
                        saved_nodes.add(i)
                        data["revs"] = revisions
                        yield data

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(f"skipped {skipped_no_tasks} (no interesting tasks)")

            past_failures["push_num"] = push_num
            past_failures.close()
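
past_failures is a shelve-backed store, and the periodic sync() keeps its write-back cache from growing without bound. A minimal sketch of the idiom, assuming a writeback shelve with a hypothetical file name:

import shelve

past_failures = shelve.open("past_failures_demo", writeback=True)
for i in range(1000):
    past_failures[str(i)] = i
    # Flush the in-memory cache periodically, mirroring the sync every 250 pushes above.
    if i % 250 == 0:
        past_failures.sync()
past_failures.close()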
Example No. 12
    def classify(self, diff_id):
        self.update_commit_db()

        with hglib.open(self.repo_dir) as hg:
            self.apply_phab(hg, diff_id)

            patch_rev = hg.log(revrange="not public()")[0].node

            # Analyze patch.
            commits = repository.download_commits(
                self.repo_dir, rev_start=patch_rev.decode("utf-8"), save=False
            )

        # We use "clean" (or "dirty") commits as the background dataset for feature importance.
        # This way, we can see the features which are most important in differentiating
        # the current commit from the "clean" (or "dirty") commits.

        if not self.use_test_history:
            probs, importance = self.model.classify(
                commits[-1],
                probabilities=True,
                importances=True,
                background_dataset=lambda v: self.X[self.y != v],
                importance_cutoff=0.05,
            )

            self.generate_feature_importance_data(probs, importance)

            with open("probs.json", "w") as f:
                json.dump(probs[0].tolist(), f)

            if self.model_name == "regressor" and self.method_defect_predictor_dir:
                self.classify_methods()
        else:
            backout_probs = self.backout_model.classify(commits[-1], probabilities=True)

            logger.info(f"Backout risk: {backout_probs[0][1]}")

            commit_data = commit_features.merge_commits(commits)

            push_num = self.past_failures_data["push_num"]

            # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES.
            # XXX: Consider using the runnable jobs artifact from the Gecko Decision task.
            all_tasks = self.past_failures_data["all_tasks"]

            selected_tasks = []
            # TODO: Classify multiple commit/test at the same time.
            for data in test_scheduling.generate_data(
                self.past_failures_data, commit_data, push_num, all_tasks, [], []
            ):
                if not data["name"].startswith("test-"):
                    continue

                commit_data["test_job"] = data

                probs = self.model.classify(commit_data, probabilities=True)

                if probs[0][1] > float(
                    get_secret("TEST_SELECTION_CONFIDENCE_THRESHOLD")
                ):
                    selected_tasks.append(data["name"])

            with open("failure_risk", "w") as f:
                f.write(
                    "1"
                    if backout_probs[0][1]
                    > float(get_secret("TEST_FAILURE_CONFIDENCE_THRESHOLD"))
                    else "0"
                )

            with open("selected_tasks", "w") as f:
                f.writelines(f"{selected_task}\n" for selected_task in selected_tasks)
Example No. 13
    def classify(self, diff_id):
        self.update_commit_db()

        with hglib.open(self.repo_dir) as hg:
            self.apply_phab(hg, diff_id)

            patch_rev = hg.log(revrange="not public()")[0].node

            # Analyze patch.
            commits = repository.download_commits(
                self.repo_dir, rev_start=patch_rev.decode("utf-8"), save=False
            )

        # We use "clean" (or "dirty") commits as the background dataset for feature importance.
        # This way, we can see the features which are most important in differentiating
        # the current commit from the "clean" (or "dirty") commits.

        if not self.use_test_history:
            probs, importance = self.model.classify(
                commits[-1],
                probabilities=True,
                importances=True,
                background_dataset=lambda v: self.X[self.y != v],
                importance_cutoff=0.05,
            )

            self.generate_feature_importance_data(probs, importance)

            with open("probs.json", "w") as f:
                json.dump(probs[0].tolist(), f)

            if self.model_name == "regressor" and self.method_defect_predictor_dir:
                self.classify_methods(commits[-1])
        else:
            testfailure_probs = self.testfailure_model.classify(
                commits[-1], probabilities=True
            )

            logger.info(f"Test failure risk: {testfailure_probs[0][1]}")

            commit_data = commit_features.merge_commits(commits)

            push_num = self.past_failures_data["push_num"]

            # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES.
            # XXX: Consider using the runnable jobs artifact from the Gecko Decision task.
            all_tasks = self.past_failures_data["all_tasks"]

            # XXX: For now, only restrict to test-linux64 tasks.
            all_tasks = [
                t
                for t in all_tasks
                if t.startswith("test-linux64/") and "test-verify" not in t
            ]

            commit_tests = []
            for data in test_scheduling.generate_data(
                self.past_failures_data, commit_data, push_num, all_tasks, [], []
            ):
                if not data["name"].startswith("test-"):
                    continue

                commit_test = commit_data.copy()
                commit_test["test_job"] = data
                commit_tests.append(commit_test)

            probs = self.model.classify(commit_tests, probabilities=True)
            selected_indexes = np.argwhere(
                probs[:, 1] > float(get_secret("TEST_SELECTION_CONFIDENCE_THRESHOLD"))
            )[:, 0]
            selected_tasks = [
                commit_tests[i]["test_job"]["name"] for i in selected_indexes
            ]

            with open("failure_risk", "w") as f:
                f.write(
                    "1"
                    if testfailure_probs[0][1]
                    > float(get_secret("TEST_FAILURE_CONFIDENCE_THRESHOLD"))
                    else "0"
                )

            # It isn't worth running the build associated with the tests if we would run fewer than three test tasks.
            if len(selected_tasks) < 3:
                selected_tasks = []

            with open("selected_tasks", "w") as f:
                f.writelines(f"{selected_task}\n" for selected_task in selected_tasks)
Example No. 14
        def generate_data():
            nonlocal push_num
            saved_nodes = set()
            skipped_no_commits = 0
            skipped_too_big_commits = 0
            skipped_no_tasks = 0

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            commit_map = {}
            for commit_data in tqdm(repository.get_commits()):
                if not can_start:
                    if last_node == commit_data["node"]:
                        can_start = True

                    continue

                commit_map[commit_data["node"]] = commit_data

            with open("push_data.json", "r") as f:
                push_data = json.load(f)[1:]

            logger.info(f"push data nodes: {len(push_data)}")

            # In the last 28 pushes, we definitely run all possible tasks.
            all_tasks_set = set(
                sum((push_tasks for _, push_tasks, _, _ in push_data[-28:]),
                    []))
            # Filter tasks we don't need.
            all_tasks = filter_tasks(list(all_tasks_set), all_tasks_set)
            all_tasks_set = set(all_tasks)
            logger.info(
                f"{len(all_tasks_set)} tasks run in the last 28 pushes")

            # We can start once we get to the last revision we added in the previous run.
            can_start = True if last_node is None else False

            for i in tqdm(range(len(push_data))):
                (
                    revisions,
                    push_tasks,
                    possible_regressions,
                    likely_regressions,
                ) = push_data.pop(0)

                if not can_start:
                    if last_node == revisions[0]:
                        can_start = True

                    continue

                push_num += 1

                # XXX: Some commits are skipped in the repository mining, e.g. merges and backouts. Maybe we should not skip them.
                commits = tuple(
                    commit_map.pop(revision) for revision in revisions
                    if revision in commit_map)
                if len(commits) == 0:
                    skipped_no_commits += 1
                    continue

                merged_commits = commit_features.merge_commits(commits)

                # XXX: For now, skip commits which are too large.
                # In the future we can either:
                #  - Improve shelve perf and go back to consider all files;
                #  - Consider only files which appear with a given frequency, like the "files" feature in commit_features;
                #  - Keep a limit of number of files.
                if len(merged_commits["files"]) > 20:
                    skipped_too_big_commits += 1
                    continue

                # If we considered all_tasks, we'd generate a huge amount of data.
                # So we consider only the tasks which run in this push, and the possible and likely regressions
                # from this push.
                tasks_to_consider = list(
                    set(push_tasks + possible_regressions +
                        likely_regressions))
                tasks_to_consider = filter_tasks(tasks_to_consider,
                                                 all_tasks_set)

                if len(tasks_to_consider) == 0:
                    skipped_no_tasks += 1
                    continue

                # Sync DB every 250 pushes, so we clean up the shelve cache (we'd run OOM otherwise!).
                if i % 250 == 0:
                    past_failures.sync()

                pushdate = dateutil.parser.parse(merged_commits["pushdate"])

                for task in tasks_to_consider:
                    is_regression = (task in possible_regressions
                                     or task in likely_regressions)

                    (
                        total_failures,
                        past_7_pushes_failures,
                        past_14_pushes_failures,
                        past_28_pushes_failures,
                        past_56_pushes_failures,
                    ) = get_and_update_past_failures("all", task, ["all"],
                                                     push_num, is_regression)

                    (
                        total_types_failures,
                        past_7_pushes_types_failures,
                        past_14_pushes_types_failures,
                        past_28_pushes_types_failures,
                        past_56_pushes_types_failures,
                    ) = get_and_update_past_failures("type", task,
                                                     merged_commits["types"],
                                                     push_num, is_regression)

                    (
                        total_files_failures,
                        past_7_pushes_files_failures,
                        past_14_pushes_files_failures,
                        past_28_pushes_files_failures,
                        past_56_pushes_files_failures,
                    ) = get_and_update_past_failures("file", task,
                                                     merged_commits["files"],
                                                     push_num, is_regression)

                    (
                        total_directories_failures,
                        past_7_pushes_directories_failures,
                        past_14_pushes_directories_failures,
                        past_28_pushes_directories_failures,
                        past_56_pushes_directories_failures,
                    ) = get_and_update_past_failures(
                        "directory",
                        task,
                        merged_commits["directories"],
                        push_num,
                        is_regression,
                    )

                    (
                        total_components_failures,
                        past_7_pushes_components_failures,
                        past_14_pushes_components_failures,
                        past_28_pushes_components_failures,
                        past_56_pushes_components_failures,
                    ) = get_and_update_past_failures(
                        "component",
                        task,
                        merged_commits["components"],
                        push_num,
                        is_regression,
                    )

                    if pushdate > HISTORY_DATE_START:
                        saved_nodes.add(i)

                        yield {
                            "revs": revisions,
                            "name": task,
                            "failures": total_failures,
                            "failures_past_7_pushes": past_7_pushes_failures,
                            "failures_past_14_pushes": past_14_pushes_failures,
                            "failures_past_28_pushes": past_28_pushes_failures,
                            "failures_past_56_pushes": past_56_pushes_failures,
                            "failures_in_types": total_types_failures,
                            "failures_past_7_pushes_in_types":
                            past_7_pushes_types_failures,
                            "failures_past_14_pushes_in_types":
                            past_14_pushes_types_failures,
                            "failures_past_28_pushes_in_types":
                            past_28_pushes_types_failures,
                            "failures_past_56_pushes_in_types":
                            past_56_pushes_types_failures,
                            "failures_in_files": total_files_failures,
                            "failures_past_7_pushes_in_files":
                            past_7_pushes_files_failures,
                            "failures_past_14_pushes_in_files":
                            past_14_pushes_files_failures,
                            "failures_past_28_pushes_in_files":
                            past_28_pushes_files_failures,
                            "failures_past_56_pushes_in_files":
                            past_56_pushes_files_failures,
                            "failures_in_directories":
                            total_directories_failures,
                            "failures_past_7_pushes_in_directories":
                            past_7_pushes_directories_failures,
                            "failures_past_14_pushes_in_directories":
                            past_14_pushes_directories_failures,
                            "failures_past_28_pushes_in_directories":
                            past_28_pushes_directories_failures,
                            "failures_past_56_pushes_in_directories":
                            past_56_pushes_directories_failures,
                            "failures_in_components":
                            total_components_failures,
                            "failures_past_7_pushes_in_components":
                            past_7_pushes_components_failures,
                            "failures_past_14_pushes_in_components":
                            past_14_pushes_components_failures,
                            "failures_past_28_pushes_in_components":
                            past_28_pushes_components_failures,
                            "failures_past_56_pushes_in_components":
                            past_56_pushes_components_failures,
                            "is_possible_regression": task
                            in possible_regressions,
                            "is_likely_regression": task in likely_regressions,
                        }

            logger.info(f"saved push data nodes: {len(saved_nodes)}")
            logger.info(f"skipped {skipped_no_commits} (no commits in our DB)")
            logger.info(f"skipped {skipped_too_big_commits} (too big commits)")
            logger.info(f"skipped {skipped_no_tasks} (no interesting tasks)")