Example #1
def test_all(g: Graph) -> None:
    tasks = [f"windows10/opt-{chr(i)}" for i in range(len(g.vs))]

    try:
        test_scheduling.close_failing_together_db("label")
    except AssertionError:
        pass
    test_scheduling.remove_failing_together_db("label")

    # TODO: Also add some couples that are *not* failing together.
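    # A hedged sketch of that TODO (not part of the original test): such a
    # couple could be stored with a low confidence, e.g.
    # ft[task1][task2] = (0.1, 0.3), so that the pair is not considered
    # redundant at threshold 1.0.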
    ft: Dict[str, Dict[str, Tuple[float, float]]] = {}

    for edge in g.es:
        task1 = tasks[edge.tuple[0]]
        task2 = tasks[edge.tuple[1]]
        assert task1 < task2
        if task1 not in ft:
            ft[task1] = {}
        ft[task1][task2] = (0.1, 1.0)

    failing_together = test_scheduling.get_failing_together_db("label", False)
    for t, ts in ft.items():
        failing_together[t.encode("ascii")] = pickle.dumps(ts)

    test_scheduling.close_failing_together_db("label")

    model = TestLabelSelectModel()
    result = model.reduce(tasks, 1.0)
    hypothesis.note(f"Result: {sorted(result)}")
    assert len(result) == len(g.components())
Example #2
def test_reduce():
    failing_together = test_scheduling.get_failing_together_db("label")
    failing_together[b"test-linux64/debug$test-windows10/debug"] = struct.pack(
        "ff", 0.1, 1.0)
    failing_together[b"test-linux64/debug$test-windows10/opt"] = struct.pack(
        "ff", 0.1, 1.0)
    failing_together[b"test-linux64/opt$test-windows10/opt"] = struct.pack(
        "ff", 0.1, 0.91)
    failing_together[b"test-linux64/debug$test-linux64/opt"] = struct.pack(
        "ff", 0.1, 1.0)
    failing_together[
        b"test-linux64-asan/debug$test-linux64/debug"] = struct.pack(
            "ff", 0.1, 1.0)
    test_scheduling.close_failing_together_db("label")

    model = TestLabelSelectModel()
    assert model.reduce({"test-linux64/debug", "test-windows10/debug"},
                        1.0) == {"test-linux64/debug"}
    assert model.reduce({"test-linux64/debug", "test-windows10/opt"},
                        1.0) == {"test-linux64/debug"}
    assert model.reduce({"test-linux64/opt", "test-windows10/opt"}, 1.0) == {
        "test-linux64/opt",
        "test-windows10/opt",
    }
    assert model.reduce({"test-linux64/opt", "test-windows10/opt"},
                        0.9) == {"test-linux64/opt"}
    assert model.reduce({"test-linux64/opt", "test-linux64/debug"},
                        1.0) == {"test-linux64/opt"}
    assert model.reduce({"test-linux64-asan/debug", "test-linux64/debug"},
                        1.0) == {"test-linux64/debug"}
Example #3
def mock_schedule_tests_classify(monkeypatch):
    with open("known_tasks", "w") as f:
        f.write("prova")

    # Initialize a mock past failures DB.
    for granularity in ("label", "group"):
        past_failures_data = test_scheduling.get_past_failures(granularity)
        past_failures_data["push_num"] = 1
        past_failures_data["all_runnables"] = [
            f"test-{granularity}1",
            f"test-{granularity}2",
            "test-linux64/opt",
            "test-windows10/opt",
        ]
        past_failures_data.close()

    failing_together = test_scheduling.get_failing_together_db()
    failing_together[b"test-linux64/opt$test-windows10/opt"] = struct.pack(
        "ff", 0.1, 1.0)
    test_scheduling.close_failing_together_db()

    def do_mock(labels_to_choose, groups_to_choose):
        # Add a mock test selection model.
        def classify(self, items, probabilities=False):
            assert probabilities
            results = []
            for item in items:
                runnable_name = item["test_job"]["name"]
                if self.granularity == "label":
                    if runnable_name in labels_to_choose:
                        results.append([
                            1 - labels_to_choose[runnable_name],
                            labels_to_choose[runnable_name],
                        ])
                    else:
                        results.append([0.9, 0.1])
                elif self.granularity == "group":
                    if runnable_name in groups_to_choose:
                        results.append([
                            1 - groups_to_choose[runnable_name],
                            groups_to_choose[runnable_name],
                        ])
                    else:
                        results.append([0.9, 0.1])
            return np.array(results)

        class MockModelCache:
            def get(self, model_name):
                if "group" in model_name:
                    return bugbug.models.testselect.TestGroupSelectModel()
                else:
                    return bugbug.models.testselect.TestLabelSelectModel()

        monkeypatch.setattr(bugbug_http.models, "MODEL_CACHE",
                            MockModelCache())
        monkeypatch.setattr(bugbug.models.testselect.TestSelectModel,
                            "classify", classify)

    return do_mock
Example #4
def mock_get_config_specific_groups(
    monkeypatch: MonkeyPatch,
) -> None:
    with open("known_tasks", "w") as f:
        f.write("prova")

    # Initialize a mock past failures DB.
    past_failures_data = test_scheduling.get_past_failures("group", False)
    past_failures_data["push_num"] = 1
    past_failures_data["all_runnables"] = [
        "test-group1",
        "test-group2",
    ]
    past_failures_data.close()

    try:
        test_scheduling.close_failing_together_db("config_group")
    except AssertionError:
        pass
    failing_together = test_scheduling.get_failing_together_db("config_group", False)
    failing_together[b"$ALL_CONFIGS$"] = pickle.dumps(
        ["test-linux1804-64/opt-*", "test-windows10/debug-*", "test-windows10/opt-*"]
    )
    failing_together[b"$CONFIGS_BY_GROUP$"] = pickle.dumps(
        {
            "test-group1": {
                "test-linux1804-64/opt-*",
                "test-windows10/debug-*",
                "test-windows10/opt-*",
            },
            "test-group2": {
                "test-linux1804-64/opt-*",
                "test-windows10/debug-*",
                "test-windows10/opt-*",
            },
        }
    )
    failing_together[b"test-group1"] = pickle.dumps(
        {
            "test-linux1804-64/opt-*": {
                "test-windows10/debug-*": (1.0, 0.0),
                "test-windows10/opt-*": (1.0, 0.0),
            },
            "test-windows10/debug-*": {
                "test-windows10/opt-*": (1.0, 1.0),
            },
        }
    )
    test_scheduling.close_failing_together_db("config_group")

    monkeypatch.setattr(bugbug_http.models, "MODEL_CACHE", MockModelCache())
Example #5
def test_reduce():
    failing_together = test_scheduling.get_failing_together_db("label")
    failing_together[b"test-linux1804-64/debug"] = pickle.dumps(
        {
            "test-windows10/debug": (0.1, 1.0),
            "test-windows10/opt": (0.1, 1.0),
            "test-linux1804-64/opt": (0.1, 1.0),
        }
    )
    failing_together[b"test-linux1804-64/opt"] = pickle.dumps(
        {"test-windows10/opt": (0.1, 0.91),}
    )
    failing_together[b"test-linux1804-64-asan/debug"] = pickle.dumps(
        {"test-linux1804-64/debug": (0.1, 1.0),}
    )
    test_scheduling.close_failing_together_db("label")

    model = TestLabelSelectModel()
    assert model.reduce({"test-linux1804-64/debug", "test-windows10/debug"}, 1.0) == {
        "test-linux1804-64/debug"
    }
    assert model.reduce({"test-linux1804-64/debug", "test-windows10/opt"}, 1.0) == {
        "test-linux1804-64/debug"
    }
    assert model.reduce({"test-linux1804-64/opt", "test-windows10/opt"}, 1.0) == {
        "test-linux1804-64/opt",
        "test-windows10/opt",
    }
    assert model.reduce({"test-linux1804-64/opt", "test-windows10/opt"}, 0.9) == {
        "test-linux1804-64/opt"
    }
    assert model.reduce({"test-linux1804-64/opt", "test-linux1804-64/debug"}, 1.0) == {
        "test-linux1804-64/opt"
    }
    assert model.reduce(
        {"test-linux1804-64-asan/debug", "test-linux1804-64/debug"}, 1.0
    ) == {"test-linux1804-64/debug"}

    # Test case where the second task is not present in the failing together stats of the first.
    assert model.reduce(
        {"test-linux1804-64-asan/debug", "test-windows10/opt"}, 1.0
    ) == {"test-linux1804-64-asan/debug", "test-windows10/opt"}

    # Test case where a task is not present at all in the failing together DB.
    assert model.reduce({"test-linux1804-64-qr/debug", "test-windows10/opt"}, 1.0) == {
        "test-linux1804-64-qr/debug",
        "test-windows10/opt",
    }
Example #6
def test_reduce2(failing_together: LMDBDict) -> None:
    failing_together[b"windows10/opt-a"] = pickle.dumps({
        "windows10/opt-b": (0.1, 1.0),
        "windows10/opt-c": (0.1, 0.3),
        "windows10/opt-d": (0.1, 1.0),
    })
    failing_together[b"windows10/opt-b"] = pickle.dumps({
        "windows10/opt-c": (0.1, 1.0),
        "windows10/opt-d": (0.1, 0.3),
    })
    test_scheduling.close_failing_together_db("label")

    assert testselect.reduce_configs(
        {
            "windows10/opt-a", "windows10/opt-b", "windows10/opt-c",
            "windows10/opt-d"
        },
        1.0,
    ) == {
        "windows10/opt-b",
    }
Example #7
def test_reduce2(failing_together: LMDBDict) -> None:
    failing_together[b"windows10/opt-a"] = pickle.dumps({
        "windows10/opt-b": (0.1, 1.0),
        "windows10/opt-c": (0.1, 0.3),
        "windows10/opt-d": (0.1, 1.0),
    })
    failing_together[b"windows10/opt-b"] = pickle.dumps({
        "windows10/opt-c": (0.1, 1.0),
        "windows10/opt-d": (0.1, 0.3),
    })
    test_scheduling.close_failing_together_db("label")

    model = TestLabelSelectModel()
    assert model.reduce(
        {
            "windows10/opt-a", "windows10/opt-b", "windows10/opt-c",
            "windows10/opt-d"
        },
        1.0,
    ) == {
        "windows10/opt-b",
    }
Example #8
def test_select_configs(failing_together_config_group: LMDBDict) -> None:
    past_failures_data = test_scheduling.get_past_failures("group", False)
    past_failures_data["all_runnables"] = ["group1", "group2"]
    past_failures_data.close()

    failing_together_config_group[b"group1"] = pickle.dumps({
        "linux1804-64-asan/debug": {
            "linux1804-64/debug": (1.0, 0.0),
            "linux1804-64/opt": (1.0, 0.0),
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 0.0),
        },
        "linux1804-64/debug": {
            "linux1804-64/opt": (1.0, 1.0),
            "mac/debug": (1.0, 1.0),
            "windows10/debug": (1.0, 1.0),
        },
        "linux1804-64/opt": {
            "mac/debug": (1.0, 1.0),
            "windows10/debug": (1.0, 1.0),
        },
        "mac/debug": {
            "windows10/debug": (1.0, 1.0)
        },
    })
    failing_together_config_group[b"group2"] = pickle.dumps({
        "linux1804-64-asan/debug": {
            "linux1804-64/debug": (1.0, 1.0),
            "linux1804-64/opt": (1.0, 0.0),
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 0.0),
        },
        "linux1804-64/debug": {
            "linux1804-64/opt": (1.0, 0.0),
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 1.0),
        },
        "linux1804-64/opt": {
            "mac/debug": (1.0, 0.0),
            "windows10/debug": (1.0, 0.0),
        },
        "mac/debug": {
            "windows10/debug": (1.0, 0.0)
        },
    })
    failing_together_config_group[b"$ALL_CONFIGS$"] = pickle.dumps([
        "linux1804-64-asan/debug",
        "linux1804-64/debug",
        "linux1804-64/opt",
        "mac/debug",
        "windows10/debug",
    ])
    failing_together_config_group[b"$CONFIGS_BY_GROUP$"] = pickle.dumps({
        "group1": {
            "linux1804-64-asan/debug",
            "linux1804-64/debug",
            "linux1804-64/opt",
            "mac/debug",
            "windows10/debug",
        },
        "group2": {
            "linux1804-64-asan/debug",
            "linux1804-64/debug",
            "linux1804-64/opt",
            "mac/debug",
            "windows10/debug",
        },
    })
    test_scheduling.close_failing_together_db("config_group")

    model = TestGroupSelectModel()
    result = model.select_configs(
        {
            "group1",
            "group2",
        },
        1.0,
    )
    assert len(result) == 2
    assert set(
        result["group1"]) == {"linux1804-64-asan/debug", "linux1804-64/opt"}
    assert set(result["group2"]) == {
        "linux1804-64/opt",
        "mac/debug",
        "linux1804-64/debug",
    }
Example #9
def failing_together_config_group() -> Iterator[LMDBDict]:
    yield test_scheduling.get_failing_together_db("config_group", False)
    test_scheduling.close_failing_together_db("config_group")
Example #10
def failing_together() -> Iterator[LMDBDict]:
    yield test_scheduling.get_failing_together_db("label", False)
    test_scheduling.close_failing_together_db("label")
Example #11
def mock_schedule_tests_classify(
    monkeypatch: MonkeyPatch,
) -> Callable[[dict[str, float], dict[str, float]], None]:
    with open("known_tasks", "w") as f:
        f.write("prova")

    # Initialize a mock past failures DB.
    for granularity in ("label", "group"):
        past_failures_data = test_scheduling.get_past_failures(granularity, False)
        past_failures_data["push_num"] = 1
        past_failures_data["all_runnables"] = [
            "test-linux1804-64-opt-label1",
            "test-linux1804-64-opt-label2",
            "test-group1",
            "test-group2",
            "test-linux1804-64/opt",
            "test-windows10/opt",
        ]
        past_failures_data.close()

    try:
        test_scheduling.close_failing_together_db("label")
    except AssertionError:
        pass
    failing_together = test_scheduling.get_failing_together_db("label", False)
    failing_together[b"test-linux1804-64/opt"] = pickle.dumps(
        {
            "test-windows10/opt": (0.1, 1.0),
        }
    )
    test_scheduling.close_failing_together_db("label")

    try:
        test_scheduling.close_failing_together_db("config_group")
    except AssertionError:
        pass
    failing_together = test_scheduling.get_failing_together_db("config_group", False)
    failing_together[b"$ALL_CONFIGS$"] = pickle.dumps(
        ["test-linux1804-64/opt", "test-windows10/debug", "test-windows10/opt"]
    )
    failing_together[b"$CONFIGS_BY_GROUP$"] = pickle.dumps(
        {
            "test-group1": {
                "test-linux1804-64/opt",
                "test-windows10/debug",
                "test-windows10/opt",
            },
            "test-group2": {
                "test-linux1804-64/opt",
                "test-windows10/debug",
                "test-windows10/opt",
            },
        }
    )
    failing_together[b"test-group1"] = pickle.dumps(
        {
            "test-linux1804-64/opt": {
                "test-windows10/debug": (1.0, 0.0),
                "test-windows10/opt": (1.0, 1.0),
            },
            "test-windows10/debug": {
                "test-windows10/opt": (1.0, 0.0),
            },
        }
    )
    test_scheduling.close_failing_together_db("config_group")

    try:
        test_scheduling.close_touched_together_db()
    except AssertionError:
        pass
    test_scheduling.get_touched_together_db(False)
    test_scheduling.close_touched_together_db()

    def do_mock(labels_to_choose, groups_to_choose):
        # Add a mock test selection model.
        def classify(self, items, probabilities=False):
            assert probabilities
            results = []
            for item in items:
                runnable_name = item["test_job"]["name"]
                if self.granularity == "label":
                    if runnable_name in labels_to_choose:
                        results.append(
                            [
                                1 - labels_to_choose[runnable_name],
                                labels_to_choose[runnable_name],
                            ]
                        )
                    else:
                        results.append([0.9, 0.1])
                elif self.granularity == "group":
                    if runnable_name in groups_to_choose:
                        results.append(
                            [
                                1 - groups_to_choose[runnable_name],
                                groups_to_choose[runnable_name],
                            ]
                        )
                    else:
                        results.append([0.9, 0.1])
            return np.array(results)

        monkeypatch.setattr(bugbug_http.models, "MODEL_CACHE", MockModelCache())
        monkeypatch.setattr(
            bugbug.models.testselect.TestSelectModel, "classify", classify
        )

    return do_mock
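
A hedged usage sketch (the label and group names are taken from the all_runnables lists above; the probabilities are made up, not from the original tests): a test that requested this fixture could call the returned function as

do_mock(
    {"test-linux1804-64-opt-label1": 0.9},
    {"test-group1": 0.9},
)

so that the patched classify() reports those runnables with a high positive probability and every other runnable with the default 0.1.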
Example #12
        def generate_failing_together_probabilities(push_data):
            # TODO: we should consider the probabilities of `task1 failure -> task2 failure` and
            # `task2 failure -> task1 failure` separately, as they could be different.

            count_runs = collections.Counter()
            count_single_failures = collections.Counter()
            count_both_failures = collections.Counter()

            for revisions, tasks, likely_regressions, candidate_regressions in tqdm(
                push_data
            ):
                failures = set(likely_regressions + candidate_regressions)
                all_tasks = list(set(tasks) | failures)

                for task1, task2 in itertools.combinations(sorted(all_tasks), 2):
                    count_runs[(task1, task2)] += 1

                    if task1 in failures:
                        if task2 in failures:
                            count_both_failures[(task1, task2)] += 1
                        else:
                            count_single_failures[(task1, task2)] += 1
                    elif task2 in failures:
                        count_single_failures[(task1, task2)] += 1

            stats = {}

            skipped = 0

            for couple, run_count in count_runs.most_common():
                failure_count = count_both_failures[couple]
                support = failure_count / run_count

                if support < 1 / 700:
                    skipped += 1
                    continue

                if failure_count != 0:
                    confidence = failure_count / (
                        count_single_failures[couple] + failure_count
                    )
                else:
                    confidence = 0.0

                stats[couple] = (support, confidence)

            logger.info(f"{skipped} couples skipped because their support was too low")

            logger.info("Redundancies with the highest support and confidence:")
            for couple, (support, confidence) in sorted(
                stats.items(), key=lambda k: (-k[1][1], -k[1][0])
            )[:7]:
                failure_count = count_both_failures[couple]
                run_count = count_runs[couple]
                logger.info(
                    f"{couple[0]} - {couple[1]} redundancy confidence {confidence}, support {support} ({failure_count} over {run_count})."
                )

            logger.info("Redundancies with the highest confidence and lowest support:")
            for couple, (support, confidence) in sorted(
                stats.items(), key=lambda k: (-k[1][1], k[1][0])
            )[:7]:
                failure_count = count_both_failures[couple]
                run_count = count_runs[couple]
                logger.info(
                    f"{couple[0]} - {couple[1]} redundancy confidence {confidence}, support {support} ({failure_count} over {run_count})."
                )

            failing_together = test_scheduling.get_failing_together_db()
            count_redundancies = collections.Counter()
            for couple, (support, confidence) in stats.items():
                if confidence == 1.0:
                    count_redundancies["==100%"] += 1
                if confidence > 0.9:
                    count_redundancies[">=90%"] += 1
                if confidence > 0.8:
                    count_redundancies[">=80%"] += 1
                if confidence > 0.7:
                    count_redundancies[">=70%"] += 1

                if confidence < 0.7:
                    continue

                failing_together[
                    f"{couple[0]}${couple[1]}".encode("utf-8")
                ] = struct.pack("ff", support, confidence)

            for percentage, count in count_redundancies.most_common():
                logger.info(f"{count} with {percentage} confidence")

            test_scheduling.close_failing_together_db()
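
A worked example of the support/confidence computation above (the counts are made up for illustration, following the formulas in the code):

run_count = 100        # the couple of tasks ran together in 100 pushes
both_failures = 10     # both tasks failed in 10 of those pushes
single_failures = 5    # exactly one of the two failed in 5 of those pushes

support = both_failures / run_count                              # 10 / 100 = 0.1
confidence = both_failures / (single_failures + both_failures)   # 10 / 15 ≈ 0.67

# support is above the 1 / 700 cutoff, so the couple is kept in the stats,
# but since confidence < 0.7 it is not written to the failing_together DB.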