Пример #1
0
def test_download_support_file_missing(tmp_path, capfd):
    """Check the stdout message printed when the support file URL answers 404."""
    db_url = "https://index.taskcluster.net/v1/task/project.relman.bugbug.data_commits.latest/artifacts/public/commits.json.zst"
    support_filename = "support_mock.zst"
    support_url = urljoin(db_url, support_filename)

    db_path = tmp_path / "prova.json"
    db.register(db_path, db_url, 1, support_files=[support_filename])

    # Register 404 answers for both the HEAD and the GET request.
    not_found_headers = {"ETag": "123", "Accept-Encoding": "zstd"}
    responses.add(
        responses.HEAD, support_url, status=404, headers=not_found_headers
    )
    responses.add(
        responses.GET,
        support_url,
        status=404,
        body=requests.exceptions.HTTPError("HTTP error"),
    )

    db.download_support_file(db_path, support_filename)

    captured_out, _ = capfd.readouterr()
    stem = os.path.splitext(support_filename)[0]
    local_path = os.path.join(os.path.dirname(db_path), f"{stem}.zst")
    expected_line = (
        f"{support_filename} is not yet available to download for {local_path}"
    )
    # Element -2 of the split is the last newline-terminated line of stdout.
    last_line = captured_out.split("\n")[-2]
    assert last_line == expected_line
Пример #2
0
def test_download_support_file_zst(tmp_path, mock_zst):
    """Check which files exist next to the DB after a successful .zst download."""
    db_url = "https://index.taskcluster.net/v1/task/project.relman.bugbug.data_commits.latest/artifacts/public/commits.json.zst"
    support_filename = "support.zst"
    support_url = urljoin(db_url, support_filename)

    db_path = tmp_path / "prova.json"
    db.register(db_path, db_url, 1, support_files=[support_filename])

    responses.add(
        responses.HEAD,
        support_url,
        status=200,
        headers={"ETag": "123", "Accept-Encoding": "zstd"},
    )

    # Serve the bytes of the file written by the mock_zst fixture as GET body.
    archive_path = tmp_path / "prova_tmp.zst"
    mock_zst(archive_path)
    with open(archive_path, "rb") as archive:
        payload = archive.read()
    responses.add(responses.GET, support_url, status=200, body=payload)

    db.download_support_file(db_path, support_filename)

    db_dir = os.path.dirname(db_path)
    stem = os.path.splitext(support_filename)[0]
    # The file itself, its extension-less counterpart and the ".zst.etag"
    # file are all expected in the DB directory.
    for expected_name in (support_filename, stem, stem + ".zst.etag"):
        assert os.path.exists(os.path.join(db_dir, expected_name))
Пример #3
0
def boot_worker():
    """Prepare a worker: clone autoland, fetch the DBs and preload the models.

    Steps, in order: clone the autoland repository into ``REPO_DIR``, download
    the test scheduling support files, download the commits DB (with its
    support files), bring the commits DB and the touched-together DB up to
    date, and finally preload the models.

    Raises:
        AssertionError: If a download fails while ``ALLOW_MISSING_MODELS`` is
            falsy.
        RuntimeError: If the commits DB was downloaded but
            ``repository.get_commits()`` yields no commit.
    """
    # Clone autoland
    logger.info(f"Cloning mozilla autoland in {REPO_DIR}...")
    repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

    # Download test scheduling DB support files.
    logger.info("Downloading test scheduling DB support files...")
    support_files = (
        (
            test_scheduling.TEST_LABEL_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_LABEL_DB,
        ),
        (
            test_scheduling.TEST_GROUP_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_GROUP_DB,
        ),
        (
            test_scheduling.TEST_GROUP_SCHEDULING_DB,
            test_scheduling.TOUCHED_TOGETHER_DB,
        ),
    )
    for db_path, support_file in support_files:
        # The download must not live inside the assert expression: assert
        # statements are removed under `python -O`, which would silently skip
        # the download itself.
        downloaded = db.download_support_file(db_path, support_file)
        assert downloaded or ALLOW_MISSING_MODELS, (
            f"Failed to download {support_file}"
        )

    # Download commits DB
    logger.info("Downloading commits DB...")
    commits_db_downloaded = db.download(repository.COMMITS_DB,
                                        support_files_too=True)
    if not ALLOW_MISSING_MODELS:
        assert commits_db_downloaded

    if commits_db_downloaded:
        # And update it
        logger.info("Browsing all commits...")
        # Exhaust the iterator to reach the last stored commit.
        last_commit = None
        for last_commit in repository.get_commits():
            pass

        if last_commit is None:
            # Previously this case surfaced as an unbound-variable error.
            raise RuntimeError(
                "The commits DB was downloaded but contains no commits"
            )

        rev_start = "children({})".format(last_commit["node"])
        logger.info("Updating commits DB...")
        commits = repository.download_commits(REPO_DIR,
                                              rev_start,
                                              use_single_process=True)

        if len(commits) > 0:
            # Update the touched together DB.
            update_touched_together_gen = test_scheduling.update_touched_together(
            )
            # Advance the generator to its first yield before sending values.
            next(update_touched_together_gen)

            update_touched_together_gen.send(commits[-1]["node"])

            # Sending None lets the generator finish; StopIteration is the
            # expected outcome and is ignored.
            try:
                update_touched_together_gen.send(None)
            except StopIteration:
                pass

    # Preload models
    bugbug_http.models.preload_models()

    logger.info("Worker boot done")
Пример #4
0
def test_download_support_file_xz(tmp_path, mock_xz):
    """Check the files on disk when the .zst URL is 404 and the .xz URL is 200."""
    zst_db_url = "https://index.taskcluster.net/v1/task/project.relman.bugbug.data_commits.latest/artifacts/public/commits.json.zst"
    xz_db_url = "https://index.taskcluster.net/v1/task/project.relman.bugbug.data_commits.latest/artifacts/public/commits.json.xz"

    support_filename = "support_mock.zst"
    stem = os.path.splitext(support_filename)[0]

    zst_support_url = urljoin(zst_db_url, support_filename)
    xz_support_url = urljoin(xz_db_url, f"{stem}.xz")

    db_path = tmp_path / "prova.json"
    db.register(db_path, zst_db_url, 1, support_files=[support_filename])

    # The .zst support file answers 404 for both HEAD and GET.
    responses.add(
        responses.HEAD,
        zst_support_url,
        status=404,
        headers={"ETag": "123", "Accept-Encoding": "zstd"},
    )
    responses.add(
        responses.GET,
        zst_support_url,
        status=404,
        body=requests.exceptions.HTTPError("HTTP error"),
    )

    # The .xz support file is available.
    responses.add(
        responses.HEAD,
        xz_support_url,
        status=200,
        headers={"ETag": "123", "Accept-Encoding": "xz"},
    )

    # Serve the bytes of the file written by the mock_xz fixture as GET body.
    xz_archive_path = tmp_path / "prova_tmp.xz"
    mock_xz(xz_archive_path)
    with open(xz_archive_path, "rb") as archive:
        payload = archive.read()
    responses.add(responses.GET, xz_support_url, status=200, body=payload)

    db.download_support_file(db_path, support_filename)

    db_dir = os.path.dirname(db_path)
    # The ".xz" file, its extension-less counterpart and the ".xz.etag" file
    # are all expected in the DB directory.
    for expected_name in (f"{stem}.xz", stem, stem + ".xz.etag"):
        assert os.path.exists(os.path.join(db_dir, expected_name))
Пример #5
0
def test_download_support_file_missing(tmp_path, caplog):
    """Check the logged message when both the primary and fallback URLs are 404."""
    db_url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.data_commits.latest/artifacts/public/commits.json.zst"
    support_filename = "support_mock.zst"
    support_url = urljoin(db_url, support_filename)
    # Same artifact path, served from the index.taskcluster.net host.
    fallback_url = support_url.replace(
        "https://community-tc.services.mozilla.com/api/index",
        "https://index.taskcluster.net",
    )

    db_path = tmp_path / "prova.json"
    db.register(db_path, db_url, 1, support_files=[support_filename])

    # Register 404 answers for HEAD and GET, first on the primary URL and
    # then on the fallback one.
    for missing_url in (support_url, fallback_url):
        responses.add(
            responses.HEAD,
            missing_url,
            status=404,
            headers={"ETag": "123", "Accept-Encoding": "zstd"},
        )
        responses.add(
            responses.GET,
            missing_url,
            status=404,
            body=requests.exceptions.HTTPError("HTTP error"),
        )

    db.download_support_file(db_path, support_filename)

    stem = os.path.splitext(support_filename)[0]
    local_path = os.path.join(os.path.dirname(db_path), f"{stem}.zst")
    expected_message = (
        f"{support_filename} is not yet available to download for {local_path}"
    )
    assert expected_message in caplog.text
Пример #6
0
def download_models():
    """Download every model in MODELS_NAMES, then the DB files.

    Each model is loaded with ``get_model`` right after its download. The
    support files and the commits DB are requested with ``extract=False``.
    """
    for name in MODELS_NAMES:
        utils.download_model(name)
        # Try loading the model
        get_model(name)

    # (database, support file) pairs, downloaded in this order.
    support_files = (
        (
            test_scheduling.TEST_LABEL_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_LABEL_DB,
        ),
        (
            test_scheduling.TEST_GROUP_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_GROUP_DB,
        ),
        (
            test_scheduling.TEST_GROUP_SCHEDULING_DB,
            test_scheduling.TOUCHED_TOGETHER_DB,
        ),
        (repository.COMMITS_DB, repository.COMMIT_EXPERIENCES_DB),
    )
    for db_path, support_file in support_files:
        db.download_support_file(db_path, support_file, extract=False)

    db.download(repository.COMMITS_DB, extract=False)
Пример #7
0
    def __init__(self, model_name, repo_dir, git_repo_dir, method_defect_predictor_dir):
        """Load the requested model and the extra data its name calls for.

        Args:
            model_name: Name of the model to download and load. ``"regressor"``
                and ``"testlabelselect"`` trigger additional downloads.
            repo_dir: Stored as ``self.repo_dir``.
            git_repo_dir: If truthy, passed to ``self.clone_git_repo`` together
                with the gecko-dev URL.
            method_defect_predictor_dir: If truthy, passed to
                ``self.clone_git_repo`` together with the
                MethodDefectPredictor URL and the hash hard-coded below.

        Raises:
            AssertionError: If a loaded model is ``None``, a dataset file is
                missing after its download, or the past failures support file
                could not be downloaded.
        """
        self.model_name = model_name
        self.repo_dir = repo_dir

        self.model = download_and_load_model(model_name)
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "8cc47f47ffb686a29324435a0151b5fabd37f865",
            )

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            model_data_y_path = f"{model_name}model_data_y"
            # X and y follow the same download/decompress/verify sequence.
            for data_path, label in (
                (model_data_X_path, "X"),
                (model_data_y_path, "y"),
            ):
                updated = download_check_etag(
                    URL.format(model_name=model_name, file_name=f"{data_path}.zst")
                )
                # Decompress only when download_check_etag returns a truthy
                # value.
                if updated:
                    zstd_decompress(data_path)
                assert os.path.exists(data_path), f"Decompressed {label} dataset exists"

            self.X = to_array(joblib.load(model_data_X_path))
            self.y = to_array(joblib.load(model_data_y_path))

            past_bugs_by_function_path = "data/past_bugs_by_function.pickle"
            download_check_etag(
                PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
            )
            zstd_decompress(past_bugs_by_function_path)
            assert os.path.exists(past_bugs_by_function_path)
            # NOTE(review): pickle.load can execute arbitrary code from the
            # file; this is only safe if PAST_BUGS_BY_FUNCTION_URL is trusted.
            with open(past_bugs_by_function_path, "rb") as f:
                self.past_bugs_by_function = pickle.load(f)

        if model_name == "testlabelselect":
            self.use_test_history = True
            # The download must not live inside the assert expression: assert
            # statements are removed under `python -O`, which would silently
            # skip the download itself.
            past_failures_downloaded = db.download_support_file(
                test_scheduling.TEST_LABEL_SCHEDULING_DB,
                test_scheduling.PAST_FAILURES_LABEL_DB,
            )
            assert past_failures_downloaded, (
                f"Failed to download {test_scheduling.PAST_FAILURES_LABEL_DB}"
            )
            self.past_failures_data = test_scheduling.get_past_failures("label")

            self.testfailure_model = download_and_load_model("testfailure")
            assert self.testfailure_model is not None
Пример #8
0
def test_download_support_file_missing(tmp_path, caplog):
    """Check the falsy result and the log message when the version URL is 404."""
    db_url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.data_commits.latest/artifacts/public/commits.json.zst"
    version_url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.data_commits.latest/artifacts/public/prova.json.version"
    support_filename = "support_mock.zst"
    support_url = urljoin(db_url, support_filename)

    db_path = tmp_path / "prova.json"
    db.register(db_path, db_url, 1, support_files=[support_filename])

    # The version URL and the support file URL (HEAD and GET) all answer 404.
    responses.add(responses.GET, version_url, status=404)
    responses.add(
        responses.HEAD,
        support_url,
        status=404,
        headers={"ETag": "123", "Accept-Encoding": "zstd"},
    )
    responses.add(
        responses.GET,
        support_url,
        status=404,
        body=requests.exceptions.HTTPError("HTTP error"),
    )

    download_result = db.download_support_file(db_path, support_filename)
    assert not download_result

    expected_message = f"Version file is not yet available to download for {db_path}"
    assert expected_message in caplog.text
Пример #9
0
def boot_worker():
    """Prepare a worker: preload models, clone the repo and fetch the DBs.

    The repository directory is read from the ``BUGBUG_REPO_DIR`` environment
    variable and defaults to ``bugbug-hg`` inside the system temp directory.

    Raises:
        AssertionError: If a download fails while ``ALLOW_MISSING_MODELS`` is
            falsy.
        RuntimeError: If the commits DB was downloaded but
            ``repository.get_commits()`` yields no commit.
    """
    # Preload models
    bugbug_http.models.preload_models()

    # Clone mozilla central
    repo_dir = os.environ.get("BUGBUG_REPO_DIR",
                              os.path.join(tempfile.gettempdir(), "bugbug-hg"))
    logger.info(f"Cloning mozilla-central in {repo_dir}...")
    repository.clone(repo_dir)

    # Download databases
    logger.info("Downloading test scheduling DB support file...")
    # The download must not live inside the assert expression: assert
    # statements are removed under `python -O`, which would silently skip the
    # download itself.
    past_failures_downloaded = db.download_support_file(
        test_scheduling.TEST_LABEL_SCHEDULING_DB,
        test_scheduling.PAST_FAILURES_LABEL_DB,
    )
    assert past_failures_downloaded or ALLOW_MISSING_MODELS, (
        f"Failed to download {test_scheduling.PAST_FAILURES_LABEL_DB}"
    )

    # Download commits DB
    logger.info("Downloading commits DB...")
    commits_db_downloaded = db.download(repository.COMMITS_DB,
                                        support_files_too=True)
    if not ALLOW_MISSING_MODELS:
        assert commits_db_downloaded

    if commits_db_downloaded:
        # And update it
        logger.info("Browsing all commits...")
        # Exhaust the iterator to reach the last stored commit.
        last_commit = None
        for last_commit in repository.get_commits():
            pass

        if last_commit is None:
            # Previously this case surfaced as an unbound-variable error.
            raise RuntimeError(
                "The commits DB was downloaded but contains no commits"
            )

        rev_start = "children({})".format(last_commit["node"])
        logger.info("Updating commits DB...")
        repository.download_commits(repo_dir, rev_start)

    logger.info("Worker boot done")
Пример #10
0
    def __init__(self, model_name, cache_root, git_repo_dir,
                 method_defect_predictor_dir):
        """Load the requested model and the extra data its name calls for.

        Args:
            model_name: Name of the model to load. ``"regressor"`` and
                ``"testselect"`` trigger additional downloads.
            cache_root: Existing directory; ``self.repo_dir`` is set to its
                ``mozilla-central`` sub-path.
            git_repo_dir: If truthy, passed to ``self.clone_git_repo`` together
                with the gecko-dev URL.
            method_defect_predictor_dir: If truthy, passed to
                ``self.clone_git_repo`` together with the
                MethodDefectPredictor URL and the hash hard-coded below.

        Raises:
            AssertionError: If ``cache_root`` is not a directory, a loaded
                model is ``None``, a dataset file is missing after its
                download, or the past failures support file could not be
                downloaded.
        """
        self.model_name = model_name
        self.cache_root = cache_root

        assert os.path.isdir(
            cache_root), f"Cache root {cache_root} is not a dir."
        self.repo_dir = os.path.join(cache_root, "mozilla-central")

        self.model = self.load_model(model_name)
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo("https://github.com/mozilla/gecko-dev",
                                git_repo_dir)

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "fa5269b959d8ddf7e97d1e92523bb64c17f9bbcd",
            )

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            model_data_y_path = f"{model_name}model_data_y"
            # X and y follow the same sequence; a file that already exists on
            # disk is used as is.
            for data_path, label in (
                (model_data_X_path, "X"),
                (model_data_y_path, "y"),
            ):
                if os.path.exists(data_path):
                    continue
                download_check_etag(
                    URL.format(model_name=model_name,
                               file_name=f"{data_path}.zst"))
                zstd_decompress(data_path)
                assert os.path.exists(
                    data_path), f"Decompressed {label} dataset exists"

            self.X = to_array(joblib.load(model_data_X_path))
            self.y = to_array(joblib.load(model_data_y_path))

        if model_name == "testselect":
            self.use_test_history = True
            # The download must not live inside the assert expression: assert
            # statements are removed under `python -O`, which would silently
            # skip the download itself.
            past_failures_downloaded = db.download_support_file(
                test_scheduling.TEST_SCHEDULING_DB,
                test_scheduling.PAST_FAILURES_DB)
            assert past_failures_downloaded, (
                f"Failed to download {test_scheduling.PAST_FAILURES_DB}")
            self.past_failures_data = test_scheduling.get_past_failures()

            self.backout_model = self.load_model("backout")
            assert self.backout_model is not None
Пример #11
0
 def download_eval_dbs(self,
                       extract: bool = True,
                       ensure_exist: bool = True) -> None:
     """Download every file listed in ``self.eval_dbs``.

     ``self.eval_dbs`` maps a DB to an iterable of files. A file that is
     itself registered (``db.is_registered``) is fetched with
     ``db.download``; any other file is fetched with
     ``db.download_support_file`` against its DB.

     Args:
         extract: Forwarded as ``extract`` to the download calls.
         ensure_exist: When truthy, a falsy download result fails an
             assertion.

     Raises:
         AssertionError: If a download returns a falsy value while
             ``ensure_exist`` is truthy.
     """
     for eval_db, eval_files in self.eval_dbs.items():
         for eval_file in eval_files:
             # The downloads must not live inside the assert expression:
             # assert statements are removed under `python -O`, which would
             # silently skip the downloads themselves.
             if db.is_registered(eval_file):
                 downloaded = db.download(eval_file, extract=extract)
             else:
                 downloaded = db.download_support_file(
                     eval_db, eval_file, extract=extract)
             assert downloaded or not ensure_exist, (
                 f"Failed to download {eval_file}")
Пример #12
0
def test_download_support_file(tmp_path, mock_zst):
    """Check the truthy result and the files on disk after a successful download."""
    db_url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_commits.latest/artifacts/public/prova.json.zst"
    version_url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_commits.latest/artifacts/public/prova.json.version"
    support_filename = "support.zst"
    support_url = urljoin(db_url, support_filename)

    db_path = tmp_path / "prova.json"
    db.register(db_path, db_url, 1, support_files=[support_filename])

    # The version URL answers "1", the same version passed to db.register.
    responses.add(responses.GET, version_url, status=200, body="1")

    responses.add(
        responses.HEAD,
        support_url,
        status=200,
        headers={"ETag": "123", "Accept-Encoding": "zstd"},
    )

    # Serve the bytes of the file written by the mock_zst fixture as GET body.
    archive_path = tmp_path / "prova_tmp.zst"
    mock_zst(archive_path)
    with open(archive_path, "rb") as archive:
        payload = archive.read()
    responses.add(responses.GET, support_url, status=200, body=payload)

    assert db.download_support_file(db_path, support_filename)

    db_dir = os.path.dirname(db_path)
    stem = os.path.splitext(support_filename)[0]
    # The ".zst" file must be gone, while its extension-less counterpart and
    # the ".zst.etag" file must exist.
    assert not os.path.exists(os.path.join(db_dir, support_filename))
    assert os.path.exists(os.path.join(db_dir, stem))
    assert os.path.exists(os.path.join(db_dir, stem + ".zst.etag"))
Пример #13
0
def download_models():
    """Download every model in MODELS_NAMES, then the DB files.

    Each model is looked up in ``MODEL_CACHE`` right after its download. A
    ``FileNotFoundError`` from that lookup is re-raised unless
    ``ALLOW_MISSING_MODELS`` is truthy, in which case the function logs and
    returns ``None`` immediately. The support files and the commits DB are
    requested with ``extract=False``.
    """
    for name in MODELS_NAMES:
        utils.download_model(name)
        # Try loading the model
        try:
            MODEL_CACHE.get(name)
        except FileNotFoundError:
            if not ALLOW_MISSING_MODELS:
                raise
            LOGGER.info(
                "Missing %r model, skipping because ALLOW_MISSING_MODELS is set"
                % name
            )
            # NOTE(review): this leaves the whole function, so the remaining
            # models and the DB downloads below are skipped too - confirm
            # that this is intended rather than a `continue`.
            return None

    # (database, support file) pairs, downloaded in this order.
    support_files = (
        (
            test_scheduling.TEST_LABEL_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_LABEL_DB,
        ),
        (
            test_scheduling.TEST_GROUP_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_GROUP_DB,
        ),
        (
            test_scheduling.TEST_GROUP_SCHEDULING_DB,
            test_scheduling.TOUCHED_TOGETHER_DB,
        ),
        (repository.COMMITS_DB, repository.COMMIT_EXPERIENCES_DB),
    )
    for db_path, support_file in support_files:
        db.download_support_file(db_path, support_file, extract=False)

    db.download(repository.COMMITS_DB, extract=False)
Пример #14
0
    def __init__(
        self,
        model_name: str,
        repo_dir: str,
        git_repo_dir: str,
        method_defect_predictor_dir: str,
        use_single_process: bool,
        skip_feature_importance: bool,
    ):
        """Load the requested model and the extra data its name calls for.

        Args:
            model_name: Name of the model to download and load. ``"regressor"``
                and ``"testlabelselect"`` trigger additional downloads.
            repo_dir: Stored as ``self.repo_dir``.
            git_repo_dir: If truthy, passed to ``self.clone_git_repo`` together
                with the mozilla-central ``hg::`` URL.
            method_defect_predictor_dir: If truthy, passed to
                ``self.clone_git_repo`` together with the
                MethodDefectPredictor URL and the hash hard-coded below.
            use_single_process: Stored as ``self.use_single_process``.
            skip_feature_importance: Stored as
                ``self.skip_feature_importance``.

        Raises:
            AssertionError: If a loaded model is ``None``, a dataset file is
                missing after its download, or the past failures support file
                could not be downloaded.
        """
        self.model_name = model_name
        self.repo_dir = repo_dir

        self.model = Model.load(download_model(model_name))
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo(
                "hg::https://hg.mozilla.org/mozilla-central", git_repo_dir
            )

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "8cc47f47ffb686a29324435a0151b5fabd37f865",
            )

        self.use_single_process = use_single_process
        self.skip_feature_importance = skip_feature_importance

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            model_data_y_path = f"{model_name}model_data_y"
            # X and y follow the same download/decompress/verify sequence.
            for data_path, label in (
                (model_data_X_path, "X"),
                (model_data_y_path, "y"),
            ):
                updated = download_check_etag(
                    URL.format(model_name=model_name, file_name=f"{data_path}.zst")
                )
                # Decompress only when download_check_etag returns a truthy
                # value.
                if updated:
                    zstd_decompress(data_path)
                assert os.path.exists(data_path), f"Decompressed {label} dataset exists"

            # NOTE(review): pickle.load can execute arbitrary code from the
            # file; this is only safe if the download URL is trusted.
            with open(model_data_X_path, "rb") as fb:
                self.X = to_array(pickle.load(fb))

            with open(model_data_y_path, "rb") as fb:
                self.y = to_array(pickle.load(fb))

            past_bugs_by_function_path = "data/past_fixed_bugs_by_function.json"
            download_check_etag(
                PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
            )
            zstd_decompress(past_bugs_by_function_path)
            assert os.path.exists(past_bugs_by_function_path)
            with open(past_bugs_by_function_path, "r") as f:
                self.past_bugs_by_function = json.load(f)

        if model_name == "testlabelselect":
            self.use_test_history = True
            # The download must not live inside the assert expression: assert
            # statements are removed under `python -O`, which would silently
            # skip the download itself.
            past_failures_downloaded = db.download_support_file(
                test_scheduling.TEST_LABEL_SCHEDULING_DB,
                test_scheduling.PAST_FAILURES_LABEL_DB,
            )
            assert past_failures_downloaded, (
                f"Failed to download {test_scheduling.PAST_FAILURES_LABEL_DB}"
            )
            self.past_failures_data = test_scheduling.get_past_failures("label", True)

            self.testfailure_model = cast(
                TestFailureModel, TestFailureModel.load(download_model("testfailure"))
            )
            assert self.testfailure_model is not None
Пример #15
0
    def __init__(
        self, model_name, cache_root, git_repo_dir, method_defect_predictor_dir
    ):
        """Load the requested model and the extra data its name calls for.

        Args:
            model_name: Name of the model to download and load. ``"regressor"``
                and ``"testselect"`` trigger additional downloads.
            cache_root: Existing directory; ``self.repo_dir`` is set to its
                ``mozilla-central`` sub-path.
            git_repo_dir: If truthy, passed to ``self.clone_git_repo`` together
                with the gecko-dev URL.
            method_defect_predictor_dir: If truthy, passed to
                ``self.clone_git_repo`` together with the
                MethodDefectPredictor URL and the hash hard-coded below.

        Raises:
            AssertionError: If ``cache_root`` is not a directory, a loaded
                model is ``None``, a dataset file is missing after its
                download, or the past failures support file could not be
                downloaded.
        """
        self.model_name = model_name
        self.cache_root = cache_root

        assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
        self.repo_dir = os.path.join(cache_root, "mozilla-central")

        self.model = download_and_load_model(model_name)
        assert self.model is not None

        self.git_repo_dir = git_repo_dir
        if git_repo_dir:
            self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)

        self.method_defect_predictor_dir = method_defect_predictor_dir
        if method_defect_predictor_dir:
            self.clone_git_repo(
                "https://github.com/lucapascarella/MethodDefectPredictor",
                method_defect_predictor_dir,
                "fa5269b959d8ddf7e97d1e92523bb64c17f9bbcd",
            )

        if model_name == "regressor":
            self.use_test_history = False

            model_data_X_path = f"{model_name}model_data_X"
            model_data_y_path = f"{model_name}model_data_y"
            # X and y follow the same download/decompress/verify sequence.
            for data_path, label in (
                (model_data_X_path, "X"),
                (model_data_y_path, "y"),
            ):
                updated = download_check_etag(
                    URL.format(model_name=model_name, file_name=f"{data_path}.zst")
                )
                # Decompress only when download_check_etag returns a truthy
                # value.
                if updated:
                    zstd_decompress(data_path)
                assert os.path.exists(data_path), f"Decompressed {label} dataset exists"

            self.X = to_array(joblib.load(model_data_X_path))
            self.y = to_array(joblib.load(model_data_y_path))

            past_bugs_by_function_path = "data/past_bugs_by_function.pickle"
            download_check_etag(
                PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
            )
            zstd_decompress(past_bugs_by_function_path)
            assert os.path.exists(past_bugs_by_function_path)
            # NOTE(review): pickle.load can execute arbitrary code from the
            # file; this is only safe if PAST_BUGS_BY_FUNCTION_URL is trusted.
            with open(past_bugs_by_function_path, "rb") as f:
                self.past_bugs_by_function = pickle.load(f)

        if model_name == "testselect":
            self.use_test_history = True
            # The download must not live inside the assert expression: assert
            # statements are removed under `python -O`, which would silently
            # skip the download itself.
            past_failures_downloaded = db.download_support_file(
                test_scheduling.TEST_SCHEDULING_DB, test_scheduling.PAST_FAILURES_DB
            )
            assert past_failures_downloaded, (
                f"Failed to download {test_scheduling.PAST_FAILURES_DB}"
            )
            self.past_failures_data = test_scheduling.get_past_failures()

            self.testfailure_model = download_and_load_model("testfailure")
            assert self.testfailure_model is not None