Example #1
def append_bug_fixing_commits(bug_id, type_):
    # Record every commit that fixed this bug together with its git equivalent.
    for commit in commit_map[bug_id]:
        bug_fixing_commits.append(
            {
                "mercurial_rev": commit["node"],
                "git_rev": vcs_map.mercurial_to_git(commit["node"]),
                "type": type_,
            }
        )
Example #2
def append_commits_to_ignore(commits, type_):
    # Record each ignored commit together with its git equivalent.
    for commit in commits:
        commits_to_ignore.append(
            {
                "mercurial_rev": commit.node,
                "git_rev": vcs_map.mercurial_to_git(commit.node),
                "type": type_,
            }
        )
Example #3
    def classify_methods(self, commit):
        # Get commit hash from 4 months before the analysis time.
        # The method-level analyzer needs 4 months of history.
        stop_hash = None
        four_months_ago = datetime.utcnow() - relativedelta(months=4)
        # Use a distinct loop variable so the `commit` argument is not shadowed;
        # it is needed again below to match the functions touched by this commit.
        for past_commit in repository.get_commits():
            if dateutil.parser.parse(past_commit["pushdate"]) >= four_months_ago:
                stop_hash = tuple(
                    vcs_map.mercurial_to_git(self.git_repo_dir,
                                             [past_commit["node"]]))[0]
                break
        assert stop_hash is not None

        # Resolve the current git HEAD: this is the start of the analysis range.
        p = subprocess.run(
            [
                "git",
                "rev-list",
                "-n",
                "1",
                "HEAD",
            ],
            check=True,
            capture_output=True,
            cwd=self.git_repo_dir,
        )

        start_hash = p.stdout.decode().strip()

        # Run the method-level analyzer.
        subprocess.run(
            [
                "python3",
                "tester.py",
                "--repo",
                self.git_repo_dir,
                "--start",
                start_hash,
                "--stop",
                stop_hash,
                "--output",
                os.path.abspath("method_level.csv"),
            ],
            cwd=self.method_defect_predictor_dir,
        )

        method_level_results = []
        try:
            with open("method_level.csv", "r") as f:
                reader = csv.DictReader(f)
                for item in reader:
                    item["past_bugs"] = []
                    method_level_results.append(item)
        except FileNotFoundError:
            # No methods were classified.
            pass

        for method_level_result in method_level_results:
            method_level_result_path = method_level_result["file_name"]
            if method_level_result_path not in self.past_bugs_by_function:
                continue

            for path, functions in commit["functions"].items():
                if method_level_result_path != path:
                    continue

                for function_name, _, _ in functions:
                    if function_name not in self.past_bugs_by_function[path]:
                        continue

                    if method_level_result["method_name"].endswith(
                            function_name):
                        method_level_result["past_bugs"] = list(
                            self.past_bugs_by_function[path][function_name]
                            ["bugs"])

        with open("method_level.json", "w") as f:
            json.dump(method_level_results, f)
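
The `tuple(...)[0]` idiom above maps a single Mercurial node through the directory-plus-list form of `vcs_map.mercurial_to_git`, which yields git hashes for an iterable of revisions. A minimal sketch of that idiom wrapped in a helper, assuming `vcs_map` is importable as in these snippets; the helper name `hg_node_to_git` and the `from bugbug import vcs_map` import are assumptions, not part of the example:

from bugbug import vcs_map  # assumed import path


def hg_node_to_git(git_repo_dir, node):
    # mercurial_to_git(repo_dir, revs) yields one git hash per Mercurial node;
    # materialize the single result with tuple(...)[0], as Example #3 does.
    return tuple(vcs_map.mercurial_to_git(git_repo_dir, [node]))[0]
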
Example #4
    def apply_phab(self, hg, phabricator_deployment, diff_id):
        if phabricator_deployment == PHAB_PROD:
            api_key = get_secret("PHABRICATOR_TOKEN")
            url = get_secret("PHABRICATOR_URL")
        else:
            api_key = get_secret("PHABRICATOR_DEV_TOKEN")
            url = get_secret("PHABRICATOR_DEV_URL")

        phabricator_api = PhabricatorAPI(api_key=api_key, url=url)

        # Get the stack of patches
        stack = phabricator_api.load_patches_stack(diff_id)
        assert len(stack) > 0, "No patches to apply"

        # Find the first unknown base revision
        needed_stack = []
        revisions = {}
        for patch in reversed(stack):
            needed_stack.insert(0, patch)

            # Stop as soon as a base revision is available
            if self.has_revision(hg, patch.base_revision):
                logger.info(
                    f"Stopping at diff {patch.id} and revision {patch.base_revision}"
                )
                break

        if not needed_stack:
            logger.info("All the patches are already applied")
            return

        # Load all the diff revisions
        diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
        revisions = {
            diff["phid"]:
            phabricator_api.load_revision(rev_phid=diff["revisionPHID"],
                                          attachments={"reviewers": True})
            for diff in diffs
        }

        # Update repo to base revision
        hg_base = needed_stack[0].base_revision
        if not self.has_revision(hg, hg_base):
            logger.warning(
                "Missing base revision {} from Phabricator".format(hg_base))
            hg_base = "tip"

        if hg_base:
            hg.update(rev=hg_base, clean=True)
            logger.info(f"Updated repo to {hg_base}")

            if self.git_repo_dir and hg_base != "tip":
                try:
                    self.git_base = tuple(
                        vcs_map.mercurial_to_git(self.git_repo_dir,
                                                 [hg_base]))[0]
                    subprocess.run(
                        [
                            "git", "checkout", "-b", "analysis_branch",
                            self.git_base
                        ],
                        check=True,
                        cwd=self.git_repo_dir,
                    )
                    logger.info(f"Updated git repo to {self.git_base}")
                except Exception as e:
                    logger.info(
                        f"Updating git repo to Mercurial {hg_base} failed: {e}"
                    )

        def load_user(phid):
            if phid.startswith("PHID-USER"):
                return phabricator_api.load_user(user_phid=phid)
            elif phid.startswith("PHID-PROJ"):
                # TODO: Support group reviewers somehow.
                logger.info(f"Skipping group reviewer {phid}")
            else:
                raise Exception(f"Unsupported reviewer {phid}")

        for patch in needed_stack:
            revision = revisions[patch.phid]

            message = "{}\n\n{}".format(revision["fields"]["title"],
                                        revision["fields"]["summary"])

            author_name = None
            author_email = None

            if patch.commits:
                author_name = patch.commits[0]["author"]["name"]
                author_email = patch.commits[0]["author"]["email"]

            if author_name is None:
                author = load_user(revision["fields"]["authorPHID"])
                author_name = author["fields"]["realName"]
                # XXX: Figure out a way to know the email address of the author.
                author_email = author["fields"]["username"]

            reviewers = list(
                filter(
                    None,
                    (load_user(reviewer["reviewerPHID"]) for reviewer in
                     revision["attachments"]["reviewers"]["reviewers"]),
                ))
            reviewers = set(reviewer["fields"]["username"]
                            for reviewer in reviewers)

            if len(reviewers):
                message = replace_reviewers(message, reviewers)

            logger.info(
                f"Applying {patch.phid} from revision {revision['id']}: {message}"
            )

            hg.import_(
                patches=io.BytesIO(patch.patch.encode("utf-8")),
                message=message.encode("utf-8"),
                user=f"{author_name} <{author_email}>".encode("utf-8"),
            )

            if self.git_repo_dir:
                patch_proc = subprocess.Popen(
                    ["patch", "-p1", "--no-backup-if-mismatch", "--force"],
                    stdin=subprocess.PIPE,
                    cwd=self.git_repo_dir,
                )
                patch_proc.communicate(patch.patch.encode("utf-8"))
                assert patch_proc.returncode == 0, "Failed to apply patch"

                subprocess.run(
                    [
                        "git",
                        "-c",
                        f"user.name={author_name}",
                        "-c",
                        f"user.email={author_email}",
                        "commit",
                        "-am",
                        message,
                    ],
                    check=True,
                    cwd=self.git_repo_dir,
                )
Example #5
def mercurial_to_git(rev):
    # `tokenized` and `self` are captured from the enclosing scope: use the
    # tokenized-repo mapping when available, otherwise the global VCS map.
    if tokenized:
        return self.mercurial_to_tokenized_git[rev]
    else:
        return vcs_map.mercurial_to_git(rev)
Example #6
    def apply_phab(self, hg, diff_id):
        def has_revision(revision):
            if not revision:
                return False
            try:
                hg.identify(revision)
                return True
            except hglib.error.CommandError:
                return False

        phabricator_api = PhabricatorAPI(
            api_key=get_secret("PHABRICATOR_TOKEN"),
            url=get_secret("PHABRICATOR_URL"))

        # Get the stack of patches
        stack = phabricator_api.load_patches_stack(diff_id)
        assert len(stack) > 0, "No patches to apply"

        # Find the first unknown base revision
        needed_stack = []
        revisions = {}
        for patch in reversed(stack):
            needed_stack.insert(0, patch)

            # Stop as soon as a base revision is available
            if has_revision(patch.base_revision):
                logger.info(
                    f"Stopping at diff {patch.id} and revision {patch.base_revision}"
                )
                break

        if not needed_stack:
            logger.info("All the patches are already applied")
            return

        # Load all the diff revisions
        diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
        revisions = {
            diff["phid"]:
            phabricator_api.load_revision(rev_phid=diff["revisionPHID"],
                                          attachments={"reviewers": True})
            for diff in diffs
        }

        # Update repo to base revision
        hg_base = needed_stack[0].base_revision
        if not has_revision(hg_base):
            logger.warning(
                "Missing base revision {} from Phabricator".format(hg_base))
            hg_base = "tip"

        if hg_base:
            hg.update(rev=hg_base, clean=True)
            logger.info(f"Updated repo to {hg_base}")

            try:
                self.git_base = vcs_map.mercurial_to_git(hg_base)
                subprocess.run(
                    [
                        "git", "checkout", "-b", "analysis_branch",
                        self.git_base
                    ],
                    check=True,
                    cwd=self.git_repo_dir,
                )
                logger.info(f"Updated git repo to {self.git_base}")
            except Exception as e:
                logger.info(
                    f"Updating git repo to Mercurial {hg_base} failed: {e}")

        def load_user(phid):
            if phid.startswith("PHID-USER"):
                return phabricator_api.load_user(user_phid=phid)
            elif phid.startswith("PHID-PROJ"):
                # TODO: Support group reviewers somehow.
                logger.info(f"Skipping group reviewer {phid}")
            else:
                raise Exception(f"Unsupported reviewer {phid}")

        for patch in needed_stack:
            revision = revisions[patch.phid]

            message = "{}\n\n{}".format(revision["fields"]["title"],
                                        revision["fields"]["summary"])

            author_name = None
            author_email = None

            if patch.commits:
                author_name = patch.commits[0]["author"]["name"]
                author_email = patch.commits[0]["author"]["email"]

            if author_name is None:
                author = load_user(revision["fields"]["authorPHID"])
                author_name = author["fields"]["realName"]
                # XXX: Figure out a way to know the email address of the author.
                author_email = author["fields"]["username"]

            reviewers = list(
                filter(
                    None,
                    (load_user(reviewer["reviewerPHID"]) for reviewer in
                     revision["attachments"]["reviewers"]["reviewers"]),
                ))
            reviewers = set(reviewer["fields"]["username"]
                            for reviewer in reviewers)

            if len(reviewers):
                message = replace_reviewers(message, reviewers)

            logger.info(
                f"Applying {patch.phid} from revision {revision['id']}: {message}"
            )

            hg.import_(
                patches=io.BytesIO(patch.patch.encode("utf-8")),
                message=message.encode("utf-8"),
                user=f"{author_name} <{author_email}>".encode("utf-8"),
            )

            with tempfile.TemporaryDirectory() as tmpdirname:
                temp_file = os.path.join(tmpdirname, "temp.patch")
                with open(temp_file, "w") as f:
                    f.write(patch.patch)

                subprocess.run(
                    ["git", "apply", "--3way", temp_file],
                    check=True,
                    cwd=self.git_repo_dir,
                )
                subprocess.run(
                    [
                        "git",
                        "-c",
                        f"user.name={author_name}",
                        "-c",
                        f"user.email={author_email}",
                        "commit",
                        "-am",
                        message,
                    ],
                    check=True,
                    cwd=self.git_repo_dir,
                )
Example #7
def mercurial_to_git(revs):
    # `tokenized`, `self` and `repo_dir` are captured from the enclosing scope.
    if tokenized:
        # This function is a generator (it uses `yield from` below), so the
        # tokenized branch must yield as well; returning a generator object
        # here would end iteration without producing any values.
        yield from (self.mercurial_to_tokenized_git[rev] for rev in revs)
    else:
        yield from vcs_map.mercurial_to_git(repo_dir, revs)
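
Examples #5 and #7 wrap the same mapping behind two closure signatures: the first maps one revision and returns a single git hash, while the second takes an iterable of revisions and lazily yields the corresponding hashes (matching the `repo_dir, revs` form of `vcs_map.mercurial_to_git` seen in Examples #3 and #4). A short illustration of how a caller consumes each variant; the revision strings are placeholders, not real hashes:

# Single-revision variant (Example #5): one Mercurial hash in, one git hash out.
git_rev = mercurial_to_git("0123456789ab")  # placeholder revision

# Batch variant (Example #7): a generator comes back, so materialize it to use it.
git_revs = list(mercurial_to_git(["0123456789ab", "ba9876543210"]))  # placeholders
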
Example #8
    def apply_phab(self, hg, diff_id):
        def has_revision(revision):
            if not revision:
                return False
            try:
                hg.identify(revision)
                return True
            except hglib.error.CommandError:
                return False

        phabricator_api = PhabricatorAPI(
            api_key=get_secret("PHABRICATOR_TOKEN"), url=get_secret("PHABRICATOR_URL")
        )

        # Get the stack of patches
        stack = phabricator_api.load_patches_stack(diff_id)
        assert len(stack) > 0, "No patches to apply"

        # Find the first unknown base revision
        needed_stack = []
        revisions = {}
        for patch in reversed(stack):
            needed_stack.insert(0, patch)

            # Stop as soon as a base revision is available
            if has_revision(patch.base_revision):
                logger.info(
                    f"Stopping at diff {patch.id} and revision {patch.base_revision}"
                )
                break

        if not needed_stack:
            logger.info("All the patches are already applied")
            return

        # Load all the diff revisions
        diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
        revisions = {
            diff["phid"]: phabricator_api.load_revision(rev_phid=diff["revisionPHID"])
            for diff in diffs
        }

        # Update repo to base revision
        hg_base = needed_stack[0].base_revision
        if not has_revision(hg_base):
            logger.warning("Missing base revision {} from Phabricator".format(hg_base))
            hg_base = "tip"

        if hg_base:
            hg.update(rev=hg_base, clean=True)
            logger.info(f"Updated repo to {hg_base}")

            try:
                self.git_base = vcs_map.mercurial_to_git(hg_base)
                subprocess.run(
                    ["git", "checkout", "-b", "analysis_branch", self.git_base],
                    check=True,
                    cwd=self.git_repo_dir,
                )
                logger.info(f"Updated git repo to {self.git_base}")
            except Exception as e:
                logger.info(f"Updating git repo to Mercurial {hg_base} failed: {e}")

        for patch in needed_stack:
            revision = revisions[patch.phid]

            if patch.commits:
                message = patch.commits[0]["message"]
                author_name = patch.commits[0]["author"]["name"]
                author_email = patch.commits[0]["author"]["email"]
            else:
                message = revision["fields"]["title"]
                author_name = "bugbug"
                author_email = "*****@*****.**"

            logger.info(
                f"Applying {patch.phid} from revision {revision['id']}: {message}"
            )

            hg.import_(
                patches=io.BytesIO(patch.patch.encode("utf-8")),
                message=message.encode("utf-8"),
                user=f"{author_name} <{author_email}>".encode("utf-8"),
            )

            with tempfile.TemporaryDirectory() as tmpdirname:
                temp_file = os.path.join(tmpdirname, "temp.patch")
                with open(temp_file, "w") as f:
                    f.write(patch.patch)

                subprocess.run(
                    ["git", "apply", "--3way", temp_file],
                    check=True,
                    cwd=self.git_repo_dir,
                )
                subprocess.run(
                    [
                        "git",
                        "-c",
                        f"user.name={author_name}",
                        "-c",
                        f"user.email={author_email}",
                        "commit",
                        "-am",
                        message,
                    ],
                    check=True,
                    cwd=self.git_repo_dir,
                )
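
Examples #6 and #8 write the diff to a temporary file before running `git apply --3way`, while Example #4 pipes it into the `patch` command instead. As a variation, not what these snippets do, `git apply` also accepts the patch on standard input, which avoids the temporary file; a sketch under the same assumptions (`patch.patch` holds a unified diff and `self.git_repo_dir` is the git clone):

# Variation on the snippets above: feed the diff to `git apply` via stdin
# instead of going through a temporary file.
subprocess.run(
    ["git", "apply", "--3way"],
    input=patch.patch.encode("utf-8"),
    check=True,
    cwd=self.git_repo_dir,
)
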
Example #9
    def get_commits_to_ignore(self):
        logger.info("Download previous commits to ignore...")
        db.download(IGNORED_COMMITS_DB)

        logger.info("Get previously classified commits...")
        prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
        logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

        # When we already have some analyzed commits, re-analyze the last 3500 to make sure
        # we didn't miss back-outs that happened since the last analysis.
        if len(prev_commits_to_ignore) > 0:
            first_commit_to_reanalyze = (
                -3500 if len(prev_commits_to_ignore) >= 3500 else 0
            )
            rev_start = "children({})".format(
                prev_commits_to_ignore[first_commit_to_reanalyze]["rev"]
            )
        else:
            rev_start = 0

        with hglib.open(self.mercurial_repo_dir) as hg:
            revs = repository.get_revs(hg, rev_start)

        # Drop commits which are not yet present in the mercurial <-> git mapping.
        while len(revs) > 0:
            try:
                vcs_map.mercurial_to_git(revs[-1].decode("ascii"))
                break
            except Exception as e:
                if not str(e).startswith("Missing mercurial commit in the VCS map"):
                    raise

                revs.pop()

        commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

        repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)

        chosen_commits = set()
        commits_to_ignore = []
        for commit in commits:
            if commit.ignored or commit.backedoutby:
                commits_to_ignore.append(
                    {
                        "rev": commit.node,
                        "type": "backedout" if commit.backedoutby else "",
                    }
                )
                chosen_commits.add(commit.node)

        logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

        for prev_commit in prev_commits_to_ignore[::-1]:
            if prev_commit["rev"] not in chosen_commits:
                commits_to_ignore.append(prev_commit)
                chosen_commits.add(prev_commit["rev"])

        logger.info(f"{len(commits_to_ignore)} commits to ignore...")

        logger.info(
            "...of which {} are backed-out".format(
                sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
            )
        )

        db.write(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)
        db.upload(IGNORED_COMMITS_DB)
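
The `while` loop above keeps dropping revisions from the end of the list until it finds one the VCS map already knows about, relying on the error message raised by `vcs_map.mercurial_to_git`. The same check can be phrased as a lookup that returns `None` for unmapped revisions; a minimal sketch using the single-revision call form of this example (the helper name `try_mercurial_to_git` and the import path are assumptions):

from bugbug import vcs_map  # assumed import path


def try_mercurial_to_git(rev):
    # Return the git hash for `rev`, or None if the revision is not yet in the
    # mercurial <-> git mapping (the same error check as the loop above).
    try:
        return vcs_map.mercurial_to_git(rev)
    except Exception as e:
        if not str(e).startswith("Missing mercurial commit in the VCS map"):
            raise
        return None
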