示例#1
0
def extend_with_bug_commits(case_study: CaseStudy, cmap: CommitMap,
                            report_type: tp.Type['BaseReport'],
                            merge_stage: int, ignore_blocked: bool) -> None:
    """
    Extend a case study with revisions that either introduced or fixed a bug as
    determined by the given SZZ tool.

    Args:
        case_study: to extend
        cmap: commit map to map revisions to unique IDs
        report_type: report to use for bug detection
        merge_stage: stage the revisions will be added to
        ignore_blocked: ignore blocked revisions
    """
    project_cls: tp.Type[Project] = get_project_cls_by_name(
        case_study.project_name)

    def load_bugs_from_szz_report(
        load_fun: tp.Callable[[Path], SZZReport]
    ) -> tp.Optional[tp.FrozenSet[RawBug]]:
        """Load raw bugs from the first processed SZZ report, if any exists."""
        reports = get_processed_revisions_files(case_study.project_name,
                                                report_type)
        if not reports:
            LOG.warning(f"I could not find any {report_type} reports. "
                        "Falling back to bug provider.")
            return None
        report = load_fun(reports[0])
        return report.get_all_raw_bugs()

    bugs: tp.Optional[tp.FrozenSet[RawBug]] = None
    if report_type == SZZUnleashedReport:
        bugs = load_bugs_from_szz_report(load_szzunleashed_report)
    elif report_type == PyDrillerSZZReport:
        bugs = load_bugs_from_szz_report(load_pydriller_szz_report)
    else:
        LOG.warning(
            f"Report type {report_type} is not supported by this extender "
            f"strategy. Falling back to bug provider.")

    if bugs is None:
        # Fall back to the bug provider when no SZZ report was found.
        # Reuse the project class resolved above instead of looking the
        # project up by name a second time.
        bug_provider = BugProvider.get_provider_for_project(project_cls)
        bugs = bug_provider.find_raw_bugs()

    # Collect both fixing and introducing commits of every bug.
    revisions: tp.Set[FullCommitHash] = set()
    for bug in bugs:
        revisions.add(bug.fixing_commit)
        revisions.update(bug.introducing_commits)

    rev_list = list(revisions)
    if ignore_blocked:
        rev_list = filter_blocked_revisions(rev_list, project_cls)

    case_study.include_revisions([(rev, cmap.time_id(rev))
                                  for rev in rev_list], merge_stage)
示例#2
0
def _get_all_issue_events(project_name: str) -> tp.List[IssueEvent]:
    """
    Loads and returns all issue events for a given project.

    Args:
        project_name: The name of the project to look in.

    Returns:
        A list of IssueEvent objects or None.
    """

    github_repo_name = get_github_repo_name_for_project(
        get_project_cls_by_name(project_name))

    # Guard clause: bail out immediately for non-GitHub projects.
    if not github_repo_name:
        raise AssertionError(f"{project_name} is not a github project")

    def load_issue_events(github: Github) -> 'PaginatedList[IssueEvent]':
        # Re-check inside the closure so the callback is self-contained.
        if github_repo_name:
            return github.get_repo(github_repo_name).get_issues_events()
        raise AssertionError(f"{project_name} is not a github project")

    cache_file_name = github_repo_name.replace("/", "_") + "_issues_events"
    events = get_cached_github_object_list(cache_file_name, load_issue_events)
    # Normalize a falsy cache result to an empty list.
    return events if events else []
示例#3
0
def extend_with_release_revs(case_study: CaseStudy, cmap: CommitMap,
                             release_type: ReleaseType, ignore_blocked: bool,
                             merge_stage: int) -> None:
    """
    Extend a case study with revisions marked as a release. This extender relies
    on the project to determine appropriate revisions.

    Args:
        case_study: to extend
        cmap: commit map to map revisions to unique IDs
        release_type: release type to add
        ignore_blocked: ignore blocked revisions
        merge_stage: stage the revisions will be added to
    """
    project_cls: tp.Type[Project] = get_project_cls_by_name(
        case_study.project_name)
    release_provider = ReleaseProvider.get_provider_for_project(project_cls)
    # Only the revision is needed; the release object itself is discarded.
    release_revisions: tp.List[FullCommitHash] = [
        revision for revision, _ in
        release_provider.get_release_revisions(release_type)
    ]

    if ignore_blocked:
        release_revisions = filter_blocked_revisions(release_revisions,
                                                     project_cls)

    case_study.include_revisions([(rev, cmap.time_id(rev))
                                  for rev in release_revisions], merge_stage)
    def plot(self, view_mode: bool) -> None:
        """
        Plots the bug plot for the whole project.

        The bug data for each SZZ tool is loaded lazily: the original code
        always indexed ``reports[0]`` for SZZUnleashed, which raised an
        IndexError when no report existed even if only pydriller data was
        actually requested.

        Raises:
            PlotDataEmpty: if an unknown SZZ tool is configured
        """
        project_name = self.plot_kwargs['project']
        project_repo = get_local_project_git(project_name)

        bug_provider = BugProvider.get_provider_for_project(
            get_project_cls_by_name(project_name))

        def find_pydriller_bugs():
            """Load pygit bugs via the bug provider (pydriller-based)."""
            return bug_provider.find_pygit_bugs()

        def find_szzunleashed_bugs():
            """Load bugs from the newest processed SZZUnleashed report."""
            reports = get_processed_revisions_files(project_name,
                                                    SZZUnleashedReport)
            return frozenset([
                as_pygit_bug(raw_bug, project_repo)
                for raw_bug in SZZUnleashedReport(reports[0]
                                                  ).get_all_raw_bugs()
            ])

        if self.__szz_tool == 'pydriller':
            self.__figure = _plot_chord_diagram_for_raw_bugs(
                project_name, project_repo, find_pydriller_bugs(),
                self.__szz_tool)
        elif self.__szz_tool == 'szz_unleashed':
            self.__figure = _plot_chord_diagram_for_raw_bugs(
                project_name, project_repo, find_szzunleashed_bugs(),
                self.__szz_tool)
        elif self.__szz_tool == 'szz_diff':
            self.__figure = _bug_data_diff_plot(project_name, project_repo,
                                                find_pydriller_bugs(),
                                                find_szzunleashed_bugs())
        else:
            raise PlotDataEmpty
    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Build a table of fixing-commit data for every bug of the case study's
        project.

        Args:
            table_format: output format of the table
            wrap_table: whether to wrap the table in a document skeleton

        Returns:
            the rendered table as a string
        """
        project_name: str = self.table_kwargs['case_study'].project_name

        bug_provider = BugProvider.get_provider_for_project(
            get_project_cls_by_name(project_name)
        )

        variables = [
            "fixing hash", "fixing message", "fixing author", "issue_number"
        ]
        pybugs = bug_provider.find_pygit_bugs()

        data_rows = [[
            pybug.fixing_commit.hex, pybug.fixing_commit.message,
            pybug.fixing_commit.author.name, pybug.issue_id
        ] for pybug in pybugs]

        # Pass the rows directly: wrapping an empty row list in np.array()
        # would yield a zero-dimensional array that does not match the four
        # declared columns and make the DataFrame constructor fail.
        bug_df = pd.DataFrame(columns=variables, data=data_rows)

        kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
        if table_format.is_latex():
            kwargs["multicolumn_format"] = "c"
            kwargs["longtable"] = True

        return dataframe_to_table(
            bug_df, table_format, wrap_table, wrap_landscape=True, **kwargs
        )
示例#6
0
    def revisions_of_project(self) -> None:
        """Generate the Revision list for the selected project if select
        specific is enabled."""
        # Switch the stacked widget to the revision-selection form.
        self.strategie_forms.setCurrentIndex(
            GenerationStrategie.SELECT_REVISION.value)
        # Only (re)load when the selection actually changed; loading is slow
        # because it fetches the remote and walks the full history.
        if self.selected_project != self.revision_list_project:
            self.revision_details.setText("Loading Revisions")
            self.revision_details.repaint()
            # Fetch the first remote so the local repo is up to date.
            get_local_project_git(self.selected_project).remotes[0].fetch()
            git_path = get_local_project_git_path(self.selected_project)
            initial_commit = get_initial_commit(git_path).hash
            commits = get_all_revisions_between(initial_commit, 'HEAD',
                                                FullCommitHash, git_path)
            commit_lookup_helper = create_commit_lookup_helper(
                self.selected_project)
            project = get_project_cls_by_name(self.selected_project)
            repo_name = get_primary_project_source(self.selected_project).local
            # Pair each commit with its repository name for the lookup below.
            commits = map(lambda commit: CommitRepoPair(commit, repo_name),
                          commits)

            cmap = get_commit_map(self.selected_project)
            commit_model = CommitTableModel(
                list(map(commit_lookup_helper, commits)), cmap, project)
            self.proxy_model.setSourceModel(commit_model)
            # Remember which project the list belongs to so we can skip
            # reloading next time.
            self.revision_list_project = self.selected_project
            self.revision_details.clear()
            self.revision_details.update()
示例#7
0
    def project_cls(self) -> tp.Type[bb.Project]:
        """
        Resolve the BenchBuild project class backing this case study.

        Returns:
            the project class registered under this case study's name
        """
        project_name = self.project_name
        return get_project_cls_by_name(project_name)
示例#8
0
def _gen_overview_data(tag_blocked: bool,
                       **kwargs: tp.Any) -> tp.Dict[str, tp.List[int]]:
    """
    Compute, per file status, the commit-map time-ids of a case study's
    revisions.

    Args:
        tag_blocked: if true, blocked commits are also tagged
        **kwargs: must contain 'case_study'; 'report_type' is optional and
            defaults to EmptyReport

    Returns:
        mapping from status name to the list of matching time-ids
    """
    case_study: CaseStudy = kwargs["case_study"]
    project_name = case_study.project_name
    commit_map: CommitMap = get_commit_map(project_name)
    project = get_project_cls_by_name(project_name)

    if 'report_type' in kwargs:
        result_file_type: tp.Type[BaseReport] = kwargs['report_type']
    else:
        result_file_type = EmptyReport

    positions: tp.Dict[str, tp.List[int]] = {
        "background": [],
        "blocked": [],
        "blocked_all": [],
        "compile_error": [],
        "failed": [],
        "missing": [],
        "success": []
    }

    # Commits outside the case study form the background; blocked ones among
    # them are additionally collected in "blocked_all".
    for c_hash, index in commit_map.mapping_items():
        if not case_study.has_revision(ShortCommitHash(c_hash)):
            positions["background"].append(index)
            if hasattr(project, "is_blocked_revision"
                       ) and project.is_blocked_revision(c_hash)[0]:
                positions["blocked_all"].append(index)

    revisions = FileStatusDatabase.get_data_for_project(
        project_name, ["revision", "time_id", "file_status"],
        commit_map,
        case_study,
        result_file_type=result_file_type,
        tag_blocked=tag_blocked)

    def _time_ids_with_status(status: FileStatusExtension) -> tp.List[int]:
        """Time-ids of all revisions whose file status equals ``status``."""
        status_tag = status.get_status_extension()
        return revisions[revisions["file_status"] ==
                         status_tag]["time_id"].tolist()

    positions["success"] = _time_ids_with_status(FileStatusExtension.SUCCESS)
    positions["failed"] = _time_ids_with_status(FileStatusExtension.FAILED)
    positions["blocked"] = _time_ids_with_status(FileStatusExtension.BLOCKED)
    positions["blocked_all"].extend(
        _time_ids_with_status(FileStatusExtension.BLOCKED))
    positions["missing"] = _time_ids_with_status(FileStatusExtension.MISSING)
    positions["compile_error"] = _time_ids_with_status(
        FileStatusExtension.COMPILE_ERROR)

    return positions
    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Build an overview table with domain, LOC, commit count, author count,
        and newest sampled revision for every case study in the loaded paper
        config.

        Args:
            table_format: output format of the table
            wrap_table: whether to wrap the table in a document skeleton

        Returns:
            the rendered table as a string
        """
        case_studies = get_loaded_paper_config().get_all_case_studies()

        cs_data: tp.List[pd.DataFrame] = []
        for case_study in case_studies:
            project_name = case_study.project_name
            commit_map = get_commit_map(project_name)
            project_cls = get_project_cls_by_name(project_name)
            project_repo = get_local_project_git(project_name)
            # Strip the trailing "/.git" from the repository path.
            project_path = project_repo.path[:-5]
            project_git = git["-C", project_path]

            revisions = sorted(
                case_study.revisions, key=commit_map.time_id, reverse=True
            )
            # Guard against an empty case study: the original indexed
            # revisions[0] unconditionally, which raised an IndexError
            # although the fallback to "HEAD" below clearly intends to
            # handle the no-revision case.
            revision = revisions[0] if revisions else None
            rev_range = revision.hash if revision else "HEAD"

            cs_dict = {
                project_name: {
                    "Domain":
                        str(project_cls.DOMAIN)[0].upper() +
                        str(project_cls.DOMAIN)[1:],
                    "LOC":
                        calc_repo_loc(project_repo, rev_range),
                    "Commits":
                        int(project_git("rev-list", "--count", rev_range)),
                    "Authors":
                        len(
                            project_git("shortlog", "-s",
                                        rev_range).splitlines()
                        )
                }
            }
            if revision:
                cs_dict[project_name]["Revision"] = revision.short_hash

            cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index"))

        df = pd.concat(cs_data).sort_index()

        kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
        if table_format.is_latex():
            kwargs["multicolumn_format"] = "c"
            kwargs["multirow"] = True

        return dataframe_to_table(
            df, table_format, wrap_table, wrap_landscape=True, **kwargs
        )
示例#10
0
 def show_project_data(self, index: QModelIndex) -> None:
     """Update the project data field."""
     project_name = index.data()
     # Only refresh the details pane when the selection actually changed.
     if self.selected_project != project_name:
         self.selected_project = project_name
         project = get_project_cls_by_name(project_name)
         project_info = f"{project_name.upper()} : " \
                        f"\nDomain: {project.DOMAIN}" \
                        f"\nSource: " \
                        f"{bb.source.primary(*project.SOURCE).remote}"
         self.project_details.setText(project_info)
         self.project_details.update()
         # If the revision-selection form is active, reload its commit list
         # for the newly selected project.
         if self.strategie_forms.currentIndex(
         ) == GenerationStrategie.SELECT_REVISION.value:
             self.revisions_of_project()
示例#11
0
def extend_with_distrib_sampling(case_study: CaseStudy, cmap: CommitMap,
                                 sampling_method: NormalSamplingMethod,
                                 merge_stage: int, num_rev: int,
                                 ignore_blocked: bool,
                                 only_code_commits: bool) -> None:
    """
    Extend a case study by sampling 'num_rev' new revisions, according to
    distribution specified with kwargs['distribution'].

    Args:
        case_study: to extend
        cmap: commit map to map revisions to unique IDs
        sampling_method: distribution to use for sampling
        merge_stage: stage the revisions will be added to
        num_rev: number of revisions to add
        ignore_blocked: ignore blocked revisions
        only_code_commits: exclude commits which don't change code
    """
    is_blocked: tp.Callable[[ShortCommitHash, tp.Type[Project]],
                            bool] = lambda rev, _: False
    if ignore_blocked:
        is_blocked = is_revision_blocked

    is_code_commit: tp.Callable[[ShortCommitHash], bool] = lambda rev: True
    if only_code_commits:
        churn_conf = ChurnConfig.create_c_style_languages_config()
        project_git_path = get_local_project_git_path(case_study.project_name)

        def is_c_cpp_code_commit(commit: ShortCommitHash) -> bool:
            """True if the commit touches C/C++ source code."""
            return contains_source_code(commit, project_git_path, churn_conf)

        is_code_commit = is_c_cpp_code_commit

    # Needs to be sorted so the propability distribution over the length
    # of the list is the same as the distribution over the commits age history
    project_cls = get_project_cls_by_name(case_study.project_name)
    revision_list = []
    for rev, idx in sorted(cmap.mapping_items(), key=lambda x: x[1]):
        # Construct the short hash once per commit instead of three times.
        short_rev = ShortCommitHash(rev)
        if case_study.has_revision_in_stage(short_rev, merge_stage):
            continue
        if is_blocked(short_rev, project_cls):
            continue
        if not is_code_commit(short_rev):
            continue
        revision_list.append((FullCommitHash(rev), idx))

    case_study.include_revisions(
        sampling_method.sample_n(revision_list, num_rev), merge_stage)
示例#12
0
def extend_with_smooth_revs(case_study: CaseStudy, cmap: CommitMap,
                            boundary_gradient: int, ignore_blocked: bool,
                            plot: Plot, merge_stage: int) -> None:
    """
    Extend a case study with extra revisions that could smooth plot curves. This
    can remove steep gradients that result from missing certain revisions when
    sampling.

    Args:
        case_study: to extend
        cmap: commit map to map revisions to unique IDs
        boundary_gradient: Maximal expected gradient in percent between
            two revisions
        ignore_blocked: ignore blocked revisions
        plot: Plot to calculate new revisions from.
        merge_stage: stage the revisions will be added to
    """
    # Convert the percentage input to a float ratio (Python 3 division is
    # already true division, so no float() wrapper is needed).
    gradient = boundary_gradient / 100
    print("Using boundary gradient: ", gradient)
    new_revisions = plot.calc_missing_revisions(gradient)

    if ignore_blocked:
        new_revisions = set(
            filter_blocked_revisions(
                list(new_revisions),
                get_project_cls_by_name(case_study.project_name)))

    # Remove revision that are already present in another stage.
    new_revisions = {
        rev
        for rev in new_revisions if not case_study.has_revision(rev)
    }
    if new_revisions:
        print("Found new revisions: ", new_revisions)
        case_study.include_revisions([(rev, cmap.time_id(rev))
                                      for rev in new_revisions], merge_stage)
    else:
        # Fixed grammar in the user-facing message ("where" -> "were").
        print("No new revisions found that were not already "
              "present in the case study.")
示例#13
0
def get_tagged_revision(
    revision: ShortCommitHash, project_name: str,
    result_file_type: tp.Type[BaseReport]
) -> FileStatusExtension:
    """
    Determine the file status for a single revision. If two files exist,
    the newest one is considered for detecting the status.

    Args:
        revision: the revision to get the status for
        project_name: target project
        result_file_type: the type of the result file

    Returns:
        the status for the revision
    """
    project_cls = get_project_cls_by_name(project_name)
    result_files = __get_result_files_dict(project_name, result_file_type)

    # EAFP: a single lookup instead of a membership test plus an index.
    try:
        files_for_revision = result_files[revision]
    except KeyError:
        return FileStatusExtension.MISSING
    return __get_tag_for_revision(
        revision, files_for_revision, project_cls, result_file_type
    )
示例#14
0
def extend_with_revs_per_year(case_study: CaseStudy, cmap: CommitMap,
                              merge_stage: int, ignore_blocked: bool,
                              git_path: str, revs_per_year: int,
                              revs_year_sep: bool) -> None:
    """
    Extend a case_study with ``revs_per_year`` revisions per year.

    Args:
        case_study: to extend
        cmap: commit map to map revisions to unique IDs
        merge_stage: stage to add the new revisions to
        ignore_blocked: ignore blocked revisions
        git_path: git path to the project
        revs_per_year: revisions to add per year
        revs_year_sep: put revisions in separate stages for each year
    """
    def parse_int_string(string: tp.Optional[str]) -> tp.Optional[int]:
        """Parse a string as int, returning None on failure or None input."""
        if string is None:
            return None

        try:
            return int(string)
        except ValueError:
            return None

    def get_or_create_stage_for_year(year: int) -> int:
        """Find the stage named after ``year`` or insert one, keeping the
        stages sorted by descending year."""
        stages = case_study.stages
        num_stages = len(stages)

        for stage_index in range(num_stages):
            stage_year = parse_int_string(stages[stage_index].name)

            if stage_year is None:
                continue
            if stage_year == year:
                return stage_index
            if stage_year > year:
                continue
            if stage_year < year:
                # Insert before the first older year to keep descending order.
                case_study.insert_empty_stage(stage_index)
                case_study.name_stage(stage_index, str(year))
                return stage_index

        case_study.insert_empty_stage(num_stages)
        case_study.name_stage(num_stages, str(year))
        return num_stages

    repo = pygit2.Repository(pygit2.discover_repository(git_path))
    last_commit = repo[repo.head.target]
    # Hoisted out of the per-commit loop below: the original resolved the
    # project class anew for every sampled commit.
    project_cls = get_project_cls_by_name(case_study.project_name)

    commits: tp.DefaultDict[int, tp.List[FullCommitHash]] = defaultdict(
        list)  # maps year -> list of commits
    for commit in repo.walk(last_commit.id, pygit2.GIT_SORT_TIME):
        commit_date = datetime.utcfromtimestamp(commit.commit_time)
        commits[commit_date.year].append(
            FullCommitHash.from_pygit_commit(commit))

    new_rev_items = []  # new revisions that get added to to case_study
    for year, commits_in_year in commits.items():
        samples = min(len(commits_in_year), revs_per_year)
        sample_commit_indices = sorted(
            random.sample(range(len(commits_in_year)), samples))

        for commit_index in sample_commit_indices:
            commit_hash = commits_in_year[commit_index]
            if ignore_blocked and is_revision_blocked(
                    commit_hash.to_short_commit_hash(), project_cls):
                continue
            time_id = cmap.time_id(commit_hash)
            new_rev_items.append((commit_hash, time_id))

        if revs_year_sep:
            merge_stage = get_or_create_stage_for_year(year)

        case_study.include_revisions(new_rev_items, merge_stage, True)
        new_rev_items.clear()
示例#15
0
def get_revisions_status_for_case_study(
    case_study: CaseStudy,
    result_file_type: tp.Type[BaseReport],
    stage_num: int = -1,
    tag_blocked: bool = True,
    experiment_type: tp.Optional[tp.Type[VersionExperiment]] = None
) -> tp.List[tp.Tuple[ShortCommitHash, FileStatusExtension]]:
    """
    Computes the file status for all revisions in this case study.

    Args:
        case_study: to work on
        result_file_type: report type of the result files
        stage_num: only consider a specific stage of the case study
        tag_blocked: if true, also blocked commits are tagged
        experiment_type: if given, only consider revisions of this experiment

    Returns:
        a list of (revision, status) tuples
    """
    try:
        project_cls = get_project_cls_by_name(case_study.project_name)
    except LookupError:
        # Return an empty list should a project name not exist.
        return []

    if experiment_type:
        tagged_revisions = get_tagged_experiment_specific_revisions(
            project_cls, result_file_type, tag_blocked, experiment_type)
    else:
        tagged_revisions = get_tagged_revisions(project_cls, result_file_type,
                                                tag_blocked)

    def filtered_tagged_revs(
        rev_provider: tp.Iterable[FullCommitHash]
    ) -> tp.List[tp.Tuple[ShortCommitHash, FileStatusExtension]]:
        """Pair each provided revision with its status; untagged revisions
        are BLOCKED (if blocked and tagging enabled) or MISSING."""
        # Build a first-wins lookup table once, replacing the former
        # O(revisions * tags) nested scan with O(1) dict lookups while
        # preserving the original "first matching tag" semantics.
        status_by_rev: tp.Dict[ShortCommitHash, FileStatusExtension] = {}
        for tagged_hash, status in tagged_revisions:
            status_by_rev.setdefault(tagged_hash, status)

        filtered_revisions = []
        for rev in rev_provider:
            short_rev = rev.to_short_commit_hash()
            if short_rev in status_by_rev:
                filtered_revisions.append(
                    (short_rev, status_by_rev[short_rev]))
            elif tag_blocked and is_revision_blocked(short_rev, project_cls):
                filtered_revisions.append(
                    (short_rev, FileStatusExtension.BLOCKED))
            else:
                filtered_revisions.append(
                    (short_rev, FileStatusExtension.MISSING))
        return filtered_revisions

    if stage_num == -1:
        return filtered_tagged_revs(case_study.revisions)

    if stage_num < case_study.num_stages:
        stage = case_study.stages[stage_num]
        return filtered_tagged_revs(stage.revisions)

    return []
示例#16
0
    def test_project_lookup_by_name(self) -> None:
        """Verify that project classes can be resolved via their names."""
        looked_up_cls = get_project_cls_by_name("gravity")

        self.assertEqual(looked_up_cls, Gravity)