Example #1
    def revisions_of_project(self) -> None:
        """Generate the Revision list for the selected project if select
        specific is enabled."""
        self.strategie_forms.setCurrentIndex(
            GenerationStrategie.SELECT_REVISION.value)
        if self.selected_project != self.revision_list_project:
            self.revision_details.setText("Loading Revisions")
            self.revision_details.repaint()
            get_local_project_git(self.selected_project).remotes[0].fetch()
            git_path = get_local_project_git_path(self.selected_project)
            initial_commit = get_initial_commit(git_path).hash
            commits = get_all_revisions_between(initial_commit, 'HEAD',
                                                FullCommitHash, git_path)
            commit_lookup_helper = create_commit_lookup_helper(
                self.selected_project)
            project = get_project_cls_by_name(self.selected_project)
            repo_name = get_primary_project_source(self.selected_project).local
            commits = map(lambda commit: CommitRepoPair(commit, repo_name),
                          commits)

            cmap = get_commit_map(self.selected_project)
            commit_model = CommitTableModel(
                list(map(commit_lookup_helper, commits)), cmap, project)
            self.proxy_model.setSourceModel(commit_model)
            self.revision_list_project = self.selected_project
            self.revision_details.clear()
            self.revision_details.update()
Example #2
    def plot(self, view_mode: bool) -> None:
        """Plot the bug data for the whole project."""
        project_name = self.plot_kwargs['project']
        project_repo = get_local_project_git(project_name)

        bug_provider = BugProvider.get_provider_for_project(
            get_project_cls_by_name(project_name))
        pydriller_bugs = bug_provider.find_pygit_bugs()

        reports = get_processed_revisions_files(project_name,
                                                SZZUnleashedReport)
        szzunleashed_bugs = frozenset([
            as_pygit_bug(raw_bug, project_repo)
            for raw_bug in SZZUnleashedReport(reports[0]).get_all_raw_bugs()
        ])

        if self.__szz_tool == 'pydriller':
            self.__figure = _plot_chord_diagram_for_raw_bugs(
                project_name, project_repo, pydriller_bugs, self.__szz_tool)
        elif self.__szz_tool == 'szz_unleashed':
            self.__figure = _plot_chord_diagram_for_raw_bugs(
                project_name, project_repo, szzunleashed_bugs, self.__szz_tool)
        elif self.__szz_tool == 'szz_diff':
            self.__figure = _bug_data_diff_plot(project_name, project_repo,
                                                pydriller_bugs,
                                                szzunleashed_bugs)
        else:
            raise PlotDataEmpty
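
The if/elif chain above picks a plotting routine by the configured SZZ tool name. The same dispatch can be written as a lookup table; a minimal self-contained sketch of the pattern, with stubs standing in for the real plot helpers:

import typing as tp

class PlotDataEmpty(Exception):
    """Stand-in for the exception raised above on an unknown tool."""

def _chord_diagram(tool: str) -> str:
    # Stub for _plot_chord_diagram_for_raw_bugs in the snippet above.
    return f"chord diagram ({tool})"

def _diff_plot() -> str:
    # Stub for _bug_data_diff_plot in the snippet above.
    return "diff plot"

DISPATCH: tp.Dict[str, tp.Callable[[], str]] = {
    'pydriller': lambda: _chord_diagram('pydriller'),
    'szz_unleashed': lambda: _chord_diagram('szz_unleashed'),
    'szz_diff': _diff_plot,
}

def plot_for_tool(szz_tool: str) -> str:
    if szz_tool not in DISPATCH:
        raise PlotDataEmpty
    return DISPATCH[szz_tool]()

assert plot_for_tool('szz_diff') == "diff plot"
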
Example #3
    def test_get_current_branch(self):
        """Check if we can correctly retrieve the current branch of a repo."""
        repo = get_local_project_git("brotli")

        repo.checkout(repo.lookup_branch('master'))

        self.assertEqual(get_current_branch(repo.workdir), 'master')
Example #4
def _load_projects_ordered_by_year(
    current_config: PC.PaperConfig, result_file_type: tp.Type[BaseReport]
) -> tp.Dict[str, tp.Dict[int, tp.List[tp.Tuple[ShortCommitHash,
                                                FileStatusExtension]]]]:
    projects: tp.Dict[str, tp.Dict[int, tp.List[tp.Tuple[
        ShortCommitHash, FileStatusExtension]]]] = OrderedDict()

    for case_study in sorted(current_config.get_all_case_studies(),
                             key=lambda cs: (cs.project_name, cs.version)):
        processed_revisions = get_revisions_status_for_case_study(
            case_study, result_file_type)

        repo = get_local_project_git(case_study.project_name)
        revisions: tp.Dict[int, tp.List[tp.Tuple[
            ShortCommitHash, FileStatusExtension]]] = defaultdict(list)

        # dict: year -> [(revision: ShortCommitHash, status: FileStatusExtension)]
        for rev, status in processed_revisions:
            commit = repo.get(rev.hash)
            commit_date = datetime.utcfromtimestamp(commit.commit_time)
            revisions[commit_date.year].append((rev, status))

        projects[case_study.project_name] = revisions

    return projects
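
The year-bucketing above is independent of the VaRA types; a minimal self-contained sketch of the same grouping with plain tuples (all values here are made up for illustration):

from collections import defaultdict
from datetime import datetime, timezone

# (revision, status, unix commit time); illustrative values only.
processed = [
    ("abc123", "Success", 1577836800),  # 2020-01-01 UTC
    ("def456", "Failed", 1609459200),   # 2021-01-01 UTC
]

by_year = defaultdict(list)
for rev, status, commit_time in processed:
    year = datetime.fromtimestamp(commit_time, tz=timezone.utc).year
    by_year[year].append((rev, status))

assert by_year == {2020: [("abc123", "Success")],
                   2021: [("def456", "Failed")]}
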
Example #5
def _filter_commit_message_bugs(
    project_name: str,
    commit_filter_function: tp.Callable[[pygit2.Repository, pygit2.Commit],
                                        tp.Optional[PygitBug]]
) -> tp.FrozenSet[PygitBug]:
    """
    Find bugs based on commit messages using the given filter function.

    Args:
        project_name: name of the project to draw the commit history from
        commit_filter_function: function that creates and filters bugs

    Returns:
        the set of bugs created by the given filter
    """
    filtered_bugs = set()
    project_repo = get_local_project_git(project_name)

    for commit in project_repo.walk(project_repo.head.target,
                                    pygit2.GIT_SORT_TIME):
        pybug = commit_filter_function(project_repo, commit)
        if pybug:
            filtered_bugs.add(pybug)

    return frozenset(filtered_bugs)
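
The commit_filter_function callback receives the repository and each commit from the walk and returns a bug or None. A hypothetical filter sketch that flags commits whose message mentions a fix; the real PygitBug constructor is project-specific, so this stand-in just returns the commit itself:

import typing as tp

import pygit2

def fix_keyword_filter(repo: pygit2.Repository,
                       commit: pygit2.Commit) -> tp.Optional[pygit2.Commit]:
    # Hypothetical predicate: treat commits mentioning "fix" as bug fixes.
    # A real filter would build and return a PygitBug here instead.
    if "fix" in commit.message.lower():
        return commit
    return None

# Usage sketch: bugs = _filter_commit_message_bugs("brotli", fix_keyword_filter)
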
Example #6
    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        case_studies = get_loaded_paper_config().get_all_case_studies()

        cs_data: tp.List[pd.DataFrame] = []
        for case_study in case_studies:
            project_name = case_study.project_name
            commit_map = get_commit_map(project_name)
            project_cls = get_project_cls_by_name(project_name)
            project_repo = get_local_project_git(project_name)
            # strip the trailing ".git/" from the repo path to get the workdir
            project_path = project_repo.path[:-5]
            project_git = git["-C", project_path]

            revisions = sorted(
                case_study.revisions, key=commit_map.time_id, reverse=True
            )
            revision = revisions[0] if revisions else None
            rev_range = revision.hash if revision else "HEAD"

            cs_dict = {
                project_name: {
                    "Domain":
                        str(project_cls.DOMAIN)[0].upper() +
                        str(project_cls.DOMAIN)[1:],
                    "LOC":
                        calc_repo_loc(project_repo, rev_range),
                    "Commits":
                        int(project_git("rev-list", "--count", rev_range)),
                    "Authors":
                        len(
                            project_git("shortlog", "-s",
                                        rev_range).splitlines()
                        )
                }
            }
            if revision:
                cs_dict[project_name]["Revision"] = revision.short_hash

            cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index"))

        df = pd.concat(cs_data).sort_index()

        kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
        if table_format.is_latex():
            kwargs["multicolumn_format"] = "c"
            kwargs["multirow"] = True

        return dataframe_to_table(
            df, table_format, wrap_table, wrap_landscape=True, **kwargs
        )
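
git["-C", project_path] is plumbum syntax: indexing binds arguments to the git executable and yields a callable command. A minimal standalone sketch of the same counting calls, assuming plumbum is installed, git is on PATH, and the checkout path (hypothetical here) exists:

from plumbum.cmd import git

repo_git = git["-C", "/path/to/checkout"]  # hypothetical checkout path

commit_count = int(repo_git("rev-list", "--count", "HEAD"))
author_count = len(repo_git("shortlog", "-s", "HEAD").splitlines())
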
Example #7
def _find_corresponding_pygit_suspect_tuple(
        project_name: str,
        issue_event: IssueEvent) -> tp.Optional[PygitSuspectTuple]:
    """
    Creates a suspect tuple given an issue event.

    Partitions the commits found via git blame on the fixing commit into
    suspects (commits after bug report) and non-suspects (commits before bug
    report).

    Args:
        project_name: Name of the project to draw the fixing and introducing
            commits from.
        issue_event: The IssueEvent potentially associated with a bug.

    Returns:
        A PygitSuspectTuple if the issue event represents the closing of a bug,
        None otherwise
    """
    pygit_repo: pygit2.Repository = get_local_project_git(project_name)
    pydrill_repo = pydriller.Git(pygit_repo.path)

    if _has_closed_a_bug(issue_event) and issue_event.commit_id:
        issue_date = issue_event.issue.created_at.astimezone(timezone.utc)
        fixing_commit = pygit_repo.get(issue_event.commit_id)
        pydrill_fixing_commit = pydrill_repo.get_commit(issue_event.commit_id)
        blame_dict = pydrill_repo.get_commits_last_modified_lines(
            pydrill_fixing_commit)

        non_suspect_commits = set()
        suspect_commits = set()
        for introducing_set in blame_dict.values():
            for introducing_id in introducing_set:
                introduction_date = pydrill_repo.get_commit(
                    introducing_id).committer_date.astimezone(timezone.utc)

                if introduction_date > issue_date:  # commit is a suspect
                    suspect_commits.add(pygit_repo.get(introducing_id))
                else:
                    non_suspect_commits.add(pygit_repo.get(introducing_id))

        return PygitSuspectTuple(fixing_commit, non_suspect_commits,
                                 suspect_commits, issue_event.issue.number,
                                 issue_event.issue.created_at,
                                 pydrill_fixing_commit.committer_date)
    return None
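
The suspect/non-suspect split reduces to a timezone-normalized comparison between the introducing commit's date and the bug report's date; a self-contained sketch of the rule:

from datetime import datetime, timezone

def is_suspect(introduction_date: datetime, issue_date: datetime) -> bool:
    # A commit is a suspect iff it was committed after the bug report;
    # both timestamps are normalized to UTC, as in the loop above.
    return (introduction_date.astimezone(timezone.utc) >
            issue_date.astimezone(timezone.utc))

# A commit from 2021 is a suspect for an issue opened in 2020.
assert is_suspect(datetime(2021, 1, 1, tzinfo=timezone.utc),
                  datetime(2020, 1, 1, tzinfo=timezone.utc))
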
Example #8
def filter_non_code_changes(blame_data: pd.DataFrame,
                            project_name: str) -> pd.DataFrame:
    """
    Filter all revisions from the data frame that are not related to code
    changes.

    Args:
        blame_data: data to filter
        project_name: name of the project

    Returns:
        the filtered data frame without rows for non-code changes
    """
    repo = get_local_project_git(project_name)
    code_related_changes = [
        x.hash for x in calc_repo_code_churn(
            repo, ChurnConfig.create_c_style_languages_config())
    ]
    return blame_data[blame_data.apply(
        lambda x: x['revision'] in code_related_changes, axis=1)]
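
The row-wise apply above only tests set membership, so the same filter can be expressed with the vectorized isin; a small self-contained check that both forms agree (toy data):

import pandas as pd

df = pd.DataFrame({'revision': ['a1', 'b2', 'c3'], 'churn': [5, 0, 7]})
code_related = ['a1', 'c3']

via_apply = df[df.apply(lambda x: x['revision'] in code_related, axis=1)]
via_isin = df[df['revision'].isin(code_related)]
assert via_apply.equals(via_isin)
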
Example #9
def _generate_graph_table(case_studies: tp.List[CaseStudy],
                          graph_generator: tp.Callable[[str, FullCommitHash],
                                                       nx.DiGraph],
                          table_format: TableFormat, wrap_table: bool) -> str:
    degree_data: tp.List[pd.DataFrame] = []
    for case_study in case_studies:
        project_name = case_study.project_name
        project_git = git["-C", get_local_project_git(project_name).path]
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            continue

        graph = graph_generator(project_name, revision)

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in graph.nodes:
            nodes.append({
                "node_degree": graph.degree(node),
                "node_out_degree": graph.out_degree(node),
                "node_in_degree": graph.in_degree(node),
            })

        data = pd.DataFrame(nodes)
        degree_data.append(
            pd.DataFrame.from_dict(
                {
                    project_name: {
                        ("commits", ""):
                        int(project_git("rev-list", "--count", revision.hash)),
                        ("authors", ""):
                        len(
                            project_git("shortlog", "-s",
                                        "--all").splitlines()),
                        ("nodes", ""):
                        len(graph.nodes),
                        ("edges", ""):
                        len(graph.edges),
                        ("node degree", "mean"):
                        data["node_degree"].mean(),
                        ("node degree", "median"):
                        data["node_degree"].median(),
                        ("node degree", "min"):
                        data["node_degree"].min(),
                        ("node degree", "max"):
                        data["node_degree"].max(),
                        ("node out degree", "median"):
                        data["node_out_degree"].median(),
                        ("node out degree", "min"):
                        data["node_out_degree"].min(),
                        ("node out degree", "max"):
                        data["node_out_degree"].max(),
                        ("node in degree", "median"):
                        data["node_in_degree"].median(),
                        ("node in degree", "min"):
                        data["node_in_degree"].min(),
                        ("node in degree", "max"):
                        data["node_in_degree"].max(),
                    }
                },
                orient="index"))

    df = pd.concat(degree_data).round(2)

    kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        kwargs["multicolumn_format"] = "c"
        kwargs["multirow"] = True

    return dataframe_to_table(df,
                              table_format,
                              wrap_table,
                              wrap_landscape=True,
                              **kwargs)
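
The per-node statistics only need networkx's degree views; a self-contained sketch on a toy graph, assuming networkx and pandas are installed:

import networkx as nx
import pandas as pd

graph = nx.DiGraph([("a", "b"), ("a", "c"), ("b", "c")])

nodes = [{
    "node_degree": graph.degree(node),
    "node_out_degree": graph.out_degree(node),
    "node_in_degree": graph.in_degree(node),
} for node in graph.nodes]

data = pd.DataFrame(nodes)
assert data["node_degree"].mean() == 2.0  # every toy node touches two edges
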
Example #10
    @classmethod
    def _load_dataframe(cls, project_name: str, commit_map: CommitMap,
                        case_study: tp.Optional[CaseStudy],
                        **kwargs: tp.Any) -> pd.DataFrame:
        repo = get_local_project_git(project_name)
        commit_lookup = create_commit_lookup_helper(project_name)

        def create_dataframe_layout() -> pd.DataFrame:
            df_layout = pd.DataFrame(columns=cls.COLUMNS)
            df_layout = df_layout.astype(cls.COLUMN_TYPES)
            return df_layout

        def create_data_frame_for_report(
            report_paths: tp.Tuple[Path, Path]
        ) -> tp.Tuple[pd.DataFrame, str, str]:
            # Look up the HEAD commit of the report and its metadata
            head_report = load_blame_report(report_paths[0])
            pred_report = load_blame_report(report_paths[1])
            commit = repo.get(head_report.head_commit.hash)
            commit_date = datetime.utcfromtimestamp(commit.commit_time)
            pred_commit = repo.get(pred_report.head_commit.hash)

            diff_between_head_pred = BlameReportDiff(head_report, pred_report)

            # Calculate the total churn between pred and base commit
            code_churn = calc_code_churn(
                Path(repo.path), FullCommitHash.from_pygit_commit(pred_commit),
                FullCommitHash.from_pygit_commit(commit),
                ChurnConfig.create_c_style_languages_config())
            total_churn = code_churn[1] + code_churn[2]

            def weighted_avg(tuples: tp.List[tp.Tuple[int, int]]) -> float:
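                # Sums degree * amount and normalizes by the summed degrees,
                # e.g. [(1, 3), (2, 1)] -> (1*3 + 2*1) / (1 + 2) = 5/3;
                # max(1, ...) below guards against division by zero.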
                total_sum = 0
                degree_sum = 0
                for degree, amount in tuples:
                    degree_sum += degree
                    total_sum += (degree * amount)

                return total_sum / max(1, degree_sum)

            def combine_max(tuples: tp.List[tp.Tuple[int, int]]) -> float:
                if tuples:
                    return max(x for x, _ in tuples)
                return 0

            return (pd.DataFrame(
                {
                    'revision':
                    head_report.head_commit.hash,
                    'time_id':
                    commit_map.short_time_id(head_report.head_commit),
                    'churn':
                    total_churn,
                    'num_interactions':
                    count_interactions(diff_between_head_pred),
                    'num_interacting_commits':
                    count_interacting_commits(diff_between_head_pred),
                    'num_interacting_authors':
                    count_interacting_authors(diff_between_head_pred,
                                              commit_lookup),
                    "ci_degree_mean":
                    weighted_avg(
                        generate_degree_tuples(diff_between_head_pred)),
                    "author_mean":
                    weighted_avg(
                        generate_author_degree_tuples(diff_between_head_pred,
                                                      commit_lookup)),
                    "avg_time_mean":
                    weighted_avg(
                        generate_avg_time_distribution_tuples(
                            diff_between_head_pred, commit_lookup, 1)),
                    "ci_degree_max":
                    combine_max(
                        generate_degree_tuples(diff_between_head_pred)),
                    "author_max":
                    combine_max(
                        generate_author_degree_tuples(diff_between_head_pred,
                                                      commit_lookup)),
                    "avg_time_max":
                    combine_max(
                        generate_max_time_distribution_tuples(
                            diff_between_head_pred, commit_lookup, 1)),
                    'year':
                    commit_date.year,
                },
                index=[0]), id_from_paths(report_paths),
                    timestamp_from_paths(report_paths))

        report_pairs, failed_report_pairs = build_report_pairs_tuple(
            project_name, commit_map, case_study)

        # cls.CACHE_ID is set by superclass
        # pylint: disable=E1101
        data_frame = build_cached_report_table(
            cls.CACHE_ID, project_name, report_pairs, failed_report_pairs,
            create_dataframe_layout, create_data_frame_for_report,
            id_from_paths, timestamp_from_paths, compare_timestamps)

        return data_frame