def _collect_cig_node_data(
        project_name: str,
        revision: FullCommitHash) -> tp.List[tp.Dict[str, tp.Any]]:
    """
    Collect per-commit degree and churn data from a project's commit
    interaction graph.

    A node is skipped when its commit hash equals ``UNCOMMITTED_COMMIT_HASH``
    or when the commit lookup helper returns a falsy value for it.

    Args:
        project_name: name of the project whose graph is inspected
        revision: revision handed to ``create_blame_interaction_graph``

    Returns:
        one dict per retained node with the keys ``commit_hash``, ``degree``
        and ``insertions``
    """
    churn_cfg = ChurnConfig.create_c_style_languages_config()
    graph = create_blame_interaction_graph(
        project_name, revision).commit_interaction_graph()
    find_commit = create_commit_lookup_helper(project_name)
    project_repos = get_local_project_gits(project_name)

    rows: tp.List[tp.Dict[str, tp.Any]] = []
    for node_id in graph.nodes:
        pair = tp.cast(CIGNodeAttrs, graph.nodes[node_id])["commit"]
        if pair.commit_hash == UNCOMMITTED_COMMIT_HASH:
            continue
        if not find_commit(pair):
            continue

        repo_path = Path(project_repos[pair.repository_name].path)
        _, added_lines, _ = calc_commit_code_churn(
            repo_path, pair.commit_hash, churn_cfg)
        if added_lines == 0:
            # A zero count is reported and stored as 1 instead.
            # NOTE(review): presumably so that consumers can divide by the
            # value -- confirm against the callers.
            LOG.warning(f"Churn for commit {pair} is 0.")
            added_lines = 1

        rows.append({
            "commit_hash": pair.commit_hash.hash,
            "degree": graph.degree(node_id),
            "insertions": added_lines,
        })
    return rows
    def plot(self, view_mode: bool) -> None:
        """
        Draw a multivariate grid of commit count against number of
        interacting authors for the authors of one case study.

        The data comes from the author interaction graph of the newest
        processed revision of the case study stored in ``plot_kwargs``.

        Args:
            view_mode: not used by this method

        Raises:
            PlotDataEmpty: if no processed revision exists or the author
                           interaction graph has no nodes
        """
        case_study = self.plot_kwargs["case_study"]

        project_name = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            raise PlotDataEmpty()

        aig = create_blame_interaction_graph(
            project_name, revision).author_interaction_graph()

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in aig.nodes:
            node_attrs = tp.cast(AIGNodeAttrs, aig.nodes[node])
            nodes.append({
                "project": project_name,
                "author": node_attrs["author"],
                "# Interacting authors": aig.degree(node),
                "# Commits": node_attrs["num_commits"],
            })
        if not nodes:
            # A frame built from an empty list has none of the columns that
            # are requested from the grid plot below.
            raise PlotDataEmpty()

        data = pd.DataFrame(nodes)
        multivariate_grid("# Commits",
                          "# Interacting authors",
                          "project",
                          data,
                          global_kde=False)
예제 #3
0
    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Build a table comparing the blame-based and the file-based author
        interaction graph of one case study, one row per author.

        For every author of the blame graph the table lists the commit count,
        the node degree and ``author_diff``: the number of blame-graph
        neighbors that are not neighbors in the file-based graph. For every
        author of the file-based graph it lists commit count and node degree.
        Both parts are combined with an outer join on the author.

        Args:
            table_format: requested output format
            wrap_table: forwarded to ``dataframe_to_table``

        Returns:
            the rendered table

        Raises:
            TableDataEmpty: if the case study has no processed revision
        """
        case_study: CaseStudy = self.table_kwargs["case_study"]

        project_name: str = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            raise TableDataEmpty()

        blame_aig = create_blame_interaction_graph(
            project_name, revision).author_interaction_graph()
        file_aig = create_file_based_interaction_graph(
            project_name, revision).author_interaction_graph()

        def neighbors_of(graph: tp.Any, node: tp.Any) -> tp.Set[tp.Any]:
            # The two graphs need not contain the same authors (hence the
            # outer join below); querying a node that is absent from a graph
            # would raise, so treat it as having no neighbors.
            if not graph.has_node(node):
                return set()
            return set(graph.successors(node)).union(graph.predecessors(node))

        blame_nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in blame_aig.nodes:
            node_attrs = tp.cast(AIGNodeAttrs, blame_aig.nodes[node])
            blame_neighbors = neighbors_of(blame_aig, node)
            file_neighbors = neighbors_of(file_aig, node)
            blame_nodes.append({
                "author": f"{node_attrs['author']}",
                "blame_num_commits": node_attrs['num_commits'],
                "blame_node_degree": blame_aig.degree(node),
                "author_diff": len(blame_neighbors.difference(file_neighbors)),
            })
        # Explicit columns keep ``set_index`` working for an empty graph.
        blame_data = pd.DataFrame(blame_nodes,
                                  columns=[
                                      "author", "blame_num_commits",
                                      "blame_node_degree", "author_diff"
                                  ])
        blame_data.set_index("author", inplace=True)

        file_nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in file_aig.nodes:
            node_attrs = tp.cast(AIGNodeAttrs, file_aig.nodes[node])
            file_nodes.append({
                "author": f"{node_attrs['author']}",
                "file_num_commits": node_attrs['num_commits'],
                "file_node_degree": file_aig.degree(node),
            })
        file_data = pd.DataFrame(
            file_nodes,
            columns=["author", "file_num_commits", "file_node_degree"])
        file_data.set_index("author", inplace=True)

        degree_data = blame_data.join(file_data, how="outer")

        kwargs: tp.Dict[str, tp.Any] = {}
        if table_format.is_latex():
            kwargs["index"] = True
            kwargs["multicolumn_format"] = "c"
            kwargs["multirow"] = True

        return dataframe_to_table(degree_data,
                                  table_format,
                                  wrap_table,
                                  wrap_landscape=True,
                                  **kwargs)
    def plot(self, view_mode: bool) -> None:
        """
        Draw, per project, a violin plot with an overlaid strip plot of the
        share of authors each commit is connected to.

        For every case study of the loaded paper config that has a processed
        revision, the commit-author interaction graph is built with incoming
        and outgoing interactions. For each commit node, its degree divided
        by the number of author nodes of that graph is recorded.

        Case studies without a processed revision, graphs without author
        nodes, and graphs without commit nodes contribute nothing to the
        plot.

        Args:
            view_mode: not used by this method
        """
        case_studies = get_loaded_paper_config().get_all_case_studies()

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        project_names: tp.List[str] = []
        for case_study in case_studies:
            project_name = case_study.project_name
            revision = newest_processed_revision_for_case_study(
                case_study, BlameReport)
            if not revision:
                continue

            caig = create_blame_interaction_graph(
                project_name, revision).commit_author_interaction_graph(
                    outgoing_interactions=True, incoming_interactions=True)

            num_authors = sum(
                1 for node in caig.nodes if caig.nodes[node]["author"])
            if num_authors == 0:
                # The ratio below is undefined without author nodes; skipping
                # the project avoids a ZeroDivisionError.
                continue

            project_rows: tp.List[tp.Dict[str, tp.Any]] = []
            for node in caig.nodes:
                node_attrs = tp.cast(CAIGNodeAttrs, caig.nodes[node])
                commit = node_attrs["commit"]
                if not commit:
                    continue
                project_rows.append({
                    "Project": project_name,
                    "commit": commit.commit_hash,
                    "# Interacting Authors": caig.degree(node) / num_authors,
                })

            # Only projects that contribute at least one commit are listed on
            # the x axis.
            if project_rows:
                project_names.append(project_name)
                nodes.extend(project_rows)

        data = pd.DataFrame(nodes)
        project_order = sorted(project_names)
        ax = sns.violinplot(x="Project",
                            y="# Interacting Authors",
                            data=data,
                            order=project_order,
                            inner=None,
                            linewidth=1,
                            color=".95")
        sns.stripplot(x="Project",
                      y="# Interacting Authors",
                      data=data,
                      order=project_order,
                      alpha=.25,
                      size=3)
        ax.set_ylim(-0.1, 1.1)
        ax.set_aspect(0.3 / ax.get_data_ratio())
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel(None)
예제 #5
0
    def plot(self, view_mode: bool) -> None:
        """
        Plot the ascending "code centrality" values of a case study's
        commits.

        The centrality of a commit is computed as its node degree in the
        commit interaction graph minus its insertion count. Nodes whose
        commit hash equals ``UNCOMMITTED_COMMIT_HASH`` or that the commit
        lookup helper cannot resolve are left out.

        Args:
            view_mode: not used by this method

        Raises:
            PlotDataEmpty: if no processed revision exists or no commit node
                           passes the filter
        """
        case_study = self.plot_kwargs["case_study"]

        style.use(self.plot_config.style())
        fig, axes = plt.subplots(1, 1, sharey="all")
        fig.subplots_adjust(hspace=0.5)

        fig.suptitle("Central Code")
        axes.set_title(case_study.project_name)
        axes.set_ylabel("Code Centrality")
        axes.set_xlabel("Commits")

        project_name = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            raise PlotDataEmpty()

        churn_config = ChurnConfig.create_c_style_languages_config()
        cig = create_blame_interaction_graph(
            project_name, revision).commit_interaction_graph()
        commit_lookup = create_commit_lookup_helper(project_name)
        repo_lookup = get_local_project_gits(project_name)

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in cig.nodes:
            node_attrs = tp.cast(CIGNodeAttrs, cig.nodes[node])
            commit = node_attrs["commit"]
            if commit.commit_hash == UNCOMMITTED_COMMIT_HASH:
                continue
            if not commit_lookup(commit):
                continue
            _, insertions, _ = calc_commit_code_churn(
                Path(repo_lookup[commit.repository_name].path),
                commit.commit_hash, churn_config)
            if insertions == 0:
                LOG.warning(f"Churn for commit {commit} is 0.")
                insertions = 1
            nodes.append({
                "commit_hash": commit.commit_hash,
                "degree": cig.degree(node),
                "insertions": insertions,
            })

        if not nodes:
            # Without rows the frame has no "degree"/"insertions" columns.
            raise PlotDataEmpty()

        data = pd.DataFrame(nodes)
        # NOTE(review): insertions are clamped to >= 1 above, which hints at
        # a ratio; the subtraction is kept unchanged here -- confirm that it
        # is the intended formula.
        data["code_centrality"] = data["degree"] - data["insertions"]
        centrality_scores = data["code_centrality"].sort_values()
        axes.plot(centrality_scores.values)
        axes.set_ylim(bottom=0)
    def plot(self, view_mode: bool) -> None:
        """
        Draw a multivariate grid of commit size against node degree for the
        commits of one case study.

        Commit size is the second element of the churn entry stored for the
        commit in its repository's churn table. After the data has been
        passed through ``apply_tukeys_fence`` on "Commit Size", a vertical
        line marks the 0.20 quantile of the commit size and a horizontal line
        marks the 0.80 quantile of the node degree.

        Args:
            view_mode: not used by this method

        Raises:
            PlotDataEmpty: if no processed revision exists or no commit node
                           passes the filter
        """
        case_study = self.plot_kwargs["case_study"]
        project_name = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            raise PlotDataEmpty()

        cig = create_blame_interaction_graph(
            project_name, revision).commit_interaction_graph()

        commit_lookup = create_commit_lookup_helper(project_name)
        repo_lookup = get_local_project_gits(project_name)
        # The churn configuration is the same for every repository.
        churn_config = ChurnConfig.create_c_style_languages_config()
        code_churn_lookup = {
            repo_name: calc_repo_code_churn(repo, churn_config)
            for repo_name, repo in repo_lookup.items()
        }

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in cig.nodes:
            node_attrs = tp.cast(CIGNodeAttrs, cig.nodes[node])
            commit = node_attrs["commit"]
            if commit.commit_hash == UNCOMMITTED_COMMIT_HASH:
                continue
            if not commit_lookup(commit):
                continue
            _, insertions, _ = code_churn_lookup[commit.repository_name][
                commit.commit_hash]
            nodes.append({
                "Case Study": project_name,
                "commit_hash": commit.commit_hash,
                "Commit Size": insertions,
                "Node Degree": cig.degree(node),
            })

        if not nodes:
            # Without rows the frame lacks the columns used below.
            raise PlotDataEmpty()

        data = pd.DataFrame(nodes)
        # NOTE(review): presumably drops commit-size outliers -- confirm
        # against the helper's definition.
        data = apply_tukeys_fence(data, "Commit Size", 3.0)
        grid = multivariate_grid("Commit Size",
                                 "Node Degree",
                                 "Case Study",
                                 data,
                                 global_kde=False)

        ax = grid.ax_joint
        ax.axvline(data["Commit Size"].quantile(0.20),
                   color="#777777",
                   linewidth=3)
        ax.axhline(data["Node Degree"].quantile(0.80),
                   color="#777777",
                   linewidth=3)
예제 #7
0
    def save(self, plot_dir: Path, filetype: str = 'svg') -> None:
        """
        Write the commit interaction graph of the configured project and
        revision as a dot file into ``plot_dir``.

        Before writing, every node receives a ``label`` attribute copied from
        its ``commit_hash`` attribute.

        Args:
            plot_dir: directory the file is written to
            filetype: not used; the file name is always requested for "dot"
        """
        graph = create_blame_interaction_graph(
            self.plot_kwargs["project"],
            self.plot_kwargs["revision"]).commit_interaction_graph()

        labels = {}
        for node in graph.nodes:
            labels[node] = graph.nodes[node]["commit_hash"]
        nx.set_node_attributes(graph, labels, "label")

        # pylint: disable=import-outside-toplevel
        from networkx.drawing.nx_agraph import write_dot
        target_file = plot_dir / self.plot_file_name("dot")
        write_dot(graph, target_file)
예제 #8
0
def _prepare_cig_plotly(
    project_name: str, revision: FullCommitHash,
    create_node_info: tp.Callable[[NodeTy, CommitRepoPair, nx.DiGraph],
                                  NodeInfoTy],
    create_edge_info: tp.Callable[[CommitRepoPair, CommitRepoPair, int],
                                  EdgeInfoTy],
    min_commit_date: datetime = datetime(2015, 1, 1)
) -> tp.Tuple[tp.List[tp.Tuple[NodeTy, NodeInfoTy]], tp.List[tp.Tuple[
        NodeTy, NodeTy, EdgeInfoTy]]]:
    """
    Select the nodes and edges of a commit interaction graph and attach
    caller-defined info objects to them.

    A node is kept if its commit hash differs from
    ``UNCOMMITTED_COMMIT_HASH``, the commit lookup helper resolves it, and
    its commit time (converted with ``datetime.utcfromtimestamp``) is not
    before ``min_commit_date``. An edge is kept if both of its end points
    are kept.

    Args:
        project_name: name of the project whose graph is prepared
        revision: revision handed to ``create_blame_interaction_graph``
        create_node_info: called with node, commit and graph for every kept
                          node
        create_edge_info: called with source commit, sink commit and the
                          edge's ``amount`` attribute for every kept edge
        min_commit_date: naive datetime; commits older than this are
                         dropped (defaults to the previously hard-coded
                         2015-01-01)

    Returns:
        the kept nodes with their info, ordered by ascending commit time,
        and the kept edges with their info
    """
    commit_lookup = create_commit_lookup_helper(project_name)
    cig = create_blame_interaction_graph(project_name,
                                         revision).commit_interaction_graph()

    nodes: tp.List[tp.Tuple[NodeTy, NodeInfoTy]] = []
    node_meta: tp.Dict[NodeTy, CommitRepoPair] = {}
    commit_times: tp.Dict[NodeTy, int] = {}
    for node in cig.nodes:
        commit = tp.cast(CIGNodeAttrs, cig.nodes[node])["commit"]
        if commit.commit_hash == UNCOMMITTED_COMMIT_HASH:
            continue
        # Resolve each commit once; the result serves the date filter here
        # and the sort key below.
        git_commit = commit_lookup(commit)
        if not git_commit:
            continue
        if datetime.utcfromtimestamp(git_commit.commit_time) < min_commit_date:
            continue
        node_meta[node] = commit
        commit_times[node] = int(git_commit.commit_time)
        nodes.append((node, create_node_info(node, commit, cig)))

    nodes.sort(key=lambda entry: commit_times[entry[0]])

    edges: tp.List[tp.Tuple[NodeTy, NodeTy, EdgeInfoTy]] = []
    for source, sink in cig.edges:
        # ``node_meta`` holds exactly the nodes that passed the filter, so a
        # membership test replaces re-running the filter per edge.
        if source not in node_meta or sink not in node_meta:
            continue
        amount = tp.cast(CIGEdgeAttrs, cig[source][sink])["amount"]
        edges.append((source, sink,
                      create_edge_info(node_meta[source], node_meta[sink],
                                       amount)))

    return nodes, edges
예제 #9
0
    def plot(self, view_mode: bool) -> None:
        """
        Plot the ascending degree, out-degree and in-degree values of the
        nodes of a case study's author interaction graph.

        Each of the three curves is sorted on its own, so the same x position
        may refer to different authors in different curves.

        Args:
            view_mode: not used by this method

        Raises:
            PlotDataEmpty: if no processed revision exists or the author
                           interaction graph has no nodes
        """
        case_study = self.plot_kwargs["plot_case_study"]

        style.use(self.plot_config.style())
        fig, axes = plt.subplots(1, 1, sharey="all")
        fig.subplots_adjust(hspace=0.5)

        fig.suptitle("Author Interaction Graph - Node Degrees")
        axes.set_title(case_study.project_name)
        axes.set_ylabel("Degree")
        axes.set_xlabel("Authors")

        project_name = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            raise PlotDataEmpty()

        aig = create_blame_interaction_graph(
            project_name, revision).author_interaction_graph()

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in aig.nodes:
            node_attrs = tp.cast(AIGNodeAttrs, aig.nodes[node])
            nodes.append({
                "author": node_attrs["author"],
                "node_degree": aig.degree(node),
                "node_out_degree": aig.out_degree(node),
                "node_in_degree": aig.in_degree(node),
            })

        if not nodes:
            # Without rows the frame lacks the degree columns read below.
            raise PlotDataEmpty()

        data = pd.DataFrame(nodes)
        curves = (
            ("node_degree", "degree"),
            ("node_out_degree", "out_degree"),
            ("node_in_degree", "in_degree"),
        )
        for column, label in curves:
            axes.plot(data[column].sort_values().values, label=label)

        axes.legend()
    def test_get_author_data(self) -> None:
        """Verify the data ``get_author_data`` returns for the author
        "Lasse Collin" of the xz author interaction graph."""
        vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
        load_paper_config()

        xz_case_study = get_paper_config().get_case_studies("xz")[0]
        revision = newest_processed_revision_for_case_study(
            xz_case_study, BlameReport
        )
        assert revision
        blame_graph = create_blame_interaction_graph("xz", revision)

        self.assertEqual(blame_graph.project_name, "xz")

        author_graph = blame_graph.author_interaction_graph()
        author_data = get_author_data(author_graph, "Lasse Collin")
        self.assertEqual(author_data["node_attrs"]["author"], "Lasse Collin")
        self.assertEqual(author_data["neighbors"], set())
        # No neighbors implies no incoming and no outgoing edge attributes.
        for attrs_key in ("in_attrs", "out_attrs"):
            self.assertEqual(0, len(author_data[attrs_key]))
예제 #11
0
    def plot(self, view_mode: bool) -> None:
        """
        Plot the ascending node degrees of the commit nodes of a case study's
        commit-author interaction graph.

        Only nodes whose ``commit`` attribute is truthy are considered.

        Args:
            view_mode: not used by this method

        Raises:
            PlotDataEmpty: if no processed revision exists or the graph has
                           no commit nodes
        """
        case_study = self.plot_kwargs["plot_case_study"]

        style.use(self.plot_config.style())
        fig, axes = plt.subplots(1, 1, sharey="all")
        fig.subplots_adjust(hspace=0.5)

        fig.suptitle("Commit-Author Interaction Graph - # Interacting Authors")
        axes.set_title(case_study.project_name)
        axes.set_ylabel("Authors")
        axes.set_xlabel("Commits")

        project_name = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            raise PlotDataEmpty()

        caig = create_blame_interaction_graph(
            project_name, revision).commit_author_interaction_graph()

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in caig.nodes:
            node_attrs = tp.cast(CAIGNodeAttrs, caig.nodes[node])
            commit = node_attrs["commit"]
            if not commit:
                continue
            nodes.append({
                "commit": commit.commit_hash,
                "num_authors": caig.degree(node),
            })

        if not nodes:
            # Without rows the frame has no "num_authors" column.
            raise PlotDataEmpty()

        data = pd.DataFrame(nodes)
        axes.plot(data["num_authors"].sort_values().values)
    def test_blame_interaction_graph(self) -> None:
        """Verify node and edge counts of the three graphs derived from the
        xz blame interaction graph."""
        vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
        load_paper_config()

        xz_case_study = get_paper_config().get_case_studies("xz")[0]
        revision = newest_processed_revision_for_case_study(
            xz_case_study, BlameReport
        )
        assert revision
        blame_graph = create_blame_interaction_graph("xz", revision)

        self.assertEqual(blame_graph.project_name, "xz")

        # (graph factory, expected node count, expected edge count)
        expectations = (
            (blame_graph.commit_interaction_graph, 124, 928),
            (blame_graph.author_interaction_graph, 1, 0),
            (blame_graph.commit_author_interaction_graph, 125, 92),
        )
        for build_graph, num_nodes, num_edges in expectations:
            graph = build_graph()
            self.assertEqual(num_nodes, len(graph.nodes))
            self.assertEqual(num_edges, len(graph.edges))
예제 #13
0
    def plot(self, view_mode: bool) -> None:
        """
        Plot degree, out-degree and in-degree of the commit nodes of a case
        study's commit interaction graph.

        The ``sort`` entry of ``plot_kwargs`` selects the ordering: with
        ``"time"`` all curves follow ascending commit time, with ``"degree"``
        each curve is sorted ascending on its own. Any other value leaves the
        nodes in graph iteration order and the x axis unlabelled.

        Nodes whose commit hash equals ``UNCOMMITTED_COMMIT_HASH`` or that
        the commit lookup helper cannot resolve are left out.

        Args:
            view_mode: not used by this method

        Raises:
            PlotDataEmpty: if no processed revision exists or no commit node
                           passes the filter
        """
        sort = self.plot_kwargs["sort"]
        case_study = self.plot_kwargs["plot_case_study"]

        style.use(self.plot_config.style())
        fig, axes = plt.subplots(1, 1, sharey="all")
        fig.subplots_adjust(hspace=0.5)

        fig.suptitle("Commit Interaction Graph - Node Degrees")
        axes.set_title(case_study.project_name)
        axes.set_ylabel("Degree")
        xlabel = ""
        if sort == "time":
            xlabel = "Time (old to new)"
        elif sort == "degree":
            xlabel = "Commits"
        axes.set_xlabel(xlabel)

        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport)
        if not revision:
            raise PlotDataEmpty()

        cig = create_blame_interaction_graph(
            case_study.project_name, revision).commit_interaction_graph()
        commit_lookup = create_commit_lookup_helper(case_study.project_name)

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in cig.nodes:
            node_attrs = tp.cast(CIGNodeAttrs, cig.nodes[node])
            commit = node_attrs["commit"]
            if commit.commit_hash == UNCOMMITTED_COMMIT_HASH:
                continue
            # One lookup serves both the existence check and the commit time.
            git_commit = commit_lookup(commit)
            if not git_commit:
                continue
            nodes.append({
                "commit_hash": commit.commit_hash,
                "commit_time":
                    datetime.utcfromtimestamp(git_commit.commit_time),
                "node_degree": cig.degree(node),
                "node_out_degree": cig.out_degree(node),
                "node_in_degree": cig.in_degree(node),
            })

        if not nodes:
            # Without rows the frame lacks the columns read below.
            raise PlotDataEmpty()

        data = pd.DataFrame(nodes)
        if sort == "time":
            data = data.sort_values(by="commit_time")

        curves = (
            ("node_degree", "degree"),
            ("node_out_degree", "out_degree"),
            ("node_in_degree", "in_degree"),
        )
        for column, label in curves:
            values = data[column]
            if sort == "degree":
                values = values.sort_values()
            axes.plot(values.values, label=label)

        axes.legend()
    def plot(self, view_mode: bool) -> None:
        """
        Draw, per project, a violin plot with an overlaid strip plot of the
        number of additional authors found by the blame-based graph.

        For every author node of the file-based author interaction graph, the
        value is the number of its neighbors in the blame-based graph that
        are not its neighbors in the file-based graph. Authors missing from
        the blame-based graph count as having no blame neighbors. Case
        studies without a processed revision are skipped.

        Args:
            view_mode: not used by this method
        """
        per_project_frames: tp.List[pd.DataFrame] = []
        project_names: tp.List[str] = []

        for case_study in get_loaded_paper_config().get_all_case_studies():
            project_name = case_study.project_name
            revision = newest_processed_revision_for_case_study(
                case_study, BlameReport)
            if not revision:
                continue

            project_names.append(project_name)

            blame_aig = create_blame_interaction_graph(
                project_name, revision).author_interaction_graph()
            file_aig = create_file_based_interaction_graph(
                project_name, revision).author_interaction_graph()

            rows: tp.List[tp.Dict[str, tp.Any]] = []
            for author_node in file_aig.nodes:
                attrs = tp.cast(AIGNodeAttrs, file_aig.nodes[author_node])

                blame_neighbors: tp.Set[tp.Any] = set()
                if blame_aig.has_node(author_node):
                    blame_neighbors.update(blame_aig.successors(author_node))
                    blame_neighbors.update(
                        blame_aig.predecessors(author_node))

                file_neighbors = set(file_aig.successors(author_node))
                file_neighbors.update(file_aig.predecessors(author_node))

                rows.append({
                    "Project": project_name,
                    "author": f"{attrs['author']}",
                    "# Additional Authors":
                        len(blame_neighbors - file_neighbors),
                })

            per_project_frames.append(
                pd.DataFrame(rows).set_index("author"))

        data = pd.concat(per_project_frames)
        project_order = sorted(project_names)
        ax = sns.violinplot(x="Project",
                            y="# Additional Authors",
                            data=data,
                            order=project_order,
                            inner=None,
                            linewidth=1,
                            color=".95")
        sns.stripplot(x="Project",
                      y="# Additional Authors",
                      data=data,
                      order=project_order,
                      alpha=.25,
                      size=3)
        # Leave 10% head room above the largest value.
        ax.set_ylim(bottom=0, top=1.1 * data["# Additional Authors"].max())
        ax.set_aspect(0.3 / ax.get_data_ratio())
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel(None)
예제 #15
0
 def create_graph(project_name: str,
                  revision: FullCommitHash) -> nx.DiGraph:
     """Return the commit-author interaction graph of the blame interaction
     graph created for the given project and revision."""
     blame_graph = create_blame_interaction_graph(project_name, revision)
     return blame_graph.commit_author_interaction_graph()