        def create_data_frame_for_report(
            report_paths: tp.Tuple[Path, Path]
        ) -> tp.Tuple[pd.DataFrame, str, str]:

            head_report = load_blame_report(report_paths[0])
            pred_report = load_blame_report(report_paths[1])

            diff_report = BlameReportDiff(head_report, pred_report)

            base_inter_c_repo_pair_mapping = \
                gen_base_to_inter_commit_repo_pair_mapping(diff_report)

            def build_dataframe_row(base_hash: FullCommitHash,
                                    base_library: str,
                                    inter_hash: FullCommitHash,
                                    inter_library: str,
                                    amount: int) -> tp.Dict[str, tp.Any]:

                data_dict: tp.Dict[str, tp.Any] = {
                    'revision': head_report.head_commit.hash,
                    'time_id':
                    commit_map.short_time_id(head_report.head_commit),
                    'base_hash': base_hash.hash,
                    'base_lib': base_library,
                    'inter_hash': inter_hash.hash,
                    'inter_lib': inter_library,
                    'amount': amount
                }
                return data_dict

            result_data_dicts: tp.List[tp.Dict[str, tp.Any]] = []

            for base_pair in base_inter_c_repo_pair_mapping:
                inter_pair_amount_dict = base_inter_c_repo_pair_mapping[
                    base_pair]

                for inter_pair in inter_pair_amount_dict:
                    result_data_dicts.append(
                        build_dataframe_row(
                            base_hash=base_pair.commit.commit_hash,
                            base_library=base_pair.commit.repository_name,
                            inter_hash=inter_pair.commit.commit_hash,
                            inter_library=inter_pair.commit.repository_name,
                            amount=inter_pair_amount_dict[inter_pair]))

            return (pd.DataFrame(result_data_dicts),
                    id_from_paths(report_paths),
                    timestamp_from_paths(report_paths))
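Each call builds one row dict per (base, interacting) commit pair and lets pandas assemble the table from the list of dicts. A minimal, self-contained sketch of that pattern; the hashes, library names, and values are invented for illustration and are not taken from a real report:

import pandas as pd

# Hypothetical rows, shaped like the dicts build_dataframe_row() produces.
rows = [
    {'revision': 'a3f9c1e', 'time_id': 42, 'base_hash': 'deadbeef',
     'base_lib': 'libA', 'inter_hash': 'cafebabe', 'inter_lib': 'libB',
     'amount': 3},
    {'revision': 'a3f9c1e', 'time_id': 42, 'base_hash': 'deadbeef',
     'base_lib': 'libA', 'inter_hash': 'f00dface', 'inter_lib': 'libC',
     'amount': 1},
]

# A list of dicts maps directly onto a DataFrame; keys become column names.
df = pd.DataFrame(rows)
print(df[['base_lib', 'inter_lib', 'amount']])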
Example #2
def create_blame_interaction_graph(
        project_name: str, revision: FullCommitHash) -> BlameInteractionGraph:
    """
    Create a blame interaction graph for a certain project revision.

    Args:
        project_name: name of the project
        revision: project revision

    Returns:
        the blame interaction graph
    """
    file_name_filter: tp.Callable[[str], bool] = lambda x: False

    if revision:

        def match_revision(file_name: str) -> bool:
            return ReportFilename(
                file_name).commit_hash != revision.to_short_commit_hash()

        file_name_filter = match_revision

    report_files = get_processed_revisions_files(project_name, BlameReport,
                                                 file_name_filter)
    if len(report_files) == 0:
        raise LookupError(f"Found no BlameReport for project {project_name}")
    report = load_blame_report(report_files[0])
    return BlameInteractionGraph(project_name, report)
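The revision filter follows a common predicate pattern: judging from the default lambda x: False and the negated comparison in match_revision, get_processed_revisions_files appears to exclude files for which the predicate returns True. A small stand-alone sketch of the same pattern, using a hypothetical file-name scheme instead of ReportFilename:

import typing as tp

# Hypothetical report file names that encode a short commit hash.
files = ["BR-proj-1a2b3c4.yaml", "BR-proj-9f8e7d6.yaml"]

def make_revision_filter(short_hash: str) -> tp.Callable[[str], bool]:
    """Return a predicate that is True for files NOT belonging to short_hash."""
    def match_revision(file_name: str) -> bool:
        return short_hash not in file_name
    return match_revision

keep_everything: tp.Callable[[str], bool] = lambda x: False
only_one_revision = make_revision_filter("1a2b3c4")

print([f for f in files if not keep_everything(f)])    # both files survive
print([f for f in files if not only_one_revision(f)])  # only the matching file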
Example #3
    def create_data_frame_for_report(
        report_paths: tp.Tuple[Path, Path]
    ) -> tp.Tuple[pd.DataFrame, str, str]:
        # Look up the commit objects for the HEAD commits of the fix and introducing reports
        fix_report = load_blame_report(report_paths[0])
        intro_report = load_blame_report(report_paths[1])
        fix_commit = commit_lookup(
            CommitRepoPair(
                commit_map.convert_to_full_or_warn(fix_report.head_commit),
                prj_src.local
            )
        )
        intro_commit = commit_lookup(
            CommitRepoPair(
                commit_map.convert_to_full_or_warn(intro_report.head_commit),
                prj_src.local
            )
        )

        fix_in, fix_out = get_interacting_commits_for_commit(
            fix_report,
            CommitRepoPair(
                FullCommitHash.from_pygit_commit(fix_commit), prj_src.local
            )
        )
        intro_in, intro_out = get_interacting_commits_for_commit(
            intro_report,
            CommitRepoPair(
                FullCommitHash.from_pygit_commit(intro_commit), prj_src.local
            )
        )

        score = _calculate_szz_quality_score(
            fix_in, fix_out, intro_in, intro_out
        )

        return (
            pd.DataFrame({
                'revision': str(fix_report.head_commit),
                'time_id': commit_map.short_time_id(fix_report.head_commit),
                'introducer': str(intro_report.head_commit),
                'score': score
            },
                         index=[0]), id_from_paths(report_paths),
            timestamp_from_paths(report_paths)
        )
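The helper _calculate_szz_quality_score is not shown in this snippet; it receives the sets of commits interacting with the fix commit and with the introducing commit. Purely as an illustration of a score derived from such interaction sets, and not the project's actual implementation, a Jaccard-style overlap could look like this:

import typing as tp

def overlap_score(fix_in: tp.Set[str], fix_out: tp.Set[str],
                  intro_in: tp.Set[str], intro_out: tp.Set[str]) -> float:
    """Hypothetical score: Jaccard overlap between all commits interacting
    with the fix and all commits interacting with the introducer."""
    fix_all = fix_in | fix_out
    intro_all = intro_in | intro_out
    if not fix_all and not intro_all:
        return 0.0
    return len(fix_all & intro_all) / len(fix_all | intro_all)

print(overlap_score({"a", "b"}, {"c"}, {"b"}, {"c", "d"}))  # 0.5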
Example #4
        def create_data_frame_for_report(
                report_path: Path) -> tp.Tuple[pd.DataFrame, str, str]:
            report = load_blame_report(report_path)
            base_inter_c_repo_pair_mapping = \
                gen_base_to_inter_commit_repo_pair_mapping(report)

            def build_dataframe_row(base_hash: FullCommitHash,
                                    base_library: str,
                                    inter_hash: FullCommitHash,
                                    inter_library: str,
                                    amount: int) -> tp.Dict[str, tp.Any]:

                data_dict: tp.Dict[str, tp.Any] = {
                    'revision': report.head_commit.hash,
                    'time_id': commit_map.short_time_id(report.head_commit),
                    'base_hash': base_hash.hash,
                    'base_lib': base_library,
                    'inter_hash': inter_hash.hash,
                    'inter_lib': inter_library,
                    'amount': amount
                }
                return data_dict

            result_data_dicts: tp.List[tp.Dict[str, tp.Any]] = []

            for base_pair in base_inter_c_repo_pair_mapping:
                inter_pair_amount_dict = base_inter_c_repo_pair_mapping[
                    base_pair]

                for inter_pair in inter_pair_amount_dict:
                    result_data_dicts.append(
                        build_dataframe_row(
                            base_hash=base_pair.commit.commit_hash,
                            base_library=base_pair.commit.repository_name,
                            inter_hash=inter_pair.commit.commit_hash,
                            inter_library=inter_pair.commit.repository_name,
                            amount=inter_pair_amount_dict[inter_pair]))

            return (pd.DataFrame(result_data_dicts),
                    report.head_commit.hash,
                    str(report_path.stat().st_mtime_ns))
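Here the commit hash and the file's modification time in nanoseconds are returned alongside the DataFrame, presumably so the surrounding caching logic can detect stale entries when the report file changes on disk. A small illustration of the mtime-based timestamp, with a temporary file standing in for a report:

from pathlib import Path
import tempfile

with tempfile.NamedTemporaryFile(suffix=".yaml", delete=False) as tmp:
    report_path = Path(tmp.name)

# st_mtime_ns is the last-modification time in integer nanoseconds; converting
# it to a string matches the str element of the returned tuple above.
timestamp = str(report_path.stat().st_mtime_ns)
print(timestamp)
report_path.unlink()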
Example #5
        def create_data_frame_for_report(
                report_path: Path) -> tp.Tuple[pd.DataFrame, str, str]:
            report = load_blame_report(report_path)
            in_head_interactions = len(generate_in_head_interactions(report))
            out_head_interactions = len(generate_out_head_interactions(report))

            return pd.DataFrame(
                {
                    'revision':
                    report.head_commit.hash,
                    'time_id':
                    commit_map.short_time_id(report.head_commit),
                    'IN_HEAD_Interactions':
                    in_head_interactions,
                    'OUT_HEAD_Interactions':
                    out_head_interactions,
                    'HEAD_Interactions':
                    in_head_interactions + out_head_interactions
                },
                index=[0]), report.head_commit.hash, str(
                    report_path.stat().st_mtime_ns)
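Because every value in the dict above is a scalar, pandas needs an explicit index; passing index=[0] yields the single-row frame. A minimal illustration with invented values:

import pandas as pd

row = {'revision': 'a3f9c1e', 'IN_HEAD_Interactions': 7,
       'OUT_HEAD_Interactions': 4, 'HEAD_Interactions': 11}

# With all-scalar values, pd.DataFrame(row) would raise
# "If using all scalar values, you must pass an index".
df = pd.DataFrame(row, index=[0])
print(df)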
Example #6
        def create_data_frame_for_report(
            report_path: Path
        ) -> tp.Tuple[pd.DataFrame, str, str]:
            report = load_blame_report(report_path)

            categorised_degree_occurrences = generate_lib_dependent_degrees(
                report
            )

            def calc_total_amounts() -> int:
                total = 0

                for _, lib_dict in categorised_degree_occurrences.items():
                    for _, tuple_list in lib_dict.items():
                        for degree_amount_tuple in tuple_list:
                            total += degree_amount_tuple[1]
                return total

            total_amounts_of_all_libs = calc_total_amounts()

            list_of_author_degree_occurrences = generate_author_degree_tuples(
                report, commit_lookup
            )
            author_degrees, author_amounts = _split_tuple_values_in_lists_tuple(
                list_of_author_degree_occurrences
            )
            author_total = sum(author_amounts)

            list_of_max_time_deltas = generate_max_time_distribution_tuples(
                report, commit_lookup, MAX_TIME_BUCKET_SIZE
            )
            (max_time_buckets, max_time_amounts
            ) = _split_tuple_values_in_lists_tuple(list_of_max_time_deltas)
            total_max_time_amounts = sum(max_time_amounts)

            list_of_avg_time_deltas = generate_avg_time_distribution_tuples(
                report, commit_lookup, AVG_TIME_BUCKET_SIZE
            )
            (avg_time_buckets, avg_time_amounts
            ) = _split_tuple_values_in_lists_tuple(list_of_avg_time_deltas)
            total_avg_time_amounts = sum(avg_time_amounts)

            def build_dataframe_row(
                degree_type: DegreeType,
                degree: int,
                amount: int,
                total_amount: int,
                base_library: tp.Optional[str] = None,
                inter_library: tp.Optional[str] = None
            ) -> tp.Dict[str, tp.Any]:

                data_dict: tp.Dict[str, tp.Any] = {
                    'revision': report.head_commit.hash,
                    'time_id': commit_map.short_time_id(report.head_commit),
                    'degree_type': degree_type.value,
                    'base_lib': base_library,
                    'inter_lib': inter_library,
                    'degree': degree,
                    'amount': amount,
                    'fraction': np.divide(amount, total_amount)
                }
                return data_dict

            result_data_dicts: tp.List[tp.Dict[str, tp.Any]] = []

            # Append interaction rows
            for base_lib_name, inter_lib_dict \
                    in categorised_degree_occurrences.items():

                for inter_lib_name, list_of_lib_degree_amount_tuples in \
                        inter_lib_dict.items():

                    (inter_degrees,
                     inter_amounts) = _split_tuple_values_in_lists_tuple(
                         list_of_lib_degree_amount_tuples
                     )

                    for i, _ in enumerate(inter_degrees):
                        degree = inter_degrees[i]
                        lib_amount = inter_amounts[i]

                        interaction_data_dict = build_dataframe_row(
                            degree_type=DegreeType.INTERACTION,
                            degree=degree,
                            amount=lib_amount,
                            total_amount=total_amounts_of_all_libs,
                            base_library=base_lib_name,
                            inter_library=inter_lib_name,
                        )
                        result_data_dicts.append(interaction_data_dict)

            def append_rows_of_degree_type(
                degree_type: DegreeType,
                degrees: tp.List[int],
                amounts: tp.List[int],
                sum_amounts: int,
            ) -> None:
                for k, _ in enumerate(degrees):
                    data_dict = build_dataframe_row(
                        degree_type=degree_type,
                        degree=degrees[k],
                        amount=amounts[k],
                        total_amount=sum_amounts
                    )
                    result_data_dicts.append(data_dict)

            # Append author rows
            append_rows_of_degree_type(
                degree_type=DegreeType.AUTHOR,
                degrees=author_degrees,
                amounts=author_amounts,
                sum_amounts=author_total
            )

            # Append max_time rows
            append_rows_of_degree_type(
                degree_type=DegreeType.MAX_TIME,
                degrees=max_time_buckets,
                amounts=max_time_amounts,
                sum_amounts=total_max_time_amounts
            )

            # Append avg_time rows
            append_rows_of_degree_type(
                degree_type=DegreeType.AVG_TIME,
                degrees=avg_time_buckets,
                amounts=avg_time_amounts,
                sum_amounts=total_avg_time_amounts
            )

            return (pd.DataFrame(result_data_dicts),
                    report.head_commit.hash,
                    str(report_path.stat().st_mtime_ns))
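The helper _split_tuple_values_in_lists_tuple is not part of this snippet; from its uses above it unzips a list of (degree, amount) tuples into two parallel lists. A plausible stand-alone version, written here as an assumption rather than the project's code:

import typing as tp

def split_tuple_values_in_lists_tuple(
    tuples: tp.List[tp.Tuple[int, int]]
) -> tp.Tuple[tp.List[int], tp.List[int]]:
    """Unzip [(degree, amount), ...] into ([degrees...], [amounts...])."""
    if not tuples:
        return [], []
    firsts, seconds = zip(*tuples)
    return list(firsts), list(seconds)

degrees, amounts = split_tuple_values_in_lists_tuple([(1, 10), (2, 5), (4, 1)])
print(degrees, amounts)  # [1, 2, 4] [10, 5, 1]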
Example #7
        def create_data_frame_for_report(
            report_paths: tp.Tuple[Path, Path]
        ) -> tp.Tuple[pd.DataFrame, str, str]:
            # Look up the HEAD and predecessor commits and the HEAD commit date
            head_report = load_blame_report(report_paths[0])
            pred_report = load_blame_report(report_paths[1])
            commit = repo.get(head_report.head_commit.hash)
            commit_date = datetime.utcfromtimestamp(commit.commit_time)
            pred_commit = repo.get(pred_report.head_commit.hash)

            diff_between_head_pred = BlameReportDiff(head_report, pred_report)

            # Calculate the total churn between pred and base commit
            code_churn = calc_code_churn(
                Path(repo.path), FullCommitHash.from_pygit_commit(pred_commit),
                FullCommitHash.from_pygit_commit(commit),
                ChurnConfig.create_c_style_languages_config())
            total_churn = code_churn[1] + code_churn[2]

            def weighted_avg(tuples: tp.List[tp.Tuple[int, int]]) -> float:
                total_sum = 0
                degree_sum = 0
                for degree, amount in tuples:
                    degree_sum += degree
                    total_sum += (degree * amount)

                return total_sum / max(1, degree_sum)

            def combine_max(tuples: tp.List[tp.Tuple[int, int]]) -> float:
                if tuples:
                    return max([x for x, y in tuples])
                return 0

            return (pd.DataFrame(
                {
                    'revision':
                    head_report.head_commit.hash,
                    'time_id':
                    commit_map.short_time_id(head_report.head_commit),
                    'churn':
                    total_churn,
                    'num_interactions':
                    count_interactions(diff_between_head_pred),
                    'num_interacting_commits':
                    count_interacting_commits(diff_between_head_pred),
                    'num_interacting_authors':
                    count_interacting_authors(diff_between_head_pred,
                                              commit_lookup),
                    "ci_degree_mean":
                    weighted_avg(
                        generate_degree_tuples(diff_between_head_pred)),
                    "author_mean":
                    weighted_avg(
                        generate_author_degree_tuples(diff_between_head_pred,
                                                      commit_lookup)),
                    "avg_time_mean":
                    weighted_avg(
                        generate_avg_time_distribution_tuples(
                            diff_between_head_pred, commit_lookup, 1)),
                    "ci_degree_max":
                    combine_max(
                        generate_degree_tuples(diff_between_head_pred)),
                    "author_max":
                    combine_max(
                        generate_author_degree_tuples(diff_between_head_pred,
                                                      commit_lookup)),
                    "avg_time_max":
                    combine_max(
                        generate_max_time_distribution_tuples(
                            diff_between_head_pred, commit_lookup, 1)),
                    'year':
                    commit_date.year,
                },
                index=[0]), id_from_paths(report_paths),
                    timestamp_from_paths(report_paths))
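The two local helpers above reduce a list of (degree, amount) tuples to a single statistic; note that weighted_avg, as written, divides by the sum of the degrees rather than the sum of the amounts. Evaluated on sample data (the tuples are invented for illustration) they behave like this:

import typing as tp

def weighted_avg(tuples: tp.List[tp.Tuple[int, int]]) -> float:
    total_sum = 0
    degree_sum = 0
    for degree, amount in tuples:
        degree_sum += degree
        total_sum += (degree * amount)
    return total_sum / max(1, degree_sum)

def combine_max(tuples: tp.List[tp.Tuple[int, int]]) -> float:
    if tuples:
        return max([x for x, y in tuples])
    return 0

sample = [(1, 4), (2, 3), (5, 1)]          # invented (degree, amount) pairs
print(weighted_avg(sample))                 # (1*4 + 2*3 + 5*1) / (1 + 2 + 5) = 1.875
print(combine_max(sample))                  # 5
print(weighted_avg([]), combine_max([]))    # 0.0 0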