Example #1
def _remove_all_result_files(ctx: click.Context, error: bool) -> None:
    """Remove all report files of the current paper_config."""
    result_folders = _find_result_dir_paths_of_projects(
        ctx.obj["case_studies"])
    for folder in result_folders:
        for res_file in folder.iterdir():
            report_file = ReportFilename(res_file.name)
            if not report_file.is_result_file():
                continue
            if ctx.obj["experiment"] and not ctx.obj[
                    "experiment"].file_belongs_to_experiment(res_file.name):
                continue
            if ctx.obj["report"] and not ctx.obj[
                    "report"].is_correct_report_type(res_file.name):
                continue

            commit_hash = report_file.commit_hash
            if any(
                    case_study.has_revision(commit_hash)
                    for case_study in ctx.obj["case_studies"]):
                if error and not (report_file.has_status_compileerror()
                                  or report_file.has_status_failed()):
                    continue
                res_file.unlink()
Example #2
def __get_result_files_dict(
    project_name: str, result_file_type: tp.Type[BaseReport]
) -> tp.Dict[ShortCommitHash, tp.List[Path]]:
    """
    Returns a dict that maps the commit_hash to a list of all result files, of
    type result_file_type, for that commit.

    Args:
        project_name: target project
        result_file_type: the type of the result file
    """
    res_dir = Path(f"{vara_cfg()['result_dir']}/{project_name}/")

    result_files: tp.DefaultDict[ShortCommitHash, tp.List[Path]] = defaultdict(
        list
    )  # maps commit hash -> list of res files (success or fail)
    if not res_dir.exists():
        return result_files

    for res_file in res_dir.iterdir():
        report_file = ReportFilename(res_file)
        if report_file.is_result_file(
        ) and result_file_type.is_correct_report_type(res_file.name):
            commit_hash = report_file.commit_hash
            result_files[commit_hash].append(res_file)

    return result_files
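
A minimal consumption sketch, callable from within the defining module (the project name and the choice of CommitReport as the report type are illustrative assumptions):

# Hypothetical usage: pick the newest result file per commit by mtime.
result_files = __get_result_files_dict("brotli", CommitReport)
for commit_hash, paths in result_files.items():
    newest = max(paths, key=lambda p: p.stat().st_mtime)
    print(commit_hash, newest.name)
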
Example #3
    def test_get_newest_result_files_for_case_study_fail(self) -> None:
        """Check that when we have two files, the newes one get's selected."""
        vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
        load_paper_config()

        bad_file = ReportFilename(
            'CRE-CR-brotli-brotli-21ac39f7c8_'
            '34d4d1b5-7212-4244-9adc-b19bff599cf1_success.yaml')

        now = datetime.now().timestamp()
        file_path = Path(str(
            vara_cfg()['result_dir'])) / 'brotli' / bad_file.filename
        os.utime(file_path, (now, now))

        newest_res_files = MCS.get_newest_result_files_for_case_study(
            get_paper_config().get_case_studies('brotli')[0],
            Path(vara_cfg()['result_dir'].value), CR)

        # remove unnecessary files
        filtered_newest_res_files = list(
            filter(
                lambda res_file: res_file.commit_hash == bad_file.commit_hash,
                map(ReportFilename, newest_res_files)))

        self.assertFalse(filtered_newest_res_files[0].uuid.endswith('42'))
Example #4
    def test_get_commit(self):
        """Check if the correct commit hash is returned."""
        self.assertEqual(
            ReportFilename(self.success_filename).commit_hash,
            ShortCommitHash("7bb9ef5f8c"))
        self.assertEqual(
            ReportFilename(self.fail_filename).commit_hash,
            ShortCommitHash("7bb9ef5f8c"))
Example #5
    @classmethod
    def setUpClass(cls):
        """Set up file and CommitReport."""
        cls.correct_UUID = "fdb09c5a-4cee-42d8-bbdc-4afe7a7864be"
        cls.raw_filename = ("CRE-CR-foo-bar-7bb9ef5f8c_"
                            f"{cls.correct_UUID}_"
                            "success.txt")
        cls.report_filename = ReportFilename(cls.raw_filename)
        cls.broken_report_filename = ReportFilename("ThisFileIsWrong.foobar")
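
The raw filename above spells out the name scheme ReportFilename parses. A hedged sketch of the accessors exercised across this section (the scheme is inferred from these examples, not from the parser itself):

# Name scheme, as inferred from the examples in this section:
#   <experiment>-<report>-<project>-<binary>-<hash>_<uuid>_<status>.<ext>
filename = ReportFilename("CRE-CR-foo-bar-7bb9ef5f8c_"
                          "fdb09c5a-4cee-42d8-bbdc-4afe7a7864be_success.txt")
assert filename.is_result_file()
assert filename.has_status_success()
assert filename.experiment_shorthand == "CRE"
assert filename.commit_hash == ShortCommitHash("7bb9ef5f8c")
assert filename.uuid == "fdb09c5a-4cee-42d8-bbdc-4afe7a7864be"
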
Example #6
    def test_file_status(self):
        """Check if the correct file status is returned for BaseReport names."""
        self.assertTrue(
            ReportFilename(self.success_filename).has_status_success())
        self.assertFalse(
            ReportFilename(self.fail_filename).has_status_success())

        self.assertTrue(ReportFilename(self.fail_filename).has_status_failed())
        self.assertFalse(
            ReportFilename(self.success_filename).has_status_failed())
Example #7
    def test_is_result_file(self):
        """Check if the result file matcher works."""
        self.assertTrue(ReportFilename(self.success_filename).is_result_file())
        self.assertTrue(ReportFilename(self.fail_filename).is_result_file())
        self.assertFalse(
            ReportFilename(
                self.success_filename.replace("_", "")).is_result_file())
        self.assertFalse(
            ReportFilename(
                self.fail_filename.replace("-", "")).is_result_file())
Example #8
def __get_tag_for_revision(
    revision: ShortCommitHash,
    file_list: tp.List[Path],
    project_cls: tp.Type[Project],
    result_file_type: tp.Type[BaseReport],
    tag_blocked: bool = True
) -> FileStatusExtension:
    """
    Calculates the file status for a revision.

    Args:
        revision: the revision to get the status for
        file_list: the list of result files for the revision
        project_cls: the project class the revision belongs to
        result_file_type: the report type to be considered
        tag_blocked: whether blocked revisions should be tagged as blocked

    Returns:
        the status for the revision
    """
    if tag_blocked and is_revision_blocked(revision, project_cls):
        return FileStatusExtension.BLOCKED

    newest_res_file = max(file_list, key=lambda x: x.stat().st_mtime)
    if result_file_type.is_correct_report_type(newest_res_file.name):
        return ReportFilename(str(newest_res_file)).file_status

    return FileStatusExtension.MISSING
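
This helper shares a convention with Examples #9, #16, #19, and #20: when a revision has several result files, the newest one by modification time wins. A standalone sketch of that selection (helper name hypothetical):

from pathlib import Path
import typing as tp

def newest_result_file(file_list: tp.List[Path]) -> Path:
    # The newest file by mtime decides, as in the max(...) call above.
    return max(file_list, key=lambda x: x.stat().st_mtime)
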
Example #9
def get_newest_result_files_for_case_study(
        case_study: CaseStudy, result_dir: Path,
        report_type: tp.Type[BaseReport]) -> tp.List[Path]:
    """
    Return all result files of a specific type that belong to a given case
    study. For revisions with multiple files, the newest file is selected.

    Args:
        case_study: to load
        result_dir: to load the results from
        report_type: type of report that should be loaded

    Returns:
        list of result file paths
    """
    files_to_store: tp.Dict[ShortCommitHash, Path] = {}

    result_dir /= case_study.project_name
    if not result_dir.exists():
        return []

    for opt_res_file in result_dir.iterdir():
        report_file = ReportFilename(opt_res_file.name)
        if report_type.is_correct_report_type(report_file.filename):
            commit_hash = report_file.commit_hash
            if case_study.has_revision(commit_hash):
                current_file = files_to_store.get(commit_hash, None)
                if current_file is None:
                    files_to_store[commit_hash] = opt_res_file
                elif (current_file.stat().st_mtime <
                      opt_res_file.stat().st_mtime):
                    files_to_store[commit_hash] = opt_res_file

    return list(files_to_store.values())
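
A usage sketch mirroring the call in Example #3 (CR is the CommitReport alias used there; configuration accessors as in that test):

newest_res_files = get_newest_result_files_for_case_study(
    get_paper_config().get_case_studies('brotli')[0],
    Path(vara_cfg()['result_dir'].value), CR)
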
Example #10
def filter_report_paths_binary(
    report_files: tp.List[Path], binary: ProjectBinaryWrapper
) -> tp.List[Path]:
    """Filter the given report files down to those of the given binary."""
    return list(
        filter(
            lambda x: ReportFilename(x).binary_name == binary.name, report_files
        )
    )
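
The same filter as a list comprehension, a behavior-equivalent alternative (function name hypothetical; other names as in the snippet above):

def filter_report_paths_binary_lc(
    report_files: tp.List[Path], binary: ProjectBinaryWrapper
) -> tp.List[Path]:
    # Equivalent to the filter(...) form above.
    return [
        path for path in report_files
        if ReportFilename(path).binary_name == binary.name
    ]
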
Example #11
    def test_is_result_file(self) -> None:
        """Check if the result file matcher works."""
        self.assertTrue(self.commit_report_success.filename.is_result_file())
        self.assertTrue(self.commit_report_fail.filename.is_result_file())

        self.assertFalse(
            ReportFilename(
                self.commit_report_success.filename.filename.replace("_", "")
            ).is_result_file()
        )
        self.assertFalse(
            ReportFilename(
                self.commit_report_success.filename.filename.replace("-", "")
            ).is_result_file()
        )
        self.assertFalse(
            ReportFilename(
                self.commit_report_success.filename.filename.replace(".", "f")
            ).is_result_file()
        )
Example #12
    def cs_filter(file_name: str) -> bool:
        """
        Filter files that are not in the case study.

        Returns:
            ``True`` if a case_study is set and the commit_hash of the file
            is not part of this case_study, otherwise, ``False``.
        """
        if case_study is None:
            return False

        return not case_study.has_revision(
            ReportFilename(file_name).commit_hash)
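
cs_filter follows the file_name_filter callback shape used by __get_files_with_status in Example #19: returning True means the file is skipped. A sketch wrapping it as a reusable factory (factory name hypothetical):

def make_cs_filter(
    case_study: tp.Optional[CaseStudy]
) -> tp.Callable[[str], bool]:
    # Hypothetical factory around the closure above: skip files whose
    # revision is not part of the given case study.
    def cs_filter(file_name: str) -> bool:
        if case_study is None:
            return False
        return not case_study.has_revision(
            ReportFilename(file_name).commit_hash)

    return cs_filter
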
Example #13
    def _load_dataframe(cls, project_name: str, commit_map: CommitMap,
                        case_study: tp.Optional[CaseStudy],
                        **kwargs: tp.Any) -> pd.DataFrame:
        def create_dataframe_layout() -> pd.DataFrame:
            df_layout = pd.DataFrame(columns=cls.COLUMNS)
            df_layout = df_layout.astype(cls.COLUMN_TYPES)
            return df_layout

        def create_data_frame_for_report(
                report_path: Path) -> tp.Tuple[pd.DataFrame, str, str]:
            report = load_commit_report(report_path)
            cf_head_interactions_raw = report.number_of_head_cf_interactions()
            df_head_interactions_raw = report.number_of_head_df_interactions()

            return pd.DataFrame(
                {
                    'revision': report.head_commit.hash,
                    'time_id': commit_map.short_time_id(report.head_commit),
                    'CFInteractions': report.number_of_cf_interactions(),
                    'DFInteractions': report.number_of_df_interactions(),
                    'HEAD CF Interactions':
                        cf_head_interactions_raw[0] +
                        cf_head_interactions_raw[1],
                    'HEAD DF Interactions':
                        df_head_interactions_raw[0] +
                        df_head_interactions_raw[1]
                },
                index=[0]
            ), report.head_commit.hash, str(report_path.stat().st_mtime_ns)

        report_files = get_processed_revisions_files(
            project_name, CommitReport,
            get_case_study_file_name_filter(case_study))

        failed_report_files = get_failed_revisions_files(
            project_name, CommitReport,
            get_case_study_file_name_filter(case_study))

        # cls.CACHE_ID is set by superclass
        # pylint: disable=E1101
        data_frame = build_cached_report_table(
            cls.CACHE_ID, project_name, report_files, failed_report_files,
            create_dataframe_layout, create_data_frame_for_report,
            lambda path: ReportFilename(path).commit_hash.hash,
            lambda path: str(path.stat().st_mtime_ns),
            lambda a, b: int(a) > int(b))

        return data_frame
Example #14

def build_report_files_tuple(
    project_name: str, case_study: tp.Optional[CaseStudy]
) -> tp.Tuple[tp.Dict[ShortCommitHash, Path], tp.Dict[ShortCommitHash, Path]]:
    """
    Build the mappings between commit hash to its corresponding report file
    path, where the first mapping corresponds to commit hashes and their
    successful report files and the second mapping to commit hashes and their
    failed report files.

    Args:
        project_name: the name of the project
        case_study: the selected CaseStudy

    Returns:
        the mappings from commit hash to successful and failed report files as
        tuple
    """
    report_files: tp.Dict[ShortCommitHash, Path] = {
        ReportFilename(report).commit_hash: report
        for report in get_processed_revisions_files(
            project_name,
            BlameReport,
            get_case_study_file_name_filter(case_study)
            if case_study else lambda x: False,
        )
    }

    failed_report_files: tp.Dict[ShortCommitHash, Path] = {
        ReportFilename(report).commit_hash: report
        for report in get_failed_revisions_files(
            project_name,
            BlameReport,
            get_case_study_file_name_filter(case_study)
            if case_study else lambda x: False,
        )
    }
    return report_files, failed_report_files
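
A short usage sketch unpacking the returned tuple (project name illustrative; assumes a CaseStudy instance named case_study is in scope):

success_files, failed_files = build_report_files_tuple("brotli", case_study)
for commit_hash, path in failed_files.items():
    print(f"{commit_hash}: {path.name}")
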
Example #15
    def file_belongs_to_experiment(cls, file_name: str) -> bool:
        """
        Checks if the file belongs to this experiment.

        Args:
            file_name: name of the file to check

        Returns:
            True, if the file belongs to this experiment type
        """
        try:
            other_short_hand = ReportFilename(file_name).experiment_shorthand
            return cls.shorthand() == other_short_hand
        except ValueError:
            return False
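
Tying this back to Example #5: the filename "CRE-CR-foo-bar-..." carries the experiment shorthand "CRE", so an experiment class whose shorthand() returns "CRE" would claim it. A sketch:

name = ("CRE-CR-foo-bar-7bb9ef5f8c_"
        "fdb09c5a-4cee-42d8-bbdc-4afe7a7864be_success.txt")
assert ReportFilename(name).experiment_shorthand == "CRE"
# Accessing experiment_shorthand on an unparseable name (e.g.
# "ThisFileIsWrong.foobar" from Example #5) raises ValueError, which
# file_belongs_to_experiment converts into False.
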
Example #16
def _remove_old_result_files(ctx: click.Context) -> None:
    """Remove result files of wich a newer version exists."""
    result_dir = Path(str(vara_cfg()['result_dir']))
    for case_study in ctx.obj['case_studies']:
        old_files: tp.List[Path] = []
        newer_files: tp.Dict[ShortCommitHash, Path] = {}
        result_dir_cs = result_dir / case_study.project_name
        if not result_dir_cs.exists():
            continue
        for opt_res_file in result_dir_cs.iterdir():
            report_file = ReportFilename(opt_res_file.name)
            if not report_file.is_result_file():
                continue
            if ctx.obj["experiment"] and not ctx.obj[
                    "experiment"].file_belongs_to_experiment(
                        opt_res_file.name):
                continue
            if ctx.obj["report"] and not ctx.obj[
                    "report"].is_correct_report_type(opt_res_file.name):
                continue

            commit_hash = report_file.commit_hash
            if case_study.has_revision(commit_hash):
                current_file = newer_files.get(commit_hash)
                if current_file is None:
                    newer_files[commit_hash] = opt_res_file
                else:
                    if (current_file.stat().st_mtime_ns <
                            opt_res_file.stat().st_mtime_ns):
                        newer_files[commit_hash] = opt_res_file
                        old_files.append(current_file)
                    else:
                        old_files.append(opt_res_file)
        for file in old_files:
            if file.exists():
                file.unlink()
Example #17
def get_processed_revisions(
    project_name: str, result_file_type: tp.Type[BaseReport]
) -> tp.List[ShortCommitHash]:
    """
    Calculates a list of revisions of a project that have already been processed
    successfully.

    Args:
        project_name: target project
        result_file_type: the type of the result file

    Returns:
        list of correctly processed revisions
    """
    return [
        ReportFilename(x.name).commit_hash
        for x in get_processed_revisions_files(project_name, result_file_type)
    ]
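
A hedged usage sketch (project name and report type illustrative):

# Hypothetical usage: list revisions that were processed successfully.
for revision in get_processed_revisions("brotli", CommitReport):
    print(revision.hash)
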
Example #18

def _get_requested_report_paths(
    project_name: str, szz_report: SZZReport
) -> tp.Dict[ShortCommitHash, Path]:
    bugs = szz_report.get_all_raw_bugs()
    requested_report_revisions: tp.Set[ShortCommitHash] = set()
    for bug in bugs:
        requested_report_revisions.add(bug.fixing_commit.to_short_commit_hash())
        requested_report_revisions.update(
            introducer.to_short_commit_hash()
            for introducer in bug.introducing_commits
        )

    report_map: tp.Dict[ShortCommitHash, Path] = {}
    for report_path in get_processed_revisions_files(project_name, BlameReport):
        report_revision = ReportFilename(report_path).commit_hash
        if report_revision in requested_report_revisions:
            report_map[report_revision] = report_path

    return report_map
Example #19
def __get_files_with_status(
    project_name: str,
    result_file_type: tp.Type[BaseReport],
    file_statuses: tp.List[FileStatusExtension],
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True
) -> tp.List[Path]:
    """
    Find all file paths to revision files with given file statuses.

    Args:
        project_name: target project
        result_file_type: the type of the result file
        file_statuses: a list of statuses the files should have
        file_name_filter: optional filter to exclude certain files; returns
                          ``True`` if the file_name should not be checked
        only_newest: whether to consider only the newest result file per
                     revision; if ``False``, all result files of a revision
                     are considered, sorted descending by mtime

    Returns:
        a list of file paths to matching revision files
    """
    processed_revisions_paths = []

    result_files = __get_result_files_dict(project_name, result_file_type)
    for value in result_files.values():
        sorted_res_files = sorted(
            value, key=lambda x: Path(x).stat().st_mtime, reverse=True
        )
        if only_newest:
            sorted_res_files = [sorted_res_files[0]]
        for result_file in sorted_res_files:
            if file_name_filter(result_file.name):
                continue
            if ReportFilename(result_file.name).file_status in file_statuses:
                processed_revisions_paths.append(result_file)

    return processed_revisions_paths
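
A sketch of calling the helper from within its module; the FileStatusExtension member names are assumed from the has_status_success()/has_status_failed() accessors seen elsewhere in this section:

# Hypothetical call: newest result file per revision that is either a
# success or a failure (enum member names assumed).
paths = __get_files_with_status(
    "brotli", CommitReport,
    [FileStatusExtension.SUCCESS, FileStatusExtension.FAILED])
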
Example #20
def get_failed_revisions(
    project_name: str, result_file_type: tp.Type[BaseReport]
) -> tp.List[ShortCommitHash]:
    """
    Calculates a list of revisions of a project that have failed.

    Args:
        project_name: target project
        result_file_type: the type of the result file

    Returns:
        list of failed revisions
    """
    failed_revisions = []

    result_files = __get_result_files_dict(project_name, result_file_type)
    for commit_hash, value in result_files.items():
        newest_res_file = max(value, key=lambda x: Path(x).stat().st_mtime)
        if ReportFilename(newest_res_file.name).has_status_failed():
            failed_revisions.append(commit_hash)

    return failed_revisions
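
A usage sketch (project name and report type illustrative):

# Hypothetical usage: collect failed revisions, e.g. to schedule re-runs.
for commit_hash in get_failed_revisions("brotli", CommitReport):
    print(f"failed: {commit_hash.hash}")
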
Example #21

    def _load_dataframe(cls, project_name: str, commit_map: CommitMap,
                        case_study: tp.Optional[CaseStudy],
                        **kwargs: tp.Any) -> pd.DataFrame:
        # pylint: disable=unused-argument

        def create_dataframe_layout() -> pd.DataFrame:
            df_layout = pd.DataFrame(columns=cls.COLUMNS)
            df_layout = df_layout.astype(cls.COLUMN_TYPES)
            return df_layout

        def create_data_frame_for_report(
                report_path: Path) -> tp.Tuple[pd.DataFrame, str, str]:

            report_file_name_match = re.search(
                BlameVerifierReportDatabase.report_file_name_pattern,
                str(report_path))

            if report_file_name_match:
                report_file_name = report_file_name_match.group()
            else:
                raise RuntimeWarning(
                    "report file name could not be read from report path")

            report: tp.Union[BlameVerifierReportOpt,
                             BlameVerifierReportNoOptTBAA]

            if BlameVerifierReportOpt.is_correct_report_type(report_file_name):
                report_opt = load_blame_verifier_report_opt(report_path)
                report = report_opt
                opt_level = OptLevel.OPT.value

            elif BlameVerifierReportNoOptTBAA.is_correct_report_type(
                    report_file_name):
                report_no_opt = load_blame_verifier_report_no_opt_tbaa(
                    report_path)

                report = report_no_opt
                opt_level = OptLevel.NO_OPT.value

            else:
                raise RuntimeWarning("unknown report type")

            number_of_total_annotations = report.get_total_annotations()
            number_of_successful_annotations = \
                report.get_successful_annotations()
            number_of_failed_annotations = report.get_failed_annotations()
            number_of_undetermined_annotations \
                = report.get_undetermined_annotations()

            return pd.DataFrame(
                {
                    'revision': report.head_commit.hash,
                    'time_id': commit_map.short_time_id(report.head_commit),
                    'opt_level': opt_level,
                    'total': number_of_total_annotations,
                    'successful': number_of_successful_annotations,
                    'failed': number_of_failed_annotations,
                    'undetermined': number_of_undetermined_annotations
                },
                index=[0]
                # Add prefix of report name to head_commit to differentiate
                # between reports with and without optimization
            ), report.head_commit.hash + report_path.name.split(
                "-", 1)[0], str(report_path.stat().st_mtime_ns)

        report_files_opt = get_processed_revisions_files(
            project_name, BlameVerifierReportOpt,
            get_case_study_file_name_filter(case_study))

        report_files_no_opt = get_processed_revisions_files(
            project_name, BlameVerifierReportNoOptTBAA,
            get_case_study_file_name_filter(case_study))

        report_files = report_files_opt + report_files_no_opt

        failed_report_files_opt = get_failed_revisions_files(
            project_name, BlameVerifierReportOpt,
            get_case_study_file_name_filter(case_study))

        failed_report_files_no_opt = get_failed_revisions_files(
            project_name, BlameVerifierReportNoOptTBAA,
            get_case_study_file_name_filter(case_study))

        failed_report_files = \
            failed_report_files_opt + failed_report_files_no_opt

        # cls.CACHE_ID is set by superclass
        # pylint: disable=E1101
        data_frame = build_cached_report_table(
            cls.CACHE_ID, project_name, report_files, failed_report_files,
            create_dataframe_layout, create_data_frame_for_report, lambda path:
            ReportFilename(path).commit_hash.hash + path.name.split("-", 1)[0],
            lambda path: str(path.stat().st_mtime_ns),
            lambda a, b: int(a) > int(b))
        return data_frame
Example #22
    def head_commit(self) -> ShortCommitHash:
        """The current HEAD commit under which this BlameVerifierReportOpt was
        created."""
        return ReportFilename(Path(self.path)).commit_hash
Example #23
    def result_file_to_list_entry(result_file: Path) -> str:
        file_status = ReportFilename(result_file.name).file_status
        status = file_status.get_colored_status().rjust(
            longest_file_status_extension + file_status.num_color_characters(),
            " ")
        return f"[{status}] {result_file.name}"
Example #24
    def match_revision(file_name: str) -> bool:
        return ReportFilename(
            file_name).commit_hash != revision.to_short_commit_hash()
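
Note the inverted sense: as with the other file_name_filter callbacks in this section, returning True means the file is skipped, so this filter keeps only files for revision. A factory-style sketch (the factory name and the FullCommitHash annotation are assumptions):

def make_revision_filter(
    revision: FullCommitHash
) -> tp.Callable[[str], bool]:
    # Hypothetical factory around the closure above: skip every file whose
    # commit hash differs from the requested revision.
    def match_revision(file_name: str) -> bool:
        return ReportFilename(
            file_name).commit_hash != revision.to_short_commit_hash()

    return match_revision
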
Example #25

    def _load_dataframe(
        cls, project_name: str, commit_map: CommitMap,
        case_study: tp.Optional[CaseStudy], **kwargs: tp.Any
    ) -> pd.DataFrame:
        commit_lookup = create_commit_lookup_helper(project_name)

        def create_dataframe_layout() -> pd.DataFrame:
            df_layout = pd.DataFrame(columns=cls.COLUMNS)
            df_layout = df_layout.astype(cls.COLUMN_TYPES)
            return df_layout

        def create_data_frame_for_report(
            report_path: Path
        ) -> tp.Tuple[pd.DataFrame, str, str]:
            report = load_blame_report(report_path)

            categorised_degree_occurrences = generate_lib_dependent_degrees(
                report
            )

            def calc_total_amounts() -> int:
                total = 0

                for _, lib_dict in categorised_degree_occurrences.items():
                    for _, tuple_list in lib_dict.items():
                        for degree_amount_tuple in tuple_list:
                            total += degree_amount_tuple[1]
                return total

            total_amounts_of_all_libs = calc_total_amounts()

            list_of_author_degree_occurrences = generate_author_degree_tuples(
                report, commit_lookup
            )
            author_degrees, author_amounts = _split_tuple_values_in_lists_tuple(
                list_of_author_degree_occurrences
            )
            author_total = sum(author_amounts)

            list_of_max_time_deltas = generate_max_time_distribution_tuples(
                report, commit_lookup, MAX_TIME_BUCKET_SIZE
            )
            (max_time_buckets, max_time_amounts
            ) = _split_tuple_values_in_lists_tuple(list_of_max_time_deltas)
            total_max_time_amounts = sum(max_time_amounts)

            list_of_avg_time_deltas = generate_avg_time_distribution_tuples(
                report, commit_lookup, AVG_TIME_BUCKET_SIZE
            )
            (avg_time_buckets, avg_time_amounts
            ) = _split_tuple_values_in_lists_tuple(list_of_avg_time_deltas)
            total_avg_time_amounts = sum(avg_time_amounts)

            def build_dataframe_row(
                degree_type: DegreeType,
                degree: int,
                amount: int,
                total_amount: int,
                base_library: tp.Optional[str] = None,
                inter_library: tp.Optional[str] = None
            ) -> tp.Dict[str, tp.Any]:

                data_dict: tp.Dict[str, tp.Any] = {
                    'revision': report.head_commit.hash,
                    'time_id': commit_map.short_time_id(report.head_commit),
                    'degree_type': degree_type.value,
                    'base_lib': base_library,
                    'inter_lib': inter_library,
                    'degree': degree,
                    'amount': amount,
                    'fraction': np.divide(amount, total_amount)
                }
                return data_dict

            result_data_dicts: tp.List[tp.Dict[str, tp.Any]] = []

            # Append interaction rows
            for base_lib_name, inter_lib_dict \
                    in categorised_degree_occurrences.items():

                for inter_lib_name, list_of_lib_degree_amount_tuples in \
                        inter_lib_dict.items():

                    (inter_degrees,
                     inter_amounts) = _split_tuple_values_in_lists_tuple(
                         list_of_lib_degree_amount_tuples
                     )

                    for i, _ in enumerate(inter_degrees):
                        degree = inter_degrees[i]
                        lib_amount = inter_amounts[i]

                        interaction_data_dict = build_dataframe_row(
                            degree_type=DegreeType.INTERACTION,
                            degree=degree,
                            amount=lib_amount,
                            total_amount=total_amounts_of_all_libs,
                            base_library=base_lib_name,
                            inter_library=inter_lib_name,
                        )
                        result_data_dicts.append(interaction_data_dict)

            def append_rows_of_degree_type(
                degree_type: DegreeType,
                degrees: tp.List[int],
                amounts: tp.List[int],
                sum_amounts: int,
            ) -> None:
                for k, _ in enumerate(degrees):
                    data_dict = build_dataframe_row(
                        degree_type=degree_type,
                        degree=degrees[k],
                        amount=amounts[k],
                        total_amount=sum_amounts
                    )
                    result_data_dicts.append(data_dict)

            # Append author rows
            append_rows_of_degree_type(
                degree_type=DegreeType.AUTHOR,
                degrees=author_degrees,
                amounts=author_amounts,
                sum_amounts=author_total
            )

            # Append max_time rows
            append_rows_of_degree_type(
                degree_type=DegreeType.MAX_TIME,
                degrees=max_time_buckets,
                amounts=max_time_amounts,
                sum_amounts=total_max_time_amounts
            )

            # Append avg_time rows
            append_rows_of_degree_type(
                degree_type=DegreeType.AVG_TIME,
                degrees=avg_time_buckets,
                amounts=avg_time_amounts,
                sum_amounts=total_avg_time_amounts
            )

            return pd.DataFrame(result_data_dicts
                               ), report.head_commit.hash, str(
                                   report_path.stat().st_mtime_ns
                               )

        report_files = get_processed_revisions_files(
            project_name, BlameReport,
            get_case_study_file_name_filter(case_study)
        )

        failed_report_files = get_failed_revisions_files(
            project_name, BlameReport,
            get_case_study_file_name_filter(case_study)
        )

        # cls.CACHE_ID is set by superclass
        # pylint: disable=E1101
        data_frame = build_cached_report_table(
            cls.CACHE_ID, project_name, report_files, failed_report_files,
            create_dataframe_layout, create_data_frame_for_report,
            lambda path: ReportFilename(path).commit_hash.hash,
            lambda path: str(path.stat().st_mtime_ns),
            lambda a, b: int(a) > int(b)
        )

        return data_frame
Example #26
    def file_name_filter(file_name: str) -> bool:
        file_commit_hash = ReportFilename(file_name).commit_hash
        return file_commit_hash != commit_hash
Example #27

    def _load_dataframe(cls, project_name: str, commit_map: CommitMap,
                        case_study: tp.Optional[CaseStudy],
                        **kwargs: tp.Any) -> pd.DataFrame:
        def create_dataframe_layout() -> pd.DataFrame:
            df_layout = pd.DataFrame(columns=cls.COLUMNS)
            df_layout = df_layout.astype(cls.COLUMN_TYPES)
            return df_layout

        def create_data_frame_for_report(
                report_path: Path) -> tp.Tuple[pd.DataFrame, str, str]:
            report = load_blame_report(report_path)
            base_inter_c_repo_pair_mapping = \
                gen_base_to_inter_commit_repo_pair_mapping(report)

            def build_dataframe_row(base_hash: FullCommitHash,
                                    base_library: str,
                                    inter_hash: FullCommitHash,
                                    inter_library: str,
                                    amount: int) -> tp.Dict[str, tp.Any]:

                data_dict: tp.Dict[str, tp.Any] = {
                    'revision': report.head_commit.hash,
                    'time_id': commit_map.short_time_id(report.head_commit),
                    'base_hash': base_hash.hash,
                    'base_lib': base_library,
                    'inter_hash': inter_hash.hash,
                    'inter_lib': inter_library,
                    'amount': amount
                }
                return data_dict

            result_data_dicts: tp.List[tp.Dict[str, tp.Any]] = []

            for base_pair in base_inter_c_repo_pair_mapping:
                inter_pair_amount_dict = base_inter_c_repo_pair_mapping[
                    base_pair]

                for inter_pair in inter_pair_amount_dict:
                    result_data_dicts.append(
                        build_dataframe_row(
                            base_hash=base_pair.commit.commit_hash,
                            base_library=base_pair.commit.repository_name,
                            inter_hash=inter_pair.commit.commit_hash,
                            inter_library=inter_pair.commit.repository_name,
                            amount=inter_pair_amount_dict[inter_pair]))

            return pd.DataFrame(
                result_data_dicts), report.head_commit.hash, str(
                    report_path.stat().st_mtime_ns)

        report_files = get_processed_revisions_files(
            project_name, BlameReport,
            get_case_study_file_name_filter(case_study))

        failed_report_files = get_failed_revisions_files(
            project_name, BlameReport,
            get_case_study_file_name_filter(case_study))

        # cls.CACHE_ID is set by superclass
        # pylint: disable=E1101
        data_frame = build_cached_report_table(
            cls.CACHE_ID, project_name, report_files, failed_report_files,
            create_dataframe_layout, create_data_frame_for_report,
            lambda path: ReportFilename(path).commit_hash.hash,
            lambda path: str(path.stat().st_mtime_ns),
            lambda a, b: int(a) > int(b))

        return data_frame