def prepare_szz_data(self) -> actions.StepResult:
    """
    Prepare data needed for running SZZUnleashed.

    Queries the bug provider of the project for its bugs and writes them as
    ``issue_list.json`` into the parent directory of the project's primary
    source. Every entry is keyed by the hash of the fixing commit and stores
    that hash together with the commit, creation, and resolution dates. If a
    bug has no creation or resolution date, the date of the fixing commit is
    used instead.

    Returns:
        ``actions.StepResult.OK`` after the issue list has been written
    """
    # Imported locally so that this step does not depend on which names the
    # module pulls in from ``datetime`` at file level.
    from datetime import timezone

    project: Project = self.obj
    run_dir = Path(project.source_of_primary).parent

    bug_provider = BugProvider.get_provider_for_project(type(project))
    bugs = bug_provider.find_pygit_bugs()

    # SZZUnleashed uses some strange timezone format that cannot be
    # produced by datetime, so we just fake it.
    def fix_date(date: datetime) -> str:
        # The faked suffix claims UTC, so aware datetimes are converted to
        # UTC and stripped of their tzinfo first; otherwise ``str`` would
        # emit a second, conflicting offset. Naive datetimes pass through
        # unchanged.
        if date.tzinfo is not None:
            date = date.astimezone(timezone.utc).replace(tzinfo=None)
        return str(date) + " +0000"

    fixers_dict = {}
    for bug in bugs:
        # The commit time is a Unix timestamp; convert it explicitly in UTC.
        # A plain ``fromtimestamp`` would yield the machine's local time,
        # which would then be mislabelled as "+0000".
        commitdate = fix_date(
            datetime.fromtimestamp(
                bug.fixing_commit.commit_time, tz=timezone.utc
            )
        )
        creationdate = fix_date(
            bug.creation_date
        ) if bug.creation_date else commitdate
        resolutiondate = fix_date(
            bug.resolution_date
        ) if bug.resolution_date else commitdate

        fixers_dict[str(bug.fixing_commit.id)] = {
            "hash": str(bug.fixing_commit.id),
            "commitdate": commitdate,
            "creationdate": creationdate,
            "resolutiondate": resolutiondate
        }

    with (run_dir / "issue_list.json").open("w") as issues_file:
        json.dump(fixers_dict, issues_file, indent=2)

    return actions.StepResult.OK
def plot(self, view_mode: bool) -> None:
    """
    Plots bug plot for the whole project.

    Depending on the configured SZZ tool, the figure is built from the bugs
    found by the bug provider (``'pydriller'``), from the first
    SZZUnleashed report of the project (``'szz_unleashed'``), or from both
    (``'szz_diff'``). Only the data needed by the selected mode is loaded.

    Args:
        view_mode: not used by this method

    Raises:
        PlotDataEmpty: if the configured SZZ tool is not one of the
                       supported values, or if SZZUnleashed data is needed
                       but no report file was found
    """
    project_name = self.plot_kwargs['project']
    project_repo = get_local_project_git(project_name)

    def load_pydriller_bugs():
        # Bugs as determined by the project's bug provider.
        bug_provider = BugProvider.get_provider_for_project(
            get_project_cls_by_name(project_name)
        )
        return bug_provider.find_pygit_bugs()

    def load_szzunleashed_bugs():
        # Bugs from the first processed SZZUnleashed report.
        reports = get_processed_revisions_files(
            project_name, SZZUnleashedReport
        )
        if not reports:
            # Without this guard, ``reports[0]`` would fail with an
            # IndexError instead of signalling missing plot data.
            raise PlotDataEmpty
        return frozenset(
            as_pygit_bug(raw_bug, project_repo)
            for raw_bug in SZZUnleashedReport(reports[0]).get_all_raw_bugs()
        )

    if self.__szz_tool == 'pydriller':
        self.__figure = _plot_chord_diagram_for_raw_bugs(
            project_name, project_repo, load_pydriller_bugs(),
            self.__szz_tool
        )
    elif self.__szz_tool == 'szz_unleashed':
        self.__figure = _plot_chord_diagram_for_raw_bugs(
            project_name, project_repo, load_szzunleashed_bugs(),
            self.__szz_tool
        )
    elif self.__szz_tool == 'szz_diff':
        pydriller_bugs = load_pydriller_bugs()
        szzunleashed_bugs = load_szzunleashed_bugs()
        self.__figure = _bug_data_diff_plot(
            project_name, project_repo, pydriller_bugs, szzunleashed_bugs
        )
    else:
        raise PlotDataEmpty
def test_basic_repo_pygit_bugs(self) -> None:
    """
    Test provider on basic_bug_detection_test_repo.

    Checks that the provider reports the expected fixing commits (ids and
    messages) and that, for four selected fixing commits, the introducing
    commits match the expected sets.
    """
    provider = BugProvider.get_provider_for_project(
        BasicBugDetectionTestRepo
    )

    all_bugs = provider.find_pygit_bugs()
    found_fix_ids = {str(bug.fixing_commit.id) for bug in all_bugs}
    found_fix_msgs = {bug.fixing_commit.message for bug in all_bugs}

    # the provider must report exactly the expected fixes
    self.assertEqual(found_fix_ids, self.basic_expected_fixes)
    self.assertEqual(found_fix_msgs, self.basic_expected_msgs)

    def introducing_ids_for(fix_hash):
        # Look the bug up by its fixing hash and collect the hex ids of the
        # introducing commits of the first bug returned.
        matching_bugs = provider.find_pygit_bugs(fixing_commit=fix_hash)
        first_match = next(iter(matching_bugs))
        return {commit.hex for commit in first_match.introducing_commits}

    # all lookups are performed before any of the comparisons below
    first_intro_ids = introducing_ids_for(
        "ddf0ba95408dc5508504c84e6616c49128410389"
    )
    second_intro_ids = introducing_ids_for(
        "d846bdbe45e4d64a34115f5285079e1b5f84007f"
    )
    third_intro_ids = introducing_ids_for(
        "2da78b2820370f6759e9086fad74155d6655e93b"
    )
    fourth_intro_ids = introducing_ids_for(
        "3b76c8d295385358375fefdb0cf045d97ad2d193"
    )

    self.assertEqual(
        self.basic_expected_first_introduction, first_intro_ids
    )
    self.assertEqual(
        self.basic_expected_second_introduction, second_intro_ids
    )
    self.assertEqual(
        self.basic_expected_third_introduction, third_intro_ids
    )
    self.assertEqual(
        self.basic_expected_fourth_introduction, fourth_intro_ids
    )
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    """
    Render a table listing the bugs found for the case study's project.

    Each row holds the hash, message, and author name of a bug's fixing
    commit together with the bug's issue id. If the bug provider reports no
    bugs, a table with only the column headers is rendered.

    Args:
        table_format: the format the table is rendered in
        wrap_table: passed through to ``dataframe_to_table``

    Returns:
        the rendered table as produced by ``dataframe_to_table``
    """
    project_name: str = self.table_kwargs['case_study'].project_name

    bug_provider = BugProvider.get_provider_for_project(
        get_project_cls_by_name(project_name)
    )

    variables = [
        "fixing hash", "fixing message", "fixing author", "issue_number"
    ]
    pybugs = bug_provider.find_pygit_bugs()

    data_rows = [[
        pybug.fixing_commit.hex, pybug.fixing_commit.message,
        pybug.fixing_commit.author.name, pybug.issue_id
    ] for pybug in pybugs]

    if data_rows:
        bug_df = pd.DataFrame(columns=variables, data=np.array(data_rows))
    else:
        # An empty row list turns into a one-dimensional array, which
        # pandas cannot match against the four column labels (ValueError).
        # Build the empty frame from the column labels alone instead.
        bug_df = pd.DataFrame(columns=variables)

    kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        kwargs["multicolumn_format"] = "c"
        kwargs["longtable"] = True

    return dataframe_to_table(
        bug_df, table_format, wrap_table, wrap_landscape=True, **kwargs
    )
def extend_with_bug_commits(
    case_study: CaseStudy, cmap: CommitMap,
    report_type: tp.Type['BaseReport'], merge_stage: int,
    ignore_blocked: bool
) -> None:
    """
    Extend a case study with revisions that either introduced or fixed a bug
    as determined by the given SZZ tool.

    The bugs are read from the first processed report of the given type. If
    the report type is not supported or no report exists, the bugs are
    obtained from the project's bug provider instead.

    Args:
        case_study: to extend
        cmap: commit map to map revisions to unique IDs
        report_type: report to use for bug detection
        merge_stage: stage the revisions will be added to
        ignore_blocked: ignore_blocked revisions
    """
    project_cls: tp.Type[Project] = get_project_cls_by_name(
        case_study.project_name
    )

    def load_bugs_from_szz_report(
        load_fun: tp.Callable[[Path], SZZReport]
    ) -> tp.Optional[tp.FrozenSet[RawBug]]:
        # Returns None when there is no report to load the bugs from.
        report_files = get_processed_revisions_files(
            case_study.project_name, report_type
        )
        if report_files:
            return load_fun(report_files[0]).get_all_raw_bugs()

        LOG.warning(
            f"I could not find any {report_type} reports. "
            "Falling back to bug provider."
        )
        return None

    # supported report types paired with the function that loads them
    known_loaders = (
        (SZZUnleashedReport, load_szzunleashed_report),
        (PyDrillerSZZReport, load_pydriller_szz_report),
    )
    selected_loader = next(
        (
            loader for supported_type, loader in known_loaders
            if report_type == supported_type
        ),
        None,
    )

    bugs: tp.Optional[tp.FrozenSet[RawBug]] = None
    if selected_loader is None:
        LOG.warning(
            f"Report type {report_type} is not supported by this extender "
            f"strategy. Falling back to bug provider."
        )
    else:
        bugs = load_bugs_from_szz_report(selected_loader)

    if bugs is None:
        bug_provider = BugProvider.get_provider_for_project(
            get_project_cls_by_name(case_study.project_name)
        )
        bugs = bug_provider.find_raw_bugs()

    # gather fixing and introducing commits of every bug
    revisions: tp.Set[FullCommitHash] = set()
    for bug in bugs:
        revisions.update((bug.fixing_commit,), bug.introducing_commits)

    rev_list = list(revisions)
    if ignore_blocked:
        rev_list = filter_blocked_revisions(rev_list, project_cls)

    revisions_with_time_ids = [(rev, cmap.time_id(rev)) for rev in rev_list]
    case_study.include_revisions(revisions_with_time_ids, merge_stage)
def draw_bugs(
    axis: axes.Axes, project: tp.Type[Project],
    revisions: tp.List[FullCommitHash], bug_line_width: int, bug_color: str,
    label_size: int, vertical_alignment: str
) -> None:
    """
    Annotates bugs for a project in an existing plot.

    For every bug reported by the project's bug provider, a vertical line
    and a rotated ``#<issue id>`` label are drawn at the x-position of the
    bug's fixing commit. If the fixing commit's time id is not among the
    plotted revisions, the line is placed half a step below the number of
    plotted revisions with a smaller time id.

    Args:
        axis: the axis to use for the plot
        project: the project to add bugs for
        revisions: a list of revisions included in the plot in the order
                   they appear on the x-axis
        bug_line_width: the line width of bug annotations
        bug_color: the color of bug annotations
        label_size: the label size of bug annotations
        vertical_alignment: the vertical alignment of bug annotations
    """
    commit_map = create_lazy_commit_map_loader(project.NAME)()
    sampled_time_ids = [commit_map.time_id(rev) for rev in revisions]

    provider = BugProvider.get_provider_for_project(project)
    for raw_bug in provider.find_raw_bugs():
        fix_time_id = commit_map.time_id(raw_bug.fixing_commit)

        if fix_time_id not in sampled_time_ids:
            # revision not in sample; draw line between closest samples
            older_samples = sum(
                1 for time_id in sampled_time_ids if time_id < fix_time_id
            )
            x_position = older_samples - 0.5
        else:
            x_position = float(revisions.index(raw_bug.fixing_commit))

        issue_label = f"#{raw_bug.issue_id}"
        x_axis_transform = axis.get_xaxis_transform()

        axis.axvline(
            x_position,
            label=issue_label,
            linewidth=bug_line_width,
            color=bug_color
        )
        axis.text(
            x_position + 0.1,
            0,
            issue_label,
            transform=x_axis_transform,
            rotation=90,
            size=label_size,
            color=bug_color,
            va=vertical_alignment
        )
def create_report(self) -> actions.StepResult:
    """
    Create a report from SZZ data.

    Maps the hash of every fixing commit found by the bug provider to the
    sorted hashes of its introducing commits and writes the result, preceded
    by a version header, as a multi-document YAML file into the project's
    result folder.

    Returns:
        ``actions.StepResult.OK`` after the report file has been written
    """
    project = self.obj

    # NOTE(review): the provider receives the project instance here;
    # confirm whether the project class is expected instead.
    bug_provider = BugProvider.get_provider_for_project(project)
    pygit_bugs = bug_provider.find_pygit_bugs()

    varats_result_folder = get_varats_result_folder(project)

    # fixing commit hash -> sorted hashes of the introducing commits
    fix_to_introducers: tp.Dict[str, tp.List[str]] = {
        str(bug.fixing_commit.id):
        sorted(str(commit.id) for commit in bug.introducing_commits)
        for bug in pygit_bugs
    }

    raw_szz_report = {
        "szz_tool": SZZTool.PYDRILLER_SZZ.tool_name,
        "bugs": fix_to_introducers
    }

    result_file = PyDrillerSZZReport.get_file_name(
        "PyDrSZZ",
        project_name=str(project.name),
        binary_name="none",  # we don't rely on binaries in this experiment
        project_revision=project.version_of_primary,
        project_uuid=str(project.run_uuid),
        extension_type=FSE.SUCCESS
    )

    report_path = f"{varats_result_folder}/{result_file}"
    with open(report_path, "w") as yaml_file:
        version_header = VersionHeader.from_version_number("SZZReport", 1)
        documents = [version_header.get_dict(), raw_szz_report]
        serialized = yaml.dump_all(
            documents, explicit_start=True, explicit_end=True
        )
        yaml_file.write(serialized)

    return actions.StepResult.OK