def revisions_of_project(self) -> None:
    """Generate the revision list for the selected project if the
    select-specific strategy is enabled."""
    self.strategie_forms.setCurrentIndex(
        GenerationStrategie.SELECT_REVISION.value
    )
    if self.selected_project != self.revision_list_project:
        self.revision_details.setText("Loading Revisions")
        self.revision_details.repaint()
        get_local_project_git(self.selected_project).remotes[0].fetch()
        git_path = get_local_project_git_path(self.selected_project)
        initial_commit = get_initial_commit(git_path).hash
        commits = get_all_revisions_between(
            initial_commit, 'HEAD', FullCommitHash, git_path
        )
        commit_lookup_helper = create_commit_lookup_helper(
            self.selected_project
        )
        project = get_project_cls_by_name(self.selected_project)
        repo_name = get_primary_project_source(self.selected_project).local
        commits = map(
            lambda commit: CommitRepoPair(commit, repo_name), commits
        )

        cmap = get_commit_map(self.selected_project)
        commit_model = CommitTableModel(
            list(map(commit_lookup_helper, commits)), cmap, project
        )
        self.proxy_model.setSourceModel(commit_model)
        self.revision_list_project = self.selected_project
        self.revision_details.clear()
        self.revision_details.update()
def plot(self, view_mode: bool) -> None:
    """Plots the bug data for the whole project."""
    project_name = self.plot_kwargs['project']
    project_repo = get_local_project_git(project_name)

    bug_provider = BugProvider.get_provider_for_project(
        get_project_cls_by_name(project_name)
    )
    pydriller_bugs = bug_provider.find_pygit_bugs()

    reports = get_processed_revisions_files(project_name, SZZUnleashedReport)
    szzunleashed_bugs = frozenset([
        as_pygit_bug(raw_bug, project_repo)
        for raw_bug in SZZUnleashedReport(reports[0]).get_all_raw_bugs()
    ])

    if self.__szz_tool == 'pydriller':
        self.__figure = _plot_chord_diagram_for_raw_bugs(
            project_name, project_repo, pydriller_bugs, self.__szz_tool
        )
    elif self.__szz_tool == 'szz_unleashed':
        self.__figure = _plot_chord_diagram_for_raw_bugs(
            project_name, project_repo, szzunleashed_bugs, self.__szz_tool
        )
    elif self.__szz_tool == 'szz_diff':
        self.__figure = _bug_data_diff_plot(
            project_name, project_repo, pydriller_bugs, szzunleashed_bugs
        )
    else:
        raise PlotDataEmpty
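# Illustrative refactoring sketch (not the implementation above): the
# three-way branch in plot() could be driven by a dispatch table of
# zero-argument callables, so each SZZ tool name appears exactly once:
#
#     handlers = {
#         'pydriller': lambda: _plot_chord_diagram_for_raw_bugs(
#             project_name, project_repo, pydriller_bugs, 'pydriller'),
#         'szz_unleashed': lambda: _plot_chord_diagram_for_raw_bugs(
#             project_name, project_repo, szzunleashed_bugs, 'szz_unleashed'),
#         'szz_diff': lambda: _bug_data_diff_plot(
#             project_name, project_repo, pydriller_bugs, szzunleashed_bugs),
#     }
#     if self.__szz_tool not in handlers:
#         raise PlotDataEmpty  # mirrors the fall-through branch above
#     self.__figure = handlers[self.__szz_tool]()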
def test_get_current_branch(self):
    """Check if we can correctly retrieve the current branch of a repo."""
    repo = get_local_project_git("brotli")

    repo.checkout(repo.lookup_branch('master'))

    self.assertEqual(get_current_branch(repo.workdir), 'master')
def _load_projects_ordered_by_year(
    current_config: PC.PaperConfig, result_file_type: tp.Type[BaseReport]
) -> tp.Dict[str, tp.Dict[int, tp.List[tp.Tuple[ShortCommitHash,
                                                FileStatusExtension]]]]:
    projects: tp.Dict[str, tp.Dict[int, tp.List[tp.Tuple[
        ShortCommitHash, FileStatusExtension]]]] = OrderedDict()

    for case_study in sorted(
        current_config.get_all_case_studies(),
        key=lambda cs: (cs.project_name, cs.version)
    ):
        processed_revisions = get_revisions_status_for_case_study(
            case_study, result_file_type
        )

        repo = get_local_project_git(case_study.project_name)
        # dict: year -> [(revision: ShortCommitHash, status: FileStatusExtension)]
        revisions: tp.Dict[int, tp.List[tp.Tuple[
            ShortCommitHash, FileStatusExtension]]] = defaultdict(list)

        for rev, status in processed_revisions:
            commit = repo.get(rev.hash)
            commit_date = datetime.utcfromtimestamp(commit.commit_time)
            revisions[commit_date.year].append((rev, status))

        projects[case_study.project_name] = revisions

    return projects
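# Shape of the mapping returned above (project and year keys taken from the
# code; the concrete values are illustrative only):
#
#     {
#         "brotli": {
#             2019: [(ShortCommitHash("6d03dfd"), <FileStatusExtension member>)],
#             2021: [...],
#         },
#         "xz": {...},
#     }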
def _filter_commit_message_bugs(
    project_name: str,
    commit_filter_function: tp.Callable[[pygit2.Repository, pygit2.Commit],
                                        tp.Optional[PygitBug]]
) -> tp.FrozenSet[PygitBug]:
    """
    Find bugs based on commit messages using the given filter function.

    Args:
        project_name: name of the project to draw the commit history from
        commit_filter_function: function that creates and filters bugs

    Returns:
        the set of bugs created by the given filter
    """
    filtered_bugs = set()

    project_repo = get_local_project_git(project_name)
    for commit in project_repo.walk(
        project_repo.head.target, pygit2.GIT_SORT_TIME
    ):
        pybug = commit_filter_function(project_repo, commit)
        if pybug:
            filtered_bugs.add(pybug)

    return frozenset(filtered_bugs)
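# Minimal sketch of a filter that _filter_commit_message_bugs accepts: it
# returns a bug only for commits whose message mentions a fix. The helper
# _pygit_bug_from_fixing_commit is hypothetical and stands in for whatever
# actually constructs the PygitBug in the provider code:
#
#     def keyword_fix_filter(
#         repo: pygit2.Repository, commit: pygit2.Commit
#     ) -> tp.Optional[PygitBug]:
#         if "fix" in commit.message.lower():
#             return _pygit_bug_from_fixing_commit(repo, commit)  # hypothetical
#         return None
#
#     bugs = _filter_commit_message_bugs("brotli", keyword_fix_filter)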
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    case_studies = get_loaded_paper_config().get_all_case_studies()

    cs_data: tp.List[pd.DataFrame] = []
    for case_study in case_studies:
        project_name = case_study.project_name
        commit_map = get_commit_map(project_name)
        project_cls = get_project_cls_by_name(project_name)
        project_repo = get_local_project_git(project_name)
        # strip the trailing "/.git" to get the working-tree path
        project_path = project_repo.path[:-5]
        project_git = git["-C", project_path]

        revisions = sorted(
            case_study.revisions, key=commit_map.time_id, reverse=True
        )
        revision = revisions[0] if revisions else None
        rev_range = revision.hash if revision else "HEAD"

        cs_dict = {
            project_name: {
                "Domain":
                    str(project_cls.DOMAIN)[0].upper() +
                    str(project_cls.DOMAIN)[1:],
                "LOC":
                    calc_repo_loc(project_repo, rev_range),
                "Commits":
                    int(project_git("rev-list", "--count", rev_range)),
                "Authors":
                    len(project_git("shortlog", "-s", rev_range).splitlines())
            }
        }
        if revision:
            cs_dict[project_name]["Revision"] = revision.short_hash

        cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index"))

    df = pd.concat(cs_data).sort_index()

    kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        kwargs["multicolumn_format"] = "c"
        kwargs["multirow"] = True

    return dataframe_to_table(
        df, table_format, wrap_table, wrap_landscape=True, **kwargs
    )
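# Illustrative shape of the assembled overview table, one row per case-study
# project (all values made up):
#
#                  Domain    LOC  Commits  Authors Revision
#     brotli  Compression  77000     1100       90  6d03dfd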
def _find_corresponding_pygit_suspect_tuple(
    project_name: str, issue_event: IssueEvent
) -> tp.Optional[PygitSuspectTuple]:
    """
    Creates a suspect tuple given an issue event.

    Partitions the commits found via git blame on the fixing commit into
    suspects (commits after the bug report) and non-suspects (commits before
    the bug report).

    Args:
        project_name: name of the project to draw the fixing and introducing
                      commits from
        issue_event: the IssueEvent potentially associated with a bug

    Returns:
        a PygitSuspectTuple if the issue event represents the closing of a
        bug, None otherwise
    """
    pygit_repo: pygit2.Repository = get_local_project_git(project_name)
    pydrill_repo = pydriller.Git(pygit_repo.path)

    if _has_closed_a_bug(issue_event) and issue_event.commit_id:
        # the report date is loop-invariant, so convert it to UTC only once
        issue_date = issue_event.issue.created_at.astimezone(timezone.utc)
        fixing_commit = pygit_repo.get(issue_event.commit_id)
        pydrill_fixing_commit = pydrill_repo.get_commit(issue_event.commit_id)
        blame_dict = pydrill_repo.get_commits_last_modified_lines(
            pydrill_fixing_commit
        )

        non_suspect_commits = set()
        suspect_commits = set()
        for introducing_set in blame_dict.values():
            for introducing_id in introducing_set:
                introduction_date = pydrill_repo.get_commit(
                    introducing_id
                ).committer_date.astimezone(timezone.utc)

                if introduction_date > issue_date:  # commit is a suspect
                    suspect_commits.add(pygit_repo.get(introducing_id))
                else:
                    non_suspect_commits.add(pygit_repo.get(introducing_id))

        return PygitSuspectTuple(
            fixing_commit, non_suspect_commits, suspect_commits,
            issue_event.issue.number, issue_event.issue.created_at,
            pydrill_fixing_commit.committer_date
        )
    return None
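# Illustrative timeline for the partition above: every commit that git blame
# identifies on the fixing commit is classified by its committer date
# relative to the bug report (all dates compared in UTC):
#
#     --- c1 ---- c2 ----|---- c3 ------- fix --->
#       non-suspects     |    suspects
#                 issue reported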
def filter_non_code_changes(
    blame_data: pd.DataFrame, project_name: str
) -> pd.DataFrame:
    """
    Filter all revisions from the data frame that are not related to code
    changes.

    Args:
        blame_data: data to filter
        project_name: name of the project

    Returns:
        filtered data frame without rows that relate to non-code changes
    """
    repo = get_local_project_git(project_name)
    code_related_changes = [
        x.hash for x in calc_repo_code_churn(
            repo, ChurnConfig.create_c_style_languages_config()
        )
    ]
    return blame_data[blame_data.apply(
        lambda x: x['revision'] in code_related_changes, axis=1
    )]
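# Hypothetical usage, assuming `data` is a blame data frame whose 'revision'
# column holds commit hashes for the "brotli" project:
#
#     code_only = filter_non_code_changes(data, "brotli")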
def _generate_graph_table(
    case_studies: tp.List[CaseStudy],
    graph_generator: tp.Callable[[str, FullCommitHash], nx.DiGraph],
    table_format: TableFormat, wrap_table: bool
) -> str:
    degree_data: tp.List[pd.DataFrame] = []
    for case_study in case_studies:
        project_name = case_study.project_name
        project_git = git["-C", get_local_project_git(project_name).path]
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport
        )
        if not revision:
            continue

        graph = graph_generator(project_name, revision)

        nodes: tp.List[tp.Dict[str, tp.Any]] = []
        for node in graph.nodes:
            nodes.append({
                "node_degree": graph.degree(node),
                "node_out_degree": graph.out_degree(node),
                "node_in_degree": graph.in_degree(node),
            })

        data = pd.DataFrame(nodes)

        degree_data.append(
            pd.DataFrame.from_dict(
                {
                    project_name: {
                        ("commits", ""):
                            int(project_git("rev-list", "--count", revision.hash)),
                        ("authors", ""):
                            len(project_git("shortlog", "-s", "--all").splitlines()),
                        ("nodes", ""):
                            len(graph.nodes),
                        ("edges", ""):
                            len(graph.edges),
                        ("node degree", "mean"):
                            data["node_degree"].mean(),
                        ("node degree", "median"):
                            data["node_degree"].median(),
                        ("node degree", "min"):
                            data["node_degree"].min(),
                        ("node degree", "max"):
                            data["node_degree"].max(),
                        ("node out degree", "median"):
                            data["node_out_degree"].median(),
                        ("node out degree", "min"):
                            data["node_out_degree"].min(),
                        ("node out degree", "max"):
                            data["node_out_degree"].max(),
                        ("node in degree", "median"):
                            data["node_in_degree"].median(),
                        ("node in degree", "min"):
                            data["node_in_degree"].min(),
                        ("node in degree", "max"):
                            data["node_in_degree"].max(),
                    }
                },
                orient="index"
            )
        )

    df = pd.concat(degree_data).round(2)

    kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        kwargs["multicolumn_format"] = "c"
        kwargs["multirow"] = True

    return dataframe_to_table(
        df, table_format, wrap_table, wrap_landscape=True, **kwargs
    )
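# The tuple keys above become a two-level column MultiIndex, roughly
# (values illustrative):
#
#             commits authors nodes edges  node degree          node out degree ...
#                                          mean median min max  median min max  ...
#     brotli     1100      90   500  2000  3.10   2.00   1  42    1.00   0  17  ...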
def _load_dataframe(
    cls, project_name: str, commit_map: CommitMap,
    case_study: tp.Optional[CaseStudy], **kwargs: tp.Any
) -> pd.DataFrame:
    repo = get_local_project_git(project_name)
    commit_lookup = create_commit_lookup_helper(project_name)

    def create_dataframe_layout() -> pd.DataFrame:
        df_layout = pd.DataFrame(columns=cls.COLUMNS)
        df_layout = df_layout.astype(cls.COLUMN_TYPES)
        return df_layout

    def create_data_frame_for_report(
        report_paths: tp.Tuple[Path, Path]
    ) -> tp.Tuple[pd.DataFrame, str, str]:
        # look up the commit and info about the HEAD commit of the report
        head_report = load_blame_report(report_paths[0])
        pred_report = load_blame_report(report_paths[1])
        commit = repo.get(head_report.head_commit.hash)
        commit_date = datetime.utcfromtimestamp(commit.commit_time)
        pred_commit = repo.get(pred_report.head_commit.hash)

        diff_between_head_pred = BlameReportDiff(head_report, pred_report)

        # calculate the total churn between pred and base commit
        code_churn = calc_code_churn(
            Path(repo.path), FullCommitHash.from_pygit_commit(pred_commit),
            FullCommitHash.from_pygit_commit(commit),
            ChurnConfig.create_c_style_languages_config()
        )
        total_churn = code_churn[1] + code_churn[2]

        def weighted_avg(tuples: tp.List[tp.Tuple[int, int]]) -> float:
            # Σ(degree · amount) / Σ(degree), guarded against empty input
            total_sum = 0
            degree_sum = 0
            for degree, amount in tuples:
                degree_sum += degree
                total_sum += (degree * amount)
            return total_sum / max(1, degree_sum)

        def combine_max(tuples: tp.List[tp.Tuple[int, int]]) -> float:
            if tuples:
                return max(x for x, _ in tuples)
            return 0

        return (
            pd.DataFrame(
                {
                    'revision':
                        head_report.head_commit.hash,
                    'time_id':
                        commit_map.short_time_id(head_report.head_commit),
                    'churn':
                        total_churn,
                    'num_interactions':
                        count_interactions(diff_between_head_pred),
                    'num_interacting_commits':
                        count_interacting_commits(diff_between_head_pred),
                    'num_interacting_authors':
                        count_interacting_authors(
                            diff_between_head_pred, commit_lookup
                        ),
                    "ci_degree_mean":
                        weighted_avg(
                            generate_degree_tuples(diff_between_head_pred)
                        ),
                    "author_mean":
                        weighted_avg(
                            generate_author_degree_tuples(
                                diff_between_head_pred, commit_lookup
                            )
                        ),
                    "avg_time_mean":
                        weighted_avg(
                            generate_avg_time_distribution_tuples(
                                diff_between_head_pred, commit_lookup, 1
                            )
                        ),
                    "ci_degree_max":
                        combine_max(
                            generate_degree_tuples(diff_between_head_pred)
                        ),
                    "author_max":
                        combine_max(
                            generate_author_degree_tuples(
                                diff_between_head_pred, commit_lookup
                            )
                        ),
                    "avg_time_max":
                        combine_max(
                            generate_max_time_distribution_tuples(
                                diff_between_head_pred, commit_lookup, 1
                            )
                        ),
                    'year':
                        commit_date.year,
                },
                index=[0]
            ), id_from_paths(report_paths), timestamp_from_paths(report_paths)
        )

    report_pairs, failed_report_pairs = build_report_pairs_tuple(
        project_name, commit_map, case_study
    )

    # cls.CACHE_ID is set by superclass
    # pylint: disable=E1101
    data_frame = build_cached_report_table(
        cls.CACHE_ID, project_name, report_pairs, failed_report_pairs,
        create_dataframe_layout, create_data_frame_for_report, id_from_paths,
        timestamp_from_paths, compare_timestamps
    )

    return data_frame
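# Worked check of the weighted_avg logic above (standalone copy, since the
# helper is nested inside create_data_frame_for_report): for the degree
# tuples [(1, 4), (2, 3)] it yields (1*4 + 2*3) / (1 + 2) == 10 / 3.
#
#     def _weighted_avg(tuples):
#         return sum(d * a for d, a in tuples) / max(1, sum(d for d, _ in tuples))
#
#     assert abs(_weighted_avg([(1, 4), (2, 3)]) - 10 / 3) < 1e-9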