def test_aig_file_vs_blame_degrees_table(self) -> None:
    """Check the LaTeX booktabs output of the author blame vs. file degrees
    table for the first ``xz`` case study."""
    paper_cfg = vara_cfg()["paper_config"]
    paper_cfg["current_config"] = "test_diff_correlation_overview_table"
    initialize_projects()
    load_paper_config()

    table_config = TableConfig.from_kwargs(view=False)
    xz_case_study = get_loaded_paper_config().get_case_studies("xz")[0]
    table = AuthorBlameVsFileDegreesTable(
        table_config, case_study=xz_case_study
    )
    rendered = table.tabulate(TableFormat.LATEX_BOOKTABS, False)

    # NOTE(review): the expected literal is reproduced exactly as it appears
    # in the reviewed source; confirm its whitespace and line breaks against
    # the output the table generator really emits.
    expected = r"""\begin{tabular}{lrrrrr} \toprule {} & blame\_num\_commits & blame\_node\_degree & author\_diff & file\_num\_commits & file\_node\_degree \\ author & & & & & \\ \midrule Alexey Tourbin & NaN & NaN & NaN & 1 & 2 \\ Ben Boeckel & NaN & NaN & NaN & 1 & 2 \\ Jim Meyering & NaN & NaN & NaN & 1 & 2 \\ Lasse Collin & 124.0 & 0.0 & 0.0 & 479 & 6 \\ \bottomrule \end{tabular} """
    self.assertEqual(expected, rendered)
def value_to_string(
    value: tp.Union[CaseStudy, tp.List[CaseStudy]]
) -> tp.Union[str, tp.List[str]]:
    """
    Convert a case study, or a list of case studies, to its string form.

    Args:
        value: a single case study or a list of case studies

    Returns:
        ``"<project_name>_<version>"`` for a single case study; for a list,
        ``"all"`` if it equals all case studies of the loaded paper config,
        otherwise the list of per-case-study strings
    """
    # Single case study: no paper config lookup is needed.
    if not isinstance(value, tp.List):
        return f"{value.project_name}_{value.version}"

    if value == get_loaded_paper_config().get_all_case_studies():
        return "all"

    return [
        f"{case_study.project_name}_{case_study.version}"
        for case_study in value
    ]
def plot(self, view_mode: bool) -> None:
    """
    Draw a violin plot with an overlaid strip plot of the relative number of
    interacting authors per commit, grouped by project.

    For every case study with a processed ``BlameReport`` revision, each
    commit node of the commit-author interaction graph contributes its node
    degree divided by the number of author nodes in that graph.

    Args:
        view_mode: not used by this implementation
    """
    rows: tp.List[tp.Dict[str, tp.Any]] = []
    plotted_projects: tp.List[str] = []

    for case_study in get_loaded_paper_config().get_all_case_studies():
        project_name = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport
        )
        if not revision:
            continue

        big = create_blame_interaction_graph(project_name, revision)
        caig = big.commit_author_interaction_graph(
            outgoing_interactions=True, incoming_interactions=True
        )
        num_authors = sum(
            1 for node in caig.nodes if caig.nodes[node]["author"]
        )

        project_rows: tp.List[tp.Dict[str, tp.Any]] = []
        for node in caig.nodes:
            commit = tp.cast(CAIGNodeAttrs, caig.nodes[node])["commit"]
            if not commit:
                # Only commit nodes are plotted.
                continue
            project_rows.append({
                "Project": project_name,
                "commit": commit.commit_hash,
                "# Interacting Authors": caig.degree(node) / num_authors
            })

        # A project only shows up on the x-axis if it has commit nodes.
        if project_rows:
            plotted_projects.append(project_name)
            rows.extend(project_rows)

    data = pd.DataFrame(rows)
    project_order = sorted(plotted_projects)

    ax = sns.violinplot(
        x="Project",
        y="# Interacting Authors",
        data=data,
        order=project_order,
        inner=None,
        linewidth=1,
        color=".95"
    )
    sns.stripplot(
        x="Project",
        y="# Interacting Authors",
        data=data,
        order=project_order,
        alpha=.25,
        size=3
    )
    ax.set_ylim(-0.1, 1.1)
    ax.set_aspect(0.3 / ax.get_data_ratio())
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel(None)
def string_to_value(
    str_value: tp.Union[str, tp.List[str]]
) -> tp.Union[CaseStudy, tp.List[CaseStudy]]:
    """
    Resolve case study name(s) against the loaded paper config.

    Args:
        str_value: a case study name, the literal ``"all"``, or a list of
                   case study names

    Returns:
        for a list, the concatenation of all case studies found for each
        name; for ``"all"``, every case study of the paper config; otherwise
        the first case study found for the given name
    """
    paper_config = get_loaded_paper_config()

    if isinstance(str_value, tp.List):
        resolved: tp.List[CaseStudy] = []
        for cs_name in str_value:
            resolved.extend(paper_config.get_case_studies(cs_name))
        return resolved

    if str_value != "all":
        return paper_config.get_case_studies(str_value)[0]

    return paper_config.get_all_case_studies()
def plot_file_name(self, filetype: str) -> str:
    """
    Build the name of the file this plot is saved to.

    The name combines the directory name of the loaded paper config, the
    plot name, and the values of the ``var_x`` and ``var_y`` plot kwargs.

    Args:
        filetype: file extension to use for the plot file

    Returns:
        the file name of the stored plot
    """
    config_name = get_loaded_paper_config().path.name
    x_name = self.plot_kwargs['var_x'].value
    y_name = self.plot_kwargs['var_y'].value
    return f"{config_name}_{self.name}_{x_name}_vs_{y_name}.{filetype}"
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    """
    Render the runtime and context-switch statistics of all time reports.

    Every processed ``TimeReportAggregate`` file of every case study in the
    loaded paper config contributes one row holding the mean and standard
    deviation of its wall-clock times and of its context switches. Rows are
    sorted and indexed by binary and experiment.

    Args:
        table_format: the format the table is rendered in
        wrap_table: forwarded unchanged to ``dataframe_to_table``

    Returns:
        the rendered table
    """
    columns = [
        "Binary", "Experiment", "Runtime Mean (Std)",
        "Ctx-Switches Mean (Std)"
    ]
    rows: tp.List[tp.Dict[str, tp.Any]] = []

    for case_study in get_loaded_paper_config().get_all_case_studies():
        project_name = case_study.project_name

        report_files = get_processed_revisions_files(
            project_name, TimeReportAggregate,
            get_case_study_file_name_filter(case_study), False
        )

        for report_file in report_files:
            time_aggregated = TimeReportAggregate(report_file)
            report_name = time_aggregated.filename

            mean_runtime = np.mean(
                time_aggregated.measurements_wall_clock_time
            )
            std_runtime = np.std(
                time_aggregated.measurements_wall_clock_time
            )
            mean_ctx = np.mean(time_aggregated.measurements_ctx_switches)
            std_ctx = np.std(time_aggregated.measurements_ctx_switches)

            rows.append({
                "Binary":
                    report_name.binary_name,
                "Experiment":
                    report_name.experiment_shorthand,
                "Runtime Mean (Std)":
                    f"{mean_runtime:.2f} ({std_runtime:.2f})",
                "Ctx-Switches Mean (Std)":
                    f"{mean_ctx:.2f} ({std_ctx:.2f})"
            })

    # Build the frame once from the collected rows: ``DataFrame.append`` was
    # removed in pandas 2.0 and copied the whole frame on every call. Naming
    # the columns explicitly keeps the sort/index keys present even when no
    # report file was found.
    df = pd.DataFrame(rows, columns=columns)
    df.sort_values(["Binary", "Experiment"], inplace=True)
    df.set_index(
        ["Binary", "Experiment"],
        inplace=True,
    )

    kwargs: tp.Dict[str, tp.Any] = {}
    if table_format.is_latex():
        # Two left-aligned index columns followed by two right-aligned
        # value columns.
        kwargs["column_format"] = "llrr"

    return dataframe_to_table(
        df, table_format, wrap_table, wrap_landscape=True, **kwargs
    )
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    """
    Render one row of repository metrics per case study.

    For every case study of the loaded paper config the table lists the
    project domain, the lines of code, the number of commits and the number
    of authors, all measured at the case study's newest revision (by the
    commit map's time id). A case study without revisions is measured at
    ``HEAD`` and gets no ``Revision`` entry.

    Args:
        table_format: the format the table is rendered in
        wrap_table: forwarded unchanged to ``dataframe_to_table``

    Returns:
        the rendered table, sorted by project name
    """
    case_studies = get_loaded_paper_config().get_all_case_studies()

    cs_data: tp.List[pd.DataFrame] = []
    for case_study in case_studies:
        project_name = case_study.project_name
        commit_map = get_commit_map(project_name)
        project_cls = get_project_cls_by_name(project_name)
        project_repo = get_local_project_git(project_name)
        # NOTE(review): drops the last five characters of the repository
        # path, presumably a trailing "/.git" -- confirm.
        project_path = project_repo.path[:-5]
        project_git = git["-C", project_path]

        # Newest revision, or None if the case study has none. Indexing
        # ``sorted(...)[0]`` raised IndexError for an empty revision list,
        # which made the "HEAD" fallback below unreachable.
        revision = max(
            case_study.revisions, key=commit_map.time_id, default=None
        )
        rev_range = revision.hash if revision else "HEAD"

        # Upper-case only the first character; slicing (rather than
        # indexing) tolerates an empty domain string.
        domain = str(project_cls.DOMAIN)

        cs_dict = {
            project_name: {
                "Domain":
                    domain[:1].upper() + domain[1:],
                "LOC":
                    calc_repo_loc(project_repo, rev_range),
                "Commits":
                    int(project_git("rev-list", "--count", rev_range)),
                "Authors":
                    len(
                        project_git("shortlog", "-s",
                                    rev_range).splitlines()
                    )
            }
        }
        if revision:
            cs_dict[project_name]["Revision"] = revision.short_hash

        cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index"))

    df = pd.concat(cs_data).sort_index()

    kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        kwargs["multicolumn_format"] = "c"
        kwargs["multirow"] = True

    return dataframe_to_table(
        df, table_format, wrap_table, wrap_landscape=True, **kwargs
    )
def test_cli_option_converter(self):
    """Check that a plot artefact's dict stores the case study and report
    type options as their string forms."""
    # Select and load the paper config used by this test.
    vara_cfg()['paper_config']['current_config'] = "test_artefacts_driver"
    load_paper_config()
    save_config()

    plot_config = PlotConfig.from_kwargs(view=False)
    xz_case_study = get_loaded_paper_config().get_case_studies("xz")[0]
    plot_generator = CaseStudyOverviewGenerator(
        plot_config, report_type=EmptyReport, case_study=xz_case_study
    )

    artefact = PlotArtefact.from_generator(
        "CS Overview", plot_generator, CommonPlotOptions.from_kwargs()
    )
    serialized = artefact.get_dict()

    self.assertEqual("xz_0", serialized["case_study"])
    self.assertEqual("EmptyReport", serialized["report_type"])
def test_caig_metrics_table(self) -> None:
    """Check the LaTeX booktabs output of the commit-author interaction
    graph metrics table over all case studies."""
    paper_cfg = vara_cfg()["paper_config"]
    paper_cfg["current_config"] = "test_diff_correlation_overview_table"
    initialize_projects()
    load_paper_config()

    table_config = TableConfig.from_kwargs(view=False)
    all_case_studies = get_loaded_paper_config().get_all_case_studies()
    table = CommitAuthorInteractionGraphMetricsTable(
        table_config, case_study=all_case_studies
    )
    rendered = table.tabulate(TableFormat.LATEX_BOOKTABS, False)

    # NOTE(review): the expected literal is reproduced exactly as it appears
    # in the reviewed source; confirm its whitespace and line breaks against
    # the output the table generator really emits.
    expected = r"""\begin{tabular}{lrrrrrrrrrrrrrr} \toprule {} & commits & authors & nodes & edges & \multicolumn{4}{c}{node degree} & \multicolumn{3}{c}{node out degree} & \multicolumn{3}{c}{node in degree} \\ {} & mean & median & min & max & median & min & max & median & min & max \\ \midrule \textbf{xz} & 1143 & 28 & 125 & 92 & 1.47 & 1.0 & 0 & 92 & 1.0 & 0 & 1 & 0.0 & 0 & 92 \\ \bottomrule \end{tabular} """
    self.assertEqual(expected, rendered)
def test_one_case_study_latex_booktabs(self) -> None:
    """Check the LaTeX booktabs output of the top central code commits table
    for the first ``xz`` case study, limited to ten commits."""
    paper_cfg = vara_cfg()["paper_config"]
    paper_cfg["current_config"] = "test_diff_correlation_overview_table"
    initialize_projects()
    load_paper_config()

    table_config = TableConfig.from_kwargs(view=False)
    xz_case_study = get_loaded_paper_config().get_case_studies("xz")[0]
    table = TopCentralCodeCommitsTable(
        table_config, case_study=xz_case_study, num_commits=10
    )
    rendered = table.tabulate(TableFormat.LATEX_BOOKTABS, False)

    # NOTE(review): the expected literal is reproduced exactly as it appears
    # in the reviewed source; confirm its whitespace and line breaks against
    # the output the table generator really emits.
    expected = r"""\begin{table} \centering \caption{Top 10 Central Code Commits} \begin{tabular}{lr} \toprule commit & centrality \\ \midrule ef68dd4a92976276304de2aedfbe34ae91a86abb & 28 \\ 57597d42ca1740ad506437be168d800a50f1a0ad & 16 \\ ea00545beace5b950f709ec21e46878e0f448678 & 16 \\ 7f0a4c50f4a374c40acf4b86848f301ad1e82d34 & 15 \\ c15c42abb3c8c6e77c778ef06c97a4a10b8b5d00 & 15 \\ fa3ab0df8ae7a8a1ad55b52266dc0fd387458671 & 10 \\ 1d924e584b146136989f48c13fff2632896efb3d & 9 \\ d8b41eedce486d400f701b757b7b5e4e32276618 & 8 \\ 1b0ac0c53c761263e91e34195cb21dfdcfeac0bd & 6 \\ e0ea6737b03e83ccaff4514d00e31bb926f8f0f3 & 6 \\ \bottomrule \end{tabular} \end{table} """
    self.assertEqual(expected, rendered)
def plot(self, view_mode: bool) -> None:
    """
    Draw a violin plot with an overlaid strip plot of the number of
    additional authors per author, grouped by project.

    For every case study with a processed ``BlameReport`` revision, each
    author node of the file-based author interaction graph contributes the
    number of its neighbours in the blame-based graph that are not among
    its neighbours in the file-based graph.

    Args:
        view_mode: not used by this implementation
    """

    def neighbors(graph: tp.Any, node: tp.Any) -> tp.Set[tp.Any]:
        # Union of the node's successors and predecessors.
        return set(graph.successors(node)).union(graph.predecessors(node))

    per_project_frames: tp.List[pd.DataFrame] = []
    plotted_projects: tp.List[str] = []

    for case_study in get_loaded_paper_config().get_all_case_studies():
        project_name = case_study.project_name
        revision = newest_processed_revision_for_case_study(
            case_study, BlameReport
        )
        if not revision:
            continue

        plotted_projects.append(project_name)

        blame_aig = create_blame_interaction_graph(
            project_name, revision
        ).author_interaction_graph()
        file_aig = create_file_based_interaction_graph(
            project_name, revision
        ).author_interaction_graph()

        author_rows: tp.List[tp.Dict[str, tp.Any]] = []
        for node in file_aig.nodes:
            node_attrs = tp.cast(AIGNodeAttrs, file_aig.nodes[node])

            # Authors missing from the blame graph have no blame neighbours.
            if blame_aig.has_node(node):
                blame_neighbors = neighbors(blame_aig, node)
            else:
                blame_neighbors = set()
            file_neighbors = neighbors(file_aig, node)

            author_rows.append({
                "Project":
                    project_name,
                "author":
                    f"{node_attrs['author']}",
                "# Additional Authors":
                    len(blame_neighbors.difference(file_neighbors))
            })

        project_frame = pd.DataFrame(author_rows)
        project_frame.set_index("author", inplace=True)
        per_project_frames.append(project_frame)

    data = pd.concat(per_project_frames)
    project_order = sorted(plotted_projects)

    ax = sns.violinplot(
        x="Project",
        y="# Additional Authors",
        data=data,
        order=project_order,
        inner=None,
        linewidth=1,
        color=".95"
    )
    sns.stripplot(
        x="Project",
        y="# Additional Authors",
        data=data,
        order=project_order,
        alpha=.25,
        size=3
    )
    ax.set_ylim(bottom=0, top=1.1 * data["# Additional Authors"].max())
    ax.set_aspect(0.3 / ax.get_data_ratio())
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel(None)