def test_store_artefact(self):
    """Test whether `vara-plot` can store artefacts."""
    # setup config
    vara_cfg()['paper_config']['current_config'] = "test_artefacts_driver"
    load_paper_config()
    save_config()

    artefacts_file_path = get_paper_config().path / "artefacts.yaml"
    artefacts_file_path.unlink()

    # vara-plot
    runner = CliRunner()
    result = runner.invoke(driver_plot.main, [
        "--save-artefact=PC Overview", "--plot-dir=foo", "pc-overview-plot",
        "--report-type=EmptyReport"
    ])
    self.assertEqual(0, result.exit_code, result.exception)
    self.assertTrue(artefacts_file_path.exists())

    # load new artefact file
    load_paper_config()
    artefacts = list(get_paper_config().artefacts)
    self.assertEqual(1, len(artefacts))

    artefact = artefacts[0]
    self.assertIsInstance(artefact, PlotArtefact)
    self.assertEqual("PC Overview", artefact.name)
def generate(only: tp.Optional[str]) -> None:
    """
    Generate artefacts.

    By default, all artefacts are generated.

    Args:
        only: generate only this artefact
    """
    if not Artefact.base_output_dir().exists():
        Artefact.base_output_dir().mkdir(parents=True)

    artefacts: tp.Iterable[Artefact]
    if only:
        artefacts = [
            art for art in get_paper_config().get_all_artefacts()
            if art.name in only
        ]
    else:
        artefacts = get_paper_config().get_all_artefacts()

    for artefact in artefacts:
        LOG.info(
            f"Generating artefact {artefact.name} in location "
            f"{artefact.output_dir}"
        )
        artefact.generate_artefact()

    # generate index.html
    _generate_index_html(artefacts, Artefact.base_output_dir() / "index.html")
    # generate plot_matrix.html
    plot_artefacts = list(_filter_plot_artefacts(artefacts))
    _generate_html_plot_matrix(
        plot_artefacts,
        Artefact.base_output_dir() / "plot_matrix.html"
    )
def test_get_newest_result_files_for_case_study_fail(self) -> None:
    """Check that when we have two files, the newest one gets selected."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    bad_file = ReportFilename(
        'CRE-CR-brotli-brotli-21ac39f7c8_'
        '34d4d1b5-7212-4244-9adc-b19bff599cf1_success.yaml'
    )

    now = datetime.now().timestamp()
    file_path = Path(
        str(vara_cfg()['result_dir'])
    ) / 'brotli' / bad_file.filename
    os.utime(file_path, (now, now))

    newest_res_files = MCS.get_newest_result_files_for_case_study(
        get_paper_config().get_case_studies('brotli')[0],
        Path(vara_cfg()['result_dir'].value), CR
    )

    # remove unnecessary files
    filtered_newest_res_files = list(
        filter(
            lambda res_file: res_file.commit_hash == bad_file.commit_hash,
            map(lambda res_file: ReportFilename(res_file), newest_res_files)
        )
    )

    self.assertFalse(filtered_newest_res_files[0].uuid.endswith('42'))
def test_file_based_interaction_graph(self) -> None:
    """Test whether file-based interaction graphs are created correctly."""
    vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
    load_paper_config()

    revision = newest_processed_revision_for_case_study(
        get_paper_config().get_case_studies("xz")[0], BlameReport
    )
    assert revision
    blame_interaction_graph = create_file_based_interaction_graph(
        "xz", revision
    )

    self.assertEqual(blame_interaction_graph.project_name, "xz")

    cig = blame_interaction_graph.commit_interaction_graph()
    self.assertEqual(482, len(cig.nodes))
    self.assertEqual(16518, len(cig.edges))

    aig = blame_interaction_graph.author_interaction_graph()
    self.assertEqual(4, len(aig.nodes))
    self.assertEqual(6, len(aig.edges))

    caig = blame_interaction_graph.commit_author_interaction_graph()
    self.assertEqual(486, len(caig.nodes))
    self.assertEqual(509, len(caig.edges))
def command_template(context: click.Context, **kwargs: tp.Any) -> None:
    # extract common arguments and table config from context
    common_options: CommonTableOptions = context.obj["common_options"]
    table_config: TableConfig = context.obj["table_config"]
    artefact_name: str = context.obj["save_artefact"]

    try:
        generator_instance = generator_cls(table_config, **kwargs)
        if artefact_name:
            paper_config = get_paper_config()
            if paper_config.artefacts.get_artefact(artefact_name):
                LOG.info(f"Updating existing artefact '{artefact_name}'.")
            else:
                LOG.info(f"Creating new artefact '{artefact_name}'.")
            artefact = TableArtefact.from_generator(
                artefact_name, generator_instance, common_options
            )
            paper_config.add_artefact(artefact)
            paper_config.store_artefacts()
        else:
            generator_instance(common_options)
    except TableGeneratorFailed as ex:
        print(
            f"Failed to create table generator {generator_cls.NAME}: "
            f"{ex.message}"
        )
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    case_studies = get_paper_config().get_all_case_studies()

    variables = [
        "churn", "num_interactions", "num_interacting_commits",
        "num_interacting_authors"
    ]
    cs_data = [
        BlameDiffMetricsDatabase.get_data_for_project(
            case_study.project_name, ["revision", *variables],
            get_commit_map(case_study.project_name), case_study
        ) for case_study in case_studies
    ]
    for data in cs_data:
        data.set_index('revision', inplace=True)
        data.drop(data[data['churn'] == 0].index, inplace=True)

    correlations = [
        data[variables].corr(method="pearson") for data in cs_data
    ]

    df = pd.concat(
        correlations, axis=1, keys=get_unique_cs_name(case_studies)
    )

    kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        kwargs["multicolumn_format"] = "c"

    return dataframe_to_table(
        df, table_format, wrap_table, wrap_landscape=False, **kwargs
    )
def _gen_overview_plot(**kwargs: tp.Any) -> tp.Dict[str, tp.Any]:
    """Generate the data for the PaperConfigOverviewPlot."""
    current_config = PC.get_paper_config()

    if 'report_type' in kwargs:
        result_file_type: tp.Type[BaseReport] = kwargs['report_type']
    else:
        result_file_type = EmptyReport

    projects = _load_projects_ordered_by_year(
        current_config, result_file_type
    )

    min_years = []
    max_years = []
    for _, revisions in projects.items():
        years = revisions.keys()
        min_years.append(min(years))
        max_years.append(max(years))

    year_range = list(range(min(min_years), max(max_years) + 1))
    project_names = list(projects.keys())

    result: tp.Dict[str, tp.Any] = {}
    result['year_range'] = year_range
    result['project_names'] = project_names

    result['revs_successful'] = []
    result['revs_blocked'] = []
    result['revs_total'] = []

    for _, revisions in projects.items():
        revs_successful_per_year = []
        revs_blocked_per_year = []
        revs_total_per_year = []
        for year in year_range:
            revs_in_year = revisions[year]
            if not revs_in_year:
                num_revs = np.nan
                num_successful_revs = np.nan
                num_blocked_revs = np.nan
            else:
                num_revs = len(revs_in_year)
                num_successful_revs = len([
                    rev for (rev, status) in revs_in_year
                    if status == FileStatusExtension.SUCCESS
                ])
                num_blocked_revs = len([
                    rev for (rev, status) in revs_in_year
                    if status == FileStatusExtension.BLOCKED
                ])

            revs_successful_per_year.append(num_successful_revs)
            revs_blocked_per_year.append(num_blocked_revs)
            revs_total_per_year.append(num_revs)

        result['revs_successful'].append(revs_successful_per_year)
        result['revs_blocked'].append(revs_blocked_per_year)
        result['revs_total'].append(revs_total_per_year)

    return result
def _pc_set(paper_config: tp.Optional[Path]) -> None:
    if not paper_config:
        pc_folder_path = Path(
            get_value_or_default(
                vara_cfg()["paper_config"], "folder",
                str(get_varats_base_folder())
            )
        )
        if not (pc_folder_path.exists() and pc_folder_path.is_dir()):
            LOG.error(
                f"Paper config folder not set: {pc_folder_path} "
                "(Path does not exist or is no directory)."
            )
            return

        paper_configs = _get_paper_configs(pc_folder_path)
        if not paper_configs:
            LOG.error(f"Could not find paper configs in: {pc_folder_path}")
            return

        raw_pc_path = None

        def set_pc_path(choice: str) -> None:
            nonlocal raw_pc_path
            raw_pc_path = choice

        current_config = None
        try:
            current_config = get_paper_config().path.name
        except ConfigurationLookupError:
            # No paper config specified in the varats config file
            pass

        try:
            cli_list_choice(
                "Choose a number to select a paper config", paper_configs,
                lambda x: f"{x} *"
                if current_config and x == current_config else x, set_pc_path
            )
        except EOFError:
            return
        if raw_pc_path is None:
            raise AssertionError("Choice should always return a value")
        paper_config = Path(raw_pc_path)

    paper_config = Path(paper_config)
    if not paper_config.is_absolute():
        paper_config = Path(
            vara_cfg()["paper_config"]["folder"].value
        ) / paper_config

    if not (paper_config.exists() and paper_config.is_dir()):
        LOG.error(
            f"Not a paper config: {paper_config} "
            "(Path does not exist or is no directory)."
        )
        return

    folder = paper_config.parent
    current_config = paper_config.name

    LOG.info(
        f"Current paper config is now {current_config} at location {folder}."
    )
    vara_cfg()["paper_config"]["folder"] = str(folder)
    vara_cfg()["paper_config"]["current_config"] = str(current_config)
    save_config()
def test_newest_processed_revision_no_results(self) -> None:
    """Check that None is returned when no results are available."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    newest_processed = MCS.newest_processed_revision_for_case_study(
        get_paper_config().get_case_studies('brotli')[0], CR
    )

    self.assertIsNone(newest_processed)
def test_get_revision_not_in_case_study(self) -> None:
    """Check if we correctly handle the lookup of a revision that is not in
    the case study."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    self.assertRaises(
        ValueError, MCS.get_revision_status_for_case_study,
        get_paper_config().get_case_studies('brotli')[0],
        ShortCommitHash('0000000000'), CR
    )
def __init_commit_hash(
    report_type: tp.Type[BaseReport], project: str,
    commit_hash: ShortCommitHash
) -> ShortCommitHash:
    if not commit_hash:
        # Ask the user to provide a commit hash
        print("No commit hash was provided.")
        paper_config = get_paper_config()
        available_commit_hashes = []

        # Compute available commit hashes
        for case_study in paper_config.get_case_studies(project):
            available_commit_hashes.extend(
                get_revisions_status_for_case_study(
                    case_study, report_type, tag_blocked=False
                )
            )

        max_num_hashes = 20
        if len(available_commit_hashes) > max_num_hashes:
            print("Found too many commit hashes, truncating selection...")

        # Create callbacks for the CLI choice
        def set_commit_hash(
            choice_pair: tp.Tuple[ShortCommitHash, FileStatusExtension]
        ) -> None:
            nonlocal commit_hash
            commit_hash = choice_pair[0]

        statuses = FileStatusExtension.get_physical_file_statuses().union(
            FileStatusExtension.get_virtual_file_statuses()
        )
        longest_file_status_extension = max(
            [len(status.name) for status in statuses]
        )

        def result_file_to_list_entry(
            commit_status_pair: tp.Tuple[ShortCommitHash, FileStatusExtension]
        ) -> str:
            status = commit_status_pair[1].get_colored_status().rjust(
                longest_file_status_extension +
                commit_status_pair[1].num_color_characters(), " "
            )
            return f"[{status}] {commit_status_pair[0]}"

        # Ask the user which commit we should use
        try:
            cli_list_choice(
                "Please select a hash:",
                available_commit_hashes[:max_num_hashes],
                result_file_to_list_entry,
                set_commit_hash,
                start_label=1,
                default=1,
            )
        except EOFError as exc:
            raise LookupError from exc
        if not commit_hash:
            print("Could not find processed commit hash.")
            raise LookupError
        return commit_hash
    return commit_hash
def test_get_revisions_status_for_case_study_to_high_stage(self) -> None:
    """Check if we correctly handle lookups where the selected stage is
    larger than the biggest one in the case study."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    self.assertListEqual(
        MCS.get_revisions_status_for_case_study(
            get_paper_config().get_case_studies('brotli')[0], CR, 9001
        ), []
    )
def test_get_revisions_in_case_study(self) -> None:
    """Check if we correctly handle the lookup of a revision that is in a
    case study."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    self.assertEqual(
        MCS.get_revision_status_for_case_study(
            get_paper_config().get_case_studies('brotli')[0],
            ShortCommitHash('21ac39f7c8'), CR
        ), FileStatusExtension.SUCCESS
    )
def test_get_newest_result_files_for_case_study_with_empty_res_dir(
    self
) -> None:
    """Check that we correctly handle the edge case where no result dir
    exists."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    self.assertListEqual(
        MCS.get_newest_result_files_for_case_study(
            get_paper_config().get_case_studies('brotli')[0],
            Path(vara_cfg()['result_dir'].value), CR
        ), []
    )
def test_newest_processed_revision(self) -> None:
    """Check whether the newest processed revision is correctly
    identified."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    newest_processed = MCS.newest_processed_revision_for_case_study(
        get_paper_config().get_case_studies('brotli')[0], CR
    )

    self.assertEqual(
        FullCommitHash('21ac39f7c8ca61c855be0bc38900abe7b5a0f67f'),
        newest_processed
    )
def test_get_processed_revisions(self) -> None:
    """Check if we can correctly find all processed revisions of a case
    study."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    process_revs = MCS.processed_revisions_for_case_study(
        get_paper_config().get_case_studies('brotli')[0], CR
    )

    self.assertEqual(len(process_revs), 1)
    self.assertTrue(
        FullCommitHash('21ac39f7c8ca61c855be0bc38900abe7b5a0f67f') in
        process_revs
    )
def test_get_failed_revisions(self) -> None:
    """Check if we can correctly find all failed revisions of a case
    study."""
    vara_cfg()['paper_config']['current_config'] = "test_revision_lookup"
    load_paper_config()

    failed_revs = MCS.failed_revisions_for_case_study(
        get_paper_config().get_case_studies('brotli')[0], CR
    )

    self.assertEqual(len(failed_revs), 1)
    self.assertTrue(
        FullCommitHash('aaa4424d9bdeb10f8af5cb4599a0fc2bbaac5553') in
        failed_revs
    )
def create_single_case_study_choice() -> TypedChoice['CaseStudy']:
    """Create a choice parameter type that allows selecting exactly one case
    study from the current paper config."""
    try:
        paper_config = get_paper_config()
    except ConfigurationLookupError:
        empty_cs_dict: tp.Dict[str, 'CaseStudy'] = {}
        return TypedChoice(empty_cs_dict)
    value_dict = {
        f"{cs.project_name}_{cs.version}": cs
        for cs in paper_config.get_all_case_studies()
    }
    return TypedChoice(value_dict)
def show_status_of_case_studies(
    experiment_type: tp.Type[VersionExperiment], filter_regex: str,
    short_status: bool, sort: bool, print_rev_list: bool, sep_stages: bool,
    print_legend: bool
) -> None:
    """
    Prints the status of all matching case studies to the console.

    Args:
        experiment_type: experiment type whose files will be considered
        filter_regex: applied to a ``name_version`` string for filtering the
                      amount of case studies to be shown
        short_status: print only a short version of the status information
        sort: sort the output order of the case studies
        print_rev_list: print a list of revisions for every case study
        sep_stages: print each stage separately
        print_legend: print a legend for the different types
    """
    current_config = PC.get_paper_config()

    longest_cs_name = 0
    output_case_studies = []
    for case_study in sorted(
        current_config.get_all_case_studies(),
        key=lambda cs: (cs.project_name, cs.version)
    ):
        match = re.match(
            filter_regex, f"{case_study.project_name}_{case_study.version}"
        )
        if match is not None:
            output_case_studies.append(case_study)
            longest_cs_name = max(
                longest_cs_name,
                len(case_study.project_name) + len(str(case_study.version))
            )

    if print_legend:
        print(get_legend(True))

    total_status_occurrences: tp.DefaultDict[
        FileStatusExtension, tp.Set[ShortCommitHash]] = defaultdict(set)

    for case_study in output_case_studies:
        if print_rev_list:
            print(get_revision_list(case_study))
        elif short_status:
            print(
                get_short_status(
                    case_study, experiment_type, longest_cs_name, True,
                    total_status_occurrences
                )
            )
        else:
            print(
                get_status(
                    case_study, experiment_type, longest_cs_name, sep_stages,
                    sort, True, total_status_occurrences
                )
            )

    print(get_total_status(total_status_occurrences, longest_cs_name, True))
def show(name: str) -> None:
    """
    Show detailed information about artefacts.

    Args:
        name: the name of the artefact
    """
    paper_config = get_paper_config()

    artefact = paper_config.artefacts.get_artefact(name)
    if artefact:
        print(f"Artefact '{name}':")
        print(textwrap.indent(yaml.dump(artefact.get_dict()), ' '))
    else:
        print(f"There is no artefact with the name {name}.")
def create_multi_case_study_choice() -> TypedMultiChoice['CaseStudy']:
    """
    Create a choice parameter type that allows selecting multiple case
    studies from the current paper config.

    Multiple case studies can be given as a comma separated list. The special
    value "all" selects all case studies in the current paper config.
    """
    try:
        paper_config = get_paper_config()
    except ConfigurationLookupError:
        empty_cs_dict: tp.Dict[str, tp.List['CaseStudy']] = {}
        return TypedMultiChoice(empty_cs_dict)
    value_dict = {
        f"{cs.project_name}_{cs.version}": [cs]
        for cs in paper_config.get_all_case_studies()
    }
    value_dict["all"] = paper_config.get_all_case_studies()
    return TypedMultiChoice(value_dict)
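# Hypothetical usage sketch (assumption, not existing varats code): shows how
# the choice type above could be wired into a click command. The command
# name, option name, and the parameter type of `case_studies` are
# illustrative guesses; only `create_multi_case_study_choice` comes from the
# module itself.
import click  # already used elsewhere in this code base


@click.command()
@click.option(
    "--case-study",
    "case_studies",
    type=create_multi_case_study_choice(),
    required=True,
    help="Comma-separated case studies, e.g. 'xz_0,brotli_0', or 'all'."
)
def _example_list_case_studies(case_studies: tp.List['CaseStudy']) -> None:
    """Print the project name of each selected case study (sketch only)."""
    for case_study in case_studies:
        click.echo(case_study.project_name)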
def test_get_author_data(self) -> None:
    """Check whether author data is retrieved correctly from the author
    interaction graph."""
    vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
    load_paper_config()

    revision = newest_processed_revision_for_case_study(
        get_paper_config().get_case_studies("xz")[0], BlameReport
    )
    assert revision
    blame_interaction_graph = create_blame_interaction_graph("xz", revision)

    self.assertEqual(blame_interaction_graph.project_name, "xz")

    aig = blame_interaction_graph.author_interaction_graph()
    author_data = get_author_data(aig, "Lasse Collin")
    self.assertEqual(author_data["node_attrs"]["author"], "Lasse Collin")
    self.assertEqual(author_data["neighbors"], set())
    self.assertEqual(0, len(author_data["in_attrs"]))
    self.assertEqual(0, len(author_data["out_attrs"]))
def test_artefacts_generate(self) -> None:
    """Test whether `vara-art generate` generates all expected files."""
    # setup config
    vara_cfg()['paper_config']['current_config'] = "test_artefacts_driver"
    load_paper_config()
    artefacts = get_paper_config().get_all_artefacts()
    base_output_dir = Artefact.base_output_dir()

    # vara-art generate
    runner = CliRunner()
    result = runner.invoke(driver_artefacts.main, ["generate"])
    self.assertEqual(0, result.exit_code, result.exception)

    # check that overview files are present
    self.assertTrue((base_output_dir / "index.html").exists())
    self.assertTrue((base_output_dir / "plot_matrix.html").exists())
    # check that artefact files are present
    for artefact in artefacts:
        self.__check_artefact_files_present(artefact)
def package_paper_config(
    output_file: Path, cs_filter_regex: tp.Pattern[str],
    report_names: tp.List[tp.Type[BaseReport]]
) -> None:
    """
    Package all files from a paper config into a zip archive.

    Args:
        output_file: file to write to
        cs_filter_regex: applied to a ``name_version`` string for filtering
                         the case studies to be included in the zip archive
        report_names: list of report names that should be added
    """
    current_config = PC.get_paper_config()
    result_dir = Path(str(vara_cfg()['result_dir']))
    report_types = report_names if report_names else list(
        BaseReport.REPORT_TYPES.values()
    )

    files_to_store: tp.Set[Path] = set()
    for case_study in current_config.get_all_case_studies():
        match = re.match(
            cs_filter_regex, f"{case_study.project_name}_{case_study.version}"
        )
        if match is not None:
            for report_type in report_types:
                files_to_store.update(
                    get_newest_result_files_for_case_study(
                        case_study, result_dir, report_type
                    )
                )

    case_study_files_to_include: tp.List[Path] = []
    for cs_file in current_config.path.iterdir():
        match = re.match(cs_filter_regex, cs_file.name)
        if match is not None:
            case_study_files_to_include.append(cs_file)

    vara_root = Path(str(vara_cfg()['config_file'])).parent
    with ZipFile(
        output_file, "w", compression=ZIP_DEFLATED, compresslevel=9
    ) as pc_zip:
        for file_path in files_to_store:
            pc_zip.write(file_path.relative_to(vara_root))

        for case_study_file in case_study_files_to_include:
            pc_zip.write(case_study_file.relative_to(vara_root))
def _generate_html_plot_matrix(
    artefacts: tp.Iterable[PlotArtefact], outfile: Path
) -> None:
    """
    Generates an HTML overview for the given artefacts.

    Args:
        artefacts: the artefacts to include in the overview
        outfile: the path to store the overview in
    """
    files: tp.Dict[str, tp.Dict[str, Path]] = defaultdict(dict)
    for artefact in artefacts:
        file_infos = artefact.get_artefact_file_infos()
        for file_info in file_infos:
            if file_info.case_study:
                file_path = _locate_artefact_file(
                    Path(file_info.file_name), artefact.output_dir,
                    outfile.parent
                )
                if not file_path:
                    continue
                files[file_info.case_study.project_name][
                    artefact.name] = file_path

    columns: tp.List[str] = []
    for case_study in get_paper_config().get_all_case_studies():
        images: tp.List[str] = []
        for artefact in artefacts:
            image_path = files[case_study.project_name].get(
                artefact.name, None
            )
            if not image_path:
                continue
            images.append(__IMAGE_TEMPLATE.format(str(image_path)))
        if images:
            columns.append(__COLUMN_TEMPLATE.format("\n".join(images)))

    html = html_page(
        "Results", "\n".join(columns), [CSS_COMMON, CSS_IMAGE_MATRIX]
    )

    with open(outfile, "w") as file:
        file.write(html)
def _pc_list(paper_config_path: tp.Optional[Path]) -> None:
    if not paper_config_path:
        paper_config_path = Path(vara_cfg()["paper_config"]["folder"].value)

    if not (paper_config_path.exists() and paper_config_path.is_dir()):
        LOG.error(
            f"Paper config folder not found: {paper_config_path} "
            "(Path does not exist or is no directory)."
        )
        return

    print("Found the following paper_configs:")
    current_config = None
    try:
        current_config = get_paper_config().path.name
    except ConfigurationLookupError:
        # No paper config specified in the varats config file
        pass

    for paper_config in _get_paper_configs(paper_config_path):
        if current_config and paper_config == current_config:
            print(f"{paper_config} *")
        else:
            print(paper_config)
def __validate_project_parameters(
    ctx: tp.Optional[click.Context], param: tp.Optional[click.Parameter],
    value: tp.Tuple[str, ...]
) -> tp.Tuple[str, ...]:
    """
    Sanity-check project/version specification.

    Currently, we only support the ``<project>@<revision>`` syntax. Checks
    whether ``project`` and (if available) ``version`` is selected by one of
    the case studies in the current paper config.
    """
    # pylint: disable=unused-argument
    for project_specifier in value:
        split_input = project_specifier.rsplit('@', maxsplit=1)
        project = split_input[0]
        version = split_input[1] if len(split_input) > 1 else None

        projects: tp.Set[str] = set()
        case_studies: tp.List[CaseStudy] = []
        try:
            paper_config = get_paper_config()
            case_studies = paper_config.get_all_case_studies()
            projects = {cs.project_name for cs in case_studies}
        except ConfigurationLookupError:
            pass

        if project not in projects:
            raise click.BadParameter(
                f"Project '{project}' is not in the current paper config."
            )

        if version:
            commit_hash = ShortCommitHash(version)
            if not any(cs.has_revision(commit_hash) for cs in case_studies):
                raise click.BadParameter(
                    f"Version '{version}' is not selected by any case study."
                )

    return value
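# Illustration (assumption: a simplified standalone example, not library
# code): the validator above splits on the last '@' only, so a bare project
# name and a '<project>@<revision>' specifier are both accepted. The example
# inputs are made up.
#
#     "brotli".rsplit('@', maxsplit=1)             -> ['brotli']
#     "brotli@21ac39f7c8".rsplit('@', maxsplit=1)  -> ['brotli', '21ac39f7c8']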
def main(
    verbose: int,
    slurm: bool,
    submit: bool,
    container: bool,
    experiment: str,
    projects: tp.List[str],
    pretend: bool,
) -> None:
    """
    Run benchbuild experiments.

    Runs on all projects in the current paper config by default. You can
    restrict this to only certain projects or even revisions using
    BenchBuild-style project selectors: <project>[@<revision>]
    """
    # pylint: disable=too-many-branches
    initialize_cli_tool()
    initialize_projects()

    bb_command_args: tp.List[str] = ["--force-watch-unbuffered"]
    bb_extra_args: tp.List[str] = []

    if sys.stdout.isatty():
        bb_command_args.append("--force-tty")

    if verbose:
        bb_command_args.append("-" + ("v" * verbose))

    if pretend:
        click.echo("Running in pretend mode. No experiments will be executed.")
        # benchbuild only supports pretend in the normal run command
        slurm = False
        container = False

    if slurm:
        bb_command_args.append("slurm")

    if container:
        if slurm:
            if not __is_slurm_prepared():
                click.echo(
                    "It seems like benchbuild is not properly "
                    "configured for slurm + containers. "
                    "Please run 'vara-container prepare-slurm' first."
                )
                sys.exit(1)
            bb_extra_args = ["--", "container", "run"]
            if bb_cfg()["container"]["import"].value:
                bb_extra_args.append("--import")
        else:
            bb_command_args.append("container")

    if not slurm:
        bb_command_args.append("run")

    if pretend:
        bb_command_args.append("-p")

    if not projects:
        projects = list({
            cs.project_name
            for cs in get_paper_config().get_all_case_studies()
        })

    bb_args = list(
        itertools.chain(
            bb_command_args, ["-E", experiment], projects, bb_extra_args
        )
    )

    with local.cwd(vara_cfg()["benchbuild_root"].value):
        try:
            with benchbuild[bb_args].bgrun(stdout=PIPE, stderr=PIPE) as bb_proc:
                try:
                    _, stdout, _ = tee(bb_proc)
                except KeyboardInterrupt:
                    # wait for BB to complete when Ctrl-C is pressed
                    retcode, _, _ = tee(bb_proc)
                    sys.exit(retcode)
        except ProcessExecutionError:
            sys.exit(1)

    if slurm:
        match = __SLURM_SCRIPT_PATTERN.search(stdout)
        if match:
            slurm_script = match.group(1)
            if submit:
                click.echo(
                    f"Submitting slurm script via sbatch: {slurm_script}"
                )
                sbatch(slurm_script)
            else:
                click.echo(
                    f"Run the following command to submit the slurm script:\n"
                    f"sbatch {slurm_script}"
                )
        else:
            click.echo("Could not find slurm script.")
            sys.exit(1)
def list_() -> None:
    """List the available artefacts."""
    paper_config = get_paper_config()
    for artefact in paper_config.artefacts:
        print(f"{artefact.name} [{artefact.ARTEFACT_TYPE}]")