def test_analyze_with_errors(cli_runner, tmpdir, capsys, data):
    """Ensure the user is warned about errors in the output files.

    Benchmarks that crashed (or never ran) must show a question mark in
    place of a float in the printed table.
    """
    with tmpdir.as_cwd():
        # Run the CLI against a directory known to contain broken outputs.
        cli_result = cli_runner.invoke(
            cli,
            ["analyze", "--directory={}".format(data["analyze-files-w-errors"])],
        )

        # Rebuild the expected table through the same code path the CLI uses.
        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-w-errors"])
        expected = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        expected = expected.iloc[:, :-1].replace(np.nan, "?")
        print_dataframe(
            expected,
            columns=map_columns(version.category_mapping, version.analyze_printing),
        )

        captured, _ = capsys.readouterr()
        captured = "Setting up...\n" + captured

        assert cli_result.exit_code == 0
        assert cli_result.output == "\n".join(captured.split("\n"))
def test_consolidate_dataframe(capsys, data):
    """Consolidating a DataFrame should collapse per-node rows into ranges.

    Runs consolidate_dataframe on the GROMACS fixture data, prints the
    result, and compares the captured console output against a literal
    expected table (node counts 1..5 grouped into the "1-5" range).
    """
    bundle = dtr.discover(data["analyze-files-gromacs"])
    # Pick the benchmark-version class matching the bundle's categories.
    version = VersionFactory(categories=bundle.categories).version_class
    df = utils.parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )
    test_output = utils.consolidate_dataframe(
        df, columns=version.consolidate_categories
    )
    # Skip the first printing column (generate_printing[1:]) — it is not
    # part of the consolidated overview.
    print_dataframe(
        test_output[version.generate_printing[1:]],
        columns=map_columns(
            map_dict=version.category_mapping,
            columns=version.generate_printing[1:],
        ),
    )
    # Tuple of lines joined with "" below; the first element carries the
    # "Setting up..." banner fused with the top rule of the table.
    expected_output = (
        "Setting up...\n\n"
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n",
        "| Module | Nodes | Time (min) | GPUs? | Host | # ranks | # threads | Hyperthreading? |\n",
        "|----------------+---------+--------------+---------+--------+-----------+-------------+-------------------|\n",
        "| gromacs/2016.3 | 1-5 | 15 | False | draco | nan | nan | nan |\n",
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n\n",
    )
    out, _ = capsys.readouterr()
    assert "\n".join(out.split("\n")) == "".join(expected_output)
def test_parse_bundle(data):
    """parse_bundle must reproduce the reference CSV for the GROMACS files."""
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class

    parsed = utils.parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # The reference frame was exported once and checked into the fixtures.
    reference = pd.read_csv(data["analyze-files-gromacs.csv"], index_col=False)
    assert_frame_equal(parsed, reference)
def do_analyze(directory, save_csv):
    """Analyze benchmarks.

    Discovers all benchmark runs below ``directory``, optionally writes the
    collected data to ``save_csv``, and prints a summary table to the
    console.

    Parameters
    ----------
    directory : str
        Path below which to discover benchmark bundles.
    save_csv : str or None
        If given, export the data to this CSV file (``.csv`` is appended
        when missing).
    """
    bundle = dtr.discover(directory)
    version = VersionFactory(categories=bundle.categories).version_class

    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the versions column from the DataFrame; it is internal only.
    df = df.drop(columns=["version"])

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)
        # BUG FIX: the message was missing its verb ("Successfully benchmark
        # data to {}.").
        console.success("Successfully wrote benchmark data to {}.", save_csv)

    # BUG FIX: check for missing values BEFORE replacing NaN with "?".
    # The original code replaced first, so isnull() could never be True and
    # the warning was unreachable.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather informations for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet.")

    # Reformat NaN values nicely into question marks.
    df = df.replace(np.nan, "?")

    # Ask before flooding the console with more than 50 benchmark rows.
    if df.shape[0] > 50:
        if not click.confirm(
                "We are about to print the results of {} benchmarks to the console. Continue?"
                .format(click.style(str(df.shape[0]), bold=True))):
            console.error("Exiting.")

    # Print the data to the console
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping, version.analyze_printing),
    )
def test_analyze_namd(cli_runner, tmpdir, capsys, data):
    """The analyze command must print the expected table for NAMD runs."""
    with tmpdir.as_cwd():
        cli_result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-namd"])]
        )

        # Reconstruct the expected output via the same helpers the CLI uses.
        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-namd"])
        expected = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        expected = expected.iloc[:, :-1].replace(np.nan, "?")
        print_dataframe(
            expected,
            columns=map_columns(version.category_mapping, version.analyze_printing),
        )

        captured, _ = capsys.readouterr()
        captured = "Setting up...\n" + captured

        assert cli_result.exit_code == 0
        assert cli_result.output == "\n".join(captured.split("\n"))
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks.

    Parameters
    ----------
    directory : str
        Path below which to discover generated benchmarks.
    force_restart : bool
        Re-submit benchmarks that were already started (cleaning up files
        from the previous run first).
    yes : bool
        Skip the interactive confirmation prompt.
    """
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    # BUG FIX: drop the columns from df_to_print, not df — the original
    # dropped from df, which silently discarded the NaN replacement above.
    df_to_print = df_to_print.drop(columns=["ncores", "version"])

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories)
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm(
            "The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by previous mdbenchmark run
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )