def test_consolidate_dataframe(capsys, data):
    bundle = dtr.discover(data["analyze-files-gromacs"])
    version = VersionFactory(categories=bundle.categories).version_class
    df = utils.parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )
    test_output = utils.consolidate_dataframe(
        df, columns=version.consolidate_categories
    )
    print_dataframe(
        test_output[version.generate_printing[1:]],
        columns=map_columns(
            map_dict=version.category_mapping,
            columns=version.generate_printing[1:],
        ),
    )

    expected_output = (
        "Setting up...\n\n"
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n",
        "| Module         | Nodes   |   Time (min) | GPUs?   | Host   |   # ranks |   # threads |   Hyperthreading? |\n",
        "|----------------+---------+--------------+---------+--------+-----------+-------------+-------------------|\n",
        "| gromacs/2016.3 | 1-5     |           15 | False   | draco  |       nan |         nan |               nan |\n",
        "+----------------+---------+--------------+---------+--------+-----------+-------------+-------------------+\n\n",
    )

    out, _ = capsys.readouterr()
    assert "\n".join(out.split("\n")) == "".join(expected_output)
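# For context: consolidate_dataframe collapses benchmark rows that differ only
# in the number of nodes into a single summary row, which is how "1-5" appears
# in the "Nodes" column of the expected table above. A minimal pandas sketch of
# that idea (a hypothetical helper, not mdbenchmark's actual implementation):
import pandas as pd


def _collapse_nodes(df, group_columns):
    """Group on everything except 'nodes' and join the node counts into a range."""

    def _to_range(series):
        values = sorted(series)
        if len(values) > 1:
            return "{}-{}".format(values[0], values[-1])
        return str(values[0])

    return df.groupby(group_columns, as_index=False).agg({"nodes": _to_range})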
def test_analyze_with_errors(cli_runner, tmpdir, capsys, data):
    """Test that we warn the user of errors in the output files.

    Also test that we show a question mark instead of a float in the
    corresponding cell.
    """
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            ["analyze", "--directory={}".format(data["analyze-files-w-errors"])],
        )

        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-w-errors"])
        df = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping, version.analyze_printing),
        )
        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out

        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
def test_submit_test_prompt_no(cli_runner, tmpdir, data):
    """Test that answering the submit prompt with 'no' aborts the submission."""
    benchmark_version = Version2Categories()
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs-one-unstarted"]),
            ],
            input="n\n",
        )

        df = pd.read_csv(data["gromacs/test_prompt.csv"], index_col=0)
        print_dataframe(
            df,
            columns=map_columns(
                map_dict=benchmark_version.category_mapping,
                columns=benchmark_version.generate_printing[1:],
            ),
        )

        assert result.exit_code == 1
        assert (
            result.output.split("\n")[-2] == "ERROR Exiting. No benchmarks submitted."
        )
def test_print_dataframe(capsys, data):
    df = pd.read_csv(data["analyze-files-gromacs.csv"])
    version = Version2Categories()
    utils.print_dataframe(df, version.analyze_printing + ["version"])

    expected_output = tabulate.tabulate(
        df, headers="keys", tablefmt="psql", showindex=False
    )
    expected_output = "\n" + expected_output + "\n\n"

    out, _ = capsys.readouterr()
    assert "\n".join(out.split("\n")) == expected_output
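# A minimal sketch of the behaviour the test above relies on (an assumption
# derived from the expected output, not the actual implementation): render the
# DataFrame with tabulate's "psql" format and pad it with blank lines.
import tabulate


def _print_dataframe_sketch(df, columns):
    # `columns` would be used to relabel or select headers in the real helper;
    # here we only show the tabulate call the expected output implies.
    table = tabulate.tabulate(df, headers="keys", tablefmt="psql", showindex=False)
    print("\n" + table + "\n")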
def do_analyze(directory, save_csv):
    """Analyze benchmarks."""
    bundle = dtr.discover(directory)

    version = VersionFactory(categories=bundle.categories).version_class

    df = parse_bundle(
        bundle,
        columns=version.analyze_categories,
        sort_values_by=version.analyze_sort,
    )

    # Remove the version column from the DataFrame.
    columns_to_drop = ["version"]
    df = df.drop(columns=columns_to_drop)

    if save_csv is not None:
        if not save_csv.endswith(".csv"):
            save_csv = "{}.csv".format(save_csv)
        df.to_csv(save_csv, index=False)

        console.success("Successfully wrote benchmark data to {}.", save_csv)

    # Warn the user about incomplete data before we hide the NaN values.
    if df.isnull().values.any():
        console.warn(
            "We were not able to gather information for all systems. "
            "Systems marked with question marks have either crashed or "
            "were not started yet."
        )

    # Reformat NaN values nicely into question marks.
    # TODO: move this to the bundle function!
    df = df.replace(np.nan, "?")

    # Ask for confirmation before printing more than 50 benchmark results to
    # the console.
    if df.shape[0] > 50 and not click.confirm(
        "We are about to print the results of {} benchmarks to the console. Continue?".format(
            click.style(str(df.shape[0]), bold=True)
        )
    ):
        console.error("Exiting.")

    # Print the data to the console.
    print_dataframe(
        df,
        columns=map_columns(version.category_mapping, version.analyze_printing),
    )
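# Usage sketch (mirroring the test fixtures in this suite): the analyze command
# can be driven programmatically through click's CliRunner. The directory path
# is a placeholder, and `cli` is assumed to be the top-level click group
# imported at module level.
def _example_invoke_analyze():
    from click.testing import CliRunner

    runner = CliRunner()
    result = runner.invoke(cli, ["analyze", "--directory=path/to/benchmarks"])
    return result.output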
def test_submit_resubmit(cli_runner, monkeypatch, tmpdir, data):
    """Test that we cannot submit a benchmark system that was already
    submitted, unless we force it.
    """
    with tmpdir.as_cwd():
        # Test that we get an error if we try to point the submit function to
        # a non-existent path.
        result = cli_runner.invoke(cli, ["submit", "--directory=look_here/"], "--yes")
        assert result.exit_code == 1
        assert result.output == "ERROR No benchmarks found.\n"

        # Test that we get an error if we try to start benchmarks that were
        # already started once.
        result = cli_runner.invoke(
            cli,
            ["submit", "--directory={}".format(data["analyze-files-gromacs"]), "--yes"],
        )
        df = pd.read_csv(data["analyze-files-gromacs-consolidated.csv"], index_col=0)
        s = print_dataframe(df, False)
        output = "ERROR All generated benchmarks were already started once. You can force a restart with --force.\n"
        assert result.exit_code == 1
        assert result.output == output

        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions.
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.submit.cleanup_before_restart", lambda engine, sim: True
        )
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted.\n"
            + "Submitting a total of 5 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )
        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs"]),
                "--force",
                "--yes",
            ],
        )
        assert result.exit_code == 0
        assert result.output == output
def test_analyze_gromacs(cli_runner, tmpdir, capsys, data):
    """Test that the output is OK when all outputs are fine."""
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli,
            ["analyze", "--directory={}".format(data["analyze-files-gromacs"])],
        )

        df = pd.read_csv(data["analyze-files-gromacs.csv"])
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        version = Version2Categories()
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping, version.analyze_printing),
        )
        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out

        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
def test_analyze_namd(cli_runner, tmpdir, capsys, data):
    with tmpdir.as_cwd():
        result = cli_runner.invoke(
            cli, ["analyze", "--directory={}".format(data["analyze-files-namd"])]
        )
        version = Version2Categories()
        bundle = dtr.discover(data["analyze-files-namd"])
        df = parse_bundle(
            bundle,
            columns=version.analyze_categories,
            sort_values_by=version.analyze_sort,
        )
        df = df.iloc[:, :-1]
        df = df.replace(np.nan, "?")
        print_dataframe(
            df,
            columns=map_columns(version.category_mapping, version.analyze_printing),
        )
        out, _ = capsys.readouterr()
        out = "Setting up...\n" + out

        assert result.exit_code == 0
        assert result.output == "\n".join(out.split("\n"))
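# map_columns is used throughout these tests with a mapping dict and a list of
# internal column names. A minimal sketch of that idea (an assumption, not the
# actual implementation): translate each internal name to its display name,
# e.g. {"module": "Module", "nodes": "Nodes"} turns ["module", "nodes"] into
# ["Module", "Nodes"].
def _map_columns_sketch(map_dict, columns):
    return [map_dict.get(column, column) for column in columns]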
def test_submit_test_prompt_yes(cli_runner, tmpdir, data, monkeypatch):
    """Test that answering the submit prompt with 'yes' submits the benchmarks."""
    with tmpdir.as_cwd():
        # Test that we can force restart already run benchmarks.
        # Monkeypatch a few functions.
        monkeypatch.setattr("subprocess.call", lambda x: True)
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.get_batch_command", lambda: "sbatch"
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.detect_md_engine", lambda x: gromacs
        )
        monkeypatch.setattr(
            "mdbenchmark.cli.submit.cleanup_before_restart", lambda engine, sim: True
        )

        result = cli_runner.invoke(
            cli,
            [
                "submit",
                "--directory={}".format(data["analyze-files-gromacs-one-unstarted"]),
            ],
            input="y\n",
        )
        df = pd.read_csv(data["gromacs/test_prompt.csv"], index_col=0)
        s = print_dataframe(df, False)
        output = (
            "Benchmark Summary:\n"
            + s
            + "\nThe above benchmarks will be submitted. Continue? [y/N]: y\n"
            + "Submitting a total of 1 benchmarks.\n"
            + "Submitted all benchmarks. Run mdbenchmark analyze once they are finished to get the results.\n"
        )
        assert result.exit_code == 0
        assert result.output == output

        # Lazy way of resetting the value of `started` to `False`.
        # TODO: We need to clean up all of our unit tests...
        treant = dtr.Bundle(data["analyze-files-gromacs-one-unstarted"] + "/1")
        treant.categories["started"] = False
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None

    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories
    ).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks and drop the columns that
    # are not needed for the summary.
    df_to_print = df.replace(np.nan, "?")
    columns_to_drop = ["ncores", "version"]
    df_to_print = df_to_print.drop(columns=columns_to_drop)

    # Consolidate the data by grouping on the number of nodes and print it to
    # the user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories
    )
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks.
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run.
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
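# The groupby call in do_submit behaves like a mapping keyed by the value of
# the "started" category, which is why a missing key is handled with KeyError.
# A rough plain-Python equivalent (a sketch assuming each sim exposes a
# `categories` mapping; this is not datreant's actual implementation):
def _group_by_started(sims):
    groups = {}
    for sim in sims:
        groups.setdefault(sim.categories["started"], []).append(sim)
    return groups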
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
    physical_cores,
    logical_cores,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    """Generate a bunch of benchmarks."""
    # Instantiate the version we are going to use.
    benchmark_version = Version3Categories()

    # Validate the CPU and GPU flags.
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes.
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    if logical_cores < physical_cores:
        console.error(
            "The number of logical cores cannot be smaller than the number of physical cores."
        )

    if physical_cores and not logical_cores:
        console.warn("Assuming logical_cores = 2 * physical_cores")
        logical_cores = 2 * physical_cores

    if physical_cores and logical_cores:
        processor = Processor(
            physical_cores=physical_cores, logical_cores=logical_cores
        )
    else:
        processor = Processor()

    # Hyperthreading check.
    if enable_hyperthreading and not processor.supports_hyperthreading:
        console.error("The processor of this machine does not support hyperthreading.")

    if not number_of_ranks:
        number_of_ranks = (processor.physical_cores,)

    # Validate the number of simulations.
    validate_number_of_simulations(multidir, min_nodes, max_nodes, number_of_ranks)

    # Grab the template name for the host. This should always work because
    # click does the validation for us.
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    # Stop if we cannot find any modules. If the user specified multiple
    # modules, we will continue with only the valid ones.
    modules = mdengines.normalize_modules(module, skip_validation)
    if not modules:
        console.error("No requested modules available!")

    # Check that all needed files exist. Throw an error if they do not.
    validate_required_files(name=name, modules=modules)

    # Validate that we can use the number of ranks and threads.
    # We can continue if no ValueError is thrown.
    for ranks in number_of_ranks:
        try:
            processor.get_ranks_and_threads(
                ranks, with_hyperthreading=enable_hyperthreading
            )
        except ValueError as e:
            console.error(e)

    # Create all benchmark combinations and put them into a DataFrame.
    data = construct_generate_data(
        name,
        job_name,
        modules,
        host,
        template,
        cpu,
        gpu,
        time,
        min_nodes,
        max_nodes,
        processor,
        number_of_ranks,
        enable_hyperthreading,
        multidir,
    )
    df = pd.DataFrame(data, columns=benchmark_version.generate_categories)

    # Consolidate the data by grouping on the number of nodes and print it to
    # the user as an overview.
    consolidated_df = consolidate_dataframe(
        df, columns=benchmark_version.consolidate_categories
    )
    print_dataframe(
        consolidated_df[benchmark_version.generate_printing],
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing,
        ),
    )

    # Save the number of benchmarks for later printing.
    number_of_benchmarks = df.shape[0]
    # Ask the user for confirmation to generate the files.
    # If the user passed `--yes`, we skip the confirmation.
    if yes:
        console.info(
            "We will generate {} "
            + "{benchmark}.".format(
                benchmark="benchmark" if number_of_benchmarks == 1 else "benchmarks"
            ),
            number_of_benchmarks,
        )
    elif not click.confirm(
        "We will generate {} benchmarks. Continue?".format(number_of_benchmarks)
    ):
        console.error("Exiting. No benchmarks were generated.")

    # Generate the benchmarks.
    with click.progressbar(
        df.iterrows(),
        length=number_of_benchmarks,
        show_pos=True,
        label="Generating benchmarks",
    ) as bar:
        for _, row in bar:
            relative_path, file_basename = os.path.split(row["name"])
            mappings = benchmark_version.generate_mapping
            kwargs = {"name": file_basename, "relative_path": relative_path}
            for key, value in mappings.items():
                kwargs[value] = row[key]

            write_benchmark(**kwargs)

    # Finish up by telling the user how to submit the benchmarks.
    console.info(
        "Finished! You can submit the jobs with {}.",
        "mdbenchmark submit",
    )
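# Usage sketch for the generate command through click's CliRunner. The flag
# spellings below are assumptions inferred from do_generate's parameters, the
# name/module/host values are placeholders, and `cli` is assumed to be the
# top-level click group available at module level.
def _example_invoke_generate():
    from click.testing import CliRunner

    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "generate",
            "--name=protein",
            "--module=gromacs/2018.3",
            "--host=draco",
            "--min-nodes=1",
            "--max-nodes=5",
            "--yes",
        ],
    )
    return result.output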