def test_detect_md_engine():
    """Test that we only accept supported MD engines."""
    engine = detect_md_engine('gromacs/2016.3')
    assert engine.__name__ == 'mdbenchmark.mdengines.gromacs'

    engine = detect_md_engine('namd/123')
    assert engine.__name__ == 'mdbenchmark.mdengines.namd'

    assert detect_md_engine('someengine/123') is None


def DataFrameFromBundle(bundle):
    """Generates a DataFrame from a datreant bundle."""
    df = pd.DataFrame(
        columns=["module", "nodes", "ns/day", "run time [min]", "gpu", "host", "ncores"]
    )

    for i, sim in enumerate(bundle):
        # Older versions wrote a `version` category instead of `module`;
        # this ensures backwards compatibility.
        if "module" in sim.categories:
            module = sim.categories["module"]
        else:
            module = sim.categories["version"]
        # Call the engine-specific analysis function.
        engine = detect_md_engine(module)
        df.loc[i] = utils.analyze_run(engine=engine, sim=sim)

    if df.empty:
        console.error("There is no data for the given path.")

    # Sort the values for a consistent overview.
    df = df.sort_values(
        ["host", "module", "run time [min]", "gpu", "nodes"]
    ).reset_index(drop=True)

    return df


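# Minimal usage sketch for DataFrameFromBundle (the path and call are illustrative
# assumptions, not taken from the package; `dtr.discover` is the same call used by
# `do_submit` below):
#
#     bundle = dtr.discover(".")        # collect all benchmark treants under the CWD
#     df = DataFrameFromBundle(bundle)  # one row per benchmark, sorted for display

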
def parse_bundle(bundle, columns, sort_values_by, discard_performance=False):
    """Generates a DataFrame from a datreant.Bundle."""
    data = []

    with click.progressbar(
        bundle, length=len(bundle), label="Analyzing benchmarks", show_pos=True
    ) as bar:
        for treant in bar:
            module = treant.categories["module"]
            engine = detect_md_engine(module)
            row = utils.analyze_benchmark(engine=engine, benchmark=treant)

            version = 2
            if "version" in treant.categories:
                version = 3
            if version == 2:
                row.pop()  # multidir is not a category for version 2 data
            row += [version]

            if discard_performance:
                row = row[:2] + row[3:]

            data.append(row)

    df = pd.DataFrame(data, columns=columns)

    # Exit if no data is available.
    if df.empty:
        console.error("There is no data for the given path.")

    # Sort values by the requested columns.
    df = df.sort_values(sort_values_by).reset_index(drop=True)

    return df


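# Usage sketch for parse_bundle (mirrors the newer do_submit further below; the
# VersionFactory attributes are taken from that function, everything else is
# illustrative):
#
#     benchmark_version = VersionFactory(categories=bundle.categories).version_class
#     df = parse_bundle(
#         bundle,
#         columns=benchmark_version.submit_categories,
#         sort_values_by=benchmark_version.analyze_sort,
#     )

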
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    # Migrate from MDBenchmark<2 to MDBenchmark>=2
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    df = DataFrameFromBundle(bundles_to_start)

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    df_to_print = df_to_print.drop(columns=["ns/day", "ncores"])
    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_to_print)
    PrintDataFrame(df_short)

    # Ask the user to confirm whether they want to submit the benchmarks.
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run.
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])

    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )


def construct_generate_data(
    name,
    job_name,
    modules,
    host,
    template,
    cpu,
    gpu,
    time,
    min_nodes,
    max_nodes,
    processor,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    data = []
    for module in modules:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(module)

        # Iterate over CPUs or GPUs
        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for key, value in sorted(gpu_cpu.items()):
            # Skip the current processing unit
            if not value:
                continue

            # Generate directory name and string representation for the user.
            # Also set the `gpu` variable for later use.
            gpu = True if key == "gpu" else False
            directory = construct_directory_name(template.name, module, gpu)

            # Set up the path to the new directory as `datreant.Tree`
            base_directory = dtr.Tree(directory)

            # Do the main iteration over nodes, ranks and number of simulations
            for nodes in range(min_nodes, max_nodes + 1):
                for _ranks in number_of_ranks:
                    ranks, threads = processor.get_ranks_and_threads(
                        _ranks, with_hyperthreading=enable_hyperthreading
                    )
                    for nsim in multidir:
                        # Append the data to our list
                        data.append(
                            [
                                name,
                                job_name,
                                base_directory,
                                host,
                                engine,
                                module,
                                nodes,
                                time,
                                gpu,
                                template,
                                ranks,
                                threads,
                                enable_hyperthreading,
                                nsim,
                            ]
                        )

    return data


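# Usage sketch for construct_generate_data (all values are hypothetical; `template`
# and `processor` stand in for the objects the CLI normally builds, i.e. a host
# template with a `.name` attribute and a processor object providing
# `get_ranks_and_threads()`):
#
#     rows = construct_generate_data(
#         name="protein", job_name="bench", modules=["gromacs/2016.3"], host="draco",
#         template=template, cpu=True, gpu=False, time=15,
#         min_nodes=1, max_nodes=4, processor=processor,
#         number_of_ranks=(40,), enable_hyperthreading=False, multidir=(1,),
#     )
#     # CPU only: 4 node counts x 1 rank setting x 1 multidir value = 4 rows

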
def validate_required_files(name, modules):
    for module in modules:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(module)
        engine.check_input_file_exists(name)


def test_detect_md_engine_unknown():
    """Test that we return None for unknown engines."""
    engine = detect_md_engine("someengine/123")
    assert engine is None


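# The parametrization below is assumed for this excerpt (it is not shown in the
# original snippet); the engine/module pairs are taken from test_detect_md_engine
# above, and it requires `import pytest`.
@pytest.mark.parametrize(
    "arg, out",
    [
        ("gromacs/2016.3", "mdbenchmark.mdengines.gromacs"),
        ("namd/123", "mdbenchmark.mdengines.namd"),
    ],
)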
def test_detect_md_engine_supported(arg, out):
    """Test that we only accept supported MD engines."""
    engine = detect_md_engine(arg)
    assert engine.__name__ == out


def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories
    ).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    columns_to_drop = ["ncores", "version"]
    df_to_print = df_to_print.drop(columns=columns_to_drop)

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories
    )
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks.
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run.
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])

    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )


def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template name for the host. This should always work because
    # click does the validation for us.
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If only some of several requested modules cannot be found, we continue
    # with the remaining ones; exit only when none are available.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(
        columns=[
            "name",
            "job_name",
            "base_directory",
            "template",
            "engine",
            "module",
            "nodes",
            "run time [min]",
            "gpu",
            "host",
        ]
    )

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm("The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user.
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )
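

# Hypothetical CLI invocation corresponding to do_generate (option names are
# assumed to mirror the function parameters above and are not verified against
# the actual click declarations):
#
#     mdbenchmark generate --name protein --module gromacs/2016.3 --host draco \
#         --min-nodes 1 --max-nodes 4 --time 15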