Example #1
def test_detect_md_engine():
    """Test that we only accept supported MD engines."""

    engine = detect_md_engine('gromacs/2016.3')
    assert engine.__name__ == 'mdbenchmark.mdengines.gromacs'

    engine = detect_md_engine('namd/123')
    assert engine.__name__ == 'mdbenchmark.mdengines.namd'

    assert detect_md_engine('someengine/123') is None
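A minimal sketch of the contract these assertions pin down, assuming the engine submodules are importable as mdbenchmark.mdengines.gromacs and mdbenchmark.mdengines.namd (the function name and lookup table below are illustrative, not the project's actual implementation):

from mdbenchmark.mdengines import gromacs, namd

# Hypothetical lookup table mapping engine names to their submodules.
SUPPORTED_ENGINES = {"gromacs": gromacs, "namd": namd}


def detect_md_engine_sketch(modulename):
    """Return the engine module matching `modulename`, or None if unsupported."""
    # The engine name is everything before the first '/', e.g. 'gromacs/2016.3'.
    engine_name = modulename.split("/")[0].lower()
    return SUPPORTED_ENGINES.get(engine_name)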
Example #2
def DataFrameFromBundle(bundle):
    """Generates a DataFrame from a datreant bundle."""
    df = pd.DataFrame(
        columns=["module", "nodes", "ns/day", "run time [min]", "gpu", "host", "ncores"]
    )

    for i, sim in enumerate(bundle):
        # Older versions wrote a `version` category; this keeps backwards compatibility.
        if "module" in sim.categories:
            module = sim.categories["module"]
        else:
            module = sim.categories["version"]
        # Call the engine-specific analysis function.
        engine = detect_md_engine(module)
        df.loc[i] = utils.analyze_run(engine=engine, sim=sim)

    if df.empty:
        console.error("There is no data for the given path.")

    # Sort values by host, module, run time, GPU usage and number of nodes.
    df = df.sort_values(
        ["host", "module", "run time [min]", "gpu", "nodes"]
    ).reset_index(drop=True)

    return df
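A typical call site, as a sketch: discover the benchmark treants with datreant (imported as `dtr`, matching the later examples; older releases expose the same API as `datreant.core`) and hand the bundle to the helper. The path is purely illustrative:

import datreant as dtr

# Discover all benchmark treants below an (illustrative) benchmark directory.
bundle = dtr.discover("draco_gromacs")
df = DataFrameFromBundle(bundle)
print(df)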
Example #3
def parse_bundle(bundle, columns, sort_values_by, discard_performance=False):
    """Generates a DataFrame from a datreant.Bundle."""
    data = []

    with click.progressbar(
        bundle, length=len(bundle), label="Analyzing benchmarks", show_pos=True
    ) as bar:
        for treant in bar:
            module = treant.categories["module"]
            engine = detect_md_engine(module)
            row = utils.analyze_benchmark(engine=engine, benchmark=treant)

            version = 2
            if "version" in treant.categories:
                version = 3
            if version == 2:
                row.pop()  # multidir is not a category for version 2 data
            row += [version]

            if discard_performance:
                row = row[:2] + row[3:]

            data.append(row)

    df = pd.DataFrame(data, columns=columns)

    # Exit if no data is available
    if df.empty:
        console.error("There is no data for the given path.")

    # Sort the values by the columns given in `sort_values_by`.
    df = df.sort_values(sort_values_by).reset_index(drop=True)

    return df
Example #4
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    # Migrate from MDBenchmark < 2 to MDBenchmark >= 2.
    mds_to_dtr.migrate_to_datreant(directory)

    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    df = DataFrameFromBundle(bundles_to_start)

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")
    df_to_print = df_to_print.drop(columns=["ns/day", "ncores"])
    console.info("{}", "Benchmark Summary:")
    df_short = ConsolidateDataFrame(df_to_print)
    PrintDataFrame(df_short)

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm("The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run.
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
Example #5
def construct_generate_data(
    name,
    job_name,
    modules,
    host,
    template,
    cpu,
    gpu,
    time,
    min_nodes,
    max_nodes,
    processor,
    number_of_ranks,
    enable_hyperthreading,
    multidir,
):
    data = []
    for module in modules:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(module)

        # Iterate over CPUs or GPUs
        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for key, value in sorted(gpu_cpu.items()):
            # Skip the current processing unit
            if not value:
                continue

            # Set the `gpu` flag for later use and build the directory name.
            gpu = key == "gpu"
            directory = construct_directory_name(template.name, module, gpu)

            # Set up the path to the new directory as `datreant.Tree`
            base_directory = dtr.Tree(directory)

            # Do the main iteration over nodes, ranks and number of simulations
            for nodes in range(min_nodes, max_nodes + 1):
                for _ranks in number_of_ranks:
                    ranks, threads = processor.get_ranks_and_threads(
                        _ranks, with_hyperthreading=enable_hyperthreading
                    )
                    for nsim in multidir:

                        # Append the data to our list
                        data.append(
                            [
                                name,
                                job_name,
                                base_directory,
                                host,
                                engine,
                                module,
                                nodes,
                                time,
                                gpu,
                                template,
                                ranks,
                                threads,
                                enable_hyperthreading,
                                nsim,
                            ]
                        )

    return data
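construct_directory_name is not part of this excerpt. Judging from the inline equivalent in Example #10 below (a prefix, the module name, and an optional "_gpu" suffix), a minimal sketch could look like the following; the real helper may differ:

def construct_directory_name(template_name, module, gpu=False):
    """Build the benchmark directory name, e.g. 'draco_gromacs/2016.3_gpu'."""
    return "{}_{}{}".format(template_name, module, "_gpu" if gpu else "")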
Example #6
def validate_required_files(name, modules):
    for module in modules:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(module)
        engine.check_input_file_exists(name)
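check_input_file_exists is engine-specific and not shown in this excerpt. As an illustration only, a GROMACS-flavoured sketch that assumes the required run input is a .tpr file named after the benchmark (the real engine modules report missing files via console.error rather than raising):

import os


def check_input_file_exists_sketch(name):
    """Abort if the run input file (.tpr) for `name` is missing."""
    # Accept either 'protein' or 'protein.tpr' as the benchmark name.
    fname = name if name.endswith(".tpr") else "{}.tpr".format(name)
    if not os.path.exists(fname):
        # A plain exception keeps this sketch self-contained.
        raise FileNotFoundError(
            "File {} does not exist, but it is needed to run the benchmark.".format(fname)
        )
    return True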
Example #7
def test_detect_md_engine_unknown():
    """Test that we return None for unknown engines."""
    engine = detect_md_engine("someengine/123")
    assert engine is None
Example #8
def test_detect_md_engine_supported(arg, out):
    """Test that we only accept supported MD engines."""
    engine = detect_md_engine(arg)
    assert engine.__name__ == out
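The arg/out pair is supplied by a pytest.mark.parametrize decorator that is not part of this excerpt. Based on the expected values in Example #1, the parametrization presumably looks roughly like this:

import pytest

from mdbenchmark.mdengines import detect_md_engine


@pytest.mark.parametrize(
    "arg, out",
    [
        ("gromacs/2016.3", "mdbenchmark.mdengines.gromacs"),
        ("namd/123", "mdbenchmark.mdengines.namd"),
    ],
)
def test_detect_md_engine_supported(arg, out):
    """Test that we only accept supported MD engines."""
    assert detect_md_engine(arg).__name__ == out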
Example #9
def do_submit(directory, force_restart, yes):
    """Submit the benchmarks."""
    bundle = dtr.discover(directory)

    # Exit if no bundles were found in the current directory.
    if not bundle:
        console.error("No benchmarks found.")

    grouped_bundles = bundle.categories.groupby("started")
    try:
        bundles_not_yet_started = grouped_bundles[False]
    except KeyError:
        bundles_not_yet_started = None
    if not bundles_not_yet_started and not force_restart:
        console.error(
            "All generated benchmarks were already started once. "
            "You can force a restart with {}.",
            "--force",
        )

    # Start all benchmark simulations if a restart was requested. Otherwise
    # only start the ones that were not run yet.
    bundles_to_start = bundle
    if not force_restart:
        bundles_to_start = bundles_not_yet_started

    benchmark_version = VersionFactory(
        categories=bundles_to_start.categories).version_class

    df = parse_bundle(
        bundles_to_start,
        columns=benchmark_version.submit_categories,
        sort_values_by=benchmark_version.analyze_sort,
        discard_performance=True,
    )

    # Reformat NaN values nicely into question marks.
    df_to_print = df.replace(np.nan, "?")

    columns_to_drop = ["ncores", "version"]
    df_to_print = df_to_print.drop(columns=columns_to_drop)

    # Consolidate the data by grouping on the number of nodes and print to the
    # user as an overview.
    consolidated_df = consolidate_dataframe(
        df_to_print, columns=benchmark_version.consolidate_categories)
    print_dataframe(
        consolidated_df,
        columns=map_columns(
            map_dict=benchmark_version.category_mapping,
            columns=benchmark_version.generate_printing[1:],
        ),
    )

    # Ask the user to confirm whether they want to submit the benchmarks
    if yes:
        console.info("The above benchmarks will be submitted.")
    elif not click.confirm(
            "The above benchmarks will be submitted. Continue?"):
        console.error("Exiting. No benchmarks submitted.")

    batch_cmd = get_batch_command()
    console.info("Submitting a total of {} benchmarks.", len(bundles_to_start))
    for sim in bundles_to_start:
        # Remove files generated by a previous mdbenchmark run.
        if force_restart:
            engine = detect_md_engine(sim.categories["module"])
            cleanup_before_restart(engine=engine, sim=sim)
        sim.categories["started"] = True
        os.chdir(sim.abspath)
        subprocess.call([batch_cmd, "bench.job"])
    console.info(
        "Submitted all benchmarks. Run {} once they are finished to get the results.",
        "mdbenchmark analyze",
    )
Example #10
def do_generate(
    name,
    cpu,
    gpu,
    module,
    host,
    min_nodes,
    max_nodes,
    time,
    skip_validation,
    job_name,
    yes,
):
    """Generate a bunch of benchmarks."""
    # Validate the CPU and GPU flags
    validate_cpu_gpu_flags(cpu, gpu)

    # Validate the number of nodes
    validate_number_of_nodes(min_nodes=min_nodes, max_nodes=max_nodes)

    # Grab the template for the host. This should always work because click
    # does the validation for us.
    template = utils.retrieve_host_template(host)

    # Warn the user that NAMD support is still experimental.
    if any(["namd" in m for m in module]):
        console.warn(NAMD_WARNING, "--gpu")

    module = mdengines.normalize_modules(module, skip_validation)

    # If several modules were given and only some of them cannot be found,
    # we continue with the remaining ones.
    if not module:
        console.error("No requested modules available!")

    df_overview = pd.DataFrame(columns=[
        "name",
        "job_name",
        "base_directory",
        "template",
        "engine",
        "module",
        "nodes",
        "run time [min]",
        "gpu",
        "host",
    ])

    i = 1
    for m in module:
        # Here we detect the MD engine (supported: GROMACS and NAMD).
        engine = mdengines.detect_md_engine(m)

        # Check if all needed files exist. Throw an error if they do not.
        engine.check_input_file_exists(name)

        gpu_cpu = {"cpu": cpu, "gpu": gpu}
        for pu, state in sorted(gpu_cpu.items()):
            if not state:
                continue

            directory = "{}_{}".format(host, m)
            gpu = False
            gpu_string = ""
            if pu == "gpu":
                gpu = True
                directory += "_gpu"
                gpu_string = " with GPUs"

            console.info("Creating benchmark system for {}.", m + gpu_string)

            base_directory = dtr.Tree(directory)

            for nodes in range(min_nodes, max_nodes + 1):
                df_overview.loc[i] = [
                    name,
                    job_name,
                    base_directory,
                    template,
                    engine,
                    m,
                    nodes,
                    time,
                    gpu,
                    host,
                ]
                i += 1

    console.info("{}", "Benchmark Summary:")

    df_short = ConsolidateDataFrame(df_overview)
    PrintDataFrame(df_short)

    if yes:
        console.info("Generating the above benchmarks.")
    elif not click.confirm(
            "The above benchmarks will be generated. Continue?"):
        console.error("Exiting. No benchmarks generated.")

    for _, row in df_overview.iterrows():
        relative_path, file_basename = os.path.split(row["name"])
        write_benchmark(
            engine=row["engine"],
            base_directory=row["base_directory"],
            template=row["template"],
            nodes=row["nodes"],
            gpu=row["gpu"],
            module=row["module"],
            name=file_basename,
            relative_path=relative_path,
            job_name=row["job_name"],
            host=row["host"],
            time=row["run time [min]"],
        )

    # Provide some output for the user
    console.info(
        "Finished generating all benchmarks.\n"
        "You can now submit the jobs with {}.",
        "mdbenchmark submit",
    )