示例#1
0
def main():
    """Run hyriseBenchmarkTPCH in JCC-H mode and validate its console output.

    The benchmark binary is launched through run_benchmark with skewed JCC-H query parameters, four clients and the
    scheduler enabled. Afterwards, the lines the binary is expected to print are matched one after another, the
    benchmark is closed and its exit status is checked.
    """
    build_dir = initialize()

    # Run JCC-H and validate its output using pexpect and check if all queries were successfully verified with sqlite.
    # Values are passed as strings; quoted values carry their own single quotes.
    arguments = {
        "--scale": ".01",
        "--chunk_size": "10000",
        "--queries": "'2,4,6'",
        "--time": "10",
        "--runs": "100",
        "--warmup": "10",
        "--encoding": "'LZ4'",
        "--compression": "'SIMD-BP128'",
        "--indexes": "false",
        "--scheduler": "true",
        "--clients": "4",
        "--jcch": "skewed",
        "--verify": "true",
        "--dont_cache_binary_tables": "true",
    }

    # NOTE(review): the trailing True is presumably a verbosity flag - confirm against run_benchmark.
    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCH", True)

    # The expectations are matched sequentially, so the order below has to follow the order of the benchmark output.
    expected_lines = (
        "Running in multi-threaded mode using all available cores",
        "4 simulated clients are scheduling items in parallel",
        "Running benchmark in 'Ordered' mode",
        "Encoding is 'LZ4'",
        "Chunk size is 10000",
        "Max runs per item is 100",
        "Max duration per item is 10 seconds",
        "Warmup duration per item is 10 seconds",
        "Benchmarking Queries: [ 2, 4, 6 ]",
        "JCC-H scale factor is 0.01",
        "Using prepared statements: no",
        "Using JCC-H dbgen from",
        "JCC-H query parameters are skewed",
        "calling external qgen",
        "Multi-threaded Topology:",
    )
    for expected_line in expected_lines:
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)
def main():
    """Exercise hyriseBenchmarkTPCH end to end and validate its console output and result files.

    The following benchmark invocations are performed in order:
      1. A run in 'Shuffled' mode with prepared statements. Its console output is matched and the JSON result file
         is compared against the command line arguments. The file is also passed to CompareBenchmarkScriptTest.
      2. A run with --verify and the scheduler enabled, whose console output is matched.
      3. A run with --visualize, after which the SVG plan file must exist and contain the expected chunk count.
      4. Two runs (the second one with the scheduler) that write JSON files for CompareBenchmarkScriptTest.

    The process exits with status 1 if the JSON output or the visualization file is missing, if the visualization
    lacks the pruning information, or if any of the JSON checks reported a mismatch.
    """
    build_dir = initialize()
    compare_benchmarks_path = f"{build_dir}/../scripts/compare_benchmarks.py"
    output_filename = f"{build_dir}/tpch_output.json"

    # Accumulates the results of all check_json calls; evaluated after the visualization test below.
    return_error = False

    # First, run TPC-H and validate it using pexpect. After this run, check if an output file was created and if it
    # matches the arguments.
    arguments = {
        "--scale": ".01",
        "--use_prepared_statements": "true",
        "--queries": "'1,13,19'",
        "--time": "10",
        "--runs": "-1",
        "--mode": "'Shuffled'",
        "--clustering": "Pruning",
        "--encoding": "'Dictionary'",
        "--compression": "'Fixed-size byte-aligned'",
        "--indexes": "true",
        "--scheduler": "false",
        "--clients": "1",
        "--dont_cache_binary_tables": "true",
        "--output": output_filename,
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCH", True)

    # The expectations are matched sequentially, so they have to follow the order of the benchmark output.
    for expected_line in (
        f"Writing benchmark results to '{output_filename}'",
        "Running in single-threaded mode",
        "1 simulated client is scheduling items",
        "Running benchmark in 'Shuffled' mode",
        "Encoding is 'Dictionary'",
        "Max duration per item is 10 seconds",
        "No warmup runs are performed",
        "Not caching tables as binary files",
        "Benchmarking Queries: [ 1, 13, 19 ]",
        "TPC-H scale factor is 0.01",
        "Using prepared statements: yes",
        "Sorting tables",
        "Creating index on customer [ c_custkey ]",
        "Preparing queries",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Without the output file, none of the JSON checks can be performed. Fail with a clear message instead of running
    # into a FileNotFoundError when opening the file.
    output_path = arguments["--output"].replace("'", "")
    if not os.path.isfile(output_path):
        print("ERROR: Cannot find output file " + arguments["--output"])
        sys.exit(1)

    with open(output_path) as f:
        output = json.load(f)

    # `not <size>` is False (which equals 0) for any non-zero table size.
    return_error = check_json(
        not output["summary"]["table_size_in_bytes"], 0, "Table size is zero.", return_error
    )
    return_error = check_json(
        output["context"]["scale_factor"],
        float(arguments["--scale"]),
        "Scale factor doesn't match with JSON:",
        return_error,
        0.001,
    )
    # The benchmark names carry a "TPC-H " prefix and a zero-padded number, whereas the argument holds plain numbers.
    expected_queries = arguments["--queries"].replace("'", "").split(",")
    for i, expected_query in enumerate(expected_queries):
        return_error = check_json(
            output["benchmarks"][i]["name"].replace("TPC-H 0", "").replace("TPC-H ", ""),
            expected_query,
            "Query doesn't match with JSON:",
            return_error,
        )
    # --time is given in seconds; it is scaled by 1e9 for the comparison with the JSON value.
    return_error = check_json(
        output["context"]["max_duration"],
        int(arguments["--time"]) * 1e9,
        "Max duration doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["max_runs"], int(arguments["--runs"]), "Max runs don't match with JSON:", return_error
    )
    return_error = check_json(
        output["context"]["benchmark_mode"],
        arguments["--mode"].replace("'", ""),
        "Benchmark mode doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["encoding"]["default"]["encoding"],
        arguments["--encoding"].replace("'", ""),
        "Encoding doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["encoding"]["default"]["compression"],
        arguments["--compression"].replace("'", ""),
        "Compression doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        str(output["context"]["using_scheduler"]).lower(),
        arguments["--scheduler"],
        "Scheduler doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["clients"],
        int(arguments["--clients"]),
        "Client count doesn't match with JSON:",
        return_error,
    )

    # Feed the same result file to the comparison script test as both inputs.
    CompareBenchmarkScriptTest(compare_benchmarks_path, output_filename, output_filename).run()

    # Run TPC-H and validate its output using pexpect and check if all queries were successfully verified with sqlite.
    arguments = {
        "--scale": ".01",
        "--chunk_size": "10000",
        "--queries": "'2,4,6'",
        "--time": "10",
        "--runs": "100",
        "--warmup": "10",
        "--encoding": "'LZ4'",
        "--compression": "'SIMD-BP128'",
        "--indexes": "false",
        "--scheduler": "true",
        "--clients": "4",
        "--verify": "true",
        "--dont_cache_binary_tables": "true",
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCH", True)

    for expected_line in (
        "Running in multi-threaded mode using all available cores",
        "4 simulated clients are scheduling items in parallel",
        "Running benchmark in 'Ordered' mode",
        "Encoding is 'LZ4'",
        "Chunk size is 10000",
        "Max runs per item is 100",
        "Max duration per item is 10 seconds",
        "Warmup duration per item is 10 seconds",
        "Benchmarking Queries: [ 2, 4, 6 ]",
        "TPC-H scale factor is 0.01",
        "Using prepared statements: no",
        "Multi-threaded Topology:",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Run TPC-H and create query plan visualizations. Test that pruning works end-to-end, that is from the command line
    # parameter all the way to the visualizer.
    arguments = {
        "--scale": ".01",
        "--chunk_size": "10000",
        "--queries": "'6'",
        "--runs": "1",
        "--visualize": "true",
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCH", True)

    for expected_line in (
        "Visualizing the plans into SVG files. This will make the performance numbers invalid.",
        "Chunk size is 10000",
        "Benchmarking Queries: [ 6 ]",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # The file name is relative, so it is looked up in the current working directory.
    visualization_file = "TPC-H_06-PQP.svg"
    if not os.path.isfile(visualization_file):
        print("ERROR: Cannot find visualization file " + visualization_file)
        sys.exit(1)
    with open(visualization_file) as f:
        # Check whether the (a) the GetTable node exists and (b) the chunk count is correct for the given scale factor
        if "/7 chunk(s)" not in f.read():
            print("ERROR: Did not find expected pruning information in the visualization file")
            sys.exit(1)

    # Report mismatches found by the JSON checks of the first run.
    if return_error:
        sys.exit(1)

    # The next two TPC-H runs are executed to create output files with which we check the output of the
    # compare_benchmark.py script.
    output_filename_1 = f"{build_dir}/tpch_output_1.json"
    output_filename_2 = f"{build_dir}/tpch_output_2.json"

    arguments = {
        "--scale": ".01",
        "--chunk_size": "10000",
        "--queries": "'2,6,15'",
        "--runs": "10",
        "--output": output_filename_1,
        "--dont_cache_binary_tables": "true",
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCH", True)
    benchmark.expect_exact(f"Writing benchmark results to '{output_filename_1}'")

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Second run: same arguments, different output file, scheduler enabled. The value is the string "true" like every
    # other flag value in this script, so that the command line does not depend on how a Python bool is stringified.
    arguments["--output"] = output_filename_2
    arguments["--scheduler"] = "true"
    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCH", True)
    benchmark.expect_exact(f"Writing benchmark results to '{output_filename_2}'")

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    CompareBenchmarkScriptTest(compare_benchmarks_path, output_filename_1, output_filename_2).run()
def main():
    """Exercise hyriseBenchmarkJoinOrder on the IMDB sample data and validate its outputs.

    Two benchmark invocations are performed:
      1. A run in 'Shuffled' mode on a subset of four queries. Its console output is matched and the JSON result
         file is compared against the command line arguments.
      2. A run with --verify and the scheduler enabled, whose console output is matched.

    The process exits with status 1 if the JSON output file is missing or, after the second run, if any of the JSON
    checks reported a mismatch.
    """
    build_dir = initialize()

    # Accumulates the results of all check_json calls; evaluated at the very end.
    return_error = False

    arguments = {
        "--table_path": "'resources/test_data/imdb_sample/'",
        "--queries": "'21c,22b,23c,24a'",
        "--time": "10",
        "--runs": "100",
        "--output": "'json_output.txt'",
        "--mode": "'Shuffled'",
        "--encoding": "'Unencoded'",
        "--clients": "1",
        "--scheduler": "false",
    }

    # Binary tables would be written into the table_path. In CI, this path is shared by different targets that are
    # potentially executed concurrently. This sometimes led to issues with corrupted binary files.
    arguments["--dont_cache_binary_tables"] = "true"

    # Remove binary table files that may be left in the table directory.
    os.system(f'rm -rf {arguments["--table_path"]}/*.bin')

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkJoinOrder", True)

    # The expectations are matched sequentially, so they have to follow the order of the benchmark output.
    for expected_line in (
        "Writing benchmark results to 'json_output.txt'",
        "Running in single-threaded mode",
        "1 simulated client is scheduling items",
        "Running benchmark in 'Shuffled' mode",
        "Encoding is 'Unencoded'",
        "Max runs per item is 100",
        "Max duration per item is 10 seconds",
        "No warmup runs are performed",
        "Not caching tables as binary files",
        "Retrieving the IMDB dataset.",
        "IMDB setup already complete, no setup action required",
        "Benchmarking queries from third_party/join-order-benchmark",
        "Running on tables from resources/test_data/imdb_sample/",
        "Running subset of queries: 21c,22b,23c,24a",
        "-> Executed",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Without the output file, none of the JSON checks can be performed. Fail with a clear message instead of running
    # into a FileNotFoundError when opening the file.
    output_path = arguments["--output"].replace("'", "")
    if not os.path.isfile(output_path):
        print("ERROR: Cannot find output file " + arguments["--output"])
        sys.exit(1)

    with open(output_path) as f:
        output = json.load(f)

    # `not <size>` is False (which equals 0) for any non-zero table size.
    return_error = check_json(not output["summary"]["table_size_in_bytes"], 0, "Table size is zero.", return_error)
    expected_queries = arguments["--queries"].replace("'", "").split(",")
    for i, expected_query in enumerate(expected_queries):
        return_error = check_json(
            output["benchmarks"][i]["name"],
            expected_query,
            "Query doesn't match with JSON:",
            return_error,
        )
    # --time is given in seconds; it is scaled by 1e9 for the comparison with the JSON value.
    return_error = check_json(
        output["context"]["max_duration"],
        int(arguments["--time"]) * 1e9,
        "Max duration doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["max_runs"], int(arguments["--runs"]), "Max runs don't match with JSON:", return_error
    )
    return_error = check_json(
        output["context"]["benchmark_mode"],
        arguments["--mode"].replace("'", ""),
        "Benchmark mode doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["encoding"]["default"]["encoding"],
        arguments["--encoding"].replace("'", ""),
        "Encoding doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        str(output["context"]["using_scheduler"]).lower(),
        arguments["--scheduler"],
        "Scheduler doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["clients"], int(arguments["--clients"]), "Client count doesn't match with JSON:", return_error
    )

    # Clean up binary table files again before the second run.
    os.system(f'rm -rf {arguments["--table_path"]}/*.bin')

    # Second run: no --queries argument is passed, results are verified and the scheduler is used.
    arguments = {
        "--table_path": "'resources/test_data/imdb_sample/'",
        "--time": "10",
        "--runs": "2",
        "--warmup": "2",
        "--encoding": "'LZ4'",
        "--compression": "'SIMD-BP128'",
        "--scheduler": "true",
        "--clients": "4",
        "--chunk_size": "100000",
        "--verify": "true",
        "--dont_cache_binary_tables": "true",
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkJoinOrder", True)

    for expected_line in (
        "Running in multi-threaded mode using all available cores",
        "4 simulated clients are scheduling items in parallel",
        "Running benchmark in 'Ordered' mode",
        "Encoding is 'LZ4'",
        "Chunk size is 100000",
        "Max runs per item is 2",
        "Max duration per item is 10 seconds",
        "Warmup duration per item is 2 seconds",
        "Automatically verifying results with SQLite. This will make the performance numbers invalid.",
        "Benchmarking queries from third_party/join-order-benchmark",
        "Running on tables from resources/test_data/imdb_sample/",
        "Multi-threaded Topology:",
        "- Warming up for 10a",
        "- Benchmarking 10a",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Report mismatches found by the JSON checks of the first run.
    if return_error:
        sys.exit(1)
def main():
    """Exercise hyriseBenchmarkFileBased and validate its console output and result files.

    Three benchmark invocations are performed:
      1. A run in 'Shuffled' mode on a single query, whose console output is matched.
      2. The same run again with a different output file, so that CompareBenchmarkScriptTest receives two distinct
         result files. The JSON of this second run is compared against the command line arguments.
      3. A run with --verify and the scheduler enabled, whose console output is matched.

    The process exits with status 1 if the JSON output file is missing or, after the last run, if any of the JSON
    checks reported a mismatch.
    """
    build_dir = initialize()
    compare_benchmarks_path = f"{build_dir}/../scripts/compare_benchmarks.py"
    output_filename_1 = f"{build_dir}/file_based_output_1.json"

    # Accumulates the results of all check_json calls; evaluated at the very end.
    return_error = False

    arguments = {
        "--table_path": "'resources/test_data/tbl/file_based/'",
        "--query_path": "'resources/test_data/queries/file_based/'",
        "--queries": "'select_statement'",
        "--time": "10",
        "--runs": "100",
        "--output": output_filename_1,
        "--mode": "'Shuffled'",
        "--encoding": "'Unencoded'",
        "--scheduler": "false",
        "--clients": "1",
    }

    # Binary tables would be written into the table_path. In CI, this path is shared by different targets that are
    # potentially executed concurrently. This sometimes led to issues with corrupted binary files.
    arguments["--dont_cache_binary_tables"] = "true"

    # Remove binary table files that may be left in the table directory.
    os.system(f'rm -rf {arguments["--table_path"]}/*.bin')

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkFileBased", True)

    # The expectations are matched sequentially, so they have to follow the order of the benchmark output.
    for expected_line in (
        f"Writing benchmark results to '{output_filename_1}'",
        "Running in single-threaded mode",
        "1 simulated client is scheduling items",
        "Running benchmark in 'Shuffled' mode",
        "Encoding is 'Unencoded'",
        "Max runs per item is 100",
        "Max duration per item is 10 seconds",
        "No warmup runs are performed",
        "Not caching tables as binary files",
        "Benchmarking queries from resources/test_data/queries/file_based/",
        "Running on tables from resources/test_data/tbl/file_based/",
        "Running subset of queries: select_statement",
        "-> Executed",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # The second run must write to its own file. Reusing the name of the first output would overwrite it and make
    # the comparison below compare a file with itself.
    output_filename_2 = f"{build_dir}/file_based_output_2.json"

    # Second run for compare_benchmark.py test
    arguments["--output"] = output_filename_2
    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkFileBased", True)
    benchmark.expect_exact(f"Writing benchmark results to '{output_filename_2}'")

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    CompareBenchmarkScriptTest(compare_benchmarks_path, output_filename_1, output_filename_2).run()

    # Without the output file, none of the JSON checks can be performed. Fail with a clear message instead of running
    # into a FileNotFoundError when opening the file.
    output_path = arguments["--output"].replace("'", "")
    if not os.path.isfile(output_path):
        print("ERROR: Cannot find output file " + arguments["--output"])
        sys.exit(1)

    with open(output_path) as f:
        output = json.load(f)

    # `not <size>` is False (which equals 0) for any non-zero table size.
    return_error = check_json(not output["summary"]["table_size_in_bytes"], 0, "Table size is zero.", return_error)
    return_error = check_json(
        output["benchmarks"][0]["name"],
        arguments["--queries"].replace("'", "").split(",")[0],
        "Query doesn't match with JSON:",
        return_error,
    )
    # --time is given in seconds; it is scaled by 1e9 for the comparison with the JSON value.
    return_error = check_json(
        output["context"]["max_duration"],
        int(arguments["--time"]) * 1e9,
        "Max duration doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["max_runs"], int(arguments["--runs"]), "Max runs don't match with JSON:", return_error
    )
    return_error = check_json(
        output["context"]["benchmark_mode"],
        arguments["--mode"].replace("'", ""),
        "Benchmark mode doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["encoding"]["default"]["encoding"],
        arguments["--encoding"].replace("'", ""),
        "Encoding doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        str(output["context"]["using_scheduler"]).lower(),
        arguments["--scheduler"],
        "Scheduler doesn't match with JSON:",
        return_error,
    )
    return_error = check_json(
        output["context"]["clients"], int(arguments["--clients"]), "Client count doesn't match with JSON:", return_error
    )

    # Third run: results are verified and the scheduler is used with four clients.
    arguments = {
        "--table_path": "'resources/test_data/tbl/file_based/'",
        "--query_path": "'resources/test_data/queries/file_based/'",
        "--queries": "'select_statement'",
        "--time": "10",
        "--runs": "100",
        "--warmup": "5",
        "--encoding": "'LZ4'",
        "--compression": "'SIMD-BP128'",
        "--scheduler": "true",
        "--clients": "4",
        "--verify": "true",
        "--dont_cache_binary_tables": "true",
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkFileBased", True)

    for expected_line in (
        "Running in multi-threaded mode using all available cores",
        "4 simulated clients are scheduling items in parallel",
        "Running benchmark in 'Ordered' mode",
        "Encoding is 'LZ4'",
        "Max runs per item is 100",
        "Max duration per item is 10 seconds",
        "Warmup duration per item is 5 seconds",
        "Automatically verifying results with SQLite. This will make the performance numbers invalid.",
        "Benchmarking queries from resources/test_data/queries/file_based/",
        "Running on tables from resources/test_data/tbl/file_based/",
        "Running subset of queries: select_statement",
        "Multi-threaded Topology:",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Report mismatches found by the JSON checks above.
    if return_error:
        sys.exit(1)
def main():
    """Exercise hyriseBenchmarkTPCC and validate its console output.

    Three benchmark invocations are performed: one with --verify, one with the scheduler and ten clients that writes
    a JSON result file, and one with a single client that writes a second JSON result file. Both result files are
    finally handed to CompareBenchmarkScriptTest.
    """
    build_dir = initialize()
    compare_benchmarks_path = f"{build_dir}/../scripts/compare_benchmarks.py"
    output_filename_1 = f"{build_dir}/tpcc_output_1.json"

    # Not explicitly setting all parameters and not testing all lines of the output. Many are tested in the TPCH test
    # and we want to avoid duplication. First TPC-C execution is single-threaded, second is multi-threaded. The third
    # execution is done to run the compare_benchmark script tests.
    arguments = {
        "--scale": "2",
        "--time": "30",
        "--verify": "true",
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCC", True)

    # The expectations are matched sequentially, so they have to follow the order of the benchmark output.
    for expected_line in (
        "Running benchmark in 'Shuffled' mode",
        "TPC-C scale factor (number of warehouses) is 2",
        "Consistency checks passed",
    ):
        benchmark.expect_exact(expected_line)

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Second execution: scheduler enabled, ten clients, results written to the first output file.
    arguments = {
        "--scale": "1",
        "--time": "60",
        "--consistency_checks": "true",
        "--scheduler": "true",
        "--clients": "10",
        "--output": output_filename_1,
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCC", True)

    for expected_line in (
        f"Writing benchmark results to '{output_filename_1}'",
        "Running in multi-threaded mode using all available cores",
        "10 simulated clients are scheduling items in parallel",
        "Running benchmark in 'Shuffled' mode",
        "TPC-C scale factor (number of warehouses) is 1",
    ):
        benchmark.expect_exact(expected_line)

    # Each result section has to be followed by an execution summary line.
    for procedure in ("Delivery", "New-Order", "Order-Status", "Payment", "Stock-Level"):
        benchmark.expect_exact("Results for " + procedure)
        benchmark.expect_exact("-> Executed")

    benchmark.expect_exact("Consistency checks passed")

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    # Third execution: only produces the second result file for the comparison script test.
    output_filename_2 = f"{build_dir}/tpcc_output_2.json"

    arguments = {
        "--scale": "1",
        "--time": "30",
        "--clients": "1",
        "--output": output_filename_2,
    }

    benchmark = run_benchmark(build_dir, arguments, "hyriseBenchmarkTPCC", True)
    benchmark.expect_exact(f"Writing benchmark results to '{output_filename_2}'")

    close_benchmark(benchmark)
    check_exit_status(benchmark)

    CompareBenchmarkScriptTest(compare_benchmarks_path, output_filename_1, output_filename_2).run()