def table_dump(config, artifact_processor):
    """ Create a human readable table for the output comparison
        between the current test results and the historical results
        (if there are any) """
    text_table = PrettyTable()
    text_table.field_names = FIELD_NAMES
    text_table.align['suite'] = 'l'
    text_table.align['test'] = 'l'
    ret = 0
    for bench_name in sorted(config.benchmarks):
        filename = "{}.json".format(bench_name)
        gbench_run_results = GBenchRunResult.from_benchmark_file(filename)

        for key in sorted(gbench_run_results.benchmarks.keys()):
            result = gbench_run_results.benchmarks.get(key)
            LOG.debug("%s Result:\n%s", bench_name, result)

            comparison = artifact_processor.get_comparison(
                bench_name, result, config.lax_tolerance)
            if comparison.get('pass') == 'FAIL':
                ret = 1

            row_values = list(
                map(
                    lambda field_name: formatFields(
                        field_name, comparison.get(field_name, 0)),
                    text_table.field_names))
            text_table.add_row(row_values)
    print("")
    print(text_table)
    print("")
    return ret
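
# Usage sketch (hypothetical driver): the return value flags any FAIL rows so
# a CI caller can propagate it as the process exit code.
#
#   ret = table_dump(config, artifact_processor)
#   sys.exit(ret)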
def validate_result(self):
    """read the results file"""
    # Make sure the file exists before we try to open it.
    # If it's not there, we'll dump out the contents of the directory to make it
    # easier to determine whether or not we are crazy when running Jenkins.
    if not os.path.exists(self.test_histogram_path):
        LOG.error("=" * 50)
        LOG.error("Directory Contents: {}".format(
            os.path.dirname(self.test_histogram_path)))
        LOG.error("\n".join(
            os.listdir(os.path.dirname(self.test_histogram_path))))
        LOG.error("=" * 50)
        msg = "Unable to find OLTP-Bench result file '{}'".format(
            self.test_histogram_path)
        raise RuntimeError(msg)

    with open(self.test_histogram_path) as oltp_result_file:
        test_result = json.load(oltp_result_file)
    unexpected_result = test_result.get("unexpected", {}).get("HISTOGRAM")
    if unexpected_result and unexpected_result.keys():
        for test in unexpected_result.keys():
            if unexpected_result[test] != 0:
                LOG.error(str(unexpected_result))
                sys.exit(ErrorCode.ERROR)
    else:
        # A missing or empty histogram means the benchmark reported nothing,
        # which is also a failure.
        raise RuntimeError(str(unexpected_result))
def _execute_benchmark(self, cmd):
    """
    Execute the microbenchmark command provided.

    Parameters
    ----------
    cmd : str
        The command to be executed in order to run the microbenchmark.

    Returns
    -------
    ret_code : int
        The return value from the benchmark process. 0 if successful.
    err : Error
        The error that occurred. None if successful.
    """
    LOG.debug(
        f'Executing command [num_threads={self.config.num_threads}]: {cmd}')
    try:
        output = subprocess.check_output(cmd,
                                         stderr=subprocess.STDOUT,
                                         shell=True)
        pretty_format_json = json.dumps(json.loads(
            output.decode('utf8').replace("'", '"')),
            indent=4)
        LOG.debug(f'OUTPUT: {pretty_format_json}')
        return 0, None
    except subprocess.CalledProcessError as err:
        LOG.error(err)
        return err.returncode, err
    except Exception as err:
        return 1, err
def collect_artifact_stats(collectors):
    """
    Takes an array of collector classes, executes the collectors and
    combines the result.

    Args:
        collectors - An array of BaseArtifactStatsCollector sub classes
    Returns:
        exit_code - (int) The exit code of the collection task
        metrics - (dict) The combined metrics from all the collectors
    """
    aggregated_metrics = {}
    exit_code = 0
    try:
        for collector in collectors:
            # `args` is the module-level argparse namespace
            collector_instance = collector(is_debug=args.debug)
            LOG.info(f'Starting {collector_instance.__class__.__name__} collection')
            try:
                exit_code, results = run_collector(collector_instance)
                check_for_conflicting_metric_keys(aggregated_metrics, results)
                aggregated_metrics.update(results)
            except Exception as err:
                exit_code = 1 if exit_code == 0 else exit_code
                LOG.error(err)
            collector_instance.teardown()
            if exit_code:
                LOG.error(f'{collector_instance.__class__.__name__} failed. '
                          'Stopping all artifact stats collection')
                break
            LOG.info(f'{collector_instance.__class__.__name__} finished successfully')
    except Exception as err:
        exit_code = 1 if exit_code == 0 else exit_code
        LOG.error(err)
    return exit_code, aggregated_metrics
def run_command(command,
                error_msg="",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=None,
                printable=True,
                silent_start=False):
    """ General purpose wrapper for running a subprocess """
    if not silent_start:
        LOG.info(f'Running subprocess: {command}')
    p = subprocess.Popen(shlex.split(command),
                         stdout=stdout,
                         stderr=stderr,
                         cwd=cwd)
    while p.poll() is None:
        if printable:
            if stdout == subprocess.PIPE:
                out = p.stdout.readline()
                if out:
                    LOG.info(out.decode("utf-8").rstrip("\n"))

    rc = p.poll()
    return rc, p.stdout, p.stderr
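
# A minimal sanity-check sketch for run_command (hedged: assumes LOG is the
# module-level logger configured elsewhere in this repo). `echo` exits with 0,
# so rc should be 0 on any POSIX system.
def _example_run_command():
    rc, stdout, stderr = run_command("echo hello", silent_start=True)
    assert rc == 0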
def build_oltp(self):
    for command in constants.OLTP_ANT_COMMANDS:
        error_msg = "Error: unable to run \"{}\"".format(command)
        rc, stdout, stderr = run_command(command, error_msg)
        if rc != ErrorCode.SUCCESS:
            LOG.error(stderr)
            sys.exit(rc)
def report_oltpbench_result(env, server_data, results_dir, username,
                            password, mem_metrics, query_mode='simple'):
    """ Parse and format the data from server_data and the results_dir into
    a JSON body and send those results to the performance storage service """
    LOG.debug("Parsing OLTPBench results and assembling request body.")
    metadata, timestamp, benchmark_type, parameters, metrics = parse_oltpbench_data(
        results_dir)
    add_mem_metrics(metrics, mem_metrics)
    parameters['query_mode'] = query_mode
    parameters['max_connection_threads'] = server_data.get(
        'max_connection_threads')
    metadata['environment']['wal_device'] = server_data.get('wal_device')
    result = {
        'metadata': metadata,
        'timestamp': timestamp,
        'type': benchmark_type,
        'parameters': parameters,
        'metrics': metrics
    }
    send_result(env, '/oltpbench/', username, password, result)
def execute(self, sql, autocommit=True, expect_result=True, user=DEFAULT_DB_USER):
    """
    Opens a connection to the DB and executes a SQL statement.

    WARNING: this is a really simple (and barely thought-through) client
    execution interface. Users might need to extend this with more arguments
    or error checking. Only the SET SQL command has been tested.

    :param sql: SQL statement to be executed
    :param autocommit: If the connection should be set to autocommit. For SQL
        that should not run in a transaction, e.g. SET, this should be True.
    :param expect_result: True if result rows are fetched and returned
    :param user: User of this connection
    :return: None if error or not expecting results, rows fetched when
        expect_result is True
    """
    try:
        with psql.connect(port=self.db_port, host=self.db_host, user=user) as conn:
            conn.set_session(autocommit=autocommit)
            with conn.cursor() as cursor:
                cursor.execute(sql)
                if expect_result:
                    rows = cursor.fetchall()
                    return rows
        return None
    except Exception as e:
        LOG.error(f"Executing SQL = {sql} failed: ")
        # Re-raise this
        raise e
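
# Usage sketch for execute(), mirroring the one case the docstring says has
# been tested (a SET command; see gen_oltp_trace below for the real call
# site). `db_server` is a placeholder for the instance this method is bound to.
#
#   db_server.execute("SET query_trace_metrics_enable='true'",
#                     autocommit=True, expect_result=False)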
def kill_processes_listening_on_db_port(db_port):
    """ Kills any processes that are listening on the db_port """
    for other_pid in check_port(db_port):
        LOG.info("Killing existing server instance listening on port {} [PID={}]".format(
            db_port, other_pid))
        os.kill(other_pid, signal.SIGKILL)
def convert(self, output_file):
    """ Write results to a JUnit compatible xml file """
    LOG.debug("Converting Google Benchmark {NAME} to JUNIT file {JUNIT_FILE}".format(
        NAME=self.name, JUNIT_FILE=output_file))
    tree = ElementTree.ElementTree()
    test_suite_el = ElementTree.Element("testsuite")
    tree._setroot(test_suite_el)

    # add attributes to root, testsuite element
    for el_name in ["errors", "failures", "skipped", "tests", "name"]:
        # XML attribute values must be strings
        test_suite_el.set(el_name, str(getattr(self, el_name)))

    # add test results; JUnit expects one <testcase> element per test
    for test in self.testcases:
        test_el = ElementTree.SubElement(test_suite_el, "testcase")
        test_el.set("classname", test.suite_name)
        test_el.set("name", test.name)
    tree.write(output_file, xml_declaration=True, encoding='utf8')
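
# For reference, the emitted JUnit file has this shape (names and counts are
# illustrative; the attribute list comes from the loop above):
#
#   <?xml version='1.0' encoding='utf8'?>
#   <testsuite errors="0" failures="0" skipped="0" tests="2" name="data_table_benchmark">
#     <testcase classname="DataTableBenchmark" name="SimpleInsert" />
#     <testcase classname="DataTableBenchmark" name="SimpleScan" />
#   </testsuite>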
def clean_oltp(self):
    rc, stdout, stderr = run_command(constants.OLTPBENCH_GIT_CLEAN_COMMAND,
                                     "Error: unable to clean OLTP repo")
    if rc != ErrorCode.SUCCESS:
        LOG.info(stdout.read())
        LOG.error(stderr.read())
        sys.exit(rc)
def wait_for_db(self):
    """ Wait for the db server to come up """

    # Check that PID is running
    check_db_process_exists(self.db_process.pid)

    # Wait a bit before checking if we can connect, to give the system time to set up
    time.sleep(constants.DB_START_WAIT)

    # Keep trying to connect to the DBMS until we run out of attempts or we succeed
    is_db_running = False
    attempt_number = 0
    for i in range(constants.DB_CONNECT_ATTEMPTS):
        attempt_number = i + 1
        is_db_running = check_db_running(self.db_host, self.db_port)
        if is_db_running:
            break
        if attempt_number % 20 == 0:
            LOG.error("Failed to connect to DB server [Attempt #{}/{}]".format(
                attempt_number, constants.DB_CONNECT_ATTEMPTS))
            # os.system('ps aux | grep terrier | grep {}'.format(self.db_process.pid))
            # os.system('lsof -i :15721')
            traceback.print_exc(file=sys.stdout)
        time.sleep(constants.DB_CONNECT_SLEEP)

    handle_db_connection_status(is_db_running,
                                attempt_number,
                                db_pid=self.db_process.pid)
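
# Back-of-envelope bound on how long wait_for_db blocks before
# handle_db_connection_status raises (assuming the constants keep their
# obvious meanings):
#
#   DB_START_WAIT + DB_CONNECT_ATTEMPTS * DB_CONNECT_SLEEP seconds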
def print_db_logs(self):
    """ Print out the remaining DB logs """
    LOG.info("************ DB Logs Start ************")
    print_pipe(self.db_instance.db_process)
    LOG.info("************* DB Logs End *************")
def run_single_benchmark(self, bench_name, enable_perf):
    """
    Execute a single benchmark. The results will be stored in a JSON file
    and an XML file.

    Parameters
    ----------
    bench_name : str
        The name of the benchmark to run.
    enable_perf : bool
        Whether perf should be enabled for all the benchmarks.

    Returns
    -------
    ret_val : int
        The return value from the benchmark process. 0 if successful.
    """
    output_file = "{}.json".format(bench_name)
    cmd = self._build_benchmark_cmd(bench_name, output_file, enable_perf)

    # Environment variables
    os.environ["TERRIER_BENCHMARK_THREADS"] = str(self.config.num_threads)  # has to be a str
    os.environ["TERRIER_BENCHMARK_LOGFILE_PATH"] = self.config.logfile_path

    ret_val, err = self._execute_benchmark(cmd)

    if ret_val == 0:
        convert_result_xml(bench_name, output_file)
    else:
        LOG.error(f'Unexpected failure of {bench_name} [ret_val={ret_val}]')
        LOG.error(err)

    # return the process exit code
    return ret_val
def download_oltp(self):
    rc, stdout, stderr = run_command(
        constants.OLTP_GIT_COMMAND,
        "Error: unable to git clone OLTP source code")
    if rc != ErrorCode.SUCCESS:
        LOG.error(stderr)
        sys.exit(rc)
def print_output(self, filename):
    """ Print out contents of a file """
    with open(filename) as fd:
        for line in fd.readlines():
            LOG.info(line.strip())
def print_file(filename):
    """ Print out contents of a file """
    try:
        with open(filename) as file:
            for line in file.readlines():
                LOG.info(line.strip())
    except FileNotFoundError:
        LOG.error("File does not exist: '{}'".format(filename))
def handle_test_suite_result(self, test_suite_result):
    """
    Determine what to do based on the result. If continue_on_error is True
    then it will mask any errors and return success. Otherwise, it will
    return the result of the test suite.
    """
    if test_suite_result is None or test_suite_result != constants.ErrorCode.SUCCESS:
        LOG.error("The test suite failed")
        if self.continue_on_error:
            # Mask the failure, as documented above.
            LOG.error("continue_on_error is enabled. Ignoring the failure.")
            return constants.ErrorCode.SUCCESS
    return test_suite_result
def gen_oltp_trace(tpcc_weight: str, tpcc_rates: List[int],
                   pattern_iter: int) -> bool:
    """
    Generates the trace by running the OLTP TPCC benchmark on the built database.

    :param tpcc_weight: Weight for the TPCC workload
    :param tpcc_rates: Arrival rates for each phase in a pattern
    :param pattern_iter: Number of patterns
    :return: True when data generation succeeds
    """
    # Remove the old query_trace/query_text.csv
    Path(DEFAULT_QUERY_TRACE_FILE).unlink(missing_ok=True)

    # Server is running when this returns
    oltp_server = TestOLTPBench(DEFAULT_OLTP_SERVER_ARGS)
    db_server = oltp_server.db_instance
    db_server.run_db()

    # Download the OLTP repo and build it
    oltp_server.run_pre_suite()

    # Load the workload pattern - based on the tpcc.json in
    # testing/oltpbench/config
    test_case_config = DEFAULT_OLTP_TEST_CASE
    test_case_config["weights"] = tpcc_weight
    test_case = TestCaseOLTPBench(test_case_config)

    # Prep the test case and build the result dir
    test_case.run_pre_test()

    rates = tpcc_rates * pattern_iter
    config_forecast_data(test_case.xml_config, rates)

    # Turn on query trace metrics tracing
    db_server.execute("SET query_trace_metrics_enable='true'",
                      expect_result=False)

    # Run the actual test
    ret_val, _, stderr = run_command(test_case.test_command,
                                     test_case.test_error_msg,
                                     cwd=test_case.test_command_cwd)
    if ret_val != ErrorCode.SUCCESS:
        LOG.error(stderr)
        return False

    # Clean up and disconnect the DB
    db_server.stop_db()
    db_server.delete_wal()

    if not Path(DEFAULT_QUERY_TRACE_FILE).exists():
        LOG.error(
            f"Missing {DEFAULT_QUERY_TRACE_FILE} at CWD after running OLTP TPCC")
        return False

    return True
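
# Usage sketch (argument values are illustrative only; "45,43,4,4,4" is the
# canonical TPCC transaction weight mix):
#
#   ok = gen_oltp_trace(tpcc_weight="45,43,4,4,4",
#                       tpcc_rates=[100, 500, 1000],
#                       pattern_iter=3)
#   if not ok:
#       sys.exit(1)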
def read_gbench_results(gbench_results_file):
    """ Based on a test result file, get a list of all the GBenchTestResult objects """
    LOG.debug("Reading results file {}".format(gbench_results_file))
    testcases = []
    with open(gbench_results_file) as gbench_result_file:
        gbench_data = json.load(gbench_result_file)
        for benchmark in gbench_data.get('benchmarks'):
            test_result = GBenchTestResult(benchmark)
            testcases.append(test_result)
    return testcases
def start_db(db_path, db_output_file):
    """ Starts the DB process based on the DB path and writes stdout and
    stderr to the db_output_file. This returns the db output file descriptor
    and the db_process created by Popen. """
    db_output_fd = open(db_output_file, "w+")
    LOG.info("Server start: {PATH}".format(PATH=db_path))
    db_process = subprocess.Popen(shlex.split(db_path),
                                  stdout=db_output_fd,
                                  stderr=db_output_fd)
    return db_output_fd, db_process
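
# Usage sketch for start_db (the binary path is a placeholder, not a repo
# default). The caller owns both handles: terminate the process and close the
# file descriptor when done.
#
#   db_output_fd, db_process = start_db("build/bin/noisepage", "db_log.txt")
#   ...
#   db_process.terminate()
#   db_output_fd.close()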
def send_result(env, path, username, password, result):
    """ Send the results to the performance storage service. If the service
    responds with an error code this will raise an error. """
    LOG.debug("Sending request to {PATH}".format(PATH=path))
    base_url = get_base_url(env)
    try:
        result = requests.post(base_url + path,
                               json=result,
                               auth=(username, password))
        result.raise_for_status()
    except Exception as err:
        # Connection-level errors carry no response object
        if getattr(err, 'response', None) is not None:
            LOG.error(err.response.text)
        else:
            LOG.error(err)
        raise
def generate_numa_command():
    """ Return the command line string to execute numactl """
    # use all the cpus from the highest numbered numa node
    nodes = subprocess.check_output(
        "numactl --hardware | grep 'available: ' | cut -d' ' -f2", shell=True)
    if not nodes:
        return ""
    highest_cpu_node = int(nodes) - 1
    if highest_cpu_node > 0:
        LOG.debug("Highest NUMA node = {}".format(highest_cpu_node))
        LOG.debug("Enabling NUMA support")
        return "numactl --cpunodebind={} --preferred={}".format(
            highest_cpu_node, highest_cpu_node)
    # Single NUMA node: nothing to pin
    return ""
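
# Sketch of how the numactl prefix is meant to be used: prepend it to the
# benchmark invocation (benchmark_cmd is a placeholder). An empty prefix
# degrades gracefully after strip().
#
#   full_cmd = f'{generate_numa_command()} {benchmark_cmd}'.strip()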
def get_build_path(build_type):
    """ Get the path to the DB binary """
    path_list = [
        ("standard", "build"),
        ("CLion", "cmake-build-{}".format(build_type)),
    ]
    for _, path in path_list:
        db_bin_path = os.path.join(DIR_REPO, path, "bin", DEFAULT_DB_BIN)
        LOG.debug(f'Trying to find build path in {db_bin_path}')
        if os.path.exists(db_bin_path):
            return db_bin_path

    raise RuntimeError(f'No DB binary found in {path_list}')
def copy_benchmark_result(bench_name, build_dir):
    """
    Copy the benchmark result file. This is used when running in local mode.

    Parameters
    ----------
    bench_name : str
        The name of the microbenchmark.
    build_dir : str
        The path to the build directory.
    """
    result_file = f'{bench_name}.json'
    shutil.copy(result_file, build_dir)
    LOG.debug(f'Copying result file {result_file} into {build_dir}')
def handle_db_connection_status(is_db_running, attempt_number, db_pid):
    """
    Based on whether the DBMS is running and whether the db_pid exists, this
    will print the appropriate message or throw an error.
    """
    if not is_db_running:
        LOG.error("Failed to connect to DB server [Attempt #{ATTEMPT}/{TOTAL_ATTEMPTS}]".format(
            ATTEMPT=attempt_number,
            TOTAL_ATTEMPTS=constants.DB_CONNECT_ATTEMPTS))
        check_db_process_exists(db_pid)
        raise RuntimeError('Unable to connect to DBMS.')
    LOG.info("Connected to server in {} seconds [PID={}]".format(
        attempt_number * constants.DB_CONNECT_SLEEP, db_pid))
def _load_data(self) -> np.ndarray:
    """
    Load data from the query trace CSV.

    :return: Loaded 2D numpy array of [query_id, timestamp]
    """
    LOG.info(f"Loading data from {self._query_trace_file}")
    # Load data from the file
    with open(self._query_trace_file, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # NOTE: the leading space in ' timestamp' matches the header
        # emitted in the trace file.
        data = np.array([[int(r['query_id']), int(r[' timestamp'])]
                         for r in reader])
        if len(data) == 0:
            raise ValueError("Empty trace file")
        return data
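
# The trace CSV is expected to look like the following (values illustrative).
# Note the space after the comma in the header, which is why the reader key
# is ' timestamp' rather than 'timestamp':
#
#   query_id, timestamp
#   911, 1617825600000
#   912, 1617825600137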
def generate_perf_command(bench_name):
    """
    Create the command line string to execute perf.

    Parameters
    ----------
    bench_name : str
        The name of the benchmark.

    Returns
    -------
    perf_cmd : str
        The command to execute perf data collection.
    """
    perf_output_file = f'{bench_name}.perf'
    LOG.debug(f'Enabling perf data collection [output={perf_output_file}]')
    return f'perf record --output={perf_output_file}'
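
# Sketch of how the prefix composes with a benchmark binary (the benchmark
# name is illustrative). The recorded profile can then be inspected with
# `perf report --input=tpcc_benchmark.perf`.
#
#   cmd = f'{generate_perf_command("tpcc_benchmark")} ./tpcc_benchmark'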
def create_and_load_db(self):
    """ Create the database and load the data before the actual test execution. """
    cmd = "{BIN} -c {XML} -b {BENCHMARK} --create={CREATE} --load={LOAD}".format(
        BIN=constants.OLTPBENCH_DEFAULT_BIN,
        XML=self.xml_config,
        BENCHMARK=self.benchmark,
        CREATE=self.db_create,
        LOAD=self.db_load)
    error_msg = "Error: unable to create and load the database"
    rc, stdout, stderr = run_command(cmd,
                                     error_msg=error_msg,
                                     cwd=self.test_command_cwd)
    if rc != ErrorCode.SUCCESS:
        LOG.error(stderr)
        raise RuntimeError(error_msg)
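
# For reference, the formatted command expands to something like (binary,
# config path, and benchmark name illustrative):
#
#   ./oltpbenchmark -c config/tpcc_config.xml -b tpcc --create=true --load=true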
def report_microbenchmark_result(env, timestamp, config,
                                 artifact_processor_comparison):
    """ Parse and format the data from the microbenchmark tests into a JSON
    body and send those results to the performance storage service """
    LOG.debug("Parsing microbenchmark results and assembling request body.")
    metadata, test_suite, test_name, metrics = parse_microbenchmark_data(
        artifact_processor_comparison)
    parameters = parse_parameters(config)
    metadata['environment']['wal_device'] = parse_wal_device(config)
    result = {
        'metadata': metadata,
        'timestamp': int(timestamp.timestamp() * 1000),  # convert to milliseconds
        'test_suite': test_suite,
        'test_name': test_name,
        'parameters': parameters,
        'metrics': metrics
    }
    send_result(env, '/microbenchmark/', config.publish_results_username,
                config.publish_results_password, result)