def clean_oltp(self):
    """
    Run constants.OLTPBENCH_GIT_CLEAN_COMMAND through run_command.

    If the command does not finish with ErrorCode.SUCCESS, the captured
    stdout is logged at info level, the captured stderr at error level, and
    the interpreter exits with the command's return code.
    """
    status, out_pipe, err_pipe = run_command(
        constants.OLTPBENCH_GIT_CLEAN_COMMAND,
        "Error: unable to clean OLTP repo")
    if status == ErrorCode.SUCCESS:
        return
    # Failure path: surface everything the command printed, then bail out.
    LOG.info(out_pipe.read())
    LOG.error(err_pipe.read())
    sys.exit(status)
def kill_processes_listening_on_db_port(db_port):
    """
    Send SIGKILL to every PID that check_port yields for db_port, logging
    each PID before it is killed.
    """
    notice = "Killing existing server instance listening on port {} [PID={}]"
    for pid in check_port(db_port):
        LOG.info(notice.format(db_port, pid))
        os.kill(pid, signal.SIGKILL)
def print_db_logs(self):
    """
    Log the output still held in the DB process pipes, framed by a start
    banner and an end banner.
    """
    LOG.info("************ DB Logs Start ************")
    db_process = self.db_instance.db_process
    print_pipe(db_process)
    LOG.info("************* DB Logs End *************")
def run_benchmarks(self, enable_perf):
    """
    Run every benchmark listed in self.config.benchmarks.

    :param enable_perf: forwarded unchanged to run_single_benchmark
    :return: 0 if all benchmarks succeed (or if no benchmarks are
        configured), otherwise the return code of the last benchmark
        that failed
    """
    benchmarks = self.config.benchmarks
    total = len(benchmarks)
    if not total:
        # Adjacent literals instead of a backslash continuation: the
        # continuation embedded the source indentation in the message.
        LOG.error("Invalid benchmarks were specified to execute. "
                  "Try not specifying a benchmark and it will execute all.")
        return 0

    ret_val = 0
    benchmark_fail_count = 0

    # Count from 1 so the progress marker reads [1/N] ... [N/N].
    for benchmark_count, bench_name in enumerate(benchmarks, start=1):
        LOG.info("Running '{}' with {} threads [{}/{}]".format(
            bench_name, self.config.num_threads, benchmark_count, total))
        benchmark_ret_val = self.run_single_benchmark(bench_name, enable_perf)
        if benchmark_ret_val:
            # Remember the most recent failure code; keep running the rest.
            ret_val = benchmark_ret_val
            benchmark_fail_count += 1

    LOG.info("{PASSED}/{TOTAL} benchmarks passed".format(
        PASSED=total - benchmark_fail_count, TOTAL=total))
    return ret_val
def run_command(command,
                error_msg="",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=None,
                printable=True,
                silent_start=False):
    """
    General purpose wrapper for running a subprocess.

    :param command: command line string; split with shlex.split (no shell)
    :param error_msg: logged at error level when the command exits non-zero
    :param stdout: stdout target handed to subprocess.Popen
    :param stderr: stderr target handed to subprocess.Popen
    :param cwd: working directory handed to subprocess.Popen
    :param printable: when True and stdout is a pipe, each stdout line is
        logged at info level while the command runs
    :param silent_start: when True, the "Running ..." line is not logged
    :return: (return code, p.stdout, p.stderr); the two stream objects are
        returned unread beyond what was logged, so callers can .read() them

    NOTE(review): stderr is not drained while the command runs, so a command
    that writes a large amount to a piped stderr can still block; fixing that
    would change what callers receive, so it is left as is.
    """
    if not silent_start:
        LOG.info(f'Running subproccess: {command}')

    p = subprocess.Popen(shlex.split(command),
                         stdout=stdout,
                         stderr=stderr,
                         cwd=cwd)

    stream_output = printable and stdout == subprocess.PIPE
    while p.poll() is None:
        if stream_output:
            # readline blocks until a line (or EOF) arrives, so this branch
            # does not spin.
            out = p.stdout.readline()
            if out:
                LOG.info(out.decode("utf-8").rstrip("\n"))
        else:
            # Nothing to read: wait in short slices instead of hammering
            # poll() in a tight loop.
            try:
                p.wait(timeout=0.1)
            except subprocess.TimeoutExpired:
                pass

    rc = p.poll()
    if rc and error_msg:
        LOG.error(error_msg)
    return rc, p.stdout, p.stderr
def run_db(self):
    """
    Start the DB server, retrying up to constants.DB_START_ATTEMPTS times.

    Each attempt kills whatever check_port reports on self.db_port, reopens
    self.db_output_file (truncating it), launches self.db_path with stdout
    and stderr redirected to that file, and calls self.wait_for_db().

    On failure the DB is stopped and its output file is logged. The error is
    re-raised on the last attempt. Non-Exception errors (KeyboardInterrupt,
    SystemExit) are re-raised immediately after the same cleanup instead of
    triggering another start attempt.
    """
    for attempt in range(constants.DB_START_ATTEMPTS):
        # Kill any other terrier processes that are listening on our target port
        for other_pid in check_port(self.db_port):
            LOG.info(
                "Killing existing server instance listening on port {} [PID={}]"
                .format(self.db_port, other_pid))
            os.kill(other_pid, signal.SIGKILL)

        self.db_output_fd = open(self.db_output_file, "w+")
        self.db_process = subprocess.Popen(self.db_path,
                                           stdout=self.db_output_fd,
                                           stderr=self.db_output_fd)
        try:
            self.wait_for_db()
        except BaseException as err:
            # Always clean up and show the DB output, whatever went wrong.
            self.stop_db()
            LOG.error("+" * 100)
            LOG.error("DATABASE OUTPUT")
            self.print_output(self.db_output_file)
            is_last_attempt = attempt + 1 == constants.DB_START_ATTEMPTS
            # Only ordinary errors are worth another attempt; an interrupt
            # or interpreter exit must not be turned into a retry.
            if is_last_attempt or not isinstance(err, Exception):
                raise
            traceback.print_exc(file=sys.stdout)
        else:
            # The DB accepted a connection; we are done.
            return
def download_oltp(self):
    """
    Run constants.OLTPBENCH_GIT_COMMAND through run_command.

    If the command does not finish with ErrorCode.SUCCESS, the captured
    stdout is logged at info level, the captured stderr at error level, and
    the interpreter exits with the command's return code.
    """
    status, out_pipe, err_pipe = run_command(
        constants.OLTPBENCH_GIT_COMMAND,
        "Error: unable to git clone OLTP source code")
    if status == ErrorCode.SUCCESS:
        return
    # Failure path: surface everything the command printed, then bail out.
    LOG.info(out_pipe.read())
    LOG.error(err_pipe.read())
    sys.exit(status)
def print_output(self, filename):
    """
    Log the contents of a file, one stripped line per info-level log record.

    :param filename: path of the file to read
    """
    # The context manager closes the file even if reading or logging raises.
    with open(filename) as fd:
        for line in fd:
            LOG.info(line.strip())
def build_oltp(self):
    """
    Run each command in constants.OLTPBENCH_ANT_COMMANDS in order.

    The first command that does not finish with ErrorCode.SUCCESS has its
    stdout logged at info level and its stderr at error level, and the
    interpreter exits with that command's return code.
    """
    for ant_command in constants.OLTPBENCH_ANT_COMMANDS:
        failure_text = "Error: unable to run \"{}\"".format(ant_command)
        status, out_pipe, err_pipe = run_command(ant_command, failure_text)
        if status == ErrorCode.SUCCESS:
            continue
        LOG.info(out_pipe.read())
        LOG.error(err_pipe.read())
        sys.exit(status)
def print_file(filename):
    """
    Log the contents of a file, one stripped line per info-level record.
    A missing file is reported with an error-level record instead of raising.
    """
    try:
        with open(filename) as handle:
            contents = handle.readlines()
        for raw_line in contents:
            LOG.info(raw_line.strip())
    except FileNotFoundError:
        LOG.error("file not exists: '{}'".format(filename))
def start_db(db_path, db_output_file):
    """
    Start the DB process and redirect its stdout and stderr to
    db_output_file.

    :param db_path: command line used to launch the DB; split with
        shlex.split
    :param db_output_file: path opened in "w+" mode to receive the output
    :return: (db_output_fd, db_process) - the open output file object and
        the subprocess.Popen instance
    :raises: whatever shlex.split or subprocess.Popen raises; the output
        file is closed before the error propagates
    """
    db_output_fd = open(db_output_file, "w+")
    LOG.info("Server start: {PATH}".format(PATH=db_path))
    try:
        db_process = subprocess.Popen(shlex.split(db_path),
                                      stdout=db_output_fd,
                                      stderr=db_output_fd)
    except Exception:
        # Nobody will receive the file object if launching fails, so do
        # not leave it open.
        db_output_fd.close()
        raise
    return db_output_fd, db_process
def handle_db_connection_status(is_db_running, attempt_number, db_pid):
    """
    Report the outcome of trying to connect to the DBMS.

    When is_db_running is truthy a success message is logged. Otherwise the
    failure is logged, check_db_process_exists(db_pid) is called (which may
    raise on its own), and a RuntimeError is raised.
    """
    if is_db_running:
        elapsed = attempt_number * constants.DB_CONNECT_SLEEP
        LOG.info("Connected to server in {} seconds [PID={}]".format(
            elapsed, db_pid))
        return

    failure_text = "Failed to connect to DB server [Attempt #{ATTEMPT}/{TOTAL_ATTEMPTS}]"
    LOG.error(failure_text.format(
        ATTEMPT=attempt_number,
        TOTAL_ATTEMPTS=constants.DB_CONNECT_ATTEMPTS))
    check_db_process_exists(db_pid)
    raise RuntimeError('Unable to connect to DBMS.')
def print_pipe(p):
    """
    Log whatever is buffered in a subprocess's pipes.

    Calls p.communicate() and logs each stdout line at info level and each
    stderr line at error level.

    :param p: object with a communicate() method returning a
        (stdout, stderr) pair of bytes or None, e.g. subprocess.Popen
    """
    try:
        stdout, stderr = p.communicate()
    except ValueError:
        # This is a dirty workaround
        LOG.error("Error in subprocess communicate")
        LOG.error(
            "Known issue in CPython https://bugs.python.org/issue35182. Please upgrade the Python version."
        )
        return

    # errors="replace" keeps undecodable bytes from raising
    # UnicodeDecodeError (a ValueError) while we are only trying to log.
    if stdout:
        for line in stdout.decode("utf-8", errors="replace").rstrip("\n").split("\n"):
            LOG.info(line)
    if stderr:
        # Log stderr itself here (not stdout a second time).
        for line in stderr.decode("utf-8", errors="replace").rstrip("\n").split("\n"):
            LOG.error(line)
def _load_data(self) -> np.ndarray:
    """
    Read the query trace csv named by self._query_trace_file.

    The 'query_id' and ' timestamp' columns (the latter with its leading
    space, as keyed in the file header) are converted to int.

    :return: Loaded 2D numpy array of [query_id, timestamp]
    :raises ValueError: if the file contains no data rows
    """
    trace_path = self._query_trace_file
    LOG.info(f"Loading data from {trace_path}")

    rows = []
    with open(trace_path, newline='') as trace:
        for record in csv.DictReader(trace):
            rows.append([int(record['query_id']),
                         int(record[' timestamp'])])

    data = np.array(rows)
    if not len(data):
        raise ValueError("Empty trace file")
    return data
def create_and_load_db(self):
    """
    Run the OLTPBench binary with the --create / --load flags taken from
    self.db_create and self.db_load, before the actual test execution.

    :raises RuntimeError: if the command does not finish with
        ErrorCode.SUCCESS (its stdout and stderr are logged first)
    """
    failure_text = "Error: unable to create and load the database"
    template = "{BIN} -c {XML} -b {BENCHMARK} --create={CREATE} --load={LOAD}"
    command = template.format(BIN=constants.OLTPBENCH_DEFAULT_BIN,
                              XML=self.xml_config,
                              BENCHMARK=self.benchmark,
                              CREATE=self.db_create,
                              LOAD=self.db_load)

    status, out_pipe, err_pipe = run_command(command,
                                             error_msg=failure_text,
                                             cwd=self.test_command_cwd)
    if status == ErrorCode.SUCCESS:
        return
    LOG.info(out_pipe.read())
    LOG.error(err_pipe.read())
    raise RuntimeError(failure_text)
def eval(self, cid: int, model: ForecastModel) -> None:
    """
    Evaluate a fitted model on the test sequences of one cluster and log
    the L2 norm between its predictions and the labels.

    :param cid: Cluster id
    :param model: Model to use
    """
    predictions, ground_truths = [], []
    for sequence, target in self._cluster_seqs(cid,
                                               test_mode=True,
                                               with_label=True):
        predictions.append(model.predict(sequence))
        ground_truths.append(target.item())

    # FIXME:
    # simple L2 norm for comparing the prediction and results
    difference = np.array(predictions) - np.array(ground_truths)
    l2norm = np.linalg.norm(difference)
    LOG.info(
        f"[{model.name}] has L2 norm(prediction, ground truth) = {l2norm}")
def wait_for_db(self):
    """
    Wait for the db server to come up.

    Verifies the DB PID with check_pid, sleeps constants.DB_START_WAIT, then
    tries up to constants.DB_CONNECT_ATTEMPTS times to open a TCP connection
    to (self.db_host, self.db_port), sleeping constants.DB_CONNECT_SLEEP
    after each failure.

    :raises RuntimeError: if the PID is not found up front, or if no
        connection attempt succeeds
    """
    # Check that PID is running
    if not check_pid(self.db_process.pid):
        raise RuntimeError("Unable to find DBMS PID {}".format(
            self.db_process.pid))

    # Wait a bit before checking if we can connect to give the system time to setup
    time.sleep(constants.DB_START_WAIT)

    # flag to check if the db is running
    is_db_running = False

    # Keep trying to connect to the DBMS until we run out of attempts or we succeed
    for i in range(constants.DB_CONNECT_ATTEMPTS):
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            # The context manager closes the socket on success AND on
            # failure, so failed attempts do not pile up open sockets.
            with s:
                s.connect((self.db_host, int(self.db_port)))
        except Exception:
            # Any ordinary failure means "not up yet"; interrupts and
            # interpreter exits are deliberately not swallowed.
            if i > 0 and i % 20 == 0:
                LOG.error(
                    "Failed to connect to DB server [Attempt #{}/{}]".
                    format(i, constants.DB_CONNECT_ATTEMPTS))
                traceback.print_exc(file=sys.stdout)
            time.sleep(constants.DB_CONNECT_SLEEP)
            continue

        LOG.info("Connected to server in {} seconds [PID={}]".format(
            i * constants.DB_CONNECT_SLEEP, self.db_process.pid))
        is_db_running = True
        break

    if not is_db_running:
        msg = "Unable to connect to DBMS [PID={} / {}]"
        status = "RUNNING"
        if not check_pid(self.db_process.pid):
            status = "NOT RUNNING"
        msg = msg.format(self.db_process.pid, status)
        raise RuntimeError(msg)
def stop_db(self, is_dry_run=False):
    """
    Stop the DB server.

    Does nothing (beyond a debug log) when there is no DB process or when
    is_dry_run is set. A still-running process is terminated and
    self.db_process is cleared.

    :raises RuntimeError: if the process had already exited on its own; its
        logs are printed first
    """
    if not self.db_process or is_dry_run:
        LOG.debug('DB has already been stopped.')
        return

    db_process = self.db_process
    # Refresh returncode so we can tell whether the process already exited.
    db_process.poll()
    exit_code = db_process.returncode

    if exit_code is None:
        # Still (correctly) running, terminate it.
        db_process.terminate()
        LOG.info("Stopped DB successfully")
        self.db_process = None
        return

    # The DB terminated before we asked it to.
    msg = f'DB terminated with return code {exit_code}'
    LOG.info(msg)
    self.print_db_logs()
    raise RuntimeError(msg)
def create_local_dirs(self):
    """
    Create a directory for this build inside LOCAL_REPO_DIR and copy each
    configured benchmark's result into it.

    Each run creates a new directory whose name is the highest existing
    numeric directory name plus one, zero-padded to three digits. After
    three runs LOCAL_REPO_DIR holds 001, 002 and 003. The new directory's
    name is stored in self.last_build.

    Directory names are compared as integers, so 1000 correctly follows 999
    (a string comparison would rank '999' above '1000'), and entries whose
    names are not plain numbers are ignored instead of crashing int().
    """
    build_dirs = next(os.walk(LOCAL_REPO_DIR))[1]
    build_numbers = [int(name) for name in build_dirs if name.isdecimal()]
    last_build = max(build_numbers, default=0)
    next_build = os.path.join(LOCAL_REPO_DIR,
                              "{:03}".format(last_build + 1))
    LOG.info(
        "Creating new result directory in local data repository {}".format(
            next_build))
    os.mkdir(next_build)

    self.last_build = os.path.basename(next_build)

    for bench_name in self.config.benchmarks:
        copy_benchmark_result(bench_name, next_build)
def run_command(command,
                error_msg="",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=None):
    """
    General purpose wrapper for running a subprocess.

    :param command: command line string; split with shlex.split (no shell)
    :param error_msg: logged at error level when the command exits non-zero
    :param stdout: stdout target handed to subprocess.Popen; when it is a
        pipe, each stdout line is logged at info level while the command runs
    :param stderr: stderr target handed to subprocess.Popen
    :param cwd: working directory handed to subprocess.Popen
    :return: (return code, p.stdout, p.stderr); the two stream objects are
        returned unread beyond what was logged, so callers can .read() them

    NOTE(review): stderr is not drained while the command runs, so a command
    that writes a large amount to a piped stderr can still block; fixing that
    would change what callers receive, so it is left as is.
    """
    p = subprocess.Popen(shlex.split(command),
                         stdout=stdout,
                         stderr=stderr,
                         cwd=cwd)

    stream_output = stdout == subprocess.PIPE
    while p.poll() is None:
        if stream_output:
            # readline blocks until a line (or EOF) arrives, so this branch
            # does not spin.
            out = p.stdout.readline()
            if out:
                LOG.info(out.decode("utf-8").rstrip("\n"))
        else:
            # Nothing to read: wait in short slices instead of hammering
            # poll() in a tight loop.
            try:
                p.wait(timeout=0.1)
            except subprocess.TimeoutExpired:
                pass

    rc = p.poll()
    if rc and error_msg:
        LOG.error(error_msg)
    return rc, p.stdout, p.stderr
def stop_db(self):
    """
    Stop the DB server and print its logs.

    Does nothing when there is no DB process. A still-running process is
    terminated, its logs are printed, and self.db_process is cleared.

    :raises RuntimeError: if the process had already exited on its own; its
        logs are printed first
    """
    db_process = self.db_process
    if not db_process:
        return

    # Refresh returncode so we can tell whether the process already exited.
    db_process.poll()
    exit_code = db_process.returncode

    if exit_code is None:
        # Still (correctly) running, terminate it.
        db_process.terminate()
        LOG.info("DB stops normally")
        self.print_db_logs()
        self.db_process = None
        return

    # The DB terminated before we asked it to.
    msg = "DB terminated with return code {}".format(exit_code)
    LOG.info("DB exited with return code {}".format(exit_code))
    self.print_db_logs()
    raise RuntimeError(msg)
def run_benchmarks(self, enable_perf):
    """ Runs all the microbenchmarks.

    Parameters
    ----------
    enable_perf : bool
        Whether perf should be enabled for all the benchmarks.

    Returns
    -------
    ret_val : int
        the return value for the last failed benchmark. If no benchmarks
        fail (or none are configured) then it will return 0.
    """
    benchmarks = self.config.benchmarks
    total = len(benchmarks)
    if not total:
        # Adjacent literals instead of a backslash continuation: the
        # continuation embedded the source indentation in the message.
        LOG.error('Invalid benchmarks were specified to execute. '
                  'Try not specifying a benchmark and it will execute all.')
        return 0

    ret_val = 0
    benchmark_fail_count = 0

    # Count from 1 so the progress marker reads [1/N] ... [N/N].
    for benchmark_count, bench_name in enumerate(benchmarks, start=1):
        LOG.info(
            f"Running '{bench_name}' with {self.config.num_threads} threads [{benchmark_count}/{total}]"
        )
        benchmark_ret_val = self.run_single_benchmark(bench_name, enable_perf)
        if benchmark_ret_val:
            # Remember the most recent failure code; keep running the rest.
            ret_val = benchmark_ret_val
            benchmark_fail_count += 1

    LOG.info("{PASSED}/{TOTAL} benchmarks passed".format(
        PASSED=total - benchmark_fail_count, TOTAL=total))
    return ret_val
def run_db(self, is_dry_run=False):
    """
    Start the DB server, retrying up to DB_START_ATTEMPTS times.

    With is_dry_run set, only the start command is logged. Otherwise each
    attempt runs run_kill_server on self.db_port, launches the server with
    stdout/stderr captured in pipes, and reads its stdout line by line until
    has_db_started reports success or the stream ends. A failed attempt is
    followed by a sleep of 2**attempt seconds.

    :raises RuntimeError: if no attempt succeeds
    """
    started_at = time.perf_counter()
    server_args_str = generate_server_args_str(self.server_args)
    db_run_command = f'{self.build_path} {server_args_str}'

    if is_dry_run:
        LOG.info(f'Server start command: {db_run_command}')
        return

    for attempt in range(DB_START_ATTEMPTS):
        # Kill any other noisepage processes that are listening on our target port
        # early terminate the run_db if kill_server.py encounter any exceptions
        run_kill_server(self.db_port)

        # use memory buffer to hold db logs
        self.db_process = subprocess.Popen(shlex.split(db_run_command),
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
        LOG.info(
            f'Server start: {db_run_command} [PID={self.db_process.pid}]')

        if not run_check_pids(self.db_process.pid):
            LOG.info(
                f'{self.db_process.pid} does not exist. Trying again.')
            # The DB process does not exist, try starting it again
            continue

        # Consume stdout until the startup line shows up or the stream ends.
        for raw_db_log_line in iter(self.db_process.stdout.readline, b""):
            if has_db_started(raw_db_log_line, self.db_port,
                              self.db_process.pid):
                elapsed = round(time.perf_counter() - started_at, 2)
                LOG.info(
                    f'DB process is verified as running in {elapsed} sec')
                return

        time.sleep(2**attempt)  # exponential backoff

    elapsed = round(time.perf_counter() - started_at, 2)
    raise RuntimeError(
        f'Failed to start DB after {DB_START_ATTEMPTS} attempts and {elapsed} sec'
    )
def _do_fit(self, train_seqs: List[Tuple[np.ndarray, np.ndarray]]) -> None:
    """
    Perform training on the time series trace data.

    Runs self._epochs passes over train_seqs with an Adam optimizer
    (learning rate self._lr) and MSE loss. The hidden cell state is reset
    before every sample. The loss of the most recent sample is logged every
    25 epochs and once more at the end.

    With an empty train_seqs or zero epochs nothing is trained and no loss
    is logged (previously this raised UnboundLocalError on the loss log).

    :param train_seqs: Training sequences of (seq, label)
    :return: None
    """
    epochs = self._epochs
    lr = self._lr

    # Training specifics
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    LOG.info(f"Training with {len(train_seqs)} samples, {epochs} epochs:")

    # Stays None until at least one sample has been trained on.
    single_loss = None
    for i in range(epochs):
        for seq, labels in train_seqs:
            optimizer.zero_grad()

            # Fresh hidden/cell state for every sample.
            self._hidden_cell = (torch.zeros(1, 1, self._hidden_layer_size),
                                 torch.zeros(1, 1, self._hidden_layer_size))

            seq = torch.FloatTensor(seq).view(-1)
            labels = torch.FloatTensor(labels).view(-1)

            y_pred = self(seq)

            single_loss = loss_function(y_pred, labels)
            single_loss.backward()
            optimizer.step()

        if i % 25 == 0 and single_loss is not None:
            LOG.info(
                f'[LSTM FIT]epoch: {i+1:3} loss: {single_loss.item():10.8f}'
            )

    if single_loss is not None:
        LOG.info(
            f'[LSTM FIT]epoch: {epochs:3} loss: {single_loss.item():10.10f}')
def run_db(self):
    """
    Start the DB server, retrying up to constants.DB_START_ATTEMPTS times.

    Each attempt runs run_kill_server on self.db_port, launches self.db_path
    with stdout/stderr captured in pipes, and logs the server's stdout line
    by line until a line ends with the expected "Listening on ..." text or
    the stream ends.

    :raises RuntimeError: if no attempt succeeds
    """
    for _ in range(constants.DB_START_ATTEMPTS):
        # Kill any other noisepage processes that are listening on our target port
        # early terminate the run_db if kill_server.py encounter any exceptions
        run_kill_server(self.db_port)

        # use memory buffer to hold db logs
        self.db_process = subprocess.Popen(shlex.split(self.db_path),
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
        LOG.info("Server start: {PATH} [PID={PID}]".format(
            PATH=self.db_path, PID=self.db_process.pid))

        if not run_check_pids(self.db_process.pid):
            # The DB process does not exist, try starting it again
            continue

        expected_suffix = "[info] Listening on Unix domain socket with port {PORT} [PID={PID}]".format(
            PORT=self.db_port, PID=self.db_process.pid)

        # Consume stdout until the startup line shows up or the stream ends.
        for raw_line in iter(self.db_process.stdout.readline, b""):
            log_text = raw_line.decode("utf-8").rstrip("\n")
            LOG.info(log_text)
            if log_text.endswith(expected_suffix):
                LOG.info("DB process is verified as running")
                return

    raise RuntimeError("Failed to start DB after {} attempts".format(
        constants.DB_START_ATTEMPTS))
choices=PERFORMANCE_STORAGE_SERVICE_API.keys(), help="Environment in which to store performance results") parser.add_argument("--publish-username", type=str, help="Performance Storage Service Username") parser.add_argument("--publish-password", type=str, help="Performance Storage Service password") args = parser.parse_args() if args.debug: LOG.setLevel(logging.DEBUG) # Get the BaseBinaryMetricsCollector subclasses imported from binary_metrics.binary_metrics_collectors # Effectively this adds each binary metric collector class into an array to be instantiated later. collectors = [obj for obj in BaseArtifactStatsCollector.__subclasses__()] exit_code, aggregated_metrics = collect_artifact_stats(collectors) if not exit_code: LOG.info(f'Artifact stats: {aggregated_metrics}') if args.publish_results != 'none': report_artifact_stats_result(args.publish_results, aggregated_metrics, args.publish_username, args.publish_password) logging.shutdown() sys.exit(exit_code)
eval_size=args.eval_size, horizon_len=args.horizon_len) models = forecaster.train(models_kwargs) # Save the model if args.model_save_path: with open(args.model_save_path, "wb") as f: pickle.dump(models, f) else: # Do inference on a trained model with open(args.model_load_path, "rb") as f: models = pickle.load(f) forecaster = Forecaster(trace_file=args.test_file, test_mode=True, interval_us=INTERVAL_MICRO_SEC, seq_len=args.seq_len, eval_size=args.eval_size, horizon_len=args.horizon_len) # FIXME: # Assuming all the queries in the current trace file are from # the same cluster for now query_pred = forecaster.predict(0, models[0][args.test_model]) # TODO: # How are we consuming predictions? for qid, ts in query_pred.items(): LOG.info(f"[Query: {qid}] pred={ts[:10]}")
def print_output(filename):
    """
    Log the contents of a file, one stripped line per info-level record.
    """
    with open(filename) as source:
        content = source.readlines()
    for entry in content:
        LOG.info(entry.strip())
def check_db_process_exists(db_pid):
    """
    Log that the DBMS is running when check_pid(db_pid) is truthy.

    :raises RuntimeError: when check_pid(db_pid) is falsy
    """
    if check_pid(db_pid):
        LOG.info("DBMS running on PID {}".format(db_pid))
        return
    raise RuntimeError("Unable to find DBMS PID {}".format(db_pid))
def print_output(self, filename):
    """
    Log the whole file as a single info-level record: an "Output:" header
    followed by every line of the file, stripped and newline-joined.
    """
    with open(filename, "r") as fd:
        stripped_lines = [raw.strip() for raw in fd]
        body = "\n".join(stripped_lines)
        LOG.info("Output:\n" + body)
    return