def main(argv):
    """Command-line entry point: compile a fio job file and report on it.

    The job text is read from stdin when the job file argument is ``-``,
    otherwise from the named file.  Every ``NAME=VALUE`` item of
    ``argv_obj.params`` is split on the first ``=``; the value is passed
    through ``parse_value`` and the resulting mapping is handed to
    ``fio_cfg_compile`` together with the job text and the job file name.

    Depending on ``argv_obj.action`` the function prints:

    * ``estimate``  - the summed ``execution_time`` of all compiled
      sections, formatted by ``sec_to_str``;
    * ``num_tests`` - the total number of items over all sections;
    * ``compile``   - every section rendered with ``str``, joined by a
      ``#-----...`` separator line.

    Any other action prints nothing.

    Args:
        argv: argument list, forwarded unchanged to ``parse_args``.

    Returns:
        Always ``0``.

    Raises:
        ValueError: if a parameter item does not contain ``=``.
    """
    argv_obj = parse_args(argv)

    if argv_obj.jobfile == '-':
        job_cfg = sys.stdin.read()
    else:
        # Context manager so the descriptor is closed deterministically
        # instead of whenever the file object happens to be collected.
        with open(argv_obj.jobfile) as fd:
            job_cfg = fd.read()

    params = {}
    for param_val in argv_obj.params:
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and give the user no hint about what was wrong.
        if '=' not in param_val:
            raise ValueError(
                "Parameter {0!r} should have NAME=VALUE form".format(param_val))
        name, val = param_val.split("=", 1)
        params[name] = parse_value(val)

    sec_it = fio_cfg_compile(job_cfg, argv_obj.jobfile, params)

    # ``print(<single expression>)`` prints the same text under the Python 2
    # print statement and is also valid as the Python 3 function call.
    if argv_obj.action == 'estimate':
        print(sec_to_str(sum(map(execution_time, sec_it))))
    elif argv_obj.action == 'num_tests':
        print(sum(map(len, map(list, sec_it))))
    elif argv_obj.action == 'compile':
        splitter = "\n#" + "-" * 70 + "\n\n"
        print(splitter.join(map(str, sec_it)))

    return 0
def show_test_execution_time(self):
    """Log the estimated duration and finish time of the whole test set.

    Nothing is logged when ``self.fio_configs`` holds one config or none.
    Otherwise the per-config ``execution_time`` values are summed, padded
    by 10% and truncated to whole seconds; the estimate and the expected
    wall-clock finish time are written to the log at INFO level.
    """
    if len(self.fio_configs) <= 1:
        return

    # The extra 10% is a rough allowance for additional operations
    # such as sftp transfers.
    raw_total = sum(execution_time(cfg) for cfg in self.fio_configs)
    exec_time = int(raw_total * 1.1)
    exec_time_s = sec_to_str(exec_time)

    finish_at = datetime.datetime.now() + datetime.timedelta(seconds=exec_time)

    logger.info(
        "Entire test should takes aroud: {0} and finished at {1}".format(
            exec_time_s, finish_at.strftime("%H:%M:%S")))
def run(self):
    """Run every fio config on all nodes and collect the results.

    For each entry of ``self.fio_configs`` (unless its test group was
    already marked as too slow) the method:

    1. logs the expected duration and the latest expected finish time;
    2. runs ``self.do_run`` for every node of ``self.config.nodes`` in a
       thread pool, retrying up to three times;
    3. writes ``<pos>_task.fio`` and ``<pos>_params.yaml`` into
       ``self.config.log_directory``;
    4. loads the results with ``load_test_results`` and, when
       ``self.max_latency`` / ``self.min_bw_per_thread`` are set, marks
       the test group so that later configs of the same group are skipped.

    An attempt is treated as failed when it raises ``EnvironmentError`` /
    ``SSHException`` or when any node yields ``None``.  Between attempts
    all node connections are closed, the method sleeps for
    ``self.retry_time`` seconds and the connections are re-established.

    Returns:
        ``IOTestResults`` built from ``self.config.params['cfg']``, the
        list of loaded results and ``self.config.log_directory``.

    Raises:
        StopTestError: when the last attempt for a config fails, either
            with an exception or with a ``None`` result from some node.
    """
    logger.debug("Run preparation")
    self.pre_run()
    self.show_test_execution_time()

    barrier = Barrier(len(self.config.nodes))
    results = []

    # Set of Operation_Mode_BlockSize strings which should not be tested
    # anymore, as they were already too slow with a previous thread count.
    lat_bw_limit_reached = set()

    with ThreadPoolExecutor(len(self.config.nodes)) as pool:
        for pos, fio_cfg in enumerate(self.fio_configs):
            # Everything before the first "th" of the summary is the group
            # key; presumably this drops the thread-count suffix -
            # TODO confirm against get_test_summary.
            test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
            if test_descr in lat_bw_limit_reached:
                continue

            logger.info("Will run {0} test".format(fio_cfg.name))

            templ = "Test should takes about {0}." + \
                    " Should finish at {1}," + \
                    " will wait at most till {2}"
            exec_time = execution_time(fio_cfg)
            exec_time_str = sec_to_str(exec_time)
            # Here the timeout is only used for the log message below.
            timeout = int(exec_time + max(300, exec_time))

            now_dt = datetime.datetime.now()
            end_dt = now_dt + datetime.timedelta(0, exec_time)
            wait_till = now_dt + datetime.timedelta(0, timeout)

            logger.info(templ.format(exec_time_str,
                                     end_dt.strftime("%H:%M:%S"),
                                     wait_till.strftime("%H:%M:%S")))

            func = functools.partial(self.do_run,
                                     barrier=barrier,
                                     fio_cfg=fio_cfg,
                                     pos=pos)

            max_retr = 3
            for idx in range(max_retr):
                last_attempt = (idx == max_retr - 1)
                try:
                    intervals = list(pool.map(func, self.config.nodes))
                except (EnvironmentError, SSHException) as exc:
                    logger.exception("During fio run")
                    if last_attempt:
                        raise StopTestError("Fio failed", exc)
                else:
                    if None not in intervals:
                        break

                    if last_attempt:
                        # Previously the loop just fell through here: it
                        # reconnected and slept for nothing and then went
                        # on to store results with ``None`` intervals.
                        # Fail the same way the exception path does.
                        logger.error("Fio returned no result on some nodes")
                        cause = RuntimeError(
                            "No result from some nodes after " +
                            "{0} attempts".format(max_retr))
                        raise StopTestError("Fio failed", cause)

                logger.info("Reconnectiong, sleeping %ss and retrying",
                            self.retry_time)

                # Drop all connections, give the nodes time to settle and
                # reconnect before the next attempt.
                wait([pool.submit(node.connection.close)
                      for node in self.config.nodes])

                time.sleep(self.retry_time)

                wait([pool.submit(reconnect, node.connection, node.conn_url)
                      for node in self.config.nodes])

            fname = "{0}_task.fio".format(pos)
            with open(os.path.join(self.config.log_directory, fname), "w") as fd:
                fd.write(str(fio_cfg))

            params = {'vm_count': len(self.config.nodes)}
            params['name'] = fio_cfg.name
            params['vals'] = dict(fio_cfg.vals.items())

            params['intervals'] = intervals
            params['nodes'] = [node.get_conn_id()
                               for node in self.config.nodes]

            fname = "{0}_params.yaml".format(pos)
            with open(os.path.join(self.config.log_directory, fname), "w") as fd:
                fd.write(dumps(params))

            res = load_test_results(self.config.log_directory, pos)
            results.append(res)

            if self.max_latency is not None:
                lat_50, _ = res.get_lat_perc_50_95_multy()

                # NOTE(review): the original comment mentioned converting
                # us to ms, but no conversion happens here; presumably both
                # values already share a unit - confirm.
                if self.max_latency < lat_50:
                    logger.info(("Will skip all subsequent tests of {0} " +
                                 "due to lat/bw limits").format(fio_cfg.name))
                    lat_bw_limit_reached.add(test_descr)

            test_res = res.get_params_from_fio_report()
            if self.min_bw_per_thread is not None:
                if self.min_bw_per_thread > average(test_res['bw']):
                    lat_bw_limit_reached.add(test_descr)

    return IOTestResults(self.config.params['cfg'],
                         results, self.config.log_directory)
def run(self):
    """Execute all fio configs on the configured nodes and gather results.

    Each config of ``self.fio_configs`` is run through ``self.do_run`` on
    every node of ``self.config.nodes`` using a thread pool.  A run that
    raises ``EnvironmentError`` or ``SSHException`` is retried after a 30
    second pause, three attempts in total.  For every executed config the
    job text goes to ``<pos>_task.fio`` and the run parameters to
    ``<pos>_params.yaml`` inside ``self.config.log_directory``; the results
    are then loaded with ``load_test_results``.

    Once a result exceeds ``self.max_latency`` or falls below
    ``self.min_bw_per_thread`` (when those limits are set), its test group
    is remembered and the remaining configs of that group are skipped.

    Returns:
        ``IOTestResults`` built from ``self.config.params['cfg']``, the
        collected results and ``self.config.log_directory``.

    Raises:
        StopTestError: when the third attempt for a config also fails.
    """
    logger.debug("Run preparation")
    self.pre_run()
    self.show_test_execution_time()

    # NOTE: computed exactly as before although nothing below reads it.
    tname = os.path.basename(self.config_fname)
    if tname.endswith('.cfg'):
        tname = tname[:-4]

    barrier = Barrier(len(self.config.nodes))
    results = []

    # Operation_Mode_BlockSize strings of the groups that must not be
    # tested anymore because they were already too slow with a previous
    # thread count.
    lat_bw_limit_reached = set()

    estimate_msg = ("Test should takes about {0}."
                    " Should finish at {1},"
                    " will wait at most till {2}")
    time_fmt = "%H:%M:%S"

    with ThreadPoolExecutor(len(self.config.nodes)) as pool:
        for pos, fio_cfg in enumerate(self.fio_configs):
            test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
            if test_descr in lat_bw_limit_reached:
                continue

            logger.info("Will run {0} test".format(fio_cfg.name))

            # Announce the estimate; the timeout only feeds this message.
            estimated_sec = execution_time(fio_cfg)
            estimated_str = sec_to_str(estimated_sec)
            timeout = int(estimated_sec + max(300, estimated_sec))

            started_at = datetime.datetime.now()
            finish_at = started_at + datetime.timedelta(seconds=estimated_sec)
            deadline = started_at + datetime.timedelta(seconds=timeout)

            logger.info(estimate_msg.format(estimated_str,
                                            finish_at.strftime(time_fmt),
                                            deadline.strftime(time_fmt)))

            run_on_node = functools.partial(self.do_run,
                                            barrier=barrier,
                                            fio_cfg=fio_cfg,
                                            pos=pos)

            max_retr = 3
            for attempt in range(1, max_retr + 1):
                try:
                    intervals = list(pool.map(run_on_node, self.config.nodes))
                    break
                except (EnvironmentError, SSHException) as exc:
                    logger.exception("During fio run")
                    if attempt == max_retr:
                        raise StopTestError("Fio failed", exc)

                logger.info("Sleeping 30s and retrying")
                time.sleep(30)

            # Keep the exact job text next to the results.
            task_path = os.path.join(self.config.log_directory,
                                     "{0}_task.fio".format(pos))
            with open(task_path, "w") as task_fd:
                task_fd.write(str(fio_cfg))

            params = {
                'vm_count': len(self.config.nodes),
                'name': fio_cfg.name,
                'vals': dict(fio_cfg.vals.items()),
                'intervals': intervals,
                'nodes': [node.get_conn_id() for node in self.config.nodes],
            }

            params_path = os.path.join(self.config.log_directory,
                                       "{0}_params.yaml".format(pos))
            with open(params_path, "w") as params_fd:
                params_fd.write(dumps(params))

            res = load_test_results(self.config.log_directory, pos)
            results.append(res)

            if self.max_latency is not None:
                lat_50, _ = res.get_lat_perc_50_95_multy()

                # NOTE(review): the original comment mentioned a us -> ms
                # conversion, but none is performed; presumably the units
                # already match - confirm.
                if lat_50 > self.max_latency:
                    skip_msg = ("Will skip all subsequent tests of {0} " +
                                "due to lat/bw limits")
                    logger.info(skip_msg.format(fio_cfg.name))
                    lat_bw_limit_reached.add(test_descr)

            test_res = res.get_params_from_fio_report()
            if (self.min_bw_per_thread is not None and
                    self.min_bw_per_thread > average(test_res['bw'])):
                lat_bw_limit_reached.add(test_descr)

    return IOTestResults(self.config.params['cfg'],
                         results,
                         self.config.log_directory)