def run(self):
    """Run every fio config on all test nodes and collect the results.

    For each entry of ``self.fio_configs`` the job is started on every node
    of ``self.config.nodes`` through a thread pool (up to three attempts,
    reconnecting to the nodes between attempts).  The job text and the run
    parameters are then written into ``self.config.log_directory`` and the
    results are read back with ``load_test_results``.

    A config is skipped when an earlier config with the same summary prefix
    (the text before the first "th" in ``get_test_summary``) already
    exceeded ``self.max_latency`` or fell below ``self.min_bw_per_thread``.

    Returns:
        ``IOTestResults`` built from ``self.config.params['cfg']``, the list
        of loaded results and the log directory.

    Raises:
        StopTestError: if the last attempt still fails with
            ``EnvironmentError``/``SSHException``, or still reports ``None``
            for some node.
    """
    logger.debug("Run preparation")
    self.pre_run()
    self.show_test_execution_time()

    node_count = len(self.config.nodes)
    barrier = Barrier(node_count)
    results = []

    # set of Operation_Mode_BlockSize str's
    # which should not be tested anymore, as
    # they were already too slow with a previous thread count
    lat_bw_limit_reached = set()

    with ThreadPoolExecutor(node_count) as pool:
        for pos, fio_cfg in enumerate(self.fio_configs):
            test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
            if test_descr in lat_bw_limit_reached:
                continue

            logger.info("Will run {0} test".format(fio_cfg.name))

            templ = "Test should takes about {0}." + \
                    " Should finish at {1}," + \
                    " will wait at most till {2}"
            exec_time = execution_time(fio_cfg)
            exec_time_str = sec_to_str(exec_time)
            # The deadline is only reported in the log message below.
            timeout = int(exec_time + max(300, exec_time))

            now_dt = datetime.datetime.now()
            end_dt = now_dt + datetime.timedelta(0, exec_time)
            wait_till = now_dt + datetime.timedelta(0, timeout)

            logger.info(templ.format(exec_time_str,
                                     end_dt.strftime("%H:%M:%S"),
                                     wait_till.strftime("%H:%M:%S")))

            func = functools.partial(self.do_run,
                                     barrier=barrier,
                                     fio_cfg=fio_cfg,
                                     pos=pos)

            max_retr = 3
            for idx in range(max_retr):
                last_attempt = (idx == max_retr - 1)
                try:
                    intervals = list(pool.map(func, self.config.nodes))
                except (EnvironmentError, SSHException) as exc:
                    logger.exception("During fio run")
                    if last_attempt:
                        raise StopTestError("Fio failed", exc)
                else:
                    # The retry check treats a None entry as a failed node.
                    if None not in intervals:
                        break
                    if last_attempt:
                        # Do not go on to store and parse the results of a
                        # run that never succeeded on every node.
                        raise StopTestError("Fio failed", None)

                logger.info("Reconnectiong, sleeping %ss and retrying",
                            self.retry_time)

                # Drop all connections, give the nodes time to recover,
                # then re-establish the connections in parallel.
                wait([pool.submit(node.connection.close)
                      for node in self.config.nodes])

                time.sleep(self.retry_time)

                wait([pool.submit(reconnect, node.connection, node.conn_url)
                      for node in self.config.nodes])

            log_dir = self.config.log_directory

            fname = "{0}_task.fio".format(pos)
            with open(os.path.join(log_dir, fname), "w") as fd:
                fd.write(str(fio_cfg))

            params = {'vm_count': len(self.config.nodes)}
            params['name'] = fio_cfg.name
            params['vals'] = dict(fio_cfg.vals.items())
            params['intervals'] = intervals
            params['nodes'] = [node.get_conn_id()
                               for node in self.config.nodes]

            fname = "{0}_params.yaml".format(pos)
            with open(os.path.join(log_dir, fname), "w") as fd:
                fd.write(dumps(params))

            res = load_test_results(log_dir, pos)
            results.append(res)

            if self.max_latency is not None:
                lat_50, _ = res.get_lat_perc_50_95_multy()

                # NOTE(review): the original comment here mentioned a
                # us -> ms conversion, but none is performed; confirm that
                # max_latency and lat_50 use the same unit.
                if self.max_latency < lat_50:
                    logger.info(("Will skip all subsequent tests of {0} " +
                                 "due to lat/bw limits").format(fio_cfg.name))
                    lat_bw_limit_reached.add(test_descr)

            test_res = res.get_params_from_fio_report()
            if self.min_bw_per_thread is not None:
                if self.min_bw_per_thread > average(test_res['bw']):
                    lat_bw_limit_reached.add(test_descr)

    return IOTestResults(self.config.params['cfg'],
                         results, self.config.log_directory)
def run(self):
    """Run every fio config on all test nodes and collect the results.

    For each entry of ``self.fio_configs`` the job is started on every node
    of ``self.config.nodes`` through a thread pool, with up to three
    attempts and a 30 second pause between them.  The job text and the run
    parameters are then written into ``self.config.log_directory`` and the
    results are read back with ``load_test_results``.

    A config is skipped when an earlier config with the same summary prefix
    (the text before the first "th" in ``get_test_summary``) already
    exceeded ``self.max_latency`` or fell below ``self.min_bw_per_thread``.

    Returns:
        ``IOTestResults`` built from ``self.config.params['cfg']``, the list
        of loaded results and the log directory.

    Raises:
        StopTestError: if the last attempt still fails with
            ``EnvironmentError`` or ``SSHException``.
    """
    logger.debug("Run preparation")
    self.pre_run()
    self.show_test_execution_time()

    node_count = len(self.config.nodes)
    barrier = Barrier(node_count)
    results = []

    # set of Operation_Mode_BlockSize str's
    # which should not be tested anymore, as
    # they were already too slow with a previous thread count
    lat_bw_limit_reached = set()

    with ThreadPoolExecutor(node_count) as pool:
        for pos, fio_cfg in enumerate(self.fio_configs):
            test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
            if test_descr in lat_bw_limit_reached:
                continue

            logger.info("Will run {0} test".format(fio_cfg.name))

            templ = "Test should takes about {0}." + \
                    " Should finish at {1}," + \
                    " will wait at most till {2}"
            exec_time = execution_time(fio_cfg)
            exec_time_str = sec_to_str(exec_time)
            # The deadline is only reported in the log message below.
            timeout = int(exec_time + max(300, exec_time))

            now_dt = datetime.datetime.now()
            end_dt = now_dt + datetime.timedelta(0, exec_time)
            wait_till = now_dt + datetime.timedelta(0, timeout)

            logger.info(templ.format(exec_time_str,
                                     end_dt.strftime("%H:%M:%S"),
                                     wait_till.strftime("%H:%M:%S")))

            func = functools.partial(self.do_run,
                                     barrier=barrier,
                                     fio_cfg=fio_cfg,
                                     pos=pos)

            max_retr = 3
            for idx in range(max_retr):
                try:
                    intervals = list(pool.map(func, self.config.nodes))
                    break
                except (EnvironmentError, SSHException) as exc:
                    logger.exception("During fio run")
                    # Out of attempts: give up instead of sleeping again.
                    if idx == max_retr - 1:
                        raise StopTestError("Fio failed", exc)

                logger.info("Sleeping 30s and retrying")
                time.sleep(30)

            log_dir = self.config.log_directory

            fname = "{0}_task.fio".format(pos)
            with open(os.path.join(log_dir, fname), "w") as fd:
                fd.write(str(fio_cfg))

            params = {'vm_count': len(self.config.nodes)}
            params['name'] = fio_cfg.name
            params['vals'] = dict(fio_cfg.vals.items())
            params['intervals'] = intervals
            params['nodes'] = [node.get_conn_id()
                               for node in self.config.nodes]

            fname = "{0}_params.yaml".format(pos)
            with open(os.path.join(log_dir, fname), "w") as fd:
                fd.write(dumps(params))

            res = load_test_results(log_dir, pos)
            results.append(res)

            if self.max_latency is not None:
                lat_50, _ = res.get_lat_perc_50_95_multy()

                # NOTE(review): the original comment here mentioned a
                # us -> ms conversion, but none is performed; confirm that
                # max_latency and lat_50 use the same unit.
                if self.max_latency < lat_50:
                    logger.info(("Will skip all subsequent tests of {0} " +
                                 "due to lat/bw limits").format(fio_cfg.name))
                    lat_bw_limit_reached.add(test_descr)

            test_res = res.get_params_from_fio_report()
            if self.min_bw_per_thread is not None:
                if self.min_bw_per_thread > average(test_res['bw']):
                    lat_bw_limit_reached.add(test_descr)

    return IOTestResults(self.config.params['cfg'],
                         results, self.config.log_directory)
def disk_perf_info(self, avg_interval=2.0):
    """Build, cache and return the aggregated ``DiskPerfInfo`` of this result.

    The per-VM latency, bandwidth, IOPS and (when present) system IOPS time
    series are resampled and trimmed, summed across all series, and the
    bandwidth/IOPS sums are rescaled so that their average matches the
    totals taken from the fio report.  The result is stored in
    ``self._pinfo`` and returned directly on later calls.

    Args:
        avg_interval: series whose ``average_interval()`` is smaller than
            this value are replaced by ``derived(avg_interval)``.

    Returns:
        The populated ``DiskPerfInfo`` instance.
    """
    if self._pinfo is not None:
        return self._pinfo

    testnodes_count = len(self.config.nodes)
    pinfo = DiskPerfInfo(self.name,
                         self.summary(),
                         self.params,
                         testnodes_count)

    def flatten(nested):
        # Remove one level of nesting (linear, unlike ``sum(nested, [])``).
        return [item for part in nested for item in part]

    def prepare(data, drop=1):
        # Resample each series of one VM and cut ``drop`` trailing values.
        if data is None:
            return data

        res = []
        for ts_data in data:
            if ts_data.average_interval() < avg_interval:
                ts_data = ts_data.derived(avg_interval)

            # drop last value on bounds
            # as they may contains ranges without activities
            assert len(ts_data.values) >= drop + 1, \
                str(drop) + " " + str(ts_data.values)

            if drop > 0:
                res.append(ts_data.values[:-drop])
            else:
                res.append(ts_data.values)

        return res

    def agg_data(matr):
        # Sum all series position by position, up to the shortest series.
        arr = flatten(matr)
        min_len = min(map(len, arr))
        return [sum(dt[idx] for dt in arr) for idx in range(min_len)]

    # Real lists (not ``map`` objects) are required: every raw_* value is
    # traversed more than once below, which a Python 3 iterator cannot do.
    pinfo.raw_lat = [prepare(vm_data) for vm_data in self.lat.per_vm()]
    num_th = sum(map(len, pinfo.raw_lat))
    # float() avoids truncating integer division, as for the coefs below.
    lat_avg = [val / float(num_th) for val in agg_data(pinfo.raw_lat)]
    pinfo.lat_avg = data_property(lat_avg).average / 1000.0  # us to ms

    pinfo.lat_50, pinfo.lat_95 = self.get_lat_perc_50_95_multy()
    pinfo.lat = pinfo.lat_50

    pinfo.raw_bw = [prepare(vm_data) for vm_data in self.bw.per_vm()]
    pinfo.raw_iops = [prepare(vm_data) for vm_data in self.iops.per_vm()]

    if self.iops_sys is not None:
        pinfo.raw_iops_sys = [prepare(vm_data)
                              for vm_data in self.iops_sys.per_vm()]
        pinfo.iops_sys = data_property(agg_data(pinfo.raw_iops_sys))
    else:
        pinfo.raw_iops_sys = None
        pinfo.iops_sys = None

    fparams = self.get_params_from_fio_report()
    fio_report_bw = sum(fparams['flt_bw'])
    fio_report_iops = sum(fparams['flt_iops'])

    agg_bw = agg_data(pinfo.raw_bw)
    agg_iops = agg_data(pinfo.raw_iops)

    log_bw_avg = average(agg_bw)
    log_iops_avg = average(agg_iops)

    # update values to match average from fio report
    coef_iops = fio_report_iops / float(log_iops_avg)
    coef_bw = fio_report_bw / float(log_bw_avg)

    bw_log = data_property([val * coef_bw for val in agg_bw])
    iops_log = data_property([val * coef_iops for val in agg_iops])

    bw_report = data_property([fio_report_bw])
    iops_report = data_property([fio_report_iops])

    # When IOPS/BW per thread is too low
    # data from logs is rounded to match,
    # so the fio report value becomes the primary one in that case.
    iops_per_th = flatten(flatten(pinfo.raw_iops))
    if average(iops_per_th) > 10:
        pinfo.iops = iops_log
        pinfo.iops2 = iops_report
    else:
        pinfo.iops = iops_report
        pinfo.iops2 = iops_log

    bw_per_th = flatten(flatten(pinfo.raw_bw))
    if average(bw_per_th) > 10:
        pinfo.bw = bw_log
        pinfo.bw2 = bw_report
    else:
        pinfo.bw = bw_report
        pinfo.bw2 = bw_log

    self._pinfo = pinfo
    return pinfo
def disk_perf_info(self, avg_interval=2.0):
    """Build, cache and return the aggregated ``DiskPerfInfo`` of this result.

    The per-VM latency, bandwidth and IOPS time series are resampled and
    trimmed, summed across all series, and the bandwidth/IOPS sums are
    rescaled so that their average matches the totals taken from the fio
    report.  The result is stored in ``self._pinfo`` and returned directly
    on later calls.

    Args:
        avg_interval: series whose ``average_interval()`` is smaller than
            this value are replaced by ``derived(avg_interval)``.

    Returns:
        The populated ``DiskPerfInfo`` instance.
    """
    if self._pinfo is not None:
        return self._pinfo

    testnodes_count = len(self.config.nodes)
    pinfo = DiskPerfInfo(self.name,
                         self.summary(),
                         self.params,
                         testnodes_count)

    def flatten(nested):
        # Remove one level of nesting (linear, unlike ``sum(nested, [])``).
        return [item for part in nested for item in part]

    def prepare(data, drop=1):
        # Resample each series of one VM and cut ``drop`` trailing values.
        if data is None:
            return data

        res = []
        for ts_data in data:
            if ts_data.average_interval() < avg_interval:
                ts_data = ts_data.derived(avg_interval)

            # drop last value on bounds
            # as they may contains ranges without activities
            assert len(ts_data.values) >= drop + 1, \
                str(drop) + " " + str(ts_data.values)

            if drop > 0:
                res.append(ts_data.values[:-drop])
            else:
                res.append(ts_data.values)

        return res

    def agg_data(matr):
        # Sum all series position by position, up to the shortest series.
        arr = flatten(matr)
        min_len = min(map(len, arr))
        return [sum(dt[idx] for dt in arr) for idx in range(min_len)]

    # Real lists (not ``map`` objects) are required: every raw_* value is
    # traversed more than once below, which a Python 3 iterator cannot do.
    pinfo.raw_lat = [prepare(vm_data) for vm_data in self.lat.per_vm()]
    num_th = sum(map(len, pinfo.raw_lat))
    # float() avoids truncating integer division, as for the coefs below.
    lat_avg = [val / float(num_th) for val in agg_data(pinfo.raw_lat)]
    pinfo.lat_avg = data_property(lat_avg).average / 1000.0  # us to ms

    pinfo.lat_50, pinfo.lat_95 = self.get_lat_perc_50_95_multy()
    pinfo.lat = pinfo.lat_50

    pinfo.raw_bw = [prepare(vm_data) for vm_data in self.bw.per_vm()]
    pinfo.raw_iops = [prepare(vm_data) for vm_data in self.iops.per_vm()]

    fparams = self.get_params_from_fio_report()
    fio_report_bw = sum(fparams['flt_bw'])
    fio_report_iops = sum(fparams['flt_iops'])

    agg_bw = agg_data(pinfo.raw_bw)
    agg_iops = agg_data(pinfo.raw_iops)

    log_bw_avg = average(agg_bw)
    log_iops_avg = average(agg_iops)

    # update values to match average from fio report
    coef_iops = fio_report_iops / float(log_iops_avg)
    coef_bw = fio_report_bw / float(log_bw_avg)

    bw_log = data_property([val * coef_bw for val in agg_bw])
    iops_log = data_property([val * coef_iops for val in agg_iops])

    bw_report = data_property([fio_report_bw])
    iops_report = data_property([fio_report_iops])

    # When IOPS/BW per thread is too low
    # data from logs is rounded to match,
    # so the fio report value becomes the primary one in that case.
    iops_per_th = flatten(flatten(pinfo.raw_iops))
    if average(iops_per_th) > 10:
        pinfo.iops = iops_log
        pinfo.iops2 = iops_report
    else:
        pinfo.iops = iops_report
        pinfo.iops2 = iops_log

    bw_per_th = flatten(flatten(pinfo.raw_bw))
    if average(bw_per_th) > 10:
        pinfo.bw = bw_log
        pinfo.bw2 = bw_report
    else:
        pinfo.bw = bw_report
        pinfo.bw2 = bw_log

    self._pinfo = pinfo
    return pinfo