def find_valid_runs(input_list, scenario):
    # Check for query constraints documented in
    # https://github.com/mlperf/inference_policies/blob/master/inference_rules.adoc#scenarios
    QUERY_METRIC_CONSTRAINTS = {
        "Offline": ("effective_samples_per_query", 24576),
        "Server": ("effective_min_query_count", 270336),
        "MultiStream": ("effective_min_query_count", 270336),
        "SingleStream": ("effective_min_query_count", 1024),
    }

    perf_list = []
    perf_power_list = []
    accu_list = []
    for input_file in input_list:
        # Check if this is Accuracy run or Performance run.
        if os.path.getsize(input_file) > 4:
            accu_list.append(input_file)
            continue

        # Check for valid perf run
        log_dir = os.path.dirname(input_file)
        scenario_key = QUERY_METRIC_CONSTRAINTS[scenario][0]
        result = from_loadgen_by_keys(log_dir, ["result_validity", scenario_key])
        is_valid = ("result_validity" in result) and (result["result_validity"] == "VALID")
        satisfies_query_constraint = (scenario_key in result) and (
            float(result[scenario_key]) >= QUERY_METRIC_CONSTRAINTS[scenario][1])

        if is_valid and satisfies_query_constraint:
            perf_list.append(input_file)
            if "power" in log_dir:
                perf_power_list.append(input_file)

    return perf_list, perf_power_list, accu_list
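# from_loadgen_by_keys() is a helper defined elsewhere in this repo. For context, below is a
# minimal sketch consistent with how it is called throughout these scripts (a log directory
# plus a list of keys, returning a dict of whichever keys were found). It assumes the standard
# loadgen detail-log format, where each record line carries a ":::MLLOG" marker followed by a
# JSON object with "key" and "value" fields; treat it as an illustration, not the repo's
# actual implementation.
def _example_from_loadgen_by_keys(log_dir, keys):
    wanted = set(keys)
    found = {}
    with open(os.path.join(log_dir, "mlperf_log_detail.txt")) as f:
        for line in f:
            if ":::MLLOG" not in line:
                continue
            record = json.loads(line.split(":::MLLOG", 1)[1])
            if record.get("key") in wanted:
                found[record["key"]] = record["value"]
    return found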
def sort_perf_list(perf_file_list, scenario):
    # Sorts performance runs by a per-scenario tiebreaker criterion
    scenario_criteria = {
        "Offline": ("result_samples_per_second", SortingCriteria.Higher),
        "Server": ("result_99.00_percentile_latency_ns", SortingCriteria.Lower),
        "SingleStream": ("result_90.00_percentile_latency_ns", SortingCriteria.Lower),
        "MultiStream": ("result_99.00_percentile_latency_ns", SortingCriteria.Lower),
    }

    perf_vals = []
    for perf_file in perf_file_list:
        log_dir = os.path.dirname(perf_file)
        scenario_key = scenario_criteria[scenario][0]
        result = from_loadgen_by_keys(log_dir, [scenario_key])
        if len(result) == 0:
            raise Exception("Could not find perf value in file: "
                            + os.path.join(log_dir, "mlperf_log_detail.txt"))
        perf_vals.append((perf_file, float(result[scenario_key])))

    sorted_perf_vals = sorted(
        perf_vals,
        key=lambda k: k[1],
        reverse=(scenario_criteria[scenario][1] == SortingCriteria.Lower))

    return [k[0] for k in sorted_perf_vals]
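# Hedged usage sketch (not part of the original scripts): with the reverse logic above,
# Offline runs are sorted by ascending throughput and latency-bound scenarios by descending
# latency, so the best-scoring run lands at the end of the returned list. How many runs a
# caller actually keeps is an assumption made only for illustration.
def _example_pick_best_runs(perf_runs, scenario, num_runs_to_keep=1):
    ordered = sort_perf_list(perf_runs, scenario)
    return ordered[-num_runs_to_keep:]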
def traverse_results(results_dir):
    perf_glob = os.path.join(results_dir, "**", "performance", "run_*", "mlperf_log_detail.txt")
    perf_run_logs = glob.glob(perf_glob, recursive=True)
    # Sort the logs so we add each run in order
    perf_run_logs = list(sorted(perf_run_logs))

    results_tree = Tree()
    for entry in perf_run_logs:
        parts = entry.split("/")
        # results/<system_id>/<benchmark>/<scenario>/performance/<run id>/mlperf_log_detail.txt
        system_id = parts[1]
        benchmark = parts[2]
        scenario = parts[3]
        log_dir = os.path.dirname(entry)
        scenario_key = scenario_loadgen_log_keys[scenario]
        result = from_loadgen_by_keys(log_dir, [scenario_key])
        if len(result) == 0:
            print("WARNING: Could not find perf value in file: " + entry + ". Using 0")
            perf_number = 0.0
        else:
            perf_number = float(result[scenario_key])
        results_tree.insert([system_id, benchmark, scenario], perf_number, append=True)

    return results_tree
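# traverse_results() relies on a Tree container defined elsewhere in the repo. The stand-in
# below is a minimal sketch consistent with the single call made above
# (insert(keys, value, append=True)); the real class may behave differently in other respects.
class _ExampleTree:
    def __init__(self):
        self.tree = {}

    def insert(self, keys, value, append=False):
        # Walk/create nested dicts for all but the last key, then set or append the value.
        node = self.tree
        for key in keys[:-1]:
            node = node.setdefault(key, {})
        if append:
            node.setdefault(keys[-1], []).append(value)
        else:
            node[keys[-1]] = value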
def run_harness(self):
    flag_dict = self.build_default_flags()
    flag_dict.update(self.build_scenario_specific_flags())

    # Handle engines
    if self.has_gpu:
        flag_dict["gpu_engines"] = self.gpu_engine

    # MLPINF-853: Special handling of --fast. Use min_duration=60000, and for Offline and
    # MultiStream, use min_query_count=1.
    if flag_dict.get("fast", False):
        if "min_duration" not in flag_dict:
            flag_dict["min_duration"] = 60000
        if self.scenario in [SCENARIOS.Offline, SCENARIOS.MultiStream]:
            if "min_query_count" not in flag_dict:
                flag_dict["min_query_count"] = 1
        flag_dict["fast"] = None

    # Generates the entries in the `measurements/` directory, and updates flag_dict accordingly
    generate_measurements_entry(
        self.get_system_name(),
        self.name,
        self._get_submission_benchmark_name(),
        self.scenario,
        self.args["input_dtype"],
        self.args["precision"],
        flag_dict)

    # Stop here if we are only generating .conf files in measurements
    if self.generate_conf_files_only:
        return "Generated conf files"

    argstr = self._build_custom_flags(flag_dict)
    if isinstance(argstr, dict):
        argstr = args_to_string(flag_dict)

    # Handle environment variables
    if self.use_jemalloc:
        self.prepend_ld_preload("/usr/lib/x86_64-linux-gnu/libjemalloc.so.2")

    cmd = "{:} {:}".format(self.executable, argstr)
    output = run_command(cmd, get_output=True, custom_env=self.env_vars)

    # Return harness result.
    scenario_key = scenario_loadgen_log_keys[self.scenario]
    results = from_loadgen_by_keys(
        os.path.join(
            self.args["log_dir"],
            self.get_system_name(),
            self._get_submission_benchmark_name(),
            self.scenario),
        ["result_validity", scenario_key])

    if scenario_key not in results:
        result_string = "Cannot find performance result. Maybe you are running in AccuracyOnly mode."
    elif "result_validity" not in results:
        result_string = "{}: {}, Result validity unknown".format(scenario_key, results[scenario_key])
    else:
        result_string = "{}: {}, Result is {}".format(
            scenario_key, results[scenario_key], results["result_validity"])

    return self._handle_harness_result(result_string)
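# run_harness() delegates flag formatting to the repo's args_to_string() helper, whose real
# definition lives elsewhere. The sketch below only illustrates the convention the code above
# leans on, where a value of None (as set for "fast") marks a bare flag and any other value
# becomes a key/value argument; both the helper body and the exact flag syntax are
# assumptions, not the repo's actual implementation.
def _example_args_to_string(flag_dict):
    parts = []
    for key, value in flag_dict.items():
        if value is None:
            parts.append("--{}".format(key))
        else:
            parts.append("--{}={}".format(key, value))
    return " ".join(parts)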
def __init__(self, directory, verbose=False):
    """
    Populate self.data from the contents of `directory`, which contains:
        - METAFILE at the top level
        - mlperf_log_detail.txt in a run/platform-specific subdirectory.
    """
    search_path = os.path.join(directory, "**/mlperf_log_detail.txt")
    paths = glob.glob(search_path, recursive=True)
    if not paths:
        raise RuntimeError(f"Could not find mlperf_log_detail.txt in: \n{directory}\nDid you mean to run with --noparse?")

    key_set = self.verbose_stat_key_set if verbose else self.scenario_keys_set
    result = from_loadgen_by_keys(os.path.dirname(paths[0]), key_set)
    assert len(result) > 0

    to_ret = {}
    to_ret.update(result)
    with open(os.path.join(directory, METAFILE), 'r') as f:
        extra_stats = json.load(f)['run_info']
    to_ret.update(extra_stats)
    self.data = to_ret
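# The METAFILE read above is expected to be JSON with a top-level "run_info" object whose
# entries are merged into self.data. Below is a minimal example of that assumed shape; the
# field names inside "run_info" are illustrative only and are not taken from the repo.
_EXAMPLE_METAFILE_CONTENTS = {
    "run_info": {
        "system_name": "example_system",
        "benchmark": "resnet50",
        "scenario": "Offline",
    },
}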
def main():
    log_dir = common_args.parse_args(["log_dir"])["log_dir"]

    summary_file = os.path.join(log_dir, "perf_harness_summary.json")
    with open(summary_file) as f:
        results = json.load(f)

    print("")
    print("======================= Perf harness results: =======================")
    print("")

    for config_name in results:
        print("{:}:".format(config_name))
        for benchmark in results[config_name]:
            print(" {:}: {:}".format(benchmark, results[config_name][benchmark]))
        print("")

    summary_file = os.path.join(log_dir, "accuracy_summary.json")
    with open(summary_file) as f:
        results = json.load(f)

    print("")
    print("======================= Accuracy results: =======================")
    print("")

    for config_name in results:
        print("{:}:".format(config_name))
        for benchmark in results[config_name]:
            print(" {:}: {:}".format(benchmark, results[config_name][benchmark]))
        print("")

    # If this is a power run, we should print out the average power
    if os.path.exists(os.path.join(log_dir, "spl.txt")):
        print("")
        print("======================= Power results: =======================")
        print("")

        for config_name in results:
            print("{:}:".format(config_name))
            for benchmark in results[config_name]:
                # Get power_begin and power_end
                detail_logs = glob.glob(os.path.join(log_dir, "**", "mlperf_log_detail.txt"), recursive=True)
                if len(detail_logs) == 0:
                    raise RuntimeError("Could not find detail logs for power run!")
                elif len(detail_logs) > 1:
                    print("WARNING: Power harness run contains multiple benchmark-scenario runs. This is not advised.")

                # Select the correct detail log
                scenario = config_name.split("-")[-1]
                detail_log_path = None
                for detail_log in detail_logs:
                    components = detail_log.split("/")
                    if scenario == components[-2] and benchmark == components[-3]:
                        detail_log_path = detail_log
                        break

                if detail_log_path is None:
                    raise RuntimeError("Could not find mlperf_log_detail.txt for {}-{}".format(benchmark, scenario))

                power_times = from_loadgen_by_keys(
                    os.path.dirname(detail_log_path),
                    ["power_begin", "power_end"])
                power_begin = from_timestamp(power_times["power_begin"])
                power_end = from_timestamp(power_times["power_end"])

                # Read power metrics from spl.txt
                with open(os.path.join(log_dir, "spl.txt")) as f:
                    lines = f.read().split("\n")

                power_vals = []
                for line in lines:
                    data = line.split(",")
                    if len(data) != 12:
                        continue

                    timestamp = data[1]
                    watts = float(data[3])
                    curr_time = from_timestamp(timestamp)
                    if power_begin <= curr_time <= power_end:
                        power_vals.append(watts)

                avg_power = sum(power_vals) / len(power_vals)
                print(" {}: avg power under load: {:.2f}W with {} power samples".format(
                    benchmark, avg_power, len(power_vals)))
        print("")
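# The power loop above assumes spl.txt holds comma-separated records with exactly 12 fields,
# a timestamp in field 1 and a power reading in watts in field 3 (0-indexed), and that only
# samples falling between power_begin and power_end are averaged. The helper below is a
# self-contained sketch of that per-line assumption; the meaning of the other fields and the
# exact timestamp format are not specified here.
def _example_parse_spl_line(line):
    data = line.split(",")
    if len(data) != 12:
        return None  # Not a sample record under the assumed layout
    return {"timestamp": data[1].strip(), "watts": float(data[3])}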