def __init__(self, threshold=None): """ Class constructor Args: threshold """ self.dstore = dsy.DataStoreYAML() self.threshold = threshold
def write_bench_data(self, benchmark_id):
    """ Write benchmark results data gathered from Jube and the scheduler.

    Args:
        benchmark_id (int): benchmark number
    """
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    try:
        scheduler_interface = slurmi.SlurmInterface()
    except:  # pylint: disable=bare-except
        print('Warning!! Unable to load Slurm module')  # pylint: disable=superfluous-parens
        scheduler_interface = None

    os.chdir(self.benchmark_path)
    output_dir = self.jube_xml_files.get_bench_outputdir()
    benchmark_rundir = self.get_bench_rundir(benchmark_id)
    jube_cmd = 'jube info ./{0} --id {1} --step execute'.format(output_dir,
                                                                benchmark_id)

    cmd_output = tempfile.TemporaryFile()
    result_from_jube = Popen(jube_cmd, cwd=os.getcwd(), shell=True,
                             stdout=cmd_output, universal_newlines=True)
    ret_code = result_from_jube.wait()  # pylint: disable=unused-variable

    cmd_output.flush()
    cmd_output.seek(0)

    results = {}
    workpackages = re.findall(r'Workpackages(.*?)\n{2,}',
                              cmd_output.read().decode('utf-8'),
                              re.DOTALL)[0]
    workdirs = {}
    regex_workdir = r'^\s+(\d+).*(' + re.escape(output_dir) + r'.*work).*'
    for package in workpackages.split('\n'):
        temp_match = re.match(regex_workdir, package)
        if temp_match:
            id_workpackage = temp_match.group(1)
            path_workpackage = temp_match.group(2)
            workdirs[id_workpackage] = path_workpackage

    cmd_output.seek(0)
    parameterization = re.findall(r'ID:(.*?)(?=\n{3,}|\sID)',
                                  cmd_output.read().decode('utf-8') + '\n',
                                  re.DOTALL)
    for execution_step in parameterization:
        id_step = [x.strip() for x in execution_step.split('\n')][0]
        param_step = [x.strip() for x in execution_step.split('\n')][1:]
        results[id_step] = {}
        for parameter in param_step:
            temp_match = re.match(r'^\S+:', parameter)
            if temp_match:
                value = parameter.replace(temp_match.group(0), '')
                param = temp_match.group(0).replace(':', '')
                results[id_step][param] = value.strip()

    cmd_output.close()

    for key, value in list(results.items()):
        result_file_path = os.path.join(benchmark_rundir,
                                        'result/ubench_results.dat')

        # Add the part of the results that corresponds to a given execute step
        with open(result_file_path) as csvfile:
            reader = csv.DictReader(csvfile)
            field_names = reader.fieldnames
            common_fields = list(set(value.keys()) & set(field_names))
            result_fields = list(set(field_names) - set(common_fields))
            temp_hash = {}
            for field in result_fields:
                temp_hash[field] = []

            for row in reader:
                add_to_results = True
                for field in common_fields:
                    if value[field] != row[field]:
                        add_to_results = False
                        break
                if add_to_results:
                    for field in result_fields:
                        temp_hash[field].append(row[field])

            # When there is a single value, replace the list with that value
            for field in result_fields:
                if len(temp_hash[field]) == 1:
                    temp_hash[field] = temp_hash[field][0]

        results[key]['results_bench'] = temp_hash
        results[key]['context_fields'] = common_fields

        # Add job information to the execute step
        job_file_path = os.path.join(workdirs[key], 'stdout')
        job_id = 0
        with open(job_file_path, 'r') as job_file:
            for line in job_file:
                re_result = re.findall(r'\d+', line)
                if re_result:
                    job_id = re_result[0]
                    value['job_id_ubench'] = job_id
                    if scheduler_interface:
                        job_info = scheduler_interface.get_job_info(job_id)
                        if job_info:
                            value.update(job_info[-1])
                    results[key].update(value)
                    break

    # Add metadata present in ubench.log
    field_pattern = re.compile('(.*) : (.*)')
    try:
        log_file = open(os.path.join(benchmark_rundir, 'ubench.log'), 'r')
    except IOError:
        print('Warning!! file ubench.log was not found. '
              'Benchmark result data could not be created.')
        return

    metadata = {}
    with log_file:
        fields = field_pattern.findall(log_file.read())
    for field in fields:
        metadata[field[0].strip()] = field[1].strip()

    bench_data = data_store_yaml.DataStoreYAML()
    bench_data.write(metadata, results,
                     os.path.join(benchmark_rundir, 'bench_results.yaml'))
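# The parsing above depends on the textual layout of `jube info` output. Below
# is a minimal, self-contained sketch of the workpackage-directory extraction,
# run against a made-up sample; the sample text and the _demo_* name are
# illustrative assumptions, not part of the original module.
def _demo_parse_workdirs():
    sample_output_dir = 'benchmark_runs'
    sample = ('Workpackages\n'
              '------------\n'
              '   0 | execute | benchmark_runs/000000/000000_execute/work\n'
              '   1 | execute | benchmark_runs/000000/000001_execute/work\n'
              '\n')
    # Grab everything between the 'Workpackages' header and the next blank line
    workpackages = re.findall(r'Workpackages(.*?)\n{2,}', sample, re.DOTALL)[0]
    regex_workdir = r'^\s+(\d+).*(' + re.escape(sample_output_dir) + r'.*work).*'
    workdirs = {}
    for package in workpackages.split('\n'):
        temp_match = re.match(regex_workdir, package)
        if temp_match:
            workdirs[temp_match.group(1)] = temp_match.group(2)
    # -> {'0': 'benchmark_runs/000000/000000_execute/work',
    #     '1': 'benchmark_runs/000000/000001_execute/work'}
    return workdirs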
def __init__(self, threshold=None): """ Constructor """ self.dstore = dsy.DataStoreYAML() self.threshold = threshold
def _write_bench_data(self, benchmark_id):  # pylint: disable=too-many-locals
    ''' Generate benchmark results data and write bench_results.yaml.

    Args:
        benchmark_id (int): id of the benchmark

    Returns:
        (dict) mapping between Jube execution directories and result values
    '''
    outpath = self.jube_files.get_bench_outputdir()
    benchmark_rundir = self.get_bench_rundir(benchmark_id, outpath)
    context_names, context = self._get_execution_context(benchmark_id)
    results, field_names = self._get_results(benchmark_rundir, context_names)
    scheduler_interface = slurmi.SlurmInterface()
    common_fields = [n for n in context_names if n in field_names]
    map_dir = {}

    for exec_id, values in context.items():
        key = hashlib.md5(
            ''.join([values[n] for n in common_fields]).encode('utf-8')
        ).hexdigest()
        if key not in results:
            results[key] = 'failed'
        context[exec_id]['results_bench'] = results[key]
        context[exec_id]['context_fields'] = common_fields
        exec_dir = "{}_execute".format(values['jube_wp_id'].zfill(6))
        map_dir[exec_dir] = results[key]

        job_file_path = os.path.join(values['jube_wp_abspath'], 'stdout')
        with open(job_file_path, 'r') as job_file:
            for line in job_file:
                re_result = re.findall(r'\d+', line)
                if re_result:
                    job_id = re_result[0]
                    values['job_id_ubench'] = job_id
                    if scheduler_interface:
                        job_info = scheduler_interface.get_job_info(job_id)
                        if job_info:
                            values.update(job_info[-1])
                    context[exec_id].update(values)
                    break

    try:
        with open(os.path.join(benchmark_rundir, 'ubench.log'), 'r') as logf:
            field_pattern = re.compile('(.*) : (.*)')
            fields = field_pattern.findall(logf.read())
            metadata = {name.strip(): val.strip() for name, val in fields}
    except IOError:
        metadata = {'Benchmark_name': self.benchmark,
                    'Date': time.strftime("%c"),
                    'Platform': self.platform,
                    'Run_directory': benchmark_rundir,
                    'cmdline': 'Campaign'}

    bench_data = data_store_yaml.DataStoreYAML()
    self.results_file = os.path.join(benchmark_rundir, 'bench_results.yaml')
    bench_data.write(metadata, context, self.results_file)
    return map_dir
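# A minimal sketch of the context-key scheme used above: results produced by
# _get_results are keyed by the md5 digest of the concatenated values of the
# common context fields, so each execution can look up its own result row.
# The field names and values below are hypothetical, for illustration only.
def _demo_context_key():
    common_fields = ['nodes', 'tasks']
    values = {'nodes': '4', 'tasks': '128', 'jube_wp_id': '0'}
    digest = hashlib.md5(
        ''.join(values[n] for n in common_fields).encode('utf-8')).hexdigest()
    # used as: results.get(digest, 'failed')
    return digest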
def write_report(self, output_dir, report_name):
    """ Write a report to output_dir according to report_writer metadata. """
    required_fields = set(['tester', 'platform', 'date_start', 'date_end',
                           'dir', 'comment', 'result'])
    context_fields = set(['compare', 'compare_threshold', 'compare_comment',
                          'context', 'context_res'])
    report_files = {}
    session_list = []

    # Get default parameter dictionaries
    dic_sessions_default = ReportWriter._get_default_dic(self.metadata['sessions'])
    dic_contexts_default = ReportWriter._get_default_dic(self.metadata['contexts'])
    dic_benchmarks_default = ReportWriter._get_default_dic(self.metadata['benchmarks'])

    # Dictionary to store main report data
    dic_report_main = {}

    # Required global parameters
    global_parameters = set(['author', 'title', 'version',
                             'introduction', 'conclusion'])
    for gp_key in global_parameters:
        if gp_key not in self.metadata:
            print("Warning: {} field is missing".format(gp_key))
            dic_report_main[gp_key] = ''
        else:
            dic_report_main[gp_key] = self.metadata[gp_key]

    dic_report_main['sessions'] = []
    dic_report_main['benchmarks'] = []

    if not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError:
            print("Error: cannot mkdir {}".format(output_dir))
            return

    # Parse benchmarks
    for bench_item in self.metadata['benchmarks']:
        bench_name, bench_dic = ReportWriter._dic_to_tuple(bench_item)
        if bench_name == 'default':
            continue
        dic_report_main['benchmarks'].append(bench_name)
        common_dic_report_bench = {}
        common_dic_report_bench['benchmark_name'] = bench_name
        fields_to_find = required_fields.union(context_fields)

        dic_contexts = {}
        for ctx_el in self.metadata['contexts']:
            ctx_bench_name, ctx_dic = ReportWriter._dic_to_tuple(ctx_el)
            if ctx_bench_name == bench_name:
                dic_contexts = ctx_dic

        # Check context parameters (same for all sessions)
        for r_field in context_fields.intersection(fields_to_find):
            if r_field in dic_contexts:
                common_dic_report_bench[r_field] = dic_contexts[r_field]
            elif r_field in dic_contexts_default:
                common_dic_report_bench[r_field] = dic_contexts_default[r_field]
            else:
                print("Please specify {} for benchmark {}".format(r_field,
                                                                  bench_name))
                return

        for r_field in context_fields:
            fields_to_find.remove(r_field)

        context_in = (common_dic_report_bench['context'],
                      common_dic_report_bench['context_res'])
        context_out = None
        date_interval_list = []
        dir_list = []

        # Parse sessions
        for session_item in self.metadata['sessions']:
            local_fields_to_find = fields_to_find.copy()
            session, dic_session = ReportWriter._dic_to_tuple(session_item)
            if session == 'default':
                continue
            if session not in dic_report_main['sessions']:
                dic_report_main['sessions'].append(session)
                session_list.append(session)

            fields_found = []
            dic_report_bench = common_dic_report_bench.copy()

            # Check benchmark parameters
            for r_field in local_fields_to_find:
                if not bench_dic[session]:
                    bench_dic[session] = {}
                if r_field in bench_dic[session]:
                    dic_report_bench[r_field] = bench_dic[session][r_field]
                    fields_found.append(r_field)
                elif r_field in dic_benchmarks_default:
                    dic_report_bench[r_field] = dic_benchmarks_default[r_field]
                    fields_found.append(r_field)

            for r_field in fields_found:
                local_fields_to_find.remove(r_field)

            # Check session parameters
            for r_field in local_fields_to_find:
                if r_field in dic_session:
                    dic_report_bench[r_field] = dic_session[r_field]
                elif r_field in dic_sessions_default:
                    dic_report_bench[r_field] = dic_sessions_default[r_field]
                else:
                    print("Please specify {} for benchmark {}".format(r_field,
                                                                      bench_name))
                    return

            # Get performance array
            dstore = dsy.DataStoreYAML()
            date_interval = (ReportWriter._read_date(dic_report_bench['date_start']),
                             ReportWriter._read_date(dic_report_bench['date_end']))
            date_interval_list.append(date_interval)
            dir_list.append(dic_report_bench['dir'])
            run_metadata, bench_dataframe, context_out, sub_bench \
                = dstore._dir_to_pandas(dic_report_bench['dir'], bench_name,
                                        date_interval, context_in)

            if bench_dataframe.empty:
                print("Error: no value found for session {} and benchmark {}"
                      .format(session, bench_name))
                return

            perf_array_list, sub_bench_list \
                = self._get_perf_array(bench_dataframe, context_out, sub_bench)
            if sub_bench_list[0] is None:
                sub_bench_list[0] = bench_name

            # Complete benchmark information
            if "cmdline" in run_metadata:
                dic_report_bench['cmdline'] = list(set(run_metadata['cmdline']))
            else:
                dic_report_bench['cmdline'] = ["N/A"]

            dic_report_bench['perf_array_list'] = list(zip(perf_array_list,
                                                           sub_bench_list))
            dic_report_bench['sub_bench_list'] = sub_bench_list
            dic_report_bench['ncols'] = len(perf_array_list[-1][-1])

            # Write the current benchmark report using a template
            out_filename = bench_name + "_" + session + ".asc"
            if session not in report_files:
                report_files[session] = {}
            report_files[session][bench_name] = out_filename
            self.jinja_templated_write(dic_report_bench, self.bench_template,
                                       os.path.join(output_dir, out_filename))

        # Write performance comparison across sessions
        if bool(dic_report_bench['compare']):
            if 'compare' not in report_files:
                report_files['compare'] = {}
            report_files['compare'][bench_name] = self.write_comparison(
                output_dir, bench_name, sub_bench, sub_bench_list,
                date_interval_list, dir_list, context_out,
                dic_report_bench['compare_threshold'], session_list)

    # Write the full report
    dic_report_main['report_files'] = report_files
    self.jinja_templated_write(dic_report_main, self.report_template,
                               os.path.join(output_dir, report_name + ".asc"))
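# write_report walks a metadata mapping whose 'sessions', 'contexts' and
# 'benchmarks' lists hold single-key items mapping a name to a parameter
# dictionary, with a reserved 'default' entry per list for fallback values.
# The sketch below shows that layout as inferred from the lookups above;
# every value is a hypothetical placeholder, not taken from the original.
_EXAMPLE_REPORT_METADATA = {
    'author': 'jdoe',
    'title': 'Benchmark report',
    'version': '1.0',
    'introduction': 'Intro text',
    'conclusion': 'Conclusion text',
    'sessions': [
        {'default': {'tester': 'jdoe', 'comment': 'n/a', 'result': 'n/a'}},
        {'run_2019_01': {'date_start': '2019-01-01', 'date_end': '2019-01-31',
                         'dir': 'benchmark_runs', 'platform': 'cluster-a'}},
    ],
    'contexts': [
        {'default': {}},
        {'hpl': {'compare': True, 'compare_threshold': 0.05,
                 'compare_comment': 'n/a',
                 'context': ['nodes'], 'context_res': ['flops']}},
    ],
    'benchmarks': [
        {'default': {}},
        {'hpl': {'run_2019_01': {'comment': 'baseline run'}}},
    ],
}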
def add_session_to_report(self, benchmark_name, session_name, session_report,
                          row_headers, column_headers, output_dir):
    """ Add a benchmark session to the report.

    Args:
        benchmark_name: name of the benchmark
        session_name: name of the session
        session_report: dictionary from which the report section concerning
                        benchmark_name and session_name will be built
        row_headers: labels used to identify rows in the report
        column_headers: labels used to identify columns in the report
        output_dir: report output directory

    Returns:
        (tuple) sub_bench, sub_bench_list, context_out
    """
    self.session_list.append(session_name)
    dstore = dsy.DataStoreYAML()
    date_interval = (Report._read_date(session_report['date_start']),
                     Report._read_date(session_report['date_end']))
    self.date_interval_list.append(date_interval)
    self.directory_list.append(session_report['dir'])
    context_out = None
    run_metadata, bench_dataframe, context_out, sub_bench \
        = dstore.dir_to_pandas(session_report['dir'], benchmark_name,
                               date_interval, (row_headers, column_headers))

    if bench_dataframe.empty:
        print("Error: no value found for session {} and benchmark {}"
              .format(session_name, benchmark_name))
        return None, None, None

    perf_array_list, sub_bench_list \
        = self._get_perf_array(bench_dataframe, context_out, sub_bench)
    if sub_bench_list[0] is None:
        sub_bench_list[0] = benchmark_name

    # Complete benchmark information
    if "cmdline" in run_metadata:
        session_report['cmdline'] = list(set(run_metadata['cmdline']))
    else:
        session_report['cmdline'] = ["N/A"]

    session_report['perf_array_list'] = list(zip(perf_array_list,
                                                 sub_bench_list))
    session_report['sub_bench_list'] = sub_bench_list
    session_report['ncols'] = len(perf_array_list[-1][-1])

    # Write the current benchmark report using a template
    out_filename = benchmark_name + "_" + session_name + ".asc"
    if session_name not in self.report_files:
        self.report_files[session_name] = {}
    self.report_files[session_name][benchmark_name] = out_filename
    self.jinja_templated_write(session_report, self.bench_template,
                               os.path.join(output_dir, out_filename))

    return sub_bench, sub_bench_list, context_out
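# Hedged usage sketch: a typical call sequence, assuming a Report object that
# defines the attributes referenced above (session_list, date_interval_list,
# directory_list, report_files, bench_template). Argument values are
# illustrative only.
#
#   report = Report(...)
#   sub_bench, sub_bench_list, context_out = report.add_session_to_report(
#       benchmark_name='hpl',
#       session_name='run_2019_01',
#       session_report={'date_start': '2019-01-01', 'date_end': '2019-01-31',
#                       'dir': 'benchmark_runs'},
#       row_headers=['nodes'],
#       column_headers=['flops'],
#       output_dir='report')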