Exemplo n.º 1
0
    def __init__(self, threshold=None):
        """Initialise the instance with a YAML data store and a threshold.

        Args:
            threshold: value kept unchanged in ``self.threshold``
                (None when not given).
        """
        yaml_store = dsy.DataStoreYAML()
        self.threshold = threshold
        self.dstore = yaml_store
    def write_bench_data(self, benchmark_id):
        """Gather the results of a benchmark run and write them to a YAML file.

        The method changes the current directory to ``self.benchmark_path``,
        runs ``jube info`` on the ``execute`` step of the benchmark and parses
        its output to get, for each execution, its parameters and its work
        directory.  Each execution is then completed with:

        * the rows of ``result/ubench_results.dat`` whose values match the
          execution parameters (``results_bench`` / ``context_fields``);
        * the first number found in the ``stdout`` file of its work directory
          (``job_id_ubench``) and, when a Slurm interface could be created,
          the last entry returned by ``get_job_info`` for that number.

        Finally the ``name : value`` pairs found in ``ubench.log`` are used as
        metadata and everything is written to ``bench_results.yaml`` in the
        benchmark run directory.  If ``ubench.log`` cannot be opened a warning
        is printed and nothing is written.

        Args:
            benchmark_id (int): benchmark number

        Raises:
            IndexError: if the ``jube info`` output contains no
                ``Workpackages`` section.
            KeyError: if an execution has no matching work directory.
        """
        # pylint: disable=too-many-locals, too-many-branches, too-many-statements

        # Slurm is optional: without it only the job id is recorded.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        try:
            scheduler_interface = slurmi.SlurmInterface()
        except Exception:  # pylint: disable=broad-except
            print('Warning!! Unable to load Slurm module')  # pylint: disable=superfluous-parens
            scheduler_interface = None

        os.chdir(self.benchmark_path)
        output_dir = self.jube_xml_files.get_bench_outputdir()
        benchmark_rundir = self.get_bench_rundir(benchmark_id)
        jube_cmd = 'jube info ./{0} --id {1} --step execute'.format(
            output_dir, benchmark_id)

        # The command output is captured once; the context manager closes the
        # temporary file even when running or decoding fails.
        # NOTE(review): the command is a shell string built from output_dir
        # and benchmark_id - confirm both are trusted values.
        with tempfile.TemporaryFile() as cmd_output:
            result_from_jube = Popen(jube_cmd,
                                     cwd=os.getcwd(),
                                     shell=True,
                                     stdout=cmd_output,
                                     universal_newlines=True)
            result_from_jube.wait()
            cmd_output.flush()
            cmd_output.seek(0)
            jube_output = cmd_output.read().decode('utf-8')

        # Map each workpackage id to its work directory.
        workpackages = re.findall(r'Workpackages(.*?)\n{2,}',
                                  jube_output,
                                  re.DOTALL)[0]
        regex_workdir = re.compile(
            r'^\s+(\d+).*(' + re.escape(output_dir) + r'.*work).*')
        workdirs = {}
        for package in workpackages.split('\n'):
            temp_match = regex_workdir.match(package)
            if temp_match:
                workdirs[temp_match.group(1)] = temp_match.group(2)

        # Map each execution id to its "name: value" parameters.
        results = {}
        parameterization = re.findall(r'ID:(.*?)(?=\n{3,}|\sID)',
                                      jube_output + '\n',
                                      re.DOTALL)
        for execution_step in parameterization:
            step_lines = [x.strip() for x in execution_step.split('\n')]
            id_step = step_lines[0]
            results[id_step] = {}

            for parameter in step_lines[1:]:
                temp_match = re.match(r'^\S+:', parameter)
                if temp_match:
                    value = parameter.replace(temp_match.group(0), '')
                    param = temp_match.group(0).replace(':', '')
                    results[id_step][param] = value.strip()

        result_file_path = os.path.join(benchmark_rundir,
                                        'result/ubench_results.dat')

        # ``value`` is the very dictionary stored in ``results[key]``, so
        # updating it updates the final results in place.
        for key, value in results.items():
            # We add the part of results which corresponds to a given execute
            with open(result_file_path) as csvfile:
                reader = csv.DictReader(csvfile)

                field_names = reader.fieldnames
                common_fields = list(set(value.keys()) & set(field_names))
                result_fields = list(set(field_names) - set(common_fields))
                temp_hash = {field: [] for field in result_fields}

                for row in reader:
                    # Keep only rows whose context matches this execution.
                    if all(value[field] == row[field]
                           for field in common_fields):
                        for field in result_fields:
                            temp_hash[field].append(row[field])

            # When there is just one value we transform the array in one value
            for field in result_fields:
                if len(temp_hash[field]) == 1:
                    temp_hash[field] = temp_hash[field][0]

            value['results_bench'] = temp_hash
            value['context_fields'] = common_fields

            # Add job information to step execute: the job id is the first
            # number found in the stdout file of the work directory.
            job_file_path = os.path.join(workdirs[key], 'stdout')

            with open(job_file_path, 'r') as job_file:
                for line in job_file:
                    re_result = re.findall(r'\d+', line)
                    if re_result:
                        job_id = re_result[0]
                        value['job_id_ubench'] = job_id
                        if scheduler_interface:
                            job_info = scheduler_interface.get_job_info(job_id)
                            if job_info:
                                value.update(job_info[-1])
                        break

        # Add metadata present on ubench.log
        field_pattern = re.compile('(.*) : (.*)')

        try:
            log_file = open(os.path.join(benchmark_rundir, 'ubench.log'), 'r')
        except IOError:
            print('Warning!! file ubench log was not found.' +
                  'Benchmark data result could not be created')
            return

        # Close the log file as soon as its content has been parsed.
        with log_file:
            fields = field_pattern.findall(log_file.read())

        metadata = {field[0].strip(): field[1].strip() for field in fields}

        bench_data = data_store_yaml.DataStoreYAML()
        bench_data.write(metadata, results,
                         os.path.join(benchmark_rundir, 'bench_results.yaml'))
Exemplo n.º 3
0
 def __init__(self, threshold=None):
     """Create the YAML data store and remember the given threshold."""
     yaml_store = dsy.DataStoreYAML()
     self.threshold = threshold
     self.dstore = yaml_store
Exemplo n.º 4
0
    def _write_bench_data(self, benchmark_id): # pylint: disable=too-many-locals
        """Build the result data of a benchmark run and save it.

        Every execution of the context returned by
        ``_get_execution_context`` is completed with its benchmark results
        (or ``'failed'`` when none is found), with the first number read from
        the ``stdout`` file of its work directory and with the last entry
        given by the scheduler for that number.  The whole context is then
        written to ``bench_results.yaml`` together with the metadata read
        from ``ubench.log`` (or default metadata when the log cannot be read).

        Args:
            benchmark_id (int): id of the benchmark

        Returns:
            (dict) mapping between Jube execution directories and result values
        """
        output_path = self.jube_files.get_bench_outputdir()
        benchmark_rundir = self.get_bench_rundir(benchmark_id, output_path)
        ctx_names, ctx = self._get_execution_context(benchmark_id)
        bench_results, result_names = self._get_results(benchmark_rundir, ctx_names)
        scheduler = slurmi.SlurmInterface()
        shared_fields = [name for name in ctx_names if name in result_names]
        results_by_dir = {}

        for exec_id, params in ctx.items():
            # Results are indexed by the md5 of the joined context values.
            joined_values = ''.join([params[name] for name in shared_fields])
            digest = hashlib.md5(joined_values.encode('utf-8')).hexdigest()
            if digest not in bench_results:
                bench_results[digest] = 'failed'

            ctx[exec_id]['results_bench'] = bench_results[digest]
            ctx[exec_id]['context_fields'] = shared_fields
            padded_id = params['jube_wp_id'].zfill(6)
            results_by_dir["{}_execute".format(padded_id)] = bench_results[digest]
            stdout_path = os.path.join(params['jube_wp_abspath'], 'stdout')

            # Only the first line holding a number is taken into account.
            with open(stdout_path, 'r') as stdout_file:
                for text_line in stdout_file:
                    numbers = re.findall(r'\d+', text_line)
                    if not numbers:
                        continue
                    first_number = numbers[0]
                    params['job_id_ubench'] = first_number
                    if scheduler:
                        scheduler_info = scheduler.get_job_info(first_number)
                        if scheduler_info:
                            params.update(scheduler_info[-1])
                            ctx[exec_id].update(params)
                    break

        log_path = os.path.join(benchmark_rundir, 'ubench.log')
        try:
            with open(log_path, 'r') as log_stream:
                pairs = re.compile('(.*) : (.*)').findall(log_stream.read())
                metadata = dict((name.strip(), val.strip())
                                for name, val in pairs)
        except IOError:
            # No readable log: fall back on metadata built from the instance.
            metadata = {'Benchmark_name': self.benchmark,
                        'Date': time.strftime("%c"),
                        'Platform': self.platform,
                        'Run_directory': benchmark_rundir,
                        'cmdline': 'Campaign'}

        yaml_writer = data_store_yaml.DataStoreYAML()
        self.results_file = os.path.join(benchmark_rundir, 'bench_results.yaml')
        yaml_writer.write(metadata, ctx, self.results_file)

        return results_by_dir
Exemplo n.º 5
0
    def write_report(self, output_dir, report_name):
        """
        Write a report in output directory according to report_writer metadata.

        For every benchmark of ``self.metadata['benchmarks']`` and every
        session of ``self.metadata['sessions']`` (``default`` entries are
        skipped), a ``<benchmark>_<session>.asc`` file is rendered with
        ``self.bench_template``.  When the ``compare`` context field of a
        benchmark is true, ``write_comparison`` is called for it.  The main
        report ``<report_name>.asc`` is finally rendered with
        ``self.report_template``.

        Required fields are looked up, in this order, in the benchmark entry
        of the session, the benchmark defaults, the session entry and the
        session defaults.  Context fields are looked up in the context entry
        of the benchmark, then in the context defaults.

        The method prints a message and returns early, without writing the
        main report, when the output directory cannot be created, when a
        field cannot be found or when no value is found for a session.

        Args:
            output_dir: directory where report files are written (created if
                it does not exist)
            report_name: name of the main report file, without extension
        """
        required_fields = {'tester', 'platform', 'date_start', 'date_end',
                           'dir', 'comment', 'result'}
        context_fields = {'compare', 'compare_threshold', 'compare_comment',
                          'context', 'context_res'}
        report_files = {}
        session_list = []

        # Get default parameters dictionaries
        dic_sessions_default = ReportWriter._get_default_dic(self.metadata['sessions'])
        dic_contexts_default = ReportWriter._get_default_dic(self.metadata['contexts'])
        dic_benchmarks_default = ReportWriter._get_default_dic(self.metadata['benchmarks'])

        # Dictionary to store main report data
        dic_report_main = {}
        # Global parameters: a missing one is reported and left empty instead
        # of raising a KeyError.
        global_parameters = ('author', 'title', 'version', 'introduction',
                             'conclusion')
        for gp_key in global_parameters:
            if gp_key not in self.metadata:
                print("Warning: {} field is missing".format(gp_key))
                dic_report_main[gp_key] = ''
            else:
                dic_report_main[gp_key] = self.metadata[gp_key]
        dic_report_main['sessions'] = []
        dic_report_main["benchmarks"] = []

        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError:
                print("Error: cannot mkdir {}".format(output_dir))
                return

        # Parse benchmarks
        for bench_item in self.metadata['benchmarks']:

            bench_name, bench_dic = ReportWriter._dic_to_tuple(bench_item)

            if bench_name == 'default':
                continue
            dic_report_main['benchmarks'].append(bench_name)

            common_dic_report_bench = {"benchmark_name": bench_name}

            # The last context entry named after the benchmark is used.
            dic_contexts = {}
            for ctx_el in self.metadata['contexts']:
                ctx_bench_name, ctx_dic = ReportWriter._dic_to_tuple(ctx_el)
                if ctx_bench_name == bench_name:
                    dic_contexts = ctx_dic

            # Check context parameters (same for all sessions)
            for r_field in context_fields:
                if r_field in dic_contexts:
                    common_dic_report_bench[r_field] = dic_contexts[r_field]
                elif r_field in dic_contexts_default:
                    common_dic_report_bench[r_field] = dic_contexts_default[r_field]
                else:
                    print("Please precise {} for benchmark {}".format(r_field, bench_name))
                    return

            context_in = (common_dic_report_bench['context'],
                          common_dic_report_bench['context_res'])
            context_out = None
            date_interval_list = []
            dir_list = []
            # Parse sessions
            for session_item in self.metadata['sessions']:

                session, dic_session = ReportWriter._dic_to_tuple(session_item)

                if session == 'default':
                    continue
                if session not in dic_report_main['sessions']:
                    dic_report_main['sessions'].append(session)
                    session_list.append(session)

                dic_report_bench = common_dic_report_bench.copy()

                # An empty benchmark entry for the session is treated as {}.
                if not bench_dic[session]:
                    bench_dic[session] = {}

                # Check benchmark parameters; fields found neither in the
                # benchmark entry nor in its defaults are searched in the
                # session parameters afterwards.
                fields_left = []
                for r_field in required_fields:
                    if r_field in bench_dic[session]:
                        dic_report_bench[r_field] = bench_dic[session][r_field]
                    elif r_field in dic_benchmarks_default:
                        dic_report_bench[r_field] = dic_benchmarks_default[r_field]
                    else:
                        fields_left.append(r_field)

                # Check session parameters
                for r_field in fields_left:
                    if r_field in dic_session:
                        dic_report_bench[r_field] = dic_session[r_field]
                    elif r_field in dic_sessions_default:
                        dic_report_bench[r_field] = dic_sessions_default[r_field]
                    else:
                        print("Please precise {} for benchmark {}".format(r_field, bench_name))
                        return

                # Get performance array
                dstore = dsy.DataStoreYAML()
                date_interval = (ReportWriter._read_date(dic_report_bench['date_start']),
                                 ReportWriter._read_date(dic_report_bench['date_end']))

                date_interval_list.append(date_interval)
                dir_list.append(dic_report_bench['dir'])

                run_metadata, bench_dataframe, context_out, sub_bench \
                    = dstore._dir_to_pandas(dic_report_bench['dir'], bench_name,
                                            date_interval, context_in)

                if bench_dataframe.empty:
                    print("Error : no value found for session {} and benchmark {}".\
                          format(session, bench_name))
                    return

                perf_array_list, sub_bench_list \
                    = self._get_perf_array(bench_dataframe, context_out, sub_bench)

                if sub_bench_list[0] is None:
                    sub_bench_list[0] = bench_name

                # Complete benchmark information
                if "cmdline" in run_metadata:
                    dic_report_bench['cmdline'] = list(set(run_metadata['cmdline']))
                else:
                    dic_report_bench['cmdline'] = ["N/A"]
                # A list (not a one-shot zip iterator) so that the template
                # can iterate over it more than once.
                dic_report_bench['perf_array_list'] = list(
                    zip(perf_array_list, sub_bench_list))
                dic_report_bench['sub_bench_list'] = sub_bench_list
                dic_report_bench['ncols'] = len(perf_array_list[-1][-1])

                # Write current benchmark report using a template
                out_filename = bench_name + "_" + session + ".asc"

                if session not in report_files:
                    report_files[session] = {}
                report_files[session][bench_name] = out_filename
                self.jinja_templated_write(dic_report_bench, self.bench_template,
                                           os.path.join(output_dir, out_filename))

            # Write performance comparison across sessions.  It needs the
            # values of at least one processed session (sub_bench,
            # sub_bench_list), hence the check on dir_list.
            if dir_list and common_dic_report_bench['compare']:
                if 'compare' not in report_files:
                    report_files['compare'] = {}

                report_files['compare'][bench_name] \
                    = self.write_comparison(output_dir, bench_name, sub_bench,
                                            sub_bench_list, date_interval_list,
                                            dir_list, context_out,
                                            common_dic_report_bench['compare_threshold'],
                                            session_list)

        # Write full report
        dic_report_main['report_files'] = report_files
        self.jinja_templated_write(dic_report_main, self.report_template,
                                   os.path.join(output_dir, report_name + ".asc"))
Exemplo n.º 6
0
    def add_session_to_report(self, benchmark_name, session_name,
                              session_report, row_headers, column_headers,
                              output_dir):
        """
        Add to report a benchmark session.

        The session name, its date interval and its directory are appended to
        ``self.session_list``, ``self.date_interval_list`` and
        ``self.directory_list``.  ``session_report`` is completed in place
        (``cmdline``, ``perf_array_list``, ``sub_bench_list``, ``ncols``) and
        rendered with ``self.bench_template`` in
        ``<benchmark_name>_<session_name>.asc``, which is also registered in
        ``self.report_files``.

        Args:
            benchmark_name: name of the benchmark
            session_name: name of the session
            session_report: dictionary from which the report section
                            concerning benchmark_name and session_name
                            will be built.
            row_headers: labels used to identify rows in report
            column_headers: label used to identify columns in report
            output_dir: report output directory

        Returns:
            A tuple ``(sub_bench, sub_bench_list, context_out)`` where
            ``sub_bench`` and ``context_out`` come from ``dir_to_pandas`` and
            ``sub_bench_list`` comes from ``_get_perf_array`` (its first
            element is replaced by ``benchmark_name`` when it is None).

        Raises:
            SystemExit: if no value is found for the session and benchmark.
        """
        self.session_list.append(session_name)

        dstore = dsy.DataStoreYAML()
        date_interval = (Report._read_date(session_report['date_start']),
                         Report._read_date(session_report['date_end']))
        self.date_interval_list.append(date_interval)
        self.directory_list.append(session_report['dir'])

        run_metadata, bench_dataframe, context_out, sub_bench \
            = dstore.dir_to_pandas(session_report['dir'], benchmark_name,
                                   date_interval, (row_headers, column_headers))

        if bench_dataframe.empty:
            print("Error : no value found for session {} and benchmark {}".\
                  format(session_name, benchmark_name))
            # The original bare ``exit`` expression was never called, so the
            # method went on with an empty dataframe; really stop here.
            raise SystemExit(1)

        perf_array_list, sub_bench_list \
            = self._get_perf_array(bench_dataframe, context_out, sub_bench)

        if sub_bench_list[0] is None:
            sub_bench_list[0] = benchmark_name

        # Complete benchmark information
        if "cmdline" in run_metadata:
            session_report['cmdline'] = list(set(run_metadata['cmdline']))
        else:
            session_report['cmdline'] = ["N/A"]
        session_report['perf_array_list'] = list(
            zip(perf_array_list, sub_bench_list))
        session_report['sub_bench_list'] = sub_bench_list
        session_report['ncols'] = len(perf_array_list[-1][-1])

        # Write current benchmark report using a template
        out_filename = benchmark_name + "_" + session_name + ".asc"

        if session_name not in self.report_files:
            self.report_files[session_name] = {}

        self.report_files[session_name][benchmark_name] = out_filename
        self.jinja_templated_write(session_report, self.bench_template,
                                   os.path.join(output_dir, out_filename))

        return sub_bench, sub_bench_list, context_out