Example #1
    def translate_wlist_to_scheduler_wlist(self, w_list_arg):
        """ Translate ubench custom node list format to scheduler custome node list format

        Args:
            w_list_arg:
        """

        try:
            scheduler_interface = slurmi.SlurmInterface()
        except:  # pylint:disable=bare-except
            print("Error!! Unable to load slurm module")
            scheduler_interface = None
            return None

        w_list = []
        for element in w_list_arg:
            if element.isdigit():
                elem_tuple = (int(element), None)
                w_list.append(elem_tuple)
            else:
                elem_tuple = (
                    scheduler_interface.get_nnodes_from_string(element),
                    element)
                w_list.append(elem_tuple)

        return w_list
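# A hedged usage sketch of the translation above (the node count is an assumption,
# not a value taken from the project): plain digits map to (count, None) and node-name
# strings map to (count, name), the count coming from get_nnodes_from_string.
#
#     translate_wlist_to_scheduler_wlist(['4', 'cn[10,13-17]'])
#     # -> [(4, None), (6, 'cn[10,13-17]')]   assuming 'cn[10,13-17]' expands to 6 nodes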
def test_available_nodes(mocker):
    """ docstring """

    mock_popen = mocker.patch(".".join(MOCK_UTILS + ["Popen"]),
                              side_effect=mockpopen)

    interface = slurm_i.SlurmInterface()
    node_list = interface.get_available_nodes(5)
    assert interface.get_available_nodes()
    assert len(node_list) == 3
    assert node_list[0] == "node[0006-0008,0020-0021]"
def test_job_status(mocker):
    """ docstring """

    mock_popen = mocker.patch(
        "ubench.scheduler_interfaces.slurm_interface.Popen",
        side_effect=mockpopen)

    interface = slurm_i.SlurmInterface()
    assert interface.get_jobs_state(['111', '222']) == {
        '175757': 'RUNNING',
        '26382': 'COMPLETED',
        '26938': 'COMPLETED'
    }
def test_job_status_cache(pytestconfig, mocker):
    """ we test memoize_disk decorator"""

    mock_popen = mocker.patch(
        "ubench.scheduler_interfaces.slurm_interface.Popen",
        side_effect=mockpopen)

    # cache file names carry a suffix based on the arguments of the method call
    md5_id = hashlib.md5(''.join(['222']).encode('utf-8')).hexdigest()
    cache_file = '/tmp/ubench_cache-{}-{}'.format(ubench.config.USER, md5_id)
    # the cache file may exist if a previous test run failed, so we clean it up
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    # we create a fake cache file:
    expected_results = {
        "175757": "RUNNING",
        "26382": "COMPLETED",
        "26938": "COMPLETED"
    }
    cache_contents = {'date': time.time(), 'data': expected_results}
    with open(cache_file, 'w') as cache_handle:
        json.dump(cache_contents, cache_handle)

    interface = slurm_i.SlurmInterface()

    # cached values are used, so no Slurm command is executed
    jobs_info = interface.get_jobs_state(['222'])

    assert not mock_popen.called
    assert jobs_info == expected_results

    # invalidate the cache by backdating it
    cache = {'date': time.time() - 1000}
    with open(cache_file, 'w') as cache_handle:
        json.dump(cache, cache_handle)
    jobs_info = interface.get_jobs_state(['111', '222'])
    # the Slurm command has been executed this time
    assert mock_popen.called
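# For reference, a minimal sketch of a disk-memoization decorator consistent with what
# the test above exercises (cache file naming, JSON payload with 'date' and 'data' keys,
# expiry-based invalidation). The name memoize_disk matches the docstring, but the
# expiry parameter and the exact behaviour are assumptions, not the project's actual
# implementation.
import functools
import hashlib
import json
import os
import time

import ubench.config


def memoize_disk(expiry=600):
    """Hypothetical sketch: cache a method result on disk as JSON for `expiry` seconds."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, args):
            md5_id = hashlib.md5(''.join(args).encode('utf-8')).hexdigest()
            cache_file = '/tmp/ubench_cache-{}-{}'.format(ubench.config.USER, md5_id)
            if os.path.isfile(cache_file):
                with open(cache_file) as cache:
                    content = json.load(cache)
                # reuse the cached data only if it is recent enough and complete
                if time.time() - content.get('date', 0) < expiry and 'data' in content:
                    return content['data']
            data = func(self, args)
            with open(cache_file, 'w') as cache:
                json.dump({'date': time.time(), 'data': data}, cache)
            return data
        return wrapper
    return decorator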
    def write_bench_data(self, benchmark_id):
        """ TBD

        Args:
            benchmark_id (int): benchmark number
        """
        # pylint: disable=too-many-locals, too-many-branches, too-many-statements

        try:
            scheduler_interface = slurmi.SlurmInterface()
        except:  # pylint: disable=bare-except
            print('Warning!! Unable to load Slurm module')  # pylint: disable=superfluous-parens
            scheduler_interface = None

        os.chdir(self.benchmark_path)
        output_dir = self.jube_xml_files.get_bench_outputdir()
        benchmark_rundir = self.get_bench_rundir(benchmark_id)
        jube_cmd = 'jube info ./{0} --id {1} --step execute'.format(
            output_dir, benchmark_id)

        cmd_output = tempfile.TemporaryFile()
        result_from_jube = Popen(jube_cmd,
                                 cwd=os.getcwd(),
                                 shell=True,
                                 stdout=cmd_output,
                                 universal_newlines=True)
        ret_code = result_from_jube.wait()  # pylint: disable=unused-variable

        cmd_output.flush()
        cmd_output.seek(0)
        results = {}
        workpackages = re.findall(r'Workpackages(.*?)\n{2,}',
                                  cmd_output.read().decode('utf-8'),
                                  re.DOTALL)[0]
        workdirs = {}
        regex_workdir = r'^\s+(\d+).*(' + re.escape(output_dir) + r'.*work).*'

        for package in workpackages.split('\n'):
            temp_match = re.match(regex_workdir, package)
            if temp_match:
                id_workpackage = temp_match.group(1)
                path_workpackage = temp_match.group(2)
                workdirs[id_workpackage] = path_workpackage

        cmd_output.seek(0)
        parameterization = re.findall(r'ID:(.*?)(?=\n{3,}|\sID)',
                                      cmd_output.read().decode('utf-8') + '\n',
                                      re.DOTALL)
        for execution_step in parameterization:
            id_step = [x.strip() for x in execution_step.split('\n')][0]
            param_step = [x.strip() for x in execution_step.split('\n')][1:]
            results[id_step] = {}

            for parameter in param_step:
                temp_match = re.match(r'^\S+:', parameter)
                if temp_match:
                    value = parameter.replace(temp_match.group(0), '')
                    param = temp_match.group(0).replace(':', '')
                    results[id_step][param] = value.strip()

        cmd_output.close()

        for key, value in list(results.items()):
            result_file_path = os.path.join(benchmark_rundir,
                                            'result/ubench_results.dat')

            # Add the part of the results that corresponds to a given execute step
            with open(result_file_path) as csvfile:
                reader = csv.DictReader(csvfile)

                field_names = reader.fieldnames
                common_fields = list(set(value.keys()) & set(field_names))
                result_fields = list(set(field_names) - set(common_fields))
                temp_hash = {}

                for field in result_fields:
                    temp_hash[field] = []

                for row in reader:
                    add_to_results = True
                    for field in common_fields:
                        if value[field] != row[field]:
                            add_to_results = False
                            break
                    if add_to_results:
                        for field in result_fields:
                            temp_hash[field].append(row[field])

                # When there is just one value, replace the list with that single value
                for field in result_fields:

                    if len(temp_hash[field]) == 1:
                        temp_hash[field] = temp_hash[field][0]

                results[key]['results_bench'] = temp_hash
                results[key]['context_fields'] = common_fields

            # Add job information to step execute
            job_file_path = os.path.join(workdirs[key], 'stdout')
            job_id = 0

            with open(job_file_path, 'r') as job_file:
                for line in job_file:
                    re_result = re.findall(r'\d+', line)
                    if re_result:
                        job_id = re_result[0]
                        value['job_id_ubench'] = job_id
                        if scheduler_interface:
                            job_info = scheduler_interface.get_job_info(job_id)
                            if job_info:
                                value.update(job_info[-1])
                                results[key].update(value)
                        break

        # Add metadata present in ubench.log
        field_pattern = re.compile('(.*) : (.*)')

        try:
            with open(os.path.join(benchmark_rundir, 'ubench.log'), 'r') as log_file:
                fields = field_pattern.findall(log_file.read())
        except IOError:
            print('Warning!! ubench.log file was not found. '
                  'Benchmark data result could not be created')
            return

        metadata = {}
        for field in fields:
            metadata[field[0].strip()] = field[1].strip()

        bench_data = data_store_yaml.DataStoreYAML()
        bench_data.write(metadata, results,
                         os.path.join(benchmark_rundir, 'bench_results.yaml'))
    def translate_wlist_to_scheduler_wlist(self, w_list_arg):
        """
        Translate ubench custom node list format to scheduler custome node list format
        TODO determine scheduler_interface from platform data.
        """
        try:
            scheduler_interface = slurmi.SlurmInterface()
        except:  # pylint: disable=bare-except
            print("Warning!! Unable to load slurm module")
            scheduler_interface = None
            return None

        w_list = list(w_list_arg)
        for sub_wlist in w_list:
            sub_wlist_temp = list(sub_wlist)
            stride = 0
            for idx, welem in enumerate(sub_wlist_temp):
                # Manage the 'all' keyword that is meant to launch benchmarks on every idle node
                catch = re.search(r'^all(\d+)$', str(welem))
                idxn = idx + stride
                if catch:
                    slice_size = int(catch.group(1))
                    available_nodes_list = scheduler_interface.get_available_nodes(
                        slice_size)
                    njobs = len(available_nodes_list)
                    sub_wlist[idxn:idxn + 1] = zip([slice_size] * njobs,
                                                   available_nodes_list)
                    stride += njobs - 1
                else:
                    # Manage the cn[10,13-17] notation
                    catch = re.search(r'^(\D+.*)$', str(welem))
                    if catch:
                        nnodes_list = [
                            scheduler_interface.get_nnodes_from_string(
                                catch.group(1))
                        ]
                        nodes_list = [catch.group(1)]
                        sub_wlist[idxn:idxn + 1] = zip(nnodes_list, nodes_list)
                    else:
                        # Manage the 2,4 notation that is needed to launch jobs
                        # without defined node targets.
                        catch = re.search(r'^([\d+,]*)([\d]+)$', str(welem))
                        if catch:
                            nnodes_list = [
                                int(x) for x in re.split(',', str(welem))
                            ]
                            sub_wlist[idxn:idxn + 1] = zip(nnodes_list,
                                                           [None] * len(nnodes_list))
                            stride += len(nnodes_list) - 1
                        else:
                            # Manage the 2,4,cn[200-205] notation that is used
                            # to get cn[200-201] cn[200-203]
                            catch = re.search(r'^([\d+,]*[\d+]),(.*)$', str(welem))
                            if catch:
                                nnodes_list = [
                                    int(x) for x in re.split(',', catch.group(1))
                                ]
                                nodes_list = str(catch.group(2))
                                sub_wlist[idxn:idxn + 1] = zip(
                                    nnodes_list,
                                    scheduler_interface.get_truncated_nodes_lists(
                                        nnodes_list, nodes_list))
                                stride += len(nnodes_list) - 1
                            else:
                                raise Exception(
                                    str(welem) + ' format is not correct')

        # Flatten the w_list
        w_list = [item for sublist in w_list for item in sublist]
        return w_list
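# Hedged examples of the notations handled above. The 2,4,cn[200-205] expansion is taken
# from the comment in the code; the other node counts are assumptions for illustration.
#   'all4'            -> one (4, <nodes>) tuple per idle 4-node slice returned
#                        by get_available_nodes(4)
#   'cn[10,13-17]'    -> [(6, 'cn[10,13-17]')]          # assuming the set holds 6 nodes
#   '2,4'             -> [(2, None), (4, None)]
#   '2,4,cn[200-205]' -> [(2, 'cn[200-201]'), (4, 'cn[200-203]')]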
def test_emptylist():
    """ docstring """
    interface = slurm_i.SlurmInterface()
    assert not interface.get_available_nodes()
Example #8
    def run(self, opts): # pylint: disable=arguments-differ
        """ Run benchmark on a given platform and write a ubench.log file in
        the benchmark run directory.

        Args:
            opts (dict): dictionary with the options sent to unclebench
        """
        # pylint: disable=dangerous-default-value, too-many-locals, too-many-branches
        self._init_run_dir(self.platform)

        if not opts['foreground']:
            print('---- Launching benchmark in background')

        try:
            # run_dir, ID, updated_params = self.benchmarking_api.run(opts)
            j_job, updated_params = self.benchmarking_api.run(opts)
        except (RuntimeError, OSError) as rerror:
            print('---- Error launching benchmark :')
            print(str(rerror))
            raise

        for name, old_value, new_value in updated_params:
            print('---- {0} parameter was modified from {1} to {2} for this run'.format(name,
                                                                                        old_value,
                                                                                        new_value))

        print("---- benchmark run directory: {}".format(j_job.result_path))
        logfile_path = os.path.join(j_job.result_path, 'ubench.log')
        date = time.strftime("%c")

        with open(logfile_path, 'w') as logfile:
            logfile.write('Benchmark_name  : {0} \n'.format(self.benchmark))
            logfile.write('Platform        : {0} \n'.format(self.platform))
            logfile.write('ID              : {0} \n'.format(j_job.jubeid))
            logfile.write('Date            : {0} \n'.format(date))
            logfile.write('Run_directory   : {0} \n'.format(j_job.result_path))
            if 'raw_cli' in opts:
                logfile.write('cmdline         : {0} \n'.format(' '.join(opts['raw_cli'])))

        print("---- Use the following command to follow benchmark progress: "\
              "ubench log -p {0} -b {1} -i {2}".format(self.platform,
                                                       self.benchmark, j_job.jubeid))
        if opts['foreground']:

            print('---- Waiting for benchmark to finish running')
            # waiting for compilation
            while j_job.jube_returncode is None:
                time.sleep(5)
                print('---- Waiting for benchmark compilation')

            # waiting for job execution
            job_ids = j_job.job_ids
            if not job_ids:
                print("Error: No job ids found")

            scheduler_interface = slurmi.SlurmInterface()
            job_states = ['RUNNING']
            finish_states = ['COMPLETED', 'FAILED', 'CANCELLED']
            while job_states:
                job_req = scheduler_interface.get_jobs_state(job_ids)
                # failing jobs
                failed = [job_n for job_n, job_s in job_req.items() if job_s == 'FAILED']
                if failed:
                    for job_n in failed:
                        print("Job {} has failed".format(job_n))

                job_states = [job_s for job_s in job_req.values() if job_s not in finish_states]
                if job_states:
                    print("Wating for jobs id: {}".format(",".join(job_req.keys())))
                    time.sleep(60)

            print('---- All jobs or processes in background have finished')
    def _write_bench_data(self, benchmark_id): # pylint: disable=too-many-locals
        ''' Generate benchmark results data.

        Writes bench_results.yaml

        Args:
            benchmark_id (int): id of the benchmark

        Returns:
            (dict) mapping between Jube execution directories and result values
        '''
        outpath = self.jube_files.get_bench_outputdir()
        benchmark_rundir = self.get_bench_rundir(benchmark_id, outpath)
        context_names, context = self._get_execution_context(benchmark_id)
        results, field_names = self._get_results(benchmark_rundir, context_names)
        scheduler_interface = slurmi.SlurmInterface()
        common_fields = [n for n in context_names if n in field_names]
        map_dir = {}
        for exec_id, values in context.items():
            key_results = hashlib.md5(''.join([values[n] for n in common_fields]).encode('utf-8'))

            key = key_results.hexdigest()
            if key not in results:
                results[key] = 'failed'

            context[exec_id]['results_bench'] = results[key]
            context[exec_id]['context_fields'] = common_fields
            exec_dir = "{}_execute".format(values['jube_wp_id'].zfill(6))
            map_dir[exec_dir] = results[key]
            job_file_path = os.path.join(values['jube_wp_abspath'], 'stdout')

            with open(job_file_path, 'r') as job_file:
                for line in job_file:
                    re_result = re.findall(r'\d+', line)
                    if re_result:
                        job_id = re_result[0]
                        values['job_id_ubench'] = job_id
                        if scheduler_interface:
                            job_info = scheduler_interface.get_job_info(job_id)
                            if job_info:
                                values.update(job_info[-1])
                                context[exec_id].update(values)
                        break


        try:

            with open(os.path.join(benchmark_rundir, 'ubench.log'), 'r') as logf:
                field_pattern = re.compile('(.*) : (.*)')
                fields = field_pattern.findall(logf.read())
                metadata = {name.strip():val.strip() for name, val in fields}

        except IOError:
            metadata = {'Benchmark_name': self.benchmark,
                        'Date': time.strftime("%c"),
                        'Platform': self.platform,
                        'Run_directory': benchmark_rundir,
                        'cmdline': 'Campaign'}


        bench_data = data_store_yaml.DataStoreYAML()
        self.results_file = os.path.join(benchmark_rundir, 'bench_results.yaml')
        bench_data.write(metadata, context, self.results_file)

        return map_dir