示例#1
0
    def work_get(getter, uid):
        """
        Drain messages from `getter` into `data[uid]`.

        `data[uid]` is reset to an empty list, then message batches are
        fetched with `getter.get()`.  For every message the `src` field is
        recorded, until a message whose `idx` is `None` is seen -- at that
        point collection ends immediately and the remainder of the batch is
        ignored.
        """

        data[uid] = list()

        while True:
            for raw in getter.get():
                item = ru.as_string(raw)

                # a message without an index ends the collection
                if item['idx'] is None:
                    return

                data[uid].append(item['src'])
示例#2
0
    def work_get(getter, uid):
        """
        Collect messages from `getter` into `data[uid]`, then stop the getter.

        `data[uid]` is reset to an empty list.  Batches are fetched with
        `getter.get()`; the `src` field of every message with a non-`None`
        `idx` is appended.  A message whose `idx` is `None` ends polling, but
        the rest of the batch it arrived in is still processed.

        `getter.stop()` is always called, also when handling a message raises.
        """

        data[uid] = list()
        done = False

        try:
            while not done:
                for msg in getter.get():
                    msg = ru.as_string(msg)
                    if msg['idx'] is None:
                        # sentinel: finish this batch, then stop polling
                        done = True
                    else:
                        data[uid].append(msg['src'])
        finally:
            # release the getter even if a message could not be handled
            getter.stop()
示例#3
0
    def rm_config_hook(cls, name, cfg, rm, logger, profiler):
        """
        Start a flux instance and return the information needed to reach it.

        The hook verifies that both the `flux` executable and the `flux`
        Python module are available, launches `flux start` through a shell,
        and reads the `export KEY=VALUE` lines printed by `flux env` until an
        `OK` line is seen.  `FLUX_URI` is then rewritten into an `ssh` URL
        pointing at this host, and a daemon thread is started which pings the
        flux instance until the ping fails.

        Args:
            cls:      first positional argument (not used in this hook).
            name:     not used in this hook.
            cfg:      not used in this hook.
            rm:       not used in this hook.
            logger:   logger used for debug / info / error output.
            profiler: profiler on which `flux_start` and `flux_started`
                      events are recorded.

        Returns:
            dict with `flux_env` (the collected environment, including the
            rewritten `FLUX_URI`) and `flux_pid` (pid of the spawned shell).

        Raises:
            Exception:      the flux executable or Python module is missing.
            RuntimeError:   the flux process closed its output before `OK`.
            AssertionError: flux did not report a `FLUX_URI`.
        """

        profiler.prof('flux_start')

        flux_exe = ru.which('flux')
        if not flux_exe:
            raise Exception("Couldn't find flux")

        # make sure the flux python module is usable before starting anything
        try:
            import sys
            logger.debug('sys.path: %s', sys.path)
            import flux                                      # noqa: F401
        except Exception:
            raise Exception("Couldn't import flux")

        # the `check` script is piped into `flux start`: it prints the flux
        # environment, an 'OK' marker, and then keeps the instance busy
        check = 'flux env; echo "OK"; while true; do echo "ok"; sleep 1; done'
        start = 'flux start -o,-v,-S,log-filename=out'
        cmd = '/bin/bash -c "echo \\\"%s\\\" | %s"' % (check, start)
        proc = sp.Popen(cmd,
                        shell=True,
                        stdin=sp.PIPE,
                        stdout=sp.PIPE,
                        stderr=sp.STDOUT)

        flux_env = dict()
        while True:

            raw = proc.stdout.readline()
            if not raw:
                # EOF: the process closed its output before reporting 'OK' --
                # without this check the loop would spin forever
                raise RuntimeError("flux startup failed: no 'OK' received")

            line = ru.as_string(raw.strip())
            logger.debug('=== %s', line)

            if line.startswith('export '):
                k, v = line.split(' ', 1)[1].strip().split('=', 1)
                flux_env[k] = v.strip('"')
                logger.debug('%s = %s' % (k, v.strip('"')))

            elif line == 'OK':
                break

        assert ('FLUX_URI' in flux_env)

        # TODO check perf implications
        flux_url = ru.Url(flux_env['FLUX_URI'])
        flux_url.host = ru.get_hostname()
        flux_url.scheme = 'ssh'
        flux_env['FLUX_URI'] = str(flux_url)

        profiler.prof('flux_started')

        # ----------------------------------------------------------------------
        def _watch_flux(flux_env):
            # ping the flux instance until the ping reports a failure

            logger.info('=== starting flux watcher')

            # the ping is run with the flux environment exported
            for k, v in flux_env.items():
                os.environ[k] = v

            while True:

                out, err, ret = ru.sh_callout('flux ping -c 1 all')
                logger.debug('=== flux watcher out: %s', out)

                if ret:
                    logger.error('=== flux watcher err: %s', err)
                    break

                time.sleep(0.1)

            logger.info('flux stopped?')
            # FIXME: trigger termination

        # ----------------------------------------------------------------------

        flux_watcher = mt.Thread(target=_watch_flux, args=[flux_env])
        flux_watcher.daemon = True
        flux_watcher.start()

        logger.info("flux startup successful: [%s]", flux_env['FLUX_URI'])

        lm_info = {'flux_env': flux_env, 'flux_pid': proc.pid}

        return lm_info
示例#4
0
    def _parse_pbspro_vnodes(self):
        """
        Return the unique vnode names assigned to the current PBSPro job.

        The job ID is read from `$PBS_JOBID`, `qstat -f <jobid>` is run, and
        the (possibly multi-line) `exec_vnode` entry of its output is parsed.
        The entry is expected to look like

            (vnodeA:ncpus=N[:res=val...]+vnodeB:ncpus=M)+(vnodeC:ncpus=K)

        i.e. parenthesized chunks joined by `+`, each chunk holding one or
        more `+`-separated vnode slices.  The `ncpus` values are only used
        for debug logging.

        Returns:
            list of unique vnode names, in order of first appearance.

        Raises:
            RuntimeError: `$PBS_JOBID` is not set, or the `qstat` output
                          contains no `exec_vnode` entry.
            subprocess.CalledProcessError: `qstat` exits with an error.
            ValueError: an `ncpus` value is not an integer.
        """

        # PBS Job ID
        pbspro_jobid = os.environ.get('PBS_JOBID')
        if not pbspro_jobid:
            msg = "$PBS_JOBID not set!"
            self._log.error(msg)
            raise RuntimeError(msg)

        # Get the output of qstat -f for this job
        output = subprocess.check_output(["qstat", "-f", pbspro_jobid])

        # Get the (multiline) 'exec_vnode' entry
        vnodes_str = ''
        for line in output.splitlines():
            line = ru.as_string(line)
            # Detect start of entry
            if 'exec_vnode = ' in line:
                vnodes_str += line.strip()
            elif vnodes_str:
                # Continuation lines carry no ' = '; the next entry does
                if " = " not in line:
                    vnodes_str += line.strip()
                else:
                    break

        if not vnodes_str:
            msg = "no exec_vnode entry found for job %s" % pbspro_jobid
            self._log.error(msg)
            raise RuntimeError(msg)

        # Get the RHS of the entry
        rhs = vnodes_str.split('=', 1)[1].strip()
        self._log.debug("input: %s", rhs)

        # Drop the outermost parentheses, then split on the chunk separator
        nodes_list = rhs[1:-1].split(')+(')

        vnodes_list = []
        cpus_list = []
        # Split out the slices into vnode name and cpu count
        for node_str in nodes_list:
            for _slice in node_str.split('+'):

                # a slice is '<vnode>:<res>=<val>[:<res>=<val>...]'
                vnode, _, resources = _slice.partition(':')
                if not vnode:
                    continue

                cpus = None
                for res in resources.split(':'):
                    key, _, val = res.partition('=')
                    if key == 'ncpus':
                        cpus = int(val)
                        break

                self._log.debug("vnode: %s cpus: %s", vnode, cpus)
                vnodes_list.append(vnode)
                if cpus is not None:
                    cpus_list.append(cpus)

        self._log.debug("vnodes: %s", vnodes_list)
        self._log.debug("cpus: %s", cpus_list)

        cpu_sizes = sorted(set(cpus_list))
        if len(cpu_sizes) > 1:
            self._log.debug("Detected vnodes of different sizes: %s, "
                            "the minimal is: %d.", cpu_sizes, cpu_sizes[0])

        # only unique node names, keeping the order of first appearance
        node_list = list(dict.fromkeys(vnodes_list))
        self._log.debug("Node list: %s", node_list)

        # Return the list of node names
        return node_list
示例#5
0
    def _handle_unit_stdio(self, unit):
        """
        Pull a unit's stdout, stderr and profile data into the agent.

        If the unit's `stdout_file` / `stderr_file` exist, the tail of their
        content (as returned by `rpu.tail`) is appended to `unit['stdout']` /
        `unit['stderr']`; content which cannot be decoded is replaced by a
        short notice.  The stderr text is also scanned for PRTE lines of the
        form `[host:pid] JOB [jobid] EXECUTING`, and the job ID found is
        logged together with the unit ID.  Finally, events from
        `<sandbox>/<uid>.prof` (if present) are replayed into the profiler.

        Args:
            unit: unit description dict; `unit_sandbox_path` and `uid` are
                  required, `stdout_file` and `stderr_file` are optional.
                  `stdout` / `stderr` must already hold strings when the
                  respective file exists.

        Each stage is bracketed by `staging_*_start` / `staging_*_stop`
        profile events.
        """

        sandbox = unit['unit_sandbox_path']
        uid     = unit['uid']

        self._prof.prof('staging_stdout_start', uid=uid)

        # TODO: disable this at scale?
        if unit.get('stdout_file') and os.path.isfile(unit['stdout_file']):
            with open(unit['stdout_file'], 'r') as stdout_f:
                try:
                    txt = ru.as_string(stdout_f.read())
                except UnicodeDecodeError:
                    txt = "unit stdout is binary -- use file staging"

            unit['stdout'] += rpu.tail(txt)

        self._prof.prof('staging_stdout_stop',  uid=uid)
        self._prof.prof('staging_stderr_start', uid=uid)

        # TODO: disable this at scale?
        if unit.get('stderr_file') and os.path.isfile(unit['stderr_file']):
            with open(unit['stderr_file'], 'r') as stderr_f:
                try:
                    txt = ru.as_string(stderr_f.read())
                except UnicodeDecodeError:
                    txt = "unit stderr is binary -- use file staging"

            # the tail is appended exactly once
            unit['stderr'] += rpu.tail(txt)

            # to help with ID mapping, also parse for PRTE output:
            # [batch3:122527] JOB [3673,4] EXECUTING
            # The text read above is reused, so binary stderr cannot raise
            # a second, unhandled decode error here.
            for line in txt.split('\n'):
                line = line.strip()
                if not line:
                    continue
                if line.startswith('[') and line.endswith('EXECUTING'):
                    elems = line.replace('[', '').replace(']', '').split()
                    # skip lines too short to carry a job ID
                    if len(elems) > 2:
                        tid = elems[2]
                        self._log.info('PRTE IDMAP: %s:%s' % (tid, uid))

        self._prof.prof('staging_stderr_stop', uid=uid)
        self._prof.prof('staging_uprof_start', uid=uid)

        unit_prof = "%s/%s.prof" % (sandbox, uid)
        if os.path.isfile(unit_prof):
            try:
                with open(unit_prof, 'r') as prof_f:
                    txt = ru.as_string(prof_f.read())
                    for line in txt.split("\n"):
                        if line:
                            # msg is the last field and may contain commas
                            ts, event, comp, tid, _uid, state, msg = \
                                                              line.split(',', 6)
                            self._prof.prof(ts=float(ts), event=event,
                                            comp=comp, tid=tid, uid=_uid,
                                            state=state, msg=msg)
            except Exception as e:
                # a malformed line aborts the replay of the remaining lines
                self._log.error("Pre/Post profile read failed: `%s`" % e)

        self._prof.prof('staging_uprof_stop', uid=uid)