示例#1
0
    def knc(self):
        """Report whether the target architecture is KNC (Xeon Phi).

        :rtype: bool
        :returns: True when ``chpl_arch.get('target')`` equals ``'knc'``
        """
        target_arch = chpl_arch.get('target')
        return target_arch == 'knc'
示例#2
0
def get():
    """Return the communication substrate (CHPL_COMM_SUBSTRATE).

    A non-empty ``CHPL_COMM_SUBSTRATE`` environment variable is returned
    unchanged. Otherwise the value is ``'none'`` unless the comm layer is
    ``'gasnet'``, in which case it is chosen from the target platform
    (``'udp'`` for any platform not listed below).

    :rtype: str
    :returns: substrate name
    """
    env_substrate = os.environ.get('CHPL_COMM_SUBSTRATE')
    if env_substrate:
        return env_substrate

    comm_val = chpl_comm.get()
    platform_val = chpl_platform.get('target')
    arch_val = chpl_arch.get('target')

    if comm_val != 'gasnet':
        return 'none'

    if platform_val == 'cray-xc':
        # On cray-xc the knc architecture uses mpi; everything else aries.
        return 'mpi' if arch_val == 'knc' else 'aries'

    substrate_by_platform = {
        'cray-xt': 'mpi',
        'cray-xe': 'gemini',
        'cray-xk': 'gemini',
        'marenostrum': 'udp',
        'pwr5': 'lapi',
        'pwr6': 'ibv',
    }
    return substrate_by_platform.get(platform_val, 'udp')
示例#3
0
    def knl(self):
        """Report whether the target architecture is KNL (Xeon Phi).

        :rtype: bool
        :returns: True when the ``arch`` field of ``chpl_arch.get('target')``
            equals ``'mic-knl'``
        """
        target = chpl_arch.get('target')
        return target.arch == 'mic-knl'
示例#4
0
def default_uniq_cfg_path():
    """Build the ``platform-compiler-arch-pic`` configuration path component.

    The arch part is the ``arch`` field of the target arch mapped to the
    compiler; the lowest-common-denominator arch is requested when
    ``using_chapel_module()`` is true.

    :rtype: str
    :returns: the four values joined with ``-``
    """
    arch_val = chpl_arch.get('target',
                             map_to_compiler=True,
                             get_lcd=using_chapel_module()).arch
    platform_val = chpl_platform.get('target')
    compiler_val = chpl_compiler.get('target')
    lib_pic_val = chpl_lib_pic.get()
    return '{0}-{1}-{2}-{3}'.format(platform_val, compiler_val, arch_val,
                                    lib_pic_val)
示例#5
0
def get(flag='host'):
    """Return the memory allocator (CHPL_MEM) for the host or the target.

    :type flag: str
    :arg flag: ``'host'`` or ``'target'``

    :rtype: str
    :returns: ``'cstdlib'`` for the host. For the target, a non-empty
        ``CHPL_MEM`` environment variable if set; otherwise a default
        derived from the comm layer (and the gasnet segment).

    :raises ValueError: if ``flag`` is neither ``'host'`` nor ``'target'``
    """
    if flag not in ('host', 'target'):
        raise ValueError("Invalid flag: '{0}'".format(flag))

    if flag == 'host':
        return 'cstdlib'

    env_mem = os.environ.get('CHPL_MEM')
    if env_mem:
        return env_mem

    comm_val = chpl_comm.get()
    # The three values below only feed the disabled tcmalloc check that
    # follows; they are still computed, exactly as before.
    platform_val = chpl_platform.get('host')
    arch_val = chpl_arch.get('target', get_lcd=True)
    tcmallocCompat = ["gnu", "clang", "intel"]

    # Disabled: true if tcmalloc is compatible with the target compiler
    #if (not (platform_val == 'cray-xc' and arch_val == 'knc') and
    #        (not platform_val.startswith("cygwin")) and
    #        any(sub in chpl_compiler.get('target') for sub in tcmallocCompat)):
    #    return 'tcmalloc'
    if comm_val == 'ugni':
        return 'tcmalloc'
    if comm_val == 'gasnet':
        # The fast and large gasnet segments use dlmalloc.
        if chpl_comm_segment.get() in ('fast', 'large'):
            return 'dlmalloc'
    return 'cstdlib'
示例#6
0
def get():
    """Return the communication substrate (CHPL_COMM_SUBSTRATE).

    A non-empty ``CHPL_COMM_SUBSTRATE`` environment variable is returned
    unchanged. Otherwise the value is ``'none'`` unless the comm layer is
    ``'gasnet'``, in which case it is looked up from the target platform
    (``'udp'`` for any platform not listed).

    :rtype: str
    :returns: substrate name
    """
    env_substrate = os.environ.get('CHPL_COMM_SUBSTRATE')
    if env_substrate:
        return env_substrate

    comm_val = chpl_comm.get()
    platform_val = chpl_platform.get('target')
    # Not consulted by the selection below; the lookup is still performed.
    arch_val = chpl_arch.get('target', get_lcd=True)

    if comm_val != 'gasnet':
        return 'none'

    substrate_by_platform = {
        'cray-xt': 'mpi',
        'cray-xe': 'gemini',
        'cray-xk': 'gemini',
        'cray-xc': 'aries',
        'marenostrum': 'udp',
        'pwr5': 'lapi',
        'pwr6': 'ibv',
    }
    return substrate_by_platform.get(platform_val, 'udp')
示例#7
0
def get():
    """Return the tasking layer (CHPL_TASKS).

    A non-empty ``CHPL_TASKS`` environment variable is returned unchanged.
    Otherwise the result is ``'muxed'``, ``'fifo'`` or ``'qthreads'``
    depending on comm layer, platform, compiler and target arch.

    :rtype: str
    :returns: tasking layer name
    """
    env_tasks = os.environ.get('CHPL_TASKS')
    if env_tasks:
        return env_tasks

    arch_val = chpl_arch.get('target', get_lcd=True)
    platform_val = chpl_platform.get()
    compiler_val = chpl_compiler.get('target')
    comm_val = chpl_comm.get()

    is_knc = arch_val == 'knc'

    # use muxed on cray-x* machines using the module and supported compiler
    muxed_compilers = ('cray-prgenv-gnu', 'cray-prgenv-intel')
    if (comm_val == 'ugni'
            and platform_val.startswith('cray-x')
            and utils.using_chapel_module()
            and compiler_val in muxed_compilers
            and not is_knc):
        return 'muxed'

    fifo_compilers = ('pgi', 'cray-prgenv-pgi', 'cray-prgenv-cray')
    if (is_knc
            or platform_val.startswith(('cygwin', 'netbsd'))
            or compiler_val in fifo_compilers):
        return 'fifo'

    return 'qthreads'
示例#8
0
def get(flag='host'):
    """Return the memory allocator for the host or the target.

    :type flag: str
    :arg flag: ``'host'`` or ``'target'``

    For ``'target'``: cygwin and darwin/arm64 always get ``'cstdlib'``
    (overrides are not consulted there); otherwise ``CHPL_TARGET_MEM`` wins
    over ``CHPL_MEM`` (with a warning when both are set and differ), and the
    default is ``'jemalloc'``.

    For ``'host'``: ``CHPL_HOST_MEM`` is honored except on cygwin; the
    default is ``'cstdlib'``.

    Any other flag is reported through ``error`` with ``ValueError``.

    :rtype: str
    :returns: allocator name
    """
    arch_val = chpl_arch.get(flag)
    platform_val = chpl_platform.get(flag)
    on_cygwin = platform_val.startswith('cygwin')
    on_mac_arm = platform_val == 'darwin' and arch_val == 'arm64'
    host_override = overrides.get('CHPL_HOST_MEM')
    target_override = overrides.get('CHPL_TARGET_MEM')
    generic_override = overrides.get('CHPL_MEM')

    if flag == 'host':
        if host_override and not on_cygwin:
            mem_val = host_override
        else:
            mem_val = 'cstdlib'
    elif flag == 'target':
        if on_cygwin or on_mac_arm:
            mem_val = 'cstdlib'
        elif target_override:
            if generic_override and target_override != generic_override:
                warning("CHPL_MEM and CHPL_TARGET_MEM are both set, "
                        "taking value from CHPL_TARGET_MEM")
            mem_val = target_override
        else:
            mem_val = generic_override or 'jemalloc'
    else:
        error("Invalid flag: '{0}'".format(flag), ValueError)
    return mem_val
示例#9
0
    def knl(self):
        """Report whether the target architecture is KNL (Xeon Phi).

        :rtype: bool
        :returns: True when the ``arch`` field of ``chpl_arch.get('target')``
            equals ``'mic-knl'``
        """
        target = chpl_arch.get('target')
        return target.arch == 'mic-knl'
示例#10
0
    def knl(self):
        """Report whether the target architecture is KNL (Xeon Phi).

        :rtype: bool
        :returns: True when ``chpl_arch.get("target")`` equals ``"mic-knl"``
        """
        target_arch = chpl_arch.get("target")
        return target_arch == "mic-knl"
示例#11
0
def default_uniq_cfg_path():
    """Build the ``platform-arch-cpu-compiler-pic`` configuration path.

    The cpu part is the ``cpu`` field of the target cpu mapped to the
    compiler; the lowest-common-denominator cpu is requested when
    ``using_chapel_module()`` is true.

    :rtype: str
    :returns: the five values joined with ``-``
    """
    cpu_info = chpl_cpu.get('target',
                            map_to_compiler=True,
                            get_lcd=using_chapel_module())
    cpu_val = cpu_info.cpu
    compiler_val = chpl_compiler.get_path_component('target')
    platform_val = chpl_platform.get('target')
    arch_val = chpl_arch.get('target')
    lib_pic_val = chpl_lib_pic.get()
    return '{0}-{1}-{2}-{3}-{4}'.format(platform_val, arch_val, cpu_val,
                                        compiler_val, lib_pic_val)
示例#12
0
def compatible_platform_for_llvm():
    """Return whether the target platform/arch combination can use LLVM.

    32-bit targets (platform ``linux32`` or arch ``i386``) and darwin on
    ``arm64`` are treated as incompatible.

    :rtype: bool
    :returns: False for the incompatible targets above, True otherwise
    """
    target_arch = chpl_arch.get('target')
    target_platform = chpl_platform.get('target')

    # The arch was previously compared against the misspelling "i368",
    # which can never match a real machine name.
    # NOTE(review): other 32-bit x86 machine names (e.g. "i686") are not
    # covered here - confirm whether they should be.
    is32bit = target_platform == "linux32" or target_arch == "i386"
    mac_arm = target_platform == 'darwin' and target_arch == 'arm64'

    return not (is32bit or mac_arm)
示例#13
0
def get():
    """Return the hwloc setting (CHPL_HWLOC).

    A non-empty ``CHPL_HWLOC`` environment variable is returned unchanged.
    Otherwise ``'hwloc'`` is chosen when tasking is ``'qthreads'`` and the
    target arch is not ``'knc'``; ``'none'`` in every other case.

    :rtype: str
    :returns: ``'hwloc'``, ``'none'`` or the environment override
    """
    env_hwloc = os.environ.get('CHPL_HWLOC')
    if env_hwloc:
        return env_hwloc

    tasks_val = chpl_tasks.get()
    arch_val = chpl_arch.get('target', get_lcd=True)
    wants_hwloc = tasks_val == 'qthreads' and arch_val != 'knc'
    return 'hwloc' if wants_hwloc else 'none'
示例#14
0
def get():
    """Return the tasking layer (CHPL_TASKS).

    A non-empty ``CHPL_TASKS`` environment variable is returned unchanged.
    Otherwise ``'fifo'`` is used for the knc arch and for cygwin platforms,
    and ``'qthreads'`` everywhere else.

    :rtype: str
    :returns: tasking layer name
    """
    env_tasks = os.environ.get('CHPL_TASKS')
    if env_tasks:
        return env_tasks

    arch_val = chpl_arch.get('target', get_lcd=True)
    platform_val = chpl_platform.get()
    needs_fifo = arch_val == 'knc' or platform_val.startswith('cygwin')
    return 'fifo' if needs_fifo else 'qthreads'
示例#15
0
def get():
    """Return the hwloc setting (CHPL_HWLOC).

    A non-empty ``CHPL_HWLOC`` environment variable is returned unchanged.
    Otherwise ``'hwloc'`` is chosen when tasking is ``'qthreads'`` and the
    target arch is not ``'knc'``; ``'none'`` in every other case.

    :rtype: str
    :returns: ``'hwloc'``, ``'none'`` or the environment override
    """
    env_hwloc = os.environ.get('CHPL_HWLOC')
    if env_hwloc:
        return env_hwloc

    tasks_val = chpl_tasks.get()
    arch_val = chpl_arch.get('target')
    wants_hwloc = tasks_val == 'qthreads' and arch_val != 'knc'
    return 'hwloc' if wants_hwloc else 'none'
示例#16
0
def get():
    """Return the tasking layer (CHPL_TASKS).

    A non-empty ``CHPL_TASKS`` environment variable is returned unchanged.
    Otherwise ``'fifo'`` is used for the knc arch, cygwin platforms and the
    ``cray-prgenv-cray`` compiler; ``'qthreads'`` everywhere else.

    :rtype: str
    :returns: tasking layer name
    """
    env_tasks = os.environ.get('CHPL_TASKS')
    if env_tasks:
        return env_tasks

    arch_val = chpl_arch.get('target', get_lcd=True)
    platform_val = chpl_platform.get()
    compiler_val = chpl_compiler.get('target')

    needs_fifo = (arch_val == 'knc'
                  or platform_val.startswith('cygwin')
                  or compiler_val == 'cray-prgenv-cray')
    return 'fifo' if needs_fifo else 'qthreads'
示例#17
0
def get():
    """Return the regexp library setting (CHPL_REGEXP).

    A non-empty ``CHPL_REGEXP`` environment variable is returned unchanged.
    Otherwise ``'re2'`` is reported when ``re2.h`` exists in the bundled
    third-party install directory for the current
    ``platform-compiler-arch`` configuration, and ``'none'`` when it does
    not.

    :rtype: str
    :returns: ``'re2'``, ``'none'`` or the environment override
    """
    env_regexp = os.environ.get('CHPL_REGEXP')
    if env_regexp:
        return env_regexp

    target_platform = chpl_platform.get('target')
    target_compiler = chpl_compiler.get('target')
    target_arch = chpl_arch.get('target', map_to_compiler=True, get_lcd=True)
    chpl_home = utils.get_chpl_home()

    install_name = '{0}-{1}-{2}'.format(target_platform, target_compiler,
                                        target_arch)
    re2_header = os.path.join(chpl_home, 'third-party', 're2', 'install',
                              install_name, 'include', 're2', 're2.h')
    return 're2' if os.path.exists(re2_header) else 'none'
示例#18
0
def get():
    """Return the regexp library setting (CHPL_REGEXP).

    A non-empty ``CHPL_REGEXP`` environment variable is returned unchanged.
    Otherwise ``'re2'`` is reported when ``re2.h`` exists in the bundled
    third-party install directory for the current
    ``platform-compiler-arch`` configuration, and ``'none'`` when it does
    not.

    :rtype: str
    :returns: ``'re2'``, ``'none'`` or the environment override
    """
    env_regexp = os.environ.get('CHPL_REGEXP')
    if env_regexp:
        return env_regexp

    target_platform = chpl_platform.get('target')
    target_compiler = chpl_compiler.get('target')
    target_arch = chpl_arch.get('target')
    chpl_home = utils.get_chpl_home()

    install_name = '{0}-{1}-{2}'.format(target_platform, target_compiler,
                                        target_arch)
    re2_header = os.path.join(chpl_home, 'third-party', 're2', 'install',
                              install_name, 'include', 're2', 're2.h')
    return 're2' if os.path.exists(re2_header) else 'none'
示例#19
0
def get():
    """Return the tasking layer (CHPL_TASKS).

    A non-empty ``CHPL_TASKS`` environment variable is returned unchanged.
    Otherwise ``'fifo'`` is used for the knc arch, cygwin and netbsd
    platforms and the ``cray-prgenv-cray`` compiler; ``'qthreads'``
    everywhere else.

    :rtype: str
    :returns: tasking layer name
    """
    env_tasks = os.environ.get('CHPL_TASKS')
    if env_tasks:
        return env_tasks

    arch_val = chpl_arch.get('target', get_lcd=True)
    platform_val = chpl_platform.get()
    compiler_val = chpl_compiler.get('target')
    # Not consulted by the selection below; the lookup is still performed.
    comm_val = chpl_comm.get()

    needs_fifo = (arch_val == 'knc'
                  or platform_val.startswith(('cygwin', 'netbsd'))
                  or compiler_val == 'cray-prgenv-cray')
    return 'fifo' if needs_fifo else 'qthreads'
示例#20
0
def get():
    """Return the tasking layer (CHPL_TASKS).

    A non-empty ``CHPL_TASKS`` override is returned unchanged. Otherwise
    ``'fifo'`` is used on cygwin, netbsd and freebsd platforms and on
    darwin with the ``arm64`` arch; ``'qthreads'`` everywhere else.

    :rtype: str
    :returns: tasking layer name
    """
    override = overrides.get('CHPL_TASKS')
    if override:
        return override

    platform_val = chpl_platform.get('target')
    # Not consulted by the selection below; the lookup is still performed.
    compiler_val = chpl_compiler.get('target')
    arch_val = chpl_arch.get('target')

    on_cygwin_or_bsd = platform_val.startswith(
        ('cygwin', 'netbsd', 'freebsd'))
    on_mac_arm = platform_val.startswith('darwin') and arch_val == 'arm64'

    return 'fifo' if (on_cygwin_or_bsd or on_mac_arm) else 'qthreads'
示例#21
0
def get():
    """Return the tasking layer (CHPL_TASKS).

    A non-empty ``CHPL_TASKS`` environment variable is returned unchanged.
    Otherwise ``"fifo"`` is used for the knc arch, cygwin and netbsd
    platforms and the ``cray-prgenv-cray`` compiler; ``"qthreads"``
    everywhere else.

    :rtype: str
    :returns: tasking layer name
    """
    env_tasks = os.environ.get("CHPL_TASKS")
    if env_tasks:
        return env_tasks

    arch_val = chpl_arch.get("target", get_lcd=True)
    platform_val = chpl_platform.get()
    compiler_val = chpl_compiler.get("target")
    # Not consulted by the selection below; the lookup is still performed.
    comm_val = chpl_comm.get()

    needs_fifo = (
        arch_val == "knc"
        or platform_val.startswith(("cygwin", "netbsd"))
        or compiler_val == "cray-prgenv-cray"
    )
    return "fifo" if needs_fifo else "qthreads"
示例#22
0
def get():
    """Return the GMP setting (CHPL_GMP).

    A non-empty ``CHPL_GMP`` environment variable is returned unchanged.
    Otherwise ``cray-x*`` platforms use ``'system'``; other platforms use
    ``'gmp'`` when ``gmp.h`` exists in the bundled third-party install
    directory for the current ``platform-compiler-arch`` configuration,
    and ``'none'`` when it does not.

    :rtype: str
    :returns: ``'system'``, ``'gmp'``, ``'none'`` or the environment override
    """
    env_gmp = os.environ.get('CHPL_GMP')
    if env_gmp:
        return env_gmp

    target_platform = chpl_platform.get('target')
    target_compiler = chpl_compiler.get('target')
    target_arch = chpl_arch.get('target')

    if target_platform.startswith('cray-x'):
        return 'system'

    chpl_home = utils.get_chpl_home()
    install_name = '{0}-{1}-{2}'.format(target_platform, target_compiler,
                                        target_arch)
    gmp_header = os.path.join(chpl_home, 'third-party', 'gmp', 'install',
                              install_name, 'include', 'gmp.h')
    return 'gmp' if os.path.exists(gmp_header) else 'none'
示例#23
0
def get(flag='host'):
    """Build the ``platform-arch[-cpu]`` path component for ``flag``.

    The compiler is never included since it shouldn't be needed for a
    bin/ path. The cpu is appended only when it is neither ``'none'`` nor
    ``'unknown'``.

    :type flag: str
    :arg flag: passed through to the chpl_platform / chpl_arch / chpl_cpu
        lookups

    :rtype: str
    :returns: components joined with ``-``
    """
    platform = chpl_platform.get(flag)
    arch = chpl_arch.get(flag)
    _, cpu = chpl_cpu.get(flag, map_to_compiler=True,
                          get_lcd=chpl_home_utils.using_chapel_module())

    components = [platform, arch]
    if cpu not in ('none', 'unknown'):
        components.append(cpu)
    return '-'.join(components)
示例#24
0
def get(flag='host'):
    """Build the ``platform-machine[-arch]`` path component for ``flag``.

    The compiler is never included since it shouldn't be needed for a
    bin/ path. The arch is appended only when it is neither ``'none'`` nor
    ``'unknown'``.

    :type flag: str
    :arg flag: passed through to the chpl_platform / chpl_machine /
        chpl_arch lookups

    :rtype: str
    :returns: components joined with ``-``
    """
    platform = chpl_platform.get(flag)
    machine = chpl_machine.get(flag)
    # The first tuple element is discarded. It used to be unpacked into
    # ``flag``, silently overwriting the parameter.
    (_, arch) = chpl_arch.get(flag,
                              map_to_compiler=True,
                              get_lcd=chpl_home_utils.using_chapel_module())

    # platform, then machine
    result = platform + '-' + machine

    # arch, only when it carries information
    if arch != 'none' and arch != 'unknown':
        result += '-' + arch
    return result
示例#25
0
def get(flag='host'):
    """Build the ``platform-arch[-cpu]`` path component for ``flag``.

    The compiler is never included since it shouldn't be needed for a
    bin/ path. The cpu is appended only when it is neither ``'none'`` nor
    ``'unknown'``.

    :type flag: str
    :arg flag: passed through to the chpl_platform / chpl_arch / chpl_cpu
        lookups

    :rtype: str
    :returns: components joined with ``-``
    """
    platform = chpl_platform.get(flag)
    arch = chpl_arch.get(flag)
    _, cpu = chpl_cpu.get(flag,
                          map_to_compiler=True,
                          get_lcd=chpl_home_utils.using_chapel_module())

    components = [platform, arch]
    if cpu not in ('none', 'unknown'):
        components.append(cpu)
    return '-'.join(components)
示例#26
0
def get():
    """Return the tasking layer (CHPL_TASKS).

    A non-empty ``CHPL_TASKS`` environment variable is returned unchanged.
    Otherwise the result is ``'muxed'``, ``'fifo'`` or ``'qthreads'``
    depending on comm layer, platform, compiler and target arch.

    :rtype: str
    :returns: tasking layer name
    """
    env_tasks = os.environ.get('CHPL_TASKS')
    if env_tasks:
        return env_tasks

    arch_val = chpl_arch.get('target', get_lcd=True)
    platform_val = chpl_platform.get()
    compiler_val = chpl_compiler.get('target')
    comm_val = chpl_comm.get()

    is_knc = arch_val == 'knc'

    # use muxed on cray-x* machines using the module and supported compiler
    muxed_compilers = ('cray-prgenv-gnu', 'cray-prgenv-intel')
    if (comm_val == 'ugni'
            and platform_val.startswith('cray-x')
            and utils.using_chapel_module()
            and compiler_val in muxed_compilers
            and not is_knc):
        return 'muxed'

    if (is_knc
            or platform_val.startswith(('cygwin', 'netbsd'))
            or compiler_val == 'cray-prgenv-cray'):
        return 'fifo'

    return 'qthreads'
示例#27
0
def get():
    """Return the GMP setting (CHPL_GMP).

    A non-empty ``CHPL_GMP`` environment variable is returned unchanged.
    Otherwise a bundled build is preferred: ``'gmp'`` when ``gmp.h`` exists
    in the third-party install directory for the current
    ``platform-compiler-arch`` configuration. Failing that, ``cray-x*``
    platforms use ``'system'`` and everything else ``'none'``.

    :rtype: str
    :returns: ``'gmp'``, ``'system'``, ``'none'`` or the environment override
    """
    env_gmp = os.environ.get('CHPL_GMP')
    if env_gmp:
        return env_gmp

    target_platform = chpl_platform.get('target')
    target_compiler = chpl_compiler.get('target')
    target_arch = chpl_arch.get('target', map_to_compiler=True, get_lcd=True)

    # Detect if gmp has been built for this configuration.
    chpl_home = utils.get_chpl_home()
    install_name = '{0}-{1}-{2}'.format(target_platform, target_compiler,
                                        target_arch)
    gmp_header = os.path.join(chpl_home, 'third-party', 'gmp', 'install',
                              install_name, 'include', 'gmp.h')

    if os.path.exists(gmp_header):
        return 'gmp'
    if target_platform.startswith('cray-x'):
        return 'system'
    return 'none'
示例#28
0
def default_uniq_cfg_path():
    """Build the ``platform-compiler-arch`` configuration path component.

    The arch is the target arch mapped to the compiler; the
    lowest-common-denominator arch is requested when
    ``using_chapel_module()`` is true.

    :rtype: str
    :returns: the three values joined with ``-``
    """
    platform_val = chpl_platform.get('target')
    compiler_val = chpl_compiler.get('target')
    arch_val = chpl_arch.get('target',
                             map_to_compiler=True,
                             get_lcd=using_chapel_module())
    return '{0}-{1}-{2}'.format(platform_val, compiler_val, arch_val)
示例#29
0
class AbstractJob(object):
    """Abstract job runner implementation."""

    # These class attributes should always be None on the AbstractJob
    # class. They *should only* be defined on and accessed from a sub class.

    # submit_bin is the program used to submit jobs (e.g. qsub).
    submit_bin = None

    # status_bin is the program used to query the status of jobs (e.g. qstat,
    # squeue)
    status_bin = None

    # argument name to use when specifying specific nodes (e.g. hostlist,
    # mppnodes)
    hostlist_resource = None

    # argument name for specifying number of nodes (e.g. nodes, mppwidth)
    num_nodes_resource = None

    # argument name for specifying number of cpus (e.g. mppdepth)
    num_cpus_resource = None

    def __init__(self, test_command, reservation_args):
        """Initialize new job runner.

        Stores the test command and copies the reservation settings
        (``numLocales``, ``walltime``, ``hostlist``) from
        ``reservation_args`` onto the instance, then logs the new instance
        at debug level.

        :type test_command: list
        :arg test_command: command to run in qsub

        :type reservation_args: argparse.Namespace
        :arg reservation_args: reservation arguments parsed from cli; must
            provide ``numLocales``, ``walltime`` and ``hostlist``
        """
        self.test_command = test_command
        # Note the rename: cli attribute numLocales -> self.num_locales.
        self.num_locales = reservation_args.numLocales
        self.walltime = reservation_args.walltime
        self.hostlist = reservation_args.hostlist

        logging.debug('Created instance of: {0}'.format(self))

    def __repr__(self):
        """Return a debug string of the form ``<type>(attr=value, ...)``.

        Lists test_command, num_locales, walltime and hostlist; an
        attribute that is not set on the instance is shown as ``None``.
        """
        shown_attrs = ('test_command', 'num_locales', 'walltime', 'hostlist')
        rendered = ['{0}={1}'.format(attr, getattr(self, attr, None))
                    for attr in shown_attrs]
        return '{0}({1})'.format(str(type(self)), ', '.join(rendered))

    @property
    def full_test_command(self):
        """Return test_command prefixed with a change to $PBS_O_WORKDIR.

        The ``cd`` prefix is required to support both PBSPro and moab
        flavors of PBS: moab offers a -d argument to qsub, but both
        support $PBS_O_WORKDIR.

        :rtype: list
        :returns: command to run in qsub with changedir call
        """
        executable = self.test_command[0]
        command = ['cd', '$PBS_O_WORKDIR', '&&']

        # When the first argument is an existing file (it should always be
        # the executable), run "test -f ./execname" before the command.
        # This works around some potential nfs configuration issues that
        # can happen when running from lustre mounted over nfs.
        if os.path.exists(executable):
            logging.debug('Adding "test -f {0}" to launcher command.'.format(
                executable))
            command.extend(['test', '-f', executable, '&&'])

        command.extend(self.test_command)
        return command

    @property
    def num_cpus(self):
        """Return the number of cpus that qsub should reserve.

        PBSPro requires the cpu reservation be given to both qsub and
        aprun. The value is the first line printed by
        ``cnselect -Lnumcores``, converted to int.

        :rtype: int
        :returns: number of cpus to reserve

        :raises RuntimeError: if cnselect cannot be run (OSError)
        :raises ValueError: if the first line of cnselect output is empty,
            or cannot be converted by ``int``
        """
        try:
            logging.debug('Checking for number of cpus to reserve.')
            proc = subprocess.Popen(['cnselect', '-Lnumcores'],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)

            logging.debug('Communicating with cnselect process.')
            # stderr is merged into stdout above.
            output, _ = proc.communicate()
        except OSError as ex:
            raise RuntimeError(ex)

        first_line = output.split('\n')[0]
        if not first_line:
            msg = 'cnselect -Lnumcores had no output.'
            logging.error(msg)
            raise ValueError(msg)
        return int(first_line)

    @property
    def job_name(self):
        """Return the job name: ``<prefix>-<basename of test executable>``.

        The prefix comes from the CHPL_LAUNCHCMD_NAME_PREFIX environment
        variable and defaults to ``Chpl``.

        :rtype: str
        :returns: job name
        """
        name_prefix = os.environ.get('CHPL_LAUNCHCMD_NAME_PREFIX', 'Chpl')
        logging.debug('Job name prefix is: {0}'.format(name_prefix))

        executable = self.test_command[0]
        basename = os.path.basename(executable)
        logging.debug('Test command basname: {0}'.format(basename))

        name = '{0}-{1}'.format(name_prefix, basename)
        logging.info('Job name is: {0}'.format(name))
        return name

    @property
    def select_suffix(self):
        """Return the suffix appended to the select expression.

        ``:Xeon_Phi`` when self.knc is true, so the reservation targets KNC
        nodes; an empty string otherwise.

        :rtype: str
        :returns: select expression suffix, or empty string
        """
        return ':Xeon_Phi' if self.knc else ''

    # Looked up once, when the class body is executed, and then reused by
    # the knc/knl properties. work_around_knc_module_bug() unloads the knc
    # module, after which chpl_arch would no longer report 'knc'.
    target_arch = chpl_arch.get('target')
    @property
    def knc(self):
        """Returns True when testing KNC (Xeon Phi).

        Compares the class-level ``target_arch`` value rather than querying
        chpl_arch again.

        :rtype: bool
        :returns: True when ``target_arch`` is ``'knc'``
        """
        return self.target_arch == 'knc'

    @property
    def knl(self):
        """Returns True when testing KNL (Xeon Phi).

        Compares the class-level ``target_arch`` value rather than querying
        chpl_arch again.

        :rtype: bool
        :returns: True when ``target_arch`` is ``'mic-knl'``
        """
        return self.target_arch == 'mic-knl'

    def work_around_knc_module_bug(self):
        """Hack to unload the knc module before calling qsub in order to work
        around a module bug.

        Does nothing unless self.knc is true. Note that this unloading of
        knc is why the 'knc' property doesn't just return
        `chpl_arch.get('target') == knc` but instead relies on a cached
        value: once the knc module is unloaded, chpl_arch will no longer
        return 'knc'.
        """
        if not self.knc:
            return

        unload_knc_proc = subprocess.Popen(
            ['modulecmd', 'python', 'unload', 'craype-intel-knc'],
            stdout=subprocess.PIPE
        )
        # stderr is not piped, so the second element is always None.
        stdout, _ = unload_knc_proc.communicate()

        # The output of "modulecmd python ..." is executed as Python code
        # in this process. The parenthesized form is valid on both
        # Python 2 and Python 3.
        # NOTE(review): this runs whatever modulecmd prints; it is only
        # safe while modulecmd on PATH is a trusted tool.
        exec(stdout)

    def _qsub_command_base(self, output_file):
        """Return the base submit command, without node/cpu resources.

        The command exports the environment (-V), names the job (-N),
        joins stderr into stdout (-j oe) and writes to ``output_file``
        (-o). A walltime resource is added only when self.walltime is not
        None.

        :type output_file: str
        :arg output_file: combined stdout/stderr output file location

        :rtype: list
        :returns: qsub command as list of strings
        """
        command = [
            self.submit_bin,
            '-V',
            '-N', self.job_name,
            '-j', 'oe',
            '-o', output_file,
        ]
        if self.walltime is not None:
            command.extend(['-l', 'walltime={0}'.format(self.walltime)])
        return command

    def _qsub_command(self, output_file):
        """Return the full qsub command list.

        This default implementation works for standard mpp* options;
        subclasses can implement versions that meet their needs. On top of
        the base command it adds, each as its own ``-l`` resource:

        * the node count (with the select suffix) when num_locales >= 0,
        * the host list when one was given,
        * the cpu count when the class defines num_cpus_resource.

        :type output_file: str
        :arg output_file: combined stdout/stderr output file location

        :rtype: list
        :returns: qsub command as list of strings
        """
        command = self._qsub_command_base(output_file)

        if self.num_locales >= 0:
            nodes_resource = '{0}={1}{2}'.format(
                self.num_nodes_resource, self.num_locales, self.select_suffix)
            command.extend(['-l', nodes_resource])

        if self.hostlist is not None:
            hosts_resource = '{0}={1}'.format(
                self.hostlist_resource, self.hostlist)
            command.extend(['-l', hosts_resource])

        if self.num_cpus_resource is not None:
            cpus_resource = '{0}={1}'.format(
                self.num_cpus_resource, self.num_cpus)
            command.extend(['-l', cpus_resource])

        logging.debug('qsub command: {0}'.format(command))
        return command


    def run(self):
        """Run batch job in subprocess and wait for job to complete. When
        finished, returns output as string.

        The job is submitted from the current working directory with its
        output directed to a file inside a temporary directory. The job
        status is then polled every half second until it is reported as
        complete ('C'). If CHPL_LAUNCHCMD_EXEC_TIME_FILE is set, an
        approximate execution time is written to that file.

        :rtype: str
        :returns: stdout/stderr from job

        :raises ValueError: if the status query fails while no output file
            exists, or if the output file is missing after completion
        """
        with _temp_dir() as working_dir:
            output_file = os.path.join(working_dir, 'test_output.log')
            input_file = os.path.join(working_dir, 'test_input')
            testing_dir = os.getcwd()

            job_id = self.submit_job(testing_dir, output_file, input_file)
            logging.info('Test has been queued (job id: {0}). Waiting for output...'.format(job_id))

            # TODO: The while condition here should look for jobs that become held,
            #       are in the queue too long, or ??? and do something
            #       intelligent. For example, if the job is in the queue longer
            #       than the walltime, it should probably be deleted (qdel
            #       <job_id>) and a timeout should be reported. Here are all the
            #       pbs (torque) job statuses:
            #
            #           C -  Job is completed after having run.
            #           E -  Job is exiting after having run.
            #           H -  Job is held.
            #           Q -  job is queued, eligible to run or routed.
            #           R -  job is running.
            #           T -  job is being moved to new location.
            #           W -  job is waiting for its execution time
            #                (-a option) to be reached.
            #           S -  (Unicos only) job is suspend.
            #
            #       (thomasvandoren, 2014-04-09)

            def job_status(job_id, output_file):
                """Returns the status of the job specified by job_id

                The status is determined by calling status(job_id). If that
                call is successful the result is returned. The exact code
                returned is up to status(job_id) but it must support 'C' for
                complete, 'Q' for queued/waiting to run, and 'R' for running

                status(job_id) can raise a ValueError, which can indicate that
                the job has completed *and* been dequeued. If the output file
                exists and the job has been dequeued, it is safe to assume it
                completed. Otherwise we raise the error
                """
                try:
                    return self.status(job_id)
                except ValueError:
                    # ValueError may indicate that the job completed and was
                    # dequeued before we last checked the status. If the output
                    # file exists, assume success. Otherwise re raise error
                    # message.
                    if os.path.exists(output_file):
                        return 'C'
                    raise

            exec_start_time = time.time()
            alreadyRunning = False
            status = job_status(job_id, output_file)
            while status != 'C':
                # Restart the clock the first time the job is seen running,
                # so time spent waiting in the queue is not counted.
                if not alreadyRunning and status == 'R':
                    alreadyRunning = True
                    exec_start_time = time.time()
                time.sleep(.5)
                status = job_status(job_id, output_file)

            exec_time = time.time() - exec_start_time
            # Note that this time isn't very accurate as we don't get the exact
            # start or end time, however this does give a better estimate than
            # timing the whole binary for cases where the time in the queue is
            # large. It tends to be a second or two larger than real exec time
            exec_time_file = os.environ.get('CHPL_LAUNCHCMD_EXEC_TIME_FILE')
            if exec_time_file is not None:
                with open(exec_time_file, 'w') as fp:
                    # NOTE(review): '3f' is a minimum field width of 3 with
                    # the default 6 decimals; '.3f' may have been intended.
                    # Left unchanged because the file's readers are not
                    # visible here.
                    fp.write('{0:3f}'.format(exec_time))

            logging.debug('{0} reports job {1} as complete.'.format(
                self.status_bin, job_id))

            if not os.path.exists(output_file):
                logging.error('Output file from job does not exist at: {0}'.format(
                    output_file))
                raise ValueError('[Error: output file from job (id: {0}) does not exist at: {1}]'.format(
                    job_id, output_file))

            # try removing the file stdin was copied to, might not exist
            logging.debug('removing stdin file.')
            try:
                os.unlink(input_file)
            except OSError:
                pass

            logging.debug('Reading output file.')
            with open(output_file, 'r') as fp:
                output = fp.read()
            logging.info('The test finished with output of length {0}.'.format(len(output)))

        return output

    def submit_job(self, testing_dir, output_file, input_file):
        """Submit a new job using ``testing_dir`` as the working dir and
        ``output_file`` as the location for the output. Returns the job id on
        success. AbstractJob does not implement this method. It is the
        responsibility of the sub class.

        :type testing_dir: str
        :arg testing_dir: working directory for running test

        :type output_file: str
        :arg output_file: output log filename

        :type input_file: str
        :arg input_file: path inside the temporary working directory that
            ``run`` passes in and later removes; presumably where a sub
            class stores the test's stdin - confirm against sub classes

        :rtype: str
        :returns: job id

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError('submit_job class method is implemented by sub classes.')

    @classmethod
    def _detect_job_flavor(cls):
        """Returns appropriate class based on the detected version of pbs or
        slurm in the environment.

        If srun is callable, assume slurm, and return SlurmJob. Slurm is
        favored because the Cray version of slurm comes with a qsub command
        that is a wrapper around the slurm apis.

        Otherwise, if qsub is callable and MOABHOMEDIR is set in the
        environment, assume moab and return MoabJob type.

        Otherwise, if qsub is callable assume PBSPro, and return PbsProJob
        type.

        If neither srun nor qsub is callable, raise RuntimeError.

        :rtype: type
        :returns: SlurmJob, MoabJob, or PbsProJob depending on environment

        :raises RuntimeError: if neither srun nor qsub can be run
        """
        def is_callable(cmd):
            """Return True if ``cmd`` can be run, False if it raises OSError.

            The command's output is read and discarded; only whether the
            program could be started matters.
            """
            try:
                proc = subprocess.Popen(
                    cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT
                )
                logging.debug('Communicating with job process.')
                proc.communicate()
            except OSError:
                return False
            return True

        # Both probes always run, qsub first, as before.
        qsub_callable = is_callable(['qsub', '--version'])
        srun_callable = is_callable(['srun', '--version'])

        # Favor slurm, since Cray version of slurm comes with qsub command
        # that is wrapper around slurm apis.
        if srun_callable:
            return SlurmJob
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        elif qsub_callable and 'MOABHOMEDIR' in os.environ:
            return MoabJob
        elif qsub_callable:
            return PbsProJob
        else:  # not (qsub_callable or srun_callable)
            raise RuntimeError('Could not find PBS or SLURM on system.')

    def _launch_qsub(self, testing_dir, output_file):
        """Launch the job with qsub and return the job id.

        The full test command is sent to qsub on stdin, and qsub's stripped
        output is returned as the job id.

        :type testing_dir: str
        :arg testing_dir: working directory for running test

        :type output_file: str
        :arg output_file: output log filename

        :rtype: str
        :returns: job id

        :raises RuntimeError: if self.submit_bin is anything but qsub
        :raises ValueError: if qsub exits with a non-zero status
        """
        submit_bin = self.submit_bin
        if submit_bin != 'qsub':
            raise RuntimeError('_launch_qsub called for non-pbs job type!')

        self.work_around_knc_module_bug()

        start_msg = ('Starting {0} job "{1}" on {2} nodes with walltime {3} '
                     'and output file: {4}').format(
                         self.submit_bin, self.job_name, self.num_locales,
                         self.walltime, output_file)
        logging.info(start_msg)

        logging.debug('Opening {0} subprocess.'.format(self.submit_bin))
        qsub_proc = subprocess.Popen(
            self._qsub_command(output_file),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=testing_dir,
            env=os.environ.copy()
        )

        command_text = ' '.join(self.full_test_command)
        logging.debug('Communicating with {0} subprocess. Sending test command on stdin: {1}'.format(
            self.submit_bin, command_text))
        # stderr is merged into stdout above.
        stdout, stderr = qsub_proc.communicate(input=command_text)
        exit_code = qsub_proc.returncode
        logging.debug('{0} process returned with status {1}, stdout: {2} stderr: {3}'.format(
            self.submit_bin, exit_code, stdout, stderr))

        if exit_code != 0:
            msg = '{0} failed with exit code {1} and output: {2}'.format(
                self.submit_bin, exit_code, stdout)
            logging.error(msg)
            raise ValueError(msg)

        return stdout.strip()

    @classmethod
    def init_from_environment(cls):
        """Build a job runner from the command line and the detected batch
        system.

        Parses the command line, configures logging, assembles the test
        command and instantiates whichever job class ``_detect_job_flavor``
        selects.

        :rtype: AbstractJob
        :returns: subclass of AbstractJob based on environment

        :raises ValueError: if no test command was given on the command line
        """
        parsed, leftovers = cls._parse_args()
        cls._setup_logging(parsed.verbose)

        logging.info('Num locales is: {0}'.format(parsed.numLocales))
        logging.info('Walltime is set to: {0}'.format(parsed.walltime))

        command = cls._get_test_command(parsed, leftovers)
        joined_command = ' '.join(command)
        logging.debug('Test command is: {0}'.format(joined_command))
        if not command:
            logging.error('No test command provided.')
            raise ValueError('No test command found.')

        flavor = cls._detect_job_flavor()
        logging.info('Detected job flavor: {0}'.format(flavor.__name__))
        job = flavor(command, parsed)
        return job

    @classmethod
    def status(cls, job_id):
        """Report the status of a job via ``status_bin``.

        This base implementation is only a placeholder: each sub class is
        expected to provide its own version.

        :type job_id: str
        :arg job_id: job id

        :rtype: str
        :returns: job status

        :raises NotImplementedError: always, in this base implementation
        """
        reason = 'status class method is implemented by sub classes.'
        raise NotImplementedError(reason)

    @classmethod
    def _cli_walltime(cls, walltime_str):
        """Returns walltime_str if it can be parsed by one of the known walltime
        formats. Raises ValueError if walltime_str does not match a known format.

        Accepted forms are integer seconds, finite float seconds, or one of
        ``M:S``, ``H:M:S``, ``M:S.f`` and ``H:M:S.f``. The string is only
        validated; it is returned unchanged.

        :type walltime_str: str
        :arg walltime_str: walltime string from command line

        :rtype: str
        :returns: valid walltime string from command line

        :raises ValueError: if walltime_str is not in a recognized format
        """
        # Imported locally so this method does not depend on the module's
        # top-level import list.
        import math

        try:
            seconds = int(walltime_str)
            logging.debug('Parsed walltime as integer seconds: {0}'.format(seconds))
            return walltime_str
        except ValueError:
            pass

        try:
            seconds = float(walltime_str)
        except ValueError:
            pass
        else:
            # float() happily parses 'nan', 'inf' and 'infinity'; none of
            # those is a usable walltime, so let them fall through to the
            # "not recognized" error below.
            if not (math.isnan(seconds) or math.isinf(seconds)):
                logging.debug('Parsed walltime as float seconds: {0}'.format(seconds))
                return walltime_str

        # http://www.csc.fi/english/pages/louhi_guide/batch_jobs/commands/qsub
        known_formats = [
            '%M:%S',
            '%H:%M:%S',
            '%M:%S.%f',
            '%H:%M:%S.%f',
        ]
        for fmt in known_formats:
            try:
                walltime = datetime.datetime.strptime(walltime_str, fmt)
                logging.debug('Parsed walltime as datetime with format {0}: {1}'.format(
                    fmt, walltime))
                return walltime_str
            except ValueError:
                pass

        raise ValueError('Did not recognize walltime: {0}'.format(walltime_str))

    @classmethod
    def _get_test_command(cls, args, unparsed_args):
        """Returns test command by folding numLocales args into unparsed command line
        args.

        The caller's ``unparsed_args`` list is left untouched; a new list is
        returned, so calling this more than once cannot accumulate duplicate
        ``-nl`` flags.

        :type args: argparse.Namespace
        :arg args: Namespace from parsing original args

        :type unparsed_args: list
        :arg unparsed_args: list of unparsed command line args that make up test command

        :rtype: list
        :returns: command to be tested in qsub
        """
        logging.debug('Rebuilding test command from parsed args: {0} and '
                      'unparsed args: {1}'.format(args, unparsed_args))
        # Work on a copy so the caller's list is not mutated.
        test_command = list(unparsed_args)
        # A negative numLocales means "-nl was not given" (the parser default
        # is -1), in which case nothing is added.
        if args.numLocales >= 0:
            test_command.extend(['-nl', str(args.numLocales)])
        logging.debug('Rebuild test command: {0}'.format(test_command))
        return test_command

    @classmethod
    def _parse_args(cls):
        """Parse and return command line arguments. Returns tuple of Namespace with
        parsed args and unparsed args.

        Options known to this launcher (verbosity, number of locales,
        walltime, hostlist) end up in the Namespace; every other argument is
        returned in the unparsed list so it can form the test command.

        :rtype: tuple
        :returns: (argparse.Namespace of parsed args, list of unparsed args)
        """
        class OurFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
            pass

        parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=OurFormatter)
        parser.add_argument('--CHPL_LAUNCHCMD_DEBUG', action='store_true', dest='verbose',
                            default=('CHPL_LAUNCHCMD_DEBUG' in os.environ),
                            help=('Verbose output. Setting CHPL_LAUNCHCMD_DEBUG '
                                  'in environment also enables verbose output.'))
        parser.add_argument('-nl', '--numLocales', type=int, default=-1,
                            help='Number locales.')
        parser.add_argument('--n', help='Placeholder')
        parser.add_argument('--walltime', type=cls._cli_walltime,
                            help='Timeout as walltime for qsub.')
        parser.add_argument('--CHPL_LAUNCHCMD_HOSTLIST', dest='hostlist',
                            help=('Optional hostlist specification for reserving '
                                  'specific nodes. Can also be set with env var '
                                  'CHPL_LAUNCHCMD_HOSTLIST'))

        args, unparsed_args = parser.parse_known_args()

        # Allow hostlist to be set in environment variable CHPL_LAUNCHCMD_HOSTLIST.
        # An empty environment value is treated the same as unset.
        if args.hostlist is None:
            args.hostlist = os.environ.get('CHPL_LAUNCHCMD_HOSTLIST') or None

        # It is bad form to use a two character argument with only a single
        # dash. Unfortunately, we support it. And unfortunately, python argparse
        # thinks --n is the same thing. So, we pull out --n above so we can put it
        # back in the unparsed args here.
        #
        # Compare against None rather than relying on truthiness so that an
        # explicitly passed empty value (--n=) is forwarded to the test
        # command instead of being silently dropped.
        if args.n is not None:
            logging.debug('Found a --n arg. Putting it back in the unparsed args.')
            unparsed_args.append('--n={0}'.format(args.n))

        return args, unparsed_args

    @classmethod
    def _qstat(cls, job_id, args=None):
        """Run qstat for a job and return whatever it printed.

        stderr is merged into stdout, so the returned text (and the text in
        the error raised on failure) contains both streams.

        :type job_id: str
        :arg job_id: pbs job id

        :type args: list
        :arg args: additional arguments to pass qstat

        :rtype: str
        :returns: qsub job status

        :raises ValueError: if qstat exits with a non-zero status
        """
        extra_args = [] if args is None else args

        # Extra arguments go between the binary and the job id.
        qstat_command = ['qstat'] + extra_args + [job_id]
        logging.debug('qstat command to run: {0}'.format(qstat_command))

        logging.debug('Opening qstat subprocess.')
        qstat_proc = subprocess.Popen(
            qstat_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=os.environ.copy()
        )

        logging.debug('Communicating with qstat subprocess.')
        output, errors = qstat_proc.communicate()
        exit_status = qstat_proc.returncode
        logging.debug('qstat process returned with status {0}, stdout: {1}, and stderr: {2}'.format(
            exit_status, output, errors))

        if exit_status == 0:
            return output
        raise ValueError('Non-zero exit code {0} from qstat: "{1}"'.format(
            exit_status, output))

    @classmethod
    def _setup_logging(cls, verbose=False):
        """Setup logging to console.

        Replaces any handlers already attached to the root logger, then
        configures it at DEBUG level when verbose is True and at WARNING
        level otherwise.

        :type verbose: bool
        :arg verbose: if True, set log level to DEBUG
        """
        # logging module configures default handlers when logging.debug/info/etc
        # are called. In order for our basicConfig call to work, we need to get rid
        # of those. This is generally a bad practice unless we are absolutely sure
        # we are the top level script and we won't break other logging. That's
        # probably true here.
        #
        # See note here: https://docs.python.org/2/library/logging.html#logging.log
        logging.root.handlers = []

        log_level = logging.DEBUG if verbose else logging.WARNING
        # %(message)s is the fully merged message (msg % args); %(msg)s would
        # print the raw format string and lose any logging arguments.
        logging.basicConfig(
            level=log_level, format='[%(module)s] %(asctime)s [%(levelname)s] %(message)s')
        logging.debug('Verbose logging enabled.')
示例#30
0
def default_uniq_cfg_path():
    """Return the default unique configuration path component.

    The result is the target platform, target compiler and target
    architecture values joined with dashes, in that order.

    :rtype: str
    :returns: string of the form '<platform>-<compiler>-<arch>'
    """
    platform_val = chpl_platform.get('target')
    compiler_val = chpl_compiler.get('target')
    arch_val = chpl_arch.get('target', map_to_compiler=True,
                             get_lcd=utils.using_chapel_module())
    return '{0}-{1}-{2}'.format(platform_val, compiler_val, arch_val)
示例#31
0
def compatible_platform_for_llvm():
    """Return True when the target architecture and platform can use LLVM.

    The target is considered incompatible when the architecture is "i386"
    or the platform is "linux32".

    :rtype: bool
    :returns: False for an i386 architecture or a linux32 platform, True
        otherwise
    """
    target_arch = chpl_arch.get('target')
    target_platform = chpl_platform.get('target')
    # The architecture was previously compared against "i368", which is not
    # an architecture name, so that half of the check could never trigger.
    return (target_arch != "i386" and target_platform != "linux32")
示例#32
0
def compatible_platform_for_llvm_default():
    """Return True when the target architecture can use LLVM by default.

    The target is considered incompatible when the architecture is "i386".

    :rtype: bool
    :returns: False for an i386 architecture, True otherwise
    """
    target_arch = chpl_arch.get('target')
    # The architecture was previously compared against "i368", which is not
    # an architecture name, so the check could never trigger.
    return (target_arch != "i386")