示例#1
0
def test_get_global_nodelist_frm_wrklst_file():
    # worker_list file should override env variables
    os.environ[
        "LIBE_RESOURCES_TEST_NODE_LIST"] = "20-22,137-139,1234"  # Should not be this
    exp_out = [
        'knl-0019', 'knl-0021', 'knl-0022', 'knl-0137', 'knl-0138', 'knl-0139',
        'knl-2345'
    ]  # Should be this

    open('worker_list', 'w').close()
    try:
        _ = Resources.get_global_nodelist(rundir=os.getcwd())
    except ResourcesException as e:
        assert e.args[0] == 'Error. global_nodelist is empty'
    else:
        assert 0

    with open('worker_list', 'w') as f:
        for node in exp_out:
            f.write(node + '\n')

    # Do not specify env vars.
    global_nodelist1 = Resources.get_global_nodelist(rundir=os.getcwd())
    assert global_nodelist1 == exp_out, "global_nodelist returned does not match expected"

    # Specify env vars - should ignore
    env_resources = EnvResources(
        nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_cobalt="LIBE_RESOURCES_TEST_NODE_LIST",
        nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET")
    global_nodelist2 = Resources.get_global_nodelist(
        rundir=os.getcwd(), env_resources=env_resources)
    assert global_nodelist2 == exp_out, "global_nodelist returned does not match expected"
    os.remove('worker_list')
def test_abbrev_nodenames_nochange_cobalt():
    env_resources = EnvResources()
    # Test Cobalt abbrev
    exp_names = ['21', '22', '137', '138', '1234', '11234']
    env_resources.schedular = 'Cobalt'
    abbrev_names = env_resources.abbrev_nodenames(exp_names)
    assert abbrev_names == exp_names, "Abbreviated names returned do not match expected"
    del env_resources
def test_get_cpu_resources_from_env_unknown_env():
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0009-0012]"
    env_resources = EnvResources(
        nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
        nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET")

    cores_info = node_resources._get_cpu_resources_from_env(
        env_resources=env_resources)
    assert cores_info is None, 'cores_info should be None'
def test_abbrev_nodenames_nochange_slurm():
    env_resources = EnvResources()
    # Test Cobalt abbrev
    exp_names = [
        'knl-0019', 'knl-0021', 'knl-0022', 'knl-0137', 'knl-0138', 'knl-0139',
        'knl-2345'
    ]
    env_resources.schedular = 'Cobalt'
    abbrev_names = env_resources.abbrev_nodenames(exp_names)
    assert abbrev_names == exp_names, "Abbreviated names returned do not match expected"
    del env_resources
示例#5
0
def test_get_global_nodelist_frm_cobalt():
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "20-22,137-139,1234"
    exp_out = ['20', '21', '22', '137', '138', '139', '1234']
    env_resources = EnvResources(
        nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_cobalt="LIBE_RESOURCES_TEST_NODE_LIST",
        nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET")
    global_nodelist = Resources.get_global_nodelist(
        rundir=os.getcwd(), env_resources=env_resources)
    assert global_nodelist == exp_out, "global_nodelist returned does not match expected"
示例#6
0
def test_get_global_nodelist_frm_lsf_shortform():
    os.environ[
        "LIBE_RESOURCES_TEST_NODE_LIST"] = 'batch5 1 g06n02 42 h21n18 42'
    exp_out = ['g06n02', 'h21n18']
    env_resources = EnvResources(
        nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf_shortform="LIBE_RESOURCES_TEST_NODE_LIST")
    global_nodelist = Resources.get_global_nodelist(
        rundir=os.getcwd(), env_resources=env_resources)
    assert global_nodelist == exp_out, "global_nodelist returned does not match expected"
示例#7
0
def test_get_global_nodelist_standalone():
    mynode = socket.gethostname()
    env_resources = EnvResources(
        nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET")
    global_nodelist = Resources.get_global_nodelist(
        rundir=os.getcwd(), env_resources=env_resources)
    assert global_nodelist == [
        mynode
    ], "global_nodelist returned does not match expected"
def test_get_cpu_resources_from_env_lsf_shortform():
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = 'batch5 1 g06n02 42'
    exp_out = (42, 42)

    env_resources1 = EnvResources(
        nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf_shortform="LIBE_RESOURCES_TEST_NODE_LIST")
    cores_info = node_resources._get_cpu_resources_from_env(
        env_resources=env_resources1)
    assert cores_info == exp_out, "cores_info returned does not match expected"

    os.environ[
        "LIBE_RESOURCES_TEST_NODE_LIST"] = 'batch5 1 g06n02 42 h21n18 42'
    env_resources2 = EnvResources(
        nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET",
        nodelist_env_lsf_shortform="LIBE_RESOURCES_TEST_NODE_LIST")
    cores_info = node_resources._get_cpu_resources_from_env(
        env_resources=env_resources2)
    assert cores_info == exp_out, "cores_info returned does not match expected"
示例#9
0
    def __init__(self,
                 top_level_dir=None,
                 central_mode=False,
                 launcher=None,
                 nodelist_env_slurm=None,
                 nodelist_env_cobalt=None,
                 nodelist_env_lsf=None,
                 nodelist_env_lsf_shortform=None):
        """Initialise new Resources instance

        Works out the compute resources available for current allocation, including
        node list and cores/hardware threads available within nodes.

        Parameters
        ----------

        top_level_dir: string, optional
            Directory libEnsemble runs in (default is current working directory)

        central_mode: boolean, optional
            If true, then running in central mode, else distributed.
            Central mode means libE processes (manager and workers) are grouped together and
            do not share nodes with applications. Distributed mode means Workers share nodes
            with applications.

        launcher: String, optional
            The name of the job launcher such as mpirun or aprun. This may be used to obtain
            intra-node information by launching a probing job onto the compute nodes.
            If not present, the local node will be used to obtain this information.

        nodelist_env_slurm: String, optional
            The environment variable giving a node list in Slurm format (Default: Uses SLURM_NODELIST)
            Note: This is only queried if a worker_list file is not provided and auto_resources=True.

        nodelist_env_cobalt: String, optional
            The environment variable giving a node list in Cobalt format (Default: Uses COBALT_PARTNAME)
            Note: This is only queried if a worker_list file is not provided and auto_resources=True.

        nodelist_env_lsf: String, optional
            The environment variable giving a node list in LSF format (Default: Uses LSB_HOSTS)
            Note: This is only queried if a worker_list file is not provided and auto_resources=True.

        nodelist_env_lsf_shortform: String, optional
            The environment variable giving a node list in LSF short-form format (Default: Uses LSB_MCPU_HOSTS)
            Note: This is only queried if a worker_list file is not provided and auto_resources=True.

        """

        self.top_level_dir = top_level_dir or os.getcwd()
        self.central_mode = central_mode
        if self.central_mode:
            logger.debug('Running in central mode')

        self.env_resources = EnvResources(
            nodelist_env_slurm=nodelist_env_slurm,
            nodelist_env_cobalt=nodelist_env_cobalt,
            nodelist_env_lsf=nodelist_env_lsf,
            nodelist_env_lsf_shortform=nodelist_env_lsf_shortform)

        # This is global nodelist avail to workers - may change to global_worker_nodelist
        self.global_nodelist = Resources.get_global_nodelist(
            rundir=self.top_level_dir, env_resources=self.env_resources)
        remote_detect = False
        if socket.gethostname() not in self.global_nodelist:
            remote_detect = True

        cores_info = node_resources.get_sub_node_resources(
            launcher=launcher,
            remote_mode=remote_detect,
            env_resources=self.env_resources)
        self.logical_cores_avail_per_node = cores_info[0]
        self.physical_cores_avail_per_node = cores_info[1]

        self.libE_nodes = None
        self.worker_resources = None