def test_get_global_nodelist_frm_wrklst_file(): # worker_list file should override env variables os.environ[ "LIBE_RESOURCES_TEST_NODE_LIST"] = "20-22,137-139,1234" # Should not be this exp_out = [ 'knl-0019', 'knl-0021', 'knl-0022', 'knl-0137', 'knl-0138', 'knl-0139', 'knl-2345' ] # Should be this open('worker_list', 'w').close() try: _ = Resources.get_global_nodelist(rundir=os.getcwd()) except ResourcesException as e: assert e.args[0] == 'Error. global_nodelist is empty' else: assert 0 with open('worker_list', 'w') as f: for node in exp_out: f.write(node + '\n') # Do not specify env vars. global_nodelist1 = Resources.get_global_nodelist(rundir=os.getcwd()) assert global_nodelist1 == exp_out, "global_nodelist returned does not match expected" # Specify env vars - should ignore env_resources = EnvResources( nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_cobalt="LIBE_RESOURCES_TEST_NODE_LIST", nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET") global_nodelist2 = Resources.get_global_nodelist( rundir=os.getcwd(), env_resources=env_resources) assert global_nodelist2 == exp_out, "global_nodelist returned does not match expected" os.remove('worker_list')
def test_abbrev_nodenames_nochange_cobalt(): env_resources = EnvResources() # Test Cobalt abbrev exp_names = ['21', '22', '137', '138', '1234', '11234'] env_resources.schedular = 'Cobalt' abbrev_names = env_resources.abbrev_nodenames(exp_names) assert abbrev_names == exp_names, "Abbreviated names returned do not match expected" del env_resources
def test_get_cpu_resources_from_env_unknown_env(): os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0009-0012]" env_resources = EnvResources( nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST", nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET") cores_info = node_resources._get_cpu_resources_from_env( env_resources=env_resources) assert cores_info is None, 'cores_info should be None'
def test_abbrev_nodenames_nochange_slurm(): env_resources = EnvResources() # Test Cobalt abbrev exp_names = [ 'knl-0019', 'knl-0021', 'knl-0022', 'knl-0137', 'knl-0138', 'knl-0139', 'knl-2345' ] env_resources.schedular = 'Cobalt' abbrev_names = env_resources.abbrev_nodenames(exp_names) assert abbrev_names == exp_names, "Abbreviated names returned do not match expected" del env_resources
def test_get_global_nodelist_frm_cobalt(): os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "20-22,137-139,1234" exp_out = ['20', '21', '22', '137', '138', '139', '1234'] env_resources = EnvResources( nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_cobalt="LIBE_RESOURCES_TEST_NODE_LIST", nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET") global_nodelist = Resources.get_global_nodelist( rundir=os.getcwd(), env_resources=env_resources) assert global_nodelist == exp_out, "global_nodelist returned does not match expected"
def test_get_global_nodelist_frm_lsf_shortform(): os.environ[ "LIBE_RESOURCES_TEST_NODE_LIST"] = 'batch5 1 g06n02 42 h21n18 42' exp_out = ['g06n02', 'h21n18'] env_resources = EnvResources( nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf_shortform="LIBE_RESOURCES_TEST_NODE_LIST") global_nodelist = Resources.get_global_nodelist( rundir=os.getcwd(), env_resources=env_resources) assert global_nodelist == exp_out, "global_nodelist returned does not match expected"
def test_get_global_nodelist_standalone(): mynode = socket.gethostname() env_resources = EnvResources( nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf_shortform="THIS_ENV_VARIABLE_IS_DEF_NOT_SET") global_nodelist = Resources.get_global_nodelist( rundir=os.getcwd(), env_resources=env_resources) assert global_nodelist == [ mynode ], "global_nodelist returned does not match expected"
def test_get_cpu_resources_from_env_lsf_shortform(): os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = 'batch5 1 g06n02 42' exp_out = (42, 42) env_resources1 = EnvResources( nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf_shortform="LIBE_RESOURCES_TEST_NODE_LIST") cores_info = node_resources._get_cpu_resources_from_env( env_resources=env_resources1) assert cores_info == exp_out, "cores_info returned does not match expected" os.environ[ "LIBE_RESOURCES_TEST_NODE_LIST"] = 'batch5 1 g06n02 42 h21n18 42' env_resources2 = EnvResources( nodelist_env_slurm="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_cobalt="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf="THIS_ENV_VARIABLE_IS_DEF_NOT_SET", nodelist_env_lsf_shortform="LIBE_RESOURCES_TEST_NODE_LIST") cores_info = node_resources._get_cpu_resources_from_env( env_resources=env_resources2) assert cores_info == exp_out, "cores_info returned does not match expected"
def __init__(self, top_level_dir=None, central_mode=False, launcher=None, nodelist_env_slurm=None, nodelist_env_cobalt=None, nodelist_env_lsf=None, nodelist_env_lsf_shortform=None): """Initialise new Resources instance Works out the compute resources available for current allocation, including node list and cores/hardware threads available within nodes. Parameters ---------- top_level_dir: string, optional Directory libEnsemble runs in (default is current working directory) central_mode: boolean, optional If true, then running in central mode, else distributed. Central mode means libE processes (manager and workers) are grouped together and do not share nodes with applications. Distributed mode means Workers share nodes with applications. launcher: String, optional The name of the job launcher such as mpirun or aprun. This may be used to obtain intra-node information by launching a probing job onto the compute nodes. If not present, the local node will be used to obtain this information. nodelist_env_slurm: String, optional The environment variable giving a node list in Slurm format (Default: Uses SLURM_NODELIST) Note: This is only queried if a worker_list file is not provided and auto_resources=True. nodelist_env_cobalt: String, optional The environment variable giving a node list in Cobalt format (Default: Uses COBALT_PARTNAME) Note: This is only queried if a worker_list file is not provided and auto_resources=True. nodelist_env_lsf: String, optional The environment variable giving a node list in LSF format (Default: Uses LSB_HOSTS) Note: This is only queried if a worker_list file is not provided and auto_resources=True. nodelist_env_lsf_shortform: String, optional The environment variable giving a node list in LSF short-form format (Default: Uses LSB_MCPU_HOSTS) Note: This is only queried if a worker_list file is not provided and auto_resources=True. """ self.top_level_dir = top_level_dir or os.getcwd() self.central_mode = central_mode if self.central_mode: logger.debug('Running in central mode') self.env_resources = EnvResources( nodelist_env_slurm=nodelist_env_slurm, nodelist_env_cobalt=nodelist_env_cobalt, nodelist_env_lsf=nodelist_env_lsf, nodelist_env_lsf_shortform=nodelist_env_lsf_shortform) # This is global nodelist avail to workers - may change to global_worker_nodelist self.global_nodelist = Resources.get_global_nodelist( rundir=self.top_level_dir, env_resources=self.env_resources) remote_detect = False if socket.gethostname() not in self.global_nodelist: remote_detect = True cores_info = node_resources.get_sub_node_resources( launcher=launcher, remote_mode=remote_detect, env_resources=self.env_resources) self.logical_cores_avail_per_node = cores_info[0] self.physical_cores_avail_per_node = cores_info[1] self.libE_nodes = None self.worker_resources = None