def test_get_available_nodes_central_mode_remove_libE_proc():
    """Check central-mode node splitting when the current host is in worker_list.

    A ``worker_list`` file is written containing eight fake node names with
    the current hostname inserted after the fourth entry.  For several worker
    counts, each worker is spoofed in turn and the node list returned by
    ``get_available_nodes`` is compared with the expected slice.  None of the
    expected slices contain the current hostname.
    """
    mynode = socket.gethostname()
    nodelist_in = [
        'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
        'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234',
    ]
    with open('worker_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                f.write(mynode + '\n')

    # (num_workers, expected node list per worker, indexed by workerID - 1)
    cases = [
        (8, [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
             ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]),
        (4, [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
             ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']]),
        (1, [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
              'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]),
        # Test the best_split algorithm
        (3, [['knl-0020', 'knl-0021', 'knl-0022'],
             ['knl-0036', 'knl-0137', 'knl-0138'],
             ['knl-0139', 'knl-1234']]),
    ]

    # Always remove worker_list, even when an assertion fails, so a stale
    # file cannot leak into other tests.
    try:
        resources = Resources(central_mode=True)

        for num_workers, exp_out in cases:
            resources.num_workers = num_workers
            # Spoof current process as each worker and check nodelist.
            for wrk in range(resources.num_workers):
                resources.workerID = wrk + 1
                local_nodelist = resources.get_available_nodes()
                assert local_nodelist == exp_out[wrk], \
                    "local_nodelist returned does not match expected"
    finally:
        os.remove('worker_list')
def test_get_available_nodes_central_mode():
    """Check central-mode node splitting using a node list from the environment.

    The node list is supplied through the ``LIBE_RESOURCES_TEST_NODE_LIST``
    environment variable.  For each worker count, every worker is spoofed in
    turn and its returned node list is compared with the expected slice.
    """
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
                          central_mode=True)

    # Each scenario: worker count paired with the node list expected for
    # workers 1..N, in order.
    scenarios = (
        (8, [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
             ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]),
        (4, [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
             ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']]),
        (1, [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
              'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]),
        # Test the best_split algorithm
        (3, [['knl-0020', 'knl-0021', 'knl-0022'],
             ['knl-0036', 'knl-0137', 'knl-0138'],
             ['knl-0139', 'knl-1234']]),
    )

    for worker_count, expected_by_worker in scenarios:
        resources.num_workers = worker_count
        # Spoof current process as each worker and check nodelist.
        for worker_id in range(1, resources.num_workers + 1):
            resources.workerID = worker_id
            nodes_for_worker = resources.get_available_nodes()
            assert nodes_for_worker == expected_by_worker[worker_id - 1], \
                "local_nodelist returned does not match expected"
def test_get_available_nodes_distrib_mode_host_not_in_list():
    """Check that distributed mode fails when the current host is not listed.

    The node list comes from the ``LIBE_RESOURCES_TEST_NODE_LIST`` environment
    variable and contains only fake node names, so the machine running the
    test is not expected to be in it.  ``get_available_nodes`` must raise.
    """
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
                          central_mode=False)

    # Spoof current process as a worker.
    resources.num_workers = 8

    # Test running distributed mode without current host in list.
    resources.workerID = 2
    try:
        resources.get_available_nodes()
    except Exception:
        # Expected.  Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed by the test.
        pass
    else:
        raise AssertionError(
            "get_available_nodes did not raise in distributed mode "
            "with the current host missing from the node list")
def test_get_available_nodes_distrib_mode_uneven_split():
    """Check the distributed-mode node list for worker 2 of 2 on an odd-sized list.

    A ``worker_list`` file is written containing eight fake node names with
    the current hostname inserted after the fifth entry (nine lines in
    total).  Worker 2 is expected to receive the four-node slice that
    contains the current host.
    """
    mynode = socket.gethostname()
    nodelist_in = [
        'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
        'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234',
    ]
    with open('worker_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 4:
                f.write(mynode + '\n')

    # Always remove worker_list, even when the assertion fails, so a stale
    # file cannot leak into other tests.
    try:
        resources = Resources(central_mode=False)
        resources.num_workers = 2

        # May not be at head of list - should perhaps be warning or enforced
        resources.workerID = 2
        exp_out = ['knl-0137', mynode, 'knl-0138', 'knl-0139']
        local_nodelist = resources.get_available_nodes()
        assert local_nodelist == exp_out, \
            "local_nodelist returned does not match expected"
    finally:
        os.remove('worker_list')
def test_get_available_nodes_distrib_mode():
    """Check distributed-mode node lists for several worker counts.

    A ``worker_list`` file is written containing seven fake node names with
    the current hostname inserted after the fourth entry (eight lines in
    total).  The test then spoofs individual workers and checks that:

    * a worker whose slice does not contain the current host raises;
    * workers whose slice contains the current host get the expected nodes,
      including the sub-node case of more workers than nodes.
    """
    mynode = socket.gethostname()
    nodelist_in = [
        'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
        'knl-0137', 'knl-0138', 'knl-0139',
    ]
    with open('worker_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                f.write(mynode + '\n')

    mismatch_msg = "local_nodelist returned does not match expected"

    # Always remove worker_list, even when an assertion fails, so a stale
    # file cannot leak into other tests.
    try:
        resources = Resources(central_mode=False)

        # Spoof current process as each worker and check nodelist.
        resources.num_workers = 8

        # Test workerID not in local_nodelist
        resources.workerID = 4
        try:
            resources.get_available_nodes()
        except Exception:
            # Expected.  Catch Exception rather than using a bare except so
            # KeyboardInterrupt / SystemExit are not swallowed by the test.
            pass
        else:
            raise AssertionError(
                "get_available_nodes did not raise for a worker whose "
                "node slice does not contain the current host")

        resources.workerID = 5
        exp_out = [mynode]
        local_nodelist = resources.get_available_nodes()
        assert local_nodelist == exp_out, mismatch_msg

        resources.num_workers = 1
        resources.workerID = 1
        exp_out = [
            'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', mynode,
            'knl-0137', 'knl-0138', 'knl-0139',
        ]
        local_nodelist = resources.get_available_nodes()
        assert local_nodelist == exp_out, mismatch_msg

        resources.num_workers = 4
        resources.workerID = 3
        exp_out = [mynode, 'knl-0137']
        local_nodelist = resources.get_available_nodes()
        assert local_nodelist == exp_out, mismatch_msg

        # Sub-node workers
        resources.num_workers = 16
        for worker_id in (9, 10):
            resources.workerID = worker_id
            exp_out = [mynode]
            local_nodelist = resources.get_available_nodes()
            assert local_nodelist == exp_out, mismatch_msg
    finally:
        os.remove('worker_list')