def test_runtime_mismatch(pilot_description):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.session')

        session = rp.Session()
        with session:
            original_pmgr = rp.PilotManager(session=session)
            pilot = original_pmgr.submit_pilots(rp.PilotDescription(pilot_description))
            original_tmgr = rp.TaskManager(session=session)
            original_tmgr.add_pilots(pilot)

        assert session.closed
        # This assertion may not be true:
        # assert pilot.state in rp.FINAL
        # Note that Pilot and other components may still be shutting down, but the
        # intention is that, from this point, pmgr, pilot, and tmgr are now "stale".

        session = rp.Session()
        with session:
            state = Runtime(session=session)

            with pytest.raises(APIError):
                state.task_manager(original_tmgr)
            original_tmgr.close()

            tmgr = rp.TaskManager(session=session)
            state.task_manager(tmgr)

            with pytest.raises(APIError):
                state.pilot_manager(original_pmgr)
            original_pmgr.close()

            pmgr = rp.PilotManager(session=session)
            state.pilot_manager(pmgr)

            # The UID will not resolve in the stored PilotManager.
            with pytest.raises(ValueError):
                state.pilot(pilot.uid)

            # The Pilot is detectably invalid.
            with pytest.raises(APIError):
                state.pilot(pilot)

            # Even here, the old Pilot may still be in 'PMGR_ACTIVE_PENDING'.
            if pilot.state not in rp.FINAL:
                pilot.cancel()

            tmgr.close()
            pmgr.close()

        assert session.closed
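# Illustrative only (not from the original source): the Runtime tests above and
# below take a 'pilot_description' pytest fixture. A minimal sketch of such a
# fixture is shown here; the concrete resource name and sizing values are
# assumptions chosen for a local smoke test, not the project's actual fixture.
@pytest.fixture
def pilot_description():
    # Keys mirror rp.PilotDescription fields.
    return {'resource': 'local.localhost',
            'cores':    4,
            'runtime':  10}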
def test__session_reconnect(self):
    """ Tests if reconnecting to an existing session works as expected.
    """
    session_ids = []
    for _ in range(1, 4):
        session = rp.Session()
        session_ids.append(session.uid)

    for sid in session_ids:
        # Reconnect to the recorded session by passing its ID explicitly.
        session_r = rp.Session(uid=sid)
        assert session_r.uid == sid, "Session IDs don't match"
        session_r.close()

    session.close()
def test__add_resource_config_2(self):
    """ Test if a custom resource config can be added and used for a pilot.
    """
    session = rp.Session()

    rc = rp.ResourceConfig("mylocalhost")
    rc.task_launch_method = "LOCAL"
    rc.mpi_launch_method = "MPIRUN"
    rc.job_manager_endpoint = "fork://localhost"
    rc.filesystem_endpoint = "file://localhost/"
    rc.bootstrapper = "default_bootstrapper.sh"

    pm = rp.PilotManager(session=session)
    session.add_resource_config(rc)

    pd = rp.ComputePilotDescription()
    pd.resource = "mylocalhost"
    pd.cores = 1
    pd.runtime = 1
    pd.sandbox = "/tmp/rp.sandbox.unittests"
    pd.cleanup = True

    pilot = pm.submit_pilots(pd)
    pilot.wait(timeout=300)
    pilot.cancel()

    session.close()
def setUp():
    session = rp.Session()
    cfg = session.get_resource_config(resource='ornl.summitdev')
    cfg["cores"] = 40

    return cfg, session
def get_rp_decorator():
    """Decorator for tests that should be run in a RADICAL Pilot environment only."""
    try:
        import radical.pilot as rp
        import radical.utils as ru
    except ImportError:
        rp = None
        ru = None

    with_radical_only = pytest.mark.skipif(
        rp is None or ru is None or 'RADICAL_PILOT_DBURL' not in os.environ,
        reason="Test requires RADICAL environment.")

    # The above logic may not be sufficient to mark the usability of the RP environment.
    if rp is not None and 'RADICAL_PILOT_DBURL' in os.environ:
        try:
            # Note: radical.pilot.Session creation causes several deprecation warnings.
            # Ref https://github.com/radical-cybertools/radical.pilot/issues/2185
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=DeprecationWarning)
                with rp.Session():
                    with_radical_only = pytest.mark.skipif(
                        False,
                        reason="RP should be available.")
        except Exception:
            with_radical_only = pytest.mark.skip(
                reason="Cannot create radical.pilot.Session")

    return with_radical_only
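# Usage sketch (an assumption, not part of the original module): the mark
# returned by get_rp_decorator() is applied to individual tests so they are
# skipped outside a working RADICAL Pilot environment. The test name and body
# below are illustrative only.
with_radical_only = get_rp_decorator()


@with_radical_only
def test_rp_import_available():
    # Only runs when radical.pilot is importable and RADICAL_PILOT_DBURL is set.
    import radical.pilot as rp
    assert rp is not None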
def test__pilot_errors(self):
    """ Test if pilot errors are raised properly.
    """
    session = rp.Session()

    try:
        pm = rp.PilotManager(session=session)

        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores = 1
        cpd.runtime = 1
        cpd.sandbox = "/non-/existing/directory..."
        cpd.cleanup = True

        pilot = pm.submit_pilots(descriptions=cpd)
        pilot.wait(timeout=300)
        assert pilot.state == rp.FAILED, \
            "State is '%s' instead of 'Failed'." % pilot.state

        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores = 100000000000  # This should fail - at least in 2014 ;-)
        cpd.runtime = 1
        cpd.sandbox = "/tmp/rp.sandbox.unittests"
        cpd.cleanup = True

        pilot = pm.submit_pilots(descriptions=cpd)
        pilot.wait(timeout=300)
        assert pilot.state == rp.FAILED, \
            "state should be %s and not %s" % (rp.FAILED, pilot.state)

    finally:
        session.close()
def setUp(self):
    """ Getting the resources is slow; to avoid calling it for each test,
        use setUpClass() and store the result as a class variable.
    """
    # Set up the resource, hard-coding 'localhost' for now...
    self.resource = 'local.localhost'

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    self.session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    self.pmgr = rp.PilotManager(session=self.session)

    # Create a UnitManager object.
    self.umgr = rp.UnitManager(session=self.session)

    # Define an [n]-core local pilot that runs for [x] minutes.
    # Here we use a dict to initialize the description object.
    self.pd_init = {
        'resource':      self.resource,
        'runtime':       15,  # pilot runtime (min)
        'exit_on_error': True,
        'project':       self.config[self.resource]['project'],
        'queue':         self.config[self.resource]['queue'],
        'access_schema': self.config[self.resource]['schema'],
        'cores':         self.config[self.resource]['cores'],
    }
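# Illustrative sketch (not part of the original suite): a test method on the
# same TestCase could use the description prepared in setUp() roughly like
# this. The method name, target states, and timeout are assumptions.
def test_submit_pilot_from_pd_init(self):
    pdesc = rp.ComputePilotDescription(self.pd_init)
    pilot = self.pmgr.submit_pilots(pdesc)
    self.umgr.add_pilots(pilot)

    pilot.wait(state=[rp.PMGR_ACTIVE, rp.FAILED], timeout=300)
    assert pilot.state == rp.PMGR_ACTIVE

    pilot.cancel()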
def test__pilot_cancel(self):
    """ Test if we can cancel a pilot.
    """
    session = rp.Session()

    try:
        pm = rp.PilotManager(session=session)

        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores = 1
        cpd.runtime = 1
        cpd.sandbox = "/tmp/rp.sandbox.unittests"
        cpd.cleanup = True

        pilot = pm.submit_pilots(descriptions=cpd)

        assert pilot is not None
        assert pilot.start_time is None
        assert pilot.stop_time is None

        pilot.wait(state=[rp.PMGR_ACTIVE, rp.FAILED], timeout=300)

        assert pilot.submission_time is not None
        assert pilot.state == rp.PMGR_ACTIVE
        assert pilot.start_time is not None

        # the pilot should finish after it has reached run_time
        pilot.cancel()

        pilot.wait(timeout=300)
        assert pilot.state == rp.CANCELED
        assert pilot.stop_time is not None

    finally:
        session.close()
def test__pilotmanager_list_pilots_after_reconnect(self):
    """ Test if listing pilots after a reconnect works as expected.
    """
    session = rp.Session()

    pm1 = rp.PilotManager(session=session)
    assert len(pm1.list_pilots()) == 0, "Wrong number of pilots returned."

    pm2 = rp.PilotManager(session=session)
    assert len(pm2.list_pilots()) == 0, "Wrong number of pilots returned."

    for i in range(0, 2):
        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores = 1
        cpd.runtime = 1
        cpd.sandbox = "/tmp/rp.sandbox.unittests"
        cpd.cleanup = True

        pm1.submit_pilots(descriptions=cpd)
        pm2.submit_pilots(descriptions=cpd)

    assert len(pm1.list_pilots()) == 2, "Wrong number of pilots returned."
    assert len(pm2.list_pilots()) == 2, "Wrong number of pilots returned."

    pm1_r = session.get_pilot_managers(pilot_manager_ids=pm1.uid)
    pm2_r = session.get_pilot_managers(pilot_manager_ids=pm2.uid)

    assert len(pm1_r.list_pilots()) == 2, "Wrong number of pilots returned."
    assert len(pm2_r.list_pilots()) == 2, "Wrong number of pilots returned."

    session.close()
def rp_setup_short(request):

    session = rp.Session(database_url=db_url)

    try:
        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "local.localhost"
        pdesc.runtime = 1
        pdesc.cores = 1
        pdesc.sandbox = "/tmp/radical.pilot.sandbox.unittests"
        pdesc.cleanup = True

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)

        umgr.add_pilots(pilot)

    except Exception:
        print('test failed')
        raise

    def fin():
        pmgr.cancel_pilots()
        pmgr.wait_pilots()
        print('closing session')
        session.close()

    request.addfinalizer(fin)

    return pilot, pmgr, umgr
def __init__(self, log=None, rep=None, prof=None):

    if log:
        self._log = log
    else:
        self._log = ru.Logger('radical.nge')

    if rep:
        self._rep = rep
    else:
        self._rep = ru.Reporter('radical.nge')

    if prof:
        self._prof = prof
    else:
        self._prof = ru.Profiler('radical.nge')

    self._session = rp.Session()
    self._pmgr = rp.PilotManager(self._session)
    self._umgr = rp.UnitManager(self._session)

    self._pmgr.register_callback(self._pilot_state_cb)
    self._umgr.register_callback(self._unit_state_cb)

    # create a dir for data staging
    self._pwd = os.getcwd()
    self._data = 'data.%s' % self._session.uid
    os.makedirs('%s/%s/' % (self._pwd, self._data))

    # track submitted tasks
    self._tcnt = 0
    self._tasks = dict()
def test__pilotmanager_wait(self):
    """ Test if wait() waits until all (2) pilots have reached 'DONE' state.
    """
    session = rp.Session()

    pmgr = rp.PilotManager(session=session)

    cpd1 = rp.ComputePilotDescription()
    cpd1.resource = "local.localhost"
    cpd1.cores = 1
    cpd1.runtime = 1
    cpd1.sandbox = "/tmp/rp.sandbox.unittests"
    cpd1.cleanup = True

    cpd2 = rp.ComputePilotDescription()
    cpd2.resource = "local.localhost"
    cpd2.cores = 1
    cpd2.runtime = 2
    cpd2.sandbox = "/tmp/rp.sandbox.unittests"
    cpd2.cleanup = True

    pilots = pmgr.submit_pilots([cpd1, cpd2])

    pmgr.wait_pilots(timeout=300)

    for pilot in pilots:
        assert pilot.state == rp.DONE, \
            "Expected state 'Done' but state is %s" % pilot.state
        assert pilot.stop_time is not None
        assert pilot.start_time is not None

    session.close()
def test_create_task_from_cu():
    """
    **Purpose**: Test if the 'create_task_from_cu' function generates a Task
    with the correct uid, parent_stage and parent_pipeline from a RP ComputeUnit
    """
    session = rp.Session(dburl=MLAB)
    umgr = rp.UnitManager(session=session)

    cud = rp.ComputeUnitDescription()
    cud.name = 'uid, name, parent_stage_uid, parent_stage_name, ' \
               'parent_pipeline_uid, parent_pipeline_name'
    cud.executable = '/bin/echo'

    cu = rp.ComputeUnit(umgr, cud)

    t = create_task_from_cu(cu)

    assert t.uid == 'uid'
    assert t.name == 'name'
    assert t.parent_stage['uid'] == 'parent_stage_uid'
    assert t.parent_stage['name'] == 'parent_stage_name'
    assert t.parent_pipeline['uid'] == 'parent_pipeline_uid'
    assert t.parent_pipeline['name'] == 'parent_pipeline_name'

    session.close()
def test__issue_114_part_3(self):
    """ https://github.com/radical-cybertools/radical.pilot/issues/114
    """
    session = rp.Session(database_url=DBURL, database_name=DBNAME)

    pm = rp.PilotManager(session=session)

    cpd = rp.ComputePilotDescription()
    cpd.resource = "local.localhost"
    cpd.cores = 1
    cpd.runtime = 1
    cpd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
    cpd.cleanup = True

    pilot = pm.submit_pilots(pilot_descriptions=cpd)

    um = rp.UnitManager(session=session,
                        scheduler=rp.SCHED_DIRECT_SUBMISSION)
    um.add_pilots(pilot)

    state = pm.wait_pilots(state=[rp.PMGR_ACTIVE, rp.DONE, rp.FAILED],
                           timeout=10 * 60)
    assert state == [rp.PMGR_ACTIVE], 'state      : %s' % state
    assert pilot.state == rp.PMGR_ACTIVE, 'pilot state: %s' % pilot.state

    state = pm.wait_pilots(timeout=3 * 60)
    assert state == [rp.DONE], 'state      : %s' % state
    assert pilot.state == rp.DONE, 'pilot state: %s' % pilot.state

    session.close()
def setUp():
    # Add SAGA method to only create directories on remote - don't transfer yet!
    session = rp.Session()

    # Get FS endpoint from session object
    filesystem_endpoint = session.get_resource_config(
        resource_name)[access_schema]["filesystem_endpoint"]

    # Get default rwd from session object and parse it
    default_rwd = parse_rwd(
        session.get_resource_config(resource_name)["default_remote_workdir"],
        filesystem_endpoint)

    # Get the remote sandbox path from rp config files and
    # reproduce same folder structure as during execution
    rp_sandbox = os.path.join(default_rwd, 'radical.pilot.sandbox')
    session_sandbox = os.path.join(rp_sandbox, session_id)
    pilot_sandbox = os.path.join(session_sandbox, 'pilot.0000')

    # Unit Configuration
    unit = dict()
    unit['uid'] = 'unit.00000'
    unit['resource_sandbox'] = session_sandbox
    unit['pilot_sandbox'] = pilot_sandbox
    unit['unit_sandbox'] = os.path.join(pilot_sandbox, 'unit.00000')

    # Create unit folder on remote - don't transfer yet!
    remote_dir = rs.filesystem.Directory(filesystem_endpoint,
                                         session=session)
    remote_dir.make_dir(unit['unit_sandbox'], flags=rsf.CREATE_PARENTS)

    return unit, session
def rp_setup_state(request):

    session = rp.Session(database_url=db_url)

    try:
        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHED_DIRECT_SUBMISSION,
                              output_transfer_workers=4,
                              input_transfer_workers=4)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "local.localhost"
        pdesc.runtime = 20
        pdesc.cores = 1
        pdesc.cleanup = True

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)

        umgr.add_pilots(pilot)

    except Exception:
        print('test failed')
        raise

    def fin():
        print('closing session')
        session.close()

    request.addfinalizer(fin)

    return pilot, pmgr, umgr
def __init__(self, descr: dict, executor: jpsi.JobExecutor, url: str) -> None:

    jpsi.ExecutorAdaptorBase.__init__(self, descr, executor, url)

    self._url = ru.Url(url)

    if self._url.schema != 'rp':
        raise ValueError('handle only rp:// URLs, not %s' % self._url)

    try:
        self._jobs = dict()  # {job.uid: [JPSI_JOB, RP_TASK]}
        self._lock = mt.Lock()

        self._session = rp.Session()
        self._pmgr = rp.PilotManager(session=self._session)
        self._tmgr = rp.TaskManager(session=self._session)

        self._pmgr.register_callback(self._pilot_state_cb)
        self._tmgr.register_callback(self._task_state_cb)

        # this is layer 0, so we just create a dummy pilot
        pd = rp.PilotDescription({'resource': 'local.localhost',
                                  'cores':    16,
                                  'runtime':  60})

        self._pilot = self._pmgr.submit_pilots(pd)
        self._tmgr.add_pilots(self._pilot)

    except Exception:
        self._log.exception('init failed')
        raise
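# Illustrative sketch (not part of the original adaptor): a method on the same
# adaptor class could submit work through the TaskManager created above roughly
# as follows. The method name, executable, and bookkeeping layout are
# assumptions for demonstration only.
def _submit_example_task(self):
    # Describe a trivial task and hand it to the TaskManager.
    td = rp.TaskDescription({'executable': '/bin/date'})
    task = self._tmgr.submit_tasks(td)

    # Record the RP task under its uid; the JPSI-side job is not known here.
    with self._lock:
        self._jobs[task.uid] = [None, task]

    return task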
def test_runtime_bad_uid(pilot_description):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.session')

        session = rp.Session()
        with session:
            state = Runtime(session=session)

            with pytest.raises(ValueError):
                state.task_manager('spam')

            tmgr = rp.TaskManager(session=session)
            state.task_manager(tmgr)

            with pytest.raises(ValueError):
                state.pilot_manager('spam')

            pmgr = rp.PilotManager(session=session)
            state.pilot_manager(pmgr)

            with pytest.raises(ValueError):
                state.pilot_manager('spam')

            tmgr.close()
            pmgr.close()

        assert session.closed
def _new_session():
    # Note: radical.pilot.Session creation causes several deprecation warnings.
    # Ref https://github.com/radical-cybertools/radical.pilot/issues/2185
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        session = rp.Session()
    logger.info(f'Created {session.uid}')
    return session
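# Usage sketch (an assumption, not in the original module): _new_session()
# lends itself to a pytest fixture that guarantees the session is closed after
# each test. The fixture name 'rp_session' is illustrative.
@pytest.fixture
def rp_session():
    session = _new_session()
    try:
        yield session
    finally:
        session.close()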
def test_pass_issue258():

    session = rp.Session(database_url=db_url)

    with pytest.raises(KeyError):
        pmgr = rp.PilotManager(session=session)
        pmgr.wait_pilots(pilot_ids="12", state=rp.ACTIVE)

    session.close()
def setUp():
    session = rp.Session()
    cfg = session.get_resource_config(resource='ornl.summitdev')
    cfg["cores"] = 40

    os.environ['LSB_DJOB_HOSTFILE'] = '%s/sample_summitdev_hostfile' \
                                      % os.path.dirname(__file__)

    return cfg, session
def setUp(self):

    self._session = rp.Session(uid=session_id)

    self._cu = dict()
    self._cu['description'] = {"arguments":        [],
                               "cleanup":          False,
                               "cpu_process_type": '',
                               "cpu_processes":    1,
                               "cpu_thread_type":  "OpenMP",
                               "cpu_threads":      1,
                               "environment":      {},
                               "executable":       "test_exe",
                               "gpu_process_type": None,
                               "gpu_processes":    0,
                               "gpu_thread_type":  None,
                               "gpu_threads":      0,
                               "input_staging":    [],
                               "kernel":           None,
                               "name":             None,
                               "output_staging":   [],
                               "pilot":            None,
                               "post_exec":        [],
                               "pre_exec":         [],
                               "restartable":      False,
                               "stderr":           None,
                               "stdout":           None}
    self._cu['uid'] = 'unit.000000'
    self._cu['slots'] = {'nodes':          [{'name':     'node1',
                                             'uid':      'node_1',
                                             'core_map': [[0]],
                                             'gpu_map':  [],
                                             'lfs':      {'size': 10,
                                                          'path': '/tmp'}}],
                         'cores_per_node': 16,
                         'gpus_per_node':  0,
                         'lfs_per_node':   100,
                         'lm_info':        {'dvm_uri': 'test'}}

    return
def test__session_create(self):
    """ Tests if creating a new session works as expected.
    """
    for _ in range(1, 4):
        session = rp.Session()

        client = MongoClient(DBURL)
        collections = client[DBNAME].collection_names()
        assert len(collections) == 4, "Wrong number of collections in database"

        session.close()
def test__add_resource_config_1(self):
    """ Test if a new resource config can be added and retrieved.
    """
    session = rp.Session()

    rc = rp.ResourceConfig('test')
    session.add_resource_config(rc)
    session.get_resource_config('test')

    session.close()
def test_pass_issue_57():

    for i in [16, 32, 64]:

        session = rp.Session(database_url=db_url)

        try:
            c = rp.Context('ssh')
            c.user_id = CONFIG["xsede.stampede"]["user_id"]
            session.add_context(c)

            pmgr = rp.PilotManager(session=session)
            umgr = rp.UnitManager(session=session,
                                  scheduler=rp.SCHED_ROUND_ROBIN)

            pdesc = rp.ComputePilotDescription()
            pdesc.resource = "xsede.stampede"
            pdesc.project = CONFIG["xsede.stampede"]["project"]
            pdesc.cores = i
            pdesc.runtime = 20
            pdesc.cleanup = False

            pilots = pmgr.submit_pilots(pdesc)
            umgr.add_pilots(pilots)

            unit_descrs = []
            for k in range(0, i * 2):
                cu = rp.ComputeUnitDescription()
                cu.cores = 1
                cu.executable = "/bin/date"
                unit_descrs.append(cu)

            units = umgr.submit_units(unit_descrs)

            try:
                umgr.wait_units()
                for unit in units:
                    unit.wait()
            except Exception:
                pass

            pmgr.cancel_pilots()
            pmgr.wait_pilots()

        except Exception:
            print("TEST FAILED")
            raise

        finally:
            session.close()
def setUp():

    session = rp.Session()

    config = {'lrms_info': {'lm_info':        'INFO',
                            'n_nodes':        2,
                            'cores_per_node': 4,
                            'gpus_per_node':  2,
                            'node_list':      [['0', 0], ['1', 1]]}}

    return config, session
def test__unitmanager_pilot_assoc(self):
    """ Test if unit manager <-> pilot association works as expected.
    """
    session = rp.Session()

    pm = rp.PilotManager(session=session)

    cpd = rp.ComputePilotDescription()
    cpd.resource = "local.localhost"
    cpd.cores = 1
    cpd.runtime = 1
    cpd.sandbox = "/tmp/rp.sandbox.unittests"
    cpd.cleanup = True

    p1 = pm.submit_pilots(descriptions=cpd)

    um = rp.UnitManager(session=session, scheduler='round_robin')

    assert um.list_pilots() == [], "Wrong list of pilots"

    um.add_pilots(p1)
    assert um.list_pilots() == [p1.uid], "Wrong list of pilots"

    # adding the same pilot twice should be ignored
    um.add_pilots(p1)
    assert um.list_pilots() == [p1.uid], "Wrong list of pilots"

    um.remove_pilots(p1.uid)
    assert um.list_pilots() == [], "Wrong list of pilots"

    pilot_list = []
    for x in range(0, 2):
        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores = 1
        cpd.runtime = 1
        cpd.sandbox = "/tmp/rp.sandbox.unittests"
        cpd.cleanup = True
        p = pm.submit_pilots(descriptions=cpd)
        um.add_pilots(p)
        pilot_list.append(p)

    pl = um.list_pilots()
    assert len(pl) == 2, "Wrong number of associated pilots"

    for l in pilot_list:
        # check against the manager's pilot list, not the local bookkeeping list
        assert l.uid in pl, "Unknown pilot in list"
        um.remove_pilots(l.uid)

    assert um.list_pilots() == [], "Wrong list of pilots"

    session.close()
def setUp():
    # Add SAGA method to only create directories on remote - don't transfer yet!
    session = rp.Session()

    cfg = dict()
    cfg['lrms_info'] = dict()
    cfg['lrms_info']['lm_info'] = 'INFO'
    cfg['lrms_info']['cores_per_node'] = 2
    cfg['lrms_info']['gpus_per_node'] = 1
    cfg['lrms_info']['lfs_per_node'] = {'size': 5120, 'path': 'abc'}
    cfg['lrms_info']['node_list'] = [['a', 1], ['b', 2], ['c', 3],
                                     ['d', 4], ['e', 5]]

    return cfg, session
def test_fail_session_ctx():

    s1 = None
    s2 = None

    try:
        s1 = rp.Session(database_url=db_url, database_name='rp-testing')
        print("Session 1: %s (%d)" % (s1.uid, len(s1.list_contexts())))

        c1 = rp.Context('ssh')
        c1.user_id = "tg802352"
        print('context 1: %s' % c1)
        s1.add_context(c1)

        c2 = rp.Context('ssh')
        c2.user_id = "abcedesds"
        print('context 2: %s' % c2)
        s1.add_context(c2)

        for c in s1.list_contexts():
            print(c)

        s2 = rp.Session(uid=s1.uid)
        print("Session 2: %s (%d)" % (s2.uid, len(s2.list_contexts())))

        for c in s2.list_contexts():
            print(c)

        assert len(s1.list_contexts()) == len(s2.list_contexts())

    finally:
        if s1:
            s1.close()
        if s2:
            s2.close()
def setUp():
    # Add SAGA method to only create directories on remote - don't transfer yet!
    session = rp.Session()

    # Get FS endpoint from session object
    filesystem_endpoint = session.get_resource_config(
        resource_name)[access_schema]["filesystem_endpoint"]

    # Get default rwd from session object and parse it
    default_rwd = parse_rwd(
        session.get_resource_config(resource_name)["default_remote_workdir"],
        filesystem_endpoint)

    # Get the remote sandbox path from rp config files and
    # reproduce same folder structure as during execution
    rp_sandbox = os.path.join(default_rwd, 'radical.pilot.sandbox')
    session_sandbox = os.path.join(rp_sandbox, session_id)
    pilot_sandbox = os.path.join(session_sandbox, 'pilot.0000')

    # Unit Configuration
    unit = dict()
    unit['uid'] = 'unit.00000'
    unit['target_state'] = 'DONE'
    unit['resource_sandbox'] = session_sandbox
    unit['pilot_sandbox'] = pilot_sandbox
    unit['unit_sandbox'] = os.path.join(pilot_sandbox, 'unit.00000')

    # Create unit folder on remote - don't transfer yet!
    remote_dir = rs.filesystem.Directory(filesystem_endpoint,
                                         session=session)
    remote_dir.make_dir(unit['unit_sandbox'], flags=rsf.CREATE_PARENTS)

    remote_dir = rs.filesystem.Directory(unit['unit_sandbox'],
                                         session=session)

    # Move all files and folders into the unit sandbox
    src_file = rs.filesystem.File(os.path.join(local_sample_data,
                                               sample_data[0]),
                                  session=session)
    src_file.copy(remote_dir.url.path)

    src_dir1 = rs.filesystem.File(os.path.join(local_sample_data,
                                               sample_data[1]),
                                  session=session)
    src_dir1.copy(remote_dir.url.path, rsf.CREATE_PARENTS | rsf.RECURSIVE)

    src_dir2 = rs.filesystem.File(os.path.join(local_sample_data,
                                               sample_data[2]),
                                  session=session)
    src_dir2.copy(remote_dir.url.path, rsf.CREATE_PARENTS | rsf.RECURSIVE)

    return unit, session