def test__pilotmanager_list_pilots_after_reconnect(self):
    """Test if listing pilots after a reconnect works as expected."""
    session = rp.Session()

    pm1 = rp.PilotManager(session=session)
    assert len(pm1.list_pilots()) == 0, "Wrong number of pilots returned."

    pm2 = rp.PilotManager(session=session)
    assert len(pm2.list_pilots()) == 0, "Wrong number of pilots returned."

    for _ in range(2):
        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores    = 1
        cpd.runtime  = 1
        cpd.sandbox  = "/tmp/rp.sandbox.unittests"
        cpd.cleanup  = True

        pm1.submit_pilots(descriptions=cpd)
        pm2.submit_pilots(descriptions=cpd)

    assert len(pm1.list_pilots()) == 2, "Wrong number of pilots returned."
    assert len(pm2.list_pilots()) == 2, "Wrong number of pilots returned."

    pm1_r = session.get_pilot_managers(pilot_manager_ids=pm1.uid)
    pm2_r = session.get_pilot_managers(pilot_manager_ids=pm2.uid)

    assert len(pm1_r.list_pilots()) == 2, "Wrong number of pilots returned."
    assert len(pm2_r.list_pilots()) == 2, "Wrong number of pilots returned."

    session.close()
def test_runtime_mismatch(pilot_description):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.session')
        session = rp.Session()

        with session:
            original_pmgr = rp.PilotManager(session=session)
            pilot = original_pmgr.submit_pilots(
                rp.PilotDescription(pilot_description))
            original_tmgr = rp.TaskManager(session=session)
            original_tmgr.add_pilots(pilot)

        assert session.closed
        # This assertion may not be true:
        # assert pilot.state in rp.FINAL
        # Note that Pilot and other components may still be shutting down,
        # but the intention is that, from this point, pmgr, pilot, and tmgr
        # are now "stale".

        session = rp.Session()
        with session:
            state = Runtime(session=session)

            with pytest.raises(APIError):
                state.task_manager(original_tmgr)
            original_tmgr.close()

            tmgr = rp.TaskManager(session=session)
            state.task_manager(tmgr)

            with pytest.raises(APIError):
                state.pilot_manager(original_pmgr)
            original_pmgr.close()

            pmgr = rp.PilotManager(session=session)
            state.pilot_manager(pmgr)

            # The UID will not resolve in the stored PilotManager.
            with pytest.raises(ValueError):
                state.pilot(pilot.uid)
            # The Pilot is detectably invalid.
            with pytest.raises(APIError):
                state.pilot(pilot)

            # Even here, the old Pilot may still be in 'PMGR_ACTIVE_PENDING'.
            if pilot.state not in rp.FINAL:
                pilot.cancel()
            tmgr.close()
            pmgr.close()

        assert session.closed
def test__pilot_errors(self):
    """Test if pilot errors are raised properly."""
    session = rp.Session()
    try:
        pm = rp.PilotManager(session=session)

        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores    = 1
        cpd.runtime  = 1
        cpd.sandbox  = "/non-/existing/directory..."
        cpd.cleanup  = True

        pilot = pm.submit_pilots(descriptions=cpd)
        pilot.wait(timeout=300)
        assert pilot.state == rp.FAILED, \
            "State is '%s' instead of 'Failed'." % pilot.state

        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores    = 100000000000  # This should fail - at least in 2014 ;-)
        cpd.runtime  = 1
        cpd.sandbox  = "/tmp/rp.sandbox.unittests"
        cpd.cleanup  = True

        pilot = pm.submit_pilots(descriptions=cpd)
        pilot.wait(timeout=300)
        assert pilot.state == rp.FAILED, \
            "state should be %s and not %s" % (rp.FAILED, pilot.state)

    finally:
        session.close()
def test__pilot_cancel(self):
    """Test if we can cancel a pilot."""
    session = rp.Session()
    try:
        pm = rp.PilotManager(session=session)

        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores    = 1
        cpd.runtime  = 1
        cpd.sandbox  = "/tmp/rp.sandbox.unittests"
        cpd.cleanup  = True

        pilot = pm.submit_pilots(descriptions=cpd)

        assert pilot is not None
        assert pilot.start_time is None
        assert pilot.stop_time is None

        pilot.wait(state=[rp.PMGR_ACTIVE, rp.FAILED], timeout=300)
        assert pilot.submission_time is not None
        assert pilot.state == rp.PMGR_ACTIVE
        assert pilot.start_time is not None

        # the pilot should finish after it has reached run_time
        pilot.cancel()

        pilot.wait(timeout=300)
        assert pilot.state == rp.CANCELED
        assert pilot.stop_time is not None

    finally:
        session.close()
def __init__(self, descr: dict, executor: jpsi.JobExecutor, url: str) -> None:

    jpsi.ExecutorAdaptorBase.__init__(self, descr, executor, url)

    self._url = ru.Url(url)

    if self._url.schema != 'rp':
        raise ValueError('handle only rp:// URLs, not %s' % self._url)

    try:
        self._jobs = dict()  # {job.uid: [JPSI_JOB, RP_TASK]}
        self._lock = mt.Lock()

        self._session = rp.Session()
        self._pmgr    = rp.PilotManager(session=self._session)
        self._tmgr    = rp.TaskManager(session=self._session)

        self._pmgr.register_callback(self._pilot_state_cb)
        self._tmgr.register_callback(self._task_state_cb)

        # this is layer 0, so we just create a dummy pilot
        pd = rp.PilotDescription({'resource': 'local.localhost',
                                  'cores'   : 16,
                                  'runtime' : 60})

        self._pilot = self._pmgr.submit_pilots(pd)
        self._tmgr.add_pilots(self._pilot)

    except Exception:
        self._log.exception('init failed')
        raise
def test__issue_114_part_3(self):
    """https://github.com/radical-cybertools/radical.pilot/issues/114"""
    session = rp.Session(database_url=DBURL, database_name=DBNAME)

    pm = rp.PilotManager(session=session)

    cpd = rp.ComputePilotDescription()
    cpd.resource = "local.localhost"
    cpd.cores    = 1
    cpd.runtime  = 1
    cpd.sandbox  = "/tmp/radical.pilot.sandbox.unittests"
    cpd.cleanup  = True

    pilot = pm.submit_pilots(pilot_descriptions=cpd)

    um = rp.UnitManager(session=session,
                        scheduler=rp.SCHED_DIRECT_SUBMISSION)
    um.add_pilots(pilot)

    state = pm.wait_pilots(state=[rp.PMGR_ACTIVE, rp.DONE, rp.FAILED],
                           timeout=10 * 60)
    assert state == [rp.PMGR_ACTIVE],     'state      : %s' % state
    assert pilot.state == rp.PMGR_ACTIVE, 'pilot state: %s' % pilot.state

    state = pm.wait_pilots(timeout=3 * 60)
    assert state == [rp.DONE],     'state      : %s' % state
    assert pilot.state == rp.DONE, 'pilot state: %s' % pilot.state

    session.close()
def setUp(self):
    """Getting the resources is slow; to avoid calling it for each test,
    use setUpClass() and store the result as a class variable.
    """
    # Set up the resource, hard-coding 'localhost' for now...
    self.resource = 'local.localhost'

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    self.session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    self.pmgr = rp.PilotManager(session=self.session)

    # Create a UnitManager object.
    self.umgr = rp.UnitManager(session=self.session)

    # Define an [n]-core local pilot that runs for [x] minutes.
    # Here we use a dict to initialize the description object.
    self.pd_init = {
        'resource'     : self.resource,
        'runtime'      : 15,  # pilot runtime (min)
        'exit_on_error': True,
        'project'      : self.config[self.resource]['project'],
        'queue'        : self.config[self.resource]['queue'],
        'access_schema': self.config[self.resource]['schema'],
        'cores'        : self.config[self.resource]['cores'],
    }
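# The docstring above recommends unittest's setUpClass() for the expensive
# objects. A minimal sketch of that pattern follows; the class name is
# hypothetical and the rp calls simply mirror the setUp() body above.
import unittest

class PilotTestCase(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        # create the slow resources once per test class, not once per test
        cls.resource = 'local.localhost'
        cls.session  = rp.Session()
        cls.pmgr     = rp.PilotManager(session=cls.session)
        cls.umgr     = rp.UnitManager(session=cls.session)

    @classmethod
    def tearDownClass(cls):
        cls.session.close()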
def rp_setup_state(request):

    session = rp.Session(database_url=db_url)

    try:
        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHED_DIRECT_SUBMISSION,
                              output_transfer_workers=4,
                              input_transfer_workers=4)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "local.localhost"
        pdesc.runtime  = 20
        pdesc.cores    = 1
        pdesc.cleanup  = True

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)
        umgr.add_pilots(pilot)

    except Exception:
        print('test failed')
        raise

    def fin():
        print('closing session')
        session.close()
    request.addfinalizer(fin)

    return pilot, pmgr, umgr
def rp_setup_short(request):

    session = rp.Session(database_url=db_url)

    try:
        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "local.localhost"
        pdesc.runtime  = 1
        pdesc.cores    = 1
        pdesc.sandbox  = "/tmp/radical.pilot.sandbox.unittests"
        pdesc.cleanup  = True

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)
        umgr.add_pilots(pilot)

    except Exception:
        print('test failed')
        raise

    def fin():
        pmgr.cancel_pilots()
        pmgr.wait_pilots()
        print('closing session')
        session.close()
    request.addfinalizer(fin)

    return pilot, pmgr, umgr
def test__add_resource_config_2(self):
    """Test if we can add and use a custom resource configuration."""
    session = rp.Session()

    rc = rp.ResourceConfig("mylocalhost")
    rc.task_launch_method   = "LOCAL"
    rc.mpi_launch_method    = "MPIRUN"
    rc.job_manager_endpoint = "fork://localhost"
    rc.filesystem_endpoint  = "file://localhost/"
    rc.bootstrapper         = "default_bootstrapper.sh"

    pm = rp.PilotManager(session=session)
    session.add_resource_config(rc)

    pd = rp.ComputePilotDescription()
    pd.resource = "mylocalhost"
    pd.cores    = 1
    pd.runtime  = 1
    pd.sandbox  = "/tmp/rp.sandbox.unittests"
    pd.cleanup  = True

    pilot = pm.submit_pilots(pd)
    pilot.wait(timeout=300)
    pilot.cancel()

    session.close()
def test_runtime_bad_uid(pilot_description):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.session')
        session = rp.Session()

        with session:
            state = Runtime(session=session)

            with pytest.raises(ValueError):
                state.task_manager('spam')

            tmgr = rp.TaskManager(session=session)
            state.task_manager(tmgr)

            with pytest.raises(ValueError):
                state.pilot_manager('spam')

            pmgr = rp.PilotManager(session=session)
            state.pilot_manager(pmgr)

            with pytest.raises(ValueError):
                state.pilot_manager('spam')

            tmgr.close()
            pmgr.close()

        assert session.closed
def __init__(self, log=None, rep=None, prof=None):

    if log:
        self._log = log
    else:
        self._log = ru.Logger('radical.nge')

    if rep:
        self._rep = rep
    else:
        self._rep = ru.Reporter('radical.nge')

    if prof:
        self._prof = prof
    else:
        self._prof = ru.Profiler('radical.nge')

    self._session = rp.Session()
    self._pmgr    = rp.PilotManager(self._session)
    self._umgr    = rp.UnitManager(self._session)

    self._pmgr.register_callback(self._pilot_state_cb)
    self._umgr.register_callback(self._unit_state_cb)

    # create a dir for data staging
    self._pwd  = os.getcwd()
    self._data = 'data.%s' % self._session.uid
    os.makedirs('%s/%s/' % (self._pwd, self._data))

    # track submitted tasks
    self._tcnt  = 0
    self._tasks = dict()
def test__pilotmanager_wait(self):
    """Test if wait() waits until all (2) pilots have reached 'DONE' state."""
    session = rp.Session()

    pmgr = rp.PilotManager(session=session)

    cpd1 = rp.ComputePilotDescription()
    cpd1.resource = "local.localhost"
    cpd1.cores    = 1
    cpd1.runtime  = 1
    cpd1.sandbox  = "/tmp/rp.sandbox.unittests"
    cpd1.cleanup  = True

    cpd2 = rp.ComputePilotDescription()
    cpd2.resource = "local.localhost"
    cpd2.cores    = 1
    cpd2.runtime  = 2
    cpd2.sandbox  = "/tmp/rp.sandbox.unittests"
    cpd2.cleanup  = True

    pilots = pmgr.submit_pilots([cpd1, cpd2])
    pmgr.wait_pilots(timeout=300)

    for pilot in pilots:
        assert pilot.state == rp.DONE, \
            "Expected state 'Done' but state is %s" % pilot.state
        assert pilot.stop_time is not None
        assert pilot.start_time is not None

    session.close()
def test__pilotmanager_create(self):
    """Test if pilot manager creation works as expected."""
    session = rp.Session()

    assert session.list_pilot_managers() == [], \
        "Wrong number of pilot managers"

    pm = rp.PilotManager(session=session)
    assert session.list_pilot_managers() == [pm.uid], \
        "Wrong list of pilot managers"

    pm = rp.PilotManager(session=session)
    assert len(session.list_pilot_managers()) == 2, \
        "Wrong number of pilot managers"

    session.close()
def test_pass_issue258():

    session = rp.Session(database_url=db_url)

    with pytest.raises(KeyError):
        pmgr = rp.PilotManager(session=session)
        pmgr.wait_pilots(pilot_ids="12", state=rp.ACTIVE)

    session.close()
def test_pass_issue_57():

    for i in [16, 32, 64]:
        session = rp.Session(database_url=db_url)

        try:
            c = rp.Context('ssh')
            c.user_id = CONFIG["xsede.stampede"]["user_id"]
            session.add_context(c)

            pmgr = rp.PilotManager(session=session)
            umgr = rp.UnitManager(session=session,
                                  scheduler=rp.SCHED_ROUND_ROBIN)

            pdesc = rp.ComputePilotDescription()
            pdesc.resource = "xsede.stampede"
            pdesc.project  = CONFIG["xsede.stampede"]["project"]
            pdesc.cores    = i
            pdesc.runtime  = 20
            pdesc.cleanup  = False

            pilots = pmgr.submit_pilots(pdesc)
            umgr.add_pilots(pilots)

            unit_descrs = []
            for _ in range(i * 2):
                cu = rp.ComputeUnitDescription()
                cu.cores      = 1
                cu.executable = "/bin/date"
                unit_descrs.append(cu)

            units = umgr.submit_units(unit_descrs)

            try:
                umgr.wait_units()
                for unit in units:
                    unit.wait()
            except Exception:
                pass

            pmgr.cancel_pilots()
            pmgr.wait_pilots()

        except Exception:
            print("TEST FAILED")
            raise

        finally:
            session.close()
def test__pilotmanager_get_pilots(self):

    session = rp.Session()

    pm1 = rp.PilotManager(session=session)
    assert len(pm1.list_pilots()) == 0, "Wrong number of pilots returned."

    pm2 = rp.PilotManager(session=session)
    assert len(pm2.list_pilots()) == 0, "Wrong number of pilots returned."

    pm1_pilot_uids = []
    pm2_pilot_uids = []

    for _ in range(2):
        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores    = 1
        cpd.runtime  = 1
        cpd.sandbox  = "/tmp/rp.sandbox.unittests"
        cpd.cleanup  = True

        pilot_pm1 = pm1.submit_pilots(descriptions=cpd)
        pm1_pilot_uids.append(pilot_pm1.uid)

        pilot_pm2 = pm2.submit_pilots(descriptions=cpd)
        pm2_pilot_uids.append(pilot_pm2.uid)

    for uid in pm1.list_pilots():
        pilot = pm1.get_pilots(uid)
        assert pilot.uid in pm1_pilot_uids, \
            "Wrong pilot ID %s (not in %s)" % (pilot.uid, pm1_pilot_uids)

    assert len(pm1.get_pilots()) == 2, "Wrong number of pilots."

    for uid in pm2.list_pilots():
        pilot = pm2.get_pilots(uid)
        assert pilot.uid in pm2_pilot_uids, "Wrong pilot ID %s" % pilot.uid

    assert len(pm2.get_pilots()) == 2, "Wrong number of pilots."

    session.close()
def _new_pilotmanager(session: rp.Session):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.session')

        return rp.PilotManager(session=session)
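# A minimal usage sketch for the helper above, assuming a live rp.Session.
# list_pilot_managers() is used here as elsewhere in this suite to confirm
# the new manager is registered with the session; the surrounding try/finally
# is illustrative, not part of the original.
session = rp.Session()
try:
    pmgr = _new_pilotmanager(session)
    assert pmgr.uid in session.list_pilot_managers()
finally:
    session.close()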
def test__unitmanager_pilot_assoc(self):
    """Test if unit manager <-> pilot association works as expected."""
    session = rp.Session()

    pm = rp.PilotManager(session=session)

    cpd = rp.ComputePilotDescription()
    cpd.resource = "local.localhost"
    cpd.cores    = 1
    cpd.runtime  = 1
    cpd.sandbox  = "/tmp/rp.sandbox.unittests"
    cpd.cleanup  = True

    p1 = pm.submit_pilots(descriptions=cpd)

    um = rp.UnitManager(session=session, scheduler='round_robin')
    assert um.list_pilots() == [], "Wrong list of pilots"

    um.add_pilots(p1)
    assert um.list_pilots() == [p1.uid], "Wrong list of pilots"

    # adding the same pilot twice should be ignored
    um.add_pilots(p1)
    assert um.list_pilots() == [p1.uid], "Wrong list of pilots"

    um.remove_pilots(p1.uid)
    assert um.list_pilots() == [], "Wrong list of pilots"

    pilot_list = []
    for _ in range(2):
        cpd = rp.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores    = 1
        cpd.runtime  = 1
        cpd.sandbox  = "/tmp/rp.sandbox.unittests"
        cpd.cleanup  = True

        p = pm.submit_pilots(descriptions=cpd)
        um.add_pilots(p)
        pilot_list.append(p)

    pl = um.list_pilots()
    assert len(pl) == 2, "Wrong number of associated pilots"

    for p in pilot_list:
        assert p.uid in pl, "Unknown pilot in list"
        um.remove_pilots(p.uid)

    assert um.list_pilots() == [], "Wrong list of pilots"

    session.close()
def test__issue_262(self):
    """https://github.com/radical-cybertools/radical.pilot/issues/262"""
    session = rp.Session()

    pmgr = rp.PilotManager(session=session)

    # Create a small local pilot.
    pd = rp.ComputePilotDescription()
    pd.resource = "local.localhost"
    pd.cores    = 1
    pd.runtime  = 1

    pilot = pmgr.submit_pilots(pd)

    umgr = rp.UnitManager(session=session,
                          scheduler=rp.SCHED_DIRECT_SUBMISSION)
    umgr.add_pilots(pilot)

    cud = rp.ComputeUnitDescription()
    cud.executable    = "/bin/sleep"
    cud.arguments     = ["10"]
    cud.cores         = 1
    cud.input_staging = ["/etc/group"]

    unit = umgr.submit_units(cud)
    umgr.wait_units()

    # log entries of both pilot and unit should carry timestamp and
    # message, and should stringify cleanly
    for log_entry in pilot.log:
        ld = log_entry.as_dict()
        assert "timestamp" in ld
        assert "message" in ld
        s = "%s" % log_entry
        assert isinstance(s, str)

    for log_entry in unit.log:
        ld = log_entry.as_dict()
        assert "timestamp" in ld
        assert "message" in ld
        s = "%s" % log_entry
        assert isinstance(s, str)

    session.close()
def start_pilot(cr=None):
    """
    In order to start a pilot on the newly created CR, we need to define a
    resource description for that CR.  To do so, we programmatically create
    a clone of the local.localhost description, and replace the job
    submission URL with an ssh:// URL pointing to the CR.
    """

    if not cr:
        class _CR(object):
            def __init__(self):
                self.access = 'ssh://remote.host.net:1234/'
        cr = _CR()

    # get the local resource config
    session = rp.Session()
    cfg = session.get_resource_config('local.localhost')

    # create a new config based on the local one, and add it back
    new_cfg = rp.ResourceConfig('ec2.vm', cfg)
    new_cfg.schemas = ['ssh']
    new_cfg['ssh']['job_manager_endpoint'] = cr.access
    new_cfg['ssh']['filesystem_endpoint']  = cr.access

    # the new config needs to make sure we can bootstrap on the VM
    new_cfg['pre_bootstrap_1'] = [
        'sudo apt-get update',
        'sudo apt-get install -y python-virtualenv python-dev dnsutils bc'
    ]
    session.add_resource_config(new_cfg)

    # use the *same* ssh key for ssh access to the VM
    ssh_ctx = rs.Context('SSH')
    ssh_ctx.user_id  = 'admin'
    ssh_ctx.user_key = os.environ['EC2_KEYPAIR']
    session.contexts.append(ssh_ctx)

    # submit a pilot to it
    pd = rp.ComputePilotDescription()
    pd.resource      = 'ec2.vm'
    pd.runtime       = 10
    pd.cores         = 1
    pd.exit_on_error = True

    pmgr = rp.PilotManager(session=session)
    return pmgr.submit_pilots(pd)
def setup_gordon(request):

    session1 = rp.Session()
    print("session id gordon: {0}".format(session1.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.gordon"]["user_id"]
    session1.add_context(c)

    try:
        pmgr1 = rp.PilotManager(session=session1)
        print("pm id gordon: {0}".format(pmgr1.uid))

        umgr1 = rp.UnitManager(session=session1,
                               scheduler=rp.SCHEDULER_DIRECT_SUBMISSION)

        pdesc1 = rp.ComputePilotDescription()
        pdesc1.resource = "xsede.gordon"
        pdesc1.project  = CONFIG["xsede.gordon"]["project"]
        pdesc1.runtime  = 30
        pdesc1.cores    = 16
        pdesc1.cleanup  = False

        pilot1 = pmgr1.submit_pilots(pdesc1)
        pilot1.register_callback(pilot_state_cb)
        umgr1.add_pilots(pilot1)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr1.cancel_pilots()
        pmgr1.wait_pilots()
        print('closing session')
        session1.close()
        time.sleep(5)
    request.addfinalizer(fin)

    return session1, pilot1, pmgr1, umgr1, "xsede.gordon"
def test__issue_114_part_1(self):
    """https://github.com/radical-cybertools/radical.pilot/issues/114"""
    session = rp.Session(database_url=DBURL, database_name=DBNAME)

    pm = rp.PilotManager(session=session)

    cpd = rp.ComputePilotDescription()
    cpd.resource = "local.localhost"
    cpd.cores    = 1
    cpd.runtime  = 5
    cpd.sandbox  = "/tmp/radical.pilot.sandbox.unittests"
    cpd.cleanup  = True

    pilot = pm.submit_pilots(pilot_descriptions=cpd)

    state = pm.wait_pilots(state=[rp.PMGR_ACTIVE, rp.DONE, rp.FAILED],
                           timeout=5 * 60)
    assert pilot.state == rp.PMGR_ACTIVE, "pilot state: %s" % pilot.state

    um = rp.UnitManager(session=session,
                        scheduler=rp.SCHED_DIRECT_SUBMISSION)
    um.add_pilots(pilot)

    all_tasks = []
    for _ in range(2):
        cudesc = rp.ComputeUnitDescription()
        cudesc.cores      = 1
        cudesc.executable = "/bin/sleep"
        cudesc.arguments  = ['60']
        all_tasks.append(cudesc)

    units = um.submit_units(all_tasks)

    states = um.wait_units(state=[rp.SCHEDULING, rp.AGENT_EXECUTING],
                           timeout=2 * 60)
    assert rp.SCHEDULING in states, "states: %s" % states

    states = um.wait_units(state=[rp.AGENT_EXECUTING, rp.DONE],
                           timeout=1 * 60)
    assert rp.AGENT_EXECUTING in states, "states: %s" % states

    session.close()
def test_rp_basic_task(rp_config):
    rp = rp_config['rp']

    # Note: Session creation will fail with a FileNotFound error unless venv
    # is explicitly `activate`d.
    # TODO: Figure out what `activate` does that `rp-venv/bin/python` doesn't.
    with rp.Session() as session:
        # Based on `radical.pilot/examples/config.json`
        # TODO: Does the Session have a default spec for 'local.localhost'?
        #       Can/should we reference it?
        # See also
        # https://github.com/radical-cybertools/radical.pilot/issues/2181
        resource = 'local.localhost'
        resource_config = {resource: {}}
        if resource in rp_config['config']:
            resource_config[resource].update(rp_config['config'][resource])
        resource_config[resource].update({
            'project': None,
            'queue'  : None,
            'schema' : None,
            'cores'  : 1,
            'gpus'   : 0
        })

        pilot_description = dict(
            resource=resource,
            runtime=30,
            exit_on_error=True,
            project=resource_config[resource]['project'],
            queue=resource_config[resource]['queue'],
            cores=resource_config[resource]['cores'],
            gpus=resource_config[resource]['gpus'])

        task_description = {
            'executable'   : '/bin/date',
            'cpu_processes': 1,
        }

        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session)

        pilot = pmgr.submit_pilots(
            rp.ComputePilotDescription(pilot_description))
        task = umgr.submit_units(rp.ComputeUnitDescription(task_description))

        umgr.add_pilots(pilot)
        umgr.wait_units()

        assert task.exit_code == 0

    assert session.closed
def setup_comet(request):

    session2 = rp.Session()
    print("session id comet: {0}".format(session2.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.comet"]["user_id"]
    session2.add_context(c)

    try:
        pmgr2 = rp.PilotManager(session=session2)
        print("pm id comet: {0}".format(pmgr2.uid))

        umgr2 = rp.UnitManager(session=session2,
                               scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc2 = rp.ComputePilotDescription()
        pdesc2.resource = "xsede.comet"
        pdesc2.project  = CONFIG["xsede.comet"]["project"]
        pdesc2.runtime  = 30
        pdesc2.cores    = 24
        pdesc2.cleanup  = False

        pilot2 = pmgr2.submit_pilots(pdesc2)
        pilot2.register_callback(pilot_state_cb)
        umgr2.add_pilots(pilot2)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr2.cancel_pilots()
        pmgr2.wait_pilots()
        print('closing session')
        session2.close()
    request.addfinalizer(fin)

    return session2, pilot2, pmgr2, umgr2, "xsede.comet"
def setup_stampede_two(request):

    session3 = rp.Session()
    print("session id stampede: {0}".format(session3.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.stampede"]["user_id"]
    session3.add_context(c)

    try:
        pmgr3 = rp.PilotManager(session=session3)
        print("pm id stampede: {0}".format(pmgr3.uid))

        umgr3 = rp.UnitManager(session=session3,
                               scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc3 = rp.ComputePilotDescription()
        pdesc3.resource = "xsede.stampede"
        pdesc3.project  = CONFIG["xsede.stampede"]["project"]
        pdesc3.runtime  = 20
        pdesc3.cores    = int(CONFIG["xsede.stampede"]["cores"]) * 2
        pdesc3.cleanup  = False

        pilot3 = pmgr3.submit_pilots(pdesc3)
        pilot3.register_callback(pilot_state_cb)
        umgr3.add_pilots(pilot3)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr3.cancel_pilots()
        pmgr3.wait_pilots()
        print('closing session')
        session3.close()
    request.addfinalizer(fin)

    return session3, pilot3, pmgr3, umgr3, "xsede.stampede"
def setup_stampede_683(request):

    session = rp.Session()
    print("session id stampede: {0}".format(session.uid))

    c = rp.Context('ssh')
    c.user_id = CONFIG["xsede.stampede"]["user_id"]
    session.add_context(c)

    try:
        pmgr = rp.PilotManager(session=session)
        umgr = rp.UnitManager(session=session,
                              scheduler=rp.SCHEDULER_BACKFILLING)

        pdesc = rp.ComputePilotDescription()
        pdesc.resource = "xsede.stampede"
        pdesc.project  = CONFIG["xsede.stampede"]["project"]
        pdesc.runtime  = 40
        pdesc.cores    = 683
        pdesc.cleanup  = False

        pilot = pmgr.submit_pilots(pdesc)
        pilot.register_callback(pilot_state_cb)
        umgr.add_pilots(pilot)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr.cancel_pilots()
        pmgr.wait_pilots()
        print('closing session')
        session.close()
    request.addfinalizer(fin)

    return session, pilot, pmgr, umgr, "xsede.stampede"
def test__pilotmanager_reconnect(self):
    """Test if pilot manager re-connect works as expected."""
    session = rp.Session()

    pm = rp.PilotManager(session=session)
    assert session.list_pilot_managers() == [pm.uid], \
        "Wrong list of pilot managers"

    pm_r = session.get_pilot_managers(pilot_manager_ids=pm.uid)
    assert session.list_pilot_managers() == [pm_r.uid], \
        "Wrong list of pilot managers"

    assert pm.uid == pm_r.uid, "Pilot Manager IDs not matching!"

    session.close()
def test__issue_114_part_2(self):
    """https://github.com/radical-cybertools/radical.pilot/issues/114"""
    session = rp.Session(database_url=DBURL, database_name=DBNAME)

    pm = rp.PilotManager(session=session)

    cpd = rp.ComputePilotDescription()
    cpd.resource = "local.localhost"
    cpd.cores    = 1
    cpd.runtime  = 5
    cpd.sandbox  = "/tmp/radical.pilot.sandbox.unittests"
    cpd.cleanup  = True

    pilot = pm.submit_pilots(pilot_descriptions=cpd)

    um = rp.UnitManager(session=session,
                        scheduler=rp.SCHED_DIRECT_SUBMISSION)
    um.add_pilots(pilot)

    state = pm.wait_pilots(state=[rp.ACTIVE, rp.DONE, rp.FAILED],
                           timeout=5 * 60)
    assert pilot.state == rp.ACTIVE, "pilot state: %s" % pilot.state

    cudesc = rp.ComputeUnitDescription()
    cudesc.cores      = 1
    cudesc.executable = "/bin/sleep"
    cudesc.arguments  = ['60']

    cu = um.submit_units(cudesc)

    state = um.wait_units(state=[rp.EXECUTING], timeout=60)
    assert state == [rp.EXECUTING],    'state   : %s' % state
    assert cu.state == rp.EXECUTING,   'cu state: %s' % cu.state

    state = um.wait_units(timeout=2 * 60)
    assert state == [rp.DONE],         'state   : %s' % state
    assert cu.state == rp.DONE,        'cu state: %s' % cu.state

    session.close()
def setup_local_1(request):

    session1 = rp.Session()
    print("session id local_1: {0}".format(session1.uid))

    try:
        pmgr1 = rp.PilotManager(session=session1)
        print("pm id local_1: {0}".format(pmgr1.uid))

        umgr1 = rp.UnitManager(session=session1,
                               scheduler=rp.SCHED_DIRECT_SUBMISSION)

        pdesc1 = rp.ComputePilotDescription()
        pdesc1.resource = "local.localhost"
        pdesc1.runtime  = 30
        pdesc1.cores    = 1
        pdesc1.cleanup  = False

        pilot1 = pmgr1.submit_pilots(pdesc1)
        pilot1.register_callback(pilot_state_cb)
        umgr1.add_pilots(pilot1)

    except Exception:
        print('test failed')
        raise

    def fin():
        print("finalizing...")
        pmgr1.cancel_pilots()
        pmgr1.wait_pilots()
        print('closing session')
        session1.close()
        time.sleep(5)
    request.addfinalizer(fin)

    return session1, pilot1, pmgr1, umgr1, "local.localhost"