def test_configure(self, mocked_init):
    """
    Check which job name `_prepare_pilot` puts on the job description.

    A pilot without `job_name` is expected to yield a job named after the
    pilot uid; an explicit `job_name` is expected to be used verbatim.
    """

    def _pilot(job_name):
        # pilot document as passed to `_prepare_pilot`; only `job_name`
        # differs between the two cases checked below
        return {'uid'        : 'pilot.0000',
                'description': {'cores'          : 10,
                                'gpus'           : 2,
                                'queue'          : 'default',
                                'project'        : 'foo',
                                'job_name'       : job_name,
                                'runtime'        : 10,
                                'app_comm'       : 0,
                                'cleanup'        : 0,
                                'memory'         : 0,
                                'candidate_hosts': None,
                               }
               }

    session, configs = self.setUp()

    # locate the RP sources relative to this test module
    mod_dir  = os.path.dirname(os.path.abspath(__file__))
    root_dir = "%s/../../src/radical/pilot/" % mod_dir

    # `__init__` is mocked, so all state the method needs is set by hand
    component = Default(cfg=None, session=None)
    component._cfg           = mock.Mock()
    component._log           = ru.Logger('dummy')
    component._session       = session
    component._pmgr          = 'pmgr.0'
    component._prof          = ru.Config(cfg = {'enabled': False})
    component._cache_lock    = ru.Lock()
    component._cache         = dict()
    component._sandboxes     = dict()
    component._mod_dir       = mod_dir
    component._root_dir      = root_dir
    component._conf_dir      = "%s/configs/" % root_dir
    component._rp_version    = rp.version
    component._rp_sdist_name = rp.sdist_name
    component._rp_sdist_path = rp.sdist_path

    resource = 'local.localhost'
    rcfg     = configs.local.localhost

    for job_name, expected in ((None, 'pilot.0000'), ('bar', 'bar')):
        ret = component._prepare_pilot(resource, rcfg, _pilot(job_name), {})
        assert(ret['jd'].name == expected)
def __init__ (self, default=True, uid=None):
    """
    default: bool
    ret:     None

    With `default=True` the session gets a deep copy of the default
    session's context list and re-uses that session's lease manager.
    Otherwise it gets an empty, private context list and a lease manager
    of its own.
    """

    super(Session, self).__init__(uid=uid)

    self._logger = ru.Logger('radical.saga')

    if not default:

        # private, unpopulated context list
        self.contexts = _ContextList(session=self)

        # FIXME: the lease manager is owned by the session, but its main
        #        user is the pty layer - so the lease manager options live
        #        in the `pty` section of the session config, and are
        #        evaluated here.
        self._cfg = ru.Config(module='radical.saga.session')
        pty_cfg   = self._cfg.pty

        self._lease_manager = ru.LeaseManager(
                max_pool_size=pty_cfg.connection_pool_size,
                max_pool_wait=pty_cfg.connection_pool_wait,
                max_obj_age  =pty_cfg.connection_pool_ttl)
        return

    # default session requested: derive state from the default singleton
    shared = DefaultSession(uid=self._id)

    self.contexts       = copy.deepcopy(shared.contexts)
    self._lease_manager = shared._lease_manager
def add_resource_config(self, resource_config):
    '''
    Add resource configuration(s) to the session's dictionary of known
    resources (`self._rcfgs`).

    `resource_config` is either

      * a config object (e.g. :class:`ru.Config`) providing a `label`
        attribute and an `as_dict()` method: it is stored under its label;
      * a string: it is passed as `name` to
        `ru.Config('radical.pilot.resource', ...)`, and every entry of the
        resulting config is stored under its own key.  Exceptions raised
        while loading are not caught here.

    For example::

           rc = ru.Config("./mycluster.json")
           rc.job_manager_endpoint = "ssh+pbs://mycluster"
           rc.filesystem_endpoint  = "sftp://mycluster"
           rc.default_queue        = "private"

           session = rp.Session()
           session.add_resource_config(rc)

           pm = rp.PilotManager(session=session)

           pd = rp.ComputePilotDescription()
           pd.resource = "mycluster"
           pd.cores    = 16
           pd.runtime  = 5  # minutes

           pilot = pm.submit_pilots(pd)
    '''

    if isinstance(resource_config, str):

        # let exceptions fall through
        rcs = ru.Config('radical.pilot.resource', name=resource_config)

        for rc in rcs:
            self._log.info('load rcfg for %s', rc)
            self._rcfgs[rc] = rcs[rc].as_dict()

    else:
        self._log.debug('load rcfg for %s', resource_config.label)
        self._rcfgs[resource_config.label] = resource_config.as_dict()
def __init__(self):
    """
    Set up the engine: load the engine, pty and registry configs, create
    the logger, log the version, and load all adaptors.
    """

    # cpis provided by the adaptors are managed here
    self._adaptor_registry = {}

    # engine, pty and adaptor registry configs
    self._cfg      = ru.Config('radical.saga.engine')
    self._pty_cfg  = ru.Config('radical.saga.pty')
    self._registry = ru.Config('radical.saga.registry')

    # set up logging and log the version (this is a singleton!)
    log = ru.Logger('radical.saga')
    self._logger = log
    log.info('radical.saga         version: %s' % version_detail)

    self._load_adaptors()
def main():
    """
    Run a `ScaleMSMaster`: submit workers, then start, join and stop it.

    If a command line argument is given, it is read as a JSON config file
    which provides the worker description (`worker_descr`), the number of
    workers (`n_workers`) and the cores / gpus per worker (`cpn`, `gpn`);
    that config is also passed to the master.  Without an argument, a
    single one-core, zero-gpu `scalems_rp_worker` is submitted.
    """

    # TODO: Test both with and without a provided config file.
    kwargs = {}

    if len(sys.argv) > 1:

        cfg = ru.Config(cfg=ru.read_json(sys.argv[1]))
        kwargs['cfg'] = cfg

        # NOTE: no trailing commas here - they would wrap each value into
        #       a one-element tuple
        descr = cfg.worker_descr
        count = cfg.n_workers
        cores = cfg.cpn
        gpus  = cfg.gpn

    else:

        descr = rp.TaskDescription({
            'uid'       : 'raptor.worker',
            'executable': 'scalems_rp_worker',
            'arguments' : []
        })
        count = 1
        cores = 1
        gpus  = 0

    master = ScaleMSMaster(**kwargs)

    master.submit(descr=descr, count=count, cores=cores, gpus=gpus)

    master.start()
    master.join()
    master.stop()
def __init__(self, url, session=None, logger=None, cfg=None, posix=True,
             interactive=True):
    """
    Create a pty backed shell for `url` and initialize it.

    `logger` and `session` fall back to a 'radical.saga.pty' logger and a
    default session.  `cfg` is either the name of a session config or
    config data; only the `pty` section of the resulting config is kept.

    Raises `rse.NoSuccess` if the local staging directory cannot be
    created.
    """

    self.logger  = logger  if logger  else ru.Logger('radical.saga.pty')
    self.session = session if session else ss.Session(default=True)

    self.logger.debug("PTYShell init %s" % self)

    self.url         = url          # describes the shell to run
    self.posix       = posix        # /bin/sh compatible?
    self.interactive = interactive  # bash -i ?
    self.latency     = 0.0          # set by factory
    self.cp_slave    = None         # file copy channel
    self.initialized = False

    # every shell instance gets its own running id
    self.pty_id       = PTYShell._pty_id
    PTYShell._pty_id += 1

    # a string `cfg` names a config, anything else is config data
    if isinstance(cfg, str):
        cfg_name, cfg_data = cfg, None
    else:
        cfg_name, cfg_data = None, cfg

    session_cfg = ru.Config('radical.saga.session', name=cfg_name,
                            cfg=cfg_data)
    self.cfg    = session_cfg.pty

    # prompt pattern: from config if set, default otherwise
    self.prompt    = self.cfg.get('prompt_pattern', DEFAULT_PROMPT)
    self.prompt_re = re.compile("^(.*?)%s" % self.prompt, re.DOTALL)
    self.logger.info("PTY prompt pattern: %s" % self.prompt)

    # local directory used for file staging caches
    self.base = ru.get_radical_base('saga') + 'adaptors/shell/'
    try:
        ru.rec_makedir(self.base)
    except OSError as e:
        raise rse.NoSuccess('could not create staging dir: %s' % e) from e

    self.factory   = supsf.PTYShellFactory()
    self.pty_info  = self.factory.initialize(self.url,    self.session,
                                             self.prompt, self.logger,
                                             self.cfg,    self.posix,
                                             interactive=self.interactive)
    self.pty_shell = self.factory.run_shell(self.pty_info)

    self._trace('init : %s' % self.pty_shell.command)

    self.initialize()
def start_components(self, cfg=None):
    '''
    Check if any components are defined under `cfg['components']` and
    start them.

    cfg: config to read the component definitions from; defaults to
         `self._cfg`.  The heartbeat timeout is always taken from
         `self._cfg`.

    For each component instance a config file `<cfg.path>/<uid>.json` is
    written and `radical-pilot-component <file>` is called.  After all
    instances are started, this method waits for them to come up.

    Raises `RuntimeError` if a component launch returns non-zero, or if
    not all components came up within ten times the heartbeat timeout.
    '''

    self._prof.prof('start_components_start', uid=self._uid)

    timeout = self._cfg.heartbeat.timeout

    if cfg is None:
        cfg = self._cfg

    # we pass a copy of the complete session config to all components, but
    # merge it into the component specific config settings (no overwrite),
    # and then remove the `bridges` and `components` sections
    scfg = ru.Config(cfg=cfg)
    if 'bridges' in scfg:
        del (scfg['bridges'])
    if 'components' in scfg:
        del (scfg['components'])

    for cname, ccfg in cfg.get('components', {}).items():

        # NOTE: all instances of a component share (and update) one `ccfg`
        for _ in range(ccfg.get('count', 1)):

            ccfg.uid       = ru.generate_id(cname, ns=self._sid)
            ccfg.cmgr      = self.uid
            ccfg.kind      = cname
            ccfg.sid       = cfg.sid
            ccfg.base      = cfg.base
            ccfg.path      = cfg.path
            ccfg.heartbeat = cfg.heartbeat

            ccfg.merge(scfg, policy=ru.PRESERVE, log=self._log)

            fname = '%s/%s.json' % (cfg.path, ccfg.uid)
            ccfg.write(fname)

            self._log.info('create  component %s [%s]', cname, ccfg.uid)

            out, err, ret = ru.sh_callout('radical-pilot-component %s'
                                          % fname)
            self._log.debug('out: %s', out)
            self._log.debug('err: %s', err)
            if ret:
                # this is a *component* failure (not a bridge failure)
                raise RuntimeError('component startup failed: %s [%s]'
                                   % (cname, ccfg.uid))

            self._uids.append(ccfg.uid)
            self._log.info('created component %s [%s]', cname, ccfg.uid)

    # all components should start now, for their heartbeats
    # to appear.
    failed = self._hb.wait_startup(self._uids, timeout=timeout * 10)
    if failed:
        raise RuntimeError('could not start all components %s' % failed)

    self._prof.prof('start_components_stop', uid=self._uid)
def __init__(self, command, cfg='utils', logger=None):
    """
    Set up a process which runs `command` behind a pty, and initialize it.

    :type  command: string or list of strings
    :param command: the command to run as child.  A string is split into
                    a list of strings with :func:`shlex.split`.

    :param cfg:     name of a session config (string) or config data; only
                    the `pty` section of the resulting config is kept.

    :type  logger:  :class:`radical.utils.logger.Logger` instance
    :param logger:  logger to send status messages to; a
                    'radical.saga.pty' logger is created if none is given.

    Raises `se.BadParameter` if `command` is neither a string nor a list,
    or is empty.  Exceptions raised during initialization are translated
    via `ptye.translate_exception`.
    """

    self.rlock  = mt.RLock()
    self.logger = logger or ru.Logger('radical.saga.pty')

    self.logger.debug("PTYProcess init %s" % self)

    # a string `cfg` names a config, anything else is config data
    if isinstance(cfg, str):
        cfg_name, cfg_data = cfg, None
    else:
        cfg_name, cfg_data = None, cfg

    session_cfg = ru.Config('radical.saga.session', name=cfg_name,
                            cfg=cfg_data)
    self.cfg    = session_cfg.pty

    # normalize the command into a non-empty list of strings
    if isinstance(command, str):
        command = shlex.split(command)

    if not isinstance(command, list):
        raise se.BadParameter("PTYProcess expects string or list command")

    if not command:
        raise se.BadParameter("PTYProcess expects non-empty command")

    self.command     = command  # list of strings to run
    self.cache       = ""       # data cache
    self.tail        = ""       # tail of data cache, for error messages
    self.child       = None     # the child process
    self.ptyio       = None     # the process' io channel
    self.exit_code   = None     # child died with code (may be revived)
    self.exit_signal = None     # child kill by signal (may be revived)

    # TODO: make `recover_max` a config option.  It does not apply to
    #       recovers triggered by gc_timeout!
    self.recover_max      = 3
    self.recover_attempts = 0

    try:
        self.initialize()

    except Exception as e:
        raise ptye.translate_exception(e, "pty or process creation failed")\
              from e
def __init__(self, cfg=None):
    """
    Create logger, lock and an empty registry, and load the pty config.

    `cfg` is either the name of a session config (string) or config data;
    only the `pty` section of the resulting config is kept.
    """

    self.logger   = ru.Logger('radical.saga.pty')
    self.rlock    = mt.RLock()
    self.registry = {}

    # a string `cfg` names a config, anything else is config data
    if isinstance(cfg, str):
        cfg_name, cfg_data = cfg, None
    else:
        cfg_name, cfg_data = None, cfg

    session_cfg = ru.Config('radical.saga.session', name=cfg_name,
                            cfg=cfg_data)
    self.cfg    = session_cfg.pty
def __init__(self, adaptor_info, adaptor_options=None, expand_env=True):
    """
    Store the adaptor info, create lock and logger, and load the adaptor's
    config section.

    adaptor_info:    dict with at least the keys `name` and `schemas`
    adaptor_options: not used by this method
    expand_env:      passed to `ru.Config` as `expand`

    An adaptor whose config has no `enabled` entry is marked as enabled.
    """

    # FIXME: engine is loading cfg already, here we load again...

    name = adaptor_info['name']

    self._info    = adaptor_info
    self._name    = name
    self._schemas = adaptor_info['schemas']

    self._lock    = mt.RLock()
    self._logger  = ru.Logger('radical.saga.api')

    adaptor_cfg = ru.Config(module='radical.saga.adaptors', name=name,
                            expand=expand_env)
    self._cfg   = adaptor_cfg

    if 'enabled' not in adaptor_cfg:
        adaptor_cfg['enabled'] = True
def __init__(self, adaptor_info, adaptor_options=None, expand_env=True):
    """
    Store the adaptor info, create a named lock and a logger, and load the
    adaptor's config.

    adaptor_info:    dict with at least the keys `name` and `schemas`
    adaptor_options: not used by this method
    expand_env:      passed to `ru.Config` as `expand`

    An adaptor whose config has no `enabled` entry is marked as enabled.
    """

    # FIXME: engine is loading cfg already, here we load again...

    name = adaptor_info['name']

    self._info    = adaptor_info
    self._name    = name
    self._schemas = adaptor_info['schemas']

    # the lock is named after the adaptor
    self._lock    = ru.RLock(name)
    self._logger  = ru.Logger('radical.saga.api')

    # we need to expand later once we got env from the remote resource
    adaptor_cfg = ru.Config(module='radical.saga', name=name,
                            expand=expand_env)
    self._cfg   = adaptor_cfg

    if 'enabled' not in adaptor_cfg:
        adaptor_cfg['enabled'] = True
def _get_config(self, cfg=None):
    '''
    Derive a worker base configuration from the control pubsub
    configuration.

    cfg: dict-like config to start from.  It is updated in place.  If it
         is `None`, an empty dict is used (the original code could not
         handle the `None` default).

    The content of `../control_pubsub.json` (relative to the current
    working directory) is merged into `cfg`, the `channel` and `cmgr`
    entries are removed, and `log_lvl`, `kind`, `base` and `uid` are set.

    Returns a new `ru.Config` created from the resulting dict.
    '''

    # FIXME: this uses insider knowledge on the config location and
    #        structure.  It would be better if agent.0 creates the worker
    #        base config from scratch on startup.

    if cfg is None:
        cfg = dict()

    pwd = os.getcwd()
    ru.dict_merge(cfg, ru.read_json('%s/../control_pubsub.json' % pwd))

    del (cfg['channel'])
    del (cfg['cmgr'])

    cfg['log_lvl'] = 'debug'
    cfg['kind']    = 'master'
    cfg['base']    = pwd
    cfg['uid']     = ru.generate_id('master.%(item_counter)06d',
                                    ru.ID_CUSTOM,
                                    ns=self._session.uid)

    return ru.Config(cfg=cfg)
def rp_config():
    """Provide a RADICAL Pilot Resource Config to a test suite.

    The 'resource' key in a Pilot Description must name a key that the
    Session can use to get default values for the execution environment.

    Returns a dict with the keys `config` (the resource config, or an
    empty dict if it could not be created), `rp` and `ru` (the imported
    `radical.pilot` and `radical.utils` modules).
    """
    # Ref: https://radicalpilot.readthedocs.io/en/stable/machconf.html#customizing-resource-configurations-programatically
    import radical.pilot as rp
    import radical.utils as ru

    # TODO: Resolve usage error.
    # Ref: https://github.com/radical-cybertools/radical.pilot/issues/2181
    try:
        cfg = rp.ResourceConfig(
            'local.localhost',
            ru.Config('radical.pilot.session', name='default', cfg=None))
    except Exception:
        # Deliberate best-effort fallback.  `Exception` (instead of a bare
        # `except`) lets KeyboardInterrupt and SystemExit propagate.
        cfg = dict()

    # `local.localhost` is preconfigured, but some of the properties are
    # likely not appropriate.
    # Ref: https://github.com/radical-cybertools/radical.pilot/blob/devel/src/radical/pilot/configs/resource_local.json
    # TODO: Is there a more canonical way to programmatically generate a
    #       valid config?
    # Ref: https://radicalpilot.readthedocs.io/en/stable/machconf.html#writing-a-custom-resource-configuration-file
    # TODO: Set a sensible number of cores / threads / GPUs.
    return dict(config=cfg, rp=rp, ru=ru)
def check_runs(cfg_file, run_file):
    """
    Parse the run definitions in `run_file`.

    cfg_file: JSON config file; `fs_url` and `workload.results` are read
              from it to open the results directory
    run_file: text file with one run per line, as four whitespace
              separated fields: `receptor smiles n_workers runtime`.
              Empty lines and lines starting with `#` are ignored.

    Returns a list of `[receptor, smiles, n_workers, runtime]` lists, with
    `n_workers` and `runtime` converted to `int`.

    Raises `AssertionError` for lines which do not have four fields.
    """

    runs = list()

    cfg      = ru.Config(cfg=ru.read_json(cfg_file))
    res_path = cfg.fs_url + cfg.workload.results

    # NOTE(review): the directory handle is not used below.  It is still
    #               opened (and kept until return), as callers may rely on
    #               this to fail for an unreachable results directory -
    #               confirm and remove otherwise.
    fs = rs.filesystem.Directory(res_path)                          # noqa

    with open(run_file, 'r') as fin:

        for line in fin:

            line = line.strip()

            if not line or line.startswith('#'):
                continue

            elems = line.split()
            assert (len(elems) == 4), line

            receptor, smiles = elems[0], elems[1]
            n_workers        = int(elems[2])
            runtime          = int(elems[3])

            runs.append([receptor, smiles, n_workers, runtime])

    return runs
def setUp(self):
    """
    Create the test fixtures: a minimal session stand-in and all resource
    configs.  Returns the tuple `(session, configs)`.
    """

    class Session(object):
        """Session stand-in: ids, a config, and sandbox URLs."""

        def __init__(self):
            self.uid, self.sid = 'uid.0', 'sid.0'
            self.cfg = ru.Config(cfg={'dburl': 'db://'})

        def _get_resource_sandbox(self, pilot):
            path = '/resource/sandbox/%s' % pilot
            return ru.Url(path)

        def _get_session_sandbox(self, pilot):
            path = '/session/sandbox/%s' % pilot
            return ru.Url(path)

        def _get_pilot_sandbox(self, pilot):
            path = '/pilot/sandbox/%s' % pilot
            return ru.Url(path)

        def _get_client_sandbox(self):
            path = '/client/sandbox'
            return ru.Url(path)

    # the session is created before the configs are loaded
    return Session(), ru.Config('radical.pilot.resource', name='*')
def test_zmq_pubsub():
    '''
    Create a pubsub bridge, two publishers (A, B) and two subscribers
    (C, D).  A publishes 200 messages, B publishes 400 messages, both with
    a delay of 0.005 seconds between messages, followed by an EOF message
    (`idx` is `None`).

    Ensure that

      - the bridge endpoints are consistent;
      - each publisher sent the expected number of messages;
      - C and D together received each message exactly twice (once per
        subscriber), EOF messages not counted.
    '''

    c_a = 200
    c_b = 400

    cfg = ru.Config(cfg={'uid'      : 'test_pubsub',
                         'channel'  : 'test',
                         'kind'     : 'pubsub',
                         'log_level': 'error',
                         'path'     : '/tmp/',
                         'sid'      : 'test_sid',
                         'bulk_size': 0,
                         'stall_hwm': 1,
                        })

    b = ru.zmq.PubSub(cfg)
    b.start()

    assert(b.addr_in  != b.addr_out)
    assert(b.addr_in  == b.addr_pub)
    assert(b.addr_out == b.addr_sub)

    # data[X][Y]: messages from Y counted by X
    data = {i: {j: 0 for j in 'AB'} for i in 'ABCD'}

    def cb(uid, topic, msg):
        # EOF messages are not counted
        if msg['idx'] is None:
            return False
        data[uid][msg['src']] += 1

    def cb_C(t, m):
        return cb('C', t, m)

    def cb_D(t, m):
        return cb('D', t, m)

    channel  = cfg['channel']
    ru.zmq.Subscriber(channel=channel, url=str(b.addr_sub),
                      topic='topic', cb=cb_C)
    ru.zmq.Subscriber(channel=channel, url=str(b.addr_sub),
                      topic='topic', cb=cb_D)

    time.sleep(0.1)

    # --------------------------------------------------------------------------
    def work_pub(uid, n, delay):

        pub = ru.zmq.Publisher(channel=cfg['channel'], url=str(b.addr_pub))

        for idx in range(n):
            time.sleep(delay)
            pub.put('topic', {'src': uid,
                              'idx': idx})
            data[uid][uid] += 1

        # send EOF
        pub.put('topic', {'src': uid,
                          'idx': None})
    # --------------------------------------------------------------------------

    t_a = mt.Thread(target=work_pub, args=['A', c_a, 0.005])
    t_b = mt.Thread(target=work_pub, args=['B', c_b, 0.005])

    t_a.start()
    t_b.start()

    t_a.join()
    t_b.join()

    b.stop()
    time.sleep(0.1)

    assert(data['A']['A'] == c_a)
    assert(data['B']['B'] == c_b)

    received = data['C']['A'] + data['C']['B'] \
             + data['D']['A'] + data['D']['B']
    assert(received == 2 * (c_a + c_b))
def __init__(self, cfg=None, backend='zmq'):
    '''
    Set up the master: create a secondary session, initialize the
    component base class, and start the request and result queues used to
    communicate with the workers.

    cfg:     config object with attribute access (e.g. `ru.Config`).  If
             `None`, an empty `ru.Config` is used (the original code could
             not handle the `None` default).
    backend: stored in `self._backend`, but not used here

    The environment variables `RP_SESSION_ID` and `RP_PILOT_SANDBOX` must
    be set (`KeyError` otherwise).
    '''

    if cfg is None:
        cfg = ru.Config(cfg={})

    self._backend  = backend     # FIXME: use

    # FIXME: consider `ru.Lock('master')` instead of a plain lock
    self._workers  = dict()      # wid: worker
    self._requests = dict()      # bookkeeping of submitted requests
    self._lock     = mt.Lock()   # lock the request dict on updates

    cfg.sid  = os.environ['RP_SESSION_ID']
    cfg.base = os.environ['RP_PILOT_SANDBOX']
    cfg.path = os.environ['RP_PILOT_SANDBOX']

    self._session = Session(cfg=cfg, uid=cfg.sid, _primary=False)

    cfg = self._get_config(cfg)
    rpu.Component.__init__(self, cfg, self._session)

    self.register_output(rps.AGENT_STAGING_INPUT_PENDING,
                         rpc.AGENT_STAGING_INPUT_QUEUE)

    self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb)

    # set up RU ZMQ Queues for request distribution and result collection
    def _queue_cfg(tag):
        # both queues only differ in the `tag` used for channel and uid
        return ru.Config(cfg={'channel'  : '%s.to_%s' % (self._uid, tag),
                              'type'     : 'queue',
                              'uid'      : self._uid + '.' + tag,
                              'path'     : os.getcwd(),
                              'stall_hwm': 0,
                              'bulk_size': 56})

    req_cfg = _queue_cfg('req')
    res_cfg = _queue_cfg('res')

    self._req_queue = ru.zmq.Queue(req_cfg)
    self._res_queue = ru.zmq.Queue(res_cfg)

    self._req_queue.start()
    self._res_queue.start()

    self._req_addr_put = str(self._req_queue.addr_put)
    self._req_addr_get = str(self._req_queue.addr_get)

    self._res_addr_put = str(self._res_queue.addr_put)
    self._res_addr_get = str(self._res_queue.addr_get)

    # this master will put requests onto the request queue, and will get
    # responses from the response queue.  Note that the responses will be
    # delivered via an async callback (`self._result_cb`).
    self._req_put = ru.zmq.Putter('%s.to_req' % self._uid,
                                  self._req_addr_put)
    self._res_get = ru.zmq.Getter('%s.to_res' % self._uid,
                                  self._res_addr_get,
                                  cb=self._result_cb)

    # for the workers it is the opposite: they will get requests from the
    # request queue, and will send responses to the response queue.
    self._info = {'req_addr_get': self._req_addr_get,
                  'res_addr_put': self._res_addr_put}

    # make sure the channels are up before allowing to submit requests
    time.sleep(1)

    # connect to the local agent
    self._log.debug('startup complete')
} } self.request(item) self._prof.prof('create_stop') # ------------------------------------------------------------------------------ if __name__ == '__main__': # This master script runs as a task within a pilot allocation. The purpose # of this master is to (a) spawn a set or workers within the same # allocation, (b) to distribute work items (`dock` function calls) to those # workers, and (c) to collect the responses again. cfg_fname = 'wf2_md.cfg' cfg = ru.Config(cfg=ru.read_json(cfg_fname)) cfg.idx = int(sys.argv[1]) # FIXME: worker startup should be moved into master workload = cfg.workload n_nodes = cfg.nodes cpn = cfg.cpn gpn = cfg.gpn # Prepare dirlist in case we are iterating and we have detected outliers initial_MD = True outlier_filepath = '%s/Outlier_search/restart_points.json' % cfg[ 'base_path'] if os.path.exists(outlier_filepath): initial_MD = False
def __init__(self):
    """
    Create logger, an empty registry and a named lock, and load the `pty`
    section of the 'utils' config of `radical.saga`.
    """

    self.logger   = ru.Logger('radical.saga.pty')
    self.registry = dict()
    self.rlock    = ru.RLock('pty shell factory')

    utils_cfg = ru.Config('radical.saga', 'utils')
    self.cfg  = utils_cfg['pty']
def get_config(self, name=None):
    """
    Return a new `ru.Config` for module 'radical.saga', with `name` passed
    through to the config constructor.
    """

    cfg = ru.Config(module='radical.saga', name=name)
    return cfg
def __init__(self, dburl=None, uid=None, cfg=None, _primary=True):
    '''
    Creates a new session.

    **Arguments:**
        * **dburl** (`string`): The MongoDB URL.  It is only used for
          primary sessions, where it is passed to
          `_initialize_primary()`.  The original documentation states
          that RP falls back to the environment variable
          RADICAL_PILOT_DBURL if no URL is given, and that an error is
          raised if that is not set either - that logic is not part of
          this method.

        * **cfg** (`str` or `dict`): a named or instantiated configuration
          to be used for the session.  If no name is given, the `default`
          session config is used.

        * **uid** (`string`): Create a session with this UID.  Session
          UIDs MUST be unique - otherwise they will lead to conflicts in
          the underlying database, resulting in undefined behaviours (or
          worse).  The UID is only used for primary sessions whose config
          does not already define `sid`.

        * **_primary** (`bool`): only sessions created by the original
          application process (via `rp.Session()`) will connect to the
          DB.  Secondary session instances are instantiated internally in
          processes spawned (directly or indirectly) by the initial
          session, for example in some of its components.  A secondary
          session inherits the original session ID via its config (which
          must contain `sid`, `base` and `path`), but will not attempt to
          create a new DB collection - if such a DB connection is needed,
          the component needs to establish that on its own.
    '''

    # NOTE: `name` and `cfg` are overloaded, the user cannot point to
    #       a predefined config and amend it at the same time.  This might
    #       be ok for the session, but introduces a minor API inconsistency.
    name = 'default'
    if isinstance(cfg, str):
        name = cfg
        cfg  = None

    self._dbs     = None
    self._closed  = False
    self._primary = _primary

    self._pmgrs   = dict()  # map IDs to pmgr instances
    self._umgrs   = dict()  # map IDs to umgr instances
    self._cmgr    = None    # only primary sessions have a cmgr

    # session config, and the configs of all known resources
    self._cfg     = ru.Config('radical.pilot.session',  name=name, cfg=cfg)
    self._rcfgs   = ru.Config('radical.pilot.resource', name='*')

    if _primary:

        # a primary session fills in missing config entries: `sid` from
        # `uid` or a newly generated id, `base` and `client_sandbox` from
        # the current working directory, `path` as `<base>/<sid>`
        pwd = os.getcwd()

        if not self._cfg.sid:
            if uid:
                self._cfg.sid = uid
            else:
                self._cfg.sid = ru.generate_id('rp.session',
                                               mode=ru.ID_PRIVATE)
        if not self._cfg.base:
            self._cfg.base = pwd

        if not self._cfg.path:
            self._cfg.path = '%s/%s' % (self._cfg.base, self._cfg.sid)

        if not self._cfg.client_sandbox:
            self._cfg.client_sandbox = pwd

    else:
        # a non-primary session expects these entries to be set already
        # NOTE(review): this check is an `assert`, so it is skipped when
        #               python runs with `-O`
        for k in ['sid', 'base', 'path']:
            assert(k in self._cfg), 'non-primary session misses %s' % k

    # change RU defaults to point logfiles etc. to the session sandbox
    def_cfg             = ru.DefaultConfig()
    def_cfg.log_dir     = self._cfg.path
    def_cfg.report_dir  = self._cfg.path
    def_cfg.profile_dir = self._cfg.path

    # the session uid is the `sid` from the config
    self._uid  = self._cfg.sid

    self._prof = self._get_profiler(name=self._uid)
    self._rep  = self._get_reporter(name=self._uid)
    self._log  = self._get_logger  (name=self._uid,
                                    level=self._cfg.get('debug'))

    from . import version_detail as rp_version_detail
    self._log.info('radical.pilot version: %s' % rp_version_detail)
    self._log.info('radical.saga  version: %s' % rs.version_detail)
    self._log.info('radical.utils version: %s' % ru.version_detail)

    self._prof.prof('session_start', uid=self._uid, msg=int(_primary))

    # now we have config and uid - initialize base class (saga session)
    rs.Session.__init__(self, uid=self._uid)

    # cache sandboxes etc.
    self._cache_lock = ru.RLock()
    self._cache      = {'resource_sandbox' : dict(),
                        'session_sandbox'  : dict(),
                        'pilot_sandbox'    : dict(),
                        'client_sandbox'   : self._cfg.client_sandbox,
                        'js_shells'        : dict(),
                        'fs_dirs'          : dict()}

    # only primary sessions run the primary initialization, which gets
    # the `dburl`
    if _primary:
        self._initialize_primary(dburl)

    # at this point we have a DB connection, logger, etc, and are done
    self._prof.prof('session_ok', uid=self._uid, msg=int(_primary))
def __init__(self, session, cfg='default', scheduler=None):
    """
    Creates a new UnitManager and attaches it to the session.

    **Arguments:**
        * session [:class:`radical.pilot.Session`]:
          The session instance to use.
        * cfg (`dict` or `string`):
          The configuration or name of configuration to use.
        * scheduler (`string`):
          The name of the scheduler plug-in to use.  If given, it
          overrides the `scheduler` setting of the configuration.

    **Returns:**
        * A new `UnitManager` object [:class:`radical.pilot.UnitManager`].
    """

    # bookkeeping of pilots, units and callbacks, each with its own lock
    self._pilots      = dict()
    self._pilots_lock = ru.RLock('umgr.pilots_lock')
    self._units       = dict()
    self._units_lock  = ru.RLock('umgr.units_lock')
    self._callbacks   = dict()
    self._cb_lock     = ru.RLock('umgr.cb_lock')
    self._terminate   = mt.Event()
    self._closed      = False
    self._rec_id      = 0       # used for session recording
    self._uid         = ru.generate_id('umgr.%(item_counter)04d',
                                       ru.ID_CUSTOM, ns=session.uid)

    # one callback registry per metric
    for m in rpc.UMGR_METRICS:
        self._callbacks[m] = dict()

    # NOTE: `name` and `cfg` are overloaded, the user cannot point to
    #       a predefined config and amend it at the same time.  This might
    #       be ok for the session, but introduces a minor API inconsistency.
    #
    name = None
    if isinstance(cfg, str):
        name = cfg
        cfg  = None

    # load the umgr config and complete it with session information
    cfg           = ru.Config('radical.pilot.umgr', name=name, cfg=cfg)
    cfg.uid       = self._uid
    cfg.owner     = self._uid
    cfg.sid       = session.uid
    cfg.base      = session.base
    cfg.path      = session.path
    cfg.dburl     = session.dburl
    cfg.heartbeat = session.cfg.heartbeat

    if scheduler:
        # overwrite the scheduler from the config file
        cfg.scheduler = scheduler

    # the attributes `_cfg`, `_log`, `_rep`, `_prof` and `_session` used
    # below are not set in this method
    # NOTE(review): presumably they are set by the base class
    #               constructor - confirm
    rpu.Component.__init__(self, cfg, session=session)
    self.start()

    self._log.info('started umgr %s', self._uid)
    self._rep.info('<<create unit manager')

    # create umgr bridges and components with a component manager created
    # from this umgr's config
    self._cmgr = rpu.ComponentManager(self._cfg)
    self._cmgr.start_bridges()
    self._cmgr.start_components()

    # The output queue is used to forward submitted units to the
    # scheduling component.
    self.register_output(rps.UMGR_SCHEDULING_PENDING,
                         rpc.UMGR_SCHEDULING_QUEUE)

    # the umgr will also collect units from the agent again, for output
    # staging and finalization - but only if the config defines an output
    # staging queue bridge
    if self._cfg.bridges.umgr_staging_output_queue:
        self._has_sout = True
        self.register_output(rps.UMGR_STAGING_OUTPUT_PENDING,
                             rpc.UMGR_STAGING_OUTPUT_QUEUE)
    else:
        self._has_sout = False

    # register the state notification pull cb
    # FIXME: this should be a tailing cursor in the update worker
    self.register_timed_cb(self._state_pull_cb,
                           timer=self._cfg['db_poll_sleeptime'])

    # register callback which pulls units back from agent
    # FIXME: this should be a tailing cursor in the update worker
    self.register_timed_cb(self._unit_pull_cb,
                           timer=self._cfg['db_poll_sleeptime'])

    # also listen to the state pubsub for unit state changes
    self.register_subscriber(rpc.STATE_PUBSUB, self._state_sub_cb)

    # let session know we exist
    self._session._register_umgr(self)

    self._prof.prof('setup_done', uid=self._uid)
    self._rep.ok('>>ok\n')
def __init__(self, url, session=None, logger=None, cfg=None, posix=True,
             interactive=True):
    """
    Create a pty backed shell for `url` and initialize it.

    url:         describes the shell to run
    session:     session to use; a default session is created if none is
                 given
    logger:      logger to use; a 'radical.saga.pty' logger is created if
                 none is given
    cfg:         name of a session config (string) or config data; only
                 the `pty` section of the resulting config is kept
    posix:       is the shell /bin/sh compatible?
    interactive: run an interactive shell?

    Raises `rse.NoSuccess` if the local staging directory cannot be
    created, and `KeyError` if `$HOME` is not set.
    """

    if logger : self.logger  = logger
    else      : self.logger  = ru.Logger('radical.saga.pty')

    if session: self.session = session
    else      : self.session = ss.Session(default=True)

    self.logger.debug("PTYShell init %s" % self)

    self.url         = url          # describes the shell to run
    self.posix       = posix        # /bin/sh compatible?
    self.interactive = interactive  # bash -i ?
    self.latency     = 0.0          # set by factory
    self.cp_slave    = None         # file copy channel

    self.initialized = False

    # every shell instance gets its own running id
    self.pty_id       = PTYShell._pty_id
    PTYShell._pty_id += 1

    # a string `cfg` names a config, anything else is config data
    name = None
    if isinstance(cfg, str):
        name = cfg
        cfg  = None
    self.cfg = ru.Config('radical.saga.session', name=name, cfg=cfg)
    self.cfg = self.cfg.pty

    # get prompt pattern from config, or use default
    self.prompt    = self.cfg.get('prompt_pattern', DEFAULT_PROMPT)
    self.prompt_re = re.compile("^(.*?)%s" % self.prompt, re.DOTALL)
    self.logger.info("PTY prompt pattern: %s" % self.prompt)

    # we need a local dir for file staging caches.  At this point we use
    # $HOME, but should make this configurable (FIXME)
    self.base = os.environ['HOME'] + '/.radical/saga/adaptors/shell/'

    # `exist_ok` accepts an existing directory, but still raises (an
    # `OSError` subclass) if the path exists and is not a directory
    try:
        os.makedirs(self.base, exist_ok=True)

    except OSError as e:
        raise rse.NoSuccess ("could not create staging dir: %s" % e) \
              from e

    self.factory   = supsf.PTYShellFactory()
    self.pty_info  = self.factory.initialize(self.url,    self.session,
                                             self.prompt, self.logger,
                                             self.cfg,    self.posix,
                                             interactive=self.interactive)
    self.pty_shell = self.factory.run_shell(self.pty_info)

    self._trace('init : %s' % self.pty_shell.command)

    self.initialize()
def __init__(self, cfg):
    '''
    Set up a worker: create a secondary session, initialize the component
    base class, start the result watcher thread, register the call modes,
    connect to the request / result queues and register with the master.

    cfg: either the name of a JSON config file, or config data.  The
         config entries `cores`, `gpus`, `sid` and (optionally) `info` are
         read here; `info` needs to provide `res_addr_put` and
         `req_addr_get`.
    '''

    if isinstance(cfg, str): cfg = ru.Config(cfg=ru.read_json(cfg))
    else                   : cfg = ru.Config(cfg=cfg)

    self._n_cores = cfg.cores
    self._n_gpus  = cfg.gpus

    # `info` carries the queue addresses used further below
    self._info    = ru.Config(cfg=cfg.get('info', {}))
    self._session = Session(cfg=cfg, uid=cfg.sid, _primary=False)

    # `self._uid` is used below but not set in this method
    # NOTE(review): presumably it is set by the base class constructor -
    #               confirm
    rpu.Component.__init__(self, cfg, self._session)

    self._term    = mp.Event()          # set to terminate
    self._res_evt = mp.Event()          # set on free resources

    self._mlock   = ru.Lock(self._uid)  # lock `_modes` and `_mdata`
    self._modes   = dict()              # call modes (call, exec, eval, ...)
    self._mdata   = dict()              # call mode meta data

    # We need to make sure to run only up to `gpn` tasks using a gpu
    # within that pool, so need a separate counter for that.
    # One slot (initialized to `0`) per core and per gpu.
    self._resources = {'cores' : [0] * self._n_cores,
                       'gpus'  : [0] * self._n_gpus}

    # resources are initially all free
    self._res_evt.set()

    # NOTE: an earlier approach (kept in the original source as
    #       commented-out code) used a `multiprocessing` pool with
    #       `maxtasksperchild=1`, created within a `fork` context.  A
    #       multiprocessing pool won't work though, as pickle is not able
    #       to serialize our worker object.  So we use our own process
    #       pool.  It's not much of a loss since we want to respawn new
    #       processes for each task anyway (to improve isolation).
    self._pool  = dict()                      # map task uid to process instance
    self._plock = ru.Lock('p' + self._uid)    # lock _pool

    # We also create a queue for communicating results back, and a thread to
    # watch that queue
    # (the attribute name `_result_thead` is spelled like this in the code)
    self._result_queue = mp.Queue()
    self._result_thead = mt.Thread(target=self._result_watcher)
    self._result_thead.daemon = True
    self._result_thead.start()

    # connect to master
    self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb)
    self.register_publisher(rpc.CONTROL_PUBSUB)

    # run worker initialization *before* starting to work on requests.
    # the worker provides four builtin modes:
    #    call:  execute a method or function call
    #    eval:  evaluate a piece of python code
    #    exec:  execute a command line (fork/exec)
    #    shell: execute a shell command
    self.register_mode('call',  self._call)
    self.register_mode('eval',  self._eval)
    self.register_mode('exec',  self._exec)
    self.register_mode('shell', self._shell)

    self.pre_exec()

    # connect to the request / response ZMQ queues
    self._res_put = ru.zmq.Putter('to_res', self._info.res_addr_put)
    self._req_get = ru.zmq.Getter('to_req', self._info.req_addr_get,
                                  cb=self._request_cb)

    # the worker can return custom information which will be made available
    # to the master.  This can be used to communicate, for example, worker
    # specific communication endpoints.

    # `info` is a placeholder for any additional meta data communicated to
    # the worker
    self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'worker_register',
                                      'arg': {'uid' : self._uid,
                                              'info': self._info}})
def test_zmq_queue():
    '''
    Create a queue bridge, two putters (A, B) and two getters (C, D).

    A puts 100 messages with a delay of 0.010 seconds, B puts 200 messages
    with a delay of 0.005 seconds; each putter then sends an EOF message
    (`idx` is `None`).  A getter stops on the first EOF it receives.

    Ensure that

      - the bridge endpoints are consistent;
      - each putter sent the expected number of messages;
      - C and D together received each message exactly once (no messages
        get lost / duplicated);
      - the messages are distributed about evenly over C and D.
    '''

    c_a = 100
    c_b = 200

    cfg = ru.Config(cfg={'uid'      : 'test_queue',
                         'channel'  : 'test',
                         'kind'     : 'queue',
                         'log_level': 'error',
                         'path'     : '/tmp/',
                         'sid'      : 'test_sid',
                         'bulk_size': 50,
                         'stall_hwm': 1,
                        })

    b = ru.zmq.Queue(cfg)
    b.start()

    assert (b.addr_in  != b.addr_out)
    assert (b.addr_in  == b.addr_put)
    assert (b.addr_out == b.addr_get)

    C = ru.zmq.Getter(channel=cfg['channel'], url=str(b.addr_get))
    D = ru.zmq.Getter(channel=cfg['channel'], url=str(b.addr_get))

    A = ru.zmq.Putter(channel=cfg['channel'], url=str(b.addr_put))
    B = ru.zmq.Putter(channel=cfg['channel'], url=str(b.addr_put))

    # data[uid]: for putters the list of own uids (one per message sent),
    #            for getters the list of source uids (one per message
    #            received)
    data = dict()

    def work_put(putter, uid, n, delay):

        data[uid] = list()
        for idx in range(n):
            time.sleep(delay)
            putter.put({'src': uid,
                        'idx': idx})
            data[uid].append(uid)

        # send EOF
        putter.put({'src': uid,
                    'idx': None})

    def work_get(getter, uid):

        data[uid] = list()
        done      = False
        while not done:
            for msg in getter.get():
                msg = ru.as_string(msg)
                if msg['idx'] is None:
                    # EOF: finish this bulk, then stop
                    done = True
                else:
                    data[uid].append(msg['src'])

        getter.stop()

    t_a = mt.Thread(target=work_put, args=[A, 'A', c_a, 0.010])
    t_b = mt.Thread(target=work_put, args=[B, 'B', c_b, 0.005])
    t_c = mt.Thread(target=work_get, args=[C, 'C'])
    t_d = mt.Thread(target=work_get, args=[D, 'D'])

    # daemon threads: a getter which never sees an EOF must not block exit
    threads = [t_a, t_b, t_c, t_d]
    for t in threads:
        t.daemon = True
    for t in threads:
        t.start()

    time.sleep(3)
    b.stop()

    assert (data['A'].count('A') == c_a)
    assert (data['B'].count('B') == c_b)
    assert (len(data['A'])       == c_a)
    assert (len(data['B'])       == c_b)

    got_c = data['C'].count('A') + data['C'].count('B')
    got_d = data['D'].count('A') + data['D'].count('B')

    assert (got_c + got_d == c_a + c_b)

    avg = (c_a + c_b) / 2
    assert (avg - 30 < got_c < avg + 30)
    assert (avg - 30 < got_d < avg + 30)
def test_zmq_queue_cb():
    '''
    Same scenario as the plain queue test, but the messages are delivered via
    a getter callback, and only a single getter is used.
    '''

    c_a = 2
    c_b = 4

    # 'put': what each producer sent, 'get': what the callback received
    data = {'put': dict(),
            'get': dict()}

    cfg = ru.Config(cfg={'uid'      : 'test_queue',
                         'channel'  : 'test',
                         'kind'     : 'queue',
                         'log_level': 'error',
                         'path'     : '/tmp/',
                         'sid'      : 'test_sid',
                         'bulk_size': 0,
                         'stall_hwm': 1,
                        })

    def get_msg_a(msg):
        # messages have the form `<uid>.<idx>`, record them by sender
        uid, _ = msg.split('.')
        data['get'].setdefault(uid, list()).append(uid)

    b = ru.zmq.Queue(cfg)
    b.start()

    assert (b.addr_in  != b.addr_out)
    assert (b.addr_in  == b.addr_put)
    assert (b.addr_out == b.addr_get)

    ru.zmq.Getter(channel=cfg['channel'], url=str(b.addr_get), cb=get_msg_a)

    time.sleep(1.0)

    A = ru.zmq.Putter(channel=cfg['channel'], url=str(b.addr_put))
    B = ru.zmq.Putter(channel=cfg['channel'], url=str(b.addr_put))

    def work_put(putter, uid, n, delay):

        sent = data['put'][uid] = list()
        for idx in range(n):
            time.sleep(delay)
            putter.put('%s.%d' % (uid, idx))
            sent.append(uid)

    workers = [mt.Thread(target=work_put, args=[A, 'A', c_a, 0.010]),
               mt.Thread(target=work_put, args=[B, 'B', c_b, 0.005])]

    for worker in workers:
        worker.daemon = True

    for worker in workers:
        worker.start()

    # let producers and callback finish before the bridge goes away
    time.sleep(1.0)
    b.stop()

    sent = data['put']
    assert (sent['A'].count('A') == c_a)
    assert (sent['B'].count('B') == c_b)
    assert (len(sent['A'])       == c_a)
    assert (len(sent['B'])       == c_b)

    assert (data['get']['A'].count('A') + data['get']['B'].count('B')
            == c_a + c_b)
# resource specified as argument if len(sys.argv) == 7: cfg_file = sys.argv[1] cfg_ml1_file = sys.argv[2] cfg_wf1_file = sys.argv[3] cfg_wf2_file = sys.argv[4] cfg_wf3_cg_file = sys.argv[5] cfg_wf3_fg_file = sys.argv[6] else: reporter.exit( 'Usage:\t%s [config.json] [config_ml1.json] [config_wf1.json] [config_wf2.json] [config_wf3_cg.json] [config_wf3_fg.json]\n\n' % sys.argv[0]) try: cfg = ru.Config(cfg=ru.read_json(cfg_file)) cfg_ml1 = ru.Config(cfg=ru.read_json(cfg_ml1_file)) cfg_wf1 = ru.Config(cfg=ru.read_json(cfg_wf1_file)) cfg_wf2 = ru.Config(cfg=ru.read_json(cfg_wf2_file)) cfg_wf3_cg = ru.Config(cfg=ru.read_json(cfg_wf3_cg_file)) cfg_wf3_fg = ru.Config(cfg=ru.read_json(cfg_wf3_fg_file)) if not check_environment(): raise ("ERROR: Incorrect environment set up.") pdesc = { 'resource': cfg['pdesc']['resource'], 'queue': cfg['pdesc']['queue'], 'schema': cfg['pdesc']['schema'], 'walltime': cfg['pdesc']['walltime'], 'cpus': cfg['pdesc']['cpus_node'] * 4 * cfg['pdesc']['nodes'],
def test_add_md_stage(self, mocked_generate_id, mocked_Logger):
    """
    Check the stages and tasks created by `Replica.add_md_stage()`.

    The method is called twice on the same replica: once for the initial MD
    cycle, and once for a cycle which follows an exchange task.  For both
    cycles the three tasks (grompp, mdrun, energy) are compared against the
    expected names, sandboxes, staging directives and resource requirements.
    """

    self.maxDiff = None

    here     = os.path.dirname(os.path.abspath(__file__))
    workload = ru.Config(cfg=ru.read_json(here
                                          + '/test_case/workflow_gromacs.json'))
    test_rep = Replica(workload=workload)

    # expectations shared by all tasks / cycles
    pre_exec     = ["module load gromacs/2020.2-cpu",
                    "export GMX_MAXBACKUP=-1"]
    static_links = ['pilot:///inputs//mdin.mdp.test > task:///mdin.mdp',
                    'pilot:///inputs//sys.top > task:///sys.top',
                    'pilot:///inputs//sys.itp > task:///sys.itp',
                    'pilot:///inputs//inp.ener > task:///inp.ener',
                    'pilot:///inputs//martini_v2.2.itp > '
                    'task:///martini_v2.2.itp']

    def mpi_reqs(n_procs):
        # expected `cpu_reqs` of an MPI task with `n_procs` processes
        return {'cpu_process_type': 'MPI',
                'cpu_processes'   : n_procs,
                'cpu_thread_type' : None,
                'cpu_threads'     : 1}

    def check_cycle(cycle, first_stage, inpcrd_link):
        # verify the three tasks of MD cycle `cycle`, whose stages start at
        # index `first_stage`; `inpcrd_link` is the expected coordinate input
        tag     = 'test.%04d' % cycle
        sandbox = tag + '.md'

        # preprocessing task
        task = list(test_rep.stages[first_stage].tasks)[0]
        self.assertEqual(task.name, tag + '.0000.md')
        self.assertEqual(task.sandbox, sandbox)
        self.assertEqual(task.link_input_data, static_links + [inpcrd_link])
        self.assertEqual(task.arguments,
                         ['grompp', '-f', 'mdin.mdp', '-c', 'inpcrd.gro',
                          '-o', 'sys.tpr', '-p', 'sys.top'])
        self.assertEqual(task.cpu_reqs, mpi_reqs(1))
        self.assertEqual(task.executable, 'gmx_mpi')
        self.assertEqual(task.pre_exec, pre_exec)

        # simulation task
        task = list(test_rep.stages[first_stage + 1].tasks)[0]
        self.assertEqual(task.name, tag + '.0001.md')
        self.assertEqual(task.sandbox, sandbox)
        self.assertEqual(task.link_input_data, [])
        self.assertEqual(task.arguments,
                         ["mdrun", "-s", "sys.tpr", "-deffnm", "sys",
                          "-c", "outcrd.gro", "-e", "sys.edr"])
        self.assertEqual(task.cpu_reqs, mpi_reqs(4))
        self.assertEqual(task.executable, 'gmx_mpi')
        self.assertEqual(task.pre_exec, pre_exec)

        # energy evaluation task, also fetches the resulting coordinates
        task = list(test_rep.stages[first_stage + 2].tasks)[0]
        self.assertEqual(task.name, tag + '.0002.md')
        self.assertEqual(task.sandbox, sandbox)
        self.assertEqual(task.link_input_data, [])
        self.assertEqual(task.download_output_data,
                         ['task:///outcrd.gro > '
                          'client:///outputs//outcrd.gro.' + tag])
        self.assertEqual(task.arguments,
                         ["energy", "-f", "sys.edr", "-b", 0.25,
                          "<", "inp.ener", ">", "mdinfo"])
        self.assertEqual(task.cpu_reqs, mpi_reqs(1))
        self.assertEqual(task.executable, 'gmx_mpi')
        self.assertEqual(task.pre_exec, pre_exec)

    # first MD cycle: coordinates come from the staged inputs
    test_rep.add_md_stage(sid='test_sid')
    self.assertEqual(len(test_rep.stages),
                     len(workload['md']['description']))
    check_cycle(0, 0, 'pilot:///inputs//inpcrd.gro.test > task:///inpcrd.gro')

    # second MD cycle: coordinates come from the exchange task's sandbox
    ex_0         = Task()
    ex_0.name    = 'test.ex'
    ex_0.sandbox = 'ex.0'

    test_rep.add_md_stage(sid='test.sid', exchanged_from=ex_0)
    check_cycle(1, 3, 'pilot:///ex.0/outcrd.gro.test > task:///inpcrd.gro')
if __name__ == '__main__': reporter = ru.Reporter(name='radical.entk') reporter.title('COVID-19 - Workflow2') # resource specified as argument if len(sys.argv) == 2: cfg_file = sys.argv[1] elif sys.argv[0] == "molecules_adrp.py": cfg_file = "adrp_system.json" elif sys.argv[0] == "molecules_3clpro.py": cfg_file = "3clpro_system.json" else: reporter.exit('Usage:\t%s [config.json]\n\n' % sys.argv[0]) cfg = ru.Config(cfg=ru.read_json(cfg_file)) cfg['node_counts'] = max(1, cfg['md_counts'] // cfg['gpu_per_node']) res_dict = { 'resource': cfg['resource'], 'queue' : cfg['queue'], 'schema' : cfg['schema'], 'walltime': cfg['walltime'], 'project' : cfg['project'], 'cpus' : 42 * 4 * cfg['node_counts'], 'gpus' : 6 * cfg['node_counts'] } # Create Application Manager appman = AppManager(hostname=os.environ.get('RMQ_HOSTNAME'), port=int(os.environ.get('RMQ_PORT')),
def _load_adaptors(self, inject_registry=None):
    """
    Try to load all adaptors that are registered in
    saga.engine.registry.py.

    This method is called from the constructor.  As Engine is a singleton,
    this method is called once after the module is first loaded in any
    python application.

    Each registered module is imported, its `Adaptor` class is instantiated,
    registered and sanity-checked.  Adaptors (or individual cpis) which fail
    one of the checks below are skipped with a warning.  Every accepted cpi
    class is stored in `self._adaptor_registry[cpi_type][schema]` as a dict
    with the keys `cpi_cname`, `cpi_class`, `adaptor_name` and
    `adaptor_instance`.

    :param inject_registry: Inject a fake registry. *For unit tests only*.
    """

    self._logger.debug("listing adaptor registry: %s" % self._registry)

    # check if some unit test wants to use a special registry.  If
    # so, we reset cpi infos from the earlier singleton creation.
    if inject_registry is not None:
        self._adaptor_registry = dict()
        self._registry = {'adaptor_registry': inject_registry}

    # attempt to load all registered modules
    for module_name in self._registry.get('adaptor_registry', []):

        self._logger.info("loading adaptor %s" % module_name)

        # first, import the module
        try:
            adaptor_module = ru.import_module(module_name)

        except Exception:
            self._logger.warn("skip adaptor %s: import failed",
                              module_name, exc_info=True)
            continue

        # we expect the module to have an 'Adaptor' class
        # implemented, which, on calling 'register()', returns
        # an info dict for all implemented adaptor classes.
        try:
            adaptor_instance = adaptor_module.Adaptor()
            adaptor_info     = adaptor_instance.register()

        except rse.SagaException:
            self._logger.warn("skip adaptor %s: failed to load",
                              module_name, exc_info=True)
            continue

        except Exception:
            self._logger.warn("skip adaptor %s: init failed",
                              module_name, exc_info=True)
            continue

        # the adaptor must also provide a sanity_check() method, which should
        # be used to confirm that the adaptor can function properly in the
        # current runtime environment (e.g., that all pre-requisites and
        # system dependencies are met).
        try:
            adaptor_instance.sanity_check()

        except Exception:
            self._logger.warn("skip adaptor %s: test failed",
                              module_name, exc_info=True)
            continue

        # check if we have a valid adaptor_info
        if adaptor_info is None:
            self._logger.warn("skip adaptor %s: invalid adaptor data",
                              module_name)
            continue

        required = ('name', 'cpis', 'version', 'schemas')
        if not all(key in adaptor_info for key in required):
            self._logger.warn("skip adaptor %s: incomplete data",
                              module_name)
            continue

        adaptor_name    = adaptor_info['name']
        adaptor_version = adaptor_info['version']
        adaptor_schemas = adaptor_info['schemas']

        # disable adaptors in 'alpha' or 'beta' versions -- unless
        # the 'load_beta_adaptors' config option is set to True
        if not self._cfg['load_beta_adaptors']:

            version = adaptor_version.lower()
            if 'alpha' in version or 'beta' in version:
                self._logger.warn("skip beta adaptor %s (version %s)",
                                  module_name, adaptor_version)
                continue

        # get the 'enabled' option from the adaptor's config section;
        # adaptors are considered enabled if the option is not set.
        try:
            adaptor_config  = ru.Config('radical.saga', name=adaptor_name)
            adaptor_enabled = adaptor_config.get('enabled', True)

        except rse.SagaException:
            self._logger.warn("skip adaptor %s: init failed",
                              module_name, exc_info=True)
            continue

        except Exception:
            self._logger.warn("skip adaptor %s: init error",
                              module_name, exc_info=True)
            continue

        # only load adaptor if it is not disabled via config files
        if not adaptor_enabled:
            self._logger.warn("skip adaptor %s: disabled", module_name)
            continue

        # check if the adaptor has anything to register
        if not adaptor_info['cpis']:
            self._logger.warn("skip adaptor %s: adaptor has no cpis",
                              module_name)
            continue

        # we got an enabled adaptor with valid info - yay!  We can
        # now register all adaptor classes (cpi implementations).
        for cpi_info in adaptor_info['cpis']:

            # check cpi information details for completeness
            if 'type' not in cpi_info or 'class' not in cpi_info:
                self._logger.warn("skip %s cpi: incomplete info detail",
                                  module_name)
                continue

            # adaptor classes are registered for specific API types.
            cpi_type  = cpi_info['type']
            cpi_cname = cpi_info['class']

            try:
                cpi_class = getattr(adaptor_module, cpi_cname)

            except Exception:
                # this exception likely means that the adaptor does not call
                # the radical.saga.adaptors.Base initializer (correctly)
                self._logger.warn("skip adaptor %s: invalid %s",
                                  module_name, cpi_info['class'],
                                  exc_info=True)
                continue

            # make sure the cpi type is a valid API type name: it needs to
            # live in the `radical.saga` namespace and has three or four
            # namespace elements, like:
            #
            #   radical.saga.Context
            #   radical.saga.job.Service
            #
            # NOTE: a stricter check, which verified that `cpi_class` is
            #       a subclass of the matching class in
            #       `radical.saga.adaptors.cpi.*`, is currently disabled.
            cpi_type_nselems = cpi_type.split('.')

            if not 3 <= len(cpi_type_nselems) <= 4:
                self._logger.warn("skip adaptor %s invalid cpi %s",
                                  module_name, cpi_type)
                continue

            # reject the type if *either* leading element is wrong (the
            # check used to require both to be wrong, and thus accepted
            # types like `foo.saga.job.Service`)
            if cpi_type_nselems[0] != 'radical' or \
               cpi_type_nselems[1] != 'saga':
                self._logger.warn("skip adaptor %s: invalid cpi ns %s",
                                  module_name, cpi_type)
                continue

            # finally, register the cpi for all its schemas!
            registered_schemas = list()
            for adaptor_schema in adaptor_schemas:

                adaptor_schema = adaptor_schema.lower()

                # make sure we can register that cpi type and that schema
                schema_infos = self._adaptor_registry \
                                   .setdefault(cpi_type, dict()) \
                                   .setdefault(adaptor_schema, [])

                # we register the cpi class, so that we can create
                # instances as needed, and the adaptor instance,
                # as that is passed to the cpi class c'tor later
                # on (the adaptor instance is used to share state
                # between cpi instances, amongst others)
                info = {'cpi_cname'       : cpi_cname,
                        'cpi_class'       : cpi_class,
                        'adaptor_name'    : adaptor_name,
                        'adaptor_instance': adaptor_instance}

                # make sure this tuple was not registered, yet
                if info in schema_infos:
                    self._logger.warn("skip adaptor %s: exists %s: %s",
                                      module_name, cpi_class,
                                      adaptor_instance)
                    continue

                schema_infos.append(info)
                registered_schemas.append(str("%s://" % adaptor_schema))

            self._logger.info("Register adaptor %s for %s API: %s"
                              % (module_name, cpi_type, registered_schemas))