class TaskManagementHandler(ServiceMessageHandler):
    """Service handler that aborts tasks identified by their OTDB id.

    Observations whose status is "running" or "queued" are aborted through
    the observation control RPC; every other task is aborted by setting its
    OTDB status to "aborted".
    """

    def __init__(self):
        super(TaskManagementHandler, self).__init__()
        # RPC clients used to look up the task, change its OTDB status and
        # stop an observation respectively.
        self.radb = RADBRPC()
        self.otdb = OTDBRPC()
        self.obs_ctrl = ObservationControlRPCClient()

    def handle_message(self, msg):
        """Intentionally a no-op: the message is ignored."""
        pass

    def AbortTask(self, otdb_id):
        """Abort the task with the given OTDB id.

        :param otdb_id: OTDB id of the task to abort
        :return: dict with key "aborted" (whether aborting succeeded) and
                 key "otdb_id" (the id that was passed in)
        """
        if self._is_active_observation(otdb_id):
            succeeded = self._abort_active_observation(otdb_id)
        else:
            succeeded = self._abort_inactive_task(otdb_id)
        return {"aborted": succeeded, "otdb_id": otdb_id}

    def _is_active_observation(self, otdb_id):
        """Return True when the task is an observation that is running or queued."""
        kind, state = self._get_task_type_and_status(otdb_id)
        if kind != "observation":
            return False
        return state in ("running", "queued")

    def _abort_inactive_task(self, otdb_id):
        """Set the task's OTDB status to "aborted".

        :return: True when the status was set, False when the OTDB RPC
                 raised OTDBPRCException
        """
        logger.info("Aborting inactive task: %s", otdb_id)
        try:
            self.otdb.taskSetStatus(otdb_id=otdb_id, new_status="aborted")
        except OTDBPRCException:
            return False
        return True

    def _abort_active_observation(self, otdb_id):
        """Ask observation control to abort the observation.

        :return: True only when the reply's "aborted" entry is exactly True
        """
        logger.info("Aborting active task: %s", otdb_id)
        reply = self.obs_ctrl.abort_observation(otdb_id)
        return reply["aborted"] is True

    def _get_task_type_and_status(self, otdb_id):
        """Fetch the task from the RADB and return its (type, status) pair."""
        # NOTE(review): otdb_id is passed positionally -- confirm that the
        # first positional argument of getTask is interpreted as an OTDB id.
        record = self.radb.getTask(otdb_id)
        return record["type"], record['status']
class PipelineControl(OTDBBusListener):
    """OTDB bus listener that holds an OTDB RPC client and a Slurm handle.

    The listener itself is attached to the OTDB notification bus; status
    updates are sent to the OTDB service bus through ``self.otdbrpc``.
    """

    def __init__(self, otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME, **kwargs):
        """
        :param otdb_notification_busname: busname handed to OTDBBusListener as ``busname``
        :param otdb_service_busname: busname used for the OTDB RPC client
        :param kwargs: forwarded unchanged to OTDBBusListener.__init__
        """
        super(PipelineControl, self).__init__(busname=otdb_notification_busname, **kwargs)

        self.otdb_service_busname = otdb_service_busname
        self.otdbrpc = OTDBRPC(busname=otdb_service_busname)
        self.slurm = Slurm()

    def _setStatus(self, obsid, status):
        """Set the OTDB status of task ``obsid`` to ``status``.

        A timeout while waiting for the RPC reply is deliberately ignored;
        any other exception propagates to the caller.

        :param obsid: OTDB id of the task
        :param status: new status to set
        """
        try:
            self.otdbrpc.taskSetStatus(otdb_id=obsid, new_status=status)
        except RPCTimeoutException:
            # We use a queue, so delivery is guaranteed. We don't care about the answer.
            # (The exception object is not bound: it was never used, and the old
            # "except X, e" spelling is a SyntaxError on Python 3.)
            pass
class RAtoOTDBPropagator():
    """Propagates ResourceAssigner results for a task into the OTDB.

    Holds RPC clients for the RADB, OTDB and MoM query services and can be
    used as a context manager, which opens the clients on entry and closes
    them on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param mom_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker, otdb_broker and mom_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            radb_broker = broker
            otdb_broker = broker
            mom_broker = broker

        self.radbrpc = RADBRPC(busname=radb_busname, servicename=radb_servicename, broker=radb_broker)  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=otdb_broker)  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname, servicename=mom_servicename, broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to the radb, otdb and mom query services"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close rpc connections to the radb, otdb and mom query services.

        Every connection gets a close() call even when an earlier close()
        raises; the exception still propagates to the caller afterwards.
        """
        try:
            self.radbrpc.close()
        finally:
            try:
                self.otdbrpc.close()
            finally:
                self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        """Set the OTDB status of the task to 'conflict'.

        Does nothing (apart from logging a warning) when otdb_id is falsy.
        Errors raised by the OTDB rpc are logged and not re-raised.

        :param otdb_id: OTDB id of the task
        """
        logger.info('doTaskConflict: otdb_id=%s', otdb_id)
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s', otdb_id)
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """Write the resource-assignment result of a task to the OTDB and mark it 'scheduled'.

        Returns early when otdb_id is falsy or when no storage was claimed
        for the task. Any exception is logged, after which the task is put
        in 'conflict' via doTaskConflict.

        :param ra_id: RADB id of the task
        :param otdb_id: OTDB id of the task
        :param mom_id: MoM id of the task, used to look up the project name
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s', ra_id, otdb_id, mom_id)
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s', otdb_id)
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s', ra_id, otdb_id, ra_info)

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update.",
                            ra_id, otdb_id)
                return

            # get mom project name; whitespace runs in the name are replaced by '_'
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                project_name = "_".join(project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                # A missing project name is not fatal: fall back to a placeholder.
                logger.error('Could not get project name from MoM for mom_id %s: %s', mom_id, str(e))
                logger.info("Using 'unknown' as project name.")
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s", otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        """Collect the RADB information needed to build the OTDB specification.

        :param ra_id: RADB id of the task
        :return: dict with keys "storage" (the last claim whose
                 resource_type_name is 'storage', or {} when there is none),
                 "starttime", "endtime" and "status" (copied from the task)
        """
        info = {}
        info["storage"] = {}
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id, extended=True, include_properties=True)
        for claim in claims:
            logger.debug("Processing claim: %s", claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim
        info["starttime"] = task["starttime"]
        info["endtime"] = task["endtime"]
        info["status"] = task["status"]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """Store the specification in the OTDB, prepare the task for scheduling and set its status.

        Any exception is logged, after which the task is put in 'conflict'
        via doTaskConflict.

        :param otdb_id: OTDB id of the task
        :param otdb_info: specification dict; must contain the keys
                          "LOFAR.ObsSW.Observation.startTime" and
                          "LOFAR.ObsSW.Observation.stopTime"
        :param otdb_status: status to set once the specification is stored
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s', otdb_id, otdb_info)
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(otdb_id,
                                                  otdb_info["LOFAR.ObsSW.Observation.startTime"],
                                                  otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s', otdb_status, otdb_id)
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
class RAtoOTDBPropagator():
    """Propagates ResourceAssigner results for a task into the OTDB.

    Holds RPC clients for the RADB, OTDB and MoM query services and can be
    used as a context manager, which opens the clients on entry and closes
    them on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param mom_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker, otdb_broker and mom_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            radb_broker = broker
            otdb_broker = broker
            mom_broker = broker

        self.radbrpc = RADBRPC(busname=radb_busname, servicename=radb_servicename, broker=radb_broker)  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=otdb_broker)  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname, servicename=mom_servicename, broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to the radb, otdb and mom query services"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close rpc connections to the radb, otdb and mom query services.

        Every connection gets a close() call even when an earlier close()
        raises; the exception still propagates to the caller afterwards.
        """
        try:
            self.radbrpc.close()
        finally:
            try:
                self.otdbrpc.close()
            finally:
                self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        """Set the OTDB status of the task to 'conflict'.

        Does nothing (apart from logging a warning) when otdb_id is falsy.
        Errors raised by the OTDB rpc are logged and not re-raised.

        :param otdb_id: OTDB id of the task
        """
        logger.info('doTaskConflict: otdb_id=%s', otdb_id)
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s', otdb_id)
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """Write the resource-assignment result of a task to the OTDB and mark it 'scheduled'.

        Returns early when otdb_id is falsy or when no storage was claimed
        for the task. Any exception is logged, after which the task is put
        in 'conflict' via doTaskConflict.

        :param ra_id: RADB id of the task
        :param otdb_id: OTDB id of the task
        :param mom_id: MoM id of the task, used to look up the project name
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s', ra_id, otdb_id, mom_id)
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s', otdb_id)
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s', ra_id, otdb_id, ra_info)

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update.",
                            ra_id, otdb_id)
                return

            # get mom project name; whitespace runs in the name are replaced by '_'
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                project_name = "_".join(project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                # A missing project name is not fatal: fall back to a placeholder.
                logger.error('Could not get project name from MoM for mom_id %s: %s', mom_id, str(e))
                logger.info("Using 'unknown' as project name.")
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s", otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        """Collect the RADB information needed to build the OTDB specification.

        :param ra_id: RADB id of the task
        :return: dict with keys "storage" (the last claim whose
                 resource_type_name is 'storage', or {} when there is none),
                 "starttime", "endtime" and "status" (copied from the task)
        """
        info = {}
        info["storage"] = {}
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id, extended=True, include_properties=True)
        for claim in claims:
            logger.debug("Processing claim: %s", claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim
        info["starttime"] = task["starttime"]
        info["endtime"] = task["endtime"]
        info["status"] = task["status"]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """Store the specification in the OTDB, prepare the task for scheduling and set its status.

        Any exception is logged, after which the task is put in 'conflict'
        via doTaskConflict.

        :param otdb_id: OTDB id of the task
        :param otdb_info: specification dict; must contain the keys
                          "LOFAR.ObsSW.Observation.startTime" and
                          "LOFAR.ObsSW.Observation.stopTime"
        :param otdb_status: status to set once the specification is stored
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s', otdb_id, otdb_info)
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(otdb_id,
                                                  otdb_info["LOFAR.ObsSW.Observation.startTime"],
                                                  otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s', otdb_status, otdb_id)
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)