def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             re_busname=RE_BUSNAME,
             re_servicename=RE_SERVICENAME,
             ssdb_busname=DEFAULT_SSDB_BUSNAME,
             ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             broker=None):
    """
    ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.

    Only the rpc clients are constructed here; nothing in this constructor calls open() on them.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
    :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
    :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
    :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
    :param otdb_busname: busname on which the otdb service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the otdb service (default: DEFAULT_OTDB_SERVICENAME)
    :param broker: Qpid broker host handed to every rpc client (default: None, documented upstream as localhost)
    """
    self.radbrpc = RARPC(busname=radb_busname,
                         servicename=radb_servicename,
                         broker=broker)
    # the estimator uses the generic RPC class, so exception forwarding is requested explicitly
    self.rerpc = RPC(re_servicename,
                     busname=re_busname,
                     broker=broker,
                     ForwardExceptions=True)
    self.ssdbrpc = SSDBRPC(busname=ssdb_busname,
                           servicename=ssdb_servicename,
                           broker=broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(servicename=otdb_servicename,
                           busname=otdb_busname,
                           broker=broker)
def __init__(self,
             otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
             otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
             radb_busname=DEFAULT_RADB_BUSNAME,
             radb_servicename=DEFAULT_RADB_SERVICENAME,
             broker=None,
             **kwargs):
    """
    Set up the OTDB notification listener and the rpc client for the radb.

    :param otdb_notification_busname: passed to the base listener as its busname
    :param otdb_notification_subject: passed to the base listener as its subject
    :param radb_busname: busname for the radb rpc client
    :param radb_servicename: servicename for the radb rpc client
    :param broker: broker handed to both the base listener and the radb rpc client
    :param kwargs: any further keyword arguments are forwarded to the base listener
    """
    base_init = super(OTDBtoRATaskStatusPropagator, self).__init__
    base_init(busname=otdb_notification_busname,
              subject=otdb_notification_subject,
              broker=broker,
              **kwargs)

    # the rpc client is only created here, not opened
    self.radb = RARPC(servicename=radb_servicename,
                      busname=radb_busname,
                      broker=broker)
def main():
    """
    Parse the command line, connect to the radb and mom services and run the web service.

    The rpc clients and the radb changes handler are stored in the module level
    globals rarpc, momrpc and radbchangeshandler, and stay open (via 'with')
    for as long as app.run() is serving.
    """
    global rarpc
    global momrpc
    global radbchangeshandler

    # make sure we run in UTC timezone
    import os
    import time
    os.environ['TZ'] = 'UTC'
    # changing TZ only affects the time conversion rules of this process after tzset();
    # tzset is not available on every platform, hence the guard
    if hasattr(time, 'tzset'):
        time.tzset()

    # Check the invocation arguments
    parser = OptionParser('%prog [options]',
                          description='run the resource assignment editor web service')
    parser.add_option('-p', '--port', dest='port', type='int', default=5000,
                      help='port number on which to host the webservice, default: %default')
    parser.add_option('-q', '--broker', dest='broker', type='string', default=None,
                      help='Address of the qpid broker, default: localhost')
    parser.add_option('--radb_busname', dest='radb_busname', type='string', default=DEFAULT_RADB_BUSNAME,
                      help='Name of the bus exchange on the qpid broker on which the radbservice listens, default: %default')
    parser.add_option('--radb_servicename', dest='radb_servicename', type='string', default=DEFAULT_RADB_SERVICENAME,
                      help='Name of the radbservice, default: %default')
    parser.add_option('--radb_notification_busname', dest='radb_notification_busname', type='string', default=DEFAULT_RADB_CHANGES_BUSNAME,
                      help='Name of the notification bus exchange on the qpid broker on which the radb notifications are published, default: %default')
    parser.add_option('--radb_notification_subjects', dest='radb_notification_subjects', type='string', default=DEFAULT_RADB_CHANGES_SUBJECTS,
                      help='Subject(s) to listen for on the radb notification bus exchange on the qpid broker, default: %default')
    parser.add_option('--mom_busname', dest='mom_busname', type='string', default=DEFAULT_MOMQUERY_BUSNAME,
                      help='Name of the bus exchange on the qpid broker on which the momservice listens, default: %default')
    parser.add_option('--mom_servicename', dest='mom_servicename', type='string', default=DEFAULT_MOMQUERY_SERVICENAME,
                      help='Name of the momservice, default: %default')
    parser.add_option('-V', '--verbose', dest='verbose', action='store_true', help='verbose logging')
    (options, _args) = parser.parse_args()

    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG if options.verbose else logging.INFO)

    # use the parsed options instead of the DEFAULT_* constants, so the
    # command line switches defined above actually have an effect
    rarpc = RARPC(busname=options.radb_busname,
                  servicename=options.radb_servicename,
                  broker=options.broker)
    momrpc = MoMQueryRPC(busname=options.mom_busname,
                         servicename=options.mom_servicename,
                         timeout=2.5,
                         broker=options.broker)
    # NOTE(review): --radb_notification_subjects is parsed but still not handed to the
    # handler, because the name of the matching RADBChangesHandler parameter is not
    # visible from here -- confirm and wire it up.
    radbchangeshandler = RADBChangesHandler(options.radb_notification_busname,
                                            broker=options.broker,
                                            momrpc=momrpc)

    with radbchangeshandler, rarpc, momrpc:
        # Start the webserver
        app.run(debug=options.verbose, threaded=True, host='0.0.0.0', port=options.port)
def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             radb_broker=None,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             mom_busname=DEFAULT_MOMQUERY_BUSNAME,
             mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
             otdb_broker=None,
             mom_broker=None,
             broker=None):
    """
    RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

    Only the rpc clients and the translator are constructed here; nothing is opened yet.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param radb_broker: broker for the radb rpc client (default: None)
    :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
    :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
    :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
    :param otdb_broker: broker for the OTDB rpc client (default: None)
    :param mom_broker: broker for the MoM query rpc client (default: None)
    :param broker: if truthy, overrules radb_broker, otdb_broker and mom_broker (default: None)
    """
    if broker:
        # one broker for all three clients
        radb_broker = otdb_broker = mom_broker = broker

    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.radbrpc = RADBRPC(servicename=radb_servicename,
                           busname=radb_busname,
                           broker=radb_broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(servicename=otdb_servicename,
                           busname=otdb_busname,
                           broker=otdb_broker)
    self.momrpc = MoMQueryRPC(servicename=mom_servicename,
                              busname=mom_busname,
                              broker=mom_broker)
    self.translator = RAtoOTDBTranslator()
def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             radb_broker=None,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             mom_busname=DEFAULT_MOMQUERY_BUSNAME,
             mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
             otdb_broker=None,
             mom_broker=None,
             broker=None):
    """
    RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

    Only the rpc clients and the translator are constructed here; nothing is opened yet.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param radb_broker: broker for the radb rpc client (default: None)
    :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
    :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
    :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
    :param otdb_broker: broker for the OTDB rpc client (default: None)
    :param mom_broker: broker for the MoM query rpc client (default: None)
    :param broker: if truthy, overrules radb_broker, otdb_broker and mom_broker (default: None)
    """
    if broker:
        # one broker for all three clients
        radb_broker = otdb_broker = mom_broker = broker

    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.radbrpc = RADBRPC(servicename=radb_servicename,
                           busname=radb_busname,
                           broker=radb_broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(servicename=otdb_servicename,
                           busname=otdb_busname,
                           broker=otdb_broker)
    self.momrpc = MoMQueryRPC(servicename=mom_servicename,
                              busname=mom_busname,
                              broker=mom_broker)
    self.translator = RAtoOTDBTranslator()
def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             re_busname=RE_BUSNAME,
             re_servicename=RE_SERVICENAME,
             ssdb_busname=DEFAULT_SSDB_BUSNAME,
             ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             broker=None):
    """
    ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.

    Only the rpc clients are constructed here; nothing in this constructor calls open() on them.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
    :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
    :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
    :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
    :param otdb_busname: busname on which the otdb service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the otdb service (default: DEFAULT_OTDB_SERVICENAME)
    :param broker: Qpid broker host handed to every rpc client (default: None, documented upstream as localhost)
    """
    self.radbrpc = RARPC(busname=radb_busname,
                         servicename=radb_servicename,
                         broker=broker)
    # the estimator uses the generic RPC class, so exception forwarding is requested explicitly
    self.rerpc = RPC(re_servicename,
                     busname=re_busname,
                     broker=broker,
                     ForwardExceptions=True)
    self.ssdbrpc = SSDBRPC(busname=ssdb_busname,
                           servicename=ssdb_servicename,
                           broker=broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(servicename=otdb_servicename,
                           busname=otdb_busname,
                           broker=broker)
class RAtoOTDBPropagator():
    """Writes the outcome of a resource assignment from the radb back into the OTDB."""

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param radb_broker: broker for the radb rpc client (default: None)
        :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
        :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: broker for the OTDB rpc client (default: None)
        :param mom_broker: broker for the MoM query rpc client (default: None)
        :param broker: if truthy, overrules radb_broker, otdb_broker and mom_broker (default: None)
        """
        if broker:
            radb_broker = otdb_broker = mom_broker = broker

        ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.radbrpc = RADBRPC(servicename=radb_servicename,
                               busname=radb_busname,
                               broker=radb_broker)
        ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(servicename=otdb_servicename,
                               busname=otdb_busname,
                               broker=otdb_broker)
        self.momrpc = MoMQueryRPC(servicename=mom_servicename,
                                  busname=mom_busname,
                                  broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. Opens the connections when entering a 'with' scope."""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. Closes the connections when leaving a 'with' scope."""
        self.close()

    def open(self):
        """Open the rpc connections to the radb, otdb and mom services (in that order)."""
        for client in (self.radbrpc, self.otdbrpc, self.momrpc):
            client.open()

    def close(self):
        """Close the rpc connections to the radb, otdb and mom services (in that order)."""
        for client in (self.radbrpc, self.otdbrpc, self.momrpc):
            client.close()

    def doTaskConflict(self, otdb_id):
        """Set the OTDB task to status 'conflict'; rpc failures are logged, not raised."""
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id, ))

        if otdb_id:
            try:
                self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
            except Exception as e:
                logger.error(e)
        else:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' % (otdb_id, ))

    def _lookupProjectName(self, mom_id):
        """Return the MoM project name with whitespace runs replaced by '_', or 'unknown' on failure."""
        try:
            project = self.momrpc.getProjectDetails(mom_id)
            logger.info(project)
            name_parts = project[str(mom_id)]['project_name'].split()
            return "_".join(name_parts)
        except (RPCException, KeyError) as e:
            logger.error('Could not get project name from MoM for mom_id %s: %s' % (mom_id, str(e)))
            logger.info("Using 'unknown' as project name.")
            return 'unknown'

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """
        Translate the radb info of a scheduled task into an OTDB specification and store it.

        Nothing is written when otdb_id is falsy or when no storage claim was found.
        Any exception along the way is logged and the task is put to 'conflict' in the OTDB.
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' % (ra_id, otdb_id, mom_id))
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' % (otdb_id, ))
                return

            ra_info = self.getRAinfo(ra_id)
            logger.info('RA info for ra_id=%s otdb_id=%s: %s' % (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            has_storage_claim = bool(ra_info['storage'])
            if not has_storage_claim:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update." % (ra_id, otdb_id))
                return

            # get mom project name
            project_name = self._lookupProjectName(mom_id)

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s" % otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        """
        Collect the storage claim and the start/end time and status of a radb task.

        :return: dict with keys 'storage' (the last claim of resource type 'storage',
                 or an empty dict when there is none), 'starttime', 'endtime' and 'status'
        """
        info = {"storage": {}}
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id, extended=True, include_properties=True)

        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] != 'storage':
                continue
            info['storage'] = claim

        for key in ("starttime", "endtime", "status"):
            info[key] = task[key]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """
        Store the specification in the OTDB, prepare the task for scheduling and set its status.

        Any exception is logged and the task is put to 'conflict' instead.
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s' % (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)

            start_time = otdb_info["LOFAR.ObsSW.Observation.startTime"]
            stop_time = otdb_info["LOFAR.ObsSW.Observation.stopTime"]
            self.otdbrpc.taskPrepareForScheduling(otdb_id, start_time, stop_time)

            logger.info('Setting status (%s) for otdb_id %s' % (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
class OTDBtoRATaskStatusPropagator(OTDBBusListener):
    """Listens for OTDB task status notifications and copies the new status to the radb."""

    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                 radb_busname=DEFAULT_RADB_BUSNAME,
                 radb_servicename=DEFAULT_RADB_SERVICENAME,
                 broker=None,
                 **kwargs):
        """
        :param otdb_notification_busname: passed to the base listener as its busname
        :param otdb_notification_subject: passed to the base listener as its subject
        :param radb_busname: busname for the radb rpc client
        :param radb_servicename: servicename for the radb rpc client
        :param broker: broker handed to both the base listener and the radb rpc client
        :param kwargs: any further keyword arguments are forwarded to the base listener
        """
        base_init = super(OTDBtoRATaskStatusPropagator, self).__init__
        base_init(busname=otdb_notification_busname,
                  subject=otdb_notification_subject,
                  broker=broker,
                  **kwargs)

        self.radb = RARPC(servicename=radb_servicename,
                          busname=radb_busname,
                          broker=broker)

    def start_listening(self, **kwargs):
        """Open the radb rpc connection, then start the base listener."""
        self.radb.open()
        super(OTDBtoRATaskStatusPropagator, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        """Close the radb rpc connection, then stop the base listener."""
        self.radb.close()
        super(OTDBtoRATaskStatusPropagator, self).stop_listening(**kwargs)

    def _update_radb_task_status(self, otdb_id, task_status):
        """Ask the radb to set the status of the task with this otdb_id; warn when it reports no update."""
        logger.info("updating radb-task with otdb_id %s to status %s" % (otdb_id, task_status))

        result = self.radb.updateTaskStatusForOtdbId(otdb_id=otdb_id, status=task_status)

        was_updated = result and 'updated' in result and result['updated']
        if not was_updated:
            logger.warning("could not update task with otdb_id %s to status %s" % (otdb_id, task_status))

    # --- notification handlers: each one maps an OTDB event onto a radb status ---

    def onObservationPrepared(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'prepared')

    def onObservationApproved(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'approved')

    def onObservationOnHold(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'on_hold')

    def onObservationConflict(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'conflict')

    def onObservationPrescheduled(self, treeId, modificationTime):
        # deliberately only logged, no radb update for this status
        message = "not propagating prescheduled status for otdb_id %s to radb because the resource assigner takes care of this" % (treeId)
        logger.info(message)

    def onObservationScheduled(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'scheduled')

    def onObservationQueued(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'queued')

    def onObservationStarted(self, treeId, modificationTime):
        # the radb status for a started observation is 'active'
        self._update_radb_task_status(treeId, 'active')

    def onObservationCompleting(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'completing')

    def onObservationFinished(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'finished')

    def onObservationAborted(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'aborted')
class OTDBtoRATaskStatusPropagator(OTDBBusListener):
    """Listens for OTDB task status notifications and copies the new status to the radb."""

    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                 radb_busname=DEFAULT_RADB_BUSNAME,
                 radb_servicename=DEFAULT_RADB_SERVICENAME,
                 broker=None,
                 **kwargs):
        """
        :param otdb_notification_busname: passed to the base listener as its busname
        :param otdb_notification_subject: passed to the base listener as its subject
        :param radb_busname: busname for the radb rpc client
        :param radb_servicename: servicename for the radb rpc client
        :param broker: broker handed to both the base listener and the radb rpc client
        :param kwargs: any further keyword arguments are forwarded to the base listener
        """
        base_init = super(OTDBtoRATaskStatusPropagator, self).__init__
        base_init(busname=otdb_notification_busname,
                  subject=otdb_notification_subject,
                  broker=broker,
                  **kwargs)

        self.radb = RARPC(servicename=radb_servicename,
                          busname=radb_busname,
                          broker=broker)

    def start_listening(self, **kwargs):
        """Open the radb rpc connection, then start the base listener."""
        self.radb.open()
        super(OTDBtoRATaskStatusPropagator, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        """Close the radb rpc connection, then stop the base listener."""
        self.radb.close()
        super(OTDBtoRATaskStatusPropagator, self).stop_listening(**kwargs)

    def _update_radb_task_status(self, otdb_id, task_status):
        """Ask the radb to set the status of the task with this otdb_id; warn when it reports no update."""
        logger.info("updating radb-task with otdb_id %s to status %s" % (otdb_id, task_status))

        result = self.radb.updateTaskStatusForOtdbId(otdb_id=otdb_id, status=task_status)

        was_updated = result and 'updated' in result and result['updated']
        if not was_updated:
            logger.warning("could not update task with otdb_id %s to status %s" % (otdb_id, task_status))

    # --- notification handlers: each one maps an OTDB event onto a radb status ---

    def onObservationPrepared(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'prepared')

    def onObservationApproved(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'approved')

    def onObservationOnHold(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'on_hold')

    def onObservationConflict(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'conflict')

    def onObservationPrescheduled(self, treeId, modificationTime):
        # deliberately only logged, no radb update for this status
        message = "not propagating prescheduled status for otdb_id %s to radb because the resource assigner takes care of this" % (treeId)
        logger.info(message)

    def onObservationScheduled(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'scheduled')

    def onObservationQueued(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'queued')

    def onObservationStarted(self, treeId, modificationTime):
        # the radb status for a started observation is 'active'
        self._update_radb_task_status(treeId, 'active')

    def onObservationCompleting(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'completing')

    def onObservationFinished(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'finished')

    def onObservationAborted(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'aborted')
class ResourceAssigner():
    """Inserts/updates tasks in the radb and claims the resources the estimator asks for."""

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.

        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
        :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
        :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
        :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
        :param otdb_busname: busname on which the otdb service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the otdb service (default: DEFAULT_OTDB_SERVICENAME)
        :param broker: Qpid broker host handed to every rpc client (default: None, documented upstream as localhost)
        """
        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, broker=broker)
        self.rerpc = RPC(re_servicename, busname=re_busname, broker=broker, ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename, busname=ssdb_busname, broker=broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to the radb, resource estimator, otdb and ssdb services"""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close rpc connections to the radb, resource estimator, otdb and ssdb services"""
        self.radbrpc.close()
        self.rerpc.close()
        self.otdbrpc.close()
        self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        """
        Insert the task for this specification in the radb and, for prescheduled
        CEP4 tasks, claim the resources returned by the resource estimator.

        :param specification_tree: dict; the keys read here are 'otdb_id',
               'task_type', 'state', 'specification' and (via processPredecessors)
               'predecessors'
        :return: None. Every early exit is logged.
        """
        logger.info('doAssignment: specification_tree=%s' % (specification_tree,))

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        if status not in ['approved', 'prescheduled']: # cep2 accepts both, cep4 only prescheduled, see below
            # pass the values as separate lazy-logging args (a single tuple cannot
            # fill two %s placeholders) and really skip, as the message announces
            logger.info('skipping specification for otdb_id=%s because status=%s', otdb_id, status)
            return

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])

        momId = mainParset.getInt('Observation.momID', -1)
        try:
            startTime = datetime.strptime(mainParset.getString('Observation.startTime'), '%Y-%m-%d %H:%M:%S')
            endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), '%Y-%m-%d %H:%M:%S')
        except ValueError:
            logger.warning('cannot parse for start/end time from specification for otdb_id=%s', otdb_id)
            # without start/end time there is nothing to insert; going on would
            # only fail on the unbound startTime/endTime names
            return

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s' %
                    (momId, otdb_id, status, taskType, startTime, endTime))
        result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType, startTime, endTime, str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)' % (specificationId,taskId))

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s' % (otdb_id, status))
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s' % (needed,))

        if str(otdb_id) not in needed:
            logger.error("no otdb_id %s found in estimator results %s" % (otdb_id, needed))
            return

        if taskType not in needed[str(otdb_id)]:
            logger.error("no task type %s found in estimator results %s" % (taskType, needed[str(otdb_id)]))
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            logger.warning("Exception while updating available resources: %s" % str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        main_needed = needed[str(otdb_id)]
        task = self.radbrpc.getTask(taskId)
        claimed, _claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s' %
                               (len(conflictingClaims), conflictingClaims))
            else:
                logger.info('doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled' % (taskId,))
                self.radbrpc.updateTaskAndResourceClaims(taskId, task_status='scheduled', claim_status='allocated')

        self.processPredecessors(specification_tree)

    def processPredecessors(self, specification_tree):
        """
        Register, recursively, the predecessor relations of the task in the radb.

        Predecessors without a task in the radb are skipped for the insert, but
        their own predecessors are still processed. Any exception is logged and
        ends the processing of this (sub)tree.
        """
        try:
            predecessor_trees = specification_tree['predecessors']

            if predecessor_trees:
                otdb_id = specification_tree['otdb_id']
                task = self.radbrpc.getTask(otdb_id=otdb_id)

                for predecessor_tree in predecessor_trees:
                    pred_otdb_id = predecessor_tree['otdb_id']
                    predecessor_task = self.radbrpc.getTask(otdb_id=pred_otdb_id)
                    if predecessor_task:
                        self.radbrpc.insertTaskPredecessor(task['id'], predecessor_task['id'])
                    self.processPredecessors(predecessor_tree)

        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        """
        Return True when every enabled output data product in the parset has
        storageClusterName 'CEP4' (also when none is enabled), else False.
        """
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = ['Output_Correlated',
                'Output_IncoherentStokes',
                'Output_CoherentStokes',
                'Output_InstrumentModel',
                'Output_SkyImage',
                'Output_Pulsar']
        for key in keys:
            if parset.getBool('Observation.DataProducts.%s.enabled' % key, False):
                if parset.getString('Observation.DataProducts.%s.storageClusterName' % key, '') != 'CEP4':
                    # logger.warn is a deprecated alias of logger.warning
                    logger.warning("storageClusterName not CEP4, rejecting specification.")
                    return False

        logger.info("all enabled storageClusterName's are CEP4, accepting specification.")
        return True

    def getNeededResouces(self, specification_tree):
        """Ask the resource estimator (10s timeout) for the resources of this specification tree and return its reply."""
        replymessage, status = self.rerpc({"specification_tree":specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s' % replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        """
        Copy the state and the total/available disk space of the cluster's first
        node, as reported by the ssdb, to the 'cep4' storage resource in the radb.

        Raises StopIteration when the cluster group or the 'cep4' storage
        resource cannot be found (next() is called without a default).
        """
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k,v in groupnames.items() if v == cluster)

        # for CEP4 cluster, do hard codes lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(resource_types='storage', include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info("Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s" %
                    (cep4_storage_resource['name'], cep4_storage_resource['id'], active, available_capacity, total_capacity))

        self.radbrpc.updateResourceAvailability(cep4_storage_resource['id'],
                                                active=active,
                                                available_capacity=available_capacity,
                                                total_capacity=total_capacity)

    def claimResources(self, needed_resources, task):
        """
        Turn the estimator result for the task's type into claims and insert them in the radb.

        :param needed_resources: dict keyed by task type; the entry for task['type']
               maps resource type names to a dict holding one int (the claim size)
               and optionally one dict of property groups
        :param task: dict; 'type', 'id', 'starttime' and 'endtime' are read here
        :return: tuple (all_inserted, claim_ids) where all_inserted is True when the
                 radb returned as many ids as claims were offered
        """
        logger.info('claimResources: task %s needed_resources=%s' % (task, needed_resources))

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {rcpt['name']:rcpt['id'] for rcpt in self.radbrpc.getResourceClaimPropertyTypes()}
        resource_types = {rt['name']:rt['id'] for rt in self.radbrpc.getResourceTypes()}
        resources = self.radbrpc.getResources()

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items():
            if resource_type_name in resource_types:
                logger.info('claimResources: processing resource_type: %s' % resource_type_name)
                db_resource_type_id = resource_types[resource_type_name]
                db_resources_for_type = [r for r in resources if r['type_id'] == db_resource_type_id]

                # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
                # that value is the value for the claim
                needed_claim_value = next((v for v in needed_claim_for_resource_type.values() if isinstance(v, int)))

                # FIXME: right now we just pick the first resource from the 'cep4' resources.
                # estimator will deliver this info in the future
                db_cep4_resources_for_type = [r for r in db_resources_for_type if 'cep4' in r['name'].lower()]

                if db_cep4_resources_for_type:
                    claim = {'resource_id':db_cep4_resources_for_type[0]['id'],
                             'starttime':task['starttime'],
                             'endtime':task['endtime'],
                             'status':'claimed',
                             'claim_size':needed_claim_value}

                    #FIXME: find proper way to extend storage time with a month
                    if 'storage' in db_cep4_resources_for_type[0]['name']:
                        claim['endtime'] += timedelta(days=31)

                    # if the needed_claim_for_resource_type dict contains more kvp's,
                    # then the subdict contains groups of properties for the claim
                    if len(needed_claim_for_resource_type) > 1:
                        claim['properties'] = []
                        # NOTE(review): collections.Iterable only exists up to Python 3.9;
                        # kept as-is because the rest of this file targets Python 2.
                        needed_prop_groups = next((v for v in needed_claim_for_resource_type.values() if isinstance(v, collections.Iterable)))

                        def processProperties(propertiesDict, sap_nr=None):
                            # appends one property per known property type to the current claim
                            for prop_type_name, prop_value in propertiesDict.items():
                                if prop_type_name in rc_property_types:
                                    rc_property_type_id = rc_property_types[prop_type_name]
                                    prop = {'type':rc_property_type_id, 'value':prop_value}
                                    if sap_nr is not None:
                                        prop['sap_nr'] = sap_nr
                                    claim['properties'].append(prop)
                                else:
                                    logger.error('claimResources: unknown prop_type:%s' % prop_type_name)

                        for group_name, needed_prop_group in needed_prop_groups.items():
                            if group_name == 'saps':
                                # the 'saps' group is a list of per-sap property dicts
                                for sap_dict in needed_prop_group:
                                    processProperties(sap_dict['properties'], sap_dict['sap_nr'])
                            else:
                                processProperties(needed_prop_group)

                    logger.info('claimResources: created claim:%s' % claim)
                    claims.append(claim)
            else:
                logger.error('claimResources: unknown resource_type:%s' % resource_type_name)

        logger.info('claimResources: inserting %d claims in the radb' % len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1, 'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb' % len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids
def test(self):
    '''basic test: every rpc getter must return what the mocked service returns'''
    rpc = RARPC(busname=busname)
    self.assertEqual(mock.getTaskStatuses.return_value, rpc.getTaskStatuses())
    self.assertEqual(mock.getTaskTypes.return_value, rpc.getTaskTypes())
    self.assertEqual(mock.getResourceClaimStatuses.return_value, rpc.getResourceClaimStatuses())
    self.assertEqual(mock.getUnits.return_value, rpc.getUnits())
    self.assertEqual(mock.getResourceTypes.return_value, rpc.getResourceTypes())
    self.assertEqual(mock.getResourceGroupTypes.return_value, rpc.getResourceGroupTypes())
    self.assertEqual(mock.getResources.return_value, rpc.getResources())
    self.assertEqual(mock.getResourceGroups.return_value, rpc.getResourceGroups())
    self.assertEqual(mock.getTasks.return_value, rpc.getTasks())
    self.assertEqual(mock.getResourceClaims.return_value, rpc.getResourceClaims())

    #TODO: fix this test
    #self.assertEqual(None, rpc.getTask(1))
    #self.assertEqual(mock.getTask.return_value, rpc.getTask(5))

    # test non existing service method, should timeout
    with self.assertRaises(ValueError) as cm:
        rpc.rpc('foo', timeout=1)
    # checked after the 'with' block: a statement placed behind the raising call
    # inside the block would never run. str() is used instead of the deprecated,
    # Python-2-only BaseException.message attribute.
    self.assertEqual(str(cm.exception),
                     "{'backtrace': '', 'state': 'TIMEOUT', 'errmsg': 'RPC Timed out'}")
class RAtoOTDBPropagator():
    """Propagates the result of a resource assignment from the radb to OTDB.

    Holds rpc clients for the radb, OTDB and MoM query services and a
    RAtoOTDBTranslator. Can be used as a context manager: the rpc
    connections are opened on entry and closed on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.
        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param radb_broker: broker used for the radb rpc (default: None)
        :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
        :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: broker used for the OTDB rpc (default: None)
        :param mom_broker: broker used for the MoM query rpc (default: None)
        :param broker: if specified, overrules radb_broker, otdb_broker and mom_broker (default: None)
        """
        # a (truthy) generic broker wins over each of the service specific brokers
        radb_broker = broker or radb_broker
        otdb_broker = broker or otdb_broker
        mom_broker = broker or mom_broker

        ## , ForwardExceptions=True hardcoded in RPCWrapper right now (holds for radbrpc and otdbrpc)
        self.radbrpc = RADBRPC(busname=radb_busname,
                               servicename=radb_servicename,
                               broker=radb_broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname,
                               servicename=otdb_servicename,
                               broker=otdb_broker)
        self.momrpc = MoMQueryRPC(busname=mom_busname,
                                  servicename=mom_servicename,
                                  broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open the rpc connections to the radb, OTDB and MoM query services (in that order)."""
        for rpc in (self.radbrpc, self.otdbrpc, self.momrpc):
            rpc.open()

    def close(self):
        """Close the rpc connections to the radb, OTDB and MoM query services (in that order)."""
        for rpc in (self.radbrpc, self.otdbrpc, self.momrpc):
            rpc.close()

    def doTaskConflict(self, otdb_id):
        """Set the status of the OTDB task to 'conflict'.

        Nothing is sent to OTDB when otdb_id is falsy. Errors raised by the
        OTDB rpc are logged and swallowed.
        :param otdb_id: id of the task in OTDB
        """
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id,))

        if otdb_id:
            try:
                self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
            except Exception as e:
                logger.error(e)
        else:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' % (otdb_id,))

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """Write the assigned resources of a task into its OTDB specification and set it to 'scheduled'.

        Returns early when otdb_id is falsy or when no storage claim was
        found for the task. Any exception is logged, after which the OTDB
        task is put to 'conflict'.
        :param ra_id: id of the task in the radb
        :param otdb_id: id of the task in OTDB
        :param mom_id: id of the task in MoM, used to look up the project name
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' % (ra_id, otdb_id, mom_id))

            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' % (otdb_id,))
                return

            ra_info = self.getRAinfo(ra_id)
            logger.info('RA info for ra_id=%s otdb_id=%s: %s' % (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            has_storage_claim = bool(ra_info['storage'])
            if not has_storage_claim:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update." % (ra_id, otdb_id))
                return

            project_name = self._getProjectName(mom_id)

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s" % otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def _getProjectName(self, mom_id):
        """Look up the MoM project name for mom_id, with whitespace runs replaced by underscores.

        Falls back to 'unknown' when the MoM rpc raises an RPCException or
        the reply lacks the expected keys.
        """
        try:
            project = self.momrpc.getProjectDetails(mom_id)
            logger.info(project)
            name_parts = project[str(mom_id)]['project_name'].split()
            return "_".join(name_parts)
        except (RPCException, KeyError) as e:
            logger.error('Could not get project name from MoM for mom_id %s: %s' % (mom_id, str(e)))
            logger.info("Using 'unknown' as project name.")
            return 'unknown'

    def getRAinfo(self, ra_id):
        """Collect the radb information of a task which is needed for the OTDB specification.

        :param ra_id: id of the task in the radb
        :return: dict with the keys 'storage' (the last claim of resource
                 type 'storage', or an empty dict if there is none),
                 'starttime', 'endtime' and 'status' (taken from the task)
        """
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id,
                                                extended=True,
                                                include_properties=True)

        storage_claim = {}
        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] == 'storage':
                storage_claim = claim

        return {"storage": storage_claim,
                "starttime": task["starttime"],
                "endtime": task["endtime"],
                "status": task["status"]}

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """Store the specification in OTDB, prepare the task for scheduling and set its status.

        Any exception is logged, after which the OTDB task is put to 'conflict'.
        :param otdb_id: id of the task in OTDB
        :param otdb_info: specification to store; must contain the
                          LOFAR.ObsSW.Observation.startTime and stopTime keys
        :param otdb_status: status to give the OTDB task once the specification is stored
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s' % (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)

            start_time = otdb_info["LOFAR.ObsSW.Observation.startTime"]
            stop_time = otdb_info["LOFAR.ObsSW.Observation.stopTime"]
            self.otdbrpc.taskPrepareForScheduling(otdb_id, start_time, stop_time)

            logger.info('Setting status (%s) for otdb_id %s' % (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
def test(self):
    '''Basic test: every RARPC getter must relay the mocked service's return value.'''
    rpc = RARPC(busname=busname)

    # Names of the getters which exist both on the mocked service and on the rpc client.
    getter_names = ('getTaskStatuses',
                    'getTaskTypes',
                    'getResourceClaimStatuses',
                    'getUnits',
                    'getResourceTypes',
                    'getResourceGroupTypes',
                    'getResources',
                    'getResourceGroups',
                    'getTasks',
                    'getResourceClaims')

    for getter_name in getter_names:
        expected = getattr(mock, getter_name).return_value
        self.assertEqual(expected, getattr(rpc, getter_name)())

    #TODO: fix this test
    #self.assertEqual(None, rpc.getTask(1))
    #self.assertEqual(mock.getTask.return_value, rpc.getTask(5))

    # test non existing service method, should timeout
    with self.assertRaises(ValueError) as cm:
        rpc.rpc('foo', timeout=1)

    # the caught exception is only available on cm once the with-block has been left
    timeout_message = "{'backtrace': '', 'state': 'TIMEOUT', 'errmsg': 'RPC Timed out'}"
    self.assertEqual(cm.exception.message, timeout_message)
class ResourceAssigner():
    """Inserts tasks in the radb and claims (CEP4) resources for them.

    Holds rpc clients for the radb, resource estimator, ssdb and OTDB
    services. Can be used as a context manager: the rpc connections are
    opened on entry and closed on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.
        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
        :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
        :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
        :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
        :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
        :param broker: broker passed to all rpc clients (default: None)
        """
        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, broker=broker)
        self.rerpc = RPC(re_servicename, busname=re_busname, broker=broker, ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename, busname=ssdb_busname, broker=broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open the rpc connections to the radb, resource estimator, OTDB and ssdb services."""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close the rpc connections to the radb, resource estimator, OTDB and ssdb services."""
        self.radbrpc.close()
        self.rerpc.close()
        self.otdbrpc.close()
        self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        """Insert the specification and its task in the radb and try to claim the needed resources.

        The specification is skipped when its state is neither 'approved'
        nor 'prescheduled', or when its start/stop time cannot be parsed.
        Resources are only claimed for 'prescheduled' tasks of which all
        enabled dataproducts are stored on CEP4.
        :param specification_tree: dict with (at least) the keys 'otdb_id'
               and 'specification', and optionally 'task_type', 'state'
               and 'predecessors'
        """
        logger.info('doAssignment: specification_tree=%s' % (specification_tree))

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        if status not in ['approved', 'prescheduled']: # cep2 accepts both, cep4 only prescheduled, see below
            # pass the values as separate logging args (a single tuple breaks the formatting),
            # and really skip the specification, as the message says
            logger.info('skipping specification for otdb_id=%s because status=%s', otdb_id, status)
            return

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])
        momId = mainParset.getInt('Observation.momID', -1)

        try:
            startTime = datetime.strptime(mainParset.getString('Observation.startTime'), '%Y-%m-%d %H:%M:%S')
            endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), '%Y-%m-%d %H:%M:%S')
        except ValueError:
            logger.warning('cannot parse for start/end time from specification for otdb_id=%s', otdb_id)
            # without start/end time the task cannot be inserted;
            # continuing would fail on the unbound startTime/endTime names
            return

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s' %
                    (momId, otdb_id, status, taskType, startTime, endTime))
        result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType, startTime, endTime, str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)' % (specificationId, taskId))

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s' % (otdb_id, status))
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s' % (needed,))

        # the estimator results are keyed by the otdb_id as string
        otdb_key = str(otdb_id)

        if otdb_key not in needed:
            logger.error("no otdb_id %s found in estimator results %s" % (otdb_id, needed))
            return

        if taskType not in needed[otdb_key]:
            logger.error("no task type %s found in estimator results %s" % (taskType, needed[otdb_key]))
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            # best effort: claiming continues with the availability as currently known in the radb
            logger.warning("Exception while updating available resources: %s" % str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        main_needed = needed[otdb_key]
        task = self.radbrpc.getTask(taskId)
        claimed, claim_ids = self.claimResources(main_needed, task)

        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s' %
                               (len(conflictingClaims), conflictingClaims))
            else:
                logger.info('doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled' % (taskId,))
                self.radbrpc.updateTaskAndResourceClaims(taskId, task_status='scheduled', claim_status='allocated')

            self.processPredecessors(specification_tree)

    def processPredecessors(self, specification_tree):
        """Recursively register the predecessors of the task in the radb.

        For each predecessor tree of which the task is known in the radb, a
        task-predecessor relation is inserted. Any exception is logged and
        swallowed.
        :param specification_tree: dict with the keys 'otdb_id' and 'predecessors'
        """
        try:
            predecessor_trees = specification_tree['predecessors']

            if predecessor_trees:
                otdb_id = specification_tree['otdb_id']
                task = self.radbrpc.getTask(otdb_id=otdb_id)

                for predecessor_tree in predecessor_trees:
                    pred_otdb_id = predecessor_tree['otdb_id']
                    predecessor_task = self.radbrpc.getTask(otdb_id=pred_otdb_id)
                    if predecessor_task:
                        self.radbrpc.insertTaskPredecessor(task['id'], predecessor_task['id'])
                    self.processPredecessors(predecessor_tree)
        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        """Check whether all enabled dataproducts in the parset are stored on CEP4.

        :param parset: parset offering getBool(key, default) and getString(key, default)
        :return: False as soon as an enabled dataproduct has a
                 storageClusterName other than 'CEP4', True otherwise
        """
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = ['Output_Correlated',
                'Output_IncoherentStokes',
                'Output_CoherentStokes',
                'Output_InstrumentModel',
                'Output_SkyImage',
                'Output_Pulsar']

        for key in keys:
            if parset.getBool('Observation.DataProducts.%s.enabled' % key, False):
                if parset.getString('Observation.DataProducts.%s.storageClusterName' % key, '') != 'CEP4':
                    logger.warning("storageClusterName not CEP4, rejecting specification.")
                    return False

        logger.info("all enabled storageClusterName's are CEP4, accepting specification.")
        return True

    def getNeededResouces(self, specification_tree):
        """Ask the resource estimator service which resources the specification needs.

        :param specification_tree: specification tree, sent as-is to the estimator
        :return: the reply message of the estimator (the reply status is discarded)
        """
        replymessage, status = self.rerpc({"specification_tree": specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s' % replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        """Copy the current storage capacity of the cluster from the ssdb into the radb.

        :param cluster: name of the cluster group as known in the ssdb
        :raises StopIteration: when the cluster group, or a storage
                resource with 'cep4' in its name, cannot be found
        """
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k, v in groupnames.items() if v == cluster)

        # for CEP4 cluster, do hard codes lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(resource_types='storage', include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info("Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s" %
                    (cep4_storage_resource['name'], cep4_storage_resource['id'], active, available_capacity, total_capacity))

        self.radbrpc.updateResourceAvailability(cep4_storage_resource['id'],
                                                active=active,
                                                available_capacity=available_capacity,
                                                total_capacity=total_capacity)

    def claimResources(self, needed_resources, task):
        """Translate the estimated resource needs of the task into claims and insert them in the radb.

        :param needed_resources: estimator result, a dict keyed by task type,
               then by resource type name
        :param task: radb task dict; the keys 'type', 'id', 'starttime' and 'endtime' are used
        :return: tuple (all_inserted, claim_ids), where all_inserted tells
                 whether the radb returned as many ids as claims were offered
        """
        logger.info('claimResources: task %s needed_resources=%s' % (task, needed_resources))

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {rcpt['name']: rcpt['id'] for rcpt in self.radbrpc.getResourceClaimPropertyTypes()}
        resource_types = {rt['name']: rt['id'] for rt in self.radbrpc.getResourceTypes()}
        resources = self.radbrpc.getResources()

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items():
            if resource_type_name in resource_types:
                logger.info('claimResources: processing resource_type: %s' % resource_type_name)
                db_resource_type_id = resource_types[resource_type_name]
                db_resources_for_type = [r for r in resources if r['type_id'] == db_resource_type_id]

                # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
                # that value is the value for the claim
                needed_claim_value = next((v for k, v in needed_claim_for_resource_type.items() if isinstance(v, int)))

                # FIXME: right now we just pick the first resource from the 'cep4' resources.
                # estimator will deliver this info in the future
                db_cep4_resources_for_type = [r for r in db_resources_for_type if 'cep4' in r['name'].lower()]

                if db_cep4_resources_for_type:
                    claim = {'resource_id': db_cep4_resources_for_type[0]['id'],
                             'starttime': task['starttime'],
                             'endtime': task['endtime'],
                             'status': 'claimed',
                             'claim_size': needed_claim_value}

                    #FIXME: find proper way to extend storage time with a month
                    if 'storage' in db_cep4_resources_for_type[0]['name']:
                        claim['endtime'] += timedelta(days=31)

                    # if the needed_claim_for_resource_type dict contains more kvp's,
                    # then the subdict contains groups of properties for the claim
                    if len(needed_claim_for_resource_type) > 1:
                        claim['properties'] = []
                        # NOTE(review): collections.Iterable only exists up to python 3.9;
                        # use collections.abc.Iterable when this file is ported to python 3
                        needed_prop_groups = next((v for k, v in needed_claim_for_resource_type.items() if isinstance(v, collections.Iterable)))

                        def processProperties(propertiesDict, sap_nr=None):
                            # append each known property (optionally tagged with its sap_nr) to the current claim
                            for prop_type_name, prop_value in propertiesDict.items():
                                if prop_type_name in rc_property_types:
                                    rc_property_type_id = rc_property_types[prop_type_name]
                                    claim_property = {'type': rc_property_type_id, 'value': prop_value}
                                    if sap_nr is not None:
                                        claim_property['sap_nr'] = sap_nr
                                    claim['properties'].append(claim_property)
                                else:
                                    logger.error('claimResources: unknown prop_type:%s' % prop_type_name)

                        for group_name, needed_prop_group in needed_prop_groups.items():
                            if group_name == 'saps':
                                # the 'saps' group is a list of dicts, each with its own sap_nr and properties
                                for sap_dict in needed_prop_group:
                                    processProperties(sap_dict['properties'], sap_dict['sap_nr'])
                            else:
                                processProperties(needed_prop_group)

                    logger.info('claimResources: created claim:%s' % claim)
                    claims.append(claim)
            else:
                logger.error('claimResources: unknown resource_type:%s' % resource_type_name)

        logger.info('claimResources: inserting %d claims in the radb' % len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1, 'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb' % len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids