async def handle_register_req(self, iface, request): #pylint: disable=unused-argument
    """Handle register request.

    Creates a ``ManagerInstance`` for the registering manager, stores it in
    ``self.managers`` under the manager id and appends the manager's
    resources to ``self.total_resources``.

    Args:
        iface (interface): the interface from which request came
        request (RegisterReq): request; ``request.params`` is read for the
            ``id``, ``address`` and ``resources`` keys

    Returns:
        Response: error response when processing is stopped, the id is
            already registered, the address is empty or the registration
            raised; otherwise ok response with the registered id in ``data``
    """
    if self.stop_processing:
        return Response.error('processing stopped')

    manager_id = request.params['id']

    if manager_id in self.managers:
        # the placeholder must be filled in - otherwise the client gets a
        # literal '{}' instead of the conflicting manager id
        return Response.error(
            'Manager with id "{}" already registered'.format(manager_id))

    if not request.params['address']:
        return Response.error('Missing registry entity address')

    try:
        resources = Resources.from_dict(request.params['resources'])
        self.managers[manager_id] = ManagerInstance(
            manager_id, resources, request.params['address'])
        self.total_resources.append(resources)
    except Exception:
        # only the exception class is reported back to the client
        return Response.error('Failed to register manager: {}'.format(
            sys.exc_info()[0]))

    _logger.info(
        '%sth manager instance %s @ %s with resources (%s) registered successfully',
        len(self.managers), manager_id, request.params['address'],
        request.params['resources'])

    return Response.ok(data={'id': manager_id})
async def handle_control_req(self, iface, request): #pylint: disable=unused-argument
    """Handle control request.

    Control commands configure the system at run-time. The only command
    accepted here is ``ControlReq.REQ_CONTROL_CMD_FINISHAFTERALLTASKSDONE``,
    which schedules ``self._wait_for_all_jobs()`` as the finish task.

    Args:
        iface (interface): the interface from which request came
        request (ControlReq): control request data

    Returns:
        Response: the response to send back
    """
    if self.stop_processing:
        return Response.error('processing stopped')

    command = request.command
    if command != ControlReq.REQ_CONTROL_CMD_FINISHAFTERALLTASKSDONE:
        unsupported_msg = 'Not supported command "{}" of finish control request'
        return Response.error(unsupported_msg.format(command))

    # a finish task may be scheduled only once
    if self._finish_task is not None:
        return Response.error('Finish request already requested')

    finish_coro = self._wait_for_all_jobs()
    self._finish_task = asyncio.ensure_future(finish_coro)

    accepted_msg = '{} command accepted'.format(command)
    return Response.ok(accepted_msg)
async def handle_submit_req(self, iface, request): #pylint: disable=unused-argument
    """Handle submit request.

    The request is rejected when processing is stopped, when the required
    number of managers is registered but they provide no cores, or when the
    buffer of pending submit requests is full. Valid jobs are buffered until
    the required number of managers is registered; after that they are
    scheduled right away.

    Args:
        iface (interface): the interface from which request came
        request (SubmitReq): submit request data

    Returns:
        Response: the response to send back
    """
    if self.stop_processing:
        return Response.error('processing stopped')

    enough_managers = self._min_scheduling_managers <= len(self.managers)
    if enough_managers and self.total_resources.total_cores == 0:
        return Response.error('Error: no resources available')

    buffered_requests = len(self._submit_reqs_buffer)
    if buffered_requests >= self._max_buffered_jobs:
        return Response.error(
            'Error: submit buffer overflow (currently {} buffered jobs) - try submit '
            'later'.format(len(self._submit_reqs_buffer)))

    jobs = request.jobs
    try:
        # validation failures and submit failures are reported the same way
        self._validate_submit_req(jobs)

        if len(self.managers) < self._min_scheduling_managers:
            # not all (partition) managers are registered yet - keep the
            # jobs in the buffer
            self._submit_reqs_buffer.append(jobs)
            _logger.debug(
                'buffering submit request, current buffer size: %d',
                len(self._submit_reqs_buffer))

            buffered_msg = '{} jobs buffered'.format(len(jobs))
            return Response.ok(buffered_msg, data={'buffered': len(jobs)})

        # all managers present - schedule the jobs immediately
        scheduled = await self._schedule_jobs(jobs)
        (_, job_names) = scheduled

        submitted_msg = '{} jobs submitted'.format(len(job_names))
        return Response.ok(
            submitted_msg,
            data={'submitted': len(job_names), 'jobs': job_names})
    except Exception as exc:
        _logger.error('Submit error: %s', sys.exc_info())
        _logger.error(traceback.format_exc())
        return Response.error(str(exc))
async def handle_removejob_req(self, iface, request): #pylint: disable=unused-argument
    """Handle remove job request.

    Removing jobs is not implemented: the handler always answers with an
    error response.

    Args:
        iface (interface): the interface from which request came
        request: remove job request data (not inspected)

    Returns:
        Response: the error response to send back
    """
    if not self.stop_processing:
        # TODO: implement mechanism
        return Response.error('Currently not supported')

    return Response.error('processing stopped')
async def handle_finish_req(self, iface, request): #pylint: disable=unused-argument
    """Handle finish request.

    Schedules ``self._delayed_finish()`` as the finish task, unless a finish
    task has already been scheduled.

    Args:
        iface (interface): the interface from which request came
        request: finish request data (not inspected)

    Returns:
        Response: the response to send back; on success ``data['when']``
            holds the delay after which the finish was scheduled
    """
    if self.stop_processing:
        return Response.error('processing stopped')

    # only a single finish task may be pending
    if self._finish_task is not None:
        return Response.error('Finish request already requested')

    finish_delay = 2
    finish_coro = self._delayed_finish(finish_delay)
    self._finish_task = asyncio.ensure_future(finish_coro)

    return Response.ok(data={'when': '{}s'.format(finish_delay)})
async def handle_notify_req(self, iface, request): #pylint: disable=unused-argument
    """Handle notify request.

    Reads the parent job id and parent job part id from the ``attributes``
    entry of ``request.params``, and the new state from the ``state`` entry,
    then updates the status of the matching part of the job kept in
    ``self.jobs``.

    Args:
        iface (interface): the interface from which request came
        request: notify request data

    Returns:
        Response: the response to send back
    """
    attributes = request.params.get('attributes', {})
    global_job_id = attributes.get('parent_job_id')
    global_job_part_id = attributes.get('parent_job_part_id')

    if None in (global_job_id, global_job_part_id):
        return Response.error('Unknown job notify data {}'.format(
            str(request.params)))

    job = self.jobs.get(global_job_id)
    if not job:
        _logger.warning('job notified %s not exist', global_job_id)
        return Response.error('Job {} unknown'.format(global_job_id))

    new_state = request.params.get('state', 'UNKNOWN')
    if new_state not in JobState.__members__:
        _logger.warning(
            'notification for job %s contains unknown state %s',
            global_job_id, new_state)
        return Response.error('Job\'s {} state {} unknown'.format(
            global_job_id, new_state))

    updated = job.update_part_status(global_job_part_id, JobState[new_state])
    if not updated:
        return Response.error(
            'Failed to update job\'s {} part {} status to {}'.format(
                global_job_id, global_job_part_id, str(new_state)))

    _logger.debug('job state %s successfully update to %s',
                  global_job_id, str(new_state))
    return Response.ok('job {} updated'.format(global_job_id))
async def handle_jobstatus_req(self, iface, request): #pylint: disable=unused-argument
    """Handle job status request.

    Looks up every name from ``request.job_names`` in ``self.jobs`` and
    collects a per-job entry: the job status on success, or an error status
    with a message when the job is not known or its status can not be read.

    Args:
        iface (interface): the interface from which request came
        request: job status request data; ``request.job_names`` is iterated

    Returns:
        Response: error response when processing is stopped; otherwise ok
            response whose ``data['jobs']`` maps each requested job name to
            its status entry
    """
    if self.stop_processing:
        return Response.error('processing stopped')

    result = {}

    for job_name in request.job_names:
        try:
            job = self.jobs.get(job_name)

            if job is None:
                # Report the unknown job under its own name. The message was
                # previously built from ``request.jobName`` instead of the
                # name being looked up; that lookup failed inside this
                # ``try`` and the job ended up with an unrelated attribute
                # error as its message.
                result[job_name] = {
                    'status': int(ResponseCode.ERROR),
                    'message': 'Job {} doesn\'t exist'.format(job_name)
                }
                continue

            result[job_name] = {
                'status': int(ResponseCode.OK),
                'data': {
                    'jobName': job_name,
                    'status': str(job.status.name)
                }
            }
        except Exception as exc:
            _logger.warning('error to get job status: %s', str(exc))
            _logger.warning(traceback.format_exc())
            result[job_name] = {
                'status': int(ResponseCode.ERROR),
                # exceptions raised without arguments have empty ``args``
                'message': exc.args[0] if exc.args else str(exc)
            }

    return Response.ok(data={'jobs': result})
async def handle_status_req(self, iface, request): #pylint: disable=unused-argument
    """Handle status request.

    Delegates to ``self.generate_status_response()`` unless processing has
    been stopped.

    Args:
        iface (interface): the interface from which request came
        request: status request data (not inspected)

    Returns:
        Response: the response to send back
    """
    if not self.stop_processing:
        status_response = await self.generate_status_response()
        return status_response

    return Response.error('processing stopped')
async def handle_listjobs_req(self, iface, request): #pylint: disable=unused-argument
    """Handle list jobs request.

    Listing jobs is not implemented: the handler always answers with an
    error response.

    Args:
        iface (interface): the interface from which request came
        request: list jobs request data (not inspected)

    Returns:
        Response: the error response to send back
    """
    # TODO: implement mechanism
    not_supported = Response.error('Currently not supported')
    return not_supported
async def handle_resourcesinfo_req(self, iface, request): #pylint: disable=unused-argument
    """Handle resources info request.

    Reports the node and core counters exposed by ``self.total_resources``.

    Args:
        iface (interface): the interface from which request came
        request: resources info request data (not inspected)

    Returns:
        Response: error response when processing is stopped; otherwise ok
            response with ``total_nodes``, ``total_cores``, ``used_cores``
            and ``free_cores`` in ``data``
    """
    if self.stop_processing:
        return Response.error('processing stopped')

    # response keys are named exactly like the attributes they report
    reported = ('total_nodes', 'total_cores', 'used_cores', 'free_cores')
    info = {}
    for counter in reported:
        info[counter] = getattr(self.total_resources, counter)

    return Response.ok(data=info)