Exemplo n.º 1
0
    async def handle_control_req(self, iface, request):  #pylint: disable=unused-argument
        """Process a run-time control command.

        The only command accepted here is
        ``ControlReq.REQ_CONTROL_CMD_FINISHAFTERALLTASKSDONE``; accepting it
        schedules ``_wait_for_all_jobs`` as a background task and stores the
        task in ``_finish_task`` so that a second request can be rejected.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (ControlReq): the control request; only ``command`` is read

        Returns:
            Response: an error when processing is stopped, the command is not
                supported or a finish was already requested; otherwise an
                acknowledgement
        """
        if self.stop_processing:
            return Response.error('processing stopped')

        command = request.command
        if command != ControlReq.REQ_CONTROL_CMD_FINISHAFTERALLTASKSDONE:
            reason = 'Not supported command "{}" of finish control request'.format(
                command)
            return Response.error(reason)

        if self._finish_task is None:
            # Remember the task so later finish/control requests are refused.
            self._finish_task = asyncio.ensure_future(
                self._wait_for_all_jobs())
            return Response.ok('{} command accepted'.format(command))

        return Response.error('Finish request already requested')
Exemplo n.º 2
0
    async def handle_register_req(self, iface, request):  #pylint: disable=unused-argument
        """Handle register request.

        Registers a new manager instance under the id given in the request
        and adds its resources to ``total_resources``.

        Args:
            iface (interface): the interface from which request came (unused)
            request (RegisterReq): request; ``params`` is read for the keys
                ``id``, ``address`` and ``resources``

        Returns:
            Response: an error when processing is stopped, the id is already
                registered, the address is empty or the registration itself
                fails; otherwise an ok response carrying the registered id
        """
        if self.stop_processing:
            return Response.error('processing stopped')

        manager_id = request.params['id']
        if manager_id in self.managers:
            # The id must actually be substituted into the message.
            return Response.error(
                'Manager with id "{}" already registered'.format(manager_id))

        address = request.params['address']
        if not address:
            return Response.error('Missing registry entity address')

        try:
            resources = Resources.from_dict(request.params['resources'])
            self.managers[manager_id] = ManagerInstance(
                manager_id, resources, address)
            self.total_resources.append(resources)
        except Exception:
            # Only the exception type is reported back to the requester.
            return Response.error('Failed to register manager: {}'.format(
                sys.exc_info()[0]))

        _logger.info(
            '%sth manager instance %s @ %s with resources (%s) registered successfully',
            len(self.managers), manager_id, address,
            request.params['resources'])

        return Response.ok(data={'id': manager_id})
Exemplo n.º 3
0
    async def handle_removejob_req(self, iface, request):  #pylint: disable=unused-argument
        """Reject a remove-job request.

        Job removal is not implemented yet, so every call ends in an error
        response; only the message differs depending on ``stop_processing``.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (SubmitReq): remove job request data (unused)

        Returns:
            Response: always an error response
        """
        # TODO: implement mechanism
        reason = ('processing stopped'
                  if self.stop_processing else 'Currently not supported')
        return Response.error(reason)
Exemplo n.º 4
0
    async def handle_resourcesinfo_req(self, iface, request):  #pylint: disable=unused-argument
        """Report the aggregated resource counters.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (SubmitReq): resources info request data (unused)

        Returns:
            Response: an error when processing is stopped, otherwise an ok
                response with node and core counters read from
                ``total_resources``
        """
        if self.stop_processing:
            return Response.error('processing stopped')

        totals = self.total_resources
        summary = {
            'total_nodes': totals.total_nodes,
            'total_cores': totals.total_cores,
            'used_cores': totals.used_cores,
            'free_cores': totals.free_cores
        }
        return Response.ok(data=summary)
Exemplo n.º 5
0
    async def handle_finish_req(self, iface, request):  #pylint: disable=unused-argument
        """Schedule a delayed finish.

        The first accepted request schedules ``_delayed_finish`` as a
        background task and stores it in ``_finish_task``; any later request
        is refused.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (SubmitReq): finish request data (unused)

        Returns:
            Response: an error when processing is stopped or a finish was
                already requested; otherwise an ok response whose ``when``
                field holds the delay formatted as ``'<n>s'``
        """
        if self.stop_processing:
            return Response.error('processing stopped')

        if self._finish_task is not None:
            return Response.error('Finish request already requested')

        # Delay value handed to _delayed_finish and echoed to the requester.
        delay = 2
        self._finish_task = asyncio.ensure_future(self._delayed_finish(delay))

        when = '{}s'.format(delay)
        return Response.ok(data={'when': when})
Exemplo n.º 6
0
    async def handle_notify_req(self, iface, request):  #pylint: disable=unused-argument
        """Apply a job-part state notification.

        The notification identifies the job and the job part through the
        ``parent_job_id`` and ``parent_job_part_id`` entries of the
        ``attributes`` parameter, and carries the new state name in ``state``.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (SubmitReq): notify request data; only ``params`` is read

        Returns:
            Response: an error when the identifiers are missing, the job is
                not known, the state name is not a ``JobState`` member or the
                part status update is refused; otherwise an ok response
        """
        attributes = request.params.get('attributes', {})
        parent_id = attributes.get('parent_job_id')
        part_id = attributes.get('parent_job_part_id')

        if parent_id is None or part_id is None:
            return Response.error('Unknown job notify data {}'.format(
                str(request.params)))

        job = self.jobs.get(parent_id)
        if not job:
            _logger.warning('job notified %s not exist', parent_id)
            return Response.error('Job {} unknown'.format(parent_id))

        state_name = request.params.get('state', 'UNKNOWN')
        if state_name not in JobState.__members__:
            _logger.warning(
                'notification for job %s contains unknown state %s',
                parent_id, state_name)
            return Response.error("Job's {} state {} unknown".format(
                parent_id, state_name))

        if not job.update_part_status(part_id, JobState[state_name]):
            return Response.error(
                "Failed to update job's {} part {} status to {}".format(
                    parent_id, part_id, str(state_name)))

        _logger.debug('job state %s successfully update to %s', parent_id,
                      str(state_name))
        return Response.ok('job {} updated'.format(parent_id))
Exemplo n.º 7
0
    async def handle_submit_req(self, iface, request):  #pylint: disable=unused-argument
        """Accept a batch of jobs for scheduling.

        While fewer than ``_min_scheduling_managers`` managers are registered
        the validated jobs are put into ``_submit_reqs_buffer``; otherwise
        they are handed to ``_schedule_jobs`` immediately.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (SubmitReq): submit request data; only ``jobs`` is read

        Returns:
            Response: an error when processing is stopped, no cores are
                available, the buffer is full, or validation/scheduling
                raises; otherwise an ok response describing how many jobs
                were buffered or submitted
        """
        if self.stop_processing:
            return Response.error('processing stopped')

        enough_managers = self._min_scheduling_managers <= len(self.managers)
        if enough_managers and self.total_resources.total_cores == 0:
            return Response.error('Error: no resources available')

        n_buffered = len(self._submit_reqs_buffer)
        if n_buffered >= self._max_buffered_jobs:
            return Response.error(
                'Error: submit buffer overflow (currently {} buffered jobs) - try submit '
                'later'.format(n_buffered))

        jobs = request.jobs
        try:
            # validate jobs
            self._validate_submit_req(jobs)

            if self._min_scheduling_managers > len(self.managers):
                # we don't have all (partition) managers registered - buffer jobs
                self._submit_reqs_buffer.append(jobs)
                _logger.debug(
                    'buffering submit request, current buffer size: %d',
                    len(self._submit_reqs_buffer))

                n_jobs = len(jobs)
                return Response.ok('{} jobs buffered'.format(n_jobs),
                                   data={'buffered': n_jobs})

            # submit at once
            # split jobs equally between all available managers
            scheduled = await self._schedule_jobs(jobs)
            (_, job_names) = scheduled
            n_submitted = len(job_names)

            return Response.ok('{} jobs submitted'.format(n_submitted),
                               data={
                                   'submitted': n_submitted,
                                   'jobs': job_names
                               })
        except Exception as exc:
            # Validation and scheduling failures are reported the same way.
            _logger.error('Submit error: %s', sys.exc_info())
            _logger.error(traceback.format_exc())
            return Response.error(str(exc))
Exemplo n.º 8
0
    async def handle_jobstatus_req(self, iface, request):  #pylint: disable=unused-argument
        """Handle job status request.

        Builds a per-job result for every name in ``request.job_names``. A
        job that is unknown, or whose status cannot be read, gets an entry
        with an error status and a message, so a single bad name does not
        hide the status of the remaining jobs.

        Args:
            iface (interface): the interface from which request came (unused)
            request (SubmitReq): job status request data; only ``job_names``
                is read

        Returns:
            Response: an error when processing is stopped, otherwise an ok
                response whose ``jobs`` field maps each job name to its
                per-job result
        """
        if self.stop_processing:
            return Response.error('processing stopped')

        result = {}

        for job_name in request.job_names:
            try:
                job = self.jobs.get(job_name)

                if job is None:
                    # Report the name that was looked up; the request object
                    # only provides `job_names`.
                    message = 'Job {} doesn\'t exist'.format(job_name)
                    _logger.warning('error to get job status: %s', message)
                    result[job_name] = {
                        'status': int(ResponseCode.ERROR),
                        'message': message
                    }
                    continue

                result[job_name] = {
                    'status': int(ResponseCode.OK),
                    'data': {
                        'jobName': job_name,
                        'status': str(job.status.name)
                    }
                }
            except Exception as exc:
                _logger.warning('error to get job status: %s', str(exc))
                _logger.warning(traceback.format_exc())
                result[job_name] = {
                    'status': int(ResponseCode.ERROR),
                    # Exceptions raised without arguments have empty `args`.
                    'message': exc.args[0] if exc.args else str(exc)
                }

        return Response.ok(data={'jobs': result})
Exemplo n.º 9
0
    async def generate_status_response(self):
        """Generate current statistics about governor manager.

        Returns:
            Response: an ok response with three sections: ``System`` (uptime,
                receiver address and interfaces, host, account, working
                directory and Python details), ``Resources`` (node and core
                counters from ``total_resources``) and ``JobStats`` (job
                counts grouped by state)
        """
        n_scheduling = n_failed = n_finished = n_executing = 0

        for job in self.jobs.values():
            if job.status in [JobState.QUEUED, JobState.SCHEDULED]:
                n_scheduling += 1
            elif job.status in [JobState.EXECUTING]:
                n_executing += 1
            elif job.status in [JobState.FAILED, JobState.OMITTED]:
                n_failed += 1
            elif job.status in [JobState.CANCELED, JobState.SUCCEED]:
                n_finished += 1

        # The receiver may be unset; guard the address the same way as the
        # interface list so the status report does not fail without it.
        receiver = self._receiver
        zmq_address = receiver.zmq_address if receiver else None
        if receiver and receiver.interfaces:
            iface_names = [iface.name() for iface in receiver.interfaces]
        else:
            iface_names = []

        return Response.ok(
            data={
                'System': {
                    'Uptime': str(datetime.now() - self.start_time),
                    'Zmqaddress': zmq_address,
                    'Ifaces': iface_names,
                    'Host': socket.gethostname(),
                    'Account': getpass.getuser(),
                    'Wd': os.getcwd(),
                    'PythonVersion': sys.version.replace('\n', ' '),
                    'Python': sys.executable,
                    'Platform': sys.platform,
                },
                'Resources': {
                    'total_nodes': self.total_resources.total_nodes,
                    'total_cores': self.total_resources.total_cores,
                    'used_cores': self.total_resources.used_cores,
                    'free_cores': self.total_resources.free_cores,
                },
                'JobStats': {
                    'TotalJobs': len(self.jobs),
                    'InScheduleJobs': n_scheduling,
                    'FailedJobs': n_failed,
                    'FinishedJobs': n_finished,
                    'ExecutingJobs': n_executing,
                }
            })
Exemplo n.º 10
0
    async def handle_status_req(self, iface, request):  #pylint: disable=unused-argument
        """Answer a status request with the current statistics.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (SubmitReq): status request data (unused)

        Returns:
            Response: an error when processing is stopped, otherwise the
                response produced by ``generate_status_response``
        """
        if not self.stop_processing:
            status_response = await self.generate_status_response()
            return status_response

        return Response.error('processing stopped')
Exemplo n.º 11
0
    async def handle_listjobs_req(self, iface, request):  #pylint: disable=unused-argument
        """Reject a list-jobs request.

        Listing jobs is not implemented yet, so every call ends in an error
        response.

        Args:
            iface (interface): the interface the request arrived on (unused)
            request (SubmitReq): list jobs request data (unused)

        Returns:
            Response: always an error response
        """
        # TODO: implement mechanism
        reason = 'Currently not supported'
        return Response.error(reason)