async def submit_jobs(self, req):
    """Send submit request to the manager instance.

    Args:
        req: the submit request to send

    Returns:
        dict: with:
            njobs (int): number of submitted jobs
            names (list(str)): list of submitted job names

    Raises:
        InvalidRequest: when request has wrong format
    """
    # use a single socket reference for the matching send/receive pair
    socket = self.get_socket()
    await socket.send_json({'request': 'submit', 'jobs': req})

    msg = await socket.recv_json()

    # non-zero code signals the manager rejected the request
    if msg['code'] != 0:
        raise InvalidRequest('Failed to submit jobs: {}'.format(
            msg.get('message', '')))

    # hoist the nested payload lookup instead of repeating msg.get('data', {})
    data = msg.get('data', {})
    njobs = data.get('submitted', 0)

    self.submitted_jobs += njobs
    return {
        'njobs': njobs,
        'names': data.get('jobs', [])
    }
def __init__(self, data):
    """Initialize notify request.

    Args:
        data (dict): request data

    Raises:
        InvalidRequest: when the entity is missing/unknown or the key notify
            parameters ('name', 'state', 'attributes') are not present
    """
    assert data is not None

    if 'entity' not in data or not data['entity'] in self.NOTIFY_ENTITY:
        raise InvalidRequest('Wrong notify request - missing/unknown entity')

    if 'params' not in data:
        # fixed copy-paste from the register request: this is a notify request
        raise InvalidRequest('Wrong notify request - missing notify parameters')

    if not all(e in data['params'] for e in ['name', 'state', 'attributes']):
        raise InvalidRequest('Wrong notify request - missing key notify parameters')

    self.entity = data['entity']
    self.params = data['params']
def __init__(self, data):
    """Initialize register request.

    Args:
        data (dict): request data

    Raises:
        InvalidRequest: when the entity is missing/unknown or the key register
            parameters ('id', 'address', 'resources') are not present
    """
    assert data is not None

    if 'entity' not in data or data['entity'] not in self.REQ_REGISTER_ENTITIES:
        raise InvalidRequest('Wrong register request - missing/unknown entity')

    if 'params' not in data:
        raise InvalidRequest('Wrong register request - missing register parameters')

    # every one of these keys must be present in the registration parameters
    required_keys = ('id', 'address', 'resources')
    if any(key not in data['params'] for key in required_keys):
        raise InvalidRequest('Wrong register request - missing key register parameters')

    self.entity = data['entity']
    self.params = data['params']
def parse(cls, data):
    """Parse request.

    Args:
        data (dict): parsed data

    Returns:
        req (Request): request object

    Raises:
        InvalidRequest: in case of wrong or unknown request
    """
    # the payload must be a dictionary carrying a non-empty 'request' entry
    if not isinstance(data, dict) or not data.get('request'):
        raise InvalidRequest('Invalid request')

    request_name = data['request']
    if request_name not in __REQS__:
        raise InvalidRequest('Unknown request name: %s' % request_name)

    # instantiate the registered request class with the raw payload
    request_class = __REQS__[request_name]
    return request_class(data)
def __init__(self, data):
    """Initialize remove job request.

    Args:
        data (dict): request data

    Raises:
        InvalidRequest: when the list of job names is missing or empty
    """
    assert data is not None

    names = data.get('jobNames')
    # a non-empty list of job names is mandatory
    if not isinstance(names, list) or not names:
        raise InvalidRequest('Wrong remove job request - missing job names')

    self.job_names = names
def __init__(self, data):
    """Initialize job info request.

    Args:
        data (dict): request data

    Raises:
        InvalidRequest: in case of wrong request format
    """
    assert data is not None

    names = data.get('jobNames')
    # a non-empty list of job names is mandatory
    if not isinstance(names, list) or not names:
        raise InvalidRequest('Wrong job info request - missing job names')

    # include child (iteration) jobs only when explicitly requested
    params = data.get('params')
    self.include_childs = bool(params and params.get('withChilds'))

    self.job_names = names
def generate(self):
    """Generate exact job's resource requirements for next iteration.

    Splits the available resources evenly among iterations. When all
    iterations fit at their minimum size in a single round, resources are
    divided once across all iterations; otherwise iterations are scheduled
    in multiple rounds, each round getting the full resource pool.

    Yields:
        exact resource requirements for following iterations

    Raises:
        InvalidRequest: when parameter ``max`` is used in resource description
    """
    _logger.debug("iteration scheduler '%s' algorithm called", MaximumIters.SCHED_NAME)

    # 'max' cannot be combined with this even-split strategy
    if 'max' in self.job_resources:
        raise InvalidRequest(
            'Wrong submit request - split-into directive mixed with max directive'
        )

    # pmin - minimum number of resources a single iteration requires
    pmin = 1
    if 'min' in self.job_resources:
        pmin = self.job_resources['min']

    if self.iterations * pmin <= self.avail_resources:
        # a single round
        _logger.debug(
            "iterations in single round to schedule: %s, available resources: %s, "
            "minimum iteration resources: %s", self.iterations, self.avail_resources, pmin)

        avail_resources = self.avail_resources

        for iteration in range(self.iterations):
            # assign part of round_resources to the iteration_in_round
            # dividing by the number of remaining iterations distributes the
            # pool evenly while consuming all of it by the last iteration
            iteration_resources = math.floor(avail_resources / (self.iterations - iteration))
            avail_resources -= iteration_resources

            _logger.debug(
                "iteration: %s/%s, iteration_resources: %s, rest avail_resources: %s",
                iteration, self.iterations, iteration_resources, avail_resources)

            yield IterScheduler.get_exact_iter_plan(
                self.job_resources.copy(), iteration_resources)
    else:
        # more than one round
        # minimum number of needed rounds
        rounds = math.ceil(
            (self.iterations / math.floor(float(self.avail_resources) / pmin)))
        iterations_to_schedule = self.iterations

        _logger.debug(
            "iterations to schedule: %s, rounds: %s, resources: %s, minimum iteration "
            "resources: %s", iterations_to_schedule, rounds, self.avail_resources, pmin)

        while iterations_to_schedule > 0:
            for ex_round in range(rounds):
                # spread the remaining iterations evenly over the remaining rounds
                iterations_in_round = math.ceil(iterations_to_schedule / (rounds - ex_round))
                # every round starts with the full resource pool
                round_resources = self.avail_resources

                _logger.debug("round: %s/%s, iterations_in_round: %s",
                              ex_round, rounds, iterations_in_round)

                for iteration_in_round in range(iterations_in_round):
                    # assign part of round_resources to the iteration_in_round
                    iteration_resources = math.floor(
                        round_resources / (iterations_in_round - iteration_in_round))
                    round_resources -= iteration_resources

                    _logger.debug(
                        "round: %s/%s, iteration_in_round: %s/%s, iteration_resources: %s, rest "
                        "round_resources: %s", ex_round, rounds, iteration_in_round,
                        iterations_in_round, iteration_resources, round_resources)

                    yield IterScheduler.get_exact_iter_plan(
                        self.job_resources.copy(), iteration_resources)

                iterations_to_schedule -= iterations_in_round

                _logger.debug(
                    "end of round: %s/%s, iterations_to_schedule: %s",
                    ex_round, rounds, iterations_to_schedule)
def __init__(self, data):
    """Initialize submit request.

    Validates each job description (name, execution, optional iteration
    directive), fills in a default 'resources' section when missing and
    attaches per-request substitution variables to every job.

    Args:
        data (dict): request data

    Raises:
        InvalidRequest: in case of wrong job description format
    """
    self.jobs = []

    assert data is not None

    if 'jobs' not in data or not data['jobs'] or not isinstance(data['jobs'], list):
        raise InvalidRequest('Wrong submit request - missing jobs data')

    # watch out for values - this data can be copied with the 'shallow' method
    # so complex structures should be omitted
    job_vars = {
        'rcnt': str(SubmitReq.REQ_CNT),
        'uniq': str(uuid.uuid4()),
        'sname': 'local',
        'date': str(datetime.datetime.today()),
        # NOTE(review): datetime.time() is always midnight ('00:00:00') - if the
        # current time was intended, this should be datetime.datetime.now().time();
        # kept as-is to preserve behavior - confirm with callers
        'time': str(datetime.time()),
        'dateTime': str(datetime.datetime.now())
    }

    SubmitReq.REQ_CNT += 1

    _logger.debug('request data contains %s jobs', len(data['jobs']))

    new_jobs = []

    for job_desc in data['jobs']:
        if not isinstance(job_desc, dict):
            raise InvalidRequest('Wrong submit request - wrong job data')

        if 'name' not in job_desc:
            raise InvalidRequest('Missing name in job description')

        if 'execution' not in job_desc:
            raise InvalidRequest('Missing execution element in job description')

        # look for 'iterate' directive
        if 'iteration' in job_desc:
            iteration = job_desc['iteration']

            if not isinstance(iteration, dict):
                raise InvalidRequest('Wrong format of iteration directive: not a dictionary')

            # bug fix: this case previously reported "not a dictionary" even when
            # the value WAS a dictionary that merely lacked the required keys
            if all(attr not in iteration for attr in ('stop', 'values')):
                raise InvalidRequest(
                    'Wrong format of iteration directive: missing stop or values')

            if 'stop' in iteration:
                start = iteration.get('start', 0)
                end = iteration['stop']
                if start > end:
                    raise InvalidRequest(
                        'Wrong format of iteration directive: start index larger then stop one')

        # default value for missing 'resources' definition
        if 'resources' not in job_desc:
            job_desc['resources'] = {'numCores': {'exact': 1}}

        new_jobs.append({'req': job_desc, 'vars': job_vars.copy()})

    self.jobs.extend(new_jobs)
async def _launch_partition_managers(self, nodes_in_partition):
    """Launch partition managers.

    The information about Slurm allocation is gathered, and all nodes are
    split by ``nodes_in_partition`` to form a single partition, and partition
    manager instance is created to control each partition.

    Args:
        nodes_in_partition (int): how many nodes each partition should have

    Raises:
        InvalidRequest: when
            * ``nodes_in_partition`` is less than 1
            * governor manager has not been launched in Slurm allocation
            * missing nodes in Slurm allocation
        InternalError: when
            * missing ZMQ interface in governor manager
    """
    _logger.info('setup allocation split into partitions by %s nodes', nodes_in_partition)

    if nodes_in_partition < 1:
        _logger.error(
            'Failed to partition resources - partition size must be greater or equal 1'
        )
        raise InvalidRequest(
            'Failed to partition resources - partition size must be greater or equal 1'
        )

    # get slurm resources - currently only slurm is supported
    if not in_slurm_allocation():
        _logger.error(
            'Failed to partition resources - partitioning resources is currently available only within '
            'slurm allocation')
        raise InvalidRequest(
            'Failed to partition resources - partitioning resources is currently available only '
            'within slurm allocation')

    # partition managers report back over ZMQ, so the interface must exist
    if not self.zmq_address:
        _logger.error(
            'Failed to partition resources - missing zmq interface address'
        )
        raise InternalError(
            'Failed to partition resources - missing zmq interface address'
        )

    slurm_resources = parse_slurm_resources(self._config)

    if slurm_resources.total_nodes < 1:
        raise InvalidRequest(
            'Failed to partition resources - allocation contains no nodes')

    # the last partition may be smaller than nodes_in_partition
    npartitions = math.ceil(slurm_resources.total_nodes / nodes_in_partition)
    _logger.info(
        '%s partitions will be created (in allocation containing %s total nodes)',
        npartitions, slurm_resources.total_nodes)

    # launch partition manager in the same directory as governor
    partition_manager_wdir = Config.EXECUTOR_WD.get(self._config)
    partition_manager_auxdir = Config.AUX_DIR.get(self._config)

    _logger.debug('partition managers working directory %s', partition_manager_wdir)

    for part_idx in range(npartitions):
        _logger.debug('creating partition manager %s configuration', part_idx)

        # each partition is anchored at its first node in the allocation
        part_node = slurm_resources.nodes[part_idx * nodes_in_partition]

        _logger.debug('partition manager node %s', part_node.name)

        # node range is [start, end) clamped to the allocation size
        self._partition_managers.append(
            PartitionManager(
                'partition-{}'.format(part_idx), part_node.name,
                part_idx * nodes_in_partition,
                min(part_idx * nodes_in_partition + nodes_in_partition,
                    slurm_resources.total_nodes),
                partition_manager_wdir, self.zmq_address,
                partition_manager_auxdir, self._config))

        _logger.debug('partition manager %s configuration created', part_idx)

    self._min_scheduling_managers = len(self._partition_managers)

    _logger.info(
        'created partition managers configuration and set minimum scheduling managers to %s',
        self._min_scheduling_managers)

    # start the managers asynchronously; this coroutine does not wait for them
    asyncio.ensure_future(self.manage_start_partition_managers())

    # launch task in the background that schedule buffered submit requests
    self._schedule_buffered_jobs_task = asyncio.ensure_future(
        self._schedule_buffered_jobs())