def create_workers(self, worker_spec_list):
    """
    Report newly created workers to APFMon by PUTting a JSON payload to
    ``<base_url>/jobs``.

    Workers are submitted in shards of 20 to keep each HTTP request small.
    Workers without a batchID are skipped. A failing PUT for one shard is
    logged and does not abort the remaining shards.

    :param worker_spec_list: iterable of worker spec objects; each must
        expose ``batchID``, ``workerID``, ``computingSite``,
        ``computingElement`` and ``workAttributes``
    :return: None (no-op when APFMon reporting is disabled)
    """
    start_time = time.time()
    tmp_log = core_utils.make_logger(_base_logger,
                                     'harvester_id={0}'.format(self.harvester_id),
                                     method_name='create_workers')

    if not self.__active:
        tmp_log.debug('APFMon reporting not enabled')
        return

    try:
        tmp_log.debug('start')

        url = '{0}/jobs'.format(self.base_url)

        # shard the workers so each PUT carries at most 20 entries
        for worker_spec_shard in generic_utils.create_shards(worker_spec_list, 20):
            apfmon_workers = []
            for worker_spec in worker_spec_shard:
                batch_id = worker_spec.batchID
                worker_id = worker_spec.workerID
                if not batch_id:
                    tmp_log.debug('no batchID found for workerID {0}... skipping'.format(worker_id))
                    continue
                factory = self.harvester_id
                computingsite = worker_spec.computingSite
                try:
                    ce = clean_ce(worker_spec.computingElement)
                except AttributeError:
                    # worker has no computingElement attribute set
                    tmp_log.debug('no CE found for workerID {0} batchID {1}'.format(worker_id, batch_id))
                    ce = NO_CE

                # extract the log URLs
                stdout_url = ''
                stderr_url = ''
                log_url = ''
                jdl_url = ''

                work_attribs = worker_spec.workAttributes
                if work_attribs:
                    if 'stdOut' in work_attribs:
                        stdout_url = work_attribs['stdOut']
                        # jdl_url = '{0}.jdl'.format(stdout_url[:-4])
                    if 'stdErr' in work_attribs:
                        stderr_url = work_attribs['stdErr']
                    if 'batchLog' in work_attribs:
                        log_url = work_attribs['batchLog']
                    if 'jdl' in work_attribs:
                        jdl_url = work_attribs['jdl']

                apfmon_worker = {'cid': batch_id,
                                 'factory': factory,
                                 'label': '{0}-{1}'.format(computingsite, ce),
                                 'jdlurl': jdl_url,
                                 'stdouturl': stdout_url,
                                 'stderrurl': stderr_url,
                                 'logurl': log_url}
                tmp_log.debug('packed worker: {0}'.format(apfmon_worker))
                apfmon_workers.append(apfmon_worker)

            payload = json.dumps(apfmon_workers)

            try:
                r = requests.put(url, data=payload, timeout=self.__worker_timeout)
                tmp_log.debug('worker creation for {0} ended with {1} {2}'.format(apfmon_workers,
                                                                                  r.status_code,
                                                                                  r.text))
            except Exception:
                # BUG FIX: original format string had no {1} placeholder, so the
                # traceback was computed but silently dropped from the log line
                tmp_log.debug('worker creation for {0} failed with {1}'.format(apfmon_workers,
                                                                               traceback.format_exc()))

        end_time = time.time()
        tmp_log.debug('done (took {0})'.format(end_time - start_time))
    except Exception:
        # narrowed from bare except: do not swallow SystemExit/KeyboardInterrupt
        tmp_log.error('Excepted with: {0}'.format(traceback.format_exc()))
def create_labels(self):
    """
    Create or update the collection of APFMon labels (one label per
    PanDA queue + CE combination) by PUTting JSON to ``<base_url>/labels``.

    Active queues are taken from the queue config mapper and published in
    shards of 20. When a queue has no CEs in the central site info
    (e.g. P1, HPCs), a ``ceEndpoint`` from the local submitter config is
    used, and failing that a dummy CE value.

    :return: None (no-op when APFMon reporting is disabled)
    """
    start_time = time.time()
    tmp_log = core_utils.make_logger(_base_logger,
                                     'harvester_id={0}'.format(self.harvester_id),
                                     method_name='create_labels')

    if not self.__active:
        tmp_log.debug('APFMon reporting not enabled')
        return

    try:
        tmp_log.debug('start')

        url = '{0}/labels'.format(self.base_url)

        # get the active queues from the config mapper
        all_sites = self.queue_config_mapper.get_active_queues().keys()
        panda_queues_dict = PandaQueuesDict()

        # publish the active queues to APF mon in shards
        for sites in generic_utils.create_shards(all_sites, 20):
            labels = []
            for site in sites:
                try:
                    site_info = panda_queues_dict.get(site, dict())
                    if not site_info:
                        tmp_log.warning('No site info for {0}'.format(site))
                        continue
                    # when no CEs associated to a queue, e.g. P1, HPCs, etc. Try to see if there is something
                    # in local configuration, otherwise set it to a dummy value
                    try:
                        ce = self.queue_config_mapper.queueConfig[site].submitter['ceEndpoint']
                        queues = [{'ce_endpoint': ce}]
                    except KeyError:
                        if site_info['queues']:
                            queues = site_info['queues']
                        else:
                            queues = [{'ce_endpoint': NO_CE}]
                    for queue in queues:
                        try:
                            ce = clean_ce(queue['ce_endpoint'])
                        except Exception:
                            # narrowed from bare except; keep best-effort fallback
                            ce = ''
                        try:
                            ce_queue_id = queue['ce_queue_id']
                        except KeyError:
                            ce_queue_id = 0
                        labels.append({'name': '{0}-{1}'.format(site, ce),
                                       'wmsqueue': site,
                                       'ce_queue_id': ce_queue_id,
                                       'factory': self.harvester_id})
                except Exception:
                    # one broken site must not abort publication of the others
                    tmp_log.error('Excepted for site {0} with: {1}'.format(site, traceback.format_exc()))
                    continue

            payload = json.dumps(labels)

            r = requests.put(url, data=payload, timeout=self.__label_timeout)
            tmp_log.debug('label creation for {0} ended with {1} {2}'.format(sites,
                                                                             r.status_code,
                                                                             r.text))

        end_time = time.time()
        tmp_log.debug('done (took {0})'.format(end_time - start_time))
    except Exception:
        # narrowed from bare except: do not swallow SystemExit/KeyboardInterrupt
        tmp_log.error('Excepted with: {0}'.format(traceback.format_exc()))