def read_workload(once=False, thread=0, waiting_time=1800, sleep_time=10):
    """
    Thread to collect the workload information from PanDA.

    Polls in ``sleep_time`` steps and triggers a collection every
    ``waiting_time`` seconds until GRACEFUL_STOP is set.
    """
    collector = WorkloadCollector()
    # Start with the interval already elapsed so the first pass collects
    # immediately instead of waiting a full period.
    elapsed = waiting_time
    while not GRACEFUL_STOP.is_set():
        if elapsed >= waiting_time:
            logging.info('collecting workload')
            collector.collect_workload()
            elapsed = 0
        else:
            elapsed += sleep_time
            sleep(sleep_time)
def print_workload(once=False, thread=0, waiting_time=600, sleep_time=10):
    """
    Thread to regularly output the workload to logs for debugging.

    Every ``waiting_time`` seconds, logs the number of cached sites and the
    current / average / maximum job counts per site, until GRACEFUL_STOP
    is set.
    """
    collector = WorkloadCollector()
    # Interval starts expired so the first report is emitted right away.
    elapsed = waiting_time
    while not GRACEFUL_STOP.is_set():
        if elapsed >= waiting_time:
            logging.info('Number of sites cached %d' % len(collector.get_sites()))
            for site in collector.get_sites():
                logging.info('%s: %d / %d / %d' % (site,
                                                   collector.get_cur_jobs(site),
                                                   collector.get_avg_jobs(site),
                                                   collector.get_max_jobs(site)))
            elapsed = 0
        else:
            elapsed += sleep_time
            sleep(sleep_time)
def __init__(self):
    # NOTE(review): this appears to be a duplicate of
    # PlacementAlgorithm.__init__ sitting outside the class body in this
    # chunk — confirm against the full file. Outside a class body the name
    # self.__setup_penalties is NOT mangled, so this call would only
    # resolve if the attribute exists under that literal name.
    self._mc = MappingCollector()   # site-name mapping helper (PanDA/DDM <-> site)
    self._wc = WorkloadCollector()  # per-site job-count source
    self.__setup_penalties()        # initialise the per-site penalty table
class PlacementAlgorithm:
    """
    Decide source and destination RSEs for replicating a dataset DID.

    Sites are ranked by a load ratio derived from PanDA job counts and a
    per-site penalty that discourages picking the same site repeatedly.
    """

    def __init__(self):
        self._mc = MappingCollector()   # PanDA/DDM <-> site name mapping
        self._wc = WorkloadCollector()  # per-site job-count source
        self.__setup_penalties()

    def __setup_penalties(self):
        # Every site known at startup begins at the neutral penalty 0.1.
        self._penalties = {}
        for panda_site in self._wc.get_sites():
            site = self._mc.panda_to_site(panda_site)
            self._penalties[site] = 0.1

    def __update_penalties(self):
        # Decay each penalty by 0.1 per placement, never below the
        # 0.1 floor. (Mutating values during items() iteration is safe;
        # no keys are added or removed.)
        for site, penalty in self._penalties.items():
            if penalty > 0.1:
                self._penalties[site] = penalty - 0.1

    def place(self, did):
        """
        Pick a source and destination RSE for one dataset.

        :param did: (scope, name) pair identifying the dataset.
        :returns: decision dict; on failure it carries 'error_reason'
                  instead of 'source_rse'/'destination_rse'.
        """
        self.__update_penalties()
        decision = {'did': ':'.join(did)}

        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision

        # Metadata may carry NULLs (e.g. open datasets); normalise to 0.
        if meta['length'] is None:
            meta['length'] = 0
        if meta['bytes'] is None:
            meta['bytes'] = 0
        logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

        decision['length'] = meta['length']
        decision['bytes'] = meta['bytes']

        available_rses = []
        available_sites = []
        num_reps = 0
        for rep in list_dataset_replicas(did[0], did[1]):
            if rep['state'] == ReplicaState.AVAILABLE:
                available_rses.append(rep['rse'])
                available_sites.append(self._mc.ddm_to_site(rep['rse']))
                num_reps += 1

        decision['replica_rses'] = available_rses
        decision['num_replicas'] = num_reps

        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision
        # BUGFIX: with no available replica there is nothing to copy from;
        # the original fell through and crashed on available_rses[0].
        if not available_rses:
            decision['error_reason'] = 'no available replica found'
            return decision

        site_ratios = {}
        site_job_info = {}
        for panda_site in self._wc.get_sites():
            site = self._mc.panda_to_site(panda_site)
            job_info = self._wc.get_job_info(panda_site)
            # Load ratio: current jobs over (average + half of maximum).
            # BUGFIX: guard the denominator — an idle site with zero
            # average and maximum jobs raised ZeroDivisionError.
            denom = float(job_info[1]) + float(job_info[2]) / 2
            ratio = float(job_info[0]) / denom if denom else 0.0
            # BUGFIX: sites first seen after startup are absent from the
            # penalty table; fall back to the neutral 0.1 instead of
            # raising KeyError.
            penalty = self._penalties.get(site, 0.1)
            site_ratios[site] = ratio * penalty
            site_job_info[site] = (job_info, penalty)

        decision['site_ratios'] = site_ratios
        decision['site_job_info'] = site_job_info

        # Walk sites from least to most loaded, skip ones that already hold
        # a replica, and take the first DATADISK endpoint found.
        picked_site = None
        picked_rse = None
        for site, _ in sorted(site_ratios.items(), key=itemgetter(1)):
            if site in available_sites:
                continue
            rses_for_site = self._mc.site_to_ddm(site)
            if rses_for_site is None:
                continue
            for rse in rses_for_site:
                if 'DATADISK' in rse:
                    picked_rse = rse
                    picked_site = site
                    break
            if picked_rse:
                break

        if picked_rse is None:
            decision['error_reason'] = 'could not pick RSE'
            return decision

        decision['destination_rse'] = picked_rse
        if picked_site:
            # Penalise the chosen site so consecutive placements spread out.
            # BUGFIX: index with picked_site, not the leaked loop variable.
            self._penalties[picked_site] = 1

        # Prefer a random non-TAPE source; fall back to any available replica.
        picked_source = None
        shuffle(available_rses)
        for rse in available_rses:
            if 'TAPE' in rse:
                continue
            picked_source = rse
            break
        if picked_source is None:
            picked_source = available_rses[0]

        decision['source_rse'] = picked_source
        logging.debug("Picked %s as source and %s as destination RSE" % (picked_source, picked_rse))
        return decision