Пример #1
0
def read_workload(once=False, thread=0, waiting_time=1800, sleep_time=10):
    """
    Worker loop that periodically refreshes the workload information.

    A WorkloadCollector is created once; its collect_workload() method is
    invoked immediately on the first pass and then again every time the
    accumulated sleep time reaches ``waiting_time``. The loop ends when
    GRACEFUL_STOP is set.

    :param once: accepted for interface compatibility; not used by this loop.
    :param thread: accepted for interface compatibility; not used by this loop.
    :param waiting_time: amount of accumulated sleep time required between
                         two collections.
    :param sleep_time: value passed to sleep() on every idle pass; it is also
                       the granularity at which GRACEFUL_STOP is re-checked.
    """
    collector = WorkloadCollector()
    # Start "expired" so that the very first pass collects right away.
    elapsed = waiting_time
    while not GRACEFUL_STOP.is_set():
        if elapsed < waiting_time:
            # Not due yet: nap briefly so a stop request is noticed quickly.
            elapsed += sleep_time
            sleep(sleep_time)
        else:
            logging.info('collecting workload')
            collector.collect_workload()
            elapsed = 0
Пример #2
0
def print_workload(once=False, thread=0, waiting_time=600, sleep_time=10):
    """
    Worker loop that periodically writes the cached workload to the log.

    On the first pass, and then every time the accumulated sleep time reaches
    ``waiting_time``, the number of sites known to the WorkloadCollector is
    logged, followed by one line per site with its current / average /
    maximum job figures. The loop ends when GRACEFUL_STOP is set.

    :param once: accepted for interface compatibility; not used by this loop.
    :param thread: accepted for interface compatibility; not used by this loop.
    :param waiting_time: amount of accumulated sleep time required between
                         two log dumps.
    :param sleep_time: value passed to sleep() on every idle pass; it is also
                       the granularity at which GRACEFUL_STOP is re-checked.
    """
    collector = WorkloadCollector()
    # Start "expired" so that the very first pass logs right away.
    elapsed = waiting_time
    while not GRACEFUL_STOP.is_set():
        if elapsed < waiting_time:
            # Not due yet: nap briefly so a stop request is noticed quickly.
            elapsed += sleep_time
            sleep(sleep_time)
        else:
            logging.info('Number of sites cached %d' % len(collector.get_sites()))
            for site in collector.get_sites():
                cur_jobs = collector.get_cur_jobs(site)
                avg_jobs = collector.get_avg_jobs(site)
                max_jobs = collector.get_max_jobs(site)
                logging.info('%s: %d / %d / %d' % (site, cur_jobs, avg_jobs, max_jobs))
            elapsed = 0
Пример #3
0
 def __init__(self):
     """
     Create the collectors this object works with and initialise penalties.

     Stores a MappingCollector in ``self._mc`` and a WorkloadCollector in
     ``self._wc``, then calls the private ``__setup_penalties`` helper.
     """
     # NOTE(review): the collectors are assigned before __setup_penalties()
     # runs; presumably that helper reads them - keep this order.
     self._mc = MappingCollector()
     self._wc = WorkloadCollector()
     self.__setup_penalties()
Пример #4
0
class PlacementAlgorithm:
    """
    Decide where an additional replica of a dataset should be created.

    Candidate sites are ranked by
    ``job_info[0] / (job_info[1] + job_info[2] / 2) * penalty`` and the site
    with the lowest value that does not already hold an available replica and
    offers a DATADISK RSE is chosen as destination. A site that has just been
    picked receives the maximum penalty, which decays by one step on every
    call to :meth:`place` until it is back at the minimum.
    """

    # Resting penalty of a site; also the value a new site starts with.
    _MIN_PENALTY = 0.1
    # Penalty given to a site right after it was picked as destination.
    _MAX_PENALTY = 1
    # Amount the penalty decays per call to place().
    _PENALTY_STEP = 0.1
    # Datasets with at least this many available replicas are not replicated.
    _REPLICA_LIMIT = 5

    def __init__(self):
        """
        Create the mapping and workload collectors and initialise penalties.
        """
        self._mc = MappingCollector()
        self._wc = WorkloadCollector()
        self.__setup_penalties()

    def __setup_penalties(self):
        """
        Give every site currently known to the workload collector the
        minimum penalty.
        """
        self._penalties = {}
        for panda_site in self._wc.get_sites():
            site = self._mc.panda_to_site(panda_site)
            self._penalties[site] = self._MIN_PENALTY

    def __update_penalties(self):
        """
        Decay every penalty above the minimum by one step, never going below
        the minimum.
        """
        for site, penalty in self._penalties.items():
            if penalty > self._MIN_PENALTY:
                # Clamp at the minimum: repeatedly subtracting 0.1 from 1 in
                # floating point yields 0.10000000000000014 rather than 0.1,
                # and an unclamped extra step would drop the penalty to
                # ~1.4e-16, making the site look almost free.
                self._penalties[site] = max(self._MIN_PENALTY,
                                            penalty - self._PENALTY_STEP)

    def __rank_sites(self):
        """
        Compute the penalised load ratio of every site with usable job info.

        :returns: tuple ``(site_ratios, site_job_info)`` where ``site_ratios``
                  maps site -> ratio * penalty and ``site_job_info`` maps
                  site -> (job_info, penalty).
        """
        site_ratios = {}
        site_job_info = {}
        for panda_site in self._wc.get_sites():
            site = self._mc.panda_to_site(panda_site)
            job_info = self._wc.get_job_info(panda_site)
            denominator = float(job_info[1]) + float(job_info[2]) / 2
            if denominator == 0:
                # No ratio can be computed for this site; skip it instead of
                # failing the whole placement with a ZeroDivisionError.
                logging.debug('skipping %s: no job capacity information', site)
                continue
            # Sites may show up in the workload after this object was built;
            # they start with the minimum penalty like all others.
            penalty = self._penalties.setdefault(site, self._MIN_PENALTY)
            ratio = float(job_info[0]) / denominator
            site_ratios[site] = ratio * penalty
            site_job_info[site] = (job_info, penalty)
        return site_ratios, site_job_info

    def __pick_destination(self, site_ratios, available_sites):
        """
        Pick the lowest-ranked site without a replica that has a DATADISK RSE.

        :param site_ratios: dict site -> penalised ratio.
        :param available_sites: sites that already hold an available replica.
        :returns: tuple ``(site, rse)`` or ``(None, None)`` if nothing fits.
        """
        for site, _ in sorted(site_ratios.items(), key=itemgetter(1)):
            if site in available_sites:
                continue
            rses_for_site = self._mc.site_to_ddm(site)
            if rses_for_site is None:
                continue
            for rse in rses_for_site:
                if 'DATADISK' in rse:
                    return site, rse
        return None, None

    @staticmethod
    def __pick_source(available_rses):
        """
        Pick a random source RSE, preferring any RSE without 'TAPE' in its
        name; falls back to a random tape RSE if there is nothing else.

        :param available_rses: non-empty list of RSE names.
        :returns: the chosen RSE name.
        """
        # Shuffle a copy so the caller's list (which is also reported in the
        # decision) keeps its order.
        candidates = list(available_rses)
        shuffle(candidates)
        for rse in candidates:
            if 'TAPE' not in rse:
                return rse
        return candidates[0]

    def place(self, did):
        """
        Decide on a source and destination RSE for a new replica of ``did``.

        :param did: sequence whose first two items are passed to get_did()
                    and list_dataset_replicas(), and which is joined with
                    ':' for the 'did' entry of the decision.
        :returns: decision dict. It always contains 'did'. On success it
                  contains 'destination_rse' and 'source_rse'; on failure it
                  contains 'error_reason'. Depending on how far the decision
                  got it also holds 'length', 'bytes', 'replica_rses',
                  'num_replicas', 'site_ratios' and 'site_job_info'.
        """
        self.__update_penalties()
        decision = {'did': ':'.join(did)}
        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision

        # Missing metadata is reported as zero.
        length = 0 if meta['length'] is None else meta['length']
        size = 0 if meta['bytes'] is None else meta['bytes']
        logging.debug('got %s:%s, num_files: %d, bytes: %d',
                      did[0], did[1], length, size)

        decision['length'] = length
        decision['bytes'] = size

        # Only replicas in state AVAILABLE count, both as existing copies
        # and as possible sources.
        available_rses = []
        available_sites = []
        for rep in list_dataset_replicas(did[0], did[1]):
            if rep['state'] == ReplicaState.AVAILABLE:
                available_rses.append(rep['rse'])
                available_sites.append(self._mc.ddm_to_site(rep['rse']))
        num_reps = len(available_rses)

        decision['replica_rses'] = available_rses
        decision['num_replicas'] = num_reps
        if num_reps >= self._REPLICA_LIMIT:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision
        if not available_rses:
            # Without an available replica there is nothing to copy from;
            # bail out before any site gets penalised.
            decision['error_reason'] = 'no available replica to use as source'
            return decision

        site_ratios, site_job_info = self.__rank_sites()
        decision['site_ratios'] = site_ratios
        decision['site_job_info'] = site_job_info

        picked_site, picked_rse = self.__pick_destination(site_ratios,
                                                          available_sites)
        if picked_rse is None:
            decision['error_reason'] = 'could not pick RSE'
            return decision

        decision['destination_rse'] = picked_rse
        if picked_site:
            # Make the chosen site unattractive for the next few decisions.
            self._penalties[picked_site] = self._MAX_PENALTY

        picked_source = self.__pick_source(available_rses)
        decision['source_rse'] = picked_source
        logging.debug("Picked %s as source and %s as destination RSE",
                      picked_source, picked_rse)

        return decision