def __init__(self, datatypes, dest_rse_expr, max_bytes_hour, max_files_hour, max_bytes_hour_rse, max_files_hour_rse, min_popularity, min_recent_requests, max_replicas):
        self._fsc = FreeSpaceCollector()
        self._nmc = NetworkMetricsCollector()
        self._added_cache = ExpiringDatasetCache(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), timeout=86400)
        self._dc = DatasetCache(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), timeout=86400)
        self._added_bytes = RedisTimeSeries(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), window=3600, prefix="added_bytes_")
        self._added_files = RedisTimeSeries(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), window=3600, prefix="added_files_")

        self._datatypes = datatypes.split(',')
        self._dest_rse_expr = dest_rse_expr
        self._max_bytes_hour = max_bytes_hour
        self._max_files_hour = max_files_hour
        self._max_bytes_hour_rse = max_bytes_hour_rse
        self._max_files_hour_rse = max_files_hour_rse
        self._min_popularity = min_popularity
        self._min_recent_requests = min_recent_requests
        self._max_replicas = max_replicas

        rses = parse_expression(self._dest_rse_expr)

        self._rses = {}
        self._sites = {}
        for rse in rses:
            rse_attrs = list_rse_attributes(rse['rse'])
            rse_attrs['rse'] = rse['rse']
            self._rses[rse['rse']] = rse_attrs
            self._sites[rse_attrs['site']] = rse_attrs

        self._dst_penalties = {}
        self._src_penalties = {}

        self._print_params()
Example #2
        def __init__(self, delete_keys=False):
            self._avg_jobs = {}
            self._cur_jobs = {}
            self._max_jobs = {}
            self._tms = RedisTimeSeries(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), config_get_int('c3po-workload', 'window'), 'jobs_')

            self._request_headers = {"Accept": "application/json", "Content-Type": "application/json"}
            self._request_url = config_get('c3po-workload', 'panda_url')
            if delete_keys:
                self._tms.delete_keys()
            self.reload_cache()
Example #3
    def __init__(self,
                 redis_host,
                 redis_port,
                 timeout=1,
                 prefix='did_cache',
                 delete_keys=False):
        self._prefix = prefix + '_' + str(uuid4()).split('-')[0]
        self._tms = RedisTimeSeries(redis_host, redis_port, timeout,
                                    self._prefix)

        if delete_keys:
            self._tms.delete_keys()
Example #4
class DatasetCache(object):
    """
    Utility to count how often each dataset was accessed during the last day.
    """
    def __init__(self,
                 redis_host,
                 redis_port,
                 timeout=1,
                 prefix='did_cache',
                 delete_keys=False):
        self._prefix = prefix + '_' + str(uuid4()).split('-')[0]
        self._tms = RedisTimeSeries(redis_host, redis_port, timeout,
                                    self._prefix)

        if delete_keys:
            self._tms.delete_keys()

    def add_did(self, did):
        # record one access; the Redis key is the DID's scope and name joined with '_'
        self._tms.add_point('_'.join(did), 1)

    def get_did(self, did):
        # drop points that have fallen out of the time window before counting
        self._tms.trim()

        series = self._tms.get_series('_'.join(did))

        return len(series)
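
A minimal usage sketch of the class above, assuming a Redis server reachable on localhost:6379; the DID is a made-up (scope, name) tuple used purely for illustration:

cache = DatasetCache('localhost', 6379, timeout=86400)

did = ('data16_13TeV', 'data16_13TeV.00298591.physics_Main.merge.DAOD_EXOT2.f123_m456')
cache.add_did(did)          # record one access for this dataset
print(cache.get_did(did))   # number of accesses recorded within the window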
Example #5
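The two classes in this example come from Rucio's c3po daemon and rely on imports that the listing omits. The sketch below collects plausible imports: the standard-library, requests and core Rucio entries follow directly from the calls used, while the rucio.daemons.c3po.* paths are assumptions inferred from the class names rather than verified module locations.

import logging
from json import loads
from operator import itemgetter
from time import time

from requests import get

from rucio.common.config import config_get, config_get_int
from rucio.common.exception import DataIdentifierNotFound
from rucio.core.did import get_did
from rucio.core.replica import list_dataset_replicas
from rucio.core.rse import get_rse, list_rse_attributes
from rucio.core.rse_expression_parser import parse_expression
from rucio.db.sqla.constants import ReplicaState
# assumed locations of the c3po helpers (not verified against the source tree):
from rucio.daemons.c3po.collectors.free_space import FreeSpaceCollector
from rucio.daemons.c3po.collectors.network_metrics import NetworkMetricsCollector
from rucio.daemons.c3po.utils.dataset_cache import DatasetCache
from rucio.daemons.c3po.utils.expiring_dataset_cache import ExpiringDatasetCache
from rucio.daemons.c3po.utils.popularity import get_popularity
from rucio.daemons.c3po.utils.timeseries import RedisTimeSeries
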
    class __WorkloadCollector(object):
        """
        Private class needed to implement the singleton pattern.
        """
        def __init__(self, delete_keys=False):
            self._avg_jobs = {}
            self._cur_jobs = {}
            self._max_jobs = {}
            self._tms = RedisTimeSeries(
                config_get('c3po', 'redis_host'),
                config_get_int('c3po', 'redis_port'),
                config_get_int('c3po-workload', 'window'), 'jobs_')

            self._request_headers = {
                "Accept": "application/json",
                "Content-Type": "application/json"
            }
            self._request_url = config_get('c3po-workload', 'panda_url')
            if delete_keys:
                self._tms.delete_keys()
            self.reload_cache()

        def reload_cache(self):
            self._tms.trim()

            for key in self._tms.get_keys():
                # keys carry the 'jobs_' prefix; drop it to recover the site name
                site = "_".join(key.split('_')[1:])
                job_series = self._tms.get_series(site)
                num_jobs = len(job_series)
                if num_jobs > 0:
                    self._avg_jobs[site] = sum(job_series) / num_jobs
                    self._max_jobs[site] = max(job_series)
                    self._cur_jobs[site] = job_series[-1]

        def collect_workload(self):
            start = time()
            resp = get(self._request_url, headers=self._request_headers)
            logging.debug("PanDA response took %fs" % (time() - start))

            start = time()
            jobs = loads(resp.text)['jobs']
            logging.debug("decoding JSON response took %fs" % (time() - start))
            sites = {}

            start = time()
            for job in jobs:
                if job['computingsite'] not in sites:
                    sites[job['computingsite']] = 0
                sites[job['computingsite']] += 1
            for site, num_jobs in sites.items():
                self._tms.add_point(site, num_jobs)

            logging.debug("processing took %fs" % (time() - start))
            self.reload_cache()
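
The listing above shows only the private inner class; the enclosing singleton wrapper is not part of the excerpt. Below is a minimal, self-contained sketch of that common pattern. The outer name WorkloadCollector and its delegation interface are assumptions made for illustration, and the inner class is reduced to a stub:

class WorkloadCollector(object):
    """Hypothetical singleton facade around a private __WorkloadCollector."""

    class __WorkloadCollector(object):
        # stub standing in for the inner class shown in the listing above
        def __init__(self, delete_keys=False):
            self.delete_keys = delete_keys

    instance = None  # the one shared inner collector

    def __init__(self, delete_keys=False):
        # build the inner collector only on first construction; later calls reuse it
        if WorkloadCollector.instance is None:
            WorkloadCollector.instance = WorkloadCollector.__WorkloadCollector(delete_keys)

    def __getattr__(self, name):
        # delegate attribute access (reload_cache, collect_workload, ...) to the shared instance
        return getattr(WorkloadCollector.instance, name)

Constructing WorkloadCollector() twice then yields two thin facades backed by the same inner instance, so the Redis-backed job cache is only built once per process.
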
class PlacementAlgorithm:
    """
    Placement algorithm that focuses on free space on T2 DATADISK RSEs and incorporates network metrics into its placement decisions.
    """
    def __init__(self, datatypes, dest_rse_expr, max_bytes_hour, max_files_hour, max_bytes_hour_rse, max_files_hour_rse, min_popularity, min_recent_requests, max_replicas):
        self._fsc = FreeSpaceCollector()
        self._nmc = NetworkMetricsCollector()
        self._added_cache = ExpiringDatasetCache(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), timeout=86400)
        self._dc = DatasetCache(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), timeout=86400)
        self._added_bytes = RedisTimeSeries(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), window=3600, prefix="added_bytes_")
        self._added_files = RedisTimeSeries(config_get('c3po', 'redis_host'), config_get_int('c3po', 'redis_port'), window=3600, prefix="added_files_")

        self._datatypes = datatypes.split(',')
        self._dest_rse_expr = dest_rse_expr
        self._max_bytes_hour = max_bytes_hour
        self._max_files_hour = max_files_hour
        self._max_bytes_hour_rse = max_bytes_hour_rse
        self._max_files_hour_rse = max_files_hour_rse
        self._min_popularity = min_popularity
        self._min_recent_requests = min_recent_requests
        self._max_replicas = max_replicas

        rses = parse_expression(self._dest_rse_expr)

        self._rses = {}
        self._sites = {}
        for rse in rses:
            rse_attrs = list_rse_attributes(rse['rse'])
            rse_attrs['rse'] = rse['rse']
            self._rses[rse['rse']] = rse_attrs
            self._sites[rse_attrs['site']] = rse_attrs

        self._dst_penalties = {}
        self._src_penalties = {}

        self._print_params()

    def _print_params(self):
        logging.debug('Parameter Overview:')
        logging.debug('Algorithm: t2_free_space_only_pop_with_network')
        logging.debug('Datatypes: %s' % ','.join(self._datatypes))
        logging.debug('Max bytes/files per hour: %d / %d' % (self._max_bytes_hour, self._max_files_hour))
        logging.debug('Max bytes/files per hour per RSE: %d / %d' % (self._max_bytes_hour_rse, self._max_files_hour_rse))
        logging.debug('Min recent requests / Min popularity: %d / %d' % (self._min_recent_requests, self._min_popularity))
        logging.debug('Max existing replicas: %d' % self._max_replicas)

    def __update_penalties(self):
        for rse, penalty in self._dst_penalties.items():
            if penalty < 100.0:
                self._dst_penalties[rse] += 10.0

        for rse, penalty in self._src_penalties.items():
            if penalty < 100.0:
                self._src_penalties[rse] += 10.0

    def check_did(self, did):
        decision = {'did': ':'.join(did)}
        if self._added_cache.check_dataset(':'.join(did)):
            decision['error_reason'] = 'already added replica for this did in the last 24h'
            return decision

        if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
            decision['error_reason'] = 'not a data or mc dataset'
            return decision

        # the datatype is the fifth dot-separated field of the dataset name, e.g. 'DAOD_EXOT2' gives 'DAOD'
        datatype = did[1].split('.')[4].split('_')[0]

        if datatype not in self._datatypes:
            decision['error_reason'] = 'wrong datatype'
            return decision

        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision
        if meta['length'] is None:
            meta['length'] = 0
        if meta['bytes'] is None:
            meta['bytes'] = 0
        logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

        decision['length'] = meta['length']
        decision['bytes'] = meta['bytes']

        total_added_bytes = sum(self._added_bytes.get_series('total'))
        total_added_files = sum(self._added_files.get_series('total'))

        logging.debug("total_added_bytes: %d" % total_added_bytes)
        logging.debug("total_added_files: %d" % total_added_files)

        if ((total_added_bytes + meta['bytes']) > self._max_bytes_hour):
            decision['error_reason'] = 'above bytes limit of %d bytes' % self._max_bytes_hour
            return decision
        if ((total_added_files + meta['length']) > self._max_files_hour):
            decision['error_reason'] = 'above files limit of %d files' % self._max_files_hour
            return decision

        last_accesses = self._dc.get_did(did)
        self._dc.add_did(did)

        decision['last_accesses'] = last_accesses

        try:
            # treat a missing popularity as 0.0 so the comparison below never sees None
            pop = get_popularity(did) or 0.0
            decision['popularity'] = pop
        except Exception:
            decision['error_reason'] = 'problems connecting to ES'
            return decision

        if (last_accesses < self._min_recent_requests) and (pop < self._min_popularity):
            decision['error_reason'] = 'did not popular enough'
            return decision

        return decision

    def place(self, did):
        self.__update_penalties()
        self._added_bytes.trim()
        self._added_files.trim()

        decision = self.check_did(did)

        if 'error_reason' in decision:
            return decision

        meta = get_did(did[0], did[1])
        available_reps = {}
        reps = list_dataset_replicas(did[0], did[1])
        num_reps = 0
        space_info = self._fsc.get_rse_space()
        max_mbps = 0.0
        for rep in reps:
            rse_attr = list_rse_attributes(rep['rse'])
            src_rse = rep['rse']
            if 'site' not in rse_attr:
                continue

            src_site = rse_attr['site']
            src_rse_info = get_rse(src_rse)

            if 'type' not in rse_attr:
                continue
            if rse_attr['type'] != 'DATADISK':
                continue
            # 'availability' is a bit mask (read = 4, write = 2, delete = 1); the source must be readable
            if src_rse_info['availability'] & 4 == 0:
                continue

            if rep['state'] == ReplicaState.AVAILABLE:
                if rep['available_length'] == 0:
                    continue
                net_metrics = {}
                net_metrics_type = None
                for metric_type in ('fts', 'fax', 'perfsonar', 'dashb'):
                    net_metrics_type = metric_type
                    net_metrics = self._nmc.getMbps(src_site, metric_type)
                    if net_metrics:
                        break
                if len(net_metrics) == 0:
                    continue
                available_reps[src_rse] = {}
                for dst_site, mbps in net_metrics.items():
                    if src_site == dst_site:
                        continue
                    if dst_site in self._sites:
                        if mbps > max_mbps:
                            max_mbps = mbps
                        dst_rse = self._sites[dst_site]['rse']
                        dst_rse_info = get_rse(dst_rse)

                        # the destination must be writable (write bit = 2)
                        if dst_rse_info['availability'] & 2 == 0:
                            continue

                        site_added_bytes = sum(self._added_bytes.get_series(dst_rse))
                        site_added_files = sum(self._added_files.get_series(dst_rse))

                        if ((site_added_bytes + meta['bytes']) > self._max_bytes_hour_rse):
                            continue
                        if ((site_added_files + meta['length']) > self._max_files_hour_rse):
                            continue

                        queued = self._nmc.getQueuedFiles(src_site, dst_site)

                        # logging.debug('queued %s -> %s: %d' % (src_site, dst_site, queued))
                        if queued > 0:
                            continue
                        rse_space = space_info.get(dst_rse, {'free': 0, 'total': 1})
                        if src_rse not in self._src_penalties:
                            self._src_penalties[src_rse] = 100.0
                        src_penalty = self._src_penalties[src_rse]
                        if dst_rse not in self._dst_penalties:
                            self._dst_penalties[dst_rse] = 100.0
                        dst_penalty = self._dst_penalties[dst_rse]

                        free_space = float(rse_space['free']) / float(rse_space['total']) * 100.0
                        available_reps[src_rse][dst_rse] = {'free_space': free_space, 'src_penalty': src_penalty, 'dst_penalty': dst_penalty, 'mbps': float(mbps), 'metrics_type': net_metrics_type}

                num_reps += 1

        # decision['replica_rses'] = available_reps
        decision['num_replicas'] = num_reps

        # note that the limit is hard-coded to 5 replicas; the max_replicas parameter is not consulted here
        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision

        src_dst_ratios = []

        if max_mbps == 0.0:
            decision['error_reason'] = 'could not find enough network metrics'
            return decision

        for src, dsts in available_reps.items():
            for dst, metrics in dsts.items():
                # skip destinations that already hold a replica (replica-holding RSEs are the keys of available_reps)
                if dst in available_reps:
                    continue
                bdw = (metrics['mbps'] / max_mbps) * 100.0
                src_penalty = self._src_penalties[src]
                dst_penalty = self._dst_penalties[dst]

                ratio = ((metrics['free_space'] / 4.0) + bdw) * src_penalty * dst_penalty
                src_dst_ratios.append((src, dst, ratio))

        if len(src_dst_ratios) == 0:
            decision['error_reason'] = 'found no suitable src/dst for replication'
            return decision

        sorted_ratios = sorted(src_dst_ratios, key=itemgetter(2), reverse=True)
        logging.debug(sorted_ratios)
        destination_rse = sorted_ratios[0][1]
        source_rse = sorted_ratios[0][0]
        decision['destination_rse'] = destination_rse
        decision['source_rse'] = source_rse
        # decision['rse_ratios'] = src_dst_ratios
        self._dst_penalties[destination_rse] = 10.0
        self._src_penalties[source_rse] = 10.0

        self._added_cache.add_dataset(':'.join(did))

        self._added_bytes.add_point(destination_rse, meta['bytes'])
        self._added_files.add_point(destination_rse, meta['length'])

        self._added_bytes.add_point('total', meta['bytes'])
        self._added_files.add_point('total', meta['length'])

        return decision
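
Finally, a hedged sketch of how the placement algorithm might be driven. The constructor arguments, the RSE expression and the DID are invented example values, and a configured Rucio instance with Redis and the c3po collectors is assumed:

algorithm = PlacementAlgorithm(
    datatypes='AOD,DAOD',                  # comma-separated list, split in __init__
    dest_rse_expr='tier=2&type=DATADISK',  # candidate destination RSEs
    max_bytes_hour=5 * 1024**4,            # global hourly byte budget
    max_files_hour=100000,                 # global hourly file budget
    max_bytes_hour_rse=1024**4,            # per-RSE hourly budgets
    max_files_hour_rse=20000,
    min_popularity=8,
    min_recent_requests=5,
    max_replicas=5)

did = ('data16_13TeV', 'data16_13TeV.00298591.physics_Main.merge.DAOD_EXOT2.f123_m456')
decision = algorithm.place(did)
if 'error_reason' in decision:
    print('no replication: %s' % decision['error_reason'])
else:
    print('replicate %s from %s to %s' % (decision['did'], decision['source_rse'], decision['destination_rse']))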