Example no. 1
0
def list_dataset_replicas(scope, name, deep=False):
    """
    List the replicas of a dataset by delegating to the core replica layer.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.

    :returns: A list of dict dataset replicas
    """
    result = replica.list_dataset_replicas(scope=scope, name=name, deep=deep)
    return result
Example no. 2
0
def list_dataset_replicas(scope, name, deep=False):
    """
    Yield the replicas of a dataset, converting each internal scope back
    to its external representation before handing it to the caller.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.

    :returns: A list of dict dataset replicas
    """
    internal_scope = InternalScope(scope)
    for rep in replica.list_dataset_replicas(scope=internal_scope, name=name, deep=deep):
        rep['scope'] = rep['scope'].external
        yield rep
Example no. 3
0
def list_dataset_replicas(scope, name, deep=False, vo='def'):
    """
    Yield the replicas of a dataset for a given VO, passing each record
    through the API return-dict normalizer.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param vo: The VO to act on.

    :returns: A list of dict dataset replicas
    """
    internal_scope = InternalScope(scope, vo=vo)
    for rep in replica.list_dataset_replicas(scope=internal_scope, name=name, deep=deep):
        yield api_update_return_dict(rep)
Example no. 4
0
def list_dataset_replicas(scope, name, deep=False, vo='def', session=None):
    """
    Yield the replicas of a dataset for a given VO on an explicit database
    session, converting internal scopes back to their external form.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param vo: The VO to act on.
    :param session: The database session in use.

    :returns: A list of dict dataset replicas
    """
    internal_scope = InternalScope(scope, vo=vo)
    replica_iter = replica.list_dataset_replicas(scope=internal_scope,
                                                 name=name,
                                                 deep=deep,
                                                 session=session)
    for rep in replica_iter:
        rep['scope'] = rep['scope'].external
        yield rep
Example no. 5
0
    def place(self, did):
        """
        Decide whether and where to place a new replica of one dataset DID.

        :param did: (scope, name) pair identifying the dataset.
        :returns: decision dict; contains 'error_reason' when no placement
                  is made, otherwise 'destination_rse' and diagnostics.
        """
        self.__update_penalties()
        decision = {'did': ':'.join(did)}
        # Only data/mc datasets are eligible for placement.
        if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
            decision['error_reason'] = 'not a data or mc dataset'
            return decision

        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision
        # Normalize missing size/length metadata to zero.
        if meta['length'] is None:
            meta['length'] = 0
        if meta['bytes'] is None:
            meta['bytes'] = 0
        logging.debug('got %s:%s, num_files: %d, bytes: %d' %
                      (did[0], did[1], meta['length'], meta['bytes']))

        decision['length'] = meta['length']
        decision['bytes'] = meta['bytes']

        # Record this access and read the count of prior ones.
        last_accesses = self._dc.get_did(did)
        self._dc.add_did(did)

        decision['last_accesses'] = last_accesses

        pop = get_popularity(did)
        decision['popularity'] = pop or 0.0

        if (last_accesses < 5) and (pop < 10.0):
            decision['error_reason'] = 'did not popular enough'
            return decision

        # BUGFIX: copy the candidate pool. The original aliased self._rses,
        # so the remove() below permanently shrank the shared RSE list
        # across successive place() calls.
        free_rses = list(self._rses)
        available_reps = []
        reps = list_dataset_replicas(did[0], did[1])
        num_reps = 0
        for rep in reps:
            rse_attr = list_rse_attributes(rep['rse'])
            # Only DATADISK endpoints count as existing replicas here.
            if 'type' not in rse_attr:
                continue
            if rse_attr['type'] != 'DATADISK':
                continue
            if rep['state'] == ReplicaState.AVAILABLE:
                # An RSE that already holds the data is no longer a candidate.
                if rep['rse'] in free_rses:
                    free_rses.remove(rep['rse'])
                available_reps.append(rep['rse'])
                num_reps += 1

        decision['replica_rses'] = available_reps
        decision['num_replicas'] = num_reps
        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision

        # Rank candidates by free-space percentage scaled down by penalty.
        rse_ratios = {}
        space_info = self._fsc.get_rse_space()
        for rse in free_rses:
            rse_space = space_info[rse]
            penalty = self._penalties[rse]
            rse_ratios[rse] = float(rse_space['free']) / float(
                rse_space['total']) * 100.0 / penalty

        # BUGFIX: the original indexed sorted_rses[0] unconditionally and
        # raised IndexError when every candidate RSE already held a replica.
        if not rse_ratios:
            decision['error_reason'] = 'no destination RSE available'
            return decision

        sorted_rses = sorted(rse_ratios.items(),
                             key=itemgetter(1),
                             reverse=True)
        decision['destination_rse'] = sorted_rses[0][0]
        decision['rse_ratios'] = sorted_rses
        # Penalize the chosen destination so it is deprioritized next time.
        self._penalties[sorted_rses[0][0]] = 10.0

        return decision
Example no. 6
0
    def place(self, did):
        """
        Decide a source and destination RSE for replicating one dataset DID,
        ranking candidate sites by their job-queue pressure.

        :param did: (scope, name) pair identifying the dataset.
        :returns: decision dict; contains 'error_reason' when no transfer is
                  scheduled, otherwise 'source_rse' and 'destination_rse'.
        """
        self.__update_penalties()
        decision = {'did': ':'.join(did)}
        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision
        # Normalize missing size/length metadata to zero.
        if meta['length'] is None:
            meta['length'] = 0
        if meta['bytes'] is None:
            meta['bytes'] = 0
        logging.debug('got %s:%s, num_files: %d, bytes: %d' %
                      (did[0], did[1], meta['length'], meta['bytes']))

        decision['length'] = meta['length']
        decision['bytes'] = meta['bytes']

        available_rses = []
        available_sites = []
        reps = list_dataset_replicas(did[0], did[1])

        num_reps = 0
        for rep in reps:
            if rep['state'] == ReplicaState.AVAILABLE:
                available_rses.append(rep['rse'])
                available_sites.append(self._mc.ddm_to_site(rep['rse']))
                num_reps += 1

        decision['replica_rses'] = available_rses
        decision['num_replicas'] = num_reps
        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision
        # BUGFIX: without any available replica there is nothing to copy
        # from; the original fell through and crashed with an IndexError
        # at available_rses[0] below.
        if not available_rses:
            decision['error_reason'] = 'no available replica to copy from'
            return decision

        # Rank each site by queued/(assigned + running/2) jobs, scaled by
        # its penalty; lower ratio means more spare capacity.
        site_ratios = {}
        site_job_info = {}
        for panda_site in self._wc.get_sites():
            site = self._mc.panda_to_site(panda_site)
            job_info = self._wc.get_job_info(panda_site)
            ratio = float(
                job_info[0]) / (float(job_info[1]) + float(job_info[2]) / 2)
            penalty = self._penalties[site]
            site_ratios[site] = ratio * penalty
            site_job_info[site] = (job_info, penalty)

        decision['site_ratios'] = site_ratios
        decision['site_job_info'] = site_job_info
        picked_site = None
        picked_rse = None

        # Walk sites from least to most loaded; skip sites that already
        # hold a replica, and pick the first DATADISK RSE found.
        for site, _ in sorted(site_ratios.items(), key=itemgetter(1)):
            if site in available_sites:
                continue
            rses_for_site = self._mc.site_to_ddm(site)
            if rses_for_site is None:
                continue

            for rse in rses_for_site:
                if 'DATADISK' in rse:
                    picked_rse = rse
                    picked_site = site
                    break
            if picked_rse:
                break

        if picked_rse is None:
            decision['error_reason'] = 'could not pick RSE'
            return decision

        decision['destination_rse'] = picked_rse
        if picked_site:
            # BUGFIX: penalize the site actually picked. The original wrote
            # self._penalties[site], which happened to equal picked_site only
            # because of the break placement above.
            self._penalties[picked_site] = 1

        # Prefer a random non-TAPE source; fall back to any replica.
        picked_source = None
        shuffle(available_rses)
        for rse in available_rses:
            if 'TAPE' in rse:
                continue
            picked_source = rse
            break

        if picked_source is None:
            picked_source = available_rses[0]

        decision['source_rse'] = picked_source
        logging.debug("Picked %s as source and %s as destination RSE" %
                      (picked_source, picked_rse))

        return decision
    def place(self, did):
        """
        Decide a source and destination RSE for replicating one dataset DID
        based on network metrics, free space, penalties and per-RSE quotas.

        :param did: (scope, name) pair identifying the dataset.
        :returns: decision dict; contains 'error_reason' when no transfer is
                  scheduled, otherwise 'source_rse' and 'destination_rse'.
        """
        self.__update_penalties()
        # Drop expired samples from the rolling per-RSE byte/file counters.
        self._added_bytes.trim()
        self._added_files.trim()

        decision = self.check_did(did)

        if 'error_reason' in decision:
            return decision

        meta = get_did(did[0], did[1])
        available_reps = {}
        reps = list_dataset_replicas(did[0], did[1])
        num_reps = 0
        space_info = self._fsc.get_rse_space()
        max_mbps = 0.0
        # Build available_reps[src_rse][dst_rse] -> metrics for every viable
        # source replica and candidate destination pair.
        for rep in reps:
            rse_attr = list_rse_attributes(rep['rse'])
            src_rse = rep['rse']
            if 'site' not in rse_attr:
                continue

            src_site = rse_attr['site']
            src_rse_info = get_rse(src_rse)

            # Only DATADISK endpoints are considered as sources.
            if 'type' not in rse_attr:
                continue
            if rse_attr['type'] != 'DATADISK':
                continue
            # NOTE(review): availability bitmask — bit 0x4 appears to gate
            # reading from the source RSE; confirm against the RSE model.
            if src_rse_info['availability'] & 4 == 0:
                continue

            if rep['state'] == ReplicaState.AVAILABLE:
                # Skip replicas with no files actually present.
                if rep['available_length'] == 0:
                    continue
                # Take the first metric source, in priority order, that
                # returns any throughput data for this site.
                net_metrics = {}
                net_metrics_type = None
                for metric_type in ('fts', 'fax', 'perfsonar', 'dashb'):
                    net_metrics_type = metric_type
                    net_metrics = self._nmc.getMbps(src_site, metric_type)
                    if net_metrics:
                        break
                if len(net_metrics) == 0:
                    continue
                available_reps[src_rse] = {}
                for dst_site, mbps in net_metrics.items():
                    if src_site == dst_site:
                        continue
                    if dst_site in self._sites:
                        # Track the global maximum for later normalization.
                        if mbps > max_mbps:
                            max_mbps = mbps
                        dst_rse = self._sites[dst_site]['rse']
                        dst_rse_info = get_rse(dst_rse)

                        # NOTE(review): bit 0x2 appears to gate writing to
                        # the destination RSE — confirm.
                        if dst_rse_info['availability'] & 2 == 0:
                            continue

                        # Enforce per-RSE hourly byte/file ingestion quotas.
                        site_added_bytes = sum(self._added_bytes.get_series(dst_rse))
                        site_added_files = sum(self._added_files.get_series(dst_rse))

                        if ((site_added_bytes + meta['bytes']) > self._max_bytes_hour_rse):
                            continue
                        if ((site_added_files + meta['length']) > self._max_files_hour_rse):
                            continue

                        queued = self._nmc.getQueuedFiles(src_site, dst_site)

                        # logging.debug('queued %s -> %s: %d' % (src_site, dst_site, queued))
                        # Skip links that already have transfers queued.
                        if queued > 0:
                            continue
                        rse_space = space_info.get(dst_rse, {'free': 0, 'total': 1})
                        # Penalties default to 100.0 (neutral) on first sight.
                        if src_rse not in self._src_penalties:
                            self._src_penalties[src_rse] = 100.0
                        src_penalty = self._src_penalties[src_rse]
                        if dst_rse not in self._dst_penalties:
                            self._dst_penalties[dst_rse] = 100.0
                        dst_penalty = self._dst_penalties[dst_rse]

                        free_space = float(rse_space['free']) / float(rse_space['total']) * 100.0
                        available_reps[src_rse][dst_rse] = {'free_space': free_space, 'src_penalty': src_penalty, 'dst_penalty': dst_penalty, 'mbps': float(mbps), 'metrics_type': net_metrics_type}

                num_reps += 1

        # decision['replica_rses'] = available_reps
        decision['num_replicas'] = num_reps

        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision

        src_dst_ratios = []

        if max_mbps == 0.0:
            decision['error_reason'] = 'could not find enough network metrics'
            return decision

        # Score each src->dst pair: normalized bandwidth plus a quarter of
        # the destination's free-space percentage, scaled by both penalties.
        for src, dsts in available_reps.items():
            for dst, metrics in dsts.items():
                # Destinations that already host a replica (i.e. appear as a
                # source) are excluded.
                if dst in available_reps:
                    continue
                bdw = (metrics['mbps'] / max_mbps) * 100.0
                src_penalty = self._src_penalties[src]
                dst_penalty = self._dst_penalties[dst]

                ratio = ((metrics['free_space'] / 4.0) + bdw) * src_penalty * dst_penalty
                src_dst_ratios.append((src, dst, ratio))

        if len(src_dst_ratios) == 0:
            decision['error_reason'] = 'found no suitable src/dst for replication'
            return decision

        # Pick the highest-scoring pair.
        sorted_ratios = sorted(src_dst_ratios, key=itemgetter(2), reverse=True)
        logging.debug(sorted_ratios)
        destination_rse = sorted_ratios[0][1]
        source_rse = sorted_ratios[0][0]
        decision['destination_rse'] = destination_rse
        decision['source_rse'] = source_rse
        # decision['rse_ratios'] = src_dst_ratios
        # Penalize both endpoints so subsequent decisions spread the load.
        self._dst_penalties[destination_rse] = 10.0
        self._src_penalties[source_rse] = 10.0

        self._added_cache.add_dataset(':'.join(did))

        # Charge this dataset against the per-RSE and global hourly quotas.
        self._added_bytes.add_point(destination_rse, meta['bytes'])
        self._added_files.add_point(destination_rse, meta['length'])

        self._added_bytes.add_point('total', meta['bytes'])
        self._added_files.add_point('total', meta['length'])

        return decision