def check_did(self, did):
        """
        Run the acceptance checks for a DID and collect the results.

        The checks run in order and the first failing one stops the
        evaluation: the returned dict then carries an 'error_reason' entry.
        A dict without 'error_reason' means every check passed.

        Note that the DID is registered with ``self._dc.add_did`` as soon as
        the byte/file limit checks have passed, i.e. also when the DID is
        later rejected by the popularity check.

        :param did: Sequence ``(scope, name)`` of two strings.
        :returns: Dict that always contains 'did' (``scope:name``) and,
                  depending on how far the checks got, 'length', 'bytes',
                  'last_accesses', 'popularity' and 'error_reason'.
        """
        did_str = ':'.join(did)
        decision = {'did': did_str}
        if self._added_cache.check_dataset(did_str):
            decision['error_reason'] = 'already added replica for this did in the last 24h'
            return decision

        if not did[0].startswith(('data', 'mc')):
            decision['error_reason'] = 'not a data or mc dataset'
            return decision

        # The datatype is the fifth dot-separated field of the name, cut at
        # the first underscore. A name with fewer fields cannot carry any of
        # the configured datatypes, so it is rejected instead of raising
        # IndexError.
        name_fields = did[1].split('.')
        if len(name_fields) < 5:
            decision['error_reason'] = 'wrong datatype'
            return decision
        datatype = name_fields[4].split('_')[0]

        if datatype not in self._datatypes:
            decision['error_reason'] = 'wrong datatype'
            return decision

        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision

        # Missing metadata values count as zero; the dict returned by
        # get_did is left untouched.
        num_files = 0 if meta['length'] is None else meta['length']
        num_bytes = 0 if meta['bytes'] is None else meta['bytes']
        logging.debug('got %s:%s, num_files: %d, bytes: %d', did[0], did[1], num_files, num_bytes)

        decision['length'] = num_files
        decision['bytes'] = num_bytes

        total_added_bytes = sum(self._added_bytes.get_series('total'))
        total_added_files = sum(self._added_files.get_series('total'))

        logging.debug("total_added_bytes: %d", total_added_bytes)
        logging.debug("total_added_files: %d", total_added_files)

        if (total_added_bytes + num_bytes) > self._max_bytes_hour:
            decision['error_reason'] = 'above bytes limit of %d bytes' % self._max_bytes_hour
            return decision
        if (total_added_files + num_files) > self._max_files_hour:
            decision['error_reason'] = 'above files limit of %d files' % self._max_files_hour
            return decision

        # Read the access count before registering this request.
        last_accesses = self._dc.get_did(did)
        self._dc.add_did(did)

        decision['last_accesses'] = last_accesses

        try:
            # A missing popularity value counts as 0.0. The normalised value
            # is used for the comparison below as well, because comparing
            # None with a number raises TypeError on Python 3.
            pop = get_popularity(did) or 0.0
        except Exception:
            decision['error_reason'] = 'problems connecting to ES'
            return decision
        decision['popularity'] = pop

        if (last_accesses < self._min_recent_requests) and (pop < self._min_popularity):
            decision['error_reason'] = 'did not popular enough'
            return decision

        return decision
Пример #2
0
    def place(self, did):
        """
        Decide whether and where to place an additional replica of a DID.

        The penalties are refreshed first. The DID is then checked (scope
        prefix, existence, recent accesses / popularity, number of existing
        available DATADISK replicas). If all checks pass, the candidate RSE
        with the highest penalty-weighted free-space ratio is chosen and its
        penalty is set to 10.0.

        The first failing check stops the evaluation; the returned dict then
        carries an 'error_reason' entry.

        :param did: Sequence ``(scope, name)`` of two strings.
        :returns: Dict that always contains 'did' (``scope:name``) and,
                  depending on how far the evaluation got, 'length', 'bytes',
                  'last_accesses', 'popularity', 'replica_rses',
                  'num_replicas', 'rse_ratios', 'destination_rse' and
                  'error_reason'.
        """
        self.__update_penalties()
        decision = {'did': ':'.join(did)}
        if not did[0].startswith(('data', 'mc')):
            decision['error_reason'] = 'not a data or mc dataset'
            return decision

        try:
            meta = get_did(did[0], did[1])
        except DataIdentifierNotFound:
            decision['error_reason'] = 'did does not exist'
            return decision

        # Missing metadata values count as zero; the dict returned by
        # get_did is left untouched.
        num_files = 0 if meta['length'] is None else meta['length']
        num_bytes = 0 if meta['bytes'] is None else meta['bytes']
        logging.debug('got %s:%s, num_files: %d, bytes: %d',
                      did[0], did[1], num_files, num_bytes)

        decision['length'] = num_files
        decision['bytes'] = num_bytes

        # Read the access count before registering this request.
        last_accesses = self._dc.get_did(did)
        self._dc.add_did(did)

        decision['last_accesses'] = last_accesses

        # A missing popularity value counts as 0.0. The normalised value is
        # used for the comparison below as well, because comparing None with
        # a number raises TypeError on Python 3.
        pop = get_popularity(did) or 0.0
        decision['popularity'] = pop

        if (last_accesses < 5) and (pop < 10.0):
            decision['error_reason'] = 'did not popular enough'
            return decision

        # Work on a copy: RSEs that already hold a replica are removed from
        # the candidates below, and self._rses must stay intact for the next
        # call.
        free_rses = list(self._rses)
        available_reps = []
        for rep in list_dataset_replicas(did[0], did[1]):
            rse_attr = list_rse_attributes(rep['rse'])
            # Only replicas on RSEs whose 'type' attribute is DATADISK count.
            if rse_attr.get('type') != 'DATADISK':
                continue
            if rep['state'] == ReplicaState.AVAILABLE:
                if rep['rse'] in free_rses:
                    free_rses.remove(rep['rse'])
                available_reps.append(rep['rse'])

        num_reps = len(available_reps)
        decision['replica_rses'] = available_reps
        decision['num_replicas'] = num_reps
        if num_reps >= 5:
            decision['error_reason'] = 'more than 4 replicas already exist'
            return decision

        # Rank the remaining candidates by free space in percent divided by
        # the current penalty of the RSE.
        rse_ratios = {}
        space_info = self._fsc.get_rse_space()
        for rse in free_rses:
            rse_space = space_info[rse]
            penalty = self._penalties[rse]
            rse_ratios[rse] = float(rse_space['free']) / float(
                rse_space['total']) * 100.0 / penalty

        sorted_rses = sorted(rse_ratios.items(),
                             key=itemgetter(1),
                             reverse=True)
        decision['rse_ratios'] = sorted_rses
        if not sorted_rses:
            # Every known RSE already holds a replica (or none is
            # configured): report it instead of failing with IndexError.
            decision['error_reason'] = 'no free rse available as destination'
            return decision

        destination = sorted_rses[0][0]
        decision['destination_rse'] = destination
        # Penalise the chosen RSE so it ranks lower in subsequent decisions.
        self._penalties[destination] = 10.0

        return decision