def list_dataset_replicas(scope, name, deep=False):
    """
    Return the replicas of a dataset.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :returns: A list of dict dataset replicas
    """
    # Pure pass-through to the core replica layer.
    result = replica.list_dataset_replicas(scope=scope, name=name, deep=deep)
    return result
def list_dataset_replicas(scope, name, deep=False):
    """
    Yield the replicas of a dataset, with scopes converted back to
    their external representation.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :returns: A list of dict dataset replicas
    """
    internal_scope = InternalScope(scope)
    for rep in replica.list_dataset_replicas(scope=internal_scope, name=name, deep=deep):
        # Core returns internal scopes; expose the external form to callers.
        rep['scope'] = rep['scope'].external
        yield rep
def list_dataset_replicas(scope, name, deep=False, vo='def'):
    """
    Yield the replicas of a dataset for a given VO.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param vo: The VO to act on.
    :returns: A list of dict dataset replicas
    """
    internal_scope = InternalScope(scope, vo=vo)
    rows = replica.list_dataset_replicas(scope=internal_scope, name=name, deep=deep)
    # Convert internal types (scope etc.) to their external form per row.
    for row in rows:
        yield api_update_return_dict(row)
def list_dataset_replicas(scope, name, deep=False, vo='def', session=None):
    """
    Yield the replicas of a dataset for a given VO, using an existing
    database session when supplied.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param vo: The VO to act on.
    :param session: The database session in use.
    :returns: A list of dict dataset replicas
    """
    internal_scope = InternalScope(scope, vo=vo)
    rows = replica.list_dataset_replicas(scope=internal_scope, name=name, deep=deep, session=session)
    for row in rows:
        # Expose the external scope representation to callers.
        row['scope'] = row['scope'].external
        yield row
def place(self, did):
    """
    Decide whether and where to place a new replica of a dataset,
    ranking candidate RSEs by free-space ratio divided by penalty.

    :param did: (scope, name) tuple of the dataset.
    :returns: Decision dict. Contains 'error_reason' when no placement
              should happen, otherwise 'destination_rse' and 'rse_ratios'.
    """
    self.__update_penalties()
    decision = {'did': ':'.join(did)}
    # Only data* and mc* scopes are eligible for placement.
    if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
        decision['error_reason'] = 'not a data or mc dataset'
        return decision

    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    # Metadata counters may be unset (NULL); normalise to 0.
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))
    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    last_accesses = self._dc.get_did(did)
    self._dc.add_did(did)
    decision['last_accesses'] = last_accesses

    # BUG FIX: get_popularity() can return None (the original already
    # normalised it for reporting via `pop or 0.0`), but the comparison
    # below used the raw value and raised TypeError on None in Python 3.
    pop = get_popularity(did) or 0.0
    decision['popularity'] = pop
    if (last_accesses < 5) and (pop < 10.0):
        decision['error_reason'] = 'did not popular enough'
        return decision

    # BUG FIX: copy instead of aliasing self._rses — the remove() below
    # used to mutate the instance list, permanently shrinking the
    # candidate pool across successive place() calls.
    free_rses = list(self._rses)
    available_reps = []
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    for rep in reps:
        rse_attr = list_rse_attributes(rep['rse'])
        # Only consider DATADISK endpoints.
        if 'type' not in rse_attr:
            continue
        if rse_attr['type'] != 'DATADISK':
            continue
        if rep['state'] == ReplicaState.AVAILABLE:
            # An RSE that already holds a replica is no longer free.
            if rep['rse'] in free_rses:
                free_rses.remove(rep['rse'])
            available_reps.append(rep['rse'])
            num_reps += 1

    decision['replica_rses'] = available_reps
    decision['num_replicas'] = num_reps
    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision
    # ROBUSTNESS FIX: without this guard, sorted_rses[0] below raised
    # IndexError when every candidate RSE already had a replica.
    if not free_rses:
        decision['error_reason'] = 'no free RSE without a replica left'
        return decision

    rse_ratios = {}
    space_info = self._fsc.get_rse_space()
    for rse in free_rses:
        rse_space = space_info[rse]
        penalty = self._penalties[rse]
        # Rank by percentage of free space, de-prioritised by penalty.
        rse_ratios[rse] = float(rse_space['free']) / float(rse_space['total']) * 100.0 / penalty

    sorted_rses = sorted(rse_ratios.items(), key=itemgetter(1), reverse=True)
    decision['destination_rse'] = sorted_rses[0][0]
    decision['rse_ratios'] = sorted_rses
    # Penalise the chosen RSE so consecutive decisions spread out.
    self._penalties[sorted_rses[0][0]] = 10.0

    return decision
def place(self, did):
    """
    Decide whether and where to place a new replica of a dataset,
    picking the destination site by job-queue ratio and the source
    RSE at random among non-TAPE replicas.

    :param did: (scope, name) tuple of the dataset.
    :returns: Decision dict. Contains 'error_reason' when no placement
              should happen, otherwise 'destination_rse' and 'source_rse'.
    """
    self.__update_penalties()
    decision = {'did': ':'.join(did)}
    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    # Metadata counters may be unset (NULL); normalise to 0.
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))
    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    # Collect RSEs (and their mapped sites) that already hold an
    # AVAILABLE replica of this dataset.
    available_rses = []
    available_sites = []
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    for rep in reps:
        if rep['state'] == ReplicaState.AVAILABLE:
            available_rses.append(rep['rse'])
            available_sites.append(self._mc.ddm_to_site(rep['rse']))
            num_reps += 1

    decision['replica_rses'] = available_rses
    decision['num_replicas'] = num_reps
    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    # Score each site by its job backlog, weighted by the site penalty.
    site_ratios = {}
    site_job_info = {}
    for panda_site in self._wc.get_sites():
        site = self._mc.panda_to_site(panda_site)
        job_info = self._wc.get_job_info(panda_site)
        # NOTE(review): job_info indices 0/1/2 look like job counts by
        # state (presumably queued/running-ish) — confirm against the
        # workload client. If job_info[1] + job_info[2]/2 is zero this
        # raises ZeroDivisionError; verify the client never returns that.
        ratio = float(job_info[0]) / (float(job_info[1]) + float(job_info[2]) / 2)
        penalty = self._penalties[site]
        site_ratios[site] = ratio * penalty
        site_job_info[site] = (job_info, penalty)

    decision['site_ratios'] = site_ratios
    decision['site_job_info'] = site_job_info

    # Walk sites from lowest ratio upwards; pick the first site that has
    # no replica yet and offers a DATADISK RSE.
    picked_site = None
    picked_rse = None
    for site, _ in sorted(site_ratios.items(), key=itemgetter(1)):
        if site in available_sites:
            continue
        rses_for_site = self._mc.site_to_ddm(site)
        if rses_for_site is None:
            continue
        for rse in rses_for_site:
            if 'DATADISK' in rse:
                picked_rse = rse
                picked_site = site
                break
        if picked_rse:
            break

    if picked_rse is None:
        decision['error_reason'] = 'could not pick RSE'
        return decision
    decision['destination_rse'] = picked_rse
    if picked_site:
        # NOTE(review): `site` is the loop variable; at this point it
        # equals picked_site (the loop broke on the pick), but using
        # picked_site explicitly would be clearer.
        self._penalties[site] = 1

    # Pick a random non-TAPE source among the existing replicas.
    picked_source = None
    shuffle(available_rses)
    for rse in available_rses:
        if 'TAPE' in rse:
            continue
        picked_source = rse
        break
    if picked_source is None:
        # Fall back to any replica if all are on TAPE.
        # NOTE(review): raises IndexError if available_rses is empty
        # (num_reps == 0) — confirm callers guarantee >= 1 replica.
        picked_source = available_rses[0]
    decision['source_rse'] = picked_source
    logging.debug("Picked %s as source and %s as destination RSE" % (picked_source, picked_rse))

    return decision
def place(self, did):
    """
    Decide whether and where to place a new replica of a dataset,
    ranking source/destination RSE pairs by free space, network
    bandwidth, and penalties, under hourly per-RSE transfer budgets.

    :param did: (scope, name) tuple of the dataset.
    :returns: Decision dict. Contains 'error_reason' when no placement
              should happen, otherwise 'destination_rse' and 'source_rse'.
    """
    self.__update_penalties()
    # Drop expired points from the rolling added-bytes/files windows.
    self._added_bytes.trim()
    self._added_files.trim()

    decision = self.check_did(did)
    if 'error_reason' in decision:
        return decision

    meta = get_did(did[0], did[1])

    # available_reps maps src_rse -> {dst_rse -> candidate metrics}.
    available_reps = {}
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    space_info = self._fsc.get_rse_space()
    max_mbps = 0.0
    for rep in reps:
        rse_attr = list_rse_attributes(rep['rse'])
        src_rse = rep['rse']
        if 'site' not in rse_attr:
            continue
        src_site = rse_attr['site']
        src_rse_info = get_rse(src_rse)
        # Only consider DATADISK endpoints as sources.
        if 'type' not in rse_attr:
            continue
        if rse_attr['type'] != 'DATADISK':
            continue
        # NOTE(review): bit 4 of 'availability' is presumably the
        # read/source flag — confirm against the RSE availability schema.
        if src_rse_info['availability'] & 4 == 0:
            continue
        if rep['state'] == ReplicaState.AVAILABLE:
            if rep['available_length'] == 0:
                continue
            # Try the metric providers in order of preference; keep the
            # first one that returns data.
            net_metrics = {}
            net_metrics_type = None
            for metric_type in ('fts', 'fax', 'perfsonar', 'dashb'):
                net_metrics_type = metric_type
                net_metrics = self._nmc.getMbps(src_site, metric_type)
                if net_metrics:
                    break
            if len(net_metrics) == 0:
                continue
            available_reps[src_rse] = {}
            for dst_site, mbps in net_metrics.items():
                if src_site == dst_site:
                    continue
                if dst_site in self._sites:
                    # Track the global maximum for bandwidth normalisation.
                    if mbps > max_mbps:
                        max_mbps = mbps
                    dst_rse = self._sites[dst_site]['rse']
                    dst_rse_info = get_rse(dst_rse)
                    # NOTE(review): bit 2 is presumably the write flag —
                    # confirm against the RSE availability schema.
                    if dst_rse_info['availability'] & 2 == 0:
                        continue
                    # Enforce the hourly per-RSE byte/file budgets.
                    site_added_bytes = sum(self._added_bytes.get_series(dst_rse))
                    site_added_files = sum(self._added_files.get_series(dst_rse))
                    if ((site_added_bytes + meta['bytes']) > self._max_bytes_hour_rse):
                        continue
                    if ((site_added_files + meta['length']) > self._max_files_hour_rse):
                        continue
                    # Skip links that already have transfers queued.
                    queued = self._nmc.getQueuedFiles(src_site, dst_site)
                    # logging.debug('queued %s -> %s: %d' % (src_site, dst_site, queued))
                    if queued > 0:
                        continue
                    # Default space info avoids division by zero below.
                    rse_space = space_info.get(dst_rse, {'free': 0, 'total': 1})
                    # Penalties default to 100.0 (no penalty) on first sight.
                    if src_rse not in self._src_penalties:
                        self._src_penalties[src_rse] = 100.0
                    src_penalty = self._src_penalties[src_rse]
                    if dst_rse not in self._dst_penalties:
                        self._dst_penalties[dst_rse] = 100.0
                    dst_penalty = self._dst_penalties[dst_rse]
                    free_space = float(rse_space['free']) / float(rse_space['total']) * 100.0
                    available_reps[src_rse][dst_rse] = {'free_space': free_space, 'src_penalty': src_penalty, 'dst_penalty': dst_penalty, 'mbps': float(mbps), 'metrics_type': net_metrics_type}
                    # NOTE(review): num_reps counts accepted src/dst
                    # candidate PAIRS, not dataset replicas — so the
                    # '>= 5' check below caps candidates, not replica
                    # count as the error message suggests. Confirm intent.
                    num_reps += 1

    # decision['replica_rses'] = available_reps
    decision['num_replicas'] = num_reps
    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    src_dst_ratios = []

    if max_mbps == 0.0:
        decision['error_reason'] = 'could not find enough network metrics'
        return decision

    # Score every (src, dst) pair: free space (quartered) plus
    # normalised bandwidth, scaled by both penalties.
    for src, dsts in available_reps.items():
        for dst, metrics in dsts.items():
            # Skip destinations that already hold a replica (they appear
            # as source keys in available_reps).
            if dst in available_reps:
                continue
            bdw = (metrics['mbps'] / max_mbps) * 100.0
            src_penalty = self._src_penalties[src]
            dst_penalty = self._dst_penalties[dst]
            ratio = ((metrics['free_space'] / 4.0) + bdw) * src_penalty * dst_penalty
            src_dst_ratios.append((src, dst, ratio))

    if len(src_dst_ratios) == 0:
        decision['error_reason'] = 'found no suitable src/dst for replication'
        return decision

    sorted_ratios = sorted(src_dst_ratios, key=itemgetter(2), reverse=True)
    logging.debug(sorted_ratios)
    destination_rse = sorted_ratios[0][1]
    source_rse = sorted_ratios[0][0]
    decision['destination_rse'] = destination_rse
    decision['source_rse'] = source_rse
    # decision['rse_ratios'] = src_dst_ratios

    # Penalise the chosen pair so consecutive decisions spread out.
    self._dst_penalties[destination_rse] = 10.0
    self._src_penalties[source_rse] = 10.0

    # Record this placement against the rolling budgets and cache.
    self._added_cache.add_dataset(':'.join(did))
    self._added_bytes.add_point(destination_rse, meta['bytes'])
    self._added_files.add_point(destination_rse, meta['length'])
    self._added_bytes.add_point('total', meta['bytes'])
    self._added_files.add_point('total', meta['length'])

    return decision