def get_group(self, name): #override
    """Fetch a single PhEDEx group by name, honoring the include/exclude configuration.

    Returns a Group with olevel set to Dataset or Block, or None when the
    group is filtered out by configuration or unknown to PhEDEx.
    """

    # The name must match at least one include pattern (when includes are configured)
    if self.include is not None and \
            not any(fnmatch.fnmatch(name, pattern) for pattern in self.include):
        LOG.info('get_group(%s) %s is not included by configuration', name, name)
        return None

    # ... and no exclude pattern.
    if self.exclude is not None and \
            any(fnmatch.fnmatch(name, pattern) for pattern in self.exclude):
        LOG.info('get_group(%s) %s is excluded by configuration', name, name)
        return None

    LOG.info('get_group(%s) Fetching info on group %s', name, name)

    result = self._phedex.make_request('groups', ['group=' + name])
    if not result:
        # PhEDEx does not know this group
        return None

    group = Group(name)
    # Ownership level: configured dataset-level groups own whole datasets,
    # everything else owns individual blocks.
    group.olevel = Dataset if name in self.dataset_level_groups else Block

    return group
def maker_blockreplicas(block, block_entry):
    """Build one BlockReplica per replica entry in the given blockreplicas record.

    A replica is flagged complete when its byte count equals the block size,
    and custodial when PhEDEx marks it 'y'.
    """

    return [
        BlockReplica(
            block,
            Site(entry['node']),
            Group(entry['group']),
            is_complete=(entry['bytes'] == block.size),
            is_custodial=(entry['custodial'] == 'y'),
            size=entry['bytes'],
            last_update=int(entry['time_update'])
        )
        for entry in block_entry['replica']
    ]
def _load_groups(self, inventory, group_names, id_group_map):
    """Load groups from the MySQL store into inventory.groups.

    When group_names is given, only those groups are loaded. id_group_map is
    filled with a database-id -> Group mapping as a side effect.
    """

    query = 'SELECT g.`id`, g.`name`, g.`olevel` FROM `groups` AS g'

    if group_names is not None:
        # first dump the group ids into a temporary table, then constrain the original table
        self._mysql.query('CREATE TABLE `groups_load_tmp` (`id` int(11) unsigned NOT NULL, PRIMARY KEY (`id`))')
        insert = 'INSERT INTO `groups_load_tmp` SELECT `id` FROM `groups`'
        self._mysql.execute_many(insert, 'name', group_names)

        query += ' INNER JOIN `groups_load_tmp` AS t ON t.`id` = g.`id`'

    for gid, gname, olevel_name in self._mysql.xquery(query):
        # olevel column stores the class name of the ownership level
        olevel = Dataset if olevel_name == 'Dataset' else Block

        grp = Group(gname, olevel)
        inventory.groups[gname] = grp
        id_group_map[gid] = grp
def get_group_list(self): #override
    """Return Group objects for all PhEDEx groups passing the include/exclude filters."""

    LOG.info('get_group_list Fetching the list of groups from PhEDEx')
    LOG.debug('Groups with dataset-level ownership: %s', str(self.dataset_level_groups))

    groups = []

    for entry in self._phedex.make_request('groups'):
        name = entry['name']

        # Skip names that match no include pattern (when includes are configured)
        if self.include is not None and \
                not any(fnmatch.fnmatch(name, pattern) for pattern in self.include):
            continue

        # Skip names that match any exclude pattern
        if self.exclude is not None and \
                any(fnmatch.fnmatch(name, pattern) for pattern in self.exclude):
            continue

        olevel = Dataset if name in self.dataset_level_groups else Block
        groups.append(Group(name, olevel=olevel))

    return groups
def get_replicas(self, site=None, dataset=None, block=None): #override
    """Fetch block replicas from PhEDEx, optionally constrained to a site,
    dataset, or block.

    Combines three PhEDEx calls:
      - blockreplicas: the main replica listing
      - filereplicas (via _combine_file_info): file-level detail for blocks
        with at least one incomplete replica
      - subscriptions: lower-latency group/custodiality/size updates, used
        only when a dataset or block constraint is given

    Returns a list of BlockReplica objects; an empty list when the requested
    site or dataset is disallowed by configuration or no constraint is given.

    BUG FIX: the final last_update assignment had its None-check inverted,
    zeroing out valid timestamps and raising TypeError (int(None)) for
    missing ones.
    """

    if site is None:
        # No site constraint: filter sites per replica later.
        site_check = self.check_allowed_site
    else:
        site_check = None
        if not self.check_allowed_site(site):
            return []

    if dataset is None and block is None:
        # No dataset constraint: filter datasets per block later.
        dataset_check = self.check_allowed_dataset
    else:
        dataset_check = None
        if dataset is not None:
            if not self.check_allowed_dataset(dataset):
                return []

        if block is not None:
            # The dataset part of a full block name precedes '#'
            if not self.check_allowed_dataset(block[:block.find('#')]):
                return []

    options = []
    if site is not None:
        options.append('node=' + site)
    if dataset is not None:
        options.append('dataset=' + dataset)
    if block is not None:
        options.append('block=' + block)

    LOG.info('get_replicas(' + ','.join(options) + ') Fetching the list of replicas from PhEDEx')

    if len(options) == 0:
        return []

    block_entries = self._phedex.make_request('blockreplicas', options, timeout=7200)

    parallelizer = Map()
    parallelizer.timeout = 7200

    # Automatically starts a thread as we add the output of block_entries
    combine_file = parallelizer.get_starter(self._combine_file_info)

    for block_entry in block_entries:
        for replica_entry in block_entry['replica']:
            if replica_entry['complete'] == 'n':
                break
        else:
            # all replicas complete - no file-level info needed
            continue

        # there is at least one incomplete replica
        try:
            dataset_name, block_name = Block.from_full_name(block_entry['name'])
        except ObjectError: # invalid name
            continue

        if dataset_check and not dataset_check(dataset_name):
            continue

        combine_file.add_input(block_entry)

    combine_file.close()

    # _combine_file_info alters block_entries directly - no need to deal with output
    combine_file.get_outputs()

    block_replicas = PhEDExReplicaInfoSource.make_block_replicas(
        block_entries,
        PhEDExReplicaInfoSource.maker_blockreplicas,
        site_check=site_check,
        dataset_check=dataset_check)

    # Also use subscriptions call which has a lower latency than blockreplicas
    # For example, group change on a block replica at time T may not show up in
    # blockreplicas until up to T + 15 minutes while in subscriptions it is
    # visible within a few seconds
    # But subscriptions call without a dataset or block takes too long
    if dataset is None and block is None:
        return block_replicas

    # Index the replicas by (site, dataset) -> block name for fast lookup
    indexed = collections.defaultdict(dict)
    for replica in block_replicas:
        indexed[(replica.site.name, replica.block.dataset.name)][replica.block.name] = replica

    dataset_entries = self._phedex.make_request('subscriptions', options, timeout=3600)

    for dataset_entry in dataset_entries:
        dataset_name = dataset_entry['name']

        if not self.check_allowed_dataset(dataset_name):
            continue

        # Dataset-level subscriptions apply to every block replica at the site
        try:
            subscriptions = dataset_entry['subscription']
        except KeyError:
            pass
        else:
            for sub_entry in subscriptions:
                site_name = sub_entry['node']

                if not self.check_allowed_site(site_name):
                    continue

                replicas = indexed[(site_name, dataset_name)]

                for replica in replicas.itervalues():
                    replica.group = Group(sub_entry['group'])
                    replica.is_custodial = (sub_entry['custodial'] == 'y')

        # Block-level subscriptions carry per-block size and timestamp updates
        try:
            block_entries = dataset_entry['block']
        except KeyError:
            pass
        else:
            for block_entry in block_entries:
                try:
                    _, block_name = Block.from_full_name(block_entry['name'])
                except ObjectError:
                    continue

                try:
                    subscriptions = block_entry['subscription']
                except KeyError:
                    continue

                for sub_entry in subscriptions:
                    site_name = sub_entry['node']

                    if not self.check_allowed_site(site_name):
                        continue

                    try:
                        replica = indexed[(site_name, dataset_name)][block_name]
                    except KeyError:
                        continue

                    replica.group = Group(sub_entry['group'])

                    if sub_entry['node_bytes'] == block_entry['bytes']:
                        # complete
                        replica.size = sub_entry['node_bytes']
                        if replica.size is None:
                            replica.size = 0
                        replica.files = None
                    else:
                        # incomplete - since we cannot know what files are there, we'll just have to pretend there is none
                        replica.size = 0
                        replica.files = tuple()

                    replica.is_custodial = (sub_entry['custodial'] == 'y')

                    # FIX: condition was inverted - a real timestamp was being
                    # discarded and a missing one passed to int(), raising TypeError
                    if sub_entry['time_update'] is None:
                        replica.last_update = 0
                    else:
                        replica.last_update = int(sub_entry['time_update'])

    return block_replicas
def maker_blockreplicas(block, block_entry, site_check=None):
    """Return a list of block replicas using blockreplicas data or a combination
    of blockreplicas and filereplicas calls.

    block_entry is one entry of a PhEDEx blockreplicas response, optionally
    augmented with a 'file' list (filereplicas data). site_check, when given,
    is a predicate used to drop replicas at disallowed sites.
    """
    # Caches so each Site/Group object is created only once per call
    sites = {}
    invalid_sites = set()
    groups = {}

    # site name -> BlockReplica built from the block-level entries
    block_replicas = {}

    for replica_entry in block_entry['replica']:
        site_name = replica_entry['node']
        try:
            site = sites[site_name]
        except KeyError:
            if site_check:
                if site_name in invalid_sites:
                    continue
                if not site_check(site_name):
                    # remember the rejection so site_check runs once per site
                    invalid_sites.add(site_name)
                    continue

            site = sites[site_name] = Site(site_name)

        group_name = replica_entry['group']
        try:
            group = groups[group_name]
        except KeyError:
            group = groups[group_name] = Group(group_name)

        try:
            time_update = int(replica_entry['time_update'])
        except TypeError:
            # time_update was None
            time_update = 0

        block_replica = BlockReplica(
            block,
            site,
            group,
            is_custodial=(replica_entry['custodial'] == 'y'),
            last_update=time_update)

        block_replicas[site_name] = block_replica

        if replica_entry['complete'] == 'n':
            # incomplete replica: rebuild size/file list from file-level data below.
            # temporarily make this a list
            block_replica.file_ids = []
            block_replica.size = 0
            LOG.info("Incomplete %s" % str(block_replica))

    # Merge file-level replica info (when the caller fetched it) into the
    # incomplete block replicas; complete ones (file_ids is None) are skipped.
    if 'file' in block_entry:
        for file_entry in block_entry['file']:
            for replica_entry in file_entry['replica']:
                site_name = replica_entry['node']
                try:
                    block_replica = block_replicas[site_name]
                except KeyError:
                    continue

                if block_replica.file_ids is None:
                    continue

                # add LFN instead of file id
                block_replica.file_ids.append(file_entry['name'])
                file_size = file_entry['bytes']
                if file_size is not None:
                    block_replica.size += file_size

                try:
                    time_create = int(replica_entry['time_create'])
                except TypeError:
                    pass
                else:
                    # track the newest per-file timestamp as the replica's last update
                    if time_create > block_replica.last_update:
                        block_replica.last_update = time_create

    # Freeze the temporary file-id lists
    for block_replica in block_replicas.itervalues():
        if block_replica.file_ids is not None:
            block_replica.file_ids = tuple(block_replica.file_ids)

    return block_replicas.values()