示例#1
0
    def get_group(self, name):  #override
        """Look up one group in PhEDEx, honoring the include/exclude patterns.

        The name is rejected when an include list is configured and none of its
        patterns matches, or when any pattern of a configured exclude list
        matches. Patterns are compared with fnmatch.fnmatch.

        Args:
            name: Name of the group to look up.

        Returns:
            A Group whose olevel is Dataset when the name is listed in
            self.dataset_level_groups and Block otherwise. None when the name
            is filtered out or the 'groups' request returns an empty result.
        """
        allowed = self.include
        if allowed is not None and not any(
                fnmatch.fnmatch(name, glob) for glob in allowed):
            LOG.info('get_group(%s)  %s is not included by configuration',
                     name, name)
            return None

        banned = self.exclude
        if banned is not None and any(
                fnmatch.fnmatch(name, glob) for glob in banned):
            LOG.info('get_group(%s)  %s is excluded by configuration',
                     name, name)
            return None

        LOG.info('get_group(%s)  Fetching info on group %s', name, name)

        # An empty response is treated as "no such group".
        response = self._phedex.make_request('groups', ['group=' + name])
        if len(response) == 0:
            return None

        found = Group(name)
        found.olevel = Dataset if name in self.dataset_level_groups else Block
        return found
示例#2
0
    def maker_blockreplicas(block, block_entry):
        """Build one BlockReplica per replica record of a block entry.

        Args:
            block: Block the replicas belong to; its size attribute is compared
                with each replica's 'bytes' to decide completeness.
            block_entry: Mapping with a 'replica' list. Each item is read for
                the keys 'node', 'group', 'bytes', 'custodial' and
                'time_update'.

        Returns:
            List of BlockReplica objects, in the order of
            block_entry['replica'].
        """
        replicas = []

        for replica_entry in block_entry['replica']:
            # Guard against a null time_update: int(None) raises TypeError.
            # A missing timestamp is recorded as 0.
            time_update = replica_entry['time_update']
            last_update = 0 if time_update is None else int(time_update)

            replicas.append(
                BlockReplica(
                    block,
                    Site(replica_entry['node']),
                    Group(replica_entry['group']),
                    is_complete=(replica_entry['bytes'] == block.size),
                    is_custodial=(replica_entry['custodial'] == 'y'),
                    size=replica_entry['bytes'],
                    last_update=last_update))

        return replicas
示例#3
0
    def _load_groups(self, inventory, group_names, id_group_map):
        """Read rows of the `groups` table into the inventory.

        Args:
            inventory: Object whose groups mapping receives name -> Group.
            group_names: Names handed to execute_many to restrict the load, or
                None to read every row of `groups`.
            id_group_map: Mapping filled with database id -> Group.
        """
        query = 'SELECT g.`id`, g.`name`, g.`olevel` FROM `groups` AS g'

        if group_names is not None:
            # Stage the ids of the requested groups in a helper table, then
            # restrict the main query by joining against it.
            # NOTE(review): the helper table is created with a plain
            # CREATE TABLE and is not dropped in this method - confirm that
            # cleanup happens elsewhere.
            self._mysql.query(
                'CREATE TABLE `groups_load_tmp` (`id` int(11) unsigned NOT NULL, PRIMARY KEY (`id`))'
            )
            # presumably execute_many constrains the INSERT by `name`; verify
            # against the helper's implementation
            self._mysql.execute_many(
                'INSERT INTO `groups_load_tmp` SELECT `id` FROM `groups`',
                'name', group_names)

            query += ' INNER JOIN `groups_load_tmp` AS t ON t.`id` = g.`id`'

        for row_id, group_name, level_name in self._mysql.xquery(query):
            # Anything other than the literal 'Dataset' means block-level ownership.
            level = Dataset if level_name == 'Dataset' else Block

            loaded = Group(group_name, level)

            inventory.groups[group_name] = loaded
            id_group_map[row_id] = loaded
示例#4
0
    def get_group_list(self):  #override
        """Return the groups reported by PhEDEx that pass the name filters.

        An entry is dropped when an include list is configured and none of its
        patterns matches the entry name, or when any pattern of a configured
        exclude list matches it. Patterns are compared with fnmatch.fnmatch.

        Returns:
            List of Group objects. olevel is Dataset for names listed in
            self.dataset_level_groups and Block for all others.
        """
        LOG.info('get_group_list  Fetching the list of groups from PhEDEx')
        LOG.debug('Groups with dataset-level ownership: %s',
                  str(self.dataset_level_groups))

        selected = []

        for entry in self._phedex.make_request('groups'):
            if self.include is not None and not any(
                    fnmatch.fnmatch(entry['name'], glob)
                    for glob in self.include):
                continue

            if self.exclude is not None and any(
                    fnmatch.fnmatch(entry['name'], glob)
                    for glob in self.exclude):
                continue

            level = Dataset if entry['name'] in self.dataset_level_groups else Block
            selected.append(Group(entry['name'], olevel=level))

        return selected
    def get_replicas(self, site=None, dataset=None, block=None):  #override
        """Fetch block replicas from PhEDEx, optionally restricted by site, dataset or block.

        The 'blockreplicas' call provides the replica list. When a dataset or a
        block is given, the 'subscriptions' call is queried as well and its
        group, custodial flag, size and update time overwrite the values of the
        matching replicas.

        Args:
            site: Site (node) name passed to PhEDEx as 'node=', or None.
            dataset: Dataset name passed as 'dataset=', or None.
            block: Full block name passed as 'block='; the part before '#' is
                checked with check_allowed_dataset. None for no constraint.

        Returns:
            The list produced by PhEDExReplicaInfoSource.make_block_replicas, or
            an empty list when a given site/dataset/block is not allowed or when
            no constraint at all is given.
        """
        # A constraint given by the caller is validated once up front; without
        # one, the checker is handed down so every entry is filtered.
        if site is None:
            site_check = self.check_allowed_site
        else:
            site_check = None
            if not self.check_allowed_site(site):
                return []

        if dataset is None and block is None:
            dataset_check = self.check_allowed_dataset
        else:
            dataset_check = None
            if dataset is not None:
                if not self.check_allowed_dataset(dataset):
                    return []
            if block is not None:
                # partition() keeps the whole string when there is no '#',
                # whereas slicing with find() == -1 would drop the last character.
                if not self.check_allowed_dataset(block.partition('#')[0]):
                    return []

        options = []
        if site is not None:
            options.append('node=' + site)
        if dataset is not None:
            options.append('dataset=' + dataset)
        if block is not None:
            options.append('block=' + block)

        LOG.info('get_replicas(%s)  Fetching the list of replicas from PhEDEx',
                 ','.join(options))

        if not options:
            return []

        block_entries = self._phedex.make_request('blockreplicas',
                                                  options,
                                                  timeout=7200)

        parallelizer = Map()
        parallelizer.timeout = 7200

        # Automatically starts a thread as we add the output of block_entries
        combine_file = parallelizer.get_starter(self._combine_file_info)

        for block_entry in block_entries:
            # Only blocks with at least one incomplete replica are passed on.
            if not any(replica_entry['complete'] == 'n'
                       for replica_entry in block_entry['replica']):
                continue

            try:
                dataset_name, block_name = Block.from_full_name(
                    block_entry['name'])
            except ObjectError:  # invalid name
                continue

            if dataset_check and not dataset_check(dataset_name):
                continue

            combine_file.add_input(block_entry)

        combine_file.close()

        # _combine_file_info alters block_entries directly - no need to deal with output
        combine_file.get_outputs()

        block_replicas = PhEDExReplicaInfoSource.make_block_replicas(
            block_entries,
            PhEDExReplicaInfoSource.maker_blockreplicas,
            site_check=site_check,
            dataset_check=dataset_check)

        # Also use subscriptions call which has a lower latency than blockreplicas
        # For example, group change on a block replica at time T may not show up in blockreplicas until up to T + 15 minutes
        # while in subscriptions it is visible within a few seconds
        # But subscriptions call without a dataset or block takes too long
        if dataset is None and block is None:
            return block_replicas

        # (site name, dataset name) -> {block name -> replica}
        indexed = collections.defaultdict(dict)
        for replica in block_replicas:
            indexed[(replica.site.name,
                     replica.block.dataset.name)][replica.block.name] = replica

        dataset_entries = self._phedex.make_request('subscriptions',
                                                    options,
                                                    timeout=3600)

        for dataset_entry in dataset_entries:
            dataset_name = dataset_entry['name']

            if not self.check_allowed_dataset(dataset_name):
                continue

            # Dataset-level subscriptions apply to every replica of the dataset at the site.
            try:
                subscriptions = dataset_entry['subscription']
            except KeyError:
                pass
            else:
                for sub_entry in subscriptions:
                    site_name = sub_entry['node']

                    if not self.check_allowed_site(site_name):
                        continue

                    replicas = indexed[(site_name, dataset_name)]

                    for replica in replicas.itervalues():
                        replica.group = Group(sub_entry['group'])
                        replica.is_custodial = (sub_entry['custodial'] == 'y')

            # Block-level subscriptions update individual replicas.
            try:
                sub_block_entries = dataset_entry['block']
            except KeyError:
                continue

            for block_entry in sub_block_entries:
                try:
                    _, block_name = Block.from_full_name(block_entry['name'])
                except ObjectError:
                    continue

                try:
                    subscriptions = block_entry['subscription']
                except KeyError:
                    continue

                for sub_entry in subscriptions:
                    site_name = sub_entry['node']

                    if not self.check_allowed_site(site_name):
                        continue

                    try:
                        replica = indexed[(site_name,
                                           dataset_name)][block_name]
                    except KeyError:
                        continue

                    replica.group = Group(sub_entry['group'])

                    if sub_entry['node_bytes'] == block_entry['bytes']:
                        # complete
                        replica.size = sub_entry['node_bytes']
                        if replica.size is None:
                            replica.size = 0
                        replica.files = None
                    else:
                        # incomplete - since we cannot know what files are there, we'll just have to pretend there is none
                        replica.size = 0
                        replica.files = tuple()

                    replica.is_custodial = (sub_entry['custodial'] == 'y')

                    # A null time_update is recorded as 0; int(None) would
                    # raise TypeError.
                    if sub_entry['time_update'] is None:
                        replica.last_update = 0
                    else:
                        replica.last_update = int(sub_entry['time_update'])

        return block_replicas
    def maker_blockreplicas(block, block_entry, site_check=None):
        """Build the BlockReplica objects described by one block entry.

        Uses the 'replica' list of the entry and, when a 'file' list is present,
        the per-file replica records to fill in incomplete replicas.

        NOTE(review): takes no self - presumably exposed as a static method;
        confirm against the enclosing class.

        Args:
            block: Block object passed to every BlockReplica.
            block_entry: Mapping with a 'replica' list and optionally a 'file'
                list.
            site_check: Optional callable taking a site name; replicas at sites
                for which it returns a false value are skipped.

        Returns:
            The values of an internal site name -> BlockReplica mapping; a later
            replica record for the same site replaces an earlier one.
        """
        known_sites = {}
        rejected_sites = set()
        known_groups = {}

        by_site = {}

        for entry in block_entry['replica']:
            node = entry['node']
            if node in known_sites:
                site = known_sites[node]
            else:
                if site_check:
                    # Remember rejected names so site_check runs once per site.
                    if node in rejected_sites:
                        continue
                    if not site_check(node):
                        rejected_sites.add(node)
                        continue

                site = known_sites[node] = Site(node)

            group_name = entry['group']
            if group_name not in known_groups:
                known_groups[group_name] = Group(group_name)
            group = known_groups[group_name]

            try:
                updated = int(entry['time_update'])
            except TypeError:
                # time_update was None
                updated = 0

            replica = BlockReplica(
                block,
                site,
                group,
                is_custodial=(entry['custodial'] == 'y'),
                last_update=updated)

            by_site[node] = replica

            if entry['complete'] == 'n':
                # file_ids is a list only while it is being filled; it is
                # frozen into a tuple before returning
                replica.file_ids = []
                replica.size = 0
                LOG.info("Incomplete %s" % str(replica))

        file_entries = block_entry['file'] if 'file' in block_entry else ()

        for file_entry in file_entries:
            for file_replica in file_entry['replica']:
                replica = by_site.get(file_replica['node'])
                # Skip sites without a replica object and replicas whose
                # file_ids was not turned into a list above.
                if replica is None or replica.file_ids is None:
                    continue

                # add LFN instead of file id
                replica.file_ids.append(file_entry['name'])
                nbytes = file_entry['bytes']
                if nbytes is not None:
                    replica.size += nbytes

                try:
                    created = int(file_replica['time_create'])
                except TypeError:
                    # a null time_create leaves last_update untouched
                    continue

                if created > replica.last_update:
                    replica.last_update = created

        for replica in by_site.itervalues():
            if replica.file_ids is not None:
                replica.file_ids = tuple(replica.file_ids)

        return by_site.values()