Example #1
def set_metadata(scope, name, key, value, recursive=False, session=None):
    """
    Sets metadata for a given did.

    :param scope: The scope of the did.
    :param name: The data identifier name.
    :param key: Metadata key.
    :param value: Metadata value.
    :param recursive: (optional) Propagate the metadata change recursively to content.
    :param session: (optional) The database session in use.
    :raises: InvalidMetadata
    """
    # Check for forbidden characters in key.
    for char in RESTRICTED_CHARACTERS:
        if char in key:
            raise exception.InvalidMetadata('Restricted character "{}" found in metadata key. Reason: {}'.format(
                char,
                RESTRICTED_CHARACTERS[char]
            ))

    # Sequentially check if each metadata plugin manages this key. Note that the order of [METADATA_PLUGIN_MODULES]
    # means that the key is always checked for existence in the base list first.
    metadata_was_set = False
    for metadata_plugin in METADATA_PLUGIN_MODULES:
        if metadata_plugin.manages_key(key, session=session):
            metadata_plugin.set_metadata(scope, name, key, value, recursive, session=session)
            metadata_was_set = True
            break

    if not metadata_was_set:
        raise exception.InvalidMetadata('No plugin manages metadata key %s for DID %s:%s' % (key, scope, name))
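
The loop above assumes only that every entry in METADATA_PLUGIN_MODULES exposes manages_key() and set_metadata() with the signatures called here. A minimal, hypothetical in-memory plugin sketching that contract (the class name and its storage are illustrative, not part of Rucio):

class InMemoryMetadataPlugin:
    """Hypothetical plugin showing the duck-typed interface set_metadata() dispatches to."""

    def __init__(self, managed_keys):
        self.managed_keys = set(managed_keys)   # keys this plugin claims to manage
        self.store = {}                         # {(scope, name): {key: value}}

    def manages_key(self, key, session=None):
        # Queried first; the first plugin in METADATA_PLUGIN_MODULES returning True wins.
        return key in self.managed_keys

    def set_metadata(self, scope, name, key, value, recursive=False, session=None):
        # recursive and session are accepted to match the call above; this sketch ignores them.
        self.store.setdefault((scope, name), {})[key] = value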
Example #2
def set_metadata_bulk(scope, name, meta, recursive=False, session=None):
    """
    Bulk sets metadata for a given did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param meta: The key-value mapping of metadata to set.
    :param recursive: (optional) Propagate the metadata change recursively to content.
    :param session: (optional) The database session in use.
    :raises: InvalidMetadata
    """
    metadata = meta

    unmanaged_keys = list()
    if not isinstance(metadata, dict):
        metadata = dict(metadata)
    metadata_plugin_keys = {
        metadata_plugin: []
        for metadata_plugin in METADATA_PLUGIN_MODULES
    }

    # Iterate through all keys, sequentially checking if each metadata plugin manages the considered key. If it
    # does, add it to the list in the plugin's entry in {metadata_plugin_keys}. Note that the order of
    # [METADATA_PLUGIN_MODULES] means that the key is always checked for existence in the base list first.
    for key in metadata.keys():
        # Check for forbidden characters in key.
        for char in RESTRICTED_CHARACTERS:
            if char in key:
                raise exception.InvalidMetadata(
                    'Restricted character "{}" found in metadata key. Reason: {}'
                    .format(char, RESTRICTED_CHARACTERS[char]))
        metadata_is_included = False
        for metadata_plugin in METADATA_PLUGIN_MODULES:
            if metadata_plugin.manages_key(key, session=session):
                metadata_plugin_keys[metadata_plugin].append(key)
                metadata_is_included = True
                break
        if not metadata_is_included:
            unmanaged_keys.append(key)
    if unmanaged_keys:
        raise exception.InvalidMetadata(
            'No plugin manages metadata keys %s on DID %s:%s' %
            (unmanaged_keys, scope, name))

    # For each plugin, set the metadata.
    for metadata_plugin, keys_managed_by_this_plugin in metadata_plugin_keys.items():
        if keys_managed_by_this_plugin:
            this_plugin_metadata = {
                key: metadata[key]
                for key in keys_managed_by_this_plugin
            }
            metadata_plugin.set_metadata_bulk(scope,
                                              name,
                                              metadata=this_plugin_metadata,
                                              recursive=recursive,
                                              session=session)
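
The bulk variant first buckets every key by the plugin that manages it and only then issues one set_metadata_bulk() call per plugin, so a single unmanaged key rejects the whole request before anything is written. A standalone sketch of that bucketing step, assuming plugins is any iterable of objects exposing manages_key() as above:

def group_keys_by_plugin(metadata, plugins):
    """Return ({plugin: [keys]}, [unmanaged keys]); a sketch of the grouping logic above."""
    plugin_keys = {plugin: [] for plugin in plugins}
    unmanaged = []
    for key in metadata:
        for plugin in plugins:          # plugin order matters: the first match wins
            if plugin.manages_key(key):
                plugin_keys[plugin].append(key)
                break
        else:                           # no plugin claimed this key
            unmanaged.append(key)
    return plugin_keys, unmanaged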
Example #3
def list_dids(scope=None, filters=None, did_type='collection', ignore_case=False, limit=None,
              offset=None, long=False, recursive=False, ignore_dids=None, session=None):
    """
    Search data identifiers.

    All filter keys should belong to a single plugin. Queries across plugins are not currently supported.

    :param scope: the scope name.
    :param filters: dictionary of attributes by which the results should be filtered.
    :param did_type: the type of the did: all(container, dataset, file), collection(dataset or container), dataset, container, file.
    :param ignore_case: ignore case distinctions.
    :param limit: limit number.
    :param offset: offset number.
    :param long: Long format option to display more information for each DID.
    :param recursive: Recursively list DIDs content.
    :param ignore_dids: List of DIDs to refrain from yielding.
    :param session: The database session in use.
    :returns: List of dids satisfying metadata criteria.
    :raises: InvalidMetadata
    """
    # backwards compatibility for filters passed as a single dict.
    if isinstance(filters, dict):
        filters = [filters]

    required_unique_plugins = set()                 # keep track of which plugins are required
    for or_group in filters:
        for key in or_group.keys():
            if key == 'name':                       # [name] is always passed through, and needs to be in schema of all plugins
                continue
            key_nooperator = key.split('.')[0]      # remove operator attribute from key if suffixed

            # Iterate through the list of metadata plugins, checking which (if any) manages this particular key
            # and appending the corresponding plugin to the set, required_unique_plugins.
            is_this_key_managed = False
            for metadata_plugin in METADATA_PLUGIN_MODULES:
                if metadata_plugin.manages_key(key_nooperator, session=session):
                    required_unique_plugins.add(metadata_plugin)
                    is_this_key_managed = True
                    break
            if not is_this_key_managed:
                raise exception.InvalidMetadata('There is no metadata plugin that manages the filter key(s) you requested.')

    if not required_unique_plugins:               # if no metadata keys were specified, fall back to using the base plugin
        required_unique_plugins = [METADATA_PLUGIN_MODULES[0]]
    elif len(required_unique_plugins) > 1:        # check that only a single plugin is required for the query, otherwise not supported
        raise exception.InvalidMetadata('Filter keys used do not all belong to the same metadata plugin.')
    selected_plugin_to_use = list(required_unique_plugins)[0]

    return selected_plugin_to_use.list_dids(scope=scope, filters=filters, did_type=did_type,
                                            ignore_case=ignore_case, limit=limit,
                                            offset=offset, long=long, recursive=recursive,
                                            ignore_dids=ignore_dids, session=session)
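
Because every filter key must resolve to the same plugin, mixing keys handled by different plugins in one query raises InvalidMetadata. A few hypothetical filter shapes illustrating what the check accepts and rejects (the key names are examples only, not a fixed schema):

# Accepted: the legacy single-dict form, wrapped into a one-element list above.
filters_legacy = {'name': 'data18*'}

# Accepted: OR-groups whose keys (ignoring 'name') all belong to one plugin.
filters_ok = [{'name': 'data18*', 'project': 'data18_13TeV'}]

# Rejected with InvalidMetadata if 'project' and 'custom_json_key' are managed by
# different plugins, because the query would have to span two backends.
filters_mixed = [{'project': 'data18_13TeV', 'custom_json_key': 42}]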
Example #4
    def set_metadata_bulk(self, scope, name, meta, recursive=False, session=None):
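        """
        Bulk sets metadata for a given did by updating the corresponding DID table columns.

        :param scope: The scope name.
        :param name: The data identifier name.
        :param meta: The key-value mapping of metadata to set.
        :param recursive: (optional) Propagate the metadata change recursively to content.
        :param session: (optional) The database session in use.
        :raises: DataIdentifierNotFound, InvalidValueForKey, UnsupportedOperation, InvalidMetadata
        """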
        did_query = session.query(models.DataIdentifier).with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').filter_by(scope=scope, name=name)
        if did_query.one_or_none() is None:
            raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (scope, name))

        remainder = {}
        for key, value in meta.items():
            if key == 'lifetime':
                try:
                    expired_at = None
                    if value is not None:
                        expired_at = datetime.utcnow() + timedelta(seconds=float(value))
                    rowcount = did_query.update({'expired_at': expired_at}, synchronize_session='fetch')
                except TypeError as error:
                    raise exception.InvalidValueForKey(error)
                if not rowcount:
                    # check for did presence
                    raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))
            elif key in ['guid', 'events']:
                rowcount = did_query.filter_by(did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
                if not rowcount:
                    # check for did presence
                    raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))

                session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
                if key == 'events':
                    for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope, models.DataIdentifierAssociation.name).filter_by(child_scope=scope, child_name=name):
                        events = session.query(func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()[0]
                        session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update({'events': events}, synchronize_session=False)

            elif key == 'adler32':
                rowcount = did_query.filter_by(did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
                if not rowcount:
                    # check for did presence
                    raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))

                session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
                session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
                session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
            elif key == 'bytes':
                rowcount = did_query.filter_by(did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
                if not rowcount:
                    # check for did presence
                    raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))

                session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
                session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)

                for account, bytes, rse_id, rule_id in session.query(models.ReplicaLock.account, models.ReplicaLock.bytes, models.ReplicaLock.rse_id, models.ReplicaLock.rule_id).filter_by(scope=scope, name=name):
                    session.query(models.ReplicaLock).filter_by(scope=scope, name=name, rule_id=rule_id, rse_id=rse_id).update({key: value}, synchronize_session=False)
                    account_counter.decrease(rse_id=rse_id, account=account, files=1, bytes=bytes, session=session)
                    account_counter.increase(rse_id=rse_id, account=account, files=1, bytes=value, session=session)

                for bytes, rse_id in session.query(models.RSEFileAssociation.bytes, models.RSEFileAssociation.rse_id).filter_by(scope=scope, name=name):
                    session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name, rse_id=rse_id).update({key: value}, synchronize_session=False)
                    rse_counter.decrease(rse_id=rse_id, files=1, bytes=bytes, session=session)
                    rse_counter.increase(rse_id=rse_id, files=1, bytes=value, session=session)

                for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope, models.DataIdentifierAssociation.name).filter_by(child_scope=scope, child_name=name):
                    values = {}
                    values['length'], values['bytes'], values['events'] = session.query(func.count(models.DataIdentifierAssociation.scope),
                                                                                        func.sum(models.DataIdentifierAssociation.bytes),
                                                                                        func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()
                    session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update(values, synchronize_session=False)
                    session.query(models.DatasetLock).filter_by(scope=parent_scope, name=parent_name).update({'length': values['length'], 'bytes': values['bytes']}, synchronize_session=False)
            else:
                remainder[key] = value

        if remainder:
            try:
                rowcount = did_query.update(remainder, synchronize_session='fetch')
            except CompileError as error:
                raise exception.InvalidMetadata(error)
            except InvalidRequestError:
                raise exception.InvalidMetadata("Some of the keys are not accepted: " + str(list(remainder.keys())))
            if not rowcount:
                raise exception.UnsupportedOperation('Some of the keys for %s:%s cannot be updated: %s' % (scope, name, str(list(remainder.keys()))))

            # propagate metadata updates to child content
            if recursive:
                content_query = session.query(models.DataIdentifierAssociation.child_scope, models.DataIdentifierAssociation.child_name)
                content_query = content_query.with_hint(models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle').filter_by(scope=scope, name=name)

                for child_scope, child_name in content_query:
                    try:
                        child_did_query = session.query(models.DataIdentifier).with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').filter_by(scope=child_scope, name=child_name)
                        child_did_query.update(remainder, synchronize_session='fetch')
                    except CompileError as error:
                        raise exception.InvalidMetadata(error)
                    except InvalidRequestError:
                        raise exception.InvalidMetadata("Some of the keys are not accepted recursively: " + str(list(remainder.keys())))
Example #5
    def list_dids(self, scope, filters, did_type='collection', ignore_case=False, limit=None,
                  offset=None, long=False, recursive=False, ignore_dids=None, session=None):
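        """
        Search data identifiers stored in the JSON metadata table.

        :param scope: the scope name.
        :param filters: dictionary (or list of dictionaries) of attributes by which the results should be filtered.
        :param did_type: the type of the did; only forwarded to recursive calls in this implementation.
        :param ignore_case: ignore case distinctions.
        :param limit: limit number.
        :param offset: offset number.
        :param long: Long format option to display more information for each DID.
        :param recursive: Recursively list DIDs content.
        :param ignore_dids: Set of DIDs already yielded, used to suppress duplicates.
        :param session: The database session in use.
        :raises: NotImplementedError, InvalidMetadata
        """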
        if not json_implemented(session=session):
            raise NotImplementedError

        if not ignore_dids:
            ignore_dids = set()

        # backwards compatibility for filters passed as a single dict.
        if isinstance(filters, dict):
            filters = [filters]

        # instantiate fe and create sqla query, note that coercion to a model keyword
        # is not appropriate here as the filter words are stored in a single json column.
        fe = FilterEngine(filters, model_class=models.DidMeta, strict_coerce=False)
        query = fe.create_sqla_query(
            additional_model_attributes=[models.DidMeta.scope, models.DidMeta.name],
            additional_filters=[(models.DidMeta.scope, operator.eq, scope)],
            json_column=models.DidMeta.meta)

        if limit:
            query = query.limit(limit)
        if recursive:
            from rucio.core.did import list_content

            # Get attached DIDs and save in list because query has to be finished before starting a new one in the recursion
            collections_content = []
            for did in query.yield_per(100):
                if did.did_type == DIDType.CONTAINER or did.did_type == DIDType.DATASET:
                    collections_content += [d for d in list_content(scope=did.scope, name=did.name)]

            # Replace any name filtering with recursed DID names.
            for did in collections_content:
                for or_group in filters:
                    or_group['name'] = did['name']
                for result in self.list_dids(scope=did['scope'],
                                             filters=filters,
                                             recursive=True,
                                             did_type=did_type,
                                             limit=limit,
                                             offset=offset,
                                             long=long,
                                             ignore_dids=ignore_dids,
                                             session=session):
                    yield result

        try:
            # don't unpack this as it makes it dependent on query return order!
            for did in query.yield_per(5):
                if long:
                    did_full = "{}:{}".format(did.scope, did.name)
                    if did_full not in ignore_dids:  # concatenating results of OR clauses may contain duplicate DIDs if query result sets not mutually exclusive.
                        ignore_dids.add(did_full)
                        yield {
                            'scope': did.scope,
                            'name': did.name,
                            'did_type': None,  # not available with JSON plugin
                            'bytes': None,  # not available with JSON plugin
                            'length': None  # not available with JSON plugin
                        }
                else:
                    did_full = "{}:{}".format(did.scope, did.name)
                    if did_full not in ignore_dids:  # concatenating results of OR clauses may contain duplicate DIDs if query result sets not mutually exclusive.
                        ignore_dids.add(did_full)
                        yield did.name
        except DataError as e:
            raise exception.InvalidMetadata(
                "Database query failed: {}. This can be raised when the datatype of a key is inconsistent between dids."
                .format(e))
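
Because the results of the different OR-groups (and of the recursive descent) can overlap, the generator tracks already-yielded DIDs in the shared ignore_dids set before yielding. A standalone sketch of that deduplication pattern:

def dedup_dids(results, ignore_dids=None):
    """Yield each 'scope:name' at most once across concatenated result sets (sketch of the logic above)."""
    ignore_dids = ignore_dids if ignore_dids is not None else set()
    for scope, name in results:
        did_full = "{}:{}".format(scope, name)
        if did_full not in ignore_dids:
            ignore_dids.add(did_full)
            yield did_full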
Example #6
    def set_metadata(self, scope, name, key, value, recursive=False, session=None):
        """
        Add metadata to data identifier.

        :param scope: The scope name.
        :param name: The data identifier name.
        :param key: Metadata key.
        :param value: Metadata value.
        :param recursive: (optional) Propagate the metadata change recursively to content.
        :param session: The database session in use.
        """
        try:
            rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name).\
                with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').one()
        except NoResultFound:
            raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (scope, name))

        if key == 'lifetime':
            try:
                expired_at = None
                if value is not None:
                    expired_at = datetime.utcnow() + timedelta(seconds=float(value))
                rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name).update({'expired_at': expired_at}, synchronize_session='fetch')
            except TypeError as error:
                raise exception.InvalidValueForKey(error)
        elif key in ['guid', 'events']:
            rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name, did_type=DIDType.FILE).update({key: value}, synchronize_session=False)

            session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            if key == 'events':
                for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope, models.DataIdentifierAssociation.name).filter_by(child_scope=scope, child_name=name):
                    events = session.query(func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()[0]
                    session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update({'events': events}, synchronize_session=False)

        elif key == 'adler32':
            rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name, did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
            session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
        elif key == 'bytes':
            rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name, did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)

            for account, bytes, rse_id, rule_id in session.query(models.ReplicaLock.account, models.ReplicaLock.bytes, models.ReplicaLock.rse_id, models.ReplicaLock.rule_id).filter_by(scope=scope, name=name):
                session.query(models.ReplicaLock).filter_by(scope=scope, name=name, rule_id=rule_id, rse_id=rse_id).update({key: value}, synchronize_session=False)
                account_counter.decrease(rse_id=rse_id, account=account, files=1, bytes=bytes, session=session)
                account_counter.increase(rse_id=rse_id, account=account, files=1, bytes=value, session=session)

            for bytes, rse_id in session.query(models.RSEFileAssociation.bytes, models.RSEFileAssociation.rse_id).filter_by(scope=scope, name=name):
                session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name, rse_id=rse_id).update({key: value}, synchronize_session=False)
                rse_counter.decrease(rse_id=rse_id, files=1, bytes=bytes, session=session)
                rse_counter.increase(rse_id=rse_id, files=1, bytes=value, session=session)

            for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope, models.DataIdentifierAssociation.name).filter_by(child_scope=scope, child_name=name):

                values = {}
                values['length'], values['bytes'], values['events'] = session.query(func.count(models.DataIdentifierAssociation.scope),
                                                                                    func.sum(models.DataIdentifierAssociation.bytes),
                                                                                    func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()
                session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update(values, synchronize_session=False)
                session.query(models.DatasetLock).filter_by(scope=parent_scope, name=parent_name).update({'length': values['length'], 'bytes': values['bytes']}, synchronize_session=False)
        else:
            try:
                rowcount = session.query(models.DataIdentifier).\
                    with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').\
                    filter_by(scope=scope, name=name).\
                    update({key: value}, synchronize_session='fetch')
            except CompileError as error:
                raise exception.InvalidMetadata(error)
            except InvalidRequestError:
                raise exception.InvalidMetadata("Key %s is not accepted" % key)

            # propagate metadata updates to child content
            if recursive:
                content_query = session.query(models.DataIdentifierAssociation.child_scope,
                                              models.DataIdentifierAssociation.child_name).\
                    with_hint(models.DataIdentifierAssociation,
                              "INDEX(CONTENTS CONTENTS_PK)", 'oracle').\
                    filter_by(scope=scope, name=name)

                for child_scope, child_name in content_query:
                    try:
                        session.query(models.DataIdentifier).\
                            with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').\
                            filter_by(scope=child_scope, name=child_name).\
                            update({key: value}, synchronize_session='fetch')
                    except CompileError as error:
                        raise exception.InvalidMetadata(error)
                    except InvalidRequestError:
                        raise exception.InvalidMetadata("Key %s is not accepted" % key)

        if not rowcount:
            # check for did presence
            raise exception.UnsupportedOperation('%(key)s for %(scope)s:%(name)s cannot be updated' % locals())
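
When a file's bytes change, the 'bytes' branch above walks the attachment associations and recomputes each parent's length, bytes and events with func.count / func.sum. A plain-Python sketch of that roll-up, assuming children is a hypothetical list of dicts with 'bytes' and 'events' keys:

def recompute_parent_totals(children):
    """Sketch of the parent aggregation above: count and sum over the attached children."""
    return {
        'length': len(children),
        'bytes': sum(child['bytes'] or 0 for child in children),
        'events': sum(child['events'] or 0 for child in children),
    }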