Exemplo n.º 1
0
    def __init__(self, root, username=None, password=None):
        """
        Set up the connection objects for a repository.

        :param root: repository root, passed through to
            :class:`AuthorizingServerConnection`
        :param username: (optional) username passed to the connection
        :param password: (optional) password passed to the connection
        """
        # keep the credentials that were supplied available on the instance
        self.username, self.password = username, password
        # placeholder; starts out unset
        self._risearch = None

        connection = AuthorizingServerConnection(root, username, password)
        self.opener = connection
        self.api = ApiFacade(connection)
        # the root url is whatever the connection reports as its base url
        self.fedora_root = connection.base_url
Exemplo n.º 2
0
class Repository(object):
    "Pythonic interface to a single Fedora Commons repository instance."

    default_object_type = DigitalObject
    "Default type to use for methods that return fedora objects - :class:`DigitalObject`"

    search_fields = [
        "pid",
        "label",
        "state",
        "ownerId",
        "cDate",
        "mDate",
        "dcmDate",
        "title",
        "creator",
        "subject",
        "description",
        "publisher",
        "contributor",
        "date",
        "type",
        "format",
        "identifier",
        "source",
        "language",
        "relation",
        "coverage",
        "rights",
    ]
    "fields that can be searched against in :meth:`find_objects`"

    search_fields_aliases = {"owner": "ownerId", "created": "cDate", "modified": "mDate", "dc_modified": "dcmDate"}
    "human-readable aliases for oddly-named fedora search fields"

    def __init__(self, root, username=None, password=None):
        """
        Connect to a Fedora repository.

        :param root: repository root, passed through to
            :class:`AuthorizingServerConnection`
        :param username: (optional) username passed to the connection
        :param password: (optional) password passed to the connection
        """
        self.opener = AuthorizingServerConnection(root, username, password)
        self.api = ApiFacade(self.opener)
        self.fedora_root = self.opener.base_url

        self.username = username
        self.password = password
        # created on first access by the ``risearch`` property
        self._risearch = None

    @property
    def risearch(self):
        "instance of :class:`ResourceIndex`, with the same root url and credentials"
        if self._risearch is None:
            self._risearch = ResourceIndex(self.opener)
        return self._risearch

    def get_next_pid(self, namespace=None, count=None):
        """
        Request next available pid or pids from Fedora, optionally in a specified
        namespace.  Calls :meth:`ApiFacade.getNextPID`.

        .. deprecated :: 0.14
          Mint pids for new objects with
          :func:`eulcore.fedora.models.DigitalObject.get_default_pid`
          instead, or call :meth:`ApiFacade.getNextPID` directly.

        :param namespace: (optional) get the next pid in the specified pid namespace;
            otherwise, Fedora will return the next pid in the configured default namespace.
        :param count: (optional) get the specified number of pids; by default, returns 1 pid
        :rtype: string when ``count`` is not given, otherwise list of strings
        """
        # this method should no longer be needed - default pid logic moved to DigitalObject
        warnings.warn(
            """get_next_pid() method is deprecated; you should mint new pids via DigitalObject or ApiFacade.getNextPID() instead.""",
            DeprecationWarning,
            # attribute the warning to the code calling get_next_pid, not to this line
            stacklevel=2,
        )
        kwargs = {}
        if namespace:
            kwargs["namespace"] = namespace
        if count:
            kwargs["numPIDs"] = count
        data, url = self.api.getNextPID(**kwargs)
        nextpids = parse_xml_object(NewPids, data, url)

        # a bare call returns a single pid; any explicit count returns the list
        if count is None:
            return nextpids.pids[0]
        return nextpids.pids

    def ingest(self, text, log_message=None):
        """
        Ingest a new object into Fedora. Returns the pid of the new object on
        success.  Calls :meth:`ApiFacade.ingest`.

        :param text: full text content of the object to be ingested
        :param log_message: optional log message
        :rtype: string
        """
        kwargs = {"text": text}
        if log_message:
            kwargs["logMessage"] = log_message
        return self.api.ingest(**kwargs)

    def purge_object(self, pid, log_message=None):
        """
        Purge an object from Fedora.  Calls :meth:`ApiFacade.purgeObject`.

        :param pid: pid of the object to be purged
        :param log_message: optional log message
        :rtype: boolean
        """
        kwargs = {"pid": pid}
        if log_message:
            kwargs["logMessage"] = log_message
        # the API call also returns a timestamp, which callers of this method
        # do not receive
        success, _timestamp = self.api.purgeObject(**kwargs)
        return success

    def get_objects_with_cmodel(self, cmodel_uri, type=None):
        """
        Find objects in Fedora with the specified content model.

        :param cmodel_uri: content model URI (should be full URI in  info:fedora/pid:### format)
        :param type: type of object to return (e.g., :class:`DigitalObject`);
            defaults to :attr:`default_object_type`
        :rtype: list of objects
        """
        uris = self.risearch.get_subjects(modelns.hasModel, cmodel_uri)
        return [self.get_object(uri, type) for uri in uris]

    def get_object(self, pid=None, type=None, create=None):
        """
        Initialize a single object from Fedora, or create a new one, with the
        same Fedora configuration and credentials.

        :param pid: pid of the object to request, or a function that can be
                    called to get one; a string in ``info:fedora/pid`` uri form
                    is reduced to the bare pid. If not specified, ``None`` is
                    handed to the object class, which is expected to mint a
                    pid when one is needed (see
                    :meth:`DigitalObject.get_default_pid`)
        :param type: type of object to return; defaults to :class:`DigitalObject`
        :param create: boolean: create a new object? (if not specified, defaults
                 to False when pid is specified, and True when it is not)
        :rtype: single object of the type specified
        """
        type = type or self.default_object_type

        if pid is None:
            if create is None:
                create = True
        else:
            if isinstance(pid, basestring) and pid.startswith("info:fedora/"):  # passed a uri
                pid = pid[len("info:fedora/") :]

            if create is None:
                create = False

        return type(self.api, pid, create)

    def _build_find_query(self, criteria):
        "Translate ``field[__operator]=value`` search criteria into a findObjects query string."
        search_operators = {"exact": "=", "gt": ">", "gte": ">=", "lt": "<", "lte": "<=", "contains": "~"}

        conditions = []
        for key, value in criteria.iteritems():
            # partition on the first "__" only, so a key with several "__"
            # is reported as an unsupported filter instead of failing to unpack
            field, separator, operator_name = key.partition("__")
            if separator:
                if operator_name not in search_operators:
                    raise Exception("Unsupported search filter '%s'" % operator_name)
                op = search_operators[operator_name]
            else:
                op = search_operators["contains"]  # default search mode

            # map human-readable aliases onto the field names Fedora uses
            field = self.search_fields_aliases.get(field, field)
            if field not in self.search_fields:
                raise Exception("Error generating Fedora findObjects query: unknown search field '%s'" % field)
            if " " in value:
                # if value contains whitespace, it must be delimited with single quotes
                value = "'%s'" % value
            conditions.append("%s%s%s" % (field, op, value))

        return " ".join(conditions)

    def find_objects(self, terms=None, type=None, chunksize=None, **kwargs):
        """
        Find objects in Fedora.  Find query should be generated via keyword
        args, based on the fields in Fedora documentation.  By default, the
        query uses a contains (~) search for all search terms.  Calls
        :meth:`ApiFacade.findObjects`. Results seem to return consistently
        in ascending PID order.

        Example usage - search for all objects where the owner contains 'jdoe'::

            repository.find_objects(ownerId='jdoe')

        Supports all search operators provided by Fedora findObjects query (exact,
        gt, gte, lt, lte, and contains).  To specify the type of query for
        a particular search term, call find_objects like this::

            repository.find_objects(ownerId__exact='lskywalker')
            repository.find_objects(date__gt='20010302')

        Because this method is a generator, nothing is sent to Fedora and no
        search field is validated until the result is first iterated; errors
        for unknown fields or filters are raised at that point.

        :param terms: (optional) search terms passed to Fedora as-is; when
            given, any field keyword arguments are ignored
        :param type: type of objects to return; defaults to :class:`DigitalObject`
        :param chunksize: number of objects to return at a time
        :param kwargs: search criteria, keyed on a name from
            :attr:`search_fields` or :attr:`search_fields_aliases`, optionally
            suffixed with ``__`` and a search operator
        :raises Exception: for an unknown search field or unsupported operator
        :rtype: generator for list of objects
        """
        type = type or self.default_object_type

        find_opts = {"chunksize": chunksize}
        if terms is not None:
            find_opts["terms"] = terms
        else:
            find_opts["query"] = self._build_find_query(kwargs)

        data, url = self.api.findObjects(**find_opts)
        chunk = parse_xml_object(SearchResults, data, url)
        while True:
            for result in chunk.results:
                yield type(self.api, result.pid)

            # a session token on the chunk means more results can be requested
            if not chunk.session_token:
                break
            data, url = self.api.findObjects(session_token=chunk.session_token, **find_opts)
            chunk = parse_xml_object(SearchResults, data, url)