Exemplo n.º 1
0
class OaiData:
    """Page helper that prepares the data needed to render an OAI-PMH response.

    The object is driven through ``__activate__(context)``. The remaining
    public methods are accessors (verb, error, search result, resumption
    token, identifiers, sets, payloads) for whatever renders the response.

    The code targets Python 2 / Jython. Collaborators such as TokensDatabase,
    OaiPmhVerb, ResumptionToken, SearchRequest, SolrResult, JsonSimpleConfig,
    StringEscapeUtils, StorageException and the ``time`` module are expected
    to be imported at the top of the file.
    """

    # Prefix of the identifiers generated for items without 'oai_identifier'
    _DEFAULT_ID_PREFIX = "oai:fascinator.usq.edu.au:"

    # Characters that carry a meaning in the Solr query syntax
    _SOLR_SPECIAL_CHARS = "\\+-!():^[]\"{}~*?|&;/"

    def __init__(self):
        # Created on first activation, then reused by later activations
        self.tokensDB = None

    def __activate__(self, context):
        """Parse the incoming request and, when the verb needs it, search.

        context -- scripting context; the entries "Services", "log",
                   "sessionState", "portalDir", "page", "formData" and
                   "request" are read from it.
        """
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None

        formData = self.vc("formData")

        # The OAI 'set' argument may override the portal taken from the URL
        paramSet = formData.get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            if self.vc("page").getPortals().keySet().contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Look up the resumption token named in the request, if any
        self.__currentToken = None
        resumptionToken = formData.get("resumptionToken")
        if resumptionToken is not None:
            # The lookup may still come back empty
            self.__currentToken = self.tokensDB.getToken(resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument", "Set '%s' is not valid!" % paramSet)

        # Only these verbs need a search result; everything else can be
        # handled by the templates.
        if self.getError() is not None:
            return
        if self.getVerb() not in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            return

        # Find the metadata prefix: explicit argument first, token second
        prefix = formData.get("metadataPrefix")
        if prefix is None and self.__currentToken is not None:
            prefix = self.__currentToken.getMetadataPrefix()
        self.__metadataPrefix = prefix

        if prefix is None:
            # Nothing tells us which format to serve. Report it instead of
            # dereferencing a missing token.
            if resumptionToken is not None:
                self.__request.setError(
                    "badResumptionToken",
                    "Resumption token '%s' is invalid or has expired" % resumptionToken)
            else:
                self.__request.setError(
                    "badArgument", "Argument 'metadataPrefix' is required")
            return

        # Only list records if the metadata format is enabled in this view
        if self.isInView(prefix):
            self.__search()

    def vc(self, index):
        """Return the context entry 'index'; log an error and return None if it is None."""
        entry = self.velocityContext[index]
        if entry is None:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return entry

    def isInView(self, format, view = None):
        """Tell whether metadata format 'format' is enabled for 'view'.

        format -- metadata prefix; None or "" is never in view.
        view   -- portal name; defaults to the portal of this request.
        """
        # Sanity check
        if format is None or format == "":
            return False
        # Default to the current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False
        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True
        # Otherwise it must be listed for this view; a missing list means no
        allowedViews = formatConfig.getStringList(["enabledViews"])
        return allowedViews is not None and view in allowedViews

    def getID(self, item):
        """Return the OAI identifier of 'item', generating a default one if unset."""
        identifier = item.getFirst("oai_identifier")
        if identifier is None or identifier == "":
            # Fallback to the default
            return self._DEFAULT_ID_PREFIX + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        """Return True when the 'oai_deleted' field of 'item' flags a deletion.

        An absent or empty value means "not deleted". The text "false" (any
        case) also means "not deleted"; a plain bool() would treat that
        non-empty string as True.
        NOTE(review): the stored form of 'oai_deleted' is not visible here -
        confirm against the indexing rules.
        """
        flag = item.getFirst("oai_deleted")
        if not flag:
            return False
        return str(flag).strip().lower() != "false"

    def getSet(self, item):
        """Return the OAI set of 'item', falling back to the portal name."""
        setName = item.getFirst("oai_set")
        if setName is None or setName == "":
            return self.__portalName
        return setName

    def getVerb(self):
        """Return the verb of the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error of the parsed request (None when there is none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time formatted as a UTC timestamp.

        The trailing 'Z' designates UTC, so the time is taken from
        time.gmtime() rather than from the server's local clock.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the parsed request object built during activation."""
        return self.__request

    def getResult(self):
        """Return the search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return one '<elementName>value</elementName>' element per value.

        Returns "" when 'values' is None or empty. Values are inserted as
        they are; callers needing escaping should use encodeXml() first.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, text):
        """Backslash-escape Solr query syntax characters and whitespace in 'text'."""
        escaped = []
        for char in text:
            if char in self._SOLR_SPECIAL_CHARS or char.isspace():
                escaped.append("\\")
            escaped.append(char)
        return "".join(escaped)

    def __identifierQuery(self, identifier):
        """Build the main Solr query for the (optional) requested identifier."""
        if identifier is None or identifier == "":
            return "*:*"
        prefix = self._DEFAULT_ID_PREFIX
        if identifier.startswith(prefix):
            # A default identifier: only the leading prefix is stripped
            return "id:" + self.__escapeQuery(identifier[len(prefix):])
        # Or a custom OAI ID
        return "oai_identifier:" + self.__escapeQuery(identifier)

    def __dateFilter(self, fromDate, untilDate):
        """Build the 'last_modified' range filter, or None without any bound."""
        if fromDate is None and untilDate is None:
            return None
        lower = "*"
        upper = "*"
        if fromDate is not None:
            lower = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", lower)
        if untilDate is not None:
            upper = untilDate.isoformat() + "Z"
            self.log.debug("Until Date: '{}'", upper)
        dateQuery = "last_modified:[%s TO %s]" % (lower, upper)
        self.log.debug("Date query: '{}'", dateQuery)
        return dateQuery

    def __search(self):
        """Run the Solr search for this request and maintain the resumption token.

        Stores the result for getResult(), sets 'idDoesNotExist' or
        'noRecordsMatch' on the request when nothing is found, and stores,
        updates or removes the resumption token in the token database.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        hasIdentifier = identifier is not None and identifier != ""

        req = SearchRequest(self.__identifierQuery(identifier))
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Restrict by modification date when the request supplies bounds
        dateQuery = self.__dateFilter(self.__request.getFromDate(),
                                      self.__request.getUntilDate())
        if dateQuery is not None:
            req.addParam("fq", dateQuery)

        # Continue from an existing resumption token ...
        newToken = None
        if self.__currentToken is not None:
            start = int(self.__currentToken.getStart())
            knownTotal = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < knownTotal:
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        # ... or start a new one
        else:
            start = 0
            newToken = ResumptionToken(None, recordsPerPage,
                                       self.__metadataPrefix, self.__sessionExpiry)

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if hasIdentifier:
                self.__request.setError("idDoesNotExist", "ID: '%s' not found" % identifier)
            else:
                self.__request.setError("noRecordsMatch", "No records match this request")
        elif self.__currentToken is None:
            # A NEW token is only worth keeping if more results remain;
            # int() mirrors how the start of a stored token is read above
            if int(newToken.getStart()) < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        elif self.__result.getResults().size() < recordsPerPage:
            # Short page: the existing token is used up
            # NOTE(review): self.__currentToken itself is left untouched, so
            # getToken() may still hand out the exhausted token - confirm
            # how the templates treat that.
            self.tokensDB.removeToken(self.__currentToken)

        # Store/update the resumption token
        if newToken is not None:
            if self.__currentToken is None:
                # Brand new token
                self.tokensDB.storeToken(newToken)
            else:
                # Or update an old token
                self.tokensDB.updateToken(newToken)
            self.__currentToken = newToken

    def getToken(self):
        """Return the current resumption token, or None if the format is not in view."""
        if self.isInView(self.__metadataPrefix):
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Return the configured metadata formats, reading the config once per activation."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """Return 'string' escaped by StringEscapeUtils.escapeXml."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload 'metadataFileName' of object 'oid', or None on a StorageException."""
        try:
            # First get the object from storage, then the payload from it
            digitalObject = self.services.getStorage().getObject(oid)
            return digitalObject.getPayload(metadataFileName)
        except StorageException:
            return None
Exemplo n.º 2
0
class OaiData:
    """Page helper that prepares the data needed to render an OAI-PMH response.

    The object is driven through ``__activate__(context)``. Resumption tokens
    are kept in the session state under the key "resumptionTokenList".

    The code targets Python 2 / Jython. Collaborators such as OaiPmhVerb,
    ResumptionToken, SearchRequest, SolrResult, JsonSimpleConfig,
    StringEscapeUtils and the ``time`` module are expected to be imported at
    the top of the file.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Parse the incoming request and, when the verb needs it, search.

        context -- scripting context; the entries "Services", "log",
                   "sessionState", "portalDir", "page", "formData" and
                   "request" are read from it ("portalId" is read later by
                   the search).
        """
        self.systemConfig = JsonSimpleConfig()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]
        self.__result = None
        self.__token = None
        self.__portalName = context["page"].getPortal().getName()
        self.__enabledInAllViews = False
        self.__enabledInViews = []
        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Tokens issued earlier in this session
        self.__resumptionTokenList = self.sessionState.get("resumptionTokenList")
        if self.__resumptionTokenList is None:
            self.__resumptionTokenList = {}

        # Resolve the resumption token named in the request, if it is known
        self.__currentToken = None
        formData = self.vc("formData")
        resumptionToken = formData.get("resumptionToken")
        if resumptionToken and resumptionToken in self.__resumptionTokenList:
            self.__currentToken = self.__resumptionTokenList[resumptionToken]

        # Diagnostics go to the logger rather than to standard output
        self.log.debug(" * oai.py: formData=%s" % formData)
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(formData, self.__currentToken, self.sessionState)

        # Only these verbs need a search result
        if self.getError() is not None:
            return
        if self.getVerb() not in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            return

        # Metadata prefix: explicit argument first, resumption token second
        prefix = formData.get("metadataPrefix")
        if prefix is None and self.__currentToken is not None:
            prefix = self.__currentToken.getMetadataPrefix()
        self.__metadataPrefix = prefix
        if prefix is None:
            # No format can be determined; skip the search instead of
            # dereferencing a missing token.
            self.log.error("ERROR: OAI request has neither a metadataPrefix nor a known resumption token")
            return

        # Only list those data if the metadata format is enabled
        formatPath = ["portal", "oai-pmh", "metadataFormats", prefix]
        self.__enabledInAllViews = self.systemConfig.getBoolean(
            False, formatPath + ["enabledInAllViews"])
        if self.__enabledInAllViews:
            self.__search()
            return

        views = self.systemConfig.getStringList(formatPath + ["enabledViews"])
        if views is not None:
            # A missing list keeps the empty default, so 'in' tests stay valid
            self.__enabledInViews = views
        if self.__portalName in self.__enabledInViews:
            self.__search()

    def vc(self, index):
        """Return the context entry 'index'; log an error and return None if it is None."""
        entry = self.velocityContext[index]
        if entry is None:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return entry

    def getVerb(self):
        """Return the verb of the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error of the parsed request (None when there is none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time formatted as a UTC timestamp.

        The trailing 'Z' designates UTC, so the time is taken from
        time.gmtime() rather than from the server's local clock.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the parsed request object built during activation."""
        return self.__request

    def getResult(self):
        """Return the search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return one '<elementName>value</elementName>' element per value.

        Returns "" when 'values' is None or empty. Values are inserted as
        they are; callers needing escaping should use encodeXml() first.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __search(self):
        """Run the Solr search for this request and prepare the next token.

        Stores the result for getResult() and, when more results remain,
        records a new resumption token in the session state.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.vc("portalId"))
        recordsPerPage = portal.recordsPerPage

        # The 'query' form field is used as the main query when supplied
        query = self.vc("formData").get("query")
        if query is None or query == "":
            query = "*:*"
        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        self.log.debug(" * portalQuery=%s" % portalQuery)
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Continue from the current token, or start at the first page
        if self.__currentToken:
            start = self.__currentToken.getStart()
            knownTotal = self.__currentToken.getTotalFound()
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < knownTotal:
                self.__token = ResumptionToken(start=nextTokenStart,
                                               metadataPrefix=self.__metadataPrefix,
                                               sessionExpiry=self.__sessionExpiry)
        else:
            start = 0
            self.__token = ResumptionToken(start=recordsPerPage,
                                           metadataPrefix=self.__metadataPrefix,
                                           sessionExpiry=self.__sessionExpiry)

        req.setParam("start", str(start))

        self.log.debug(" * oai.py: %s" % req.toString())

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Drop the token when there is nothing (more) to resume
        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            self.__token = None
        elif self.__token:
            if self.__token.getStart() < totalFound:
                self.__token.setTotalFound(totalFound)
            else:
                self.__token = None

        # Storing the resumptionToken to session
        if self.__token:
            self.__resumptionTokenList[self.__token.getToken()] = self.__token
            # Need to know how long the server need to store this token
            self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)

    def getToken(self):
        """Return the new resumption token, or None if the format is not enabled here."""
        if self.__enabledInAllViews or self.__portalName in self.__enabledInViews:
            return self.__token
        return None

    def getMetadataFormats(self):
        """Return the configured metadata formats map."""
        return self.systemConfig.getJsonSimpleMap(["portal", "oai-pmh", "metadataFormats"])

    def encodeXml(self, string):
        """Return 'string' escaped by StringEscapeUtils.escapeXml."""
        return StringEscapeUtils.escapeXml(string)
Exemplo n.º 3
0
class OaiData:
    """Page helper that prepares the data needed to render an OAI-PMH response.

    The object is driven through ``__activate__(context)``. Resumption tokens
    are kept in the session state under the key "resumptionTokenList".

    The code targets Python 2 / Jython. Collaborators such as OaiPmhVerb,
    ResumptionToken, SearchRequest, SolrResult, JsonSimpleConfig,
    StringEscapeUtils, StorageException and the ``time`` module are expected
    to be imported at the top of the file.
    """

    # Prefix of the identifiers generated for items without 'oai_identifier'
    _DEFAULT_ID_PREFIX = "oai:fascinator:"

    # Characters that carry a meaning in the Solr query syntax
    _SOLR_SPECIAL_CHARS = "\\+-!():^[]\"{}~*?|&;/"

    def __init__(self):
        pass

    def __activate__(self, context):
        """Parse the incoming request and, when the verb needs it, search.

        context -- scripting context; the entries "Services", "log",
                   "sessionState", "portalDir", "page", "formData" and
                   "request" are read from it.
        """
        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.__token = None

        formData = self.vc("formData")

        # The OAI 'set' argument may override the portal taken from the URL
        paramSet = formData.get("set")
        self.__portalName = context["page"].getPortal().getName()
        if paramSet is not None:
            if self.vc("page").getPortals().keySet().contains(paramSet):
                self.__portalName = paramSet
                self.log.debug("=== PORTAL override! : {}", self.__portalName)

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Tokens issued earlier in this session
        self.__resumptionTokenList = self.sessionState.get("resumptionTokenList")
        if self.__resumptionTokenList is None:
            self.__resumptionTokenList = {}

        # Resolve the resumption token named in the request, if it is known
        self.__currentToken = None
        resumptionToken = formData.get("resumptionToken")
        if resumptionToken and resumptionToken in self.__resumptionTokenList:
            self.__currentToken = self.__resumptionTokenList[resumptionToken]

        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(formData, self.__currentToken, self.sessionState, context)

        # Only these verbs need a search result
        if self.getError() is not None:
            return
        if self.getVerb() not in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            return

        # Metadata prefix: explicit argument first, resumption token second.
        # Without either, the prefix stays None and isInView() rejects it,
        # instead of dereferencing a missing token.
        prefix = formData.get("metadataPrefix")
        if prefix is None and self.__currentToken is not None:
            prefix = self.__currentToken.getMetadataPrefix()
        self.__metadataPrefix = prefix

        # Only list those data if the metadata format is enabled
        if self.isInView(prefix):
            self.__search()

    def vc(self, index):
        """Return the context entry 'index'; log an error and return None if it is None."""
        entry = self.velocityContext[index]
        if entry is None:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return entry

    def isInView(self, format, view = None):
        """Tell whether metadata format 'format' is enabled for 'view'.

        format -- metadata prefix; None or "" is never in view.
        view   -- portal name; defaults to the portal of this request.
        """
        # Sanity check
        if format is None or format == "":
            return False
        # Default to the current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False
        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            self.log.debug("=== Format '{}' is in all views", format)
            return True
        # Otherwise it must be listed for this view; a missing list means no
        allowedViews = formatConfig.getStringList(["enabledViews"])
        if allowedViews is not None and view in allowedViews:
            self.log.debug("=== Format '{}' is in view '{}'", format, view)
            return True
        # Rejection
        self.log.debug("=== Format '{}' is NOT in view '{}'", format, view)
        return False

    def getID(self, item):
        """Return the OAI identifier of 'item', generating a default one if unset."""
        identifier = item.getFirst("oai_identifier")
        if identifier is None or identifier == "":
            # Fallback to the default
            return self._DEFAULT_ID_PREFIX + item.getFirst("id")
        # Use the indexed value
        return identifier

    def getSet(self, item):
        """Return the OAI set of 'item', falling back to the portal name."""
        setName = item.getFirst("oai_set")
        if setName is None or setName == "":
            return self.__portalName
        return setName

    def getVerb(self):
        """Return the verb of the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error of the parsed request (None when there is none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time formatted as a UTC timestamp.

        The trailing 'Z' designates UTC, so the time is taken from
        time.gmtime() rather than from the server's local clock.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the parsed request object built during activation."""
        return self.__request

    def getResult(self):
        """Return the search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return one '<elementName>value</elementName>' element per value.

        Returns "" when 'values' is None or empty. Values are inserted as
        they are; callers needing escaping should use encodeXml() first.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, text):
        """Backslash-escape Solr query syntax characters and whitespace in 'text'."""
        escaped = []
        for char in text:
            if char in self._SOLR_SPECIAL_CHARS or char.isspace():
                escaped.append("\\")
            escaped.append(char)
        return "".join(escaped)

    def __identifierQuery(self, identifier):
        """Build the main Solr query for the (optional) requested identifier.

        The identifier comes straight from the request, so it is escaped
        before being placed in the query.
        """
        if identifier is None or identifier == "":
            return "*:*"
        prefix = self._DEFAULT_ID_PREFIX
        if identifier.startswith(prefix):
            # A default identifier: only the leading prefix is stripped
            return "id:" + self.__escapeQuery(identifier[len(prefix):])
        # Or a custom OAI ID
        return "oai_identifier:" + self.__escapeQuery(identifier)

    def __search(self):
        """Run the Solr search for this request and prepare the next token.

        Stores the result for getResult() and, when more results remain,
        records a new resumption token in the session state.
        """
        self.log.debug(" === __search()")
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        self.log.debug(" === ID: '{}'", identifier)
        query = self.__identifierQuery(identifier)

        self.log.debug(" === QUERY: '{}'", query)
        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        self.log.debug(" * portalQuery={}", portalQuery)
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Continue from the current token, or start at the first page
        if self.__currentToken:
            start = self.__currentToken.getStart()
            knownTotal = self.__currentToken.getTotalFound()
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < knownTotal:
                self.__token = ResumptionToken(start = nextTokenStart,
                    metadataPrefix = self.__metadataPrefix,
                    sessionExpiry = self.__sessionExpiry)
        else:
            start = 0
            self.__token = ResumptionToken(start = recordsPerPage,
                metadataPrefix = self.__metadataPrefix,
                sessionExpiry = self.__sessionExpiry)

        req.setParam("start", str(start))

        # The '{}' placeholder is required for the request to be logged
        self.log.debug(" * oai.py: {}", req.toString())

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Drop the token when there is nothing (more) to resume
        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            self.__token = None
        elif self.__token:
            if self.__token.getStart() < totalFound:
                self.__token.setTotalFound(totalFound)
            else:
                self.__token = None

        # Storing the resumptionToken to session
        if self.__token:
            self.__resumptionTokenList[self.__token.getToken()] = self.__token
            # Need to know how long the server need to store this token
            self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)

    def getToken(self):
        """Return the new resumption token, or None if the format is not in view."""
        if self.isInView(self.__metadataPrefix):
            return self.__token
        return None

    def getMetadataFormats(self):
        """Return the configured metadata formats, reading the config once per activation."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """Return 'string' escaped by StringEscapeUtils.escapeXml."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload 'metadataFileName' of object 'oid', or None on a StorageException."""
        try:
            # First get the object from storage, then the payload from it
            digitalObject = self.services.getStorage().getObject(oid)
            return digitalObject.getPayload(metadataFileName)
        except StorageException:
            return None