Пример #1
0
 def getSuggestedNames(self):
     """Search the index for author records resembling this record's name.

     Builds a query from common written forms of the name (taken from the
     "surname", "firstName" and optional "secondName" metadata lists) plus
     a lower-boosted search on each word of the package title, restricted
     to documents with recordtype "author" and item_type "object".

     Returns a LinkedHashMap keyed by each hit's first dc_title value; the
     values are ArrayLists of the matching documents, in the order the
     indexer returned them (the request sorts by score, descending).

     Side effect: stores max(1.0, response maxScore) in self.__maxScore,
     falling back to 1.0 when the response carries no usable maxScore.
     """
     def quotePhrase(text):
         # Backslash-escape '\' and '"' so that a name containing them
         # cannot terminate the quoted phrase early and break the query.
         return text.replace("\\", "\\\\").replace('"', '\\"')

     surname = self.__metadata.getList("surname").get(0)
     firstName = self.__metadata.getList("firstName").get(0)
     firstInitial = firstName[0].upper()

     # The second name is optional: keep the list and the value apart so
     # the check below never depends on the truthiness of a Java list.
     secondNames = self.__metadata.getList("secondName")
     secondName = None
     if not secondNames.isEmpty():
         secondName = secondNames.get(0)

     # search common forms
     lookupNames = []
     if secondName:
         secondInitial = secondName[0].upper()
         lookupNames.append("%s, %s. %s." % (surname, firstInitial, secondInitial))
         lookupNames.append("%s, %s %s." % (surname, firstName, secondInitial))
         lookupNames.append("%s, %s %s" % (surname, firstName, secondName))
         lookupNames.append("%s %s %s" % (firstName, secondName, surname))
     lookupNames.append("%s, %s." % (surname, firstInitial))
     lookupNames.append("%s, %s" % (surname, firstName))
     lookupNames.append("%s %s" % (firstName, surname))
     phraseQuery = '" OR dc_title:"'.join([quotePhrase(n) for n in lookupNames])
     clauses = ['(dc_title:"%s")^2.5' % phraseQuery]

     # general word search from each part of the name; skipped when the
     # title has no words, which would otherwise yield "(dc_title:)^0.5"
     parts = [p for p in self.getPackageTitle().split(" ") if len(p) > 0]
     if parts:
         clauses.append('(dc_title:%s)^0.5' % " OR dc_title:".join(parts))

     req = SearchRequest(" OR ".join(clauses))
     self.log.info("suggestedNames query={}", req.query)
     req.setParam("fq", 'recordtype:"author"')
     req.addParam("fq", 'item_type:"object"')
     req.setParam("rows", "9999")
     req.setParam("fl", "score")
     req.setParam("sort", "score desc")

     # NOTE: no role-based security_filter "fq" is added to this request;
     # the code that did so was disabled in the original.

     out = ByteArrayOutputStream()
     indexer = self.services.getIndexer()
     indexer.search(req, out)
     result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray()))

     # Group the hits by title, preserving the order of first appearance.
     authors = LinkedHashMap()
     for doc in result.getJsonList("response/docs"):
         authorName = doc.getList("dc_title").get(0)
         authorDocs = authors.get(authorName)
         if authorDocs is None:
             authorDocs = ArrayList()
             authors.put(authorName, authorDocs)
         authorDocs.add(doc)

     # A response without a (numeric) maxScore must not abort the lookup.
     try:
         maxScore = float(result.get("response/maxScore"))
     except (TypeError, ValueError):
         maxScore = 0.0
     self.__maxScore = max(1.0, maxScore)

     return authors
Пример #2
0
class OrganiseData:
    """Page data object for the "combined" organiser view (Python 2 / Jython).

    On construction it resolves the object id (oid) and payload id (pid)
    from the request URI, looks up the payload's MIME type, then runs a
    faceted index search and synchronises the portal's "manifest" with the
    search results.

    The class relies on names that are not defined here and are expected to
    be supplied by the surrounding script environment: Services, portalId,
    pageName, portalPath, request, formData and sessionState.
    """

    def __init__(self):
        self.__portal = Services.portalManager.get(portalId)
        # Search response; replaced by the real result in __search().
        self.__result = JsonConfigHelper()
        self.__pageNum = sessionState.get("pageNum", 1)
        # Filter queries ("fq") in effect for the last search.
        self.__selected = []
        self.__storage = Services.storage

        # Everything after "<portalId>/<pageName>/" in the URI is the oid;
        # the pid is the last path segment of the oid. The charset is given
        # explicitly, as it is for the form values decoded in __search().
        uri = URLDecoder.decode(request.getAttribute("RequestURI"), "UTF-8")
        basePath = portalId + "/" + pageName
        self.__oid = uri[len(basePath) + 1:]
        self.__pid = self.__oid[self.__oid.rfind("/") + 1:]

        payload = self.__storage.getPayload(self.__oid, self.__pid)
        if payload is None:
            self.__mimeType = "application/octet-stream"
        else:
            self.__mimeType = payload.contentType
        self.__metadata = JsonConfigHelper()
        print(" * combined.py: uri='%s' oid='%s' pid='%s' mimeType='%s'"
              % (uri, self.__oid, self.__pid, self.__mimeType))
        self.__search()

    def getManifestItem(self):
        """Return the manifest node map for the current oid (keyed by its MD5)."""
        hashId = md5.new(self.__oid).hexdigest()
        return self.getPortal().getMap("manifest//node-%s" % hashId)

    def getMimeType(self):
        """Return the MIME type resolved for the requested payload."""
        return self.__mimeType

    def getManifest(self):
        """Return the portal's "manifest" as a JSON map."""
        return self.getPortal().getJsonMap("manifest")

    def getPortal(self):
        """Return the portal for the current portalId (looked up on each call)."""
        return Services.portalManager.get(portalId)

    def getPortalName(self):
        """Return the description of the current portal."""
        return Services.portalManager.get(portalId).description

    def __search(self):
        """Run the faceted search and bring the portal manifest in line with it.

        Reads the query and facet action from formData, the previous filter
        queries from sessionState, and writes the resulting filter queries
        and page number back to sessionState. Afterwards the manifest gains
        a node for every new hit, loses nodes whose id is no longer in the
        results, and the portal is saved.
        """
        recordsPerPage = self.__portal.recordsPerPage

        query = formData.get("query")
        if query is None or query == "":
            query = "*:*"

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", "1000")
        req.setParam("facet.field", self.__portal.facetFieldList)
        req.setParam("facet.sort", "true")
        req.setParam("facet.limit", str(self.__portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        # setup facets: start from the session's filters, then apply the
        # requested action on top of them
        action = formData.get("verb")
        value = formData.get("value")
        fq = sessionState.get("fq")
        if fq is not None:
            self.__pageNum = 1
            req.setParam("fq", fq)
        if action == "add_fq":
            self.__pageNum = 1
            print(" * add_fq: %s" % value)
            req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "remove_fq":
            self.__pageNum = 1
            req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "clear_fq":
            self.__pageNum = 1
            req.removeParam("fq")
        elif action == "select-page":
            self.__pageNum = int(value)
        req.addParam("fq", 'item_type:"object"')

        portalQuery = self.__portal.query
        print(" * portalQuery=%s" % portalQuery)
        if portalQuery:
            req.addParam("fq", portalQuery)

        self.__selected = req.getParams("fq")

        sessionState.set("fq", self.__selected)
        sessionState.set("pageNum", self.__pageNum)

        req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

        print(" * combined.py: %s %s" % (req.toString(), self.__pageNum))

        out = ByteArrayOutputStream()
        Services.indexer.search(req, out)
        self.__result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray()))
        if self.__result is not None:
            self.__paging = Pagination(self.__pageNum,
                                       int(self.__result.get("response/numFound")),
                                       recordsPerPage)
            print(" * combined.py: updating manifest...")
            portal = self.getPortal()
            manifest = portal.getJsonMap("manifest")
            # add new items from search; nodes are looked up at any depth
            # ("//") but new ones are created directly under "manifest"
            for doc in self.__result.getList("response/docs"):
                hashId = md5.new(doc.get("id")).hexdigest()
                node = portal.get("manifest//node-%s" % hashId)
                print(" ********node= %s" % (node,))
                if node is None:
                    portal.set("manifest/node-%s/title" % hashId, doc.get("dc_title").get(0))
                    portal.set("manifest/node-%s/id" % hashId, doc.get("id"))
            # remove manifest items missing from search result
            print(manifest)
            for key in manifest.keySet():
                item = manifest.get(key)
                itemId = item.get("id")
                matches = self.__result.getList('response/docs[@id="%s"]' % itemId)
                if len(matches) == 0:
                    portal.removePath("manifest//%s" % key)
            Services.getPortalManager().save(portal)

    def getQueryTime(self):
        """Return the response header's QTime divided by 1000.0."""
        return int(self.__result.get("responseHeader/QTime")) / 1000.0

    def getPaging(self):
        """Return the Pagination object built by the last search."""
        return self.__paging

    def getResult(self):
        """Return the parsed response of the last search."""
        return self.__result

    def getFacetField(self, key):
        """Return the portal's facet field configuration for key."""
        return self.__portal.facetFields.get(key)

    def getFacetName(self, key):
        """Return the "label" of the portal's facet field key."""
        return self.__portal.facetFields.get(key).get("label")

    def getFacetCounts(self, key):
        """Return a LinkedHashMap of facet value -> count for facet field key.

        The response stores each facet as a flat list alternating value and
        count; only entries with a count above zero are kept.
        """
        values = LinkedHashMap()
        valueList = self.__result.getList("facet_counts/facet_fields/%s" % key)
        for i in range(0, len(valueList), 2):
            name = valueList[i]
            count = valueList[i + 1]
            if count > 0:
                values.put(name, count)
        return values

    def hasSelectedFacets(self):
        """Return True when filters beyond the always-present ones are active.

        One filter query is always added by the search; a second one may be
        the portal's own query. Neither counts as a user selection.
        """
        return (self.__selected is not None and len(self.__selected) > 1) and \
            not (self.__portal.query in self.__selected and len(self.__selected) == 2)

    def getSelectedFacets(self):
        """Return the filter queries used by the last search."""
        return self.__selected

    def isPortalQueryFacet(self, fq):
        """Return True when fq is the portal's own query."""
        return fq == self.__portal.query

    def isSelected(self, fq):
        """Return True when fq is among the active filter queries."""
        return fq in self.__selected

    def getSelectedFacetIds(self):
        """Return the MD5 hex digest of every active filter query."""
        return [md5.new(fq).hexdigest() for fq in self.__selected]

    def getFileName(self, path):
        """Return the last component of path."""
        return os.path.split(path)[1]

    def getFacetQuery(self, name, value):
        """Return the filter query string name:"value"."""
        return '%s:"%s"' % (name, value)

    def isImage(self, format):
        """Return True when the MIME type format starts with "image/"."""
        return format.startswith("image/")

    @staticmethod
    def _thumbnailName(oid):
        """Return "<base>.thumb.jpg" for the last "/"-separated segment of oid.

        Only the final extension is dropped. An id without an extension
        keeps its whole name; slicing with "-len(ext)" would have produced
        an empty base name in that case, because -0 == 0.
        """
        fileName = oid[oid.rfind("/") + 1:]
        return os.path.splitext(fileName)[0] + ".thumb.jpg"

    def getThumbnail(self, oid):
        """Return the thumbnail payload id for oid, or None if storage has none."""
        url = self._thumbnailName(oid)
        if Services.getStorage().getPayload(oid, url):
            return url
        return None

    def getContent(self):
        """Return one HTML fragment with a preview of every visible manifest item.

        Items whose "hidden" value is anything other than "false" are skipped.
        """
        contentStr = "<div>"
        manifest = self.getPortal().getJsonMap("manifest")
        for key in manifest.keySet():
            item = manifest.get(key)
            if item.get("hidden", "false") != "false":
                continue
            oid = item.get("id")
            pid = oid[oid.rfind("/") + 1:]
            contentStr += "<div class='combined-item' rel='%s'><a name='content-%s'><!-- --></a><h2>%s</h2>" % (oid, key, item.get("title"))
            contentStr += self.__getPayloadContent(oid, pid)
            contentStr += "</div>"
            contentStr += "<div class='clear'></div>"
        return contentStr + "</div>"

    def __getPayloadContent(self, oid, pid):
        """Return an HTML preview for payload pid of object oid.

        text/html is shown in an iframe, other text/* inside <pre>,
        PDF / "vnd.ms" / OpenDocument types via their ".htm" rendition, and
        images as a linked <img>. Any other type yields an empty string; a
        missing payload yields an error <div>.
        """
        print(" * combined.py: oid='%s' pid='%s'" % (oid, pid))
        payload = self.__storage.getPayload(oid, pid)
        if payload is None:
            return "<div>Error: No content for '%s'</div>" % oid
        mimeType = payload.contentType
        contentStr = ""
        if mimeType.startswith("text/"):
            if mimeType == "text/html":
                contentStr = '<iframe class="iframe-preview" src="%s/download/%s"></iframe>' % \
                    (portalPath, oid)
            else:
                # Imported here so the fix does not depend on the file's
                # import block, which is not part of this class.
                from xml.sax.saxutils import escape
                sw = StringWriter()
                IOUtils.copy(payload.getInputStream(), sw)
                # The payload is plain text, not markup: escape &, < and >
                # so it is displayed literally instead of being parsed.
                contentStr = "<pre>%s</pre>" % escape(sw.toString())
        elif mimeType == "application/pdf" or mimeType.find("vnd.ms")>-1 or mimeType.find("vnd.oasis.opendocument.")>-1:
            # get the html version if exist...
            pid = os.path.splitext(pid)[0] + ".htm"
            print(" * combined.py: pid=%s" % pid)
            payload = self.__storage.getPayload(oid, pid)
            saxReader = SAXReader(False)
            try:
                # A missing ".htm" payload (None) fails here and ends up in
                # the handler below, like any parse error.
                document = saxReader.read(payload.getInputStream())
                # Re-use the rendition's <body> as a <div> of this page.
                slideNode = document.selectSingleNode("//*[local-name()='body']")
                slideNode.setName("div")
                out = ByteArrayOutputStream()
                outputFormat = OutputFormat.createPrettyPrint()
                outputFormat.setSuppressDeclaration(True)
                outputFormat.setExpandEmptyElements(True)
                writer = XMLWriter(out, outputFormat)
                writer.write(slideNode)
                writer.close()
                contentStr = out.toString("UTF-8")
            except:
                # Deliberately broad: the preview is best-effort and the
                # failure may be a Java exception raised by the XML classes.
                traceback.print_exc()
                contentStr = "<p class=\"error\">No preview available</p>"
        elif mimeType.startswith("image/"):
            # NOTE(review): link and image use the bare payload id as a
            # relative URL; confirm "<oid>/<pid>" is not what is needed.
            contentStr = '<a class="image" href="%(src)s"  style="max-width:98%%">' \
                '<img src="%(src)s" style="max-width:100%%" /></a>' % { "src": pid }
        return contentStr

    def getOid(self):
        """Return the object id taken from the request URI."""
        return self.__oid
Пример #3
0
    def getSuggestedNames(self):
        """Search the index for author records resembling this record's name.

        Builds a query from common written forms of the name (taken from the
        "surname", "firstName" and optional "secondName" metadata lists) plus
        a lower-boosted search on each word of the package title, restricted
        to documents with recordtype "author" and item_type "object".

        Returns a LinkedHashMap keyed by each hit's first dc_title value; the
        values are ArrayLists of the matching documents, in the order the
        indexer returned them (the request sorts by score, descending).

        Side effect: stores max(1.0, response maxScore) in self.__maxScore,
        falling back to 1.0 when the response carries no usable maxScore.
        """
        def quotePhrase(text):
            # Backslash-escape '\' and '"' so that a name containing them
            # cannot terminate the quoted phrase early and break the query.
            return text.replace("\\", "\\\\").replace('"', '\\"')

        surname = self.__metadata.getList("surname").get(0)
        firstName = self.__metadata.getList("firstName").get(0)
        firstInitial = firstName[0].upper()

        # The second name is optional: keep the list and the value apart so
        # the check below never depends on the truthiness of a Java list.
        secondNames = self.__metadata.getList("secondName")
        secondName = None
        if not secondNames.isEmpty():
            secondName = secondNames.get(0)

        # search common forms
        lookupNames = []
        if secondName:
            secondInitial = secondName[0].upper()
            lookupNames.append("%s, %s. %s." %
                               (surname, firstInitial, secondInitial))
            lookupNames.append("%s, %s %s." %
                               (surname, firstName, secondInitial))
            lookupNames.append("%s, %s %s" % (surname, firstName, secondName))
            lookupNames.append("%s %s %s" % (firstName, secondName, surname))
        lookupNames.append("%s, %s." % (surname, firstInitial))
        lookupNames.append("%s, %s" % (surname, firstName))
        lookupNames.append("%s %s" % (firstName, surname))
        phraseQuery = '" OR dc_title:"'.join(
            [quotePhrase(n) for n in lookupNames])
        clauses = ['(dc_title:"%s")^2.5' % phraseQuery]

        # general word search from each part of the name; skipped when the
        # title has no words, which would otherwise yield "(dc_title:)^0.5"
        parts = [p for p in self.getPackageTitle().split(" ") if len(p) > 0]
        if parts:
            clauses.append('(dc_title:%s)^0.5' % " OR dc_title:".join(parts))

        req = SearchRequest(" OR ".join(clauses))
        self.log.info("suggestedNames query={}", req.query)
        req.setParam("fq", 'recordtype:"author"')
        req.addParam("fq", 'item_type:"object"')
        req.setParam("rows", "9999")
        req.setParam("fl", "score")
        req.setParam("sort", "score desc")

        # NOTE: no role-based security_filter "fq" is added to this request;
        # the code that did so was disabled in the original.

        out = ByteArrayOutputStream()
        indexer = self.services.getIndexer()
        indexer.search(req, out)
        result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray()))

        # Group the hits by title, preserving the order of first appearance.
        authors = LinkedHashMap()
        for doc in result.getJsonList("response/docs"):
            authorName = doc.getList("dc_title").get(0)
            authorDocs = authors.get(authorName)
            if authorDocs is None:
                authorDocs = ArrayList()
                authors.put(authorName, authorDocs)
            authorDocs.add(doc)

        # A response without a (numeric) maxScore must not abort the lookup.
        try:
            maxScore = float(result.get("response/maxScore"))
        except (TypeError, ValueError):
            maxScore = 0.0
        self.__maxScore = max(1.0, maxScore)

        return authors