def __getSolrData(self): prefix = self.getSearchTerms() if prefix != "": terms = prefix.split(" ") if len(terms)>1: termsQuery = " OR %s" ' OR '.join(terms) else: termsQuery = "" queryValue = "%(prefix)s OR %(prefix)s*%(terms)s" % { "prefix": prefix, "terms": termsQuery } query = 'dc_title:(%(qv)s)^2 OR dc_identifier:(%(qv)s)^0.5' % { "qv": queryValue } else: query = "*:*" portal = self.services.portalManager.get(self.portalId) sq = portal.searchQuery if sq not in ["", "*:*"]: query = query + " AND " + portal.searchQuery req = SearchRequest(query) req.setParam("fq", 'item_type:"object"') if portal.query: req.addParam("fq", portal.query) req.setParam("fl", "score") req.setParam("sort", "score desc, f_dc_title asc") req.setParam("start", self.getStartIndex()) req.setParam("rows", self.getItemsPerPage()) try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) return SolrResult(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, e.getMessage())
def __getUsers(self, oid): indexer = Services.getIndexer() req = SearchRequest("id:" + oid) req.setParam("fl", "security_exception,owner") out = ByteArrayOutputStream() indexer.search(req, out) rtJson = "" try: qresult = SolrResult(ByteArrayInputStream( out.toByteArray())).getResults().get(0) owner = qresult.getString(None, 'owner') secException = qresult.getArray('security_exception') if secException is None: secException = JSONArray() self.log.debug("Owner of object: " + owner) self.log.debug("Viewer(s) of object: " + secException.toString()) if secException.contains(owner): secException.remove(owner) return '{"owner":"' + owner + '", "viewers": ' + secException.toString( ) + '}' except Exception, e: self.log.error("Error during query/package ownership data" + str(e))
def numberOfModifiedRecord(self):
    """Return Solr's ``numFound`` for objects indexed with ``modified:true``.

    The search is limited to ``item_type:"object"`` plus the current portal's
    query and search query when they are set. Users who are not admins also
    get a filter on their roles, security exceptions and ownership. No rows
    are fetched (``rows=0``); the parsed response is kept on the instance.
    """
    solr = self.services.getIndexer()
    portalFilter = self.services.getPortalManager().get(self.portal.getName()).getQuery()
    portalSearchFilter = self.services.getPortalManager().get(self.portal.getName()).getSearchQuery()

    # Security prep work
    username = self.page.authentication.get_username()
    roles = self.page.authentication.get_roles_list()
    byRole = 'security_filter:("%s")' % '" OR "'.join(roles)
    byException = 'security_exception:"' + username + '"'
    byOwner = 'owner:"' + username + '"'
    securityFilter = "(%s) OR (%s) OR (%s)" % (byRole, byException, byOwner)

    request = SearchRequest("modified:true")
    request.setParam("fq", 'item_type:"object"')
    if portalFilter:
        request.addParam("fq", portalFilter)
    if portalSearchFilter:
        request.addParam("fq", portalSearchFilter)
    request.addParam("fq", "")
    request.setParam("rows", "0")
    if not self.page.authentication.is_admin():
        request.addParam("fq", securityFilter)

    buf = ByteArrayOutputStream()
    solr.search(request, buf)
    self.__result = JsonSimpleConfig(ByteArrayInputStream(buf.toByteArray()))
    return self.__result.getString(None, "response", "numFound")
def checkAprovedRequests(self, provisioned=0, startPage=1):
    """
    Customised query for arms packages at the ``arms-approved`` workflow step.

    Returns the requests filtered on ``provisioning_checklist.4``: entries
    where it is not ``null`` when ``provisioned`` is truthy, entries where it
    is ``null`` otherwise. ``startPage`` is accepted but not used here.
    An empty ArrayList is returned when the Solr result is falsy.
    """
    baseQuery = self.packageType
    if not baseQuery:
        baseQuery = "packageType:arms"
    req = SearchRequest(baseQuery)

    req.addParam("fq", 'workflow_step:' + "arms-approved")
    if provisioned:
        checklistFilter = '-provisioning_checklist.4:null'
    else:
        checklistFilter = 'provisioning_checklist.4:null'
    req.addParam("fq", checklistFilter)
    req.setParam("sort", "date_object_modified desc, f_dc_title asc")
    req.setParam("fl", "id,dc_title,date-provisioned")

    buf = ByteArrayOutputStream()
    self.indexer.search(req, buf)
    solrResults = SolrResult(ByteArrayInputStream(buf.toByteArray()))
    if not solrResults:
        return ArrayList()

    found = solrResults.getResults()
    if found:
        # Attach workflow event information to each hit
        found = self.mergeEvents(found, ["arms_draft", "arms_redraft", "arms_review", "arms_approved", "arms_rejected"])
    self._setPaging(found.size())
    return found
def findPackagesToTransition(self, fromWorkflowId, fromWorkflowStage):
    """Find indexed packages sitting at a given workflow step.

    fromWorkflowId    -- value matched against ``workflow_id``
    fromWorkflowStage -- value matched against ``workflow_step``

    Only documents that have both an ``owner`` and a ``security_filter``
    value are matched. Returns the result list of the Solr response.
    """
    req = SearchRequest("workflow_id:" + fromWorkflowId
                        + " AND _query_:\"workflow_step:" + fromWorkflowStage + "\"")
    req.setParam("fq", "owner:[* TO *]")
    # Fix: this was a second setParam("fq", ...), which appears to replace the
    # owner filter set just above (the rest of this file uses addParam for
    # every additional fq). addParam keeps both filters.
    req.addParam("fq", "security_filter:[* TO *]")
    out = ByteArrayOutputStream()
    self.indexer.search(req, out)
    solrResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
    return solrResult.getResults()
def checkRequests(self, checklist_filter=['1'], role_filter='reviewer', exclusive=True, startPage=1):
    """
    A customised query for arms at arms-review.

    Gets a list of requests filtered by provisioning_checklist. For assessor
    roles the list is further filtered per user through Assessment (the
    original note says this queries committee-responses.metadata) and paged
    here using ``startPage`` and ``self.recordsPerPage``.

    checklist_filter -- checklist items ('1'..'4') that must be set; the
                        default list is only read, never mutated
    role_filter      -- 'reviewer', 'assessor' or 'assessor-assessed'
    exclusive        -- when true, items not in checklist_filter must be
                        null or missing
    startPage        -- 1-based page, used for assessor roles only

    Returns the matching results, or an empty ArrayList when the Solr result
    is falsy.
    """
    workflowStep = "arms-review"
    if self.packageType:
        req = SearchRequest(self.packageType)
    else:
        req = SearchRequest("packageType:arms")

    req.addParam("fq", 'workflow_step:' + workflowStep)
    for item in ['1', '2', '3', '4']:
        if item in checklist_filter:
            req.addParam("fq", '-provisioning_checklist.' + item + ':null' + ' AND provisioning_checklist.' + item + ':[* TO *]')
        else:
            if exclusive:
                # ensure that brand new submissions (not yet saved by reviewer) are also returned
                req.addParam("fq", 'provisioning_checklist.' + item + ':null' + ' OR (*:* -provisioning_checklist.' + item + ':[* TO *])')

    req.setParam("sort", "date_object_modified desc, f_dc_title asc")
    req.setParam("fl", self.returnFields)
    out = ByteArrayOutputStream()
    self.indexer.search(req, out)
    solrResults = SolrResult(ByteArrayInputStream(out.toByteArray()))

    if solrResults:
        results = solrResults.getResults()
        if results:
            packageResults = results
            results = self.mergeEvents(packageResults, ["arms_draft", "arms_redraft", "arms_review", "arms_approved", "arms_rejected"])

        returnArray = JSONArray()
        if role_filter.startswith('assessor'):
            # Fix: default to "no status matches" so an unexpected
            # 'assessor...' role yields an empty list instead of failing on
            # an unbound 'query_status'.
            query_status = []
            if role_filter == 'assessor':
                query_status = ['new', 'draft']
            elif role_filter == 'assessor-assessed':
                query_status = ['submitted']

            x = Assessment()
            x.activate(self.velocityContext)
            # 'i' counts status matches so that paging applies to the
            # filtered list rather than to the raw Solr results
            i = 0
            rows = self.recordsPerPage
            start = (startPage - 1) * self.recordsPerPage
            for r in results:
                status = x.queryStatus(r.get("id"))
                if status in query_status:
                    if i >= start and i - start < rows:
                        if status == 'submitted':
                            assessment_submitted_date = x.queryMyAttr('date')
                            if assessment_submitted_date:
                                r.getJsonObject().put('date', assessment_submitted_date)
                        returnArray.add(r)
                    i = i + 1
        else:
            returnArray = results

        self._setPaging(returnArray.size())
        return returnArray
    else:
        return ArrayList()
def getAttachments(self):
    """Return the indexed ``review-attachments`` attached to ``self.oid``.

    Up to 1000 rows are requested; the result list of the Solr response is
    returned.
    """
    kind = "review-attachments"
    request = SearchRequest("attached_to:%s AND attachment_type:%s" % (self.oid, kind))
    request.setParam("rows", "1000")

    buf = ByteArrayOutputStream()
    self.Services.indexer.search(request, buf)
    return SolrResult(ByteArrayInputStream(buf.toByteArray())).getResults()
def findPackagesToTransition(self, fromWorkflowId, fromWorkflowStage):
    """Find indexed packages that are at the given workflow id and step.

    fromWorkflowId    -- value matched against ``workflow_id``
    fromWorkflowStage -- value matched against ``workflow_step``

    Matches are restricted to documents carrying both an ``owner`` and a
    ``security_filter`` value. Returns the result list of the Solr response.
    """
    query = ("workflow_id:" + fromWorkflowId
             + " AND _query_:\"workflow_step:" + fromWorkflowStage + "\"")
    req = SearchRequest(query)
    req.setParam("fq", "owner:[* TO *]")
    # Fix: originally a second setParam("fq", ...); setParam appears to
    # replace the value set above (elsewhere in this file further filters are
    # appended with addParam), which dropped the owner filter.
    req.addParam("fq", "security_filter:[* TO *]")

    out = ByteArrayOutputStream()
    self.indexer.search(req, out)
    solrResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
    return solrResult.getResults()
def _searchSets(self, indexer, searchType, isAdmin=True, security_query=''):
    """Search for objects of one package type, most recently modified first.

    indexer        -- indexer used to run the search
    searchType     -- value matched against ``packageType``
    isAdmin        -- when false, ``security_query`` is added as a filter
    security_query -- filter query applied for non-admin users

    Returns a SolrResult built from the raw response.
    """
    request = SearchRequest("packageType:" + searchType)
    request.setParam("fq", 'item_type:"object"')
    request.addParam("fq", "")
    request.setParam("sort", "last_modified desc, f_dc_title asc")
    if not isAdmin:
        request.addParam("fq", security_query)

    buf = ByteArrayOutputStream()
    indexer.search(request, buf)
    response = ByteArrayInputStream(buf.toByteArray())
    return SolrResult(response)
def search_solr(self):
    """Fetch every annotation for the configured root URI(s) and process it.

    The query matches ``rootUri`` against ``self.rootUriList`` (ORed) when
    that is set, otherwise against the quoted ``self.rootUri``, optionally
    narrowed by ``self.type``. Tag annotations are handed to
    ``process_tags``; everything else goes to ``process_response``.
    """
    if self.rootUriList:
        target = "(" + " OR ".join(self.rootUriList) + ")"
    else:
        target = "\"" + self.rootUri + "\""
    clause = "rootUri:" + target
    if self.type:
        clause += " AND type:\"" + self.type + "\""
    query = "(" + clause + ")"

    request = SearchRequest(query)
    request.setParam("facet", "false")
    request.setParam("rows", str(99999))
    request.setParam("sort", "dateCreated asc")
    request.setParam("start", str(0))
    # NOTE: no security filter is applied to annotation searches; the
    # role-based 'fq' was already disabled in the original code.

    buf = ByteArrayOutputStream()
    Services.indexer.annotateSearch(request, buf)
    annotations = SolrResult(ByteArrayInputStream(buf.toByteArray())).getResults()

    # Every annotation for this URI
    if self.type == "http://www.purl.org/anotar/ns/type/0.1#Tag":
        return self.process_tags(annotations)
    return self.process_response(annotations)
def __search(self): indexer = self.services.getIndexer() # Security prep work isAdmin = self.vc("page").authentication.is_admin() if not isAdmin: print "ERROR: User is not an admin '" return None req = SearchRequest('eventType:harvestStart') req.setParam("rows", "100") out = ByteArrayOutputStream() indexer.searchByIndex(req, out, "eventLog") self.__harvestList = SolrResult(ByteArrayInputStream(out.toByteArray()))
def __searchSolr(self):
    """Search for objects whose ``handle`` starts with ``http``.

    Requests up to 99999 rows sorted by handle, limited to the id, title,
    handle and repository fields, with faceting switched on. Returns a
    SolrResult built from the raw response.
    """
    request = SearchRequest("handle:http* AND item_type:object")
    for name, value in [
        ("rows", "99999"),
        ("fl", "id,dc_title,handle,repository_type,repository_name"),
        ("sort", "handle asc"),
        ("facet", "true"),
        ("facet.field", "repository_type,repository_name"),
    ]:
        request.setParam(name, value)

    buf = ByteArrayOutputStream()
    self.services.indexer.search(request, buf)
    return SolrResult(ByteArrayInputStream(buf.toByteArray()))
def export(self, exportType):
    """Stream every record matching the current facet as a download.

    exportType -- Solr response writer name (``wt``); also used as the file
                  extension and as the ``text/<exportType>`` content subtype

    A first search with ``rows=0`` finds how many records match, then a
    second search fetches that many rows and writes the body to the HTTP
    response. If the counting search fails, ``self.errorMsg`` is set, the
    failure is logged and nothing is written.
    """
    exportQuery = "%s:%s" % (self.facetField, self.facetFieldValue)
    # Fix: this used the builtin 'type' instead of 'exportType', producing a
    # content type such as "text/<type 'type'>; charset=UTF-8".
    outputType = "text/%s; charset=UTF-8" % exportType
    responseHeader = "attachment; filename=%s.%s" % (self.facetFieldValue, exportType)

    try:
        out = ByteArrayOutputStream()
        recnumreq = SearchRequest(exportQuery)
        recnumreq.setParam("fl", "create_timestamp")
        recnumreq.setParam("rows", "0")
        self.indexer.search(recnumreq, out)
        recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
    except:
        # Bare except kept from the original; details are reported through
        # sys.exc_info() rather than a bound exception variable.
        self.errorMsg = "Export query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
        self.log.error("Export query threw an exception (package type was %s): %s - %s" % (self.facetFieldValue, sys.exc_info()[0], sys.exc_info()[1]))
        return

    out = ByteArrayOutputStream()
    req = SearchRequest(exportQuery)
    req.setParam("wt", exportType)
    req.setParam("rows", self.__rowsFoundSolr)
    self.indexer.search(req, out)

    self.response.setHeader("Content-Disposition", responseHeader)
    writer = self.response.getPrintWriter(outputType)
    writer.println(out.toString("UTF-8"))
    writer.close()
def search_solr(self):
    """Look up all annotations for the root URI(s) and hand them on.

    Builds ``(rootUri:<target>[ AND type:"<type>"])`` where the target is
    the ORed ``self.rootUriList`` if set, else the quoted ``self.rootUri``.
    Results go to ``process_tags`` for the anotar Tag type and to
    ``process_response`` otherwise; that call's value is returned.
    """
    pieces = ["(rootUri:"]
    if self.rootUriList:
        pieces.append("(" + " OR ".join(self.rootUriList) + ")")
    else:
        pieces.append('"' + self.rootUri + '"')
    if self.type:
        pieces.append(' AND type:"' + self.type + '"')
    pieces.append(")")
    query = "".join(pieces)

    req = SearchRequest(query)
    req.setParam("facet", "false")
    req.setParam("rows", str(99999))
    req.setParam("sort", "dateCreated asc")
    req.setParam("start", str(0))
    # NOTE: the role-based security 'fq' is intentionally not applied here;
    # it was commented out in the original.

    stream = ByteArrayOutputStream()
    Services.indexer.annotateSearch(req, stream)
    solrResult = SolrResult(ByteArrayInputStream(stream.toByteArray()))
    hits = solrResult.getResults()

    # Every annotation for this URI
    isTag = self.type == "http://www.purl.org/anotar/ns/type/0.1#Tag"
    if isTag:
        return self.process_tags(hits)
    else:
        return self.process_response(hits)
def __getMetadata(self, oid):
    """Return the first Solr document found for ``id:<oid>``.

    Only ``item_type:"object"`` documents are considered. The first entry
    of ``response/docs`` is returned.
    """
    request = SearchRequest('id:%s' % oid)
    request.setParam("fq", 'item_type:"object"')
    # NOTE: no role-based security filter is added here (it was disabled in
    # the original with the remark that 'fq' is set in the session).

    buf = ByteArrayOutputStream()
    self.services.getIndexer().search(request, buf)
    parsed = JsonConfigHelper(ByteArrayInputStream(buf.toByteArray()))
    docs = parsed.getJsonList("response/docs")
    return docs.get(0)
def __searchSolr(self):
    """Return all objects that have an ``http...`` handle, sorted by handle.

    Faceting is enabled, and only the id, title, handle and repository
    type/name fields are requested (up to 99999 rows).
    """
    params = (
        ("rows", "99999"),
        ("fl", "id,dc_title,handle,repository_type,repository_name"),
        ("sort", "handle asc"),
        ("facet", "true"),
        ("facet.field", "repository_type,repository_name"),
    )
    req = SearchRequest("handle:http* AND item_type:object")
    for key, value in params:
        req.setParam(key, value)

    stream = ByteArrayOutputStream()
    self.services.indexer.search(req, stream)
    payload = ByteArrayInputStream(stream.toByteArray())
    return SolrResult(payload)
def _searchSets(self, packageType, isAdmin=True, security_query='', startPage=1):
    """Fetch one page of objects of a package type, newest modification first.

    packageType    -- value matched against ``packageType``
    isAdmin        -- when false, ``security_query`` is added as a filter
    security_query -- filter query applied for non-admin users
    startPage      -- 1-based page number; page size is ``self.recordsPerPage``

    Returns a SolrResult built from the raw response.
    """
    pageSize = self.recordsPerPage
    offset = (startPage - 1) * self.recordsPerPage

    request = SearchRequest("packageType:" + packageType)
    request.setParam("rows", str(pageSize))
    request.setParam("start", str(offset))
    request.setParam("fq", 'item_type:"object"')
    request.addParam("fq", "")
    request.setParam("sort", "date_object_modified desc, f_dc_title asc")
    request.setParam("fl", self.returnFields)
    if not isAdmin:
        request.addParam("fq", security_query)

    buf = ByteArrayOutputStream()
    self.indexer.search(request, buf)
    return SolrResult(ByteArrayInputStream(buf.toByteArray()))
def _searchSets(self, indexer, searchType, isAdmin=True, security_query=''):
    """Run a ``packageType:<searchType>`` search through the given indexer.

    Results are limited to ``item_type:"object"`` and sorted by
    ``last_modified`` (descending) then title. For non-admin callers
    ``security_query`` is appended as a further filter query.

    Returns a SolrResult built from the raw response.
    """
    query = "packageType:" + searchType
    req = SearchRequest(query)
    req.setParam("fq", 'item_type:"object"')
    req.addParam("fq", "")
    req.setParam("sort", "last_modified desc, f_dc_title asc")
    if isAdmin:
        pass  # admins see everything; no security filter
    else:
        req.addParam("fq", security_query)

    stream = ByteArrayOutputStream()
    indexer.search(req, stream)
    return SolrResult(ByteArrayInputStream(stream.toByteArray()))
def __search(self): indexer = self.services.getIndexer() # Security prep work isAdmin = self.vc("page").authentication.is_admin() if not isAdmin: print "ERROR: User is not an admin '" return None req = SearchRequest('eventType:harvestStart') req.setParam("rows", "100") out = ByteArrayOutputStream() indexer.searchByIndex(req, out, "eventLog") self.__harvestList = SolrResult(ByteArrayInputStream( out.toByteArray()))
def getSuggestedNames(self):
    """Suggest existing author records whose title resembles this name.

    Common written forms of the name (built from surname, first name and
    optional second name) are searched as phrases with boost 2.5, and each
    word of the package title is searched with boost 0.5. Only
    ``recordtype:"author"`` objects are considered.

    Returns a LinkedHashMap of author title -> list of matching documents.
    The response's maxScore (at least 1.0) is stored on the instance.
    """
    meta = self.__metadata
    surname = meta.getList("surname").get(0)
    firstName = meta.getList("firstName").get(0)
    firstInitial = firstName[0].upper()
    secondName = meta.getList("secondName")
    if not secondName.isEmpty():
        secondName = secondName.get(0)

    # search common forms
    nameForms = []
    if secondName and secondName != "":
        secondInitial = secondName[0].upper()
        nameForms.extend([
            "%s, %s. %s." % (surname, firstInitial, secondInitial),
            "%s, %s %s." % (surname, firstName, secondInitial),
            "%s, %s %s" % (surname, firstName, secondName),
            "%s %s %s" % (firstName, secondName, surname),
        ])
    nameForms.extend([
        "%s, %s." % (surname, firstInitial),
        "%s, %s" % (surname, firstName),
        "%s %s" % (firstName, surname),
    ])
    phraseQuery = '" OR dc_title:"'.join(nameForms)

    # general word search from each part of the name
    words = [w for w in self.getPackageTitle().split(" ") if len(w) > 0]
    wordQuery = " OR dc_title:".join(words)

    req = SearchRequest('(dc_title:"%s")^2.5 OR (dc_title:%s)^0.5' % (phraseQuery, wordQuery))
    self.log.info("suggestedNames query={}", req.query)
    req.setParam("fq", 'recordtype:"author"')
    req.addParam("fq", 'item_type:"object"')
    req.setParam("rows", "9999")
    req.setParam("fl", "score")
    req.setParam("sort", "score desc")
    # NOTE: no role-based security filter is added here (it was disabled in
    # the original with the remark that 'fq' is set in the session).

    buf = ByteArrayOutputStream()
    self.services.getIndexer().search(req, buf)
    result = JsonConfigHelper(ByteArrayInputStream(buf.toByteArray()))

    # Group the documents by author title, keeping the order of the response
    byAuthor = LinkedHashMap()
    for doc in result.getJsonList("response/docs"):
        authorName = doc.getList("dc_title").get(0)
        if not byAuthor.containsKey(authorName):
            byAuthor.put(authorName, ArrayList())
        byAuthor.get(authorName).add(doc)

    self.__maxScore = max(1.0, float(result.get("response/maxScore")))
    return byAuthor
def _searchSets(self, startPage=1):
    """Fetch one page of objects for this view's query and update paging.

    startPage -- 1-based page number

    The query, filter query, return fields and page size come from the
    instance's getters; non-admin users also get the security query as a
    filter. Paging is set from the total hit count, and ``lastPage`` and
    ``curPage`` are written into the result's JSON before it is returned.
    """
    request = SearchRequest(self.getQuery())
    request.setParam("fq", 'item_type:"object"')
    request.setParam("rows", str(self.getRecordsPerPage()))
    offset = (startPage - 1) * self.getRecordsPerPage()
    request.setParam("start", str(offset))
    request.addParam("fq", self.getFilterQuery())
    request.setParam("fl", self.getReturnFields())
    request.setParam("sort", "date_object_modified desc, f_dc_title asc")
    if not self.isAdmin():
        request.addParam("fq", self.getSecurityQuery())

    buf = ByteArrayOutputStream()
    self.indexer.search(request, buf)
    page = SolrResult(ByteArrayInputStream(buf.toByteArray()))

    self._setPaging(page.getNumFound())
    page.getJsonObject().put("lastPage", str(self.paging.getLastPage()))
    page.getJsonObject().put("curPage", str(startPage))
    return page
def __getAuthorDetails(self, authorIds):
    """Return the Solr documents of the author records with the given ids.

    authorIds -- ids, ORed together as ``id:<a> OR id:<b> ...``

    Only ``recordtype:"author"`` objects are matched (up to 9999 rows).
    Returns the ``response/docs`` list of the response.
    """
    idClause = 'id:%s' % " OR id:".join(authorIds)
    request = SearchRequest(idClause)
    request.setParam("fq", 'recordtype:"author"')
    request.addParam("fq", 'item_type:"object"')
    request.setParam("rows", "9999")
    # NOTE: no role-based security filter is added here (it was disabled in
    # the original with the remark that 'fq' is set in the session).

    buf = ByteArrayOutputStream()
    self.services.getIndexer().search(request, buf)
    parsed = JsonConfigHelper(ByteArrayInputStream(buf.toByteArray()))
    return parsed.getJsonList("response/docs")
def __getSolrData(self): level = self.getFormData("level", None) if level: if level == "top": query = 'rdf_type:"http://purl.org/asc/1297.0/2008/seo/SEO2"' else: query = 'skos_broader:"%s"' % level else: prefix = self.getSearchTerms() if prefix != "": terms = prefix.split(" ") if len(terms) > 1: termsQuery = " OR %s" ' OR '.join(terms) else: termsQuery = "" queryValue = "%(prefix)s OR %(prefix)s*%(terms)s" % { "prefix": prefix, "terms": termsQuery } query = 'dc_title:(%(qv)s)^2 OR dc_identifier:(%(qv)s)^0.5' % { "qv": queryValue } else: query = "*:*" portal = self.services.portalManager.get(self.portalId) sq = portal.searchQuery if sq not in ["", "*:*"]: query = query + " AND " + portal.searchQuery req = SearchRequest(query) req.setParam("fq", 'item_type:"object"') if portal.query: req.addParam("fq", portal.query) req.setParam("fl", "score") req.setParam("sort", "score desc, f_dc_title asc") req.setParam("start", self.getStartIndex()) req.setParam("rows", self.getItemsPerPage()) try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) return SolrResult(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, e.getMessage())
def search_solr(self): # Build our solr query readyForNla = "ready_for_nla:ready" nlaPidExists = "nlaId:http*" query = readyForNla + " AND NOT " + nlaPidExists # Prepare the query req = SearchRequest(query) req.setParam("facet", "false") req.setParam("rows", "20") # Run the query try: out = ByteArrayOutputStream() self.services.getIndexer().search(req, out) return SolrResult(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Error searching solr: ", e) self.throw_error("failure searching solr: " + e.getMessage()) return None
def __buildSearch(self, query):
    """Build a SearchRequest for ``query`` limited to this portal's objects.

    query -- Solr query string

    The request asks for all fields, ``self.rowsPerQuery`` rows and
    ``item_type:"object"`` documents, plus the portal's filter query when
    it is not empty. The request is returned without being executed.
    """
    req = SearchRequest(query)
    req.setParam("rows", str(self.rowsPerQuery))
    req.setParam("fl", "*")
    req.setParam("fq", 'item_type:"object"')
    # The portal filter query
    portal = self.__getPortal()
    if portal.query != "":
        # Fix: this was setParam("fq", ...), which appears to replace the
        # item_type filter set above; the other searches in this file append
        # the portal query with addParam.
        req.addParam("fq", portal.query)
    return req
def getAuthorities(self):
    """Return the master records linked to this package node.

    Searches ``package_node_id`` for this record's ``id`` metadata value,
    restricted to ``recordtype:"master"`` objects (up to 9999 rows), and
    returns the ``response/docs`` list.
    """
    request = SearchRequest('package_node_id:%s' % self.metadata.get("id"))
    request.setParam("fq", 'recordtype:"master"')
    request.addParam("fq", 'item_type:"object"')
    request.setParam("rows", "9999")
    # NOTE: no role-based security filter is added here (it was disabled in
    # the original with the remark that 'fq' is set in the session).

    buf = ByteArrayOutputStream()
    self.services.getIndexer().search(request, buf)
    parsed = JsonConfigHelper(ByteArrayInputStream(buf.toByteArray()))
    return parsed.getJsonList("response/docs")
def getAuthorities(self):
    """Look up the ``master`` records whose package node is this record.

    The ``id`` metadata value is matched against ``package_node_id``;
    results are limited to ``recordtype:"master"`` objects and 9999 rows.
    Returns the ``response/docs`` list of the response.
    """
    nodeQuery = "package_node_id:%s" % self.metadata.get("id")
    req = SearchRequest(nodeQuery)
    for filterQuery in ('recordtype:"master"',):
        req.setParam("fq", filterQuery)
    req.addParam("fq", 'item_type:"object"')
    req.setParam("rows", "9999")
    # NOTE: the role-based security filter is not applied here; it was
    # already commented out in the original.

    stream = ByteArrayOutputStream()
    searcher = self.services.getIndexer()
    searcher.search(req, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))
    return response.getJsonList("response/docs")
def _searchSets(self, searchType, isAdmin=True, security_query='', startPage=1):
    """Fetch one page of objects of a package type, last modified first.

    searchType     -- value matched against ``packageType``
    isAdmin        -- when false, ``security_query`` is added as a filter
    security_query -- filter query applied for non-admin users
    startPage      -- 1-based page number; page size is ``self.recordsPerPage``

    Returns a SolrResult built from the raw response.
    """
    rows = str(self.recordsPerPage)
    start = str((startPage - 1) * self.recordsPerPage)

    request = SearchRequest("packageType:" + searchType)
    request.setParam("rows", rows)
    request.setParam("start", start)
    request.setParam("fq", 'item_type:"object"')
    request.addParam("fq", "")
    request.setParam("sort", "last_modified desc, f_dc_title asc")
    request.setParam("fl", self.returnFields)
    if not isAdmin:
        request.addParam("fq", security_query)

    buf = ByteArrayOutputStream()
    self.indexer.search(request, buf)
    return SolrResult(ByteArrayInputStream(buf.toByteArray()))
def __activate__(self, context):
    """Write ``pid::title`` lines for datasets matching the request.

    Reads from ``context["formData"]``:
      q     -- optional search text, turned into title tokens
      qs    -- storage id of the current object, which is excluded
      limit -- optional maximum number of rows (defaults to 10)

    Only ``display_type:"package-dataset"`` objects are returned. The result
    is written to the response as plain text, one ``pidProperty::dc_title``
    pair per line.
    """
    formData = context["formData"]
    services = context["Services"]
    response = context["response"]

    # Prepare a query
    q = formData.get("q")
    if q is not None and q != "":
        query = "(item_type:object AND " + self.titleTokens(q.strip()) + ")"
    else:
        query = "item_type:object"
    # Can't link to yourself
    oid = formData.get("qs")
    query += " AND -storage_id:\"" + oid + "\""
    # And we're not interested in attachments
    query += " AND display_type:\"package-dataset\""

    req = SearchRequest(query)
    req.setParam("fl", "dc_title,storage_id,pidProperty")
    limit = formData.get("limit")
    if limit is None:
        # Fix: default is the string "10" rather than the int 10, matching
        # the string values passed to setParam everywhere else in this file.
        limit = "10"
    req.setParam("rows", limit)

    # Search Solr
    indexer = services.getIndexer()
    out = ByteArrayOutputStream()
    indexer.search(req, out)
    result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # Build a response (local names no longer shadow the builtin 'list')
    lines = []
    for doc in result.getResults():
        title = doc.getFirst("dc_title")
        pid = doc.getFirst("pidProperty")
        lines.append("%s::%s" % (pid, title))

    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    writer.println("\n".join(lines))
    writer.close()
def __getSolrData(self): query = "*:*" req = SearchRequest(query) req.setParam("fq", 'item_type:"object"') req.addParam("fq", 'repository_type:"Parties"') req.addParam("fq", 'repository_name:"People"') req.addParam("fq", 'ready_for_nla:"ready"') req.addParam("fq", "-nlaId:[* TO *]") req.setParam("fl", "score") req.setParam("sort", "score desc, f_dc_title asc") req.setParam("start", "0") req.setParam("rows", "99999") try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) return SolrResult(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, e.getMessage())
def getAttachedFiles(self, oid):
    """Return display entries for the files attached to an object.

    oid -- id matched against ``attached_to``

    Returns a JSONArray of JsonObjects with HTML-escaped ``filename``,
    ``attachment_type``, ``access_rights`` and ``id``. The type and access
    rights are capitalised for display, with dashes in the type replaced
    by spaces.
    """
    # Build a query
    request = SearchRequest("attached_to:%s" % oid)
    request.setParam("rows", "1000")

    # Run a search
    buf = ByteArrayOutputStream()
    self.Services.getIndexer().search(request, buf)
    found = SolrResult(ByteArrayInputStream(buf.toByteArray()))

    # Process results
    entries = JSONArray()
    for doc in found.getResults():
        typeLabel = doc.getFirst("attachment_type").replace("-", " ")
        typeLabel = self.escapeHtml(WordUtils.capitalizeFully(typeLabel))
        rightsLabel = self.escapeHtml(WordUtils.capitalizeFully(doc.getFirst("access_rights")))

        entry = JsonObject()
        entry.put("filename", self.escapeHtml(doc.getFirst("filename")))
        entry.put("attachment_type", typeLabel)
        entry.put("access_rights", rightsLabel)
        entry.put("id", self.escapeHtml(doc.getFirst("id")))
        entries.add(entry)
    return entries
def __activate__(self, context):
    """Respond with ``pid::title`` lines for datasets that can be linked to.

    Reads from ``context["formData"]``:
      q     -- optional search text, turned into title tokens
      qs    -- storage id of the current object, which is excluded
      limit -- optional maximum number of rows (defaults to 10)

    Only ``display_type:"package-dataset"`` objects are searched. Output is
    plain text with one ``pidProperty::dc_title`` pair per line.
    """
    formData = context["formData"]
    services = context["Services"]
    response = context["response"]

    # Prepare a query
    q = formData.get("q")
    if q is not None and q != "":
        query = "(item_type:object AND " + self.titleTokens(q.strip()) + ")"
    else:
        query = "item_type:object"
    # Can't link to yourself
    oid = formData.get("qs")
    query += " AND -storage_id:\"" + oid + "\""
    # And we're not interested in attachments
    query += " AND display_type:\"package-dataset\""

    req = SearchRequest(query)
    req.setParam("fl", "dc_title,storage_id,pidProperty")
    limit = formData.get("limit")
    if limit is None:
        # Fix: use the string "10" instead of the int 10; every other
        # setParam call in this file passes string values.
        limit = "10"
    req.setParam("rows", limit)

    # Search Solr
    indexer = services.getIndexer()
    out = ByteArrayOutputStream()
    indexer.search(req, out)
    result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # Build a response; names chosen so the builtin 'list' is not shadowed
    entries = []
    for doc in result.getResults():
        title = doc.getFirst("dc_title")
        pid = doc.getFirst("pidProperty")
        entries.append("%s::%s" % (pid, title))

    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    writer.println("\n".join(entries))
    writer.close()
def getViewers(self, oid): indexer = self.services.getIndexer() req = SearchRequest("id:" + oid) req.setParam("fl", "security_exception,owner") out = ByteArrayOutputStream() indexer.search(req, out) try: qresult = SolrResult(ByteArrayInputStream(out.toByteArray())).getResults().get(0) owner = qresult.getString(None, 'owner') secException = qresult.getArray('security_exception') if secException is None: secException = JSONArray() self.log.debug("Owner of object: " + owner) self.log.debug("Viewer(s) of object: " + secException.toString()) if secException.contains(owner): secException.remove(owner) return secException except Exception, e: self.log.error("Error during query/package ownership data" + str(e))
def handleGrantNumber(self):
    """Write the distinct grant numbers of matching datasets as a JSON array.

    Searches ``grant_numbers`` for values starting with ``self.term`` among
    dataset-workflow objects and writes every ``grant_numbers`` value of the
    matching documents, de-duplicated, to ``self.writer``. Writes ``[""]``
    when nothing is found. The writer is closed afterwards.
    """
    out = ByteArrayOutputStream()
    req = SearchRequest("grant_numbers:%s*" % self.term)
    req.setParam("fq", 'item_type:"object"')
    # Fix: this was a second setParam("fq", ...), which appears to replace
    # the item_type filter set above; addParam keeps both filters.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    hits = HashSet()
    if res.getNumFound() > 0:
        for creatorRes in res.getResults():
            creatorList = creatorRes.getList("grant_numbers")
            if not creatorList.isEmpty():
                for hit in creatorList:
                    hits.add(hit)
        # Same output as the original item-by-item printing
        self.writer.print("[")
        self.writer.print(",".join(["\"%s\"" % hit for hit in hits]))
        self.writer.print("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def handleQuery(self, query, fieldName, formatStr):
    """Write the distinct values of one field from matching datasets.

    query     -- Solr query string
    fieldName -- field whose values are collected from each result
    formatStr -- ``%``-format applied to each value with key ``hit``

    When ``self.term`` is not None only values containing it are kept.
    Output goes to ``self.writer`` as ``[`` + comma-separated formatted
    values + ``]``, or ``[""]`` when nothing is found. The writer is closed
    afterwards.
    """
    out = ByteArrayOutputStream()
    req = SearchRequest(query)
    req.setParam("fq", 'item_type:"object"')
    # Fix: this was a second setParam("fq", ...), which appears to replace
    # the item_type filter set above; addParam keeps both filters.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    hits = HashSet()
    if res.getNumFound() > 0:
        for searchRes in res.getResults():
            searchResList = searchRes.getList(fieldName)
            if not searchResList.isEmpty():
                for hit in searchResList:
                    # Without a term every value is kept
                    if self.term is None or hit.find(self.term) != -1:
                        hits.add(hit)
        # Same output as the original item-by-item printing
        self.writer.print("[")
        self.writer.print(",".join([formatStr % {"hit": hit} for hit in hits]))
        self.writer.print("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def handleWorkflowStep(self):
    """Write the distinct workflow step labels of datasets as value/label pairs.

    Collects every ``workflow_step_label`` value from dataset-workflow
    objects and writes them to ``self.writer`` as a JSON array of
    ``{"value": ..., "label": ...}`` objects (both set to the label).
    Writes ``[""]`` when nothing is found. The writer is closed afterwards.
    """
    out = ByteArrayOutputStream()
    req = SearchRequest("workflow_step_label:[* TO *]")
    req.setParam("fq", 'item_type:"object"')
    # Fix: this was a second setParam("fq", ...), which appears to replace
    # the item_type filter set above; addParam keeps both filters.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    hits = HashSet()
    if res.getNumFound() > 0:
        for recordTypeResult in res.getResults():
            recordTypeList = recordTypeResult.getList("workflow_step_label")
            if not recordTypeList.isEmpty():
                for hit in recordTypeList:
                    hits.add(hit)
        self.writer.println("[")
        hitnum = 0
        for hit in hits:
            # Every entry after the first is preceded by a comma
            if hitnum > 0:
                self.writer.println(",{\"value\": \"%s\",\n\"label\": \"%s\"}" % (hit, hit))
            else:
                self.writer.println("{\"value\": \"%s\",\n\"label\": \"%s\"}" % (hit, hit))
            hitnum += 1
        self.writer.println("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def handleQuery(self, query, fieldName, formatStr):
    """Collect one field's distinct values from matching datasets and print them.

    query     -- Solr query string
    fieldName -- field whose values are collected from each result
    formatStr -- ``%``-format applied to each value with key ``hit``

    Values are filtered to those containing ``self.term`` unless it is
    None. ``self.writer`` receives ``[`` + comma-separated formatted values
    + ``]``, or ``[""]`` when the search finds nothing, and is then closed.
    """
    out = ByteArrayOutputStream()
    req = SearchRequest(query)
    req.setParam("fq", 'item_type:"object"')
    # Fix: originally setParam("fq", ...) again, which appears to overwrite
    # the item_type filter; further filters are appended with addParam
    # elsewhere in this file.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    if res.getNumFound() > 0:
        hits = HashSet()
        for searchRes in res.getResults():
            searchResList = searchRes.getList(fieldName)
            if searchResList.isEmpty():
                continue
            for hit in searchResList:
                if self.term is not None and hit.find(self.term) == -1:
                    continue  # value does not contain the search term
                hits.add(hit)
        # Same output as the original item-by-item printing
        formatted = [formatStr % {"hit": hit} for hit in hits]
        self.writer.print("[")
        self.writer.print(",".join(formatted))
        self.writer.print("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def _searchEmbargoes(self):
    """Load the 25 earliest embargoed records at the ``final-review`` step.

    The SolrResult is stored on the instance and a completion message is
    written to the log held in the velocity context. Nothing is returned.
    """
    request = SearchRequest("item_type:object")
    request.setParam("fq", 'redbox\:embargo.redbox\:isEmbargoed:on')
    request.addParam("fq", 'workflow_step:final-review')
    request.addParam("fq", "")
    request.setParam("fl", "id,date_embargoed,dc_title")
    request.setParam("rows", "25")
    request.setParam("sort", "date_embargoed asc, dc_title asc")

    buf = ByteArrayOutputStream()
    Services.getIndexer().search(request, buf)
    self.__embargoes = SolrResult(ByteArrayInputStream(buf.toByteArray()))

    logger = self.velocityContext["log"]
    logger.info("searchEmbargoes call ended" + str(self.__embargoes))
def __getSolrData(self): prefix = self.getSearchTerms() if prefix != "": terms = prefix.split(" ") if len(terms) > 1: termsQuery = " OR %s" ' OR '.join(terms) else: termsQuery = "" queryValue = "%(prefix)s OR %(prefix)s*%(terms)s" % { "prefix": prefix, "terms": termsQuery } query = 'dc_title:(%(qv)s)^0.5 OR grant_number:(%(qvu)s)^2' % { "qv": queryValue, "qvu": queryValue.upper() } else: query = "*:*" portal = self.services.portalManager.get(self.portalId) sq = portal.searchQuery if sq not in ["", "*:*"]: query = query + " AND " + portal.searchQuery req = SearchRequest(query) req.setParam("fq", 'item_type:"object"') if portal.query: req.addParam("fq", portal.query) req.setParam("fl", "score") req.setParam("sort", "score desc, f_dc_title asc") req.setParam("start", self.getStartIndex()) req.setParam("rows", self.getItemsPerPage()) try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) return SolrResult(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, e.getMessage())
def getAttachedFiles(self, oid):
    """List the attachments of an object as HTML-safe JSON entries.

    oid -- id matched against ``attached_to``

    Each entry carries ``filename``, ``attachment_type``, ``access_rights``
    and ``id``, all passed through ``escapeHtml``. The attachment type
    (dashes replaced by spaces) and access rights are capitalised first.
    Returns a JSONArray of JsonObjects.
    """
    # Build a query
    req = SearchRequest("attached_to:%s" % oid)
    req.setParam("rows", "1000")

    # Run a search
    stream = ByteArrayOutputStream()
    self.Services.getIndexer().search(req, stream)
    solrResult = SolrResult(ByteArrayInputStream(stream.toByteArray()))

    # Process results
    attachments = JSONArray()
    for hit in solrResult.getResults():
        rawType = hit.getFirst("attachment_type").replace("-", " ")
        prettyType = self.escapeHtml(WordUtils.capitalizeFully(rawType))
        rawRights = hit.getFirst("access_rights")
        prettyRights = self.escapeHtml(WordUtils.capitalizeFully(rawRights))

        item = JsonObject()
        item.put("filename", self.escapeHtml(hit.getFirst("filename")))
        item.put("attachment_type", prettyType)
        item.put("access_rights", prettyRights)
        item.put("id", self.escapeHtml(hit.getFirst("id")))
        attachments.add(item)
    return attachments
def __activate__(self, context):
    """Write the indexed keywords that start with the requested text.

    Reads the optional ``q`` form field. Documents having any ``keywords``
    value are searched (narrowed to ``q`` or ``q*`` when ``q`` is given,
    50 rows), and their keywords starting with ``q`` are written to the
    response as sorted, de-duplicated plain text, one per line. Without
    ``q`` every keyword found is written.
    """
    formData = context["formData"]
    services = context["Services"]
    response = context["response"]

    query = "keywords:[* TO *]"
    q = formData.get("q")
    if q:
        query += " AND keywords:(%(q)s OR %(q)s*)" % {"q": q}

    req = SearchRequest(query)
    req.setParam("fl", "keywords")
    req.setParam("rows", "50")

    keywords = TreeSet()
    indexer = services.getIndexer()
    out = ByteArrayOutputStream()
    indexer.search(req, out)
    result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    for doc in result.getResults():
        for keyword in doc.getList("keywords"):
            # Fix: startswith(q) raised when 'q' was missing (None); with no
            # search text every keyword is kept, as already happened for "".
            if not q or keyword.startswith(q):
                keywords.add(keyword)

    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    writer.println("\n".join(keywords))
    writer.close()
def __searchDataSetOids(self, oids):
    """Search the index for the objects whose ``storage_id`` is in ``oids``.

    ``oids`` is a sequence of entries exposing ``get('oid')``. Results are
    restricted to ``item_type:"object"`` and sorted by last modification
    (newest first), then title.

    Returns a SolrResult, or None if anything fails while building or
    running the query (for example an empty ``oids``).
    """
    query_ids = "storage_id:"
    try:
        if (len(oids) > 1):
            query_ids += "(" + oids[0].get('oid')
            for oid in oids[1:]:
                query_ids += " OR " + oid.get('oid')
            query_ids += ")"
        else:
            # Append rather than assign: assigning dropped the
            # "storage_id:" prefix, so a single oid was searched as a bare
            # term instead of against the storage_id field.
            query_ids += oids[0].get('oid')
        self.log.debug("related.datasets: query_ids = {}", query_ids)
        req = SearchRequest(query_ids)
        req.setParam("fq", 'item_type:"object"')
        req.addParam("fq", "")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        # FIXME: security?
        out = ByteArrayOutputStream()
        self.indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))
    except:
        # Best effort: any failure is reported to the caller as None
        return None
def __searchDataSetOids(self, oids):
    """Search the index for the objects whose ``storage_id`` is in ``oids``.

    ``oids`` is a sequence of entries exposing ``get('oid')``. A single oid
    is queried as ``storage_id:<oid>``; several are ORed together inside
    parentheses. Results are restricted to ``item_type:"object"`` and
    sorted by last modification (newest first), then title.

    Returns a SolrResult, or None if anything fails while building or
    running the query (for example an empty ``oids``).
    """
    query_ids = "storage_id:"
    try:
        ids = [oid.get('oid') for oid in oids]
        if len(ids) > 1:
            query_ids += "(" + " OR ".join(ids) + ")"
        else:
            # Append rather than assign: assigning dropped the
            # "storage_id:" prefix, so a single oid was searched as a bare
            # term instead of against the storage_id field.
            query_ids += ids[0]
        self.log.debug("related.datasets: query_ids = {}", query_ids)
        req = SearchRequest(query_ids)
        req.setParam("fq", 'item_type:"object"')
        req.addParam("fq", "")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        # FIXME: security?
        out = ByteArrayOutputStream()
        self.indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))
    except:
        # Best effort: any failure is reported to the caller as None
        return None
def findPackagesToPurge(self, packageType):
    """Return the index documents of old packages of the given type.

    Matches documents whose ``display_type`` is ``packageType`` and whose
    ``date_object_created`` is more than seven days in the past, and which
    have both an ``owner`` and a ``security_filter`` value. Only
    ``storage_id``, ``date_object_created`` and ``date_object_modified``
    are requested for each document.
    """
    req = SearchRequest("display_type:" + packageType +
                        " AND date_object_created:[* TO NOW-7DAY]")
    req.setParam("fq", "owner:[* TO *]")
    # addParam, not a second setParam: setParam replaces the values already
    # stored for the key, which silently discarded the owner filter above.
    req.addParam("fq", "security_filter:[* TO *]")
    req.setParam("fl", "storage_id,date_object_created,date_object_modified")
    out = ByteArrayOutputStream()
    self.indexer.search(req, out)
    solrResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
    return solrResult.getResults()
def findPackagesToPurge(self, packageType):
    """Find packages of ``packageType`` created more than seven days ago.

    The search is filtered to documents that carry both an ``owner`` and a
    ``security_filter`` value, and returns only the ``storage_id``,
    ``date_object_created`` and ``date_object_modified`` fields.

    Returns the result documents of the search.
    """
    query = ("display_type:" + packageType +
             " AND date_object_created:[* TO NOW-7DAY]")
    req = SearchRequest(query)
    req.setParam("fq", "owner:[* TO *]")
    # The second filter must be added, not set: a second setParam on "fq"
    # replaces the owner filter instead of combining with it.
    req.addParam("fq", "security_filter:[* TO *]")
    req.setParam("fl", "storage_id,date_object_created,date_object_modified")

    out = ByteArrayOutputStream()
    self.indexer.search(req, out)
    return SolrResult(ByteArrayInputStream(out.toByteArray())).getResults()
def _searchStage(self, stage, startPage=1):
    """Search ``packageType:arms`` records in the given workflow stage(s).

    stage     -- a workflow step name, or several names separated by
                 commas; a record matches if it is in any of them.
    startPage -- 1-based page number; ``self.recordsPerPage`` records are
                 returned per page.

    Results are sorted by last modification (newest first), then title,
    and contain the fields listed in ``self.returnFields``.

    Returns a SolrResult.
    """
    req = SearchRequest("packageType:arms")
    req.setParam("rows", str(self.recordsPerPage))
    req.setParam("start", str((startPage - 1) * self.recordsPerPage))

    # Prefix every stage with the field name before ORing them together.
    # The previous loop only rebound its loop variable, so the joined
    # filter contained bare stage names with no "workflow_step:" prefix.
    # A single stage yields the same "workflow_step:<stage>" filter as
    # before.
    stageFilters = ["workflow_step:" + s for s in stage.split(',')]
    req.addParam("fq", " OR ".join(stageFilters))

    req.setParam("sort", "last_modified desc, f_dc_title asc")
    req.setParam("fl", self.returnFields)
    out = ByteArrayOutputStream()
    self.indexer.search(req, out)
    return SolrResult(ByteArrayInputStream(out.toByteArray()))
def getFacetFields(self):
    """Load the facet values of ``self.facetField`` into ``self.facetFields``.

    Runs a match-all faceted search on ``self.facetField``. When the facet
    has values they are stored in ``self.facetFields``; when it is missing
    or empty, ``self.errorMsg`` is set instead. Any exception is caught,
    reported through ``self.errorMsg`` and logged. Always returns None.
    """
    try:
        request = SearchRequest("*:*")
        request.setParam("fl", "facet_counts")
        request.setParam("facet", "on")
        request.setParam("facet.field", self.facetField)
        request.setParam("wt", "json")

        buf = ByteArrayOutputStream()
        self.indexer.search(request, buf)
        result = SolrResult(ByteArrayInputStream(buf.toByteArray()))

        facet = result.getFacets().get(self.facetField)
        if facet is None or facet.values().size() <= 0:
            self.errorMsg = "No facet field values to export. Please enter/harvest some data first."
        else:
            self.facetFields = facet.values()
    except:
        # Report the failure both to the user-facing message and the log
        excType, excValue = sys.exc_info()[:2]
        self.errorMsg = "Get facet field query failure. The issue has been logged (%s - %s)." % (excType, excValue)
        self.log.error("Get facet field threw an exception : %s - %s" % (excType, excValue))
    return
def getSearchTerms(self):
    """Return the titles suggested for the current suggestion prefix.

    Searches for the prefix, both exactly and as a prefix wildcard, among
    ``item_type:"object"`` documents filtered by the portal's query. The
    first ``dc_title`` of each of the top 50 hits (by score) is collected.

    Returns the titles joined with the separator ``", "`` (double quote,
    comma, space, double quote); an empty string when nothing matched.
    """
    typed = self.getSuggestionPrefix()
    request = SearchRequest('%(prefix)s OR %(prefix)s*' % {"prefix": typed})
    request.addParam("fq", self.page.getPortal().getQuery())
    request.addParam("fq", 'item_type:"object"')
    request.setParam("rows", "50")
    request.setParam("fl", "score,id,dc_title")
    request.setParam("sort", "score desc")

    buf = ByteArrayOutputStream()
    self.services.getIndexer().search(request, buf)
    response = JsonConfigHelper(ByteArrayInputStream(buf.toByteArray()))

    # Only the first title of each document is used
    titles = [hit.getList("dc_title").get(0)
              for hit in response.getJsonList("response/docs")]
    return '", "'.join(titles)
def __search(self):
    """Run the searches behind this view and store their results.

    All searches are limited to ``item_type:"object"`` plus the portal's
    query and search query (when set). Unless the current user is an
    admin, they are also limited to items the user may see: items whose
    ``security_filter`` matches one of the user's roles, whose
    ``security_exception`` names the user, or which the user owns.

    Results stored on the instance:
      __steps  -- ``workflow_step`` facet over all matching items
      __stages -- WorkflowStage objects built from the ``stages`` list in
                  harvest/workflows/dataset.json and that facet
      __result -- the 25 most recently modified items
      __alerts -- the same search restricted to the first workflow stage
      __latest -- up to 10 items in step ``live`` modified within the
                  last month

    Finally stores ``item_type:"object"`` as ``fq`` in the session state.
    """
    indexer = Services.getIndexer()
    portalQuery = Services.getPortalManager().get(
        self.vc("portalId")).getQuery()
    portalSearchQuery = Services.getPortalManager().get(
        self.vc("portalId")).getSearchQuery()

    # Security prep work
    username = self.vc("page").authentication.get_username()
    roles = self.vc("page").authentication.get_roles_list()
    roleClause = 'security_filter:("' + '" OR "'.join(roles) + '")'
    exceptionClause = 'security_exception:"' + username + '"'
    ownerClause = 'owner:"' + username + '"'
    securityQuery = ("(" + roleClause + ") OR (" + exceptionClause +
                     ") OR (" + ownerClause + ")")
    restricted = not self.vc("page").authentication.is_admin()

    def newRequest(queryText, blankFilter):
        # Build a request carrying the item type and portal filters
        request = SearchRequest(queryText)
        request.setParam("fq", 'item_type:"object"')
        if portalQuery:
            request.addParam("fq", portalQuery)
        if portalSearchQuery:
            request.addParam("fq", portalSearchQuery)
        if blankFilter:
            request.addParam("fq", "")
        return request

    def execute(request):
        # Run the request against the indexer and wrap the response
        buf = ByteArrayOutputStream()
        indexer.search(request, buf)
        return SolrResult(ByteArrayInputStream(buf.toByteArray()))

    # 1. Facet counts per workflow step (no documents requested)
    facetReq = newRequest("*:*", True)
    facetReq.setParam("rows", "0")
    facetReq.setParam("facet", "true")
    facetReq.setParam("facet.field", "workflow_step")
    if restricted:
        facetReq.addParam("fq", securityQuery)
    self.__steps = execute(facetReq).getFacets().get("workflow_step")

    # 2. Workflow stages as configured for datasets
    wfConfig = JsonSimple(
        FascinatorHome.getPathFile("harvest/workflows/dataset.json"))
    stages = [WorkflowStage(jsonStage, self.__steps)
              for jsonStage in wfConfig.getJsonSimpleList(["stages"])]
    self.__stages = stages

    # 3. Most recently modified items
    recentReq = newRequest("*:*", True)
    recentReq.setParam("rows", "25")
    recentReq.setParam("sort", "last_modified desc, f_dc_title asc")
    if restricted:
        recentReq.addParam("fq", securityQuery)
    self.__result = execute(recentReq)

    # 4. Same request, additionally narrowed to the first workflow stage
    recentReq.addParam("fq", "workflow_step:%s" % stages[0].getName())
    self.__alerts = execute(recentReq)

    # 5. Live items modified within the last month
    latestReq = newRequest(
        "last_modified:[NOW-1MONTH TO *] AND workflow_step:live", False)
    latestReq.setParam("rows", "10")
    latestReq.setParam("sort", "last_modified desc, f_dc_title asc")
    if restricted:
        latestReq.addParam("fq", securityQuery)
    self.__latest = execute(latestReq)

    self.vc("sessionState").set("fq", 'item_type:"object"')