def searchWithRequestAndQuery(cls, query, indexReader, taxoReader, indexingParams, facetRequest): """ Search an index with facets for given query and facet requests. returns a List<FacetResult> """ # prepare searcher to search against searcher = IndexSearcher(indexReader) # collect matching documents into a collector topDocsCollector = TopScoreDocCollector.create(10, True) if not indexingParams: indexingParams = DefaultFacetIndexingParams() # Faceted search parameters indicate which facets are we interested in facetSearchParams = FacetSearchParams(indexingParams) # Add the facet request of interest to the search params facetSearchParams.addFacetRequest(facetRequest) facetsCollector = FacetsCollector(facetSearchParams, indexReader, taxoReader) # perform documents search and facets accumulation searcher.search( query, MultiCollector.wrap([topDocsCollector, facetsCollector])) # Obtain facets results and print them res = facetsCollector.getFacetResults() i = 0 for facetResult in res: print "Result #%d has %d descendants" % ( i, facetResult.getNumValidDescendants()) print "Result #%d : %s" % (i, facetResult) i += 1 return res
def getCrowds(self, query, field = CrowdFields.text):
    """
    Return the crowd ids of documents matching *query* in *field*.

    Parses *query* with this instance's analyzer, runs it against
    self.index, and maps each scoring hit to its CrowdFields.id value.
    """
    searcher = IndexSearcher(self.index, True)
    parsed = QueryParser(
        Version.LUCENE_CURRENT, field, self.analyzer).parse(query)
    # NOTE(review): hitsPerPage is not defined in this function —
    # presumably a module-level constant; confirm it exists at runtime.
    collector = TopScoreDocCollector.create(hitsPerPage, True)
    searcher.search(parsed, collector)
    ids = []
    for scoreDoc in collector.topDocs().scoreDocs:
        ids.append(searcher.doc(scoreDoc.doc).get(CrowdFields.id))
    return ids
# NOTE(review): this is a byte-for-byte duplicate of the getCrowds
# definition above (only whitespace differs). If both live in the same
# class, this later definition silently shadows the earlier one —
# consider deleting one of them.
def getCrowds(self, query, field=CrowdFields.text):
    """
    Return the crowd ids of documents matching *query* in *field*.

    Parses *query* with this instance's analyzer, runs it against
    self.index, and maps each scoring hit to its CrowdFields.id value.
    """
    searcher = IndexSearcher(self.index, True)
    q = QueryParser(Version.LUCENE_CURRENT, field,
                    self.analyzer).parse(query)
    # NOTE(review): hitsPerPage is not defined in this function —
    # presumably a module-level constant; confirm it exists at runtime.
    collector = TopScoreDocCollector.create(hitsPerPage, True)
    searcher.search(q, collector)
    hits = collector.topDocs().scoreDocs
    return [
        searcher.doc(scoreDoc.doc).get(CrowdFields.id)
        for scoreDoc in hits
    ]
def query(indexName, queryFile, runName): indReader = IndexReader.open(SimpleFSDirectory(File(indexName))) indSearcher = IndexSearcher(indReader) ir = indSearcher.getIndexReader() qp = QueryParser(Version.LUCENE_CURRENT, "content", StandardAnalyzer(Version.LUCENE_CURRENT)) f = open('results-'+runName, 'w') while(True): id = queryFile.readline() if id == "": break id = id.replace("C","") id = id.replace("\n","") queryString = queryFile.readline() queryString = queryString.replace("?","") queryString = queryString.replace("*","") queryString = queryString.replace("-","_") queryString = queryString.replace("\n","") query = qp.parse(queryString) queryFile.readline() returnedDocs = 1000 collector = TopScoreDocCollector.create(returnedDocs, True) indSearcher.search(query, collector) hits = collector.topDocs().scoreDocs size = len(hits) print "Total hits for query " +id+ ": "+str(size) i = 0 for hit in hits: docId = hits[i].doc score = hits[i].score doc = ir.document(docId) j = i + 1 f.write(id + " 0 " + doc.get('id') + " " + str(j) + " " + str(score) +" " + runName +"\n") i+=1 f.close()