def batch_search(self, queries: List[str], qids: List[str], k: int = 10, threads: int = 1) -> Dict[str, List[JSimpleSearcherResult]]:
    """Run a batch of queries against the collection using multiple threads.

    Parameters
    ----------
    queries : List[str]
        Query strings; each one is UTF-8 encoded before being wrapped as a
        Java string.
    qids : List[str]
        Query ids corresponding to ``queries``.
    k : int
        Number of hits to return.
    threads : int
        Maximum number of threads to use.

    Returns
    -------
    Dict[str, List[JSimpleSearcherResult]]
        Mapping built from the entries of the Java result map: each entry's
        key (the query id) maps to that entry's value (its search results).
    """
    # Copy the Python inputs into Java lists for the underlying searcher.
    java_queries = JArrayList()
    for text in queries:
        java_queries.add(JString(text.encode('utf8')))

    java_qids = JArrayList()
    for query_id in qids:
        java_qids.add(JString(query_id))

    hits_by_qid = self.object.batchSearch(java_queries, java_qids, int(k), int(threads))

    # Flatten the Java map into a plain Python dict.
    output = {}
    for entry in hits_by_qid.entrySet().toArray():
        output[entry.getKey()] = entry.getValue()
    return output
def batch_search(
    self, queries: List[str], qids: List[str],
    k: int = 10, threads: int = 1, fields: Dict[str, float] = None
) -> Dict[str, List[JImpactSearcherResult]]:
    """Search the collection concurrently for multiple queries, using multiple threads.

    Parameters
    ----------
    queries : List[str]
        List of query strings. Each one is passed through
        ``self.query_encoder.encode`` to obtain a token -> weight mapping.
    qids : List[str]
        List of corresponding query ids.
    k : int
        Number of hits to return.
    threads : int
        Maximum number of threads to use.
    fields : dict
        Optional map of fields to search with associated boosts. ``None``
        or an empty dict selects the plain ``batchSearch`` call.

    Returns
    -------
    Dict[str, List[JImpactSearcherResult]]
        Dictionary holding the search results, with the query ids as keys and the
        corresponding lists of search results as the values.

    Notes
    -----
    A query token is kept only if it is present in ``self.idf`` and its idf
    is strictly greater than ``self.min_idf``; ``min_idf`` is instance
    state, not an argument of this method.
    """
    query_lst = JArrayList()
    for q in queries:
        encoded_query = self.query_encoder.encode(q)
        jquery = JHashMap()
        for token, weight in encoded_query.items():
            # Drop tokens with no idf entry or an idf at/below the threshold.
            if token in self.idf and self.idf[token] > self.min_idf:
                jquery.put(token, JFloat(weight))
        query_lst.add(jquery)

    qid_lst = JArrayList()
    for qid in qids:
        qid_lst.add(qid)

    if fields:
        # Only build the Java boost map when it is actually going to be used.
        jfields = JHashMap()
        for field, boost in fields.items():
            jfields.put(field, JFloat(boost))
        results = self.object.batchSearchFields(query_lst, qid_lst, int(k), int(threads), jfields)
    else:
        results = self.object.batchSearch(query_lst, qid_lst, int(k), int(threads))

    return {r.getKey(): r.getValue() for r in results.entrySet().toArray()}
def batch_search(self, queries: List[str], qids: List[str], k: int = 10, threads: int = 1,
                 query_generator: JQueryGenerator = None,
                 fields: Dict[str, float] = None) -> Dict[str, List[JSimpleSearcherResult]]:
    """Search the collection concurrently for multiple queries, using multiple threads.

    Parameters
    ----------
    queries : List[str]
        List of query strings; each is UTF-8 encoded before being wrapped
        as a Java string.
    qids : List[str]
        List of corresponding query ids.
    k : int
        Number of hits to return.
    threads : int
        Maximum number of threads to use.
    query_generator : JQueryGenerator
        Generator to build queries. Set to ``None`` by default to use Anserini default.
        When given, it is passed as the first argument of the Java call.
    fields : dict
        Optional map of fields to search with associated boosts. ``None``
        or an empty dict selects ``batchSearch`` instead of
        ``batchSearchFields``.

    Returns
    -------
    Dict[str, List[JSimpleSearcherResult]]
        Dictionary holding the search results, with the query ids as keys and the
        corresponding lists of search results as the values.
    """
    query_strings = JArrayList()
    for query in queries:
        query_strings.add(JString(query.encode('utf8')))

    qid_strings = JArrayList()
    for qid in qids:
        qid_strings.add(JString(qid))

    # Arguments shared by every overload; the generator, when present,
    # goes in front of them.
    call_args = [query_strings, qid_strings, int(k), int(threads)]
    if query_generator:
        call_args.insert(0, query_generator)

    if fields:
        # Only build the Java boost map when it is actually going to be used.
        jfields = JHashMap()
        for field, boost in fields.items():
            jfields.put(JString(field), JFloat(boost))
        results = self.object.batchSearchFields(*call_args, jfields)
    else:
        results = self.object.batchSearch(*call_args)

    return {r.getKey(): r.getValue() for r in results.entrySet().toArray()}