def test_get_index_from_index_path():
    """Check which index class ``Index.get_index_from_index_path`` returns.

    A path containing an ``anserini`` component is expected to map to
    ``AnseriniIndex``; the other path is expected to yield ``None``.
    """
    anserini_path = "/something/anserini/foo"
    unknown_path = "/foo/bar"

    assert Index.get_index_from_index_path(anserini_path) == AnseriniIndex
    assert Index.get_index_from_index_path(unknown_path) is None
def get(self, request, *args, **kwargs):
    """Render ``self.template_name`` with a single document from an index.

    Reads two query-string parameters from ``request.GET``:

    * ``target_index`` -- path of the index; also used to pick the index class.
    * ``doc_id`` -- identifier passed to the index's ``getdoc``.

    The template context contains one key, ``"doc"``.
    """
    params = request.GET
    target_index = params["target_index"]

    # NOTE(review): get_index_from_index_path appears to return None for paths
    # it does not recognise; in that case the call below would fail -- confirm.
    index = Index.get_index_from_index_path(target_index)(target_index)

    return render(
        request,
        self.template_name,
        {"doc": index.getdoc(params["doc_id"])},
    )
def get_most_relevant_doc_based_on_config(config, query_string, target_index):
    """Build the query components described by ``config`` and run the query.

    Steps:
        1. Initialise a ``Pipeline`` from a copy of ``config``.
        2. Instantiate the index, reranker class, tokenizer and embeddings.
        3. Delegate to ``NeuralQueryView.do_query``.

    Args:
        config: Configuration mapping. It is copied first, so the caller's
            object is not the one handed to the pipeline.
        query_string: The query passed through to ``do_query``.
        target_index: Path of the index to query.

    Returns:
        Whatever ``NeuralQueryView.do_query`` returns.
    """
    # Work on a copy because config ends up being modified along the way.
    config = config.copy()

    # A pipeline is still required: it pre-processes some config params, and
    # that pre-processing is relied upon to construct paths etc.
    pipeline = Pipeline(config)
    pipeline.initialize(config)
    paths = pipeline.get_paths(config)

    index_cls = Index.get_index_from_index_path(target_index)
    # TODO: Pass a proper index_key (currently None)
    index = index_cls(pipeline.collection, target_index, None)

    reranker_cls = Reranker.ALL[config["reranker"]]
    tokenizer = NeuralQueryView.get_tokenizer(pipeline, config, index_cls.name)
    embeddings = EmbeddingHolder.get_instance(config.get("embeddings", "glove6b"))
    weight_path = paths["trained_weight_path"]
    model_config = NeuralQueryView.add_model_required_params_to_config(config, embeddings)

    return NeuralQueryView.do_query(
        model_config,
        query_string,
        pipeline,
        index,
        tokenizer,
        embeddings,
        reranker_cls,
        trained_weight_path=weight_path,
    )
def get(self, request, *args, **kwargs):
    """Render ``self.template_name`` with a single document from an index.

    Reads two query-string parameters from ``request.GET``:

    * ``target_index`` -- path of the index; also used to pick the index class.
    * ``doc_id`` -- identifier passed to the index's ``getdoc``.

    The template context contains one key, ``"doc"``.
    """
    params = request.GET
    target_index = params["target_index"]

    # NOTE(review): get_index_from_index_path appears to return None for paths
    # it does not recognise; in that case the call below would fail -- confirm.
    index_cls = Index.get_index_from_index_path(target_index)

    # Hack: the collection and the index key are passed as None because they
    # are not needed for the purpose of the WSDM demo.
    index = index_cls(None, target_index, None)

    return render(
        request,
        self.template_name,
        {"doc": index.getdoc(params["doc_id"])},
    )
def get_bm25_results(query_string, target_index, b, k1):
    """Run a BM25 query against ``target_index`` and build result dicts.

    Args:
        query_string: The query passed to ``BM25View.do_query``.
        target_index: Path of the index to query.
        b: Optional BM25 ``b`` value; only forwarded when not ``None``.
        k1: Optional BM25 ``k1`` value; only forwarded when not ``None``.

    Returns:
        The value of ``NeuralQueryView.construct_result_dicts`` for the
        returned doc ids, the truncated documents and their relevances.
    """
    index = Index.get_index_from_index_path(target_index)(target_index)

    # Only forward the tuning parameters the caller actually supplied.
    bm25_kwargs = {"n": NUM_RESULTS_TO_SHOW}
    bm25_kwargs.update(
        (name, value) for name, value in (("b", b), ("k1", k1)) if value is not None
    )

    doc_ids, full_docs = BM25View.do_query(query_string, index, **bm25_kwargs)
    # Keep only the first 250 items (characters, if docs are strings) of each doc.
    snippets = [text[:250] for text in full_docs]

    collection = Collection.get_collection_from_index_path(index.index_path)
    if collection is None:
        # No collection for this index: report a relevance of 0 for every doc.
        relevances = [0] * len(doc_ids)
    else:
        relevances = collection.get_relevance(query_string, doc_ids)

    return NeuralQueryView.construct_result_dicts(doc_ids, snippets, relevances)
def test_create_index(tmpdir_as_cache, index_name):
    """Check that ``create_index`` makes a previously missing index exist.

    The index is created via ``Index.create`` with a ``DummyCollection``
    provided as its ``"collection"`` dependency.
    """
    index = Index.create(index_name, provide={"collection": DummyCollection()})

    # Nothing should exist before the index is built ...
    assert not index.exists()

    index.create_index()

    # ... and it should exist afterwards.
    assert index.exists()