def check_dense(index):
    """Smoke-test each prebuilt dense index named in ``index``.

    There is no explicit validation step: every index is simply initialized
    with a set of dummy pre-encoded queries, and the check passes as long as
    no exception is raised along the way.

    Args:
        index: Iterable of prebuilt dense index names.
    """
    # Pre-encoded queries stand in for a real query encoder during the check.
    placeholder_queries = QueryEncoder.load_encoded_queries(
        'tct_colbert-msmarco-passage-dev-subset')

    print('\n')
    for name in index:
        print(f'# Validating "{name}"...')
        # Names containing "bpr" are opened with the binary searcher,
        # everything else with the regular dense searcher.
        searcher_cls = BinaryDenseSearcher if "bpr" in name else SimpleDenseSearcher
        searcher_cls.from_prebuilt_index(name, placeholder_queries)
        print('\n')
def do_model(self, arg):
    """Handle the ``model`` command: swap the dense query encoder and index.

    Accepts ``"tct"`` or ``"ance"``. Any other value prints an error message
    and leaves the current searchers untouched. On success, the dense
    searcher is rebuilt from the matching prebuilt index and the hybrid
    searcher is rebuilt on top of it and the existing sparse searcher.

    NOTE(review): if the enclosing class also keeps a separate "active
    searcher" reference, it is not refreshed here - confirm against the
    class definition.
    """
    # Guard clause: reject unknown models before doing any loading.
    if arg not in ("tct", "ance"):
        print(f'Model "{arg}" is invalid. Model should be one of [tct, ance].')
        return

    if arg == "tct":
        query_encoder = TctColBertQueryEncoder("castorini/tct_colbert-msmarco")
        index_name = "msmarco-passage-tct_colbert-hnsw"
    else:
        query_encoder = AnceQueryEncoder("castorini/ance-msmarco-passage")
        index_name = "msmarco-passage-ance-bf"

    dense = SimpleDenseSearcher.from_prebuilt_index(index_name, query_encoder)
    self.dsearcher = dense
    # The hybrid searcher wraps the dense one, so it has to be rebuilt too.
    self.hsearcher = HybridSearcher(self.dsearcher, self.ssearcher)
    print(f'setting model = {arg}')
exit() query_encoder = init_query_encoder(args.dense.encoder, args.run.topics, args.dense.encoded_queries, args.dense.device) if not query_encoder: print(f'No encoded queries for topic {args.run.topics}') exit() if os.path.exists(args.dense.index): # create searcher from index directory dsearcher = SimpleDenseSearcher(args.dense.index, query_encoder) else: # create searcher from prebuilt index name dsearcher = SimpleDenseSearcher.from_prebuilt_index(args.dense.index, query_encoder) if not dsearcher: exit() if os.path.exists(args.sparse.index): # create searcher from index directory ssearcher = SimpleSearcher(args.sparse.index) else: # create searcher from prebuilt index name ssearcher = SimpleSearcher.from_prebuilt_index(args.sparse.index) if not ssearcher: exit() set_bm25_parameters(ssearcher, args.sparse.index, args.sparse.k1, args.sparse.b)
corpus = SimpleSearcher.from_prebuilt_index(args.retriever_corpus) obqa = OpenBookQA(reader, retriever, corpus) # run a warm up question obqa.predict('what is lobster roll') while True: question = input('Enter a question: ') answer = obqa.predict(question) answer_text = answer["answer"] answer_context = answer["context"]["text"] print(f"Answer:\t {answer_text}") print(f"Context:\t {answer_context}") elif args.qa_reader == 'fid': reader = FidReader(model_name=args.reader_model, device=args.reader_device) if args.retriever_model: # retriever = SimpleDenseSearcher(args.retriever_index, DkrrDprQueryEncoder(args.retriever_model)) retriever = SimpleDenseSearcher.from_prebuilt_index(args.retriever_index, DkrrDprQueryEncoder(args.retriever_model)) else: retriever = SimpleSearcher.from_prebuilt_index(args.retriever_corpus) corpus = SimpleSearcher.from_prebuilt_index(args.retriever_corpus) obqa = OpenBookQA(reader, retriever, corpus) # run a warm up question obqa.predict('what is lobster roll', 100, args.query, 'fid') while True: question = input('Enter a question: ') answer = obqa.predict(question, 100, args.query, 'fid') print(f"Answer:\t {answer}") else: cbqa = ClosedBookQA(args.cbqa_model, args.cbqa_device) # run a warm up question cbqa.predict('what is lobster roll')
class DPRDemo(cmd.Cmd):
    """Interactive shell for querying the 'wikipedia-dpr' prebuilt index.

    Any input that is not a recognized command is treated as a question and
    sent to the currently selected searcher. Commands may optionally be
    prefixed with ``/`` (see :meth:`precmd`); :meth:`do_help` lists them.

    All searchers are created as class attributes, i.e. when the class body
    is executed.
    """

    # Question pools used by the ``random`` command.
    nq_dev_topics = list(search.get_topics('dpr-nq-dev').values())
    trivia_dev_topics = list(search.get_topics('dpr-trivia-dev').values())

    # Sparse searcher; also the searcher that is active at start-up.
    ssearcher = SimpleSearcher.from_prebuilt_index('wikipedia-dpr')
    searcher = ssearcher

    # Dense searcher and the query encoder it uses.
    encoder = DprQueryEncoder("facebook/dpr-question_encoder-multiset-base")
    index = 'wikipedia-dpr-multi-bf'
    dsearcher = SimpleDenseSearcher.from_prebuilt_index(
        index,
        encoder
    )
    # Hybrid searcher combining the dense and sparse searchers.
    hsearcher = HybridSearcher(dsearcher, ssearcher)

    k = 10  # number of hits to return per question
    prompt = '>>> '

    def precmd(self, line):
        """Strip one leading ``/`` so ``/k 5`` dispatches like ``k 5``.

        ``startswith`` is used instead of indexing so that an empty input
        line does not raise ``IndexError``.
        """
        if line.startswith('/'):
            line = line[1:]
        return line

    def do_help(self, arg):
        """Print the list of supported commands."""
        print('/help    : returns this message')
        print('/k [NUM] : sets k (number of hits to return) to [NUM]')
        print('/mode [MODE] : sets retriver type to [MODE] (one of sparse, dense, hybrid)')
        print('/random [COLLECTION]: returns results for a random question from the dev subset [COLLECTION] (one of nq, trivia).')

    def do_k(self, arg):
        """Set the number of hits to return; ``arg`` must be a positive integer."""
        try:
            new_k = int(arg)
        except ValueError:
            new_k = None
        # Reject bad input with a message instead of letting the exception
        # propagate out of the command loop.
        if new_k is None or new_k < 1:
            print(f'k "{arg}" is invalid. k should be a positive integer.')
            return
        print(f'setting k = {new_k}')
        self.k = new_k

    def do_mode(self, arg):
        """Select the active searcher: ``sparse``, ``dense`` or ``hybrid``."""
        if arg == "sparse":
            self.searcher = self.ssearcher
        elif arg == "dense":
            self.searcher = self.dsearcher
        elif arg == "hybrid":
            self.searcher = self.hsearcher
        else:
            print(
                f'Mode "{arg}" is invalid. Mode should be one of [sparse, dense, hybrid].')
            return
        print(f'setting retriver = {arg}')

    def do_random(self, arg):
        """Pick a random dev question from ``nq`` or ``trivia`` and search for it."""
        if arg == "nq":
            topics = self.nq_dev_topics
        elif arg == "trivia":
            topics = self.trivia_dev_topics
        else:
            print(
                f'Collection "{arg}" is invalid. Collection should be one of [nq, trivia].')
            return
        q = random.choice(topics)['title']
        print(f'question: {q}')
        self.default(q)

    def do_EOF(self, line):
        """Return True on end-of-file, which stops the command loop."""
        return True

    def default(self, q):
        """Treat unrecognized input as a question and print the top ``k`` hits.

        Each output line shows the 1-based rank, the hit score and the
        ``contents`` field of the hit's JSON document.
        """
        hits = self.searcher.search(q, self.k)
        sparse_active = isinstance(self.searcher, SimpleSearcher)

        for rank, hit in enumerate(hits, start=1):
            raw_doc = None
            if sparse_active:
                raw_doc = hit.raw
            else:
                # Look the document up by docid in the sparse index.
                # NOTE(review): assumes the dense/hybrid searchers expose no
                # doc() of their own and share docids with 'wikipedia-dpr'
                # - confirm.
                doc = self.ssearcher.doc(hit.docid)
                if doc:
                    raw_doc = doc.raw()

            if raw_doc is None:
                # json.loads(None) would raise TypeError; report and move on.
                print(f'{rank:2} {hit.score:.5f} [document {hit.docid} not found]')
                continue

            jsondoc = json.loads(raw_doc)
            print(f'{rank:2} {hit.score:.5f} {jsondoc["contents"]}')