def check_dense(index):
    # dummy queries; there is no explicit validation...
    # we just try to initialize the and make sure there are no exceptions
    dummy_queries = QueryEncoder.load_encoded_queries(
        'tct_colbert-msmarco-passage-dev-subset')
    print('\n')
    for entry in index:
        print(f'# Validating "{entry}"...')
        if "bpr" in entry:
            BinaryDenseSearcher.from_prebuilt_index(entry, dummy_queries)
        else:
            SimpleDenseSearcher.from_prebuilt_index(entry, dummy_queries)
        print('\n')
Пример #2
0
    def do_model(self, arg):
        if arg == "tct":
            encoder = TctColBertQueryEncoder("castorini/tct_colbert-msmarco")
            index = "msmarco-passage-tct_colbert-hnsw"
        elif arg == "ance":
            encoder = AnceQueryEncoder("castorini/ance-msmarco-passage")
            index = "msmarco-passage-ance-bf"
        else:
            print(
                f'Model "{arg}" is invalid. Model should be one of [tct, ance].'
            )
            return

        self.dsearcher = SimpleDenseSearcher.from_prebuilt_index(
            index, encoder)
        self.hsearcher = HybridSearcher(self.dsearcher, self.ssearcher)
        print(f'setting model = {arg}')
Пример #3
0
        exit()

    query_encoder = init_query_encoder(args.dense.encoder,
                                       args.run.topics,
                                       args.dense.encoded_queries,
                                       args.dense.device)
    if not query_encoder:
        print(f'No encoded queries for topic {args.run.topics}')
        exit()

    if os.path.exists(args.dense.index):
        # create searcher from index directory
        dsearcher = SimpleDenseSearcher(args.dense.index, query_encoder)
    else:
        # create searcher from prebuilt index name
        dsearcher = SimpleDenseSearcher.from_prebuilt_index(args.dense.index, query_encoder)

    if not dsearcher:
        exit()

    if os.path.exists(args.sparse.index):
        # create searcher from index directory
        ssearcher = SimpleSearcher(args.sparse.index)
    else:
        # create searcher from prebuilt index name
        ssearcher = SimpleSearcher.from_prebuilt_index(args.sparse.index)

    if not ssearcher:
        exit()

    set_bm25_parameters(ssearcher, args.sparse.index, args.sparse.k1, args.sparse.b)
Пример #4
0
            corpus = SimpleSearcher.from_prebuilt_index(args.retriever_corpus)
            obqa = OpenBookQA(reader, retriever, corpus)
            # run a warm up question
            obqa.predict('what is lobster roll')
            while True:
                question = input('Enter a question: ')
                answer = obqa.predict(question)
                answer_text = answer["answer"]
                answer_context = answer["context"]["text"]
                print(f"Answer:\t {answer_text}")
                print(f"Context:\t {answer_context}")
        elif args.qa_reader == 'fid':
            reader = FidReader(model_name=args.reader_model, device=args.reader_device)
            if args.retriever_model:
                # retriever = SimpleDenseSearcher(args.retriever_index, DkrrDprQueryEncoder(args.retriever_model))
                retriever = SimpleDenseSearcher.from_prebuilt_index(args.retriever_index, DkrrDprQueryEncoder(args.retriever_model))
            else:
                retriever = SimpleSearcher.from_prebuilt_index(args.retriever_corpus)
            corpus = SimpleSearcher.from_prebuilt_index(args.retriever_corpus)
            obqa = OpenBookQA(reader, retriever, corpus)
            # run a warm up question
            obqa.predict('what is lobster roll', 100, args.query, 'fid')
            while True:
                question = input('Enter a question: ')
                answer = obqa.predict(question, 100, args.query, 'fid')
                print(f"Answer:\t {answer}")

    else:
        cbqa = ClosedBookQA(args.cbqa_model, args.cbqa_device)
        # run a warm up question
        cbqa.predict('what is lobster roll')
Пример #5
0
class DPRDemo(cmd.Cmd):
    nq_dev_topics = list(search.get_topics('dpr-nq-dev').values())
    trivia_dev_topics = list(search.get_topics('dpr-trivia-dev').values())

    ssearcher = SimpleSearcher.from_prebuilt_index('wikipedia-dpr')
    searcher = ssearcher

    encoder = DprQueryEncoder("facebook/dpr-question_encoder-multiset-base")
    index = 'wikipedia-dpr-multi-bf'
    dsearcher = SimpleDenseSearcher.from_prebuilt_index(
        index,
        encoder
    )
    hsearcher = HybridSearcher(dsearcher, ssearcher)

    k = 10
    prompt = '>>> '

    def precmd(self, line):
        if line[0] == '/':
            line = line[1:]
        return line

    def do_help(self, arg):
        print(f'/help    : returns this message')
        print(f'/k [NUM] : sets k (number of hits to return) to [NUM]')
        print(f'/mode [MODE] : sets retriver type to [MODE] (one of sparse, dense, hybrid)')
        print(f'/random [COLLECTION]: returns results for a random question from the dev subset [COLLECTION] (one of nq, trivia).')

    def do_k(self, arg):
        print(f'setting k = {int(arg)}')
        self.k = int(arg)

    def do_mode(self, arg):
        if arg == "sparse":
            self.searcher = self.ssearcher
        elif arg == "dense":
            self.searcher = self.dsearcher
        elif arg == "hybrid":
            self.searcher = self.hsearcher
        else:
            print(
                f'Mode "{arg}" is invalid. Mode should be one of [sparse, dense, hybrid].')
            return
        print(f'setting retriver = {arg}')

    def do_random(self, arg):
        if arg == "nq":
            topics = self.nq_dev_topics
        elif arg == "trivia":
            topics = self.trivia_dev_topics
        else:
            print(
                f'Collection "{arg}" is invalid. Collection should be one of [nq, trivia].')
            return
        q = random.choice(topics)['title']
        print(f'question: {q}')
        self.default(q)

    def do_EOF(self, line):
        return True

    def default(self, q):
        hits = self.searcher.search(q, self.k)

        for i in range(0, len(hits)):
            raw_doc = None
            if isinstance(self.searcher, SimpleSearcher):
                raw_doc = hits[i].raw
            else:
                doc = self.searcher.doc(hits[i].docid)
                if doc:
                    raw_doc = doc.raw()
            jsondoc = json.loads(raw_doc)
            print(f'{i + 1:2} {hits[i].score:.5f} {jsondoc["contents"]}')