Exemplo n.º 1
0
    def _init(self, loader: ResourceLoader, voc: Iterable[str]):
        """Build the word-embedding matrix and the word -> row-index map.

        Row 0 is a fixed all-zero (null/padding) embedding, row 1 is the
        unknown-word embedding, followed by per-special-token rows and then
        the frozen pre-trained vectors.

        :param loader: used to load the pre-trained word vectors
        :param voc: words to build embeddings for; if None, every word in
            the pre-trained vector file is used
        """
        # TODO we should not be building variables here
        if voc is not None:
            word_to_vec = loader.load_word_vec(self.vec_name, voc)
        else:
            # No vocab given: load all vectors and derive the vocab from them
            word_to_vec = loader.load_word_vec(self.vec_name)
            voc = set(word_to_vec.keys())

        # Maps word -> row index in the final embedding matrix
        self._word_to_ix = {}

        # Embedding dimensionality, taken from an arbitrary loaded vector
        dim = next(iter(word_to_vec.values())).shape[0]

        # Row 0: all-zero embedding used for padding/null tokens
        null_embed = tf.zeros((1, dim), dtype=tf.float32)
        # Row 1: unknown-word embedding, trainable iff self.learn_unk
        unk_embed = tf.get_variable(shape=(1, dim),
                                    name="unk_embed",
                                    dtype=np.float32,
                                    trainable=self.learn_unk,
                                    initializer=tf.random_uniform_initializer(
                                        -self.word_vec_init_scale,
                                        self.word_vec_init_scale))
        ix = 2  # next free row (0 and 1 are taken by null/unk)
        matrix_list = [null_embed, unk_embed]

        if self._special_tokens is not None and len(self._special_tokens) > 0:
            print("Building embeddings for %d special_tokens" %
                  (len(self._special_tokens)))
            # One trainable embedding row per special token
            tok_embed = tf.get_variable(
                shape=(len(self._special_tokens), dim),
                name="token_embed",
                dtype=np.float32,
                trainable=True,
                initializer=tf.random_uniform_initializer(
                    -self.word_vec_init_scale, self.word_vec_init_scale))
            matrix_list.append(tok_embed)
            for token in self._special_tokens:
                self._word_to_ix[token] = ix
                ix += 1

        mat = []
        for word in voc:
            if word in self._word_to_ix:
                continue  # already added, e.g. after seeing a capitalized variant of `word`
            if word in word_to_vec:
                mat.append(word_to_vec[word])
                self._word_to_ix[word] = ix
                ix += 1
            else:
                lower = word.lower()  # Fall back to the lower-case version
                if lower in word_to_vec and lower not in self._word_to_ix:
                    mat.append(word_to_vec[lower])
                    self._word_to_ix[lower] = ix
                    ix += 1

        print("Had pre-trained word embeddings for %d of %d words" %
              (len(mat), len(voc)))

        # Pre-trained vectors are frozen (tf.constant), unlike the rows above
        matrix_list.append(tf.constant(value=np.vstack(mat)))

        self._word_emb_mat = tf.concat(matrix_list, axis=0)
Exemplo n.º 2
0
def test_model_pickle(output_dir):
    """Load a pickled model from `output_dir`, restore its weights from the
    "save" sub-directory, and return [start_logits, end_logits] for the
    test questions.

    Fix over the original: the tf.Session was created but never closed,
    leaking the session's resources; it is now a context manager.
    """
    print("Testing...")
    save_dir = join(output_dir, "save")
    test_questions = get_test_questions()

    # NOTE: pickle.load is only safe on trusted files
    with open(join(output_dir, "model.pkl"), "rb") as f:
        model = pickle.load(f)

    # Context manager ensures the session is released even on error
    with tf.Session() as sess:
        model.set_input_spec(
            ParagraphAndQuestionSpec(1, None, None, 14), {"the"},
            ResourceLoader(lambda a, b: {"the": np.zeros(300, np.float32)}))
        pred = model.get_prediction()

        print("Rebuilding")
        # Restore everything except the "bilm" variables, which are
        # freshly initialized instead of loaded from the checkpoint
        all_vars = tf.global_variables() + tf.get_collection(
            tf.GraphKeys.SAVEABLE_OBJECTS)
        lm_var_names = {x.name for x in all_vars if x.name.startswith("bilm")}
        vars_to_restore = [x for x in all_vars if x.name not in lm_var_names]
        saver = tf.train.Saver(vars_to_restore)
        saver.restore(sess, tf.train.latest_checkpoint(save_dir))
        sess.run(
            tf.variables_initializer(
                [x for x in all_vars if x.name in lm_var_names]))

        feed = model.encode([test_questions], False)
        cpu_out = sess.run([pred.start_logits, pred.end_logits],
                           feed_dict=feed)
    return cpu_out
Exemplo n.º 3
0
 def set_input_spec(self, input_spec: ParagraphAndQuestionSpec, voc: Set[str],
                    word_vec_loader: ResourceLoader=None):
     """Initialize the word/char embedders and the encoder for the given
     input spec, then return the encoder's feed placeholders."""
     loader = ResourceLoader() if word_vec_loader is None else word_vec_loader

     if self.word_embed is not None:
         self.word_embed.init(loader, voc)

     # Resolve the character embedder once; None when char embedding is off
     char_embeder = None
     if self.char_embed is not None:
         char_embeder = self.char_embed.embeder
         char_embeder.init(loader, voc)

     self.encoder.init(input_spec, True, self.word_embed, char_embeder)
     self._is_train_placeholder = tf.placeholder(tf.bool, ())
     return self.encoder.get_placeholders()
Exemplo n.º 4
0
def build_model_and_evaluator_runner(model_config, max_answer_len,
                                     n_paragraphs):
    """Load a pickled model, build its graph and an async evaluator runner.

    Returns (sess, model, evaluator_runner) with all non-LM variables
    restored from model_config.checkpoint_file and "bilm"-prefixed
    variables freshly initialized.

    Fix over the original: the vocab file was opened inside a set
    comprehension and never closed; it is now read inside a with-block.
    `n_paragraphs` is currently unused but kept for interface compatibility.
    """
    # NOTE: pickle.load is only safe on trusted model files
    with open(model_config.model_pickle_file, 'rb') as f:
        model = pickle.load(f)

    # Point the LM / word-embedding components at the configured files
    model.lm_model.weight_file = model_config.lm_weights_file
    model.lm_model.lm_vocab_file = model_config.vocab_file
    model.lm_model.embed_weights_file = model_config.lm_token_weights_file
    model.lm_model.options_file = model_config.lm_options_file
    model.word_embed.vec_name = model_config.word_vector_file
    # LM control tokens that should not get regular word embeddings
    vocab_to_ignore = {'<S>', '</S>', '<UNK>', '!!!MAXTERMID'}

    with open(model_config.vocab_file, encoding="utf-8") as f:
        vocab_to_init_with = {
            line.strip() for line in f
            if line.strip() not in vocab_to_ignore
        }

    sess = tf.Session()
    with sess.as_default():
        model.set_input_spec(ParagraphAndQuestionSpec(None, None, None, 14),
                             vocab_to_init_with,
                             word_vec_loader=ResourceLoader(
                                 load_vec_fn=lambda x, y: load_word_vectors(
                                     x, y, is_path=True)))
        evaluator_runner = AysncEvaluatorRunner(
            [RecordParagraphSpanPrediction(max_answer_len, True)], model, 10)

        # Feed the model's placeholders from the runner's dequeue op
        input_dict = {
            p: x
            for p, x in zip(model.get_placeholders(),
                            evaluator_runner.dequeue_op)
        }
        pred = model.get_predictions_for(input_dict)
    evaluator_runner.set_input(pred)

    # Restore everything except "bilm" variables, which are re-initialized
    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    lm_var_names = {x.name for x in all_vars if x.name.startswith("bilm")}
    vars_to_restore = [x for x in all_vars if x.name not in lm_var_names]
    saver = tf.train.Saver(vars_to_restore)
    saver.restore(sess, model_config.checkpoint_file)
    sess.run(
        tf.variables_initializer(
            [x for x in all_vars if x.name in lm_var_names]))

    return sess, model, evaluator_runner
 def _build_model(self):
     """Build the production prediction ops for the loaded model.

     Returns (start_logits, end_logits, context_rep).

     Fixes over the original: `vocab_to_ignore` was referenced but never
     defined in this scope (it was a local of a different function), which
     would raise NameError unless a same-named module global exists — it is
     now defined locally. The vocab file is also closed via a with-block.
     """
     # LM control tokens that should not be treated as regular vocab
     vocab_to_ignore = {'<S>', '</S>', '<UNK>', '!!!MAXTERMID'}
     with open(self.config.vocab_file, encoding="utf-8") as f:
         vocab_to_init_with = {
             line.strip() for line in f
             if line.strip() not in vocab_to_ignore
         }
     self.model.word_embed.vec_name = self.config.word_vector_file
     with self.sess.as_default():
         self.model.set_input_spec(
             ParagraphAndQuestionSpec(None, None, None, 14),
             vocab_to_init_with,
             word_vec_loader=ResourceLoader(
                 load_vec_fn=lambda x, y: load_word_vectors(
                     x, y, is_path=True)))
         pred = self.model.get_production_predictions_for(
             {x: x
              for x in self.model.get_placeholders()})
     return pred.start_logits, pred.end_logits, self.model.context_rep
Exemplo n.º 6
0
    def set_input_spec(self, input_spec, voc, word_vec_loader=None):
        """Initialize embedders/encoder and create the LM input placeholders.

        Depending on `self.token_lookup`, the LM is fed either whole-token
        ids (2-D placeholders) or per-character ids (3-D placeholders).
        Returns the feed placeholders via `self.get_placeholders()`.
        """
        if word_vec_loader is None:
            word_vec_loader = ResourceLoader()
        if self.word_embed is not None:
            self.word_embed.init(word_vec_loader, voc)

        if self.char_embed is not None:
            self.char_embed.embeder.init(word_vec_loader, voc)

        batch_size = input_spec.batch_size
        self.batch_size = batch_size
        self.encoder.init(
            input_spec, True, self.word_embed,
            None if self.char_embed is None else self.char_embed.embeder)
        self._is_train_placeholder = tf.placeholder(tf.bool, ())

        if self.token_lookup:
            # Token-id lookup: placeholders are (batch, time), no char axis
            self._batcher = TokenBatcher(self.lm_model.lm_vocab_file)
            self._question_char_ids_placeholder = tf.placeholder(
                tf.int32, (batch_size, None))
            self._context_char_ids_placeholder = tf.placeholder(
                tf.int32, (batch_size, None))
            self._max_word_size = input_spec.max_word_size
            self._context_sentence_ixs = None
        else:
            input_spec.max_word_size = 50  # TODO hack, hard-coded from the lm model
            self._batcher = Batcher(self.lm_model.lm_vocab_file, 50)
            self._max_word_size = input_spec.max_word_size
            self._question_char_ids_placeholder = tf.placeholder(
                tf.int32, (batch_size, None, self._max_word_size))
            if self.per_sentence:
                # Context fed sentence-by-sentence; `_context_sentence_ixs`
                # presumably maps sentence rows back to (batch, position)
                # slots — TODO confirm the exact (batch, 3, None, 3) layout
                self._context_char_ids_placeholder = tf.placeholder(
                    tf.int32, (None, None, self._max_word_size))
                self._context_sentence_ixs = tf.placeholder(
                    tf.int32, (batch_size, 3, None, 3))
            else:
                self._context_char_ids_placeholder = tf.placeholder(
                    tf.int32, (batch_size, None, self._max_word_size))
                self._context_sentence_ixs = None

        return self.get_placeholders()
Exemplo n.º 7
0
    def __init__(self,
                 wiki_cache: str,
                 paragraph_splitter: DocumentSplitter,
                 paragraph_selector: ParagraphFilter,
                 vocab: Union[str, Set[str]],
                 model: Union[ParagraphQuestionModel, ModelDir],
                 loader: ResourceLoader = ResourceLoader(),
                 bing_api_key=None,
                 tagme_api_key=None,
                 blacklist_trivia_sites: bool = False,
                 n_dl_threads: int = 5,
                 span_bound: int = 8,
                 tagme_threshold: Optional[float] = 0.2,
                 download_timeout: int = None,
                 n_web_docs=10):
        """Set up the QA system: search/extraction clients, the wiki corpus,
        the model graph, and a finalized TF session.

        NOTE(review): `loader=ResourceLoader()` is a mutable default
        argument — one instance is shared by every call that omits it;
        confirm ResourceLoader is stateless, or default to None instead.

        :param wiki_cache: directory used by WikiCorpus to cache articles
        :param vocab: path to a vocab file (one word per line), a set of
            words, or None to let the model derive its own vocab
        :param model: a model instance or a ModelDir to load one from
        :param bing_api_key: enables web search when given
        """
        self.log = logging.getLogger('qa_system')
        self.tagme_threshold = tagme_threshold
        self.n_web_docs = n_web_docs
        self.blacklist_trivia_sites = blacklist_trivia_sites
        self.tagme_api_key = tagme_api_key

        # Web search/extraction are only enabled when a Bing key is given
        if bing_api_key is not None:
            self.searcher = AsyncWebSearcher(bing_api_key)
            self.text_extractor = AsyncBoilerpipeCliExtractor(
                n_dl_threads, download_timeout)
        else:
            self.text_extractor = None
            self.searcher = None
        self.wiki_corpus = WikiCorpus(wiki_cache, keep_inverse_mapping=True)
        self.paragraph_splitter = paragraph_splitter
        self.paragraph_selector = paragraph_selector
        self.model_dir = model

        # Resolve the vocab: read one word per line if a path was given
        voc = None
        if vocab is not None:
            if isinstance(vocab, str):
                voc = set()
                with open(vocab, "r") as f:
                    for line in f:
                        voc.add(line.strip())
            else:
                voc = vocab
            self.log.info("Using preset vocab of size %d", len(voc))

        self.log.info("Setting up model...")
        if isinstance(model, ModelDir):
            self.model = model.get_model()
        else:
            self.model = model

        self.model.set_input_spec(ParagraphAndQuestionSpec(None), voc, loader)

        self.sess = tf.Session()
        with self.sess.as_default():
            pred = self.model.get_prediction()

        # NOTE(review): restores via the original `model` argument —
        # presumably only valid when a ModelDir was passed; confirm the
        # non-ModelDir path also provides restore_checkpoint
        model.restore_checkpoint(self.sess)

        self.span_scores = pred.get_span_scores()
        self.span, self.score = pred.get_best_span(span_bound)
        self.tokenizer = NltkAndPunctTokenizer()
        # Freeze the graph so no ops can be added accidentally at serve time
        self.sess.graph.finalize()
Exemplo n.º 8
0
 def get_resource_loader(self):
     """Build a ResourceLoader backed by this object's pruned word vectors."""
     load_fn = self.get_pruned_word_vecs
     return ResourceLoader(load_fn)
Exemplo n.º 9
0
def main():
    """Evaluate a model on TriviaQA data, print per-paragraph score tables,
    and optionally save official-format and per-paragraph outputs.

    Fixes over the original:
    * `args.async` is a SyntaxError on Python 3.7+ (`async` became a
      reserved keyword); the `--async` CLI flag is kept but stored under
      the `async_` attribute via `dest`.
    * `-i/--step` was `type=int` yet compared against the string "latest",
      making that documented value unusable; it is now `type=str`
      (numeric values still work via `int(args.step)`).
    """
    parser = argparse.ArgumentParser(description='Evaluate a model on TriviaQA data')
    parser.add_argument('model', help='model directory')
    parser.add_argument('-p', '--paragraph_output', type=str,
                        help="Save fine grained results for each paragraph in csv format")
    parser.add_argument('-o', '--official_output', type=str, help="Build an offical output file with the model's"
                                                                  " most confident span for each (question, doc) pair")
    parser.add_argument('--no_ema', action="store_true", help="Don't use EMA weights even if they exist")
    parser.add_argument('--n_processes', type=int, default=None,
                        help="Number of processes to do the preprocessing (selecting paragraphs+loading context) with")
    # str, not int, so the "latest" value documented below is reachable
    parser.add_argument('-i', '--step', type=str, default=None, help="checkpoint to load, default to latest")
    parser.add_argument('-n', '--n_sample', type=int, default=None, help="Number of questions to evaluate on")
    # `async` is a reserved keyword (py3.7+); keep the flag, rename the attr
    parser.add_argument('-a', '--async', type=int, default=10, dest="async_")
    parser.add_argument('-t', '--tokens', type=int, default=400,
                        help="Max tokens per a paragraph")
    parser.add_argument('-g', '--n_paragraphs', type=int, default=15,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('-f', '--filter', type=str, default=None, choices=["tfidf", "truncate", "linear"],
                        help="How to select paragraphs")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes might be faster but wll take more memory")
    parser.add_argument('--max_answer_len', type=int, default=8,
                        help="Max answer span to select")
    parser.add_argument('-c', '--corpus',
                        choices=["web-dev", "web-test", "web-verified-dev", "web-train",
                                 "open-dev", "open-train"],
                        default="web-verified-dev")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)
    model = model_dir.get_model()

    # Pick the dataset and question split from the corpus name
    if args.corpus.startswith('web'):
        dataset = TriviaQaWebDataset()
        corpus = dataset.evidence
        if args.corpus == "web-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "web-test":
            test_questions = dataset.get_test()
        elif args.corpus == "web-verified-dev":
            test_questions = dataset.get_verified()
        elif args.corpus == "web-train":
            test_questions = dataset.get_train()
        else:
            raise RuntimeError()
    else:
        dataset = TriviaQaOpenDataset()
        corpus = dataset.evidence
        if args.corpus == "open-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "open-train":
            test_questions = dataset.get_train()
        else:
            raise RuntimeError()

    splitter = MergeParagraphs(args.tokens)

    per_document = not args.corpus.startswith("open")

    filter_name = args.filter
    if filter_name is None:
        # Default paragraph selection depends on the corpus family
        if args.corpus.startswith("open"):
            filter_name = "linear"
        else:
            filter_name = "tfidf"

    print("Selecting %d paragraphs using %s method per %s" % (args.n_paragraphs, filter_name,
                                                              ("question-document pair" if per_document else "question")))

    if filter_name == "tfidf":
        para_filter = TopTfIdf(NltkPlusStopWords(punctuation=True), args.n_paragraphs)
    elif filter_name == "truncate":
        para_filter = FirstN(args.n_paragraphs)
    elif filter_name == "linear":
        para_filter = ShallowOpenWebRanker(args.n_paragraphs)
    else:
        raise ValueError()

    n_questions = args.n_sample
    if n_questions is not None:
        # Sort first so the seeded shuffle gives a reproducible sample
        test_questions.sort(key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(test_questions)
        test_questions = test_questions[:n_questions]

    print("Building question/paragraph pairs...")
    # Loads the relevant questions/documents, selects the right paragraphs, and runs the model's preprocessor
    if per_document:
        prep = ExtractMultiParagraphs(splitter, para_filter, model.preprocessor, require_an_answer=False)
    else:
        prep = ExtractMultiParagraphsPerQuestion(splitter, para_filter, model.preprocessor, require_an_answer=False)
    prepped_data = preprocess_par(test_questions, corpus, prep, args.n_processes, 1000)

    data = []
    for q in prepped_data.data:
        for i, p in enumerate(q.paragraphs):
            if q.answer_text is None:
                ans = None
            else:
                ans = TokenSpans(q.answer_text, p.answer_spans)
            data.append(DocumentParagraphQuestion(q.question_id, p.doc_id,
                                                 (p.start, p.end), q.question, p.text,
                                                  ans, i))

    # Reverse so our first batch will be the largest (so OOMs happen early)
    questions = sorted(data, key=lambda x: (x.n_context_words, len(x.question)), reverse=True)

    print("Done, starting eval")

    # Resolve which checkpoint to load: explicit step > best weights > latest
    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    test_questions = ParagraphAndQuestionDataset(questions, FixedOrderBatcher(args.batch_size, True))

    evaluation = trainer.test(model,
                              [RecordParagraphSpanPrediction(args.max_answer_len, True)],
                              {args.corpus: test_questions}, ResourceLoader(), checkpoint,
                              not args.no_ema, args.async_)[args.corpus]

    # Sanity check: every metric must cover every paragraph-question pair
    if not all(len(x) == len(data) for x in evaluation.per_sample.values()):
        raise RuntimeError()

    df = pd.DataFrame(evaluation.per_sample)

    if args.official_output is not None:
        print("Saving question result")

        # I didn't store the unormalized filenames exactly, so unfortunately we have to reload
        # the source data to get exact filename to output an official test script
        fns = {}
        print("Loading proper filenames")
        if args.corpus == 'web-test':
            source = join(TRIVIA_QA, "qa", "web-test-without-answers.json")
        elif args.corpus == "web-dev":
            source = join(TRIVIA_QA, "qa", "web-dev.json")
        else:
            raise NotImplementedError()

        with open(source) as f:
            data = json.load(f)["Data"]
        for point in data:
            for doc in point["EntityPages"]:
                filename = doc["Filename"]
                fn = join("wikipedia", filename[:filename.rfind(".")])
                fn = normalize_wiki_filename(fn)
                fns[(point["QuestionId"], fn)] = filename

        # Keep only the highest-scoring answer per (question, document)
        answers = {}
        scores = {}
        for q_id, doc_id, start, end, txt, score in df[["question_id", "doc_id", "para_start", "para_end",
                                                        "text_answer", "predicted_score"]].itertuples(index=False):
            filename = dataset.evidence.file_id_map[doc_id]
            if filename.startswith("web"):
                true_name = filename[4:] + ".txt"
            else:
                true_name = fns[(q_id, filename)]

            key = q_id + "--" + true_name
            prev_score = scores.get(key)
            if prev_score is None or prev_score < score:
                scores[key] = score
                answers[key] = txt

        with open(args.official_output, "w") as f:
            json.dump(answers, f)

    if per_document:
        group_by = ["question_id", "doc_id"]
    else:
        group_by = ["question_id"]

    # Print a table of scores as more paragraphs are used
    df.sort_values(group_by + ["rank"], inplace=True)
    f1 = compute_model_scores(df, "predicted_score", "text_f1", group_by)
    em = compute_model_scores(df, "predicted_score", "text_em", group_by)
    table = [["N Paragraphs", "EM", "F1"]]
    table += list([str(i+1), "%.4f" % e, "%.4f" % f] for i, (e, f) in enumerate(zip(em, f1)))
    print_table(table)

    output_file = args.paragraph_output
    if output_file is not None:
        print("Saving paragraph result")
        if output_file.endswith("json"):
            with open(output_file, "w") as f:
                json.dump(evaluation.per_sample, f)
        elif output_file.endswith("pkl"):
            with open(output_file, "wb") as f:
                pickle.dump(evaluation.per_sample, f)
        elif output_file.endswith("csv"):
            df.to_csv(output_file, index=False)
        else:
            raise ValueError("Unrecognized file format")
Exemplo n.º 10
0
def main():
    """Run the demo QA web server (Sanic): parses CLI options, builds the
    QaSystem (or a debug stub), and exposes /answer and /answer-from."""
    parser = argparse.ArgumentParser(description='Run the demo server')
    parser.add_argument('model', help='Models to use')

    parser.add_argument(
        '-v',
        '--voc',
        help='vocab to use, only words from this file will be used')
    parser.add_argument('-t',
                        '--tokens',
                        type=int,
                        default=400,
                        help='Number of tokens to use per paragraph')
    parser.add_argument('--vec_dir', help='Location to find word vectors')
    parser.add_argument('--n_paragraphs',
                        type=int,
                        default=12,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('--span_bound',
                        type=int,
                        default=8,
                        help="Max span size to return as an answer")

    parser.add_argument(
        '--tagme_api_key',
        help="Key to use for TAGME (tagme.d4science.org/tagme)")
    parser.add_argument('--bing_api_key', help="Key to use for bing searches")
    parser.add_argument('--tagme_thresh', default=0.2, type=float)
    parser.add_argument('--no_wiki',
                        action="store_true",
                        help="Dont use TAGME")
    parser.add_argument('--n_web',
                        type=int,
                        default=10,
                        help='Number of web docs to fetch')
    parser.add_argument('--blacklist_trivia_sites',
                        action="store_true",
                        help="Don't use trivia websites")
    parser.add_argument('-c',
                        '--wiki_cache',
                        help="Cache wiki articles in this directory")

    parser.add_argument('--n_dl_threads',
                        type=int,
                        default=5,
                        help="Number of threads to download documents with")
    parser.add_argument('--request_timeout', type=int, default=60)
    parser.add_argument('--download_timeout', type=int, default=25)
    parser.add_argument('--workers',
                        type=int,
                        default=1,
                        help="Number of server workers")
    parser.add_argument('--debug',
                        default=None,
                        choices=["random_model", "dummy_qa"])

    args = parser.parse_args()
    span_bound = args.span_bound

    # API keys may come from the CLI or from environment variables
    if args.tagme_api_key is not None:
        tagme_api_key = args.tagme_api_key
    else:
        tagme_api_key = environ.get("TAGME_API_KEY")

    if args.bing_api_key is not None:
        bing_api_key = args.bing_api_key
    else:
        bing_api_key = environ.get("BING_API_KEY")
        if bing_api_key is None and args.n_web > 0:
            raise ValueError("If n_web > 0 you must give a BING_API_KEY")

    # In debug mode a random predictor replaces the real model
    if args.debug is None:
        model = ModelDir(args.model)
    else:
        model = RandomPredictor(5, WithIndicators())

    if args.vec_dir is not None:
        loader = LoadFromPath(args.vec_dir)
    else:
        loader = ResourceLoader()

    if args.debug == "dummy_qa":
        qa = DummyQa()
    else:
        qa = QaSystem(
            args.wiki_cache,
            MergeParagraphs(args.tokens),
            ShallowOpenWebRanker(args.n_paragraphs),
            args.voc,
            model,
            loader,
            bing_api_key,
            tagme_api_key=tagme_api_key,
            n_dl_threads=args.n_dl_threads,
            blacklist_trivia_sites=args.blacklist_trivia_sites,
            download_timeout=args.download_timeout,
            span_bound=span_bound,
            tagme_threshold=None if args.no_wiki else args.tagme_thresh,
            n_web_docs=args.n_web)

    # Route all log output through a single stream handler at DEBUG level
    logging.propagate = False
    formatter = logging.Formatter("%(asctime)s: %(levelname)s: %(message)s")
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logging.root.addHandler(handler)
    logging.root.setLevel(logging.DEBUG)

    app = Sanic()
    app.config.REQUEST_TIMEOUT = args.request_timeout

    @app.route("/answer")
    async def answer(request):
        # GET endpoint: ?question=... -> ranked answer spans as JSON
        try:
            question = request.args["question"][0]
            if question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            spans, paras = await qa.answer_question(question)
            answers = select_answers(paras, spans, span_bound, 10)
            # NOTE(review): bare `json(...)` — presumably sanic.response.json
            # is imported under this name (the stdlib json module would not
            # be callable); confirm the import
            return json([x.to_json() for x in answers])
        except Exception as e:
            # NOTE(review): `log` is presumably a module-level logger; confirm
            log.info("Error: " + str(e))

            raise ServerError("Server Error", status_code=500)

    @app.route('/answer-from', methods=['POST'])
    async def answer_from(request):
        # POST endpoint: answer a question against a client-supplied document
        try:
            args = ujson.loads(request.body.decode("utf-8"))
            question = args.get("question")
            if question is None or question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            doc = args["document"]
            if len(doc) > 500000:
                raise ServerError("Document too large", status_code=400)
            spans, paras = qa.answer_with_doc(question, doc)
            answers = select_answers(paras, spans, span_bound, 10)
            return json([x.to_json() for x in answers])
        except Exception as e:
            log.info("Error: " + str(e))
            raise ServerError("Server Error", status_code=500)

    # NOTE(review): one path says "server", the other "service", and both
    # contain a doubled slash — verify these static paths exist
    app.static('/', './docqa//server/static/index.html')
    app.static('/about.html', './docqa//service/static/about.html')
    app.run(host="0.0.0.0", port=8000, workers=args.workers, debug=False)
Exemplo n.º 11
0
    def _init(self, loader: ResourceLoader, voc: Iterable[str]):
        """Build the word embedding matrix with extra "placeholder" rows
        appended after the pre-trained vectors.

        When `self.placeholder_flag` is set, every embedding gains one extra
        feature that is 0 for regular words and 1 for placeholder rows.
        Row 0 is the all-zero null/padding embedding.
        """
        # TODO we should not be building variables here
        if voc is not None:
            word_to_vec = loader.load_word_vec(self.vec_name, voc)
        else:
            # No vocab given: load all vectors and derive the vocab from them
            word_to_vec = loader.load_word_vec(self.vec_name)
            voc = set(word_to_vec.keys())

        # Maps word -> row index in the final embedding matrix
        self._word_to_ix = {}

        # Embedding dimensionality, taken from an arbitrary loaded vector
        dim = next(iter(word_to_vec.values())).shape[0]
        if self.placeholder_flag:
            dim += 1  # extra indicator feature marking placeholder rows

        # Row 0: all-zero embedding used for padding/null tokens
        null_embed = tf.zeros((1, dim), dtype=tf.float32)
        ix = 1
        matrix_list = [null_embed]

        if self._special_tokens is not None and len(self._special_tokens) > 0:
            print("Building embeddings for %d special_tokens" %
                  (len(self._special_tokens)))
            # One trainable embedding row per special token
            tok_embed = tf.get_variable(
                shape=(len(self._special_tokens), dim),
                name="token_embed",
                dtype=np.float32,
                trainable=True,
                initializer=tf.random_uniform_initializer(
                    -self.word_vec_init_scale, self.word_vec_init_scale))
            matrix_list.append(tok_embed)
            for token in self._special_tokens:
                self._word_to_ix[token] = ix
                ix += 1

        mat = []
        for word in voc:
            if word in self._word_to_ix:
                continue  # already added, e.g. after seeing a capitalized variant of `word`
            if word in word_to_vec:
                mat.append(word_to_vec[word])
                self._word_to_ix[word] = ix
                ix += 1
            else:
                lower = word.lower()  # Fall back to the lower-case version
                if lower in word_to_vec and lower not in self._word_to_ix:
                    mat.append(word_to_vec[lower])
                    self._word_to_ix[lower] = ix
                    ix += 1

        print("Had pre-trained word embeddings for %d of %d words" %
              (len(mat), len(voc)))

        mat = np.vstack(mat)
        if self.placeholder_flag:
            # Regular words get 0 in the placeholder-indicator feature
            mat = np.concatenate(
                [mat, np.zeros((len(mat), 1), dtype=np.float32)], axis=1)
        matrix_list.append(tf.constant(value=mat))

        # First row index used by the placeholder embeddings
        self._placeholder_start = ix

        if self.placeholder_flag:

            def init(shape, dtype=None, partition_info=None):
                # Random vectors with the indicator feature fixed to 1;
                # the `shape` argument is intentionally ignored
                out = tf.random_normal((self.n_placeholders, dim - 1),
                                       stddev=self.placeholder_stddev)
                return tf.concat([out, tf.ones((self.n_placeholders, 1))],
                                 axis=1)

            init_fn = init
        else:
            init_fn = tf.random_normal_initializer(
                stddev=self.placeholder_stddev)

        # Placeholder rows are randomly initialized and never trained
        matrix_list.append(
            tf.get_variable("placeholders",
                            (self.n_placeholders, mat.shape[1]),
                            tf.float32,
                            trainable=False,
                            initializer=init_fn))

        self._word_emb_mat = tf.concat(matrix_list, axis=0)
Exemplo n.º 12
0
def convert_saved_graph(model_dir, output_dir):
    """Convert a cudnn-GRU checkpoint in `model_dir` to a CPU-compatible one.

    Rebuilds the model graph, restores all non-LM weights (the bilm variables
    are freshly initialized instead), renames the cudnn GRU parameter tensors
    to the names the CPU `bidirectional_rnn` implementation expects, and saves
    the converted variables under `output_dir`/save.

    :param model_dir: directory of the cudnn-trained model
    :param output_dir: directory to write the converted model to (created if
        missing)
    :return: the cudnn model's [start_logits, end_logits] on a test question,
        so callers can compare against the converted model's output
    """
    print("Load model")
    md = ModelDir(model_dir)
    model = md.get_model()

    # remove the lm models word embeddings - cpu model will use Char-CNN
    model.lm_model.embed_weights_file = None
    dim = model.embed_mapper.layers[1].n_units

    print("Setting up cudnn version")
    sess = tf.Session()
    with sess.as_default():
        model.set_input_spec(
            ParagraphAndQuestionSpec(1, None, None, 14), {"the"},
            ResourceLoader(lambda a, b: {"the": np.zeros(300, np.float32)}))
        print("Buiding graph")
        pred = model.get_prediction()

    test_questions = get_test_questions()

    print("Load vars:")
    # Restore everything except the bilm variables, which are initialized
    # from scratch below.
    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    lm_var_names = {x.name for x in all_vars if x.name.startswith("bilm")}
    vars = [x for x in all_vars if x.name not in lm_var_names]
    md.restore_checkpoint(sess, vars)
    sess.run(
        tf.variables_initializer(
            [x for x in all_vars if x.name in lm_var_names]))

    feed = model.encode([test_questions], False)
    cuddn_out = sess.run([pred.start_logits, pred.end_logits], feed_dict=feed)

    print("Done, copying files...")
    if not exists(output_dir):
        mkdir(output_dir)
    for file in listdir(model_dir):
        # FIX: `isfile(file)` tested the name relative to the cwd, so the
        # auxiliary files were never copied; test the real path instead.
        if isfile(join(model_dir, file)) and file != "model.npy":
            copyfile(join(model_dir, file), join(output_dir, file))

    print("Done, mapping tensors...")
    to_save, to_init = [], []
    for x in tf.trainable_variables():
        if x.name.endswith("/gru_parameters:0"):
            key = x.name[:-len("/gru_parameters:0")]
            indim, outdim = get_dims(x, dim)
            c = cudnn_rnn_ops.CudnnGRUSaveable(x, 1, outdim, indim, scope=key)
            for spec in c.specs:
                if spec.name.endswith("bias_cudnn 0") or \
                        spec.name.endswith("bias_cudnn 1"):
                    print('Unsupported spec: ' + spec.name)
                    continue
                # Rename cudnn parameter tensors to the equivalent names used
                # by the CPU bidirectional_rnn cells.
                if 'forward' in spec.name:
                    new_name = spec.name.replace(
                        'forward/rnn/multi_rnn_cell/cell_0/',
                        'bidirectional_rnn/fw/')
                else:
                    new_name = spec.name.replace(
                        'backward/rnn/multi_rnn_cell/cell_0/',
                        'bidirectional_rnn/bw/')
                v = tf.Variable(sess.run(spec.tensor), name=new_name)
                to_init.append(v)
                to_save.append(v)
        else:
            to_save.append(x)

    save_dir = join(output_dir, "save")
    if not exists(save_dir):
        mkdir(save_dir)

    # save:
    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    vars_to_save = [x for x in all_vars if not x.name.startswith("bilm")]
    # FIX: use variables_initializer; tf.initialize_variables is deprecated
    # (and the rest of this function already uses variables_initializer).
    sess.run(tf.variables_initializer(to_init))
    saver = tf.train.Saver(vars_to_save)
    saver.save(
        sess,
        join(save_dir, 'checkpoint'),
        global_step=123456789,
        write_meta_graph=False,
    )

    sess.close()
    tf.reset_default_graph()
    return cuddn_out
Exemplo n.º 13
0
    def _init(self, loader: ResourceLoader, voc: Iterable[str],
              allow_update=False, do_update=False):
        """Build the word-embedding matrix and (on first call) the tf graph.

        :param loader: ResourceLoader used to fetch pre-trained word vectors
        :param voc: vocabulary to embed; if None, every word the loader
            provides is used
        :param allow_update: if True, expose the pre-trained portion of the
            matrix as a placeholder (`self.common_word_mat`) so the
            vocabulary can be swapped at test time without rebuilding the graph
        :param do_update: if True, this is a refresh call — only
            `self._word_to_ix` and `self.common_word_mat_np` are rebuilt; the
            tf variables/graph from a previous do_update=False call are reused
        """
        # TODO we should not be building variables here
        if voc is not None:
            word_to_vec = loader.load_word_vec(self.vec_name, voc)
        else:
            word_to_vec = loader.load_word_vec(self.vec_name)
            voc = set(word_to_vec.keys())

        self._word_to_ix = {}

        # Embedding dimensionality, taken from an arbitrary loaded vector
        dim = next(iter(word_to_vec.values())).shape[0]

        # Row 0 is reserved for padding (all-zero embedding)
        null_embed = tf.zeros((1, dim), dtype=tf.float32)
        # Row 1 is the unknown-word embedding; create the variable only once —
        # refresh calls (do_update=True) reuse the existing self.unk_embed
        if not do_update:
            self.unk_embed = tf.get_variable(
                shape=(1, dim), name="unk_embed",
                dtype=np.float32, trainable=self.learn_unk,
                initializer=tf.random_uniform_initializer(-self.word_vec_init_scale,
                                                          self.word_vec_init_scale))
        ix = 2  # next free row: 0 = null, 1 = unk
        matrix_list = [null_embed, self.unk_embed]

        if self._special_tokens is not None and len(self._special_tokens) > 0:
            print("Building embeddings for %d special_tokens" % (len(self._special_tokens)))
            # Special tokens get their own trainable embedding rows
            tok_embed = tf.get_variable(shape=(len(self._special_tokens), dim), name="token_embed",
                                        dtype=np.float32, trainable=True,
                                        initializer=tf.random_uniform_initializer(-self.word_vec_init_scale,
                                                                                  self.word_vec_init_scale))
            matrix_list.append(tok_embed)
            for token in self._special_tokens:
                self._word_to_ix[token] = ix
                ix += 1

        mat = []
        for word in voc:
            if word in self._word_to_ix:
                continue  # in case we already added due after seeing a capitalized version of `word`
            if word in word_to_vec:
                mat.append(word_to_vec[word])
                self._word_to_ix[word] = ix
                ix += 1
            else:
                lower = word.lower()  # Fall back to the lower-case version
                if lower in word_to_vec and lower not in self._word_to_ix:
                    mat.append(word_to_vec[lower])
                    self._word_to_ix[lower] = ix
                    ix += 1

        print("Had pre-trained word embeddings for %d of %d words" % (len(mat), len(voc)))

        # Encoder will feed this as value of self.common_word_mat
        # Allows us to quickly change the vocabulary at test time
        self.common_word_mat_np = np.vstack(mat)

        if not do_update:
            # Set up the tf graph only once
            if allow_update:
                # Placeholder so a new vocabulary matrix can be fed in later
                self.common_word_mat = tf.placeholder(tf.float32, shape=(None, dim),
                                                      name='common_word_mat')

                matrix_list.append(self.common_word_mat)
            else:
                # Vocabulary is frozen into the graph as a constant
                self.common_word_mat = None
                matrix_list.append(tf.constant(value=self.common_word_mat_np))
            self._word_emb_mat = tf.concat(matrix_list, axis=0)
def main():
    """Evaluate a model on TriviaQA data.

    Selects paragraphs per question (or per question-document pair for the
    web/wiki corpora), runs the model over them, logs aggregate EM/F1 via
    ElasticLogger, writes `results.csv`, and optionally writes an official
    output file and a per-paragraph CSV.
    """
    parser = argparse.ArgumentParser(
        description='Evaluate a model on TriviaQA data')
    parser.add_argument('model', help='model directory')
    parser.add_argument(
        '-p',
        '--paragraph_output',
        type=str,
        help="Save fine grained results for each paragraph in csv format")
    parser.add_argument('-o',
                        '--official_output',
                        type=str,
                        help="Build an offical output file with the model's"
                        " most confident span for each (question, doc) pair")
    parser.add_argument('--no_ema',
                        action="store_true",
                        help="Don't use EMA weights even if they exist")
    parser.add_argument(
        '--n_processes',
        type=int,
        default=None,
        help=
        "Number of processes to do the preprocessing (selecting paragraphs+loading context) with"
    )
    parser.add_argument('-i',
                        '--step',
                        type=int,
                        default=None,
                        help="checkpoint to load, default to latest")
    parser.add_argument('-n',
                        '--n_sample',
                        type=int,
                        default=None,
                        help="Number of questions to evaluate on")
    # FIX: `async` became a reserved keyword in Python 3.7, so `args.async`
    # is a syntax error; keep the CLI flag but store it under `async_`.
    parser.add_argument('-a', '--async', type=int, default=10, dest='async_')
    parser.add_argument('-t',
                        '--tokens',
                        type=int,
                        default=400,
                        help="Max tokens per a paragraph")
    parser.add_argument('-g',
                        '--n_paragraphs',
                        type=int,
                        default=15,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('-f',
                        '--filter',
                        type=str,
                        default=None,
                        choices=["tfidf", "truncate", "linear"],
                        help="How to select paragraphs")
    parser.add_argument(
        '-b',
        '--batch_size',
        type=int,
        default=200,
        help="Batch size, larger sizes might be faster but wll take more memory"
    )
    parser.add_argument('--max_answer_len',
                        type=int,
                        default=8,
                        help="Max answer span to select")
    parser.add_argument('-c',
                        '--corpus',
                        choices=[
                            "web-dev", "web-test", "web-verified-dev",
                            "web-train", "open-dev", "open-train", "wiki-dev",
                            "wiki-test"
                        ],
                        default="web-verified-dev")
    parser.add_argument("-s",
                        "--source_dir",
                        type=str,
                        default=None,
                        help="where to take input files")
    parser.add_argument("--n_span_per_q",
                        type=int,
                        default=1,
                        help="where to take input files")
    args = parser.parse_args()

    dataset_name = args.source_dir.split('/')[-1]
    model_name = args.model.split('/')[-1]
    ElasticLogger().write_log('INFO',
                              'Start Evaluation',
                              context_dict={
                                  'model': model_name,
                                  'dataset': dataset_name
                              })

    model_dir = ModelDir(args.model)
    model = model_dir.get_model()

    # Pick the question set for the requested corpus
    if args.corpus.startswith('web'):
        dataset = TriviaQaWebDataset()
        if args.corpus == "web-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "web-test":
            test_questions = dataset.get_test()
        elif args.corpus == "web-verified-dev":
            test_questions = dataset.get_verified()
        elif args.corpus == "web-train":
            test_questions = dataset.get_train()
        else:
            raise AssertionError()
    elif args.corpus.startswith("wiki"):
        dataset = TriviaQaWikiDataset()
        if args.corpus == "wiki-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "wiki-test":
            test_questions = dataset.get_test()
        else:
            raise AssertionError()
    else:
        dataset = TriviaQaOpenDataset(args.source_dir)
        if args.corpus == "open-dev":
            # just loading the pkl that was saved in build_span_corpus
            test_questions = dataset.get_dev()
        elif args.corpus == "open-train":
            test_questions = dataset.get_train()
        else:
            raise AssertionError()

    ### ALON debuging
    #test_questions = test_questions[0:5]

    corpus = dataset.evidence
    splitter = MergeParagraphs(args.tokens)

    per_document = args.corpus.startswith(
        "web")  # wiki and web are both multi-document
    #per_document = True

    filter_name = args.filter
    if filter_name is None:
        # Pick default depending on the kind of data we are using
        if per_document:
            filter_name = "tfidf"
        else:
            filter_name = "linear"

    print("Selecting %d paragraphs using method \"%s\" per %s" %
          (args.n_paragraphs, filter_name,
           ("question-document pair" if per_document else "question")))

    if filter_name == "tfidf":
        para_filter = TopTfIdf(NltkPlusStopWords(punctuation=True),
                               args.n_paragraphs)
    elif filter_name == "truncate":
        para_filter = FirstN(args.n_paragraphs)
    elif filter_name == "linear":
        para_filter = ShallowOpenWebRanker(args.n_paragraphs)
    else:
        raise ValueError()

    n_questions = args.n_sample
    docqa.config.SPANS_PER_QUESTION = args.n_span_per_q
    #n_questions = 1
    if n_questions is not None:
        # Sort first so the seeded shuffle yields a reproducible sample
        test_questions.sort(key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(test_questions)
        test_questions = test_questions[:n_questions]

    print("Building question/paragraph pairs...")
    # Loads the relevant questions/documents, selects the right paragraphs, and runs the model's preprocessor
    if per_document:
        prep = ExtractMultiParagraphs(splitter,
                                      para_filter,
                                      model.preprocessor,
                                      require_an_answer=False)
    else:
        prep = ExtractMultiParagraphsPerQuestion(splitter,
                                                 para_filter,
                                                 model.preprocessor,
                                                 require_an_answer=False)
    prepped_data = preprocess_par(test_questions, corpus, prep,
                                  args.n_processes, 1000)

    data = []
    for q in prepped_data.data:
        for i, p in enumerate(q.paragraphs):
            if q.answer_text is None:
                ans = None
            else:
                ans = TokenSpans(q.answer_text, p.answer_spans)
            data.append(
                DocumentParagraphQuestion(q.question_id, p.doc_id,
                                          (p.start, p.end), q.question, p.text,
                                          ans, i))

    # Reverse so our first batch will be the largest (so OOMs happen early)
    questions = sorted(data,
                       key=lambda x: (x.n_context_words, len(x.question)),
                       reverse=True)

    print("Done, starting eval")

    if args.step is not None:
        # NOTE(review): args.step is type=int, so the "latest" comparison
        # below can never be true from the CLI — confirm intent.
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    test_questions = ParagraphAndQuestionDataset(
        questions, FixedOrderBatcher(args.batch_size, True))

    evaluation = trainer.test(
        model, [RecordParagraphSpanPrediction(args.max_answer_len, True)],
        {args.corpus: test_questions}, ResourceLoader(), checkpoint,
        not args.no_ema, args.async_)[args.corpus]

    if not all(len(x) == len(data) for x in evaluation.per_sample.values()):
        raise RuntimeError()

    df = pd.DataFrame(evaluation.per_sample)

    if args.official_output is not None:
        print("Saving question result")

        fns = {}
        if per_document:
            # I didn't store the unormalized filenames exactly, so unfortunately we have to reload
            # the source data to get exact filename to output an official test script
            print("Loading proper filenames")
            if args.corpus == 'web-test':
                source = join(TRIVIA_QA, "qa", "web-test-without-answers.json")
            elif args.corpus == "web-dev":
                source = join(TRIVIA_QA, "qa", "web-dev.json")
            else:
                raise AssertionError()

            with open(join(source)) as f:
                data = json.load(f)["Data"]
            for point in data:
                for doc in point["EntityPages"]:
                    filename = doc["Filename"]
                    fn = join("wikipedia", filename[:filename.rfind(".")])
                    fn = normalize_wiki_filename(fn)
                    fns[(point["QuestionId"], fn)] = filename

        # Keep only the highest-scoring answer per key
        answers = {}
        scores = {}
        for q_id, doc_id, start, end, txt, score in df[[
                "question_id", "doc_id", "para_start", "para_end",
                "text_answer", "predicted_score"
        ]].itertuples(index=False):
            filename = dataset.evidence.file_id_map[doc_id]
            if per_document:
                if filename.startswith("web"):
                    true_name = filename[4:] + ".txt"
                else:
                    true_name = fns[(q_id, filename)]
                # Alon Patch for triviaqa test results
                true_name = true_name.replace('TriviaQA_Org/', '')
                key = q_id + "--" + true_name
            else:
                key = q_id

            prev_score = scores.get(key)
            if prev_score is None or prev_score < score:
                scores[key] = score
                answers[key] = txt

        with open(args.official_output, "w") as f:
            json.dump(answers, f)

    output_file = args.paragraph_output
    if output_file is not None:
        print("Saving paragraph result")
        df.to_csv(output_file, index=False)

    print("Computing scores")

    if per_document:
        group_by = ["question_id", "doc_id"]
    else:
        group_by = ["question_id"]

    # Print a table of scores as more paragraphs are used
    df.sort_values(group_by + ["rank"], inplace=True)
    df_scores = df.copy(deep=True)
    df_scores['predicted_score'] = df_scores['predicted_score'].apply(
        lambda x: pd.Series(x).max())

    em = compute_ranked_scores(df_scores, "predicted_score", "text_em",
                               group_by)
    f1 = compute_ranked_scores(df_scores, "predicted_score", "text_f1",
                               group_by)
    table = [["N Paragraphs", "EM", "F1"]]
    table += list([str(i + 1), "%.4f" % e, "%.4f" % f]
                  for i, (e, f) in enumerate(zip(em, f1)))

    table_df = pd.DataFrame(table[1:], columns=table[0]).drop(['N Paragraphs'],
                                                              axis=1)
    # FIX: the `.ix` accessor was removed in pandas 1.0; plain Series
    # indexing is equivalent here.
    ElasticLogger().write_log('INFO', 'Results', context_dict={'model': model_name, 'dataset': dataset_name, \
                                                            'max_EM': table_df.max()['EM'], \
                                                            'max_F1': table_df.max()['F1'], \
                                                            'result_table': str(table_df)})

    # Flatten the per-question span lists into one row per candidate span
    df_flat = []
    for id, question in df.iterrows():
        for text_answer, predicted_span, predicted_score in zip(
                question['text_answer'], question['predicted_span'],
                question['predicted_score']):
            new_question = dict(question.copy())
            new_question.update({
                'text_answer': text_answer,
                'predicted_span': predicted_span,
                'predicted_score': predicted_score
            })
            df_flat.append(new_question)

    results_df = pd.DataFrame(df_flat)
    #Alon: outputing the estimates for all the
    #results_df = results_df.groupby(['question_id', 'text_answer']).apply(lambda df: df.ix[df['predicted_score'].argmax()]).reset_index(drop=True)
    results_df.sort_values(by=['question_id', 'predicted_score'],
                           ascending=False).set_index([
                               'question_id', 'text_answer'
                           ])[['question', 'predicted_score',
                               'text_em']].to_csv('results.csv')

    print_table(table)
Exemplo n.º 15
0
def perform_evaluation(model_name: str,
                       dataset_names: List[str],
                       tokens_per_paragraph: int,
                       filter_type: str,
                       n_processes: int,
                       n_paragraphs: int,
                       batch_size: int,
                       checkpoint: str,
                       no_ema: bool,
                       max_answer_len: int,
                       official_output_path: str,
                       paragraph_output_path: str,
                       aggregated_output_path: str,
                       elmo_char_cnn: bool,
                       n_samples: Union[int, None],
                       per_document: bool = False):
    """Perform an evaluation using cape's answer decoder

    A file will be created listing the answers per question ID for each dataset

    :param model_name: path to the model to evaluate
    :param dataset_names: list of strings of datasets to evaluate
    :param tokens_per_paragraph: how big to make paragraph chunks
    :param filter_type: how to select the paragraphs to read
    :param n_processes: how many processes to use when multiprocessing
    :param n_paragraphs: how many paragraphs to read per question
    :param batch_size: how many datapoints to evaluate at once
    :param checkpoint: string, checkpoint to load
    :param no_ema: if true, dont use EMA weights
    :param max_answer_len: the maximum allowable length of an answer in tokens
    :param official_output_path: path to write official output to
    :param paragraph_output_path: path to write paragraph output to
    :param aggregated_output_path: path to write aggregated output to
    :param elmo_char_cnn: if true, uses the elmo CNN to make token embeddings, less OOV but
        requires much more memory
    :param n_samples: if not None, limit evaluation to this many samples
    :param per_document: if false, return best scoring answer to a question, if true,
        the best scoring answer from each document is used instead.
    """
    # FIX: `async` is a reserved keyword since Python 3.7, so it cannot be
    # used as a variable name; use a plain local instead.
    use_async = True
    corpus_name = 'all'

    print('Setting Up:')
    model_dir = ModelDir(model_name)
    model = model_dir.get_model()
    dataset = get_multidataset(dataset_names)
    splitter = MergeParagraphs(tokens_per_paragraph)
    para_filter = get_para_filter(filter_type, per_document, n_paragraphs)
    test_questions, n_questions = get_questions(per_document, dataset,
                                                splitter, para_filter,
                                                model.preprocessor,
                                                n_processes, batch_size)

    print("Starting eval")
    checkpoint = get_checkpoint(checkpoint, model_dir)
    evaluation = test(model,
                      [RecordParagraphSpanPrediction(max_answer_len, True)],
                      {corpus_name: test_questions}, ResourceLoader(),
                      checkpoint, not no_ema, use_async, n_samples,
                      elmo_char_cnn)[corpus_name]

    print('Exporting and Post-processing')
    # Sanity check: every metric must cover every evaluated question
    if not all(len(x) == n_questions for x in evaluation.per_sample.values()):
        raise RuntimeError()

    df = pd.DataFrame(evaluation.per_sample)
    compute_and_dump_official_output(df,
                                     official_output_path,
                                     per_document=per_document)

    print("Saving paragraph result")
    df.to_csv(paragraph_output_path, index=False)

    print("Computing scores")
    agg_df = get_aggregated_df(df, per_document)
    agg_df.to_csv(aggregated_output_path, index=False)
Exemplo n.º 16
0
def convert(model_dir, output_dir, best_weights=False):
    """Convert a cudnn-GRU model in `model_dir` to a CPU-compatible model.

    Rebuilds the graph, restores the checkpoint, renames the cudnn GRU
    parameter tensors to `bidirectional_rnn` names, swaps the model's
    recurrent layers for `BiRecurrentMapper(CompatGruCellSpec(...))`
    equivalents, pickles the updated model to `output_dir`/model.pkl with a
    checkpoint under `output_dir`/save, then reloads everything and prints
    whether the CPU model reproduces the cudnn model's logits.

    :param model_dir: directory of the cudnn-trained model
    :param output_dir: directory to write the converted model to
    :param best_weights: NOTE(review): currently unused — kept for interface
        compatibility; confirm whether best-weight selection was intended
    """
    print("Load model")
    md = ModelDir(model_dir)
    model = md.get_model()
    dim = model.embed_mapper.layers[1].n_units
    global_step = tf.get_variable('global_step',
                                  shape=[],
                                  dtype='int32',
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    print("Setting up cudnn version")
    #global_step = tf.get_variable('global_step', shape=[], dtype='int32', trainable=False)
    sess = tf.Session()
    sess.run(global_step.assign(0))
    with sess.as_default():
        model.set_input_spec(
            ParagraphAndQuestionSpec(1, None, None, 14), {"the"},
            ResourceLoader(lambda a, b: {"the": np.zeros(300, np.float32)}))

        print("Buiding graph")
        pred = model.get_prediction()

    test_questions = ParagraphAndQuestion(
        ["Harry", "Potter", "was", "written", "by", "JK"],
        ["Who", "wrote", "Harry", "Potter", "?"], None, "test_questions")

    print("Load vars")
    md.restore_checkpoint(sess)
    print("Restore finished")

    feed = model.encode([test_questions], False)
    cuddn_out = sess.run([pred.start_logits, pred.end_logits], feed_dict=feed)

    print("Done, copying files...")
    if not exists(output_dir):
        mkdir(output_dir)
    for file in listdir(model_dir):
        # FIX: `isfile(file)` tested the name relative to the cwd, so the
        # model's auxiliary files were never copied; test the real path.
        if isfile(join(model_dir, file)) and file != "model.npy":
            copyfile(join(model_dir, file), join(output_dir, file))

    print("Done, mapping tensors...")
    to_save = []
    to_init = []
    for x in tf.trainable_variables():
        if x.name.endswith("/gru_parameters:0"):
            key = x.name[:-len("/gru_parameters:0")]
            fw_params = x
            # Hidden size depends on which part of the network the GRU is in
            if "map_embed" in x.name:
                c = cudnn_rnn_ops.CudnnGRU(1, dim, 400)
            elif "chained-out" in x.name:
                c = cudnn_rnn_ops.CudnnGRU(1, dim, dim * 4)
            else:
                c = cudnn_rnn_ops.CudnnGRU(1, dim, dim * 2)
            params_saveable = cudnn_rnn_ops.RNNParamsSaveable(
                c, c.params_to_canonical, c.canonical_to_params, [fw_params],
                key)

            for spec in params_saveable.specs:
                if spec.name.endswith("bias_cudnn 0") or \
                        spec.name.endswith("bias_cudnn 1"):
                    # ??? What do these even do?
                    continue
                # Rewrite the cudnn tensor name into the layout the CPU
                # bidirectional_rnn cells expect: drop "cell_0", move
                # forward/backward into fw/bw, and rename multi_rnn_cell.
                name = spec.name.split("/")
                name.remove("cell_0")
                if "forward" in name:
                    ix = name.index("forward")
                    name.insert(ix + 2, "fw")
                else:
                    ix = name.index("backward")
                    name.insert(ix + 2, "bw")
                del name[ix]

                ix = name.index("multi_rnn_cell")
                name[ix] = "bidirectional_rnn"
                name = "/".join(name)
                v = tf.Variable(sess.run(spec.tensor), name=name)
                to_init.append(v)
                to_save.append(v)

        else:
            to_save.append(x)

    # Non-trainable globals (e.g. global_step) must be saved too
    other = [
        x for x in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        if x not in tf.trainable_variables()
    ]
    print(other)
    # FIX: tf.initialize_variables is deprecated in favor of
    # tf.variables_initializer (same semantics).
    sess.run(tf.variables_initializer(to_init))
    saver = tf.train.Saver(to_save + other)
    save_dir = join(output_dir, "save")
    if not exists(save_dir):
        mkdir(save_dir)

    saver.save(sess, join(save_dir, "checkpoint"), sess.run(global_step))

    sess.close()
    tf.reset_default_graph()

    print("Updating model...")
    # Swap the cudnn recurrent layers for CPU-compatible equivalents
    model.embed_mapper.layers = [
        model.embed_mapper.layers[0],
        BiRecurrentMapper(CompatGruCellSpec(dim))
    ]
    model.match_encoder.layers = list(model.match_encoder.layers)
    other = model.match_encoder.layers[1].other
    other.layers = list(other.layers)
    other.layers[1] = BiRecurrentMapper(CompatGruCellSpec(dim))

    pred = model.predictor.predictor
    pred.first_layer = BiRecurrentMapper(CompatGruCellSpec(dim))
    pred.second_layer = BiRecurrentMapper(CompatGruCellSpec(dim))

    with open(join(output_dir, "model.pkl"), "wb") as f:
        pickle.dump(model, f)

    print("Testing...")
    # Round-trip through pickle to test exactly what a consumer would load
    with open(join(output_dir, "model.pkl"), "rb") as f:
        model = pickle.load(f)

    sess = tf.Session()

    model.set_input_spec(
        ParagraphAndQuestionSpec(1, None, None, 14), {"the"},
        ResourceLoader(lambda a, b: {"the": np.zeros(300, np.float32)}))
    pred = model.get_prediction()

    print("Rebuilding")
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(save_dir))

    feed = model.encode([test_questions], False)
    cpu_out = sess.run([pred.start_logits, pred.end_logits], feed_dict=feed)

    print("These should be close:")
    print([np.allclose(a, b) for a, b in zip(cpu_out, cuddn_out)])
    print(cpu_out)
    print(cuddn_out)
Exemplo n.º 17
0
def main():
    """Run a bottle demo server answering questions over a user document.

    Builds the model graph once with an updatable vocabulary, then serves
    an index page and a /post_query endpoint that tokenizes the submitted
    document/question, selects paragraphs, runs the model, and renders the
    beam of candidate answers.
    """
    print('Starting...')
    model_dir = ModelDir(OPTS.model)
    model = model_dir.get_model()
    tokenizer = NltkAndPunctTokenizer()
    if not isinstance(model, ParagraphQuestionModel):
        raise ValueError(
            "This script is built to work for ParagraphQuestionModel models only"
        )
    if OPTS.reload_vocab:
        loader = ResourceLoader()
    else:
        loader = CachingResourceLoader()
    print('Loading word vectors...')
    # allow_update=True lets us extend the vocabulary per request below
    model.set_input_spec(ParagraphAndQuestionSpec(batch_size=None),
                         set([',']),
                         word_vec_loader=loader,
                         allow_update=True)
    print('Starting Tensorflow session...')
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with sess.as_default():
        prediction = model.get_prediction()
        # Take 0-th here because we know we only truncate to one paragraph
        start_logits_tf = prediction.start_logits[0]
        end_logits_tf = prediction.end_logits[0]
        none_logit_tf = prediction.none_logit[0]
        #best_spans_tf, conf_tf = prediction.get_best_span(MAX_SPAN_LENGTH)
    model_dir.restore_checkpoint(sess)
    splitter = Truncate(400)  # NOTE: we truncate past 400 tokens
    selector = TopTfIdf(NltkPlusStopWords(True), n_to_select=5)
    app = bottle.Bottle()

    @app.route('/')
    def index():
        return bottle.template('index')

    @app.route('/post_query', method='post')
    def post_query():
        document_raw = bottle.request.forms.getunicode('document').strip()
        question_raw = bottle.request.forms.getunicode('question').strip()
        # FIX: use a raw string — "\s" is an invalid escape sequence and a
        # SyntaxWarning on modern Python; the pattern itself is unchanged.
        document = re.split(r"\s*\n\s*", document_raw)
        question = tokenizer.tokenize_paragraph_flat(question_raw)
        doc_toks = [tokenizer.tokenize_paragraph(p) for p in document]
        split_doc = splitter.split(doc_toks)
        context = selector.prune(question, split_doc)
        if model.preprocessor is not None:
            context = [
                model.preprocessor.encode_text(question, x) for x in context
            ]
        else:
            context = [flatten_iterable(x.text) for x in context]
        # Every token in the request must be in the model's vocabulary
        vocab = set(question)
        for txt in context:
            vocab.update(txt)
        data = [
            ParagraphAndQuestion(x, question, None, "user-question%d" % i)
            for i, x in enumerate(context)
        ]
        model.word_embed.update(loader, vocab)
        encoded = model.encode(data, is_train=False)
        start_logits, end_logits, none_logit = sess.run(
            [start_logits_tf, end_logits_tf, none_logit_tf], feed_dict=encoded)
        beam, p_na = logits_to_probs(document_raw,
                                     context[0],
                                     start_logits,
                                     end_logits,
                                     none_logit,
                                     beam_size=BEAM_SIZE)
        return bottle.template('results',
                               document=document_raw,
                               question=question_raw,
                               beam=beam,
                               p_na=p_na)

    cur_dir = os.path.abspath(os.path.dirname(__file__))
    bottle.TEMPLATE_PATH.insert(0, os.path.join(cur_dir, 'views'))
    bottle.run(app, host=OPTS.hostname, port=OPTS.port, debug=OPTS.debug)
Exemplo n.º 18
0
def main():
    """Run the TriviaQA demo web server.

    Parses command-line options, wires our loggers into Sanic's logging
    config, constructs the QA system inside the event loop (so its async
    HTTP clients bind to the right loop), and serves two endpoints:
    `/answer` (answer via web/Bing search) and `/answer-from` (answer
    against a user-supplied document).
    """
    parser = argparse.ArgumentParser(description='Run the demo server')
    parser.add_argument(
        'model',
        default=
        "/home/antriv/conversation_ai/ALLENAI_DocumentQA/document-qa/models/triviaqa-unfiltered-shared-norm/best-weights",
        help='Models to use')

    parser.add_argument(
        '-v',
        '--voc',
        default=
        "/home/antriv/conversation_ai/ALLENAI_DocumentQA/document-qa/data/triviaqa/evidence/vocab.txt",
        help='vocab to use, only words from this file will be used')
    parser.add_argument('-t',
                        '--tokens',
                        type=int,
                        default=400,
                        help='Number of tokens to use per paragraph')
    parser.add_argument('--vec_dir',
                        default="/home/antriv/data/glove",
                        help='Location to find word vectors')
    parser.add_argument('--n_paragraphs',
                        type=int,
                        default=15,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('--paragraphs_to_return',
                        type=int,
                        default=10,
                        help="Number of paragraphs return to the frontend")
    parser.add_argument('--span_bound',
                        type=int,
                        default=8,
                        help="Max span size to return as an answer")

    # NOTE(review): hard-coded third-party API keys should not live in
    # source control — prefer supplying them via the environment (the
    # env-var fallbacks below are dead code while these defaults exist).
    parser.add_argument(
        '--tagme_api_key',
        default="1cdc0067-b2de-4774-afbe-38703b11a365-843339462",
        help="Key to use for TAGME (tagme.d4science.org/tagme)")
    parser.add_argument('--bing_api_key',
                        default="413239df9faa4f1494a914e0c9cec78e",
                        help="Key to use for bing searches")
    parser.add_argument(
        '--bing_version',
        choices=["v5.0", "v7.0"],
        default="v7.0",
        help='Version of Bing API to use (must be compatible with the API key)'
    )
    parser.add_argument(
        '--tagme_thresh',
        default=0.2,
        type=float,
        help="TAGME threshold for when to use the identified docs")
    parser.add_argument('--n_web',
                        type=int,
                        default=10,
                        help='Number of web docs to fetch')
    parser.add_argument('--blacklist_trivia_sites',
                        action="store_true",
                        help="Don't use trivia websites")
    parser.add_argument(
        '-c',
        '--wiki_cache',
        default=
        "/home/antriv/conversation_ai/ALLENAI_DocumentQA/document-qa/data/triviaqa/evidence/wikipedia",
        help="Cache wiki articles in this directory")

    parser.add_argument('--n_dl_threads',
                        type=int,
                        default=5,
                        help="Number of threads to download documents with")
    parser.add_argument('--request_timeout', type=int, default=60)
    parser.add_argument('--download_timeout',
                        type=int,
                        default=25,
                        help="how long to wait before timing out downloads")
    parser.add_argument('--workers',
                        type=int,
                        default=1,
                        help="Number of server workers")
    parser.add_argument('--debug',
                        default=None,
                        choices=["random_model", "dummy_qa"])

    args = parser.parse_args()
    span_bound = args.span_bound
    n_to_return = args.paragraphs_to_return

    # Fall back to environment variables when keys were not given on the
    # command line.
    if args.tagme_api_key is not None:
        tagme_api_key = args.tagme_api_key
    else:
        tagme_api_key = environ.get("TAGME_API_KEY")

    if args.bing_api_key is not None:
        bing_api_key = args.bing_api_key
    else:
        bing_api_key = environ.get("BING_API_KEY")
        if bing_api_key is None and args.n_web > 0:
            raise ValueError("If n_web > 0 you must give a BING_API_KEY")

    if args.debug is None:
        model = ModelDir(args.model)
    else:
        # Debug modes use a random predictor so the server can be exercised
        # without loading real model weights.
        model = RandomPredictor(5, WithIndicators())

    if args.vec_dir is not None:
        loader = LoadFromPath(args.vec_dir)
    else:
        loader = ResourceLoader()

    # Update Sanic's logging to register our class's loggers
    log_config = LOGGING
    formatter = "%(asctime)s: %(levelname)s: %(message)s"
    log_config["formatters"]['my_formatter'] = {
        'format': formatter,
        'datefmt': '%Y-%m-%d %H:%M:%S',
    }
    log_config['handlers']['stream_handler'] = {
        'class': "logging.StreamHandler",
        'formatter': 'my_formatter',
        'stream': sys.stderr
    }
    log_config['handlers']['file_handler'] = {
        'class': "logging.FileHandler",
        'formatter': 'my_formatter',
        'filename': 'logging.log'
    }

    # It looks like we have to go and name every logger our own code might
    # use in order to register it with Sanic
    log_config["loggers"]['qa_system'] = {
        'level': 'INFO',
        'handlers': ['stream_handler', 'file_handler'],
    }
    log_config["loggers"]['downloader'] = {
        'level': 'INFO',
        'handlers': ['stream_handler', 'file_handler'],
    }
    log_config["loggers"]['server'] = {
        'level': 'INFO',
        'handlers': ['stream_handler', 'file_handler'],
    }

    app = Sanic()
    app.config.REQUEST_TIMEOUT = args.request_timeout
    log = logging.getLogger('server')

    @app.listener('before_server_start')
    async def setup_qa(app, loop):
        # To play nice with iohttp's async ClientSession objects, we need to construct the QaSystem
        # inside the event loop.
        if args.debug == "dummy_qa":
            qa = DummyQa()
        else:
            qa = QaSystem(
                args.wiki_cache,
                MergeParagraphs(args.tokens),
                ShallowOpenWebRanker(args.n_paragraphs),
                args.voc,
                model,
                loader,
                bing_api_key,
                bing_version=args.bing_version,
                tagme_api_key=tagme_api_key,
                n_dl_threads=args.n_dl_threads,
                blacklist_trivia_sites=args.blacklist_trivia_sites,
                download_timeout=args.download_timeout,
                span_bound=span_bound,
                # TAGME is disabled entirely when no key is available.
                tagme_threshold=None if
                (tagme_api_key is None) else args.tagme_thresh,
                n_web_docs=args.n_web,
            )
        app.qa = qa

    @app.listener('after_server_stop')
    async def teardown_qa(app, loop):
        # Renamed from `setup_qa`: the duplicate name shadowed the startup
        # listener in this scope (registration itself still worked, but the
        # reuse was confusing).
        app.qa.close()

    @app.route("/answer")
    async def answer(request):
        """Answer `?question=` using web search; returns ranked answers as JSON."""
        try:
            question = request.args["question"][0]
            if question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            spans, paras = await app.qa.answer_question(question)
            answers = select_answers(paras, spans, span_bound, 10)
            answers = answers[:n_to_return]
            best_span = max(answers[0].answers, key=lambda x: x.conf)
            log.info("Answered \"%s\" (with web search): \"%s\"", question,
                     answers[0].original_text[best_span.start:best_span.end])
            return json([x.to_json() for x in answers])
        except Exception as e:
            log.info("Error: " + str(e))
            raise ServerError(e, status_code=500)

    @app.route('/answer-from', methods=['POST'])
    async def answer_from(request):
        """Answer a question against a caller-provided document (POST JSON)."""
        try:
            args = ujson.loads(request.body.decode("utf-8"))
            question = args.get("question")
            if question is None or question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            doc = args["document"]
            if len(doc) > 500000:
                raise ServerError("Document too large", status_code=400)
            spans, paras = app.qa.answer_with_doc(question, doc)
            answers = select_answers(paras, spans, span_bound, 10)
            answers = answers[:n_to_return]
            best_span = max(answers[0].answers, key=lambda x: x.conf)
            log.info("Answered \"%s\" (with user doc): \"%s\"", question,
                     answers[0].original_text[best_span.start:best_span.end])
            return json([x.to_json() for x in answers])
        except Exception as e:
            log.info("Error: " + str(e))
            raise ServerError(e, status_code=500)

    # Fixed doubled slash in the index path ("docqa//server" -> "docqa/server").
    app.static('/', './docqa/server/static/index.html')
    app.static('/about.html', './docqa/server/static/about.html')
    # `log_config` above is the same (mutated) LOGGING object passed here.
    app.run(host="0.0.0.0",
            port=5000,
            workers=args.workers,
            debug=False,
            log_config=LOGGING)
def main():
    """Evaluate a model on document-level SQuAD and write per-paragraph CSV.

    Builds (question, ranked-paragraph) pairs with a TF-IDF ranker, runs the
    model over them, prints a table of EM/F1 by paragraph rank, and stores the
    per-sample results at `args.output` in csv format.
    """
    parser = argparse.ArgumentParser(
        description='Evaluate a model on document-level SQuAD')
    parser.add_argument('model', help='model to use')
    parser.add_argument(
        'output',
        type=str,
        help="Store the per-paragraph results in csv format in this file")
    parser.add_argument('-n',
                        '--n_sample',
                        type=int,
                        default=None,
                        help="(for testing) sample documents")
    parser.add_argument(
        '-s',
        '--async',
        type=int,
        default=10,
        help="Encoding batch asynchronously, queueing up to this many")
    parser.add_argument('-a',
                        '--answer_bound',
                        type=int,
                        default=17,
                        help="Max answer span length")
    parser.add_argument('-p',
                        '--n_paragraphs',
                        type=int,
                        default=None,
                        help="Max number of paragraphs to use")
    parser.add_argument(
        '-b',
        '--batch_size',
        type=int,
        default=200,
        help="Batch size, larger sizes can be faster but uses more memory")
    parser.add_argument('-c',
                        '--corpus',
                        choices=["dev", "train", "doc-rd-dev"],
                        default="dev")
    parser.add_argument('--no_ema',
                        action="store_true",
                        help="Don't use EMA weights even if they exist")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)
    print("Loading data")

    questions = []
    ranker = SquadTfIdfRanker(NltkPlusStopWords(True),
                              args.n_paragraphs,
                              force_answer=False)

    if args.corpus == "doc-rd-dev":
        docs = SquadCorpus().get_dev()
        if args.n_sample is not None:
            # Sort before shuffling so the fixed-seed sample is reproducible
            # regardless of the corpus's load order.
            docs.sort(key=lambda x: x.doc_id)
            np.random.RandomState(0).shuffle(docs)
            docs = docs[:args.n_sample]

        print("Fetching document reader docs...")
        doc_rd_versions = get_doc_rd_doc(docs)
        print("Ranking and matching with questions...")
        for doc in tqdm(docs):
            doc_questions = flatten_iterable(x.questions
                                             for x in doc.paragraphs)
            paragraphs = doc_rd_versions[doc.title]
            ranks = ranker.rank([x.words for x in doc_questions],
                                [x.text for x in paragraphs])
            for i, question in enumerate(doc_questions):
                para_ranks = np.argsort(ranks[i])
                for para_rank, para_num in enumerate(
                        para_ranks[:args.n_paragraphs]):
                    # Just use dummy answers spans for these pairs
                    questions.append(
                        RankedParagraphQuestion(
                            question.words,
                            TokenSpans(question.answer.answer_text,
                                       np.zeros((0, 2), dtype=np.int32)),
                            question.question_id, paragraphs[para_num],
                            para_rank, para_num))
        rl = ResourceLoader()
    else:
        if args.corpus == "dev":
            docs = SquadCorpus().get_dev()
        else:
            docs = SquadCorpus().get_train()
        rl = SquadCorpus().get_resource_loader()

        if args.n_sample is not None:
            docs.sort(key=lambda x: x.doc_id)
            np.random.RandomState(0).shuffle(docs)
            docs = docs[:args.n_sample]

        for q in ranker.ranked_questions(docs):
            for i, p in enumerate(q.paragraphs):
                questions.append(
                    RankedParagraphQuestion(
                        q.question, TokenSpans(q.answer_text, p.answer_spans),
                        q.question_id,
                        ParagraphWithInverse([p.text], p.original_text,
                                             p.spans), i, p.paragraph_num))

    print("Split %d docs into %d paragraphs" % (len(docs), len(questions)))

    # Longest contexts first so batches of similar length are grouped.
    questions = sorted(questions,
                       key=lambda x: (x.n_context_words, len(x.question)),
                       reverse=True)
    for q in questions:
        if len(q.answer.answer_spans.shape) != 2:
            raise ValueError()

    checkpoint = model_dir.get_best_weights()
    if checkpoint is not None:
        print("Using best weights")
    else:
        print("Using latest checkpoint")
        checkpoint = model_dir.get_latest_checkpoint()
        if checkpoint is None:
            raise ValueError("No checkpoints found")

    data = ParagraphAndQuestionDataset(
        questions, FixedOrderBatcher(args.batch_size, True))

    model = model_dir.get_model()
    # `async` is a reserved keyword since Python 3.7, so `args.async` is a
    # SyntaxError; read the attribute argparse stored via getattr instead.
    evaluation = trainer.test(
        model, [RecordParagraphSpanPrediction(args.answer_bound, True)],
        {args.corpus: data}, rl, checkpoint, not args.no_ema,
        getattr(args, "async"))[args.corpus]

    print("Saving result")
    output_file = args.output

    df = pd.DataFrame(evaluation.per_sample)

    df.sort_values(["question_id", "rank"], inplace=True, ascending=True)
    group_by = ["question_id"]
    f1 = compute_ranked_scores(df, "predicted_score", "text_f1", group_by)
    em = compute_ranked_scores(df, "predicted_score", "text_em", group_by)
    table = [["N Paragraphs", "EM", "F1"]]
    table += list([str(i + 1), "%.4f" % e, "%.4f" % f]
                  for i, (e, f) in enumerate(zip(em, f1)))
    print_table(table)

    df.to_csv(output_file, index=False)
Exemplo n.º 20
0
def run():
    """Evaluate a SQuAD model (with an ELMo-style LM component) on a dev file.

    Loads the model, points its LM at local weight/option files, restores
    all non-bilm variables from the best (or latest) checkpoint, optionally
    swaps in EMA weights, and writes {question_id: answer_text} to
    `output_path` as JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("squad_path", help="path to squad dev data file")
    parser.add_argument("output_path",
                        help="path where evaluation json file will be written")
    parser.add_argument("--model-path",
                        default="model",
                        help="path to model directory")
    # --n limits evaluation to the first n batches (for quick smoke tests).
    parser.add_argument("--n", type=int, default=None)
    parser.add_argument("-b", "--batch_size", type=int, default=100)
    parser.add_argument("--ema", action="store_true")
    args = parser.parse_args()

    squad_path = args.squad_path
    output_path = args.output_path
    model_dir = ModelDir(args.model_path)
    nltk.data.path.append("nltk_data")

    print("Loading data")
    docs = parse_squad_data(squad_path, "", NltkAndPunctTokenizer(), False)
    pairs = split_docs(docs)
    dataset = ParagraphAndQuestionDataset(
        pairs, ClusteredBatcher(args.batch_size, ContextLenKey(), False, True))

    print("Done, init model")
    model = model_dir.get_model()
    # Load word vectors from a fixed GloVe location rather than the path
    # recorded in the model's config.
    loader = ResourceLoader(lambda a, b: load_word_vector_file(
        join(VEC_DIR, "glove.840B.300d.txt"), b))
    lm_model = model.lm_model
    basedir = join(LM_DIR, "squad-context-concat-skip")
    lm_model.lm_vocab_file = join(basedir,
                                  "squad_train_dev_all_unique_tokens.txt")
    lm_model.options_file = join(
        basedir, "options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json")
    lm_model.weight_file = join(
        basedir,
        "squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5")
    # None: the LM embedding weights come from the files above, not a
    # precomputed embedding matrix.
    lm_model.embed_weights_file = None

    model.set_inputs([dataset], loader)

    print("Done, building graph")
    sess = tf.Session()
    with sess.as_default():
        pred = model.get_prediction()
    best_span = pred.get_best_span(17)[0]

    # The bilm (language model) variables are loaded from their own weight
    # files by the model, so they must be excluded from checkpoint restore.
    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    dont_restore_names = {
        x.name
        for x in all_vars if x.name.startswith("bilm")
    }
    print(sorted(dont_restore_names))
    vars = [x for x in all_vars if x.name not in dont_restore_names]

    print("Done, loading weights")
    checkpoint = model_dir.get_best_weights()
    if checkpoint is None:
        print("Loading most recent checkpoint")
        checkpoint = model_dir.get_latest_checkpoint()
    else:
        print("Loading best weights")

    saver = tf.train.Saver(vars)
    saver.restore(sess, checkpoint)

    if args.ema:
        # Re-restore trainable variables from their EMA shadow values
        # (decay=0 here only names the variables; the values come from the
        # checkpoint's `ExponentialMovingAverage` entries).
        ema = tf.train.ExponentialMovingAverage(0)
        saver = tf.train.Saver(
            {ema.average_name(x): x
             for x in tf.trainable_variables()})
        saver.restore(sess, checkpoint)

    # Initialize (rather than restore) the excluded bilm variables.
    sess.run(
        tf.variables_initializer(
            [x for x in all_vars if x.name in dont_restore_names]))

    print("Done, starting evaluation")
    out = {}
    for i, batch in enumerate(dataset.get_epoch()):
        if args.n is not None and i == args.n:
            break
        print("On batch: %d" % (i + 1))
        enc = model.encode(batch, False)
        spans = sess.run(best_span, feed_dict=enc)
        for (s, e), point in zip(spans, batch):
            out[point.question_id] = point.get_original_text(s, e)

    sess.close()

    print("Done, saving")
    with open(output_path, "w") as f:
        json.dump(out, f)

    print("Mission accomplished!")
Exemplo n.º 21
0
 def get_resource_loader(self):
     """Return a fresh, default-configured ResourceLoader."""
     loader = ResourceLoader()
     return loader
Exemplo n.º 22
0
def run():
    """Evaluate a SQuAD model with an LM component, optionally plotting logits.

    Restores non-bilm variables from the model checkpoint, runs the model over
    the input data, and writes {question_id: answer_text} to `output_data` as
    JSON. When --plot_dir is given, instead plots the start/end logit
    distributions for the first batch and stops.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_data")
    parser.add_argument("output_data")

    parser.add_argument("--plot_dir", type=str, default=None)

    parser.add_argument("--model_dir", type=str, default="/tmp/model/document-qa")
    parser.add_argument("--lm_dir", type=str, default="/home/castle/data/lm/squad-context-concat-skip")
    parser.add_argument("--glove_dir", type=str, default="/home/castle/data/glove")

    # --n limits evaluation to the first n batches (for quick smoke tests).
    parser.add_argument("--n", type=int, default=None)
    parser.add_argument("-b", "--batch_size", type=int, default=30)
    parser.add_argument("--ema", action="store_true")
    args = parser.parse_args()

    input_data = args.input_data
    output_path = args.output_data
    model_dir = ModelDir(args.model_dir)
    nltk.data.path.append("nltk_data")

    print("Loading data")
    docs = parse_squad_data(input_data, "", NltkAndPunctTokenizer(), False)
    pairs = split_docs(docs)
    dataset = ParagraphAndQuestionDataset(pairs, ClusteredBatcher(args.batch_size, ContextLenKey(), False, True))

    print("Done, init model")
    model = model_dir.get_model()
    # small hack, just load the vector file at its expected location rather then using the config location
    loader = ResourceLoader(lambda a, b: load_word_vector_file(join(args.glove_dir, "glove.840B.300d.txt"), b))
    lm_model = model.lm_model
    basedir = args.lm_dir
    plotdir = args.plot_dir

    lm_model.lm_vocab_file = join(basedir, "squad_train_dev_all_unique_tokens.txt")
    lm_model.options_file = join(basedir, "options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json")
    lm_model.weight_file = join(basedir, "squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5")
    # None: LM embedding weights come from the files above.
    lm_model.embed_weights_file = None

    model.set_inputs([dataset], loader)

    print("Done, building graph")
    sess = tf.Session()
    with sess.as_default():
        pred = model.get_prediction()
    best_span = pred.get_best_span(17)[0]

    # Identity comparison with None, not `!=`.
    if plotdir is not None:
        start_logits_op, end_logits_op = pred.get_logits()

    # The bilm (language model) variables are loaded from their own weight
    # files, so exclude them from checkpoint restore and initialize them below.
    all_vars = tf.global_variables() + tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS)
    dont_restore_names = {x.name for x in all_vars if x.name.startswith("bilm")}
    print(sorted(dont_restore_names))
    vars = [x for x in all_vars if x.name not in dont_restore_names]

    print("Done, loading weights")
    checkpoint = model_dir.get_best_weights()
    if checkpoint is None:
        print("Loading most recent checkpoint")
        checkpoint = model_dir.get_latest_checkpoint()
    else:
        print("Loading best weights")

    saver = tf.train.Saver(vars)
    saver.restore(sess, checkpoint)

    if args.ema:
        # Re-restore trainable variables from their EMA shadow values.
        ema = tf.train.ExponentialMovingAverage(0)
        saver = tf.train.Saver({ema.average_name(x): x for x in tf.trainable_variables()})
        saver.restore(sess, checkpoint)

    sess.run(tf.variables_initializer([x for x in all_vars if x.name in dont_restore_names]))

    print("Done, starting evaluation")
    out = {}
    for i, batch in enumerate(dataset.get_epoch()):
        if args.n is not None and i == args.n:
            break
        print("On batch size [%d], now in %d th batch" % (args.batch_size, i + 1))
        enc = model.encode(batch, False)
        if plotdir is not None:
            spans, start_logits, end_logits = sess.run([best_span, start_logits_op, end_logits_op], feed_dict=enc)
            for bi, point in enumerate(batch):
                q = ' '.join(point.question)
                c = point.paragraph.get_context()
                gt = ' | '.join(point.answer.answer_text)
                s, e = spans[bi]
                # Renamed from `pred` to avoid shadowing the TF prediction
                # object bound above.
                pred_text = point.get_original_text(s, e)
                start_dist = start_logits[bi]
                end_dist = end_logits[bi]
                c_interval = np.arange(0.0, start_dist.shape[0], 1)
                c_label = c
                plt.figure(1)
                plt.subplot(211)
                plt.plot(c_interval, start_dist, color='r')
                plt.title("Q : " + q + " // A : " + gt, fontsize=9)
                plt.text(0, 0, r'Predict : %s [%d:%d]' % (pred_text, s, e), color='b')
                axes = plt.gca()
                axes.set_ylim([-20, 20])

                plt.subplot(212)
                plt.plot(c_interval, end_dist, color='g')
                plt.xticks(c_interval, c_label, rotation=90, fontsize=5)
                axes = plt.gca()
                axes.set_ylim([-20, 20])
                plt.show()

            # Plotting mode only examines the first batch.
            break
        else:
            spans = sess.run(best_span, feed_dict=enc)

        for (s, e), point in zip(spans, batch):
            out[point.question_id] = point.get_original_text(s, e)

    sess.close()

    print("Done, saving")
    with open(output_path, "w") as f:
        json.dump(out, f)

    print("Mission accomplished!")