def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)
    checkpoint = model_dir.get_best_weights()
    reader = tf.train.NewCheckpointReader(checkpoint)

    if reader.has_tensor("weight_embed_context_lm/layer_0/w"):
        x = "w"
    else:
        x = "ELMo_W_0"

    for i in reader.get_variable_to_shape_map().items():
        print(i)

    input_w = reader.get_tensor(
        "weight_embed_lm/layer_0/%s/ExponentialMovingAverage" % x)
    output_w = reader.get_tensor(
        "weight_lm/layer_0/%s/ExponentialMovingAverage" % x)

    print("Input")
    print(input_w)
    print("(Softmax): " + str(softmax(input_w)))

    print("Output")
    print(output_w)
    print("(Softmax): " + str(softmax(output_w)))
Example #2
def resume_training(out: ModelDir,
                    notes: str = None,
                    dry_run=False,
                    start_eval=False):
    """ Resume training an existing model """

    train_params = out.get_last_train_params()
    model = out.get_model()

    train_data = train_params["data"]

    evaluators = train_params["evaluators"]
    params = train_params["train_params"]
    params.num_epochs = 24 * 3

    if isinstance(train_data, PreprocessedData):
        # TODO don't hard code # of processes
        train_data.preprocess(6, 1000)

    latest = tf.train.latest_checkpoint(out.save_dir)
    if latest is None:
        raise ValueError("No checkpoint to resume from found in " +
                         out.save_dir)

    _train(model, train_data, latest, None, False, params, evaluators, out,
           notes, dry_run, start_eval)
Example #3
def main():
  print('Starting...')
  model_dir = ModelDir(OPTS.model)
  model = model_dir.get_model()
  if not isinstance(model, ParagraphQuestionModel):
    raise ValueError("This script is built to work for ParagraphQuestionModel models only")
  input_data, vocab = read_input_data(model)

  print('Loading word vectors...')
  model.set_input_spec(ParagraphAndQuestionSpec(batch_size=None), vocab)

  print('Starting Tensorflow session...')
  sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
  with sess.as_default():
    prediction = model.get_prediction()
    # Take 0-th here because we know we only truncate to one paragraph
    start_logits_tf = prediction.start_logits[0]
    end_logits_tf = prediction.end_logits[0]
    none_logit_tf = prediction.none_logit[0]
    context_rep_tf = model.context_rep[0]
    m1_tf = model.predictor.m1[0]
    m2_tf = model.predictor.m2[0]
  model_dir.restore_checkpoint(sess)

  with open(OPTS.output_file, 'w') as f:
    for doc_raw, q_raw, context, ex in tqdm(input_data):
      encoded = model.encode(ex, is_train=False)
      start_logits, end_logits, none_logit, context_rep, m1, m2 = sess.run(
          [start_logits_tf, end_logits_tf, none_logit_tf, context_rep_tf,
           m1_tf, m2_tf],
          feed_dict=encoded)
      beam, p_na = logits_to_probs(
          doc_raw, context[0], start_logits, end_logits, none_logit,
          beam_size=OPTS.beam_size)
      inputs = [context_rep, m1, m2]
      vec = np.concatenate([np.amax(x, axis=0) for x in inputs] +
                           [np.amin(x, axis=0) for x in inputs] +
                           [np.mean(x, axis=0) for x in inputs])
      #span_logits = np.add.outer(start_logits, end_logits)
      #all_logits = np.concatenate((np.array([none_logit]), span_logits.flatten()))
      #log_partition = scipy.special.logsumexp(all_logits)
      #vec = np.concatenate([
      #    np.amax(context_rep, axis=0),
      #    np.amin(context_rep, axis=0),
      #    np.mean(context_rep, axis=0),
      #    [np.amax(start_logits), scipy.special.logsumexp(start_logits),
      #     np.amax(end_logits), scipy.special.logsumexp(end_logits),
      #     none_logit, log_partition] 
      #])
      out_obj = {'paragraph': doc_raw, 'question': q_raw,
                 'beam': beam, 'p_na': p_na}
      if not OPTS.no_vec:
        out_obj['vec'] = vec.tolist()
      print(json.dumps(out_obj), file=f)
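
The loop above writes one JSON object per line to OPTS.output_file; a hypothetical way to read that output back (the file name below is a placeholder):

import json

# Placeholder path standing in for whatever OPTS.output_file was
with open("predictions.jsonl") as f:
    records = [json.loads(line) for line in f]
# Each record carries 'paragraph', 'question', 'beam', 'p_na' and, unless
# OPTS.no_vec was set, the pooled feature vector under 'vec'
print(records[0]["p_na"])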
Example #4
def init():
    global model, model_dir
    print("Loading Model...")
    # Load the model
    model_dir = ModelDir(
        "pretrained_models/models/triviaqa-unfiltered-shared-norm")
    model = model_dir.get_model()
    if not isinstance(model, ParagraphQuestionModel):
        raise ValueError(
            "This script is built to work for ParagraphQuestionModel models only"
        )
Example #5
def main():
    print('Starting...')
    model_dir = ModelDir(OPTS.model)
    model = model_dir.get_model()
    if isinstance(model, ParagraphQuestionModel):
        run_paragraph_model(model_dir, model)
    elif isinstance(model, ElmoQaModel):
        run_elmo_model(model_dir, model)
    else:
        raise ValueError(
            "This script is built to work for ParagraphQuestionModel and ElmoQaModel models only"
        )
Example #6
def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('name', help='name of output to examine')
    parser.add_argument('--eval', "-e", action="store_true")
    args = parser.parse_args()

    resume_training(ModelDir(args.name), start_eval=args.eval)
Example #7
def resume_training(model_to_resume: str,
                    dataset_oversampling: Dict[str, int],
                    checkpoint: Optional[str] = None,
                    epochs: Optional[int] = None):
    """Resume training on a partially trained model (or finetune an existing model)


    :param model_to_resume: path to the model directory of the model to resume training
    :param dataset_oversampling: dictionary mapping dataset names to integer counts of how much
       to oversample them
    :param checkpoint: optional string to specify which checkpoint to resume from. Uses the latest
         if not specified
    :param epochs: Optional int specifying how many epochs to train for. If not detailed, runs for 24
    """
    out = ModelDir(model_to_resume)
    train_params = out.get_last_train_params()
    evaluators = train_params["evaluators"]
    params = train_params["train_params"]
    params.num_epochs = epochs if epochs is not None else 24
    model = out.get_model()

    notes = None
    dry_run = False
    data = prepare_data(model, TrainConfig(), dataset_oversampling)
    if checkpoint is None:
        checkpoint = tf.train.latest_checkpoint(out.save_dir)

    _train_async(model=model,
                 data=data,
                 checkpoint=checkpoint,
                 parameter_checkpoint=None,
                 save_start=False,
                 train_params=params,
                 evaluators=evaluators,
                 out=out,
                 notes=notes,
                 dry_run=dry_run,
                 start_eval=False)
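
A hypothetical call to the resume_training function above; the model path and dataset names are placeholders rather than values from the original code:

# Illustrative only: resume a run with "squad" kept as-is and "triviaqa" oversampled 2x
resume_training("models/my-run",
                dataset_oversampling={"squad": 1, "triviaqa": 2},
                checkpoint=None,  # fall back to the latest checkpoint
                epochs=12)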
Example #8
def convert_model_pickle(model_dir, output_dir):
    print("Updating model...")
    md = ModelDir(model_dir)
    model = md.get_model()
    # remove the lm models word embeddings - cpu model will use Char-CNN
    model.lm_model.embed_weights_file = None
    dim = model.embed_mapper.layers[1].n_units

    model.embed_mapper.layers = [
        model.embed_mapper.layers[0],
        BiRecurrentMapper(CompatGruCellSpec(dim)), model.embed_mapper.layers[2]
    ]
    model.match_encoder.layers = list(model.match_encoder.layers)
    other = model.match_encoder.layers[1].other
    other.layers = list(other.layers)
    other.layers[1] = BiRecurrentMapper(CompatGruCellSpec(dim))

    pred = model.predictor.predictor
    pred.first_layer = BiRecurrentMapper(CompatGruCellSpec(dim))
    pred.second_layer = BiRecurrentMapper(CompatGruCellSpec(dim))

    with open(join(output_dir, "model.pkl"), "wb") as f:
        pickle.dump(model, f)
Example #9
def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("model")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)
    checkpoint = model_dir.get_best_weights()
    print(checkpoint)
    if checkpoint is None:
        print("Show latest checkpoint")
        checkpoint = model_dir.get_latest_checkpoint()
    else:
        print("Show best weights")

    reader = tf.train.NewCheckpointReader(checkpoint)
    param_map = reader.get_variable_to_shape_map()
    total = 0
    for k in sorted(param_map):
        v = param_map[k]
        print('%s: %s' % (k, str(v)))
        total += np.prod(v)

    print("%d total" % total)
Example #10
def resume_training_with(data: TrainingData,
                         out: ModelDir,
                         train_params: TrainParams,
                         evaluators: List[Evaluator],
                         notes: str = None,
                         dry_run: bool = False):
    """ Resume training an existing model with the specified parameters """
    with open(join(out.dir, "model.pkl"), "rb") as f:
        model = pickle.load(f)
    latest = out.get_latest_checkpoint()
    if latest is None:
        raise ValueError("No checkpoint to resume from found in " +
                         out.save_dir)

    _train(model, data, latest, None, False, train_params, evaluators, out,
           notes, dry_run)
Example #11
def main():
    parser = argparse.ArgumentParser(
        description='Train a model on document-level SQuAD')
    parser.add_argument(
        'mode',
        choices=["paragraph", "confidence", "shared-norm", "merge", "sigmoid"])
    parser.add_argument("name", help="Output directory")
    args = parser.parse_args()
    mode = args.mode
    out = args.name + "-" + datetime.now().strftime("%m%d-%H%M%S")

    corpus = SquadCorpus()
    if mode == "merge":
        # Adds paragraph start tokens, since we will be concatenating paragraphs together
        pre = WithIndicators(True, para_tokens=False, doc_start_token=False)
    else:
        pre = None

    # model = get_model(50, 100, args.mode, pre)
    tmp = ModelDir("models/squad-shared-norm")
    model = tmp.get_model()

    if mode == "paragraph":
        # Run in the "standard" known-paragraph setting
        if model.preprocessor is not None:
            raise NotImplementedError()
        n_epochs = 26

        train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True,
                                          False)
        eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
        data = DocumentQaTrainingData(corpus, None, train_batching,
                                      eval_batching)
        eval = [LossEvaluator(), SpanEvaluator(bound=[17], text_eval="squad")]
    else:
        eval_set_mode = {
            "confidence": "flatten",
            "sigmoid": "flatten",
            "shared-norm": "group",
            "merge": "merge"
        }[mode]
        eval_dataset = RandomParagraphSetDatasetBuilder(
            100, eval_set_mode, True, 0)

        if mode == "confidence" or mode == "sigmoid":
            if mode == "sigmoid":
                # needs to be trained for a really long time for reasons unknown, even this might be too small
                n_epochs = 100
            else:
                n_epochs = 50  # more epochs since we only "see" the label every other epoch or so
            train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3),
                                              True, False)
            data = PreprocessedData(
                SquadCorpus(),
                SquadTfIdfRanker(NltkPlusStopWords(True), 4, True,
                                 model.preprocessor),
                StratifyParagraphsBuilder(train_batching, 1),
                eval_dataset,
                eval_on_verified=False,
            )
        else:
            n_epochs = 26
            data = PreprocessedData(
                SquadCorpus(),
                SquadTfIdfRanker(NltkPlusStopWords(True), 4, True,
                                 model.preprocessor),
                StratifyParagraphSetsBuilder(25, args.mode == "merge", True,
                                             1),
                eval_dataset,
                eval_on_verified=False,
            )

        eval = [LossEvaluator(), MultiParagraphSpanEvaluator(17, "squad")]
        data.preprocess(1)

    with open(__file__, "r") as f:
        notes = f.read()
        notes = args.mode + "\n" + notes

    params = train_params(n_epochs)
    if mode == "paragraph":
        params.best_weights = ("dev", "b17/text-f1")

    trainer.start_training(data,
                           model,
                           params,
                           eval,
                           model_dir.ModelDir(out),
                           notes,
                           initialize_from=tmp.get_best_weights())
Example #12
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on SQuAD')
    parser.add_argument('model', help='model directory to evaluate')
    parser.add_argument("-o", "--official_output", type=str,
                        help="where to output an official result file")
    parser.add_argument('-n', '--sample_questions', type=int, default=None,
                        help="(for testing) run on a subset of questions")
    parser.add_argument('--answer_bounds', nargs='+', type=int, default=[17],
                        help="Max size of answer")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes can be faster but uses more memory")
    parser.add_argument('-s', '--step', default=None,
                        help="Weights to load, can be a checkpoint step or 'latest'")
    # Add ja_test choice to test Multilingual QA dataset.
    parser.add_argument(
        '-c', '--corpus', choices=["dev", "train", "ja_test", "pred"], default="dev")
    parser.add_argument('--no_ema', action="store_true",
                        help="Don't use EMA weights even if they exist")
    # Add ja_test choice to test Multilingual QA pipeline.
    parser.add_argument('-p', '--pred_filepath', default=None,
                        help="The csv file path if you try pred mode")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)

    corpus = SquadCorpus()
    if args.corpus == "dev":
        questions = corpus.get_dev()
    # Add ja_test choice to test Multilingual QA pipeline.
    elif args.corpus == "ja_test":
        questions = corpus.get_ja_test()
    # This is for prediction mode for MLQA pipeline.
    elif args.corpus == "pred":
        questions = create_pred_dataset(args.pred_filepath)
    else:
        questions = corpus.get_train()
    questions = split_docs(questions)

    if args.sample_questions:
        # Sort for determinism, then shuffle with a fixed seed before sampling
        questions = sorted(questions, key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(questions)
        questions = questions[:args.sample_questions]

    questions.sort(key=lambda x: x.n_context_words, reverse=True)
    dataset = ParagraphAndQuestionDataset(
        questions, FixedOrderBatcher(args.batch_size, True))

    evaluators = [SpanEvaluator(args.answer_bounds, text_eval="squad")]
    if args.official_output is not None:
        evaluators.append(RecordSpanPrediction(args.answer_bounds[0]))

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    model = model_dir.get_model()

    evaluation = trainer.test(model, evaluators, {args.corpus: dataset},
                              corpus.get_resource_loader(), checkpoint, not args.no_ema)[args.corpus]

    # Print the scalar results in a two column table
    scalars = evaluation.scalars
    cols = list(sorted(scalars.keys()))
    table = [cols]
    header = ["Metric", ""]
    table.append([("%s" % scalars[x] if x in scalars else "-") for x in cols])
    print_table([header] + transpose_lists(table))

    # Save the official output
    if args.official_output is not None:
        quid_to_para = {}
        for x in questions:
            quid_to_para[x.question_id] = x.paragraph

        q_id_to_answers = {}
        q_ids = evaluation.per_sample["question_id"]
        spans = evaluation.per_sample["predicted_span"]
        for q_id, (start, end) in zip(q_ids, spans):
            text = quid_to_para[q_id].get_original_text(start, end)
            q_id_to_answers[q_id] = text

        with open(args.official_output, "w") as f:
            json.dump(q_id_to_answers, f)
Example #13
def run():
    parser = argparse.ArgumentParser()
    parser.add_argument("squad_path", help="path to squad dev data file")
    parser.add_argument("output_path",
                        help="path where evaluation json file will be written")
    parser.add_argument("--model-path",
                        default="model",
                        help="path to model directory")
    parser.add_argument("--n", type=int, default=None)
    parser.add_argument("-b", "--batch_size", type=int, default=100)
    parser.add_argument("--ema", action="store_true")
    args = parser.parse_args()

    squad_path = args.squad_path
    output_path = args.output_path
    model_dir = ModelDir(args.model_path)
    nltk.data.path.append("nltk_data")

    print("Loading data")
    docs = parse_squad_data(squad_path, "", NltkAndPunctTokenizer(), False)
    pairs = split_docs(docs)
    dataset = ParagraphAndQuestionDataset(
        pairs, ClusteredBatcher(args.batch_size, ContextLenKey(), False, True))

    print("Done, init model")
    model = model_dir.get_model()
    loader = ResourceLoader(lambda a, b: load_word_vector_file(
        join(VEC_DIR, "glove.840B.300d.txt"), b))
    lm_model = model.lm_model
    basedir = join(LM_DIR, "squad-context-concat-skip")
    lm_model.lm_vocab_file = join(basedir,
                                  "squad_train_dev_all_unique_tokens.txt")
    lm_model.options_file = join(
        basedir, "options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json")
    lm_model.weight_file = join(
        basedir,
        "squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5")
    lm_model.embed_weights_file = None

    model.set_inputs([dataset], loader)

    print("Done, building graph")
    sess = tf.Session()
    with sess.as_default():
        pred = model.get_prediction()
    best_span = pred.get_best_span(17)[0]

    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    dont_restore_names = {
        x.name
        for x in all_vars if x.name.startswith("bilm")
    }
    print(sorted(dont_restore_names))
    vars = [x for x in all_vars if x.name not in dont_restore_names]

    print("Done, loading weights")
    checkpoint = model_dir.get_best_weights()
    if checkpoint is None:
        print("Loading most recent checkpoint")
        checkpoint = model_dir.get_latest_checkpoint()
    else:
        print("Loading best weights")

    saver = tf.train.Saver(vars)
    saver.restore(sess, checkpoint)

    if args.ema:
        ema = tf.train.ExponentialMovingAverage(0)
        saver = tf.train.Saver(
            {ema.average_name(x): x
             for x in tf.trainable_variables()})
        saver.restore(sess, checkpoint)

    sess.run(
        tf.variables_initializer(
            [x for x in all_vars if x.name in dont_restore_names]))

    print("Done, starting evaluation")
    out = {}
    for i, batch in enumerate(dataset.get_epoch()):
        if args.n is not None and i == args.n:
            break
        print("On batch: %d" % (i + 1))
        enc = model.encode(batch, False)
        spans = sess.run(best_span, feed_dict=enc)
        for (s, e), point in zip(spans, batch):
            out[point.question_id] = point.get_original_text(s, e)

    sess.close()

    print("Done, saving")
    with open(output_path, "w") as f:
        json.dump(out, f)

    print("Mission accomplished!")
Example #14
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on SQuAD')
    parser.add_argument('model', help='model directory to evaluate')
    parser.add_argument("-o", "--official_output", type=str, help="where to output an official result file")
    parser.add_argument('-n', '--sample_questions', type=int, default=None,
                        help="(for testing) run on a subset of questions")
    parser.add_argument('--answer_bounds', nargs='+', type=int, default=[17],
                        help="Max size of answer")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes can be faster but uses more memory")
    parser.add_argument('-s', '--step', default=None,
                        help="Weights to load, can be a checkpoint step or 'latest'")
    parser.add_argument('-c', '--corpus', choices=["dev", "train"], default="dev")
    parser.add_argument('--no_ema', action="store_true", help="Don't use EMA weights even if they exist")
    parser.add_argument('--none_prob', action="store_true", help="Output none probability for samples")
    parser.add_argument('--elmo', action="store_true", help="Use elmo model")
    parser.add_argument('--per_question_loss_file', type=str, default=None,
            help="Run question by question and output a question_id -> loss output to this file")
    args = parser.parse_known_args()[0]

    model_dir = ModelDir(args.model)

    corpus = SquadCorpus()
    if args.corpus == "dev":
        questions = corpus.get_dev()
    else:
        questions = corpus.get_train()
    questions = split_docs(questions)

    if args.sample_questions:
        # Sort for determinism, then shuffle with a fixed seed before sampling
        questions = sorted(questions, key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(questions)
        questions = questions[:args.sample_questions]

    questions.sort(key=lambda x:x.n_context_words, reverse=True)
    dataset = ParagraphAndQuestionDataset(questions, FixedOrderBatcher(args.batch_size, True))

    evaluators = [SpanEvaluator(args.answer_bounds, text_eval="squad")]
    if args.official_output is not None:
        evaluators.append(RecordSpanPrediction(args.answer_bounds[0]))
    if args.per_question_loss_file is not None:
        evaluators.append(RecordSpanPredictionScore(args.answer_bounds[0], args.batch_size, args.none_prob))

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    model = model_dir.get_model()
    if args.elmo:
        model.lm_model.lm_vocab_file = './elmo-params/squad_train_dev_all_unique_tokens.txt'
        model.lm_model.options_file = './elmo-params/options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json'
        model.lm_model.weight_file = './elmo-params/squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5'
        model.lm_model.embed_weights_file = None


    evaluation = trainer.test(model, evaluators, {args.corpus: dataset},
                              corpus.get_resource_loader(), checkpoint, not args.no_ema)[args.corpus]

    # Print the scalar results in a two column table
    scalars = evaluation.scalars
    cols = list(sorted(scalars.keys()))
    table = [cols]
    header = ["Metric", ""]
    table.append([("%s" % scalars[x] if x in scalars else "-") for x in cols])
    print_table([header] + transpose_lists(table))

    # Save the official output
    if args.official_output is not None:
        quid_to_para = {}
        for x in questions:
            quid_to_para[x.question_id] = x.paragraph

        q_id_to_answers = {}
        q_ids = evaluation.per_sample["question_id"]
        spans = evaluation.per_sample["predicted_span"]
        for q_id, (start, end) in zip(q_ids, spans):
            text = quid_to_para[q_id].get_original_text(start, end)
            q_id_to_answers[q_id] = text

        with open(args.official_output, "w") as f:
            json.dump(q_id_to_answers, f)

    if args.per_question_loss_file is not None:
        print("Saving result")
        output_file = args.per_question_loss_file
        ids = evaluation.per_sample["question_ids"]
        f1s = evaluation.per_sample["text_f1"]
        ems = evaluation.per_sample["text_em"]
        losses = evaluation.per_sample["loss"]

        if args.none_prob:
            none_probs = evaluation.per_sample["none_probs"]
            """
            results = {question_id: {'f1': float(f1), 'em': float(em), 'loss': float(loss), 'none_prob': float(none_prob)} for question_id, f1, em, loss, none_prob in zip(ids, f1s, ems, losses, none_probs)}
            """
            results = {question_id: float(none_prob) for question_id, none_prob in zip(ids, none_probs)}
        else:
            results = {question_id: {'f1': float(f1), 'em': float(em), 'loss': float(loss)} for question_id, f1, em, loss in zip(ids, f1s, ems, losses)}


        with open(output_file, 'w') as f:
            json.dump(results, f)
Example #15
def main():
    print('Starting...')
    model_dir = ModelDir(OPTS.model)
    model = model_dir.get_model()
    tokenizer = NltkAndPunctTokenizer()
    if not isinstance(model, ParagraphQuestionModel):
        raise ValueError(
            "This script is built to work for ParagraphQuestionModel models only"
        )
    if OPTS.reload_vocab:
        loader = ResourceLoader()
    else:
        loader = CachingResourceLoader()
    print('Loading word vectors...')
    model.set_input_spec(ParagraphAndQuestionSpec(batch_size=None),
                         set([',']),
                         word_vec_loader=loader,
                         allow_update=True)
    print('Starting Tensorflow session...')
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with sess.as_default():
        prediction = model.get_prediction()
        # Take 0-th here because we know we only truncate to one paragraph
        start_logits_tf = prediction.start_logits[0]
        end_logits_tf = prediction.end_logits[0]
        none_logit_tf = prediction.none_logit[0]
        #best_spans_tf, conf_tf = prediction.get_best_span(MAX_SPAN_LENGTH)
    model_dir.restore_checkpoint(sess)
    splitter = Truncate(400)  # NOTE: we truncate past 400 tokens
    selector = TopTfIdf(NltkPlusStopWords(True), n_to_select=5)
    app = bottle.Bottle()

    @app.route('/')
    def index():
        return bottle.template('index')

    @app.route('/post_query', method='post')
    def post_query():
        document_raw = bottle.request.forms.getunicode('document').strip()
        question_raw = bottle.request.forms.getunicode('question').strip()
        document = re.split("\s*\n\s*", document_raw)
        question = tokenizer.tokenize_paragraph_flat(question_raw)
        doc_toks = [tokenizer.tokenize_paragraph(p) for p in document]
        split_doc = splitter.split(doc_toks)
        context = selector.prune(question, split_doc)
        if model.preprocessor is not None:
            context = [
                model.preprocessor.encode_text(question, x) for x in context
            ]
        else:
            context = [flatten_iterable(x.text) for x in context]
        vocab = set(question)
        for txt in context:
            vocab.update(txt)
        data = [
            ParagraphAndQuestion(x, question, None, "user-question%d" % i)
            for i, x in enumerate(context)
        ]
        model.word_embed.update(loader, vocab)
        encoded = model.encode(data, is_train=False)
        start_logits, end_logits, none_logit = sess.run(
            [start_logits_tf, end_logits_tf, none_logit_tf], feed_dict=encoded)
        beam, p_na = logits_to_probs(document_raw,
                                     context[0],
                                     start_logits,
                                     end_logits,
                                     none_logit,
                                     beam_size=BEAM_SIZE)
        return bottle.template('results',
                               document=document_raw,
                               question=question_raw,
                               beam=beam,
                               p_na=p_na)

    cur_dir = os.path.abspath(os.path.dirname(__file__))
    bottle.TEMPLATE_PATH.insert(0, os.path.join(cur_dir, 'views'))
    bottle.run(app, host=OPTS.hostname, port=OPTS.port, debug=OPTS.debug)
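
A hypothetical client for the demo server above, assuming it is reachable at the host and port passed via OPTS (the URL below is a placeholder):

import requests

# post_query() above reads the 'document' and 'question' form fields
resp = requests.post("http://localhost:8080/post_query",
                     data={"document": "Some paragraphs of text...",
                           "question": "What is this text about?"})
print(resp.text)  # HTML rendered from the 'results' template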
Example #16
    def getAnswer(self):
        #parser = argparse.ArgumentParser(description="Run an ELMo model on user input")
        #parser.add_argument("model", help="Model directory")
        #parser.add_argument("question", help="Question to answer")
        #parser.add_argument("documents", help="List of text documents to answer the question with", nargs='+')
        #args = parser.parse_args()

        #print("Preprocessing...")

        # Load the model
        model_dir = ModelDir(MODEL_DIR)
        model = model_dir.get_model()
        if not isinstance(model, ParagraphQuestionModel):
            raise ValueError(
                "This script is built to work for ParagraphQuestionModel models only"
            )

        conn = pyodbc.connect(DB_CONN)

        cursor = conn.cursor()
        #(23211,28690,33214,25638,25837,26454,28693,26137,31428,32087)
        query="select cast(filetext as varchar(max)) as filetext, name, type from dbo.UserworkspaceData where objectmasterid= "+\
               str(self.ObjectMasterId)+\
               " order by id asc"
        #query="select cast(filetext as varchar(max)) as filetext from kpl_tmp"
        documents = []
        document = ""
        name = ""
        filetype = 0
        for doc in cursor.execute(query):
            document = document + doc[0]
            name = doc[1]
            filetype = doc[2]
        #open("E:/kpl.txt","w+").write(document)
        documents.append(document)
        #documents.replace("\n\n","\n")
        #r.sub("",documents)
        #documents=" ".join(documents.split())
        #open("E:\kpl_test.txt","w+").write(document)
        #doc="D:\Document QnA\document-qa-master\Data\Drug_Delivery_Surveying_Global_Competitive_Landscape_BMI.txt"
        # =============================================================================
        #     if not isfile(doc):
        #         raise ValueError(doc + " does not exist")
        #     with open(doc, "r") as f:
        #         documents.append(f.read())
        # =============================================================================

        #print("Loaded %d documents" % len(documents))
        #temp=documents[0].split()
        # Split documents into lists of paragraphs
        #documents=[" ".join(temp[i:(i+400)]) for i in range(1,len(temp),400)]
        documents = [re.split("\s*\n\s*", doc) for doc in documents]
        # Tokenize the input, the models expects data to be tokenized using `NltkAndPunctTokenizer`
        # Note the model expects case-sensitive input
        tokenizer = NltkAndPunctTokenizer()
        question = tokenizer.tokenize_paragraph_flat(
            self.Question)  # List of words

        # Now list of document->paragraph->sentence->word
        documents = [[tokenizer.tokenize_paragraph(p) for p in doc]
                     for doc in documents]

        # Now group the document into paragraphs, this returns `ExtractedParagraph` objects
        # that additionally remember the start/end token of the paragraph within the source document
        splitter = MergeParagraphs(400)
        #splitter = PreserveParagraphs() # Uncomment to use the natural paragraph grouping
        documents = [splitter.split(doc) for doc in documents]
        #print(str(len(documents))+" kpl") #kpl
        # Now select the top paragraphs using a `ParagraphFilter`
        if len(documents) == 1:
            # Use TF-IDF to select top paragraphs from the document
            selector = TopTfIdf(NltkPlusStopWords(True), n_to_select=5)
            context = selector.prune(question, documents[0])
        else:
            # Use a linear classifier to select top paragraphs among all the documents
            selector = ShallowOpenWebRanker(n_to_select=10)
            context = selector.prune(question, flatten_iterable(documents))

    #print("Select %d paragraph" % len(context))

        if model.preprocessor is not None:
            # Models are allowed to define an additional pre-processing step
            # This will turn the `ExtractedParagraph` objects back into simple lists of tokens
            context = [
                model.preprocessor.encode_text(question, x) for x in context
            ]
        else:
            # Otherwise just use flattened text
            context = [flatten_iterable(x.text) for x in context]
        #x=open("E:\context.txt","a+")
        #[x.write(" ".join(cont)) for cont in context]
        #x.write("\n.......................................................\n")

        #print("Setting up model")
        # Tell the model the batch size (can be None) and vocab to expect. This will load the
        # needed word vectors and fix the batch size to use when building the graph / encoding the input
        voc = set(question)
        for txt in context:
            voc.update(txt)

        model.set_input_spec(self.nlp,
                             ParagraphAndQuestionSpec(batch_size=len(context)),
                             voc)
        # Now we build the actual tensorflow graph, `best_span` and `conf` are
        # tensors holding the predicted span (inclusive) and confidence scores for each
        # element in the input batch, confidence scores being the pre-softmax logit for the span
        #print("Build tf graph") #kpl
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        # We need to use sess.as_default when working with the cuDNN stuff, since we need an active
        # session to figure out the # of parameters needed for each layer. The cpu-compatible models don't need this.
        with sess.as_default():
            # 8 means to limit the span to size 8 or less
            best_spans, conf = model.get_prediction().get_best_span(8)

    # Loads the saved weights
        model_dir.restore_checkpoint(sess)

        # Now the model is ready to run
        # The model takes input in the form of `ContextAndQuestion` objects, for example:
        data = [
            ParagraphAndQuestion(x, question, None, "user-question%d" % i)
            for i, x in enumerate(context)
        ]

        #print("Starting run")
        # The model is run in two steps, first it "encodes" a batch of paragraph/context pairs
        # into numpy arrays, then we use `sess` to run the actual model to get the predictions
        encoded = model.encode(
            data, is_train=False)  # batch of `ContextAndQuestion` -> feed_dict
        best_spans, conf = sess.run(
            [best_spans, conf], feed_dict=encoded)  # feed_dict -> predictions

        best_para = np.argmax(
            conf
        )  # We get output for each paragraph, select the most-confident one to print

        #print("Best Paragraph: " + str(best_para))
        #print("Best span: " + str(best_spans[best_para]))
        #print("Answer text: " + " ".join(context[best_para][best_spans[best_para][0]:best_spans[best_para][1]+1]))
        #print("Confidence: " + str(conf[best_para]))
        Answer = " ".join(context[best_para]
                          [best_spans[best_para][0]:best_spans[best_para][1] +
                           1])

        print("Confidence: " + str(conf[best_para]))
        print("Best Paragraph: " + str(best_para))
        print("Best span: " + str(best_spans[best_para]))
        print("Answer text: " + Answer)
        print(" ".join(context[best_para]))
        context[best_para][best_spans[best_para][
            0]] = r"<em>" + context[best_para][best_spans[best_para][0]]
        context[best_para][best_spans[best_para][1]] = context[best_para][
            best_spans[best_para][1]] + r"</em>"

        start = 0
        end = len(context[best_para])

        positions = [
            x for x, n in enumerate(context[best_para]
                                    [0:best_spans[best_para][0]]) if n == "."
        ]
        if len(positions) >= 2:
            start = positions[-2] + 1
        positions = [
            x
            for x, n in enumerate(context[best_para][best_spans[best_para][1] +
                                                     1:]) if n == "."
        ]
        if len(positions) > 1:
            end = best_spans[best_para][1] + 1 + positions[1]

        d = dict()
        if conf[best_para] > 10:
            d["answer"] = Answer
        else:
            d["answer"] = ""
        d["name"] = name
        d["filetype"] = filetype
        d["paragraph"] = re.sub(r' (?=\W)', '',
                                " ".join(context[best_para][start:end]))
        d["ObjectMasterId"] = self.ObjectMasterId

        return d


#if __name__ == "__main__":
#    main()
Example #17
def convert_saved_graph(model_dir, output_dir):
    print("Load model")
    md = ModelDir(model_dir)
    model = md.get_model()

    # remove the lm models word embeddings - cpu model will use Char-CNN
    model.lm_model.embed_weights_file = None
    dim = model.embed_mapper.layers[1].n_units

    print("Setting up cudnn version")
    sess = tf.Session()
    with sess.as_default():
        model.set_input_spec(
            ParagraphAndQuestionSpec(1, None, None, 14), {"the"},
            ResourceLoader(lambda a, b: {"the": np.zeros(300, np.float32)}))
        print("Buiding graph")
        pred = model.get_prediction()

    test_questions = get_test_questions()

    print("Load vars:")
    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    lm_var_names = {x.name for x in all_vars if x.name.startswith("bilm")}
    vars = [x for x in all_vars if x.name not in lm_var_names]
    md.restore_checkpoint(sess, vars)
    sess.run(
        tf.variables_initializer(
            [x for x in all_vars if x.name in lm_var_names]))

    feed = model.encode([test_questions], False)
    cuddn_out = sess.run([pred.start_logits, pred.end_logits], feed_dict=feed)

    print("Done, copying files...")
    if not exists(output_dir):
        mkdir(output_dir)
    for file in listdir(model_dir):
        if isfile(join(model_dir, file)) and file != "model.npy":
            copyfile(join(model_dir, file), join(output_dir, file))

    print("Done, mapping tensors...")
    to_save, to_init = [], []
    for x in tf.trainable_variables():
        if x.name.endswith("/gru_parameters:0"):
            key = x.name[:-len("/gru_parameters:0")]
            indim, outdim = get_dims(x, dim)
            c = cudnn_rnn_ops.CudnnGRUSaveable(x, 1, outdim, indim, scope=key)
            for spec in c.specs:
                if spec.name.endswith("bias_cudnn 0") or \
                        spec.name.endswith("bias_cudnn 1"):
                    print('Unsupported spec: ' + spec.name)
                    continue
                if 'forward' in spec.name:
                    new_name = spec.name.replace(
                        'forward/rnn/multi_rnn_cell/cell_0/',
                        'bidirectional_rnn/fw/')
                else:
                    new_name = spec.name.replace(
                        'backward/rnn/multi_rnn_cell/cell_0/',
                        'bidirectional_rnn/bw/')
                v = tf.Variable(sess.run(spec.tensor), name=new_name)
                to_init.append(v)
                to_save.append(v)
        else:
            to_save.append(x)

    save_dir = join(output_dir, "save")
    if not exists(save_dir):
        mkdir(save_dir)

    # save:
    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    vars_to_save = [x for x in all_vars if not x.name.startswith("bilm")]
    sess.run(tf.initialize_variables(to_init))
    saver = tf.train.Saver(vars_to_save)
    saver.save(
        sess,
        join(save_dir, 'checkpoint'),
        global_step=123456789,
        write_meta_graph=False,
    )

    sess.close()
    tf.reset_default_graph()
    return cuddn_out
Example #18
def convert(model_dir, output_dir, best_weights=False):
    print("Load model")
    md = ModelDir(model_dir)
    model = md.get_model()
    dim = model.embed_mapper.layers[1].n_units
    global_step = tf.get_variable('global_step',
                                  shape=[],
                                  dtype='int32',
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    print("Setting up cudnn version")
    #global_step = tf.get_variable('global_step', shape=[], dtype='int32', trainable=False)
    sess = tf.Session()
    sess.run(global_step.assign(0))
    with sess.as_default():
        model.set_input_spec(
            ParagraphAndQuestionSpec(1, None, None, 14), {"the"},
            ResourceLoader(lambda a, b: {"the": np.zeros(300, np.float32)}))

        print("Buiding graph")
        pred = model.get_prediction()

    test_questions = ParagraphAndQuestion(
        ["Harry", "Potter", "was", "written", "by", "JK"],
        ["Who", "wrote", "Harry", "Potter", "?"], None, "test_questions")

    print("Load vars")
    md.restore_checkpoint(sess)
    print("Restore finished")

    feed = model.encode([test_questions], False)
    cuddn_out = sess.run([pred.start_logits, pred.end_logits], feed_dict=feed)

    print("Done, copying files...")
    if not exists(output_dir):
        mkdir(output_dir)
    for file in listdir(model_dir):
        if isfile(join(model_dir, file)) and file != "model.npy":
            copyfile(join(model_dir, file), join(output_dir, file))

    print("Done, mapping tensors...")
    to_save = []
    to_init = []
    for x in tf.trainable_variables():
        if x.name.endswith("/gru_parameters:0"):
            key = x.name[:-len("/gru_parameters:0")]
            fw_params = x
            if "map_embed" in x.name:
                c = cudnn_rnn_ops.CudnnGRU(1, dim, 400)
            elif "chained-out" in x.name:
                c = cudnn_rnn_ops.CudnnGRU(1, dim, dim * 4)
            else:
                c = cudnn_rnn_ops.CudnnGRU(1, dim, dim * 2)
            params_saveable = cudnn_rnn_ops.RNNParamsSaveable(
                c, c.params_to_canonical, c.canonical_to_params, [fw_params],
                key)

            for spec in params_saveable.specs:
                if spec.name.endswith("bias_cudnn 0") or \
                        spec.name.endswith("bias_cudnn 1"):
                    # ??? What do these even do?
                    continue
                name = spec.name.split("/")
                name.remove("cell_0")
                if "forward" in name:
                    ix = name.index("forward")
                    name.insert(ix + 2, "fw")
                else:
                    ix = name.index("backward")
                    name.insert(ix + 2, "bw")
                del name[ix]

                ix = name.index("multi_rnn_cell")
                name[ix] = "bidirectional_rnn"
                name = "/".join(name)
                v = tf.Variable(sess.run(spec.tensor), name=name)
                to_init.append(v)
                to_save.append(v)

        else:
            to_save.append(x)

    other = [
        x for x in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        if x not in tf.trainable_variables()
    ]
    print(other)
    sess.run(tf.initialize_variables(to_init))
    saver = tf.train.Saver(to_save + other)
    save_dir = join(output_dir, "save")
    if not exists(save_dir):
        mkdir(save_dir)

    saver.save(sess, join(save_dir, "checkpoint"), sess.run(global_step))

    sess.close()
    tf.reset_default_graph()

    print("Updating model...")
    model.embed_mapper.layers = [
        model.embed_mapper.layers[0],
        BiRecurrentMapper(CompatGruCellSpec(dim))
    ]
    model.match_encoder.layers = list(model.match_encoder.layers)
    other = model.match_encoder.layers[1].other
    other.layers = list(other.layers)
    other.layers[1] = BiRecurrentMapper(CompatGruCellSpec(dim))

    pred = model.predictor.predictor
    pred.first_layer = BiRecurrentMapper(CompatGruCellSpec(dim))
    pred.second_layer = BiRecurrentMapper(CompatGruCellSpec(dim))

    with open(join(output_dir, "model.pkl"), "wb") as f:
        pickle.dump(model, f)

    print("Testing...")
    with open(join(output_dir, "model.pkl"), "rb") as f:
        model = pickle.load(f)

    sess = tf.Session()

    model.set_input_spec(
        ParagraphAndQuestionSpec(1, None, None, 14), {"the"},
        ResourceLoader(lambda a, b: {"the": np.zeros(300, np.float32)}))
    pred = model.get_prediction()

    print("Rebuilding")
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(save_dir))

    feed = model.encode([test_questions], False)
    cpu_out = sess.run([pred.start_logits, pred.end_logits], feed_dict=feed)

    print("These should be close:")
    print([np.allclose(a, b) for a, b in zip(cpu_out, cuddn_out)])
    print(cpu_out)
    print(cuddn_out)
Example #19
def predict():
    json_data = {"success": False, "predictions": []}
    print("Preprocessing...")

    # Load the model
    model_dir = ModelDir(
        "/home/antriv/conversation_ai/Transfer_Learning/ALLENAI_DocumentQA/document-qa/pretrained_models/models/triviaqa-unfiltered-shared-norm"
    )
    model = model_dir.get_model()
    if not isinstance(model, ParagraphQuestionModel):
        raise ValueError(
            "This script is built to work for ParagraphQuestionModel models only"
        )

    # Load the question
    question = (flask.request.data).decode("utf-8")

    # Read the documents
    documents = []
    doclist = ["/home/antriv/data/The-Future-Computed.txt"]
    for doc in doclist:
        if not isfile(doc):
            raise ValueError(doc + " does not exist")
        with open(doc, "r") as f:
            documents.append(f.read())
    print("Loaded %d documents" % len(documents))

    # Split documents into lists of paragraphs
    documents = [re.split("\s*\n\s*", doc) for doc in documents]

    # Tokenize the input, the model expects data to be tokenized using `NltkAndPunctTokenizer`
    # Note the model expects case-sensitive input
    tokenizer = NltkAndPunctTokenizer()
    question = tokenizer.tokenize_paragraph_flat(question)  # List of words
    # Now list of document->paragraph->sentence->word
    documents = [[tokenizer.tokenize_paragraph(p) for p in doc]
                 for doc in documents]

    # Now group the document into paragraphs, this returns `ExtractedParagraph` objects
    # that additionally remember the start/end token of the paragraph within the source document
    splitter = MergeParagraphs(400)
    #splitter = PreserveParagraphs() # Uncomment to use the natural paragraph grouping
    documents = [splitter.split(doc) for doc in documents]

    # Now select the top paragraphs using a `ParagraphFilter`
    if len(documents) == 1:
        # Use TF-IDF to select top paragraphs from the document
        selector = TopTfIdf(NltkPlusStopWords(True), n_to_select=1000)
        context = selector.prune(question, documents[0])
    else:
        # Use a linear classifier to select top paragraphs among all the documents
        selector = ShallowOpenWebRanker(n_to_select=1000)
        context = selector.prune(question, flatten_iterable(documents))

    print("Select %d paragraph" % len(context))

    if model.preprocessor is not None:
        # Models are allowed to define an additional pre-processing step
        # This will turn the `ExtractedParagraph` objects back into simple lists of tokens
        context = [
            model.preprocessor.encode_text(question, x) for x in context
        ]
    else:
        # Otherwise just use flattened text
        context = [flatten_iterable(x.text) for x in context]

    print("Setting up model")
    # Tell the model the batch size (can be None) and vocab to expect. This will load the
    # needed word vectors and fix the batch size to use when building the graph / encoding the input
    voc = set(question)
    for txt in context:
        voc.update(txt)
    model.set_input_spec(ParagraphAndQuestionSpec(batch_size=len(context)),
                         voc)

    # Now we build the actual tensorflow graph, `best_span` and `conf` are
    # tensors holding the predicted span (inclusive) and confidence scores for each
    # element in the input batch, confidence scores being the pre-softmax logit for the span
    print("Build tf graph")
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    # We need to use sess.as_default when working with the cuDNN stuff, since we need an active
    # session to figure out the # of parameters needed for each layer. The cpu-compatible models don't need this.
    with sess.as_default():
        # 8 means to limit the span to size 8 or less
        best_spans, conf = model.get_prediction().get_best_span(8)

    # Loads the saved weights
    model_dir.restore_checkpoint(sess)

    # Now the model is ready to run
    # The model takes input in the form of `ContextAndQuestion` objects, for example:
    data = [
        ParagraphAndQuestion(x, question, None, "user-question%d" % i)
        for i, x in enumerate(context)
    ]

    print("Starting run")
    # The model is run in two steps, first it "encodes" a batch of paragraph/context pairs
    # into numpy arrays, then we use `sess` to run the actual model to get the predictions
    encoded = model.encode(
        data, is_train=False)  # batch of `ContextAndQuestion` -> feed_dict
    best_spans, conf = sess.run([best_spans, conf],
                                feed_dict=encoded)  # feed_dict -> predictions

    best_para = np.argmax(
        conf
    )  # We get output for each paragraph, select the most-confident one to print
    print("Best Paragraph: " + str(best_para))
    print("Best span: " + str(best_spans[best_para]))
    print("Answer text: " +
          " ".join(context[best_para]
                   [best_spans[best_para][0]:best_spans[best_para][1] + 1]))
    print("Confidence: " + str(conf[best_para]))
    y_output = " ".join(
        context[best_para][best_spans[best_para][0]:best_spans[best_para][1] +
                           1])
    print(y_output)
    json_data["predictions"].append(str(y_output))

    #indicate that the request was a success
    json_data["success"] = True
    #return the data dictionary as a JSON response
    return flask.jsonify(json_data)
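
A hypothetical client for the Flask handler above; the URL path is a placeholder, since the route decorator is not included in this snippet:

import requests

# predict() reads the raw request body as the question text
resp = requests.post("http://localhost:5000/predict",
                     data="Who wrote The Future Computed?".encode("utf-8"))
print(resp.json())  # {"success": true, "predictions": ["..."]}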
Example #20
def main():
    parser = argparse.ArgumentParser(
        description='Evaluate a model on TriviaQA data')
    parser.add_argument('model', help='model directory')
    parser.add_argument(
        '-p',
        '--paragraph_output',
        type=str,
        help="Save fine grained results for each paragraph in csv format")
    parser.add_argument('-o',
                        '--official_output',
                        type=str,
                        help="Build an offical output file with the model's"
                        " most confident span for each (question, doc) pair")
    parser.add_argument('--no_ema',
                        action="store_true",
                        help="Don't use EMA weights even if they exist")
    parser.add_argument(
        '--n_processes',
        type=int,
        default=None,
        help=
        "Number of processes to do the preprocessing (selecting paragraphs+loading context) with"
    )
    parser.add_argument('-i',
                        '--step',
                        type=int,
                        default=None,
                        help="checkpoint to load, default to latest")
    parser.add_argument('-n',
                        '--n_sample',
                        type=int,
                        default=None,
                        help="Number of questions to evaluate on")
    # Note: "async" is a reserved word in Python 3.7+, so the value is read
    # back with getattr(args, "async") further down
    parser.add_argument('-a', '--async', type=int, default=10)
    parser.add_argument('-t',
                        '--tokens',
                        type=int,
                        default=400,
                        help="Max tokens per a paragraph")
    parser.add_argument('-g',
                        '--n_paragraphs',
                        type=int,
                        default=15,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('-f',
                        '--filter',
                        type=str,
                        default=None,
                        choices=["tfidf", "truncate", "linear"],
                        help="How to select paragraphs")
    parser.add_argument(
        '-b',
        '--batch_size',
        type=int,
        default=200,
        help="Batch size, larger sizes might be faster but wll take more memory"
    )
    parser.add_argument('--max_answer_len',
                        type=int,
                        default=8,
                        help="Max answer span to select")
    parser.add_argument('-c',
                        '--corpus',
                        choices=[
                            "web-dev", "web-test", "web-verified-dev",
                            "web-train", "open-dev", "open-train", "wiki-dev",
                            "wiki-test"
                        ],
                        default="web-verified-dev")
    parser.add_argument("-s",
                        "--source_dir",
                        type=str,
                        default=None,
                        help="where to take input files")
    parser.add_argument("--n_span_per_q",
                        type=int,
                        default=1,
                        help="where to take input files")
    args = parser.parse_args()

    dataset_name = args.source_dir.split('/')[-1]
    model_name = args.model.split('/')[-1]
    ElasticLogger().write_log('INFO',
                              'Start Evaluation',
                              context_dict={
                                  'model': model_name,
                                  'dataset': dataset_name
                              })

    model_dir = ModelDir(args.model)
    model = model_dir.get_model()

    if args.corpus.startswith('web'):
        dataset = TriviaQaWebDataset()
        if args.corpus == "web-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "web-test":
            test_questions = dataset.get_test()
        elif args.corpus == "web-verified-dev":
            test_questions = dataset.get_verified()
        elif args.corpus == "web-train":
            test_questions = dataset.get_train()
        else:
            raise AssertionError()
    elif args.corpus.startswith("wiki"):
        dataset = TriviaQaWikiDataset()
        if args.corpus == "wiki-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "wiki-test":
            test_questions = dataset.get_test()
        else:
            raise AssertionError()
    else:
        dataset = TriviaQaOpenDataset(args.source_dir)
        if args.corpus == "open-dev":
            # just loading the pkl that was saved in build_span_corpus
            test_questions = dataset.get_dev()
        elif args.corpus == "open-train":
            test_questions = dataset.get_train()
        else:
            raise AssertionError()

    ### ALON debuging
    #test_questions = test_questions[0:5]

    corpus = dataset.evidence
    splitter = MergeParagraphs(args.tokens)

    per_document = args.corpus.startswith(
        "web")  # wiki and web are both multi-document
    #per_document = True

    filter_name = args.filter
    if filter_name is None:
        # Pick default depending on the kind of data we are using
        if per_document:
            filter_name = "tfidf"
        else:
            filter_name = "linear"

    print("Selecting %d paragraphs using method \"%s\" per %s" %
          (args.n_paragraphs, filter_name,
           ("question-document pair" if per_document else "question")))

    if filter_name == "tfidf":
        para_filter = TopTfIdf(NltkPlusStopWords(punctuation=True),
                               args.n_paragraphs)
    elif filter_name == "truncate":
        para_filter = FirstN(args.n_paragraphs)
    elif filter_name == "linear":
        para_filter = ShallowOpenWebRanker(args.n_paragraphs)
    else:
        raise ValueError()

    n_questions = args.n_sample
    docqa.config.SPANS_PER_QUESTION = args.n_span_per_q
    #n_questions = 1
    if n_questions is not None:
        test_questions.sort(key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(test_questions)
        test_questions = test_questions[:n_questions]

    print("Building question/paragraph pairs...")
    # Loads the relevant questions/documents, selects the right paragraphs, and runs the model's preprocessor
    if per_document:
        prep = ExtractMultiParagraphs(splitter,
                                      para_filter,
                                      model.preprocessor,
                                      require_an_answer=False)
    else:
        prep = ExtractMultiParagraphsPerQuestion(splitter,
                                                 para_filter,
                                                 model.preprocessor,
                                                 require_an_answer=False)
    prepped_data = preprocess_par(test_questions, corpus, prep,
                                  args.n_processes, 1000)

    data = []
    for q in prepped_data.data:
        for i, p in enumerate(q.paragraphs):
            if q.answer_text is None:
                ans = None
            else:
                ans = TokenSpans(q.answer_text, p.answer_spans)
            data.append(
                DocumentParagraphQuestion(q.question_id, p.doc_id,
                                          (p.start, p.end), q.question, p.text,
                                          ans, i))

    # Reverse so our first batch will be the largest (so OOMs happen early)
    questions = sorted(data,
                       key=lambda x: (x.n_context_words, len(x.question)),
                       reverse=True)

    print("Done, starting eval")

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    test_questions = ParagraphAndQuestionDataset(
        questions, FixedOrderBatcher(args.batch_size, True))

    evaluation = trainer.test(
        model, [RecordParagraphSpanPrediction(args.max_answer_len, True)],
        {args.corpus: test_questions}, ResourceLoader(), checkpoint,
        not args.no_ema, getattr(args, "async"))[args.corpus]  # "async" is a keyword in Python 3.7+

    if not all(len(x) == len(data) for x in evaluation.per_sample.values()):
        raise RuntimeError()

    df = pd.DataFrame(evaluation.per_sample)

    if args.official_output is not None:
        print("Saving question result")

        fns = {}
        if per_document:
            # The unnormalized filenames weren't stored exactly, so unfortunately we have to reload
            # the source data to recover the exact filenames expected by the official evaluation script
            print("Loading proper filenames")
            if args.corpus == 'web-test':
                source = join(TRIVIA_QA, "qa", "web-test-without-answers.json")
            elif args.corpus == "web-dev":
                source = join(TRIVIA_QA, "qa", "web-dev.json")
            else:
                raise AssertionError()

            with open(join(source)) as f:
                data = json.load(f)["Data"]
            for point in data:
                for doc in point["EntityPages"]:
                    filename = doc["Filename"]
                    fn = join("wikipedia", filename[:filename.rfind(".")])
                    fn = normalize_wiki_filename(fn)
                    fns[(point["QuestionId"], fn)] = filename

        answers = {}
        scores = {}
        for q_id, doc_id, start, end, txt, score in df[[
                "question_id", "doc_id", "para_start", "para_end",
                "text_answer", "predicted_score"
        ]].itertuples(index=False):
            filename = dataset.evidence.file_id_map[doc_id]
            if per_document:
                if filename.startswith("web"):
                    true_name = filename[4:] + ".txt"
                else:
                    true_name = fns[(q_id, filename)]
                # Alon Patch for triviaqa test results
                true_name = true_name.replace('TriviaQA_Org/', '')
                key = q_id + "--" + true_name
            else:
                key = q_id

            prev_score = scores.get(key)
            if prev_score is None or prev_score < score:
                scores[key] = score
                answers[key] = txt

        with open(args.official_output, "w") as f:
            json.dump(answers, f)

    output_file = args.paragraph_output
    if output_file is not None:
        print("Saving paragraph result")
        df.to_csv(output_file, index=False)

    print("Computing scores")

    if per_document:
        group_by = ["question_id", "doc_id"]
    else:
        group_by = ["question_id"]

    # Print a table of scores as more paragraphs are used
    df.sort_values(group_by + ["rank"], inplace=True)
    df_scores = df.copy(deep=True)
    df_scores['predicted_score'] = df_scores['predicted_score'].apply(
        lambda x: pd.Series(x).max())

    em = compute_ranked_scores(df_scores, "predicted_score", "text_em",
                               group_by)
    f1 = compute_ranked_scores(df_scores, "predicted_score", "text_f1",
                               group_by)
    table = [["N Paragraphs", "EM", "F1"]]
    table += list([str(i + 1), "%.4f" % e, "%.4f" % f]
                  for i, (e, f) in enumerate(zip(em, f1)))

    table_df = pd.DataFrame(table[1:], columns=table[0]).drop(['N Paragraphs'],
                                                              axis=1)
    ElasticLogger().write_log('INFO', 'Results', context_dict={'model': model_name, 'dataset': dataset_name, \
                                                            'max_EM': table_df.max().loc['EM'], \
                                                            'max_F1': table_df.max().loc['F1'], \
                                                            'result_table': str(table_df)})

    df_flat = []
    for id, question in df.iterrows():
        for text_answer, predicted_span, predicted_score in zip(
                question['text_answer'], question['predicted_span'],
                question['predicted_score']):
            new_question = dict(question.copy())
            new_question.update({
                'text_answer': text_answer,
                'predicted_span': predicted_span,
                'predicted_score': predicted_score
            })
            df_flat.append(new_question)

    results_df = pd.DataFrame(df_flat)
    # Alon: outputting the estimates for all spans (the per-question argmax below is left commented out)
    #results_df = results_df.groupby(['question_id', 'text_answer']).apply(lambda df: df.ix[df['predicted_score'].argmax()]).reset_index(drop=True)
    results_df.sort_values(by=['question_id', 'predicted_score'],
                           ascending=False).set_index([
                               'question_id', 'text_answer'
                           ])[['question', 'predicted_score',
                               'text_em']].to_csv('results.csv')

    print_table(table)
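# A minimal, self-contained sketch of the ranked-score aggregation used above
# (a hypothetical re-implementation of a compute_ranked_scores-style helper,
# not the library's actual code): for each cutoff k, every group keeps only its
# first k paragraphs by rank and answers with its highest-scoring row.
import pandas as pd


def ranked_scores_sketch(df, score_col, target_col, group_by):
    max_rank = int(df["rank"].max()) + 1
    scores = []
    for k in range(1, max_rank + 1):
        top_k = df[df["rank"] < k]
        # within each group, keep the row with the highest model score
        best = top_k.loc[top_k.groupby(group_by)[score_col].idxmax()]
        scores.append(best[target_col].mean())
    return scores


# Toy usage: two questions with two ranked paragraphs each.
toy = pd.DataFrame({
    "question_id": ["q1", "q1", "q2", "q2"],
    "rank": [0, 1, 0, 1],
    "predicted_score": [0.2, 0.9, 0.8, 0.1],
    "text_em": [0.0, 1.0, 1.0, 0.0],
})
print(ranked_scores_sketch(toy, "predicted_score", "text_em", ["question_id"]))
# -> [0.5, 1.0]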
def main():
    parser = argparse.ArgumentParser("Train rejection model on SQuAD")

    parser.add_argument("--corpus_dir", type=str, default="~/data/document-qa")
    parser.add_argument("--output_dir",
                        type=str,
                        default="~/model/document-qa/squad")
    parser.add_argument("--lm_dir", type=str, default="~/data/lm")
    parser.add_argument("--exp_id", type=str, default="rejection")

    parser.add_argument("--lr", type=float, default=0.5)
    parser.add_argument("--epoch", type=int, default=20)

    parser.add_argument("--dim", type=int, default=100)
    parser.add_argument("--batch_size", type=int, default=45)

    parser.add_argument("--l2", type=float, default=0)
    parser.add_argument("--mode",
                        choices=["input", "output", "both", "none"],
                        default="both")
    parser.add_argument("--top_layer_only", action="store_true")

    args = parser.parse_args()

    print("Arguments : ", args)

    out = args.output_dir + "_" + args.exp_id + "_lr" + str(
        args.lr) + "-" + datetime.now().strftime("%m%d-%H%M%S")
    dim = args.dim
    batch_size = args.batch_size
    out = expanduser(out)
    lm_dir = expanduser(args.lm_dir)
    corpus_dir = expanduser(args.corpus_dir)

    print("Make global recurrent_layer...")
    recurrent_layer = CudnnGru(
        dim, w_init=tf.keras.initializers.TruncatedNormal(stddev=0.05))
    params = trainer.TrainParams(trainer.SerializableOptimizer(
        "Adadelta", dict(learning_rate=args.lr)),
                                 ema=0.999,
                                 max_checkpoints_to_keep=2,
                                 async_encoding=10,
                                 num_epochs=args.epoch,
                                 log_period=30,
                                 eval_period=1200,
                                 save_period=1200,
                                 best_weights=("dev", "b17/text-f1"),
                                 eval_samples=dict(dev=None, train=8000))

    lm_reduce = MapperSeq(
        ElmoLayer(args.l2,
                  layer_norm=False,
                  top_layer_only=args.top_layer_only),
        DropoutLayer(0.5),
    )

    model = AttentionWithElmo(
        encoder=DocumentAndQuestionEncoder(SingleSpanAnswerEncoder()),
        lm_model=SquadContextConcatSkip(lm_dir=lm_dir),
        append_before_atten=(args.mode == "both" or args.mode == "output"),
        append_embed=(args.mode == "both" or args.mode == "input"),
        max_batch_size=128,
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=49,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        lm_reduce=None,
        lm_reduce_shared=lm_reduce,
        per_sentence=False,
        memory_builder=NullBiMapper(),
        attention=BiAttention(TriLinear(bias=True), True),
        match_encoder=SequenceMapperSeq(
            FullyConnected(dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    recurrent_layer,
                    VariationalDropoutLayer(0.8),
                    StaticAttentionSelf(TriLinear(bias=True),
                                        ConcatWithProduct()),
                    FullyConnected(dim * 2, activation="relu"),
                )), VariationalDropoutLayer(0.8)),
        predictor=BoundsPredictor(
            ChainBiMapper(first_layer=recurrent_layer,
                          second_layer=recurrent_layer)))

    batcher = ClusteredBatcher(batch_size, ContextLenKey(), False, False)
    data = DocumentQaTrainingData(SquadCorpus(corpus_dir), None, batcher,
                                  batcher)

    with open(__file__, "r") as f:
        notes = f.read()
        notes = str(sorted(args.__dict__.items(),
                           key=lambda x: x[0])) + "\n" + notes

    trainer.start_training(
        data, model, params,
        [LossEvaluator(),
         SpanEvaluator(bound=[17], text_eval="squad")], ModelDir(out), notes)
Exemplo n.º 22
0
def perform_evaluation(model_name: str,
                       dataset_names: List[str],
                       tokens_per_paragraph: int,
                       filter_type: str,
                       n_processes: int,
                       n_paragraphs: int,
                       batch_size: int,
                       checkpoint: str,
                       no_ema: bool,
                       max_answer_len: int,
                       official_output_path: str,
                       paragraph_output_path: str,
                       aggregated_output_path: str,
                       elmo_char_cnn: bool,
                       n_samples: Union[int, None],
                       per_document: bool = False):
    """Perform an evaluation using cape's answer decoder

    A file will be created listing the answers per question ID for each dataset

    :param model_name: path to the model to evaluate
    :param dataset_names: list of strings of datasets to evaluate
    :param tokens_per_paragraph: how big to make paragraph chunks
    :param filter_type: how to select the paragraphs to read
    :param n_processes: how many processes to use when multiprocessing
    :param n_paragraphs: how many paragraphs to read per question
    :param batch_size: how many datapoints to evaluate at once
    :param checkpoint: string, checkpoint to load
    :param no_ema: if true, don't use EMA weights
    :param max_answer_len: the maximum allowable length of an answer in tokens
    :param official_output_path: path to write official output to
    :param paragraph_output_path: path to write paragraph output to
    :param aggregated_output_path: path to write aggregated output to
    :param elmo_char_cnn: if true, uses the elmo CNN to make token embeddings, less OOV but
        requires much more memory
    :param per_document: if false, return the best-scoring answer for each question; if true,
        the best-scoring answer from each document is used instead.
    """
    run_async = True  # renamed from "async", which is a reserved word in Python 3.7+
    corpus_name = 'all'

    print('Setting Up:')
    model_dir = ModelDir(model_name)
    model = model_dir.get_model()
    dataset = get_multidataset(dataset_names)
    splitter = MergeParagraphs(tokens_per_paragraph)
    para_filter = get_para_filter(filter_type, per_document, n_paragraphs)
    test_questions, n_questions = get_questions(per_document, dataset,
                                                splitter, para_filter,
                                                model.preprocessor,
                                                n_processes, batch_size)

    print("Starting eval")
    checkpoint = get_checkpoint(checkpoint, model_dir)
    evaluation = test(model,
                      [RecordParagraphSpanPrediction(max_answer_len, True)],
                      {corpus_name: test_questions}, ResourceLoader(),
                      checkpoint, not no_ema, run_async, n_samples,
                      elmo_char_cnn)[corpus_name]

    print('Exporting and Post-processing')
    if not all(len(x) == n_questions for x in evaluation.per_sample.values()):
        raise RuntimeError()

    df = pd.DataFrame(evaluation.per_sample)
    compute_and_dump_official_output(df,
                                     official_output_path,
                                     per_document=per_document)

    print("Saving paragraph result")
    df.to_csv(paragraph_output_path, index=False)

    print("Computing scores")
    agg_df = get_aggregated_df(df, per_document)
    agg_df.to_csv(aggregated_output_path, index=False)
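# A minimal sketch of what the `per_document` flag above implies when picking
# official answers. This is a hypothetical stand-in for cape's
# compute_and_dump_official_output (not shown in this file), assuming the
# per-sample dataframe has question_id / doc_id / predicted_score / text_answer
# columns, as in the other evaluation scripts here.
import json

import pandas as pd


def select_official_answers(df, per_document=False):
    keys = ["question_id", "doc_id"] if per_document else ["question_id"]
    # keep the highest-scoring span per question, or per (question, document) pair
    best = df.loc[df.groupby(keys)["predicted_score"].idxmax()]
    if per_document:
        return {q + "--" + d: a for q, d, a in
                zip(best["question_id"], best["doc_id"], best["text_answer"])}
    return dict(zip(best["question_id"], best["text_answer"]))


def dump_official_answers(df, path, per_document=False):
    with open(path, "w") as f:
        json.dump(select_official_answers(df, per_document), f)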
Exemplo n.º 23
0
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on TriviaQA data')
    parser.add_argument('model', help='model directory')
    parser.add_argument('-p', '--paragraph_output', type=str,
                        help="Save fine grained results for each paragraph in csv format")
    parser.add_argument('-o', '--official_output', type=str, help="Build an official output file with the model's"
                                                                  " most confident span for each (question, doc) pair")
    parser.add_argument('--no_ema', action="store_true", help="Don't use EMA weights even if they exist")
    parser.add_argument('--n_processes', type=int, default=None,
                        help="Number of processes to do the preprocessing (selecting paragraphs+loading context) with")
    parser.add_argument('-i', '--step', type=str, default=None, help="checkpoint to load: a step number or 'latest'; defaults to the best weights")
    parser.add_argument('-n', '--n_sample', type=int, default=None, help="Number of questions to evaluate on")
    parser.add_argument('-a', '--async', type=int, default=10)
    parser.add_argument('-t', '--tokens', type=int, default=400,
                        help="Max tokens per a paragraph")
    parser.add_argument('-g', '--n_paragraphs', type=int, default=15,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('-f', '--filter', type=str, default=None, choices=["tfidf", "truncate", "linear"],
                        help="How to select paragraphs")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes might be faster but wll take more memory")
    parser.add_argument('--max_answer_len', type=int, default=8,
                        help="Max answer span to select")
    parser.add_argument('-c', '--corpus',
                        choices=["web-dev", "web-test", "web-verified-dev", "web-train",
                                 "open-dev", "open-train"],
                        default="web-verified-dev")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)
    model = model_dir.get_model()

    if args.corpus.startswith('web'):
        dataset = TriviaQaWebDataset()
        corpus = dataset.evidence
        if args.corpus == "web-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "web-test":
            test_questions = dataset.get_test()
        elif args.corpus == "web-verified-dev":
            test_questions = dataset.get_verified()
        elif args.corpus == "web-train":
            test_questions = dataset.get_train()
        else:
            raise RuntimeError()
    else:
        dataset = TriviaQaOpenDataset()
        corpus = dataset.evidence
        if args.corpus == "open-dev":
            test_questions = dataset.get_dev()
        elif args.corpus == "open-train":
            test_questions = dataset.get_train()
        else:
            raise RuntimeError()

    splitter = MergeParagraphs(args.tokens)

    per_document = not args.corpus.startswith("open")

    filter_name = args.filter
    if filter_name is None:
        if args.corpus.startswith("open"):
            filter_name = "linear"
        else:
            filter_name = "tfidf"

    print("Selecting %d paragraphs using %s method per %s" % (args.n_paragraphs, filter_name,
                                                              ("question-document pair" if per_document else "question")))

    if filter_name == "tfidf":
        para_filter = TopTfIdf(NltkPlusStopWords(punctuation=True), args.n_paragraphs)
    elif filter_name == "truncate":
        para_filter = FirstN(args.n_paragraphs)
    elif filter_name == "linear":
        para_filter = ShallowOpenWebRanker(args.n_paragraphs)
    else:
        raise ValueError()

    n_questions = args.n_sample
    if n_questions is not None:
        test_questions.sort(key=lambda x:x.question_id)
        np.random.RandomState(0).shuffle(test_questions)
        test_questions = test_questions[:n_questions]

    print("Building question/paragraph pairs...")
    # Loads the relevant questions/documents, selects the right paragraphs, and runs the model's preprocessor
    if per_document:
        prep = ExtractMultiParagraphs(splitter, para_filter, model.preprocessor, require_an_answer=False)
    else:
        prep = ExtractMultiParagraphsPerQuestion(splitter, para_filter, model.preprocessor, require_an_answer=False)
    prepped_data = preprocess_par(test_questions, corpus, prep, args.n_processes, 1000)

    data = []
    for q in prepped_data.data:
        for i, p in enumerate(q.paragraphs):
            if q.answer_text is None:
                ans = None
            else:
                ans = TokenSpans(q.answer_text, p.answer_spans)
            data.append(DocumentParagraphQuestion(q.question_id, p.doc_id,
                                                 (p.start, p.end), q.question, p.text,
                                                  ans, i))

    # Reverse so our first batch will be the largest (so OOMs happen early)
    questions = sorted(data, key=lambda x: (x.n_context_words, len(x.question)), reverse=True)

    print("Done, starting eval")

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    test_questions = ParagraphAndQuestionDataset(questions, FixedOrderBatcher(args.batch_size, True))

    evaluation = trainer.test(model,
                             [RecordParagraphSpanPrediction(args.max_answer_len, True)],
                              {args.corpus: test_questions}, ResourceLoader(), checkpoint, not args.no_ema, getattr(args, "async"))[args.corpus]

    if not all(len(x) == len(data) for x in evaluation.per_sample.values()):
        raise RuntimeError()

    df = pd.DataFrame(evaluation.per_sample)

    if args.official_output is not None:
        print("Saving question result")

        # The unnormalized filenames weren't stored exactly, so unfortunately we have to reload
        # the source data to recover the exact filenames expected by the official evaluation script
        fns = {}
        print("Loading proper filenames")
        if args.corpus == 'web-test':
            source = join(TRIVIA_QA, "qa", "web-test-without-answers.json")
        elif args.corpus == "web-dev":
            source = join(TRIVIA_QA, "qa", "web-dev.json")
        else:
            raise NotImplementedError()

        with open(join(source)) as f:
            data = json.load(f)["Data"]
        for point in data:
            for doc in point["EntityPages"]:
                filename = doc["Filename"]
                fn = join("wikipedia", filename[:filename.rfind(".")])
                fn = normalize_wiki_filename(fn)
                fns[(point["QuestionId"], fn)] = filename

        answers = {}
        scores = {}
        for q_id, doc_id, start, end, txt, score in df[["question_id", "doc_id", "para_start", "para_end",
                                                        "text_answer", "predicted_score"]].itertuples(index=False):
            filename = dataset.evidence.file_id_map[doc_id]
            if filename.startswith("web"):
                true_name = filename[4:] + ".txt"
            else:
                true_name = fns[(q_id, filename)]

            key = q_id + "--" + true_name
            prev_score = scores.get(key)
            if prev_score is None or prev_score < score:
                scores[key] = score
                answers[key] = txt

        with open(args.official_output, "w") as f:
            json.dump(answers, f)

    if per_document:
        group_by = ["question_id", "doc_id"]
    else:
        group_by = ["question_id"]

    # Print a table of scores as more paragraphs are used
    df.sort_values(group_by + ["rank"], inplace=True)
    f1 = compute_model_scores(df, "predicted_score", "text_f1", group_by)
    em = compute_model_scores(df, "predicted_score", "text_em", group_by)
    table = [["N Paragraphs", "EM", "F1"]]
    table += list([str(i+1), "%.4f" % e, "%.4f" % f] for i, (e, f) in enumerate(zip(em, f1)))
    print_table(table)

    output_file = args.paragraph_output
    if output_file is not None:
        print("Saving paragraph result")
        if output_file.endswith("json"):
            with open(output_file, "w") as f:
                json.dump(evaluation.per_sample, f)
        elif output_file.endswith("pkl"):
            with open(output_file, "wb") as f:
                pickle.dump(evaluation.per_sample, f)
        elif output_file.endswith("csv"):

            df.to_csv(output_file, index=False)
        else:
            raise ValueError("Unrecognized file format")
def main():
    parser = argparse.ArgumentParser(
        description='Evaluate a model on document-level SQuAD')
    parser.add_argument('model', help='model to use')
    parser.add_argument(
        'output',
        type=str,
        help="Store the per-paragraph results in csv format in this file")
    parser.add_argument('-n',
                        '--n_sample',
                        type=int,
                        default=None,
                        help="(for testing) sample documents")
    parser.add_argument(
        '-s',
        '--async',
        type=int,
        default=10,
        help="Encoding batch asynchronously, queueing up to this many")
    parser.add_argument('-a',
                        '--answer_bound',
                        type=int,
                        default=17,
                        help="Max answer span length")
    parser.add_argument('-p',
                        '--n_paragraphs',
                        type=int,
                        default=None,
                        help="Max number of paragraphs to use")
    parser.add_argument(
        '-b',
        '--batch_size',
        type=int,
        default=200,
        help="Batch size, larger sizes can be faster but uses more memory")
    parser.add_argument('-c',
                        '--corpus',
                        choices=["dev", "train", "doc-rd-dev"],
                        default="dev")
    parser.add_argument('--no_ema',
                        action="store_true",
                        help="Don't use EMA weights even if they exist")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)
    print("Loading data")

    questions = []
    ranker = SquadTfIdfRanker(NltkPlusStopWords(True),
                              args.n_paragraphs,
                              force_answer=False)

    if args.corpus == "doc-rd-dev":
        docs = SquadCorpus().get_dev()
        if args.n_sample is not None:
            docs.sort(key=lambda x: x.doc_id)
            np.random.RandomState(0).shuffle(docs)
            docs = docs[:args.n_sample]

        print("Fetching document reader docs...")
        doc_rd_versions = get_doc_rd_doc(docs)
        print("Ranking and matching with questions...")
        for doc in tqdm(docs):
            doc_questions = flatten_iterable(x.questions
                                             for x in doc.paragraphs)
            paragraphs = doc_rd_versions[doc.title]
            ranks = ranker.rank([x.words for x in doc_questions],
                                [x.text for x in paragraphs])
            for i, question in enumerate(doc_questions):
                para_ranks = np.argsort(ranks[i])
                for para_rank, para_num in enumerate(
                        para_ranks[:args.n_paragraphs]):
                    # Just use dummy answers spans for these pairs
                    questions.append(
                        RankedParagraphQuestion(
                            question.words,
                            TokenSpans(question.answer.answer_text,
                                       np.zeros((0, 2), dtype=np.int32)),
                            question.question_id, paragraphs[para_num],
                            para_rank, para_num))
        rl = ResourceLoader()
    else:
        if args.corpus == "dev":
            docs = SquadCorpus().get_dev()
        else:
            docs = SquadCorpus().get_train()
        rl = SquadCorpus().get_resource_loader()

        if args.n_sample is not None:
            docs.sort(key=lambda x: x.doc_id)
            np.random.RandomState(0).shuffle(docs)
            docs = docs[:args.n_sample]

        for q in ranker.ranked_questions(docs):
            for i, p in enumerate(q.paragraphs):
                questions.append(
                    RankedParagraphQuestion(
                        q.question, TokenSpans(q.answer_text, p.answer_spans),
                        q.question_id,
                        ParagraphWithInverse([p.text], p.original_text,
                                             p.spans), i, p.paragraph_num))

    print("Split %d docs into %d paragraphs" % (len(docs), len(questions)))

    questions = sorted(questions,
                       key=lambda x: (x.n_context_words, len(x.question)),
                       reverse=True)
    for q in questions:
        if len(q.answer.answer_spans.shape) != 2:
            raise ValueError()

    checkpoint = model_dir.get_best_weights()
    if checkpoint is not None:
        print("Using best weights")
    else:
        print("Using latest checkpoint")
        checkpoint = model_dir.get_latest_checkpoint()
        if checkpoint is None:
            raise ValueError("No checkpoints found")

    data = ParagraphAndQuestionDataset(
        questions, FixedOrderBatcher(args.batch_size, True))

    model = model_dir.get_model()
    evaluation = trainer.test(
        model, [RecordParagraphSpanPrediction(args.answer_bound, True)],
        {args.corpus: data}, rl, checkpoint, not args.no_ema,
        getattr(args, "async"))[args.corpus]

    print("Saving result")
    output_file = args.output

    df = pd.DataFrame(evaluation.per_sample)

    df.sort_values(["question_id", "rank"], inplace=True, ascending=True)
    group_by = ["question_id"]
    f1 = compute_ranked_scores(df, "predicted_score", "text_f1", group_by)
    em = compute_ranked_scores(df, "predicted_score", "text_em", group_by)
    table = [["N Paragraphs", "EM", "F1"]]
    table += list([str(i + 1), "%.4f" % e, "%.4f" % f]
                  for i, (e, f) in enumerate(zip(em, f1)))
    print_table(table)

    df.to_csv(output_file, index=False)
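# The "tfidf" paragraph selection used in these scripts boils down to scoring
# each paragraph by its TF-IDF similarity to the question and keeping the top
# n. A minimal sketch with scikit-learn (an illustration only, not docqa's
# actual TopTfIdf/SquadTfIdfRanker implementation, which has its own
# tokenization and stop-word handling):
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer


def top_tfidf_paragraphs(question, paragraphs, n):
    vec = TfidfVectorizer(stop_words="english")
    para_matrix = vec.fit_transform(paragraphs)        # (n_paragraphs, vocab)
    q_vec = vec.transform([question])                  # (1, vocab)
    # rows are L2-normalized by default, so the dot product is a cosine similarity
    scores = (para_matrix @ q_vec.T).toarray().ravel()
    return [paragraphs[i] for i in np.argsort(-scores)[:n]]


paras = ["The cat sat on the mat.",
         "TF-IDF weighs rare terms more heavily.",
         "Paragraph ranking keeps the passages most similar to the question."]
print(top_tfidf_paragraphs("How does TF-IDF paragraph ranking work?", paras, 2))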
Exemplo n.º 25
0
def main():
    parser = argparse.ArgumentParser(description='Run the demo server')
    parser.add_argument('model', help='Models to use')

    parser.add_argument(
        '-v',
        '--voc',
        help='vocab to use, only words from this file will be used')
    parser.add_argument('-t',
                        '--tokens',
                        type=int,
                        default=400,
                        help='Number of tokens to use per paragraph')
    parser.add_argument('--vec_dir', help='Location to find word vectors')
    parser.add_argument('--n_paragraphs',
                        type=int,
                        default=12,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('--span_bound',
                        type=int,
                        default=8,
                        help="Max span size to return as an answer")

    parser.add_argument(
        '--tagme_api_key',
        help="Key to use for TAGME (tagme.d4science.org/tagme)")
    parser.add_argument('--bing_api_key', help="Key to use for bing searches")
    parser.add_argument('--tagme_thresh', default=0.2, type=float)
    parser.add_argument('--no_wiki',
                        action="store_true",
                        help="Dont use TAGME")
    parser.add_argument('--n_web',
                        type=int,
                        default=10,
                        help='Number of web docs to fetch')
    parser.add_argument('--blacklist_trivia_sites',
                        action="store_true",
                        help="Don't use trivia websites")
    parser.add_argument('-c',
                        '--wiki_cache',
                        help="Cache wiki articles in this directory")

    parser.add_argument('--n_dl_threads',
                        type=int,
                        default=5,
                        help="Number of threads to download documents with")
    parser.add_argument('--request_timeout', type=int, default=60)
    parser.add_argument('--download_timeout', type=int, default=25)
    parser.add_argument('--workers',
                        type=int,
                        default=1,
                        help="Number of server workers")
    parser.add_argument('--debug',
                        default=None,
                        choices=["random_model", "dummy_qa"])

    args = parser.parse_args()
    span_bound = args.span_bound

    if args.tagme_api_key is not None:
        tagme_api_key = args.tagme_api_key
    else:
        tagme_api_key = environ.get("TAGME_API_KEY")

    if args.bing_api_key is not None:
        bing_api_key = args.bing_api_key
    else:
        bing_api_key = environ.get("BING_API_KEY")
        if bing_api_key is None and args.n_web > 0:
            raise ValueError("If n_web > 0 you must give a BING_API_KEY")

    if args.debug is None:
        model = ModelDir(args.model)
    else:
        model = RandomPredictor(5, WithIndicators())

    if args.vec_dir is not None:
        loader = LoadFromPath(args.vec_dir)
    else:
        loader = ResourceLoader()

    if args.debug == "dummy_qa":
        qa = DummyQa()
    else:
        qa = QaSystem(
            args.wiki_cache,
            MergeParagraphs(args.tokens),
            ShallowOpenWebRanker(args.n_paragraphs),
            args.voc,
            model,
            loader,
            bing_api_key,
            tagme_api_key=tagme_api_key,
            n_dl_threads=args.n_dl_threads,
            blacklist_trivia_sites=args.blacklist_trivia_sites,
            download_timeout=args.download_timeout,
            span_bound=span_bound,
            tagme_threshold=None if args.no_wiki else args.tagme_thresh,
            n_web_docs=args.n_web)

    logging.propagate = False
    formatter = logging.Formatter("%(asctime)s: %(levelname)s: %(message)s")
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logging.root.addHandler(handler)
    logging.root.setLevel(logging.DEBUG)

    app = Sanic()
    app.config.REQUEST_TIMEOUT = args.request_timeout

    @app.route("/answer")
    async def answer(request):
        try:
            question = request.args["question"][0]
            if question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            spans, paras = await qa.answer_question(question)
            answers = select_answers(paras, spans, span_bound, 10)
            return json([x.to_json() for x in answers])
        except Exception as e:
            log.info("Error: " + str(e))

            raise ServerError("Server Error", status_code=500)

    @app.route('/answer-from', methods=['POST'])
    async def answer_from(request):
        try:
            args = ujson.loads(request.body.decode("utf-8"))
            question = args.get("question")
            if question is None or question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            doc = args["document"]
            if len(doc) > 500000:
                raise ServerError("Document too large", status_code=400)
            spans, paras = qa.answer_with_doc(question, doc)
            answers = select_answers(paras, spans, span_bound, 10)
            return json([x.to_json() for x in answers])
        except Exception as e:
            log.info("Error: " + str(e))
            raise ServerError("Server Error", status_code=500)

    app.static('/', './docqa//server/static/index.html')
    app.static('/about.html', './docqa//service/static/about.html')
    app.run(host="0.0.0.0", port=8000, workers=args.workers, debug=False)
def main():
    parser = argparse.ArgumentParser(description="Run an ELMo model on user input")
    # parser.add_argument("model", type=int, help="Model directory")
    parser.add_argument("question", help="Question to answer")
    parser.add_argument("documents", help="List of text documents to answer the question with", nargs='+')
    args = parser.parse_args()

    # Models path
    SQUAD_MODEL_DIRECTORY_PATH = 'docqa/models-cpu/squad'
    SQUAD_SHARED_NORM_MODEL_DIRECTORY_PATH = 'docqa/models-cpu/squad-shared-norm'
    TRIVIAQA_MODEL_DIRECTORY_PATH = 'docqa/models-cpu/triviaqa-unfiltered-shared-norm'
    TRIVIAQA_SHARED_NORM_MODEL_DIRECTORY_PATH = 'docqa/models-cpu/triviaqa-web-shared-norm'
    
    models_directory = [
        SQUAD_MODEL_DIRECTORY_PATH,
        SQUAD_SHARED_NORM_MODEL_DIRECTORY_PATH,
        TRIVIAQA_MODEL_DIRECTORY_PATH,
        TRIVIAQA_SHARED_NORM_MODEL_DIRECTORY_PATH
    ]

    print("Preprocessing...")

    # Load the model
    # model_dir = ModelDir(args.model)
    model_dir = ModelDir(models_directory[0])
    model = model_dir.get_model()
    if not isinstance(model, ParagraphQuestionModel):
        raise ValueError("This script is built to work for ParagraphQuestionModel models only")

    # Read the documents
    documents = []
    for doc in args.documents:
        if not isfile(doc):
            raise ValueError(doc + " does not exist")
        with open(doc, "r") as f:
            documents.append(f.read())
    print("Loaded %d documents" % len(documents))

    # Split documents into lists of paragraphs
    documents = [re.split(r"\s*\n\s*", doc) for doc in documents]

    # Tokenize the input; the model expects data to be tokenized using `NltkAndPunctTokenizer`
    # Note the model expects case-sensitive input
    tokenizer = NltkAndPunctTokenizer()
    question = tokenizer.tokenize_paragraph_flat(args.question)  # List of words
    # Now list of document->paragraph->sentence->word
    documents = [[tokenizer.tokenize_paragraph(p) for p in doc] for doc in documents]

    # Now group the document into paragraphs, this returns `ExtractedParagraph` objects
    # that additionally remember the start/end token of the paragraph within the source document
    splitter = MergeParagraphs(400)
    # splitter = PreserveParagraphs() # Uncomment to use the natural paragraph grouping
    documents = [splitter.split(doc) for doc in documents]

    # Now select the top paragraphs using a `ParagraphFilter`
    if len(documents) == 1:
        # Use TF-IDF to select top paragraphs from the document
        selector = TopTfIdf(NltkPlusStopWords(True), n_to_select=5)
        context = selector.prune(question, documents[0])
    else:
        # Use a linear classifier to select top paragraphs among all the documents
        selector = ShallowOpenWebRanker(n_to_select=10)
        context = selector.prune(question, flatten_iterable(documents))

    print("Select %d paragraph" % len(context))

    if model.preprocessor is not None:
        # Models are allowed to define an additional pre-processing step
        # This will turn the `ExtractedParagraph` objects back into simple lists of tokens
        context = [model.preprocessor.encode_text(question, x) for x in context]
    else:
        # Otherwise just use flattened text
        context = [flatten_iterable(x.text) for x in context]
        
    print("Setting up model")
    
    # Tell the model the batch size (can be None) and vocab to expect. This will load the
    # needed word vectors and fix the batch size to use when building the graph / encoding the input
    voc = set(question)
    for txt in context:
        voc.update(txt)
    model.set_input_spec(ParagraphAndQuestionSpec(batch_size=len(context)), voc)

    # Now we build the actual tensorflow graph, `best_span` and `conf` are
    # tensors holding the predicted span (inclusive) and confidence scores for each
    # element in the input batch, confidence scores being the pre-softmax logit for the span
    print("Build tf graph")
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    # We need to use sess.as_default when working with the cuDNN stuff, since we need an active
    # session to figure out the # of parameters needed for each layer. The cpu-compatible models don't need this.
    with sess.as_default():
        # 10 means to limit the span to size 10 or less
        best_spans, conf = model.get_prediction().get_best_span(10)

    # Loads the saved weights
    model_dir.restore_checkpoint(sess)

    # Now the model is ready to run
    # The model takes input in the form of `ContextAndQuestion` objects, for example:
    data = [ParagraphAndQuestion(x, question, None, "user-question%d"%i)
            for i, x in enumerate(context)]

    print("Starting run")
    # The model is run in two steps, first it "encodes" a batch of paragraph/context pairs
    # into numpy arrays, then we use `sess` to run the actual model and get the predictions
    encoded = model.encode(data, is_train=False)  # batch of `ContextAndQuestion` -> feed_dict
    best_spans, conf = sess.run([best_spans, conf], feed_dict=encoded)  # feed_dict -> predictions

    best_para = np.argmax(conf)  # We get output for each paragraph, select the most-confident one to print
    print("Best Paragraph: " + str(best_para))
    para_id = int(str(best_para))
    # print("Best Paragraph: \n" + (" ".join((paras[para_id].text)[0])))
    print("Best Paragraph: \n" + " ".join(context[para_id]))
    print("Best span: " + str(best_spans[best_para]))
    print("Answer text: " + " ".join(context[best_para][best_spans[best_para][0]:best_spans[best_para][1]+1]))
    print("Confidence: " + str(conf[best_para]))
def main():
    parser = argparse.ArgumentParser(
        description="Run an ELMo model on user input")
    parser.add_argument("model", help="Model directory")
    parser.add_argument("question", help="Question to answer")
    parser.add_argument("context", help="Context to answer the question with")
    args = parser.parse_args()

    # Tokenize the input; the model expects data to be tokenized using `NltkAndPunctTokenizer`
    # Note the model expects case-sensitive input
    tokenizer = NltkAndPunctTokenizer()
    question = tokenizer.tokenize_paragraph_flat(args.question)
    context = tokenizer.tokenize_paragraph_flat(args.context)

    print("Loading model")
    model_dir = ModelDir(args.model)
    model = model_dir.get_model()
    if not isinstance(model, ElmoQaModel):
        raise ValueError(
            "This script is build to work for ElmoQaModel models only")

    # Important! This tells the language model not to use the pre-computed word vectors,
    # which are only applicable for the SQuAD dev/train sets.
    # Instead the language model will use its character-level CNN to compute
    # the word vectors dynamically.
    model.lm_model.embed_weights_file = None

    # Tell the model the batch size and vocab to expect. This will load the needed
    # word vectors and fix the batch size when building the graph / encoding the input
    print("Setting up model")
    voc = set(question)
    voc.update(context)
    model.set_input_spec(ParagraphAndQuestionSpec(batch_size=1), voc)

    # Now we build the actual tensorflow graph, `best_span` and `conf` are
    # tensors holding the predicted span (inclusive) and confidence scores for each
    # element in the input batch
    print("Build tf graph")
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with sess.as_default():
        # 17 means to limit the span to size 17 or less
        best_spans, conf = model.get_prediction().get_best_span(17)

    # Now restore the weights, this is a bit fiddly since we need to avoid restoring the
    # bilm weights, and instead load them from the pre-computed data
    all_vars = tf.global_variables() + tf.get_collection(
        tf.GraphKeys.SAVEABLE_OBJECTS)
    lm_var_names = {x.name for x in all_vars if x.name.startswith("bilm")}
    vars = [x for x in all_vars if x.name not in lm_var_names]
    model_dir.restore_checkpoint(sess, vars)

    # Run the initializer of the lm weights, which will load them from the lm directory
    sess.run(
        tf.variables_initializer(
            [x for x in all_vars if x.name in lm_var_names]))

    # Now the model is ready to run
    # The model takes input in the form of `ContextAndQuestion` objects, for example:
    data = [ParagraphAndQuestion(context, question, None, "user-question1")]

    print("Starting run")
    # The model is run in two steps, first it "encodes" the paragraph/context pairs
    # into numpy arrays, then uses `sess` to run the actual model and get the predictions
    encoded = model.encode(
        data, is_train=False)  # batch of `ContextAndQuestion` -> feed_dict
    best_spans, conf = sess.run([best_spans, conf],
                                feed_dict=encoded)  # feed_dict -> predictions
    print("Best span: " + str(best_spans[0]))
    print("Answer text: " +
          " ".join(context[best_spans[0][0]:best_spans[0][1] + 1]))
    print("Confidence: " + str(conf[0]))
def main():
    parser = argparse.ArgumentParser(
        description="Run an ELMo model on user input")
    parser.add_argument("model", help="Model directory")
    parser.add_argument("ja_filepath", help="File path to japanese questions")
    parser.add_argument("result_file",
                        help="File path to predicted result json")
    args = parser.parse_args()
    print(args)

    print("Preprocessing...")

    paragraphs, questions = read_squad_style_database(args.ja_filepath)
    # Load the model
    model_dir = ModelDir(args.model)
    model = model_dir.get_model()
    if not isinstance(model, ParagraphQuestionModel):
        raise ValueError(
            "This script is built to work for ParagraphQuestionModel models only"
        )

    predictions = {}
    predictions["conf"] = {}
    for qa in questions:
        print(qa["id"])

        title = qa["title"]
        para_idx = qa["para_idx"]

        context = paragraphs[title][para_idx]
        question = qa["question"]

        print(context)
        print(question)

        if model.preprocessor is not None:
            context = [
                model.preprocessor.encode_text(question, x) for x in context
            ]

        print("Setting up model")

        voc = set(question)
        for txt in context:
            voc.update(txt)
        model.set_input_spec(ParagraphAndQuestionSpec(batch_size=len(context)),
                             voc)

        print("Build tf graph")
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        with sess.as_default():
            best_spans, conf = model.get_prediction().get_best_span(8)

        # Loads the saved weights
        model_dir.restore_checkpoint(sess)

        data = [
            ParagraphAndQuestion(x, question, None, "user-question%d" % i)
            for i, x in enumerate(context)
        ]

        print("Starting run")

        encoded = model.encode(
            data, is_train=False)  # batch of `ContextAndQuestion` -> feed_dict
        best_spans, conf = sess.run(
            [best_spans, conf], feed_dict=encoded)  # feed_dict -> predictions
        print(best_spans)
        predictions[qa["id"]] = best_spans
        predictions["conf"][qa["id"]] = conf
        print(predictions)

    with open(args.result_file, "w") as result_f:
        json.dump(predictions, result_f)
    # Note: exit() here means the official evaluation below never runs
    exit()
    official_evaluator = OfficialEvaluator(args.ja_filepath, args.result_file)
    evaluation = official_evaluator.evaluate()
    print(evaluation)
Exemplo n.º 29
0
def main():
    parser = argparse.ArgumentParser("Train our ELMo model on SQuAD")
    parser.add_argument("loss_mode", choices=['default', 'confidence'])
    parser.add_argument("output_dir")
    parser.add_argument("--dim", type=int, default=90)
    parser.add_argument("--l2", type=float, default=0)
    parser.add_argument("--mode",
                        choices=["input", "output", "both", "none"],
                        default="both")
    parser.add_argument("--top_layer_only", action="store_true")
    parser.add_argument("--no-tfidf",
                        action='store_true',
                        help="Don't add TF-IDF negative examples")
    args = parser.parse_args()

    out = args.output_dir + "-" + datetime.now().strftime("%m%d-%H%M%S")

    dim = args.dim
    recurrent_layer = CudnnGru(dim, w_init=TruncatedNormal(stddev=0.05))

    if args.loss_mode == 'default':
        n_epochs = 24
        answer_encoder = SingleSpanAnswerEncoder()
        predictor = BoundsPredictor(
            ChainBiMapper(first_layer=recurrent_layer,
                          second_layer=recurrent_layer))
        batcher = ClusteredBatcher(45, ContextLenKey(), False, False)
        data = DocumentQaTrainingData(SquadCorpus(), None, batcher, batcher)
    elif args.loss_mode == 'confidence':
        if args.no_tfidf:
            prepro = SquadDefault()
            n_epochs = 15
        else:
            prepro = SquadTfIdfRanker(NltkPlusStopWords(True), 4, True)
            n_epochs = 50
        answer_encoder = DenseMultiSpanAnswerEncoder()
        predictor = ConfidencePredictor(ChainBiMapper(
            first_layer=recurrent_layer,
            second_layer=recurrent_layer,
        ),
                                        AttentionEncoder(),
                                        FullyConnected(80, activation="tanh"),
                                        aggregate="sum")
        eval_dataset = RandomParagraphSetDatasetBuilder(
            100, 'flatten', True, 0)
        train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True,
                                          False)
        data = PreprocessedData(SquadCorpus(),
                                prepro,
                                StratifyParagraphsBuilder(train_batching, 1),
                                eval_dataset,
                                eval_on_verified=False)
        data.preprocess(1)

    params = trainer.TrainParams(trainer.SerializableOptimizer(
        "Adadelta", dict(learning_rate=1.0)),
                                 ema=0.999,
                                 max_checkpoints_to_keep=2,
                                 async_encoding=10,
                                 num_epochs=n_epochs,
                                 log_period=30,
                                 eval_period=1200,
                                 save_period=1200,
                                 best_weights=("dev", "b17/text-f1"),
                                 eval_samples=dict(dev=None, train=8000))

    lm_reduce = MapperSeq(
        ElmoLayer(args.l2,
                  layer_norm=False,
                  top_layer_only=args.top_layer_only),
        DropoutLayer(0.5),
    )
    model = AttentionWithElmo(
        encoder=DocumentAndQuestionEncoder(answer_encoder),
        lm_model=SquadContextConcatSkip(),
        append_before_atten=(args.mode == "both" or args.mode == "output"),
        append_embed=(args.mode == "both" or args.mode == "input"),
        max_batch_size=128,
        word_embed=FixedWordEmbedder(vec_name="glove.840B.300d",
                                     word_vec_init_scale=0,
                                     learn_unk=False,
                                     cpu=True),
        char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14,
                                                        char_th=49,
                                                        char_dim=20,
                                                        init_scale=0.05,
                                                        force_cpu=True),
                                    MaxPool(Conv1d(100, 5, 0.8)),
                                    shared_parameters=True),
        embed_mapper=SequenceMapperSeq(
            VariationalDropoutLayer(0.8),
            recurrent_layer,
            VariationalDropoutLayer(0.8),
        ),
        lm_reduce=None,
        lm_reduce_shared=lm_reduce,
        per_sentence=False,
        memory_builder=NullBiMapper(),
        attention=BiAttention(TriLinear(bias=True), True),
        match_encoder=SequenceMapperSeq(
            FullyConnected(dim * 2, activation="relu"),
            ResidualLayer(
                SequenceMapperSeq(
                    VariationalDropoutLayer(0.8),
                    recurrent_layer,
                    VariationalDropoutLayer(0.8),
                    StaticAttentionSelf(TriLinear(bias=True),
                                        ConcatWithProduct()),
                    FullyConnected(dim * 2, activation="relu"),
                )), VariationalDropoutLayer(0.8)),
        predictor=predictor)

    with open(__file__, "r") as f:
        notes = f.read()
        notes = str(sorted(args.__dict__.items(),
                           key=lambda x: x[0])) + "\n" + notes

    trainer.start_training(
        data, model, params,
        [LossEvaluator(),
         SpanEvaluator(bound=[17], text_eval="squad")], ModelDir(out), notes)
Exemplo n.º 30
0
def main():
    parser = argparse.ArgumentParser(description='Run the demo server')
    parser.add_argument(
        'model',
        default=
        "/home/antriv/conversation_ai/ALLENAI_DocumentQA/document-qa/models/triviaqa-unfiltered-shared-norm/best-weights",
        help='Models to use')

    parser.add_argument(
        '-v',
        '--voc',
        default=
        "/home/antriv/conversation_ai/ALLENAI_DocumentQA/document-qa/data/triviaqa/evidence/vocab.txt",
        help='vocab to use, only words from this file will be used')
    parser.add_argument('-t',
                        '--tokens',
                        type=int,
                        default=400,
                        help='Number of tokens to use per paragraph')
    parser.add_argument('--vec_dir',
                        default="/home/antriv/data/glove",
                        help='Location to find word vectors')
    parser.add_argument('--n_paragraphs',
                        type=int,
                        default=15,
                        help="Number of paragraphs to run the model on")
    parser.add_argument('--paragraphs_to_return',
                        type=int,
                        default=10,
                        help="Number of paragraphs return to the frontend")
    parser.add_argument('--span_bound',
                        type=int,
                        default=8,
                        help="Max span size to return as an answer")

    parser.add_argument(
        '--tagme_api_key',
        default="1cdc0067-b2de-4774-afbe-38703b11a365-843339462",
        help="Key to use for TAGME (tagme.d4science.org/tagme)")
    parser.add_argument('--bing_api_key',
                        default="413239df9faa4f1494a914e0c9cec78e",
                        help="Key to use for bing searches")
    parser.add_argument(
        '--bing_version',
        choices=["v5.0", "v7.0"],
        default="v7.0",
        help='Version of Bing API to use (must be compatible with the API key)'
    )
    parser.add_argument(
        '--tagme_thresh',
        default=0.2,
        type=float,
        help="TAGME threshold for when to use the identified docs")
    parser.add_argument('--n_web',
                        type=int,
                        default=10,
                        help='Number of web docs to fetch')
    parser.add_argument('--blacklist_trivia_sites',
                        action="store_true",
                        help="Don't use trivia websites")
    parser.add_argument(
        '-c',
        '--wiki_cache',
        default=
        "/home/antriv/conversation_ai/ALLENAI_DocumentQA/document-qa/data/triviaqa/evidence/wikipedia",
        help="Cache wiki articles in this directory")

    parser.add_argument('--n_dl_threads',
                        type=int,
                        default=5,
                        help="Number of threads to download documents with")
    parser.add_argument('--request_timeout', type=int, default=60)
    parser.add_argument('--download_timeout',
                        type=int,
                        default=25,
                        help="how long to wait before timing out downloads")
    parser.add_argument('--workers',
                        type=int,
                        default=1,
                        help="Number of server workers")
    parser.add_argument('--debug',
                        default=None,
                        choices=["random_model", "dummy_qa"])

    args = parser.parse_args()
    span_bound = args.span_bound
    n_to_return = args.paragraphs_to_return

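    # Fall back to environment variables when the API keys are not supplied on the command line.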
    if args.tagme_api_key is not None:
        tagme_api_key = args.tagme_api_key
    else:
        tagme_api_key = environ.get("TAGME_API_KEY")

    if args.bing_api_key is not None:
        bing_api_key = args.bing_api_key
    else:
        bing_api_key = environ.get("BING_API_KEY")
        if bing_api_key is None and args.n_web > 0:
            raise ValueError("If n_web > 0 you must give a BING_API_KEY")

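    # In debug mode, swap in a random predictor so the server can run without trained model weights.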
    if args.debug is None:
        model = ModelDir(args.model)
    else:
        model = RandomPredictor(5, WithIndicators())

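    # Word vectors are loaded from --vec_dir when given; otherwise fall back to the default ResourceLoader.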
    if args.vec_dir is not None:
        loader = LoadFromPath(args.vec_dir)
    else:
        loader = ResourceLoader()

    # Update Sanic's logging to register our class's loggers
    log_config = LOGGING
    formatter = "%(asctime)s: %(levelname)s: %(message)s"
    log_config["formatters"]['my_formatter'] = {
        'format': formatter,
        'datefmt': '%Y-%m-%d %H:%M:%S',
    }
    log_config['handlers']['stream_handler'] = {
        'class': "logging.StreamHandler",
        'formatter': 'my_formatter',
        'stream': sys.stderr
    }
    log_config['handlers']['file_handler'] = {
        'class': "logging.FileHandler",
        'formatter': 'my_formatter',
        'filename': 'logging.log'
    }

    # It looks like we have to go and name every logger our own code might
    # use in order to register it with Sanic
    log_config["loggers"]['qa_system'] = {
        'level': 'INFO',
        'handlers': ['stream_handler', 'file_handler'],
    }
    log_config["loggers"]['downloader'] = {
        'level': 'INFO',
        'handlers': ['stream_handler', 'file_handler'],
    }
    log_config["loggers"]['server'] = {
        'level': 'INFO',
        'handlers': ['stream_handler', 'file_handler'],
    }

    app = Sanic()
    app.config.REQUEST_TIMEOUT = args.request_timeout
    log = logging.getLogger('server')

    @app.listener('before_server_start')
    async def setup_qa(app, loop):
        # To play nice with aiohttp's async ClientSession objects, we need to construct the QaSystem
        # inside the event loop.
        if args.debug == "dummy_qa":
            qa = DummyQa()
        else:
            qa = QaSystem(
                args.wiki_cache,
                MergeParagraphs(args.tokens),
                ShallowOpenWebRanker(args.n_paragraphs),
                args.voc,
                model,
                loader,
                bing_api_key,
                bing_version=args.bing_version,
                tagme_api_key=tagme_api_key,
                n_dl_threads=args.n_dl_threads,
                blacklist_trivia_sites=args.blacklist_trivia_sites,
                download_timeout=args.download_timeout,
                span_bound=span_bound,
                tagme_threshold=None if
                (tagme_api_key is None) else args.tagme_thresh,
                n_web_docs=args.n_web,
            )
        app.qa = qa

    @app.listener('after_server_stop')
    async def close_qa(app, loop):
        app.qa.close()

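    # GET /answer?question=...: run the full pipeline (web/wiki retrieval plus the model) and return the top answers as JSON.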
    @app.route("/answer")
    async def answer(request):
        try:
            question = request.args["question"][0]
            if question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            spans, paras = await app.qa.answer_question(question)
            answers = select_answers(paras, spans, span_bound, 10)
            answers = answers[:n_to_return]
            best_span = max(answers[0].answers, key=lambda x: x.conf)
            log.info("Answered \"%s\" (with web search): \"%s\"", question,
                     answers[0].original_text[best_span.start:best_span.end])
            return json([x.to_json() for x in answers])
        except Exception as e:
            log.info("Error: " + str(e))
            raise ServerError(e, status_code=500)

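    # POST /answer-from with a JSON body {"question": ..., "document": ...}: answer the question using only the supplied document.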
    @app.route('/answer-from', methods=['POST'])
    async def answer_from(request):
        try:
            args = ujson.loads(request.body.decode("utf-8"))
            question = args.get("question")
            if question is None or question == "":
                return response.json({'message': 'No question given'},
                                     status=400)
            doc = args["document"]
            if len(doc) > 500000:
                raise ServerError("Document too large", status_code=400)
            spans, paras = app.qa.answer_with_doc(question, doc)
            answers = select_answers(paras, spans, span_bound, 10)
            answers = answers[:n_to_return]
            best_span = max(answers[0].answers, key=lambda x: x.conf)
            log.info("Answered \"%s\" (with user doc): \"%s\"", question,
                     answers[0].original_text[best_span.start:best_span.end])
            return json([x.to_json() for x in answers])
        except Exception as e:
            log.info("Error: " + str(e))
            raise ServerError(e, status_code=500)

    app.static('/', './docqa/server/static/index.html')
    app.static('/about.html', './docqa/server/static/about.html')
    app.run(host="0.0.0.0",
            port=5000,
            workers=args.workers,
            debug=False,
            log_config=log_config)
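

# A minimal client-side sketch (not part of the original script) showing how the two
# routes defined above could be queried once the server is running. It assumes the
# server listens on localhost:5000 as configured in app.run(), and uses the `requests`
# library purely for illustration; the server itself does not depend on it.
import requests


def query_demo_server(base_url="http://localhost:5000"):
    # Ask a question that goes through web/wiki retrieval (GET /answer).
    web_answers = requests.get(base_url + "/answer",
                               params={"question": "Who wrote Hamlet?"}).json()

    # Ask a question against a user-supplied document (POST /answer-from).
    doc_answers = requests.post(base_url + "/answer-from",
                                json={"question": "Who wrote it?",
                                      "document": "Hamlet was written by William Shakespeare."}).json()
    return web_answers, doc_answers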