Example #1
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    FLAGS_check(FLAGS)

    config = Config(FLAGS.config, verbose=False)
    vocabloader = VocabLoader(config)

    FLAGS.dropout_keep_prob = 1.0
    FLAGS.wordDropoutKeep = 1.0
    FLAGS.cohDropoutKeep = 1.0

    (input_ta_files, output_ta_files) = getAllTAFilePaths(FLAGS)
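    # getAllTAFilePaths is assumed to return aligned lists: one input
    # TextAnnotation path per output path (they are zipped together below).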

    print("TOTAL NUMBER OF TAS : {}".format(len(intput_ta_files)))

    reader = TextAnnoTestReader(
        config=config,
        vocabloader=vocabloader,
        # test_mens_file=config.test_file,
        num_cands=30,
        batch_size=FLAGS.batch_size,
        strict_context=FLAGS.strict_context,
        pretrain_wordembed=FLAGS.pretrain_wordembed,
        coherence=FLAGS.coherence)
    model_mode = 'test'

    config_proto = tf.ConfigProto()
    config_proto.allow_soft_placement = True
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    with sess.as_default():
        model = ELModel(
            sess=sess,
            reader=reader,
            dataset=FLAGS.dataset,
            max_steps=FLAGS.max_steps,
            pretrain_max_steps=FLAGS.pretraining_steps,
            word_embed_dim=FLAGS.word_embed_dim,
            context_encoded_dim=FLAGS.context_encoded_dim,
            context_encoder_num_layers=FLAGS.context_encoder_num_layers,
            context_encoder_lstmsize=FLAGS.context_encoder_lstmsize,
            coherence_numlayers=FLAGS.coherence_numlayers,
            jointff_numlayers=FLAGS.jointff_numlayers,
            learning_rate=FLAGS.learning_rate,
            dropout_keep_prob=FLAGS.dropout_keep_prob,
            reg_constant=FLAGS.reg_constant,
            checkpoint_dir=FLAGS.checkpoint_dir,
            optimizer=FLAGS.optimizer,
            mode=model_mode,
            strict=FLAGS.strict_context,
            pretrain_word_embed=FLAGS.pretrain_wordembed,
            typing=FLAGS.typing,
            el=FLAGS.el,
            coherence=FLAGS.coherence,
            textcontext=FLAGS.textcontext,
            useCNN=FLAGS.useCNN,
            WDLength=FLAGS.WDLength,
            Fsize=FLAGS.Fsize,
            entyping=FLAGS.entyping)

        model.load_ckpt_model(ckptpath=FLAGS.model_path)

        print("Total files: {}".format(len(output_ta_files)))
        erroneous_files = 0
        for in_ta_path, out_ta_path in zip(input_ta_files, output_ta_files):
            # print("Running the inference for : {}".format(in_ta_path))
            try:
                reader.new_test_file(in_ta_path)
            except Exception:
                print("Error reading: {}".format(in_ta_path))
                erroneous_files += 1
                continue

            (predTypScNPmat_list, widIdxs_list, priorProbs_list,
             textProbs_list, jointProbs_list, evWTs_list,
             pred_TypeSetsList) = model.inference_run()
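            # The per-mention lists returned above are parallel: entry i holds
            # mention i's candidate WID indices and their prior, context and
            # joint probabilities, consumed by index in the loop below.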

            # model.inference(ckptpath=FLAGS.model_path)

            wiki_view = copy.deepcopy(reader.textanno.get_view("NER_LORELEI"))
            #wiki_view = copy.deepcopy(reader.textanno.get_view("MENTION"))
            #wiki_view = copy.deepcopy(reader.textanno.get_view("NER_CONLL"))
            #wiki_view = copy.deepcopy(reader.textanno.get_view("English_NERVIEW"))
            # wiki_view_json = copy.deepcopy(reader.textanno.get_view("NER").as_json)
            docta = reader.textanno

            #el_cons_list = [con for con in wiki_view.cons_list if "NAM" in con['label']]
            el_cons_list = wiki_view.cons_list
            numMentionsInference = len(widIdxs_list)

            # print("Number of mentions in model: {}".format(len(widIdxs_list)))
            # print("Number of NER mention: {}".format(len(el_cons_list)))

            assert len(el_cons_list) == numMentionsInference

            mentionnum = 0
            for ner_cons in el_cons_list:
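                # Per-candidate score maps keyed by Wikipedia title: prior,
                # context-model and joint probabilities for this mention.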
                priorScoreMap = {}
                contextScoreMap = {}
                jointScoreMap = {}

                (wididxs, pps, mps,
                 jps) = (widIdxs_list[mentionnum], priorProbs_list[mentionnum],
                         textProbs_list[mentionnum],
                         jointProbs_list[mentionnum])

                maxJointProb = 0.0
                maxJointEntity = ""
                for (wididx, prp, mp, jp) in zip(wididxs, pps, mps, jps):
                    wT = reader.widIdx2WikiTitle(wididx)
                    priorScoreMap[wT] = prp
                    contextScoreMap[wT] = mp
                    jointScoreMap[wT] = jp

                    if jp > maxJointProb:
                        maxJointProb = jp
                        maxJointEntity = wT
                # add labels2score map here
                #ner_cons["jointScoreMap"] = jointScoreMap
                #ner_cons["contextScoreMap"] = contextScoreMap
                #ner_cons["priorScoreMap"] = priorScoreMap

                ner_cons["labelScoreMap"] = jointScoreMap
                # add max scoring entity as label
                ner_cons["label"] = maxJointEntity
                ner_cons["score"] = maxJointProb

                mentionnum += 1

            wiki_view.view_name = "NEUREL"
            docta.view_dictionary["NEUREL"] = wiki_view

            docta_json = docta.as_json
            with open(out_ta_path, "w") as out_f:
                json.dump(docta_json, out_f, indent=True)

        print("Number of erroneous files: {}".format(erroneous_files))
        print("Annotation completed. Program can be exited safely.")
    sys.exit()
Example #2
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    FLAGS_check(FLAGS)
    # pipeline = LocalPipeline()
    pipeline = RemotePipeline(server_api="http://macniece.seas.upenn.edu:4001")

    config = Config(FLAGS.config, verbose=False)
    vocabloader = VocabLoader(config)

    FLAGS.dropout_keep_prob = 1.0
    FLAGS.wordDropoutKeep = 1.0
    FLAGS.cohDropoutKeep = 1.0

    input_jsonl = FLAGS.input_jsonl
    output_jsonl = FLAGS.output_jsonl

    reader = TextAnnoTestReader(
        config=config,
        vocabloader=vocabloader,
        num_cands=30,
        batch_size=FLAGS.batch_size,
        strict_context=FLAGS.strict_context,
        pretrain_wordembed=FLAGS.pretrain_wordembed,
        coherence=FLAGS.coherence,
    )
    model_mode = "test"

    config_proto = tf.ConfigProto()
    config_proto.allow_soft_placement = True
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    with sess.as_default():
        model = ELModel(
            sess=sess,
            reader=reader,
            dataset=FLAGS.dataset,
            max_steps=FLAGS.max_steps,
            pretrain_max_steps=FLAGS.pretraining_steps,
            word_embed_dim=FLAGS.word_embed_dim,
            context_encoded_dim=FLAGS.context_encoded_dim,
            context_encoder_num_layers=FLAGS.context_encoder_num_layers,
            context_encoder_lstmsize=FLAGS.context_encoder_lstmsize,
            coherence_numlayers=FLAGS.coherence_numlayers,
            jointff_numlayers=FLAGS.jointff_numlayers,
            learning_rate=FLAGS.learning_rate,
            dropout_keep_prob=FLAGS.dropout_keep_prob,
            reg_constant=FLAGS.reg_constant,
            checkpoint_dir=FLAGS.checkpoint_dir,
            optimizer=FLAGS.optimizer,
            mode=model_mode,
            strict=FLAGS.strict_context,
            pretrain_word_embed=FLAGS.pretrain_wordembed,
            typing=FLAGS.typing,
            el=FLAGS.el,
            coherence=FLAGS.coherence,
            textcontext=FLAGS.textcontext,
            useCNN=FLAGS.useCNN,
            WDLength=FLAGS.WDLength,
            Fsize=FLAGS.Fsize,
            entyping=FLAGS.entyping,
        )

        model.load_ckpt_model(ckptpath=FLAGS.model_path)

        outf = open(output_jsonl, "w")
        with open(input_jsonl, "r") as in_f:
            inpf = in_f.readlines()

        for line in tqdm(inpf):
            example_json = json.loads(line)
            doctext = ftfy.fix_text(example_json['text'])
            out_dict = {
                'text': doctext,
                'qanta_id': example_json['qanta_id'],
                'sent_id': example_json['sent_id']
            }
            try:
                ta = pipeline.doc(doctext)
            except MisalignedCharError as e:
                out_dict['error'] = str(e)
                outf.write(json.dumps(out_dict))
                outf.write("\n")
                continue
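            # Touching get_ner_conll (presumably) forces the NER_CONLL view to
            # be computed and attached before the reader consumes it below.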
            _ = ta.get_ner_conll

            # Make instances for this document
            reader.new_ta(ta)

            (
                predTypScNPmat_list,
                widIdxs_list,
                priorProbs_list,
                textProbs_list,
                jointProbs_list,
                evWTs_list,
                pred_TypeSetsList,
            ) = model.inference_run()
            if pred_TypeSetsList is None:
                continue

            wiki_view = copy.deepcopy(reader.textanno.get_view("NER_CONLL"))
            docta = reader.textanno

            el_cons_list = wiki_view.cons_list
            numMentionsInference = len(widIdxs_list)

            assert len(el_cons_list) == numMentionsInference

            el_mentions = []

            mentionnum = 0
            for ner_cons in el_cons_list:
                # ner_cons is a dict
                mentiondict = {}
                mentiondict["tokens"] = ner_cons["tokens"]
                mentiondict["end"] = ner_cons["end"]
                mentiondict["start"] = ner_cons["start"]

                priorScoreMap = {}
                contextScoreMap = {}
                jointScoreMap = {}

                (wididxs, pps, mps, jps) = (
                    widIdxs_list[mentionnum],
                    priorProbs_list[mentionnum],
                    textProbs_list[mentionnum],
                    jointProbs_list[mentionnum],
                )

                maxJointProb = 0.0
                maxJointEntity = ""
                for (wididx, prp, mp, jp) in zip(wididxs, pps, mps, jps):
                    wT = reader.widIdx2WikiTitle(wididx)
                    priorScoreMap[wT] = prp
                    contextScoreMap[wT] = mp
                    jointScoreMap[wT] = jp

                    if jp > maxJointProb:
                        maxJointProb = jp
                        maxJointEntity = wT

                mentiondict["jointScoreMap"] = jointScoreMap
                mentiondict["contextScoreMap"] = contextScoreMap
                mentiondict["priorScoreMap"] = priorScoreMap

                # add max scoring entity as label
                mentiondict["label"] = maxJointEntity
                mentiondict["score"] = maxJointProb

                mentionnum += 1

                el_mentions.append(mentiondict)

            out_dict["nel"] = el_mentions
            outf.write(json.dumps(out_dict))
            outf.write("\n")

        outf.close()

        print("Annotation completed. Program can be exited safely.")
    sys.exit()
Example #3
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    FLAGS_check(FLAGS)

    config = Config(FLAGS.config, verbose=False)
    vocabloader = VocabLoader(config)

    if FLAGS.mode == 'inference':
        FLAGS.dropout_keep_prob = 1.0
        FLAGS.wordDropoutKeep = 1.0
        FLAGS.cohDropoutKeep = 1.0

        reader = InferenceReader(config=config,
                                 vocabloader=vocabloader,
                                 test_mens_file=config.test_file,
                                 num_cands=FLAGS.num_cand_entities,
                                 batch_size=FLAGS.batch_size,
                                 strict_context=FLAGS.strict_context,
                                 pretrain_wordembed=FLAGS.pretrain_wordembed,
                                 coherence=FLAGS.coherence)
        model_mode = 'test'
    else:
        print("MODE in FLAGS is incorrect : {}".format(FLAGS.mode))
        sys.exit()

    config_proto = tf.ConfigProto()
    config_proto.allow_soft_placement = True
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    with sess.as_default():
        model = ELModel(
            sess=sess,
            reader=reader,
            dataset=FLAGS.dataset,
            max_steps=FLAGS.max_steps,
            pretrain_max_steps=FLAGS.pretraining_steps,
            word_embed_dim=FLAGS.word_embed_dim,
            context_encoded_dim=FLAGS.context_encoded_dim,
            context_encoder_num_layers=FLAGS.context_encoder_num_layers,
            context_encoder_lstmsize=FLAGS.context_encoder_lstmsize,
            coherence_numlayers=FLAGS.coherence_numlayers,
            jointff_numlayers=FLAGS.jointff_numlayers,
            learning_rate=FLAGS.learning_rate,
            dropout_keep_prob=FLAGS.dropout_keep_prob,
            reg_constant=FLAGS.reg_constant,
            checkpoint_dir=FLAGS.checkpoint_dir,
            optimizer=FLAGS.optimizer,
            mode=model_mode,
            strict=FLAGS.strict_context,
            pretrain_word_embed=FLAGS.pretrain_wordembed,
            typing=FLAGS.typing,
            el=FLAGS.el,
            coherence=FLAGS.coherence,
            textcontext=FLAGS.textcontext,
            useCNN=FLAGS.useCNN,
            WDLength=FLAGS.WDLength,
            Fsize=FLAGS.Fsize,
            entyping=FLAGS.entyping)

        if FLAGS.mode == 'inference':
            print("Doing inference")
            (predTypScNPmat_list, widIdxs_list, priorProbs_list,
             textProbs_list, evWTs_list,
             pred_TypeSetsList) = model.inference(ckptpath=FLAGS.model_path)
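            # model.inference() restores the checkpoint at ckptpath and returns
            # one entry per mention; the count is cross-checked against the
            # reader's sentidx2ners below.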

            numMentionsInference = len(widIdxs_list)
            numMentionsReader = 0
            for sent_idx in reader.sentidx2ners:
                numMentionsReader += len(reader.sentidx2ners[sent_idx])
            assert numMentionsInference == numMentionsReader

            mentionnum = 0
            for sent_idx in reader.sentidx2ners:
                nerDicts = reader.sentidx2ners[sent_idx]
                sentence = ' '.join(reader.sentences_tokenized[sent_idx])
                for s, ner in nerDicts:
                    [evWTs, evWIDS, evProbs] = evWTs_list[mentionnum]
                    predTypes = pred_TypeSetsList[mentionnum]
                    print(reader.bracketMentionInSentence(sentence, ner))
                    print("Prior: {} {}, Context: {} {}, Joint: {} {}".format(
                        evWTs[0], evProbs[0], evWTs[1], evProbs[1], evWTs[2],
                        evProbs[2]))
                    print("Predicted Entity Types : {}".format(predTypes))
                    print("\n")
                    mentionnum += 1

        else:
            print("WRONG MODE!")
            sys.exit(0)
    sys.exit()
Example #4
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    FLAGS_check(FLAGS)

    config = Config(FLAGS.config, verbose=False)
    vocabloader = VocabLoader(config)

    if FLAGS.mode == 'inference':
        FLAGS.dropout_keep_prob = 1.0
        FLAGS.wordDropoutKeep = 1.0
        FLAGS.cohDropoutKeep = 1.0

        reader = InferenceReader(config=config,
                                 vocabloader=vocabloader,
                                 test_mens_file=config.test_file,
                                 num_cands=FLAGS.num_cand_entities,
                                 batch_size=FLAGS.batch_size,
                                 strict_context=FLAGS.strict_context,
                                 pretrain_wordembed=FLAGS.pretrain_wordembed,
                                 coherence=FLAGS.coherence)
        docta = reader.ccgdoc
        model_mode = 'inference'

    elif FLAGS.mode == 'test':
        FLAGS.dropout_keep_prob = 1.0
        FLAGS.wordDropoutKeep = 1.0
        FLAGS.cohDropoutKeep = 1.0

        reader = TestDataReader(config=config,
                                vocabloader=vocabloader,
                                test_mens_file=config.test_file,
                                num_cands=30,
                                batch_size=FLAGS.batch_size,
                                strict_context=FLAGS.strict_context,
                                pretrain_wordembed=FLAGS.pretrain_wordembed,
                                coherence=FLAGS.coherence)
        model_mode = 'test'

    else:
        print("MODE in FLAGS is incorrect : {}".format(FLAGS.mode))
        sys.exit()

    config_proto = tf.ConfigProto()
    config_proto.allow_soft_placement = True
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    with sess.as_default():
        model = ELModel(
            sess=sess,
            reader=reader,
            dataset=FLAGS.dataset,
            max_steps=FLAGS.max_steps,
            pretrain_max_steps=FLAGS.pretraining_steps,
            word_embed_dim=FLAGS.word_embed_dim,
            context_encoded_dim=FLAGS.context_encoded_dim,
            context_encoder_num_layers=FLAGS.context_encoder_num_layers,
            context_encoder_lstmsize=FLAGS.context_encoder_lstmsize,
            coherence_numlayers=FLAGS.coherence_numlayers,
            jointff_numlayers=FLAGS.jointff_numlayers,
            learning_rate=FLAGS.learning_rate,
            dropout_keep_prob=FLAGS.dropout_keep_prob,
            reg_constant=FLAGS.reg_constant,
            checkpoint_dir=FLAGS.checkpoint_dir,
            optimizer=FLAGS.optimizer,
            mode=model_mode,
            strict=FLAGS.strict_context,
            pretrain_word_embed=FLAGS.pretrain_wordembed,
            typing=FLAGS.typing,
            el=FLAGS.el,
            coherence=FLAGS.coherence,
            textcontext=FLAGS.textcontext,
            useCNN=FLAGS.useCNN,
            WDLength=FLAGS.WDLength,
            Fsize=FLAGS.Fsize,
            entyping=FLAGS.entyping)

        if FLAGS.mode == 'inference':
            print("Doing inference")
            (predTypScNPmat_list, widIdxs_list, priorProbs_list,
             textProbs_list, jointProbs_list, evWTs_list,
             pred_TypeSetsList) = model.inference(ckptpath=FLAGS.model_path)

            numMentionsInference = len(widIdxs_list)
            numMentionsReader = 0
            for sent_idx in reader.sentidx2ners:
                numMentionsReader += len(reader.sentidx2ners[sent_idx])
            assert numMentionsInference == numMentionsReader

            mentionnum = 0
            entityTitleList = []
            for sent_idx in reader.sentidx2ners:
                nerDicts = reader.sentidx2ners[sent_idx]
                sentence = ' '.join(reader.sentences_tokenized[sent_idx])
                for s, ner in nerDicts:
                    [evWTs, evWIDS, evProbs] = evWTs_list[mentionnum]
                    predTypes = pred_TypeSetsList[mentionnum]
                    print(reader.bracketMentionInSentence(sentence, ner))
                    print("Prior: {} {}, Context: {} {}, Joint: {} {}".format(
                        evWTs[0], evProbs[0], evWTs[1], evProbs[1], evWTs[2],
                        evProbs[2]))

                    entityTitleList.append(evWTs[2])
                    print("Predicted Entity Types : {}".format(predTypes))
                    print("\n")
                    mentionnum += 1

            elview = copy.deepcopy(docta.view_dictionary['NER_CONLL'])
            elview.view_name = 'ENG_NEURAL_EL'
            for i, cons in enumerate(elview.cons_list):
                cons['label'] = entityTitleList[i]

            docta.view_dictionary['ENG_NEURAL_EL'] = elview

            print("elview.cons_list")
            print(elview.cons_list)
            print("\n")

            for v in docta.as_json['views']:
                print(v)
                print("\n")

        elif FLAGS.mode == 'test':
            print("Testing on Data ")
            (widIdxs_list, condProbs_list, contextProbs_list,
             condContextJointProbs_list, evWTs,
             sortedContextWTs) = model.dataset_test(ckptpath=FLAGS.model_path)

            print(len(widIdxs_list))
            print(len(condProbs_list))
            print(len(contextProbs_list))
            print(len(condContextJointProbs_list))
            print(len(reader.mentions))
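            # Output format: one line per mention, with one tab-separated
            # "WikiTitle prior context joint" field per candidate (see below).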

            print("Writing Test Predictions: {}".format(FLAGS.test_out_fp))
            with open(FLAGS.test_out_fp, 'w') as f:
                for (wididxs, pps, mps,
                     jps) in zip(widIdxs_list, condProbs_list,
                                 contextProbs_list,
                                 condContextJointProbs_list):

                    mentionPred = ""

                    for (wididx, prp, mp, jp) in zip(wididxs, pps, mps, jps):
                        wit = reader.widIdx2WikiTitle(wididx)
                        mentionPred += wit + " " + str(prp) + " " + \
                            str(mp) + " " + str(jp)
                        mentionPred += "\t"

                    mentionPred = mentionPred.strip() + "\n"

                    f.write(mentionPred)

            print("Done writing. Can Exit.")

        else:
            print("WRONG MODE!")
            sys.exit(0)

    sys.exit()
Example #5
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    FLAGS_check(FLAGS)

    config = Config(FLAGS.config, verbose=False)
    vocabloader = VocabLoader(config)

    FLAGS.dropout_keep_prob = 1.0
    FLAGS.wordDropoutKeep = 1.0
    FLAGS.cohDropoutKeep = 1.0

    input_jsonl = FLAGS.input_jsonl
    output_jsonl = FLAGS.output_jsonl
    doc_key = FLAGS.doc_key

    reader = TextAnnoTestReader(config=config,
                                vocabloader=vocabloader,
                                num_cands=30,
                                batch_size=FLAGS.batch_size,
                                strict_context=FLAGS.strict_context,
                                pretrain_wordembed=FLAGS.pretrain_wordembed,
                                coherence=FLAGS.coherence)
    model_mode = 'test'

    config_proto = tf.ConfigProto()
    config_proto.allow_soft_placement = True
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    with sess.as_default():
        model = ELModel(
            sess=sess,
            reader=reader,
            dataset=FLAGS.dataset,
            max_steps=FLAGS.max_steps,
            pretrain_max_steps=FLAGS.pretraining_steps,
            word_embed_dim=FLAGS.word_embed_dim,
            context_encoded_dim=FLAGS.context_encoded_dim,
            context_encoder_num_layers=FLAGS.context_encoder_num_layers,
            context_encoder_lstmsize=FLAGS.context_encoder_lstmsize,
            coherence_numlayers=FLAGS.coherence_numlayers,
            jointff_numlayers=FLAGS.jointff_numlayers,
            learning_rate=FLAGS.learning_rate,
            dropout_keep_prob=FLAGS.dropout_keep_prob,
            reg_constant=FLAGS.reg_constant,
            checkpoint_dir=FLAGS.checkpoint_dir,
            optimizer=FLAGS.optimizer,
            mode=model_mode,
            strict=FLAGS.strict_context,
            pretrain_word_embed=FLAGS.pretrain_wordembed,
            typing=FLAGS.typing,
            el=FLAGS.el,
            coherence=FLAGS.coherence,
            textcontext=FLAGS.textcontext,
            useCNN=FLAGS.useCNN,
            WDLength=FLAGS.WDLength,
            Fsize=FLAGS.Fsize,
            entyping=FLAGS.entyping)

        model.load_ckpt_model(ckptpath=FLAGS.model_path)

        erroneous_files = 0

        outf = open(output_jsonl, 'w')
        inpf = open(input_jsonl, 'r')

        for line in inpf:
            jsonobj = json.loads(line)
            doctext = jsonobj[doc_key]
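            # doc_key (from FLAGS) names the JSONL field that holds the raw
            # document text to annotate.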
            ta = localpipeline.doc(doctext, pretokenized=FLAGS.pretokenized)
            _ = ta.get_ner_conll

            # Make instances for this document
            reader.new_ta(ta)

            (predTypScNPmat_list, widIdxs_list, priorProbs_list,
             textProbs_list, jointProbs_list, evWTs_list,
             pred_TypeSetsList) = model.inference_run()

            wiki_view = copy.deepcopy(reader.textanno.get_view("NER_CONLL"))
            docta = reader.textanno

            el_cons_list = wiki_view.cons_list
            numMentionsInference = len(widIdxs_list)

            assert len(el_cons_list) == numMentionsInference

            out_dict = {doc_key: doctext}
            el_mentions = []

            mentionnum = 0
            for ner_cons in el_cons_list:
                # ner_cons is a dict
                mentiondict = {}
                mentiondict['tokens'] = ner_cons['tokens']
                mentiondict['end'] = ner_cons['end']
                mentiondict['start'] = ner_cons['start']

                priorScoreMap = {}
                contextScoreMap = {}
                jointScoreMap = {}

                (wididxs, pps, mps,
                 jps) = (widIdxs_list[mentionnum], priorProbs_list[mentionnum],
                         textProbs_list[mentionnum],
                         jointProbs_list[mentionnum])

                maxJointProb = 0.0
                maxJointEntity = ""
                for (wididx, prp, mp, jp) in zip(wididxs, pps, mps, jps):
                    wT = reader.widIdx2WikiTitle(wididx)
                    priorScoreMap[wT] = prp
                    contextScoreMap[wT] = mp
                    jointScoreMap[wT] = jp

                    if jp > maxJointProb:
                        maxJointProb = jp
                        maxJointEntity = wT

                mentiondict["jointScoreMap"] = jointScoreMap
                mentiondict["contextScoreMap"] = contextScoreMap
                mentiondict["priorScoreMap"] = priorScoreMap

                # add max scoring entity as label
                mentiondict["label"] = maxJointEntity
                mentiondict["score"] = maxJointProb

                mentionnum += 1

                el_mentions.append(mentiondict)

            out_dict['nel'] = el_mentions
            outstr = json.dumps(out_dict)
            outf.write(outstr)
            outf.write("\n")

        outf.close()
        inpf.close()

        print("Number of erroneous files: {}".format(erroneous_files))
        print("Annotation completed. Program can be exited safely.")
    sys.exit()