Example #1
def main(evaluate=False):
    # Load the HotpotQA question splits and a CoreNLP-based tokenizer.
    corpus = HotpotQuestions()
    dev_qs = corpus.get_dev()
    train_qs = corpus.get_train()
    tokenizer = CoreNLPTokenizer()

    def tokenize(text):
        return tokenizer.tokenize(text).words()

    if evaluate:
        # Report how well the answer detector recovers the gold answers on each split.
        print("Train:")
        evaluate_question_detector(train_qs, tokenize, FastNormalizedAnswerDetector(), compute_f1s=True)
        print("Dev:")
        evaluate_question_detector(dev_qs, tokenize, FastNormalizedAnswerDetector(), compute_f1s=True)
    else:
        # Precompute answer spans for both splits and build the preprocessed corpus.
        train = compute_answer_spans(train_qs, FastNormalizedAnswerDetector(), tokenize)
        dev = compute_answer_spans(dev_qs, FastNormalizedAnswerDetector(), tokenize)
        HotpotQuestions.make_corpus(train, dev)
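A minimal, assumed command-line entry point for this preprocessing step (the argparse wiring is not part of the original example):

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Precompute answer spans or evaluate the answer detector")
    parser.add_argument("--evaluate", action="store_true")
    main(evaluate=parser.parse_args().evaluate)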
Example #2
def init():
    # Per-worker setup: create one CoreNLPTokenizer for this process and
    # register its shutdown to run when the process exits.
    global PROCESS_TOK
    PROCESS_TOK = CoreNLPTokenizer()
    Finalize(PROCESS_TOK, PROCESS_TOK.shutdown, exitpriority=100)
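This is the usual shape of a multiprocessing worker initializer: each pool process builds its own tokenizer and registers cleanup via multiprocessing.util.Finalize. A minimal usage sketch, assuming the worker function tokenize_text and the Pool wiring below (neither appears in the original example):

from multiprocessing import Pool

def tokenize_text(text):
    # Runs inside a worker process; PROCESS_TOK was created there by init().
    return PROCESS_TOK.tokenize(text).words()

if __name__ == "__main__":
    with Pool(processes=4, initializer=init) as pool:
        tokens = pool.map(tokenize_text, ["First document.", "Second document."])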
Example #3
def init():
    # Per-worker setup: one tokenizer and one document-database handle per
    # process, both registered for cleanup at process exit.
    global PROCESS_TOK, PROCESS_DB
    PROCESS_TOK = CoreNLPTokenizer()
    Finalize(PROCESS_TOK, PROCESS_TOK.shutdown, exitpriority=100)
    PROCESS_DB = DocDB()
    Finalize(PROCESS_DB, PROCESS_DB.close, exitpriority=100)
Example #4
def init_encoding_handler(encoding_dir):
    # Per-worker setup: open the document-encoding store for the given directory
    # and create a tokenizer, shutting the tokenizer down at process exit.
    global DOC_ENCS_HANDLER, PROCESS_TOK
    DOC_ENCS_HANDLER = DocumentEncodingHandler(encoding_dir)
    PROCESS_TOK = CoreNLPTokenizer()
    Finalize(PROCESS_TOK, PROCESS_TOK.shutdown, exitpriority=100)
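Because this initializer takes an argument, a pool would pass it through initargs. A sketch under that assumption; the directory path and the encode_doc worker are placeholders, not from the original code:

from multiprocessing import Pool

def encode_doc(doc_id):
    # Runs in a worker; DOC_ENCS_HANDLER and PROCESS_TOK were created by the
    # initializer in this process. The real body would use them here.
    pass

if __name__ == "__main__":
    with Pool(processes=4,
              initializer=init_encoding_handler,
              initargs=("/path/to/encodings",)) as pool:
        pool.map(encode_doc, ["doc-1", "doc-2"])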