示例#1
0
def test_stanford_enable_disable():
    """
    Toggle Stanford support off and then on, checking the flag after each switch.

    The state observed on entry is put back in the ``finally`` clause, so it is
    restored even when one of the assertions fails.
    """
    initially_enabled = is_stanford_enabled()
    try:
        # Off first, then on: both transitions must be reflected by the flag.
        disable_stanford()
        assert_false(is_stanford_enabled())
        enable_stanford()
        assert_true(is_stanford_enabled())
    finally:
        # Pick the single call that restores the entry state.
        restore_state = enable_stanford if initially_enabled else disable_stanford
        restore_state()
def get_verbs(text, lowercase=False, lemmatize=False) -> Generator:
    """
    Yield the verbs of a text, selected by the tags STANFORD_TAGGER assigns.

    A token counts as a verb when its tag starts with "V".

    :param text: text to tokenize and tag
    :param lowercase: if true, yield lowercased tokens; when lemmatizing, the
        flag is forwarded to get_lemma_list instead
    :param lemmatize: if true, yield the get_lemma_list(text) entry found at
        each verb position rather than the token itself
    :return: generator over the selected tokens (or lemmas), in tagging order
    :raises RuntimeError: when iteration starts while Stanford is disabled
    """
    if not is_stanford_enabled():
        raise RuntimeError(
            "USE_STANFORD is set to False.  No Stanford functionality available."
        )

    # Tokenize once and tag the resulting sequence.
    tokens = get_tokens_list(text)
    tagged = STANFORD_TAGGER.tag(tokens)

    # Positions of the entries whose tag (second element) starts with "V".
    verb_positions = [
        position
        for position, tagged_item in enumerate(tagged)
        if tagged_item[1].startswith("V")
    ]

    if lemmatize:
        # Lemmas are looked up by the same positions as the tagged tokens.
        lemmas = get_lemma_list(text, lowercase=lowercase)
        for position in verb_positions:
            yield lemmas[position]
        return

    for position in verb_positions:
        verb = tokens[position]
        yield verb.lower() if lowercase else verb
def get_tokens(text, lowercase=False, stopword=False) -> Generator:
    """
    Lazily tokenize text with STANFORD_TOKENIZER.

    :param text: text to tokenize
    :param lowercase: if true, yield each token lowercased
    :param stopword: if true, skip tokens whose lowercase form is in STOPWORDS
    :return: generator over the tokens, in tokenizer order
    :raises RuntimeError: when iteration starts while Stanford is disabled
    """
    if not is_stanford_enabled():
        raise RuntimeError(
            "USE_STANFORD is set to False.  No Stanford functionality available."
        )

    for token in STANFORD_TOKENIZER.tokenize(text):
        # The stopword lookup only happens when filtering was requested.
        if stopword and token.lower() in STOPWORDS:
            continue
        yield token.lower() if lowercase else token
def get_tokens_list(text, lowercase=False, stopword=False) -> List:
    """
    Get token list from text using Stanford libraries.

    Materializes the get_tokens() generator into a list.

    :param text: text to tokenize
    :param lowercase: forwarded to get_tokens
    :param stopword: forwarded to get_tokens
    :return: list of the tokens produced by get_tokens
    :raises RuntimeError: if Stanford is disabled
    """
    if is_stanford_enabled():
        token_stream = get_tokens(text, lowercase=lowercase, stopword=stopword)
        return list(token_stream)

    raise RuntimeError("USE_STANFORD is set to False.  No Stanford functionality available.")
示例#5
0
def check_stanford():
    """
    Raise RuntimeError unless Stanford functionality is usable.

    Three conditions are checked in order: Stanford must be enabled, then
    STANFORD_TOKENIZER must be truthy, then STANFORD_TAGGER must be truthy.
    The first failing check determines the error message.

    :raises RuntimeError: if any of the checks fails
    """
    if not is_stanford_enabled():
        raise RuntimeError(
            "USE_STANFORD is set to False. No Stanford functionality available."
        )

    # Stanford is enabled from here on; find out which resource is missing.
    if not STANFORD_TOKENIZER:
        problem = "default POS tagger jar file is not found."
    elif not STANFORD_TAGGER:
        problem = "default tagger model file is not found."
    else:
        return

    raise RuntimeError(
        "USE_STANFORD is set to True. But "
        + problem
        + " No Stanford functionality available."
    )
示例#6
0
def test_stanford_method():
    """
    get_tokens_list() should raise RuntimeError while Stanford is disabled.

    Stanford is re-enabled afterwards if it was enabled on entry.
    :return:
    """
    stanford_was_on = is_stanford_enabled()
    try:
        disable_stanford()
        # The import is performed after disable_stanford(); keep this order.
        from lexnlp.nlp.en.stanford import get_tokens_list
        with pytest.raises(RuntimeError):
            get_tokens_list("This should throw an exception.")
    finally:
        # Stanford is already off at this point, so only the "on" state
        # needs to be restored.
        if stanford_was_on:
            enable_stanford()
示例#7
0
    Setup environment pre-tests
    :return:
    """
    # enable_stanford()


def teardown_module():
    """
    Tear down environment post-tests.

    Currently does nothing: the disable_stanford() call below is commented out.
    :return:
    """
    # disable_stanford()


@with_setup(setup_module, teardown_module)
@pytest.mark.skipif(not is_stanford_enabled(), reason="Stanford is disabled.")
def test_stanford_tokens():
    """
    Check get_tokens_list with default options via
    lexnlp_tests.test_extraction_func_on_test_data.

    Skipped when Stanford is disabled at the time this module is loaded.
    """
    # Imported inside the test body rather than at module import time.
    from lexnlp.nlp.en.stanford import get_tokens_list as tokens_to_list

    lexnlp_tests.test_extraction_func_on_test_data(tokens_to_list)


@with_setup(setup_module, teardown_module)
@pytest.mark.skipif(not is_stanford_enabled(), reason="Stanford is disabled.")
def test_stanford_tokens_lc():
    """
    Check get_tokens_list with lowercase=True via
    lexnlp_tests.test_extraction_func_on_test_data.

    Skipped when Stanford is disabled at the time this module is loaded.
    """
    # Imported inside the test body rather than at module import time.
    from lexnlp.nlp.en.stanford import get_tokens_list as tokens_to_list

    lexnlp_tests.test_extraction_func_on_test_data(tokens_to_list, lowercase=True)


@with_setup(setup_module, teardown_module)
@pytest.mark.skipif(not is_stanford_enabled(), reason="Stanford is disabled.")