def test_stanford_enable_disable():
    was_enabled = is_stanford_enabled()
    try:
        disable_stanford()
        assert_false(is_stanford_enabled())
        enable_stanford()
        assert_true(is_stanford_enabled())
    finally:
        # Restore the original enabled/disabled state
        if was_enabled:
            enable_stanford()
        else:
            disable_stanford()
def get_verbs(text, lowercase=False, lemmatize=False) -> Generator:
    """
    Get only verbs from text using Stanford libraries.
    :param text: text to process
    :param lowercase: whether to lowercase the returned tokens
    :param lemmatize: whether to yield lemmas instead of raw tokens
    :return: generator of verb tokens
    """
    if not is_stanford_enabled():
        raise RuntimeError(
            "USE_STANFORD is set to False. No Stanford functionality available."
        )

    # Tokenize and POS-tag, then keep the indexes whose tag starts with "V"
    tokens = get_tokens_list(text)
    pos = STANFORD_TAGGER.tag(tokens)
    verb_index = [i for i in range(len(pos)) if pos[i][1].startswith("V")]

    if lemmatize:
        lemmas = get_lemma_list(text, lowercase=lowercase)
        for j in verb_index:
            yield lemmas[j]
    else:
        for j in verb_index:
            yield tokens[j].lower() if lowercase else tokens[j]
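# Illustrative usage sketch (not part of the library): assuming the Stanford
# POS tagger models are installed and USE_STANFORD is enabled, get_verbs can be
# consumed like any generator, e.g.:
#
#     verbs = list(get_verbs("The court granted the motion.", lemmatize=True))
#
# which would yield lemmatized verb forms such as "grant".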
def get_tokens(text, lowercase=False, stopword=False) -> Generator:
    """
    Get token generator from text using Stanford libraries.
    :param text: text to process
    :param lowercase: whether to lowercase the returned tokens
    :param stopword: whether to skip stopwords
    :return: generator of tokens
    """
    if not is_stanford_enabled():
        raise RuntimeError(
            "USE_STANFORD is set to False. No Stanford functionality available."
        )

    if stopword:
        for token in STANFORD_TOKENIZER.tokenize(text):
            # Skip stopwords, comparing in lowercase
            if token.lower() in STOPWORDS:
                continue
            yield token.lower() if lowercase else token
    else:
        for token in STANFORD_TOKENIZER.tokenize(text):
            yield token.lower() if lowercase else token
def get_tokens_list(text, lowercase=False, stopword=False) -> List:
    """
    Get token list from text using Stanford libraries.
    :param text: text to process
    :param lowercase: whether to lowercase the returned tokens
    :param stopword: whether to skip stopwords
    :return: list of tokens
    """
    if not is_stanford_enabled():
        raise RuntimeError(
            "USE_STANFORD is set to False. No Stanford functionality available."
        )
    return list(get_tokens(text, lowercase=lowercase, stopword=stopword))
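# Illustrative usage sketch (assumes Stanford is enabled and configured):
# get_tokens() yields tokens lazily, while get_tokens_list() materializes them.
#
#     for token in get_tokens("Some text here.", lowercase=True, stopword=True):
#         handle(token)                      # hypothetical consumer
#     tokens = get_tokens_list("Some text here.")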
def check_stanford():
    if not is_stanford_enabled():
        raise RuntimeError(
            "USE_STANFORD is set to False. No Stanford functionality available."
        )
    if not STANFORD_TOKENIZER:
        raise RuntimeError(
            "USE_STANFORD is set to True, but the default POS tagger jar file"
            " was not found. No Stanford functionality available."
        )
    if not STANFORD_TAGGER:
        raise RuntimeError(
            "USE_STANFORD is set to True, but the default tagger model file"
            " was not found. No Stanford functionality available."
        )
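# Illustrative usage sketch: callers that use the tagger directly can guard
# with check_stanford() so that a missing jar or model fails fast, e.g.:
#
#     check_stanford()
#     tagged = STANFORD_TAGGER.tag(get_tokens_list(text))  # `text` assumed defined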
def test_stanford_method():
    """
    get_tokens_list() should raise an exception if Stanford is disabled.
    :return:
    """
    was_enabled = is_stanford_enabled()
    try:
        disable_stanford()
        from lexnlp.nlp.en.stanford import get_tokens_list
        with pytest.raises(RuntimeError):
            _ = get_tokens_list("This should throw an exception.")
    finally:
        # Re-enable only if Stanford was enabled before the test
        if was_enabled:
            enable_stanford()
    Setup environment pre-tests.
    :return:
    """
    # enable_stanford()


def teardown_module():
    """
    Tear down environment post-tests.
    :return:
    """
    # disable_stanford()


@with_setup(setup_module, teardown_module)
@pytest.mark.skipif(not is_stanford_enabled(), reason="Stanford is disabled.")
def test_stanford_tokens():
    from lexnlp.nlp.en.stanford import get_tokens_list
    lexnlp_tests.test_extraction_func_on_test_data(get_tokens_list)


@with_setup(setup_module, teardown_module)
@pytest.mark.skipif(not is_stanford_enabled(), reason="Stanford is disabled.")
def test_stanford_tokens_lc():
    from lexnlp.nlp.en.stanford import get_tokens_list
    lexnlp_tests.test_extraction_func_on_test_data(get_tokens_list, lowercase=True)


@with_setup(setup_module, teardown_module)
@pytest.mark.skipif(not is_stanford_enabled(), reason="Stanford is disabled.")