示例#1
0
def test_stanford_method():
    """
    get_tokens_list() should raise RuntimeError while Stanford is disabled.

    The enabled state seen on entry is remembered and, if Stanford was
    enabled, it is switched back on once the check has finished.
    :return:
    """
    stanford_was_on = is_stanford_enabled()
    try:
        disable_stanford()
        # The import is performed only after Stanford has been disabled.
        from lexnlp.nlp.en.stanford import get_tokens_list
        with pytest.raises(RuntimeError):
            get_tokens_list("This should throw an exception.")
    finally:
        # Re-enable only when it was enabled before this test ran.
        if stanford_was_on:
            enable_stanford()
def get_locations(text,
                  strict=False,
                  return_source=False,
                  window=2) -> Generator:
    """
    Get locations from text using Stanford libraries.

    The text is split into sentences, each sentence is tokenized and tagged
    with the Stanford NER tagger, and tokens labelled ``LOCATION`` are
    collected. Unless ``strict`` is set, a ``LOCATION`` token (or a "." / ","
    token) that follows the previous location token by fewer than ``window``
    positions is appended to the previous location instead of starting a
    new one.
    :param text: input text to search for locations
    :param strict: if True, never merge tokens; every ``LOCATION`` token is
        yielded as its own location
    :param return_source: if True, yield ``(location, sentence)`` tuples
        instead of bare location strings
    :param window: a token is merged into the previous location when its
        index distance from the last merged token is less than this value
    :return: generator of location strings, or ``(location, sentence)``
        tuples when ``return_source`` is set
    """
    # Iterate through sentences
    for sentence in get_sentence_list(text):
        # Tag only the current sentence. Tagging the whole text here would
        # repeat every location once per sentence and attach it to the
        # wrong source sentence.
        sentence_pos = STANFORD_NER_TAGGER.tag(get_tokens_list(sentence))

        # Iterate through tagged tokens
        locations = []
        last_loc_pos = None
        for i, token in enumerate(sentence_pos):
            word, label = token[0], token[1]
            # True when this token is close enough to the last location
            # token to be glued onto it (only possible in non-strict mode).
            mergeable = (not strict and last_loc_pos is not None and
                         (i - last_loc_pos) < window)

            if label == 'LOCATION':
                if mergeable:
                    # Possessive-style tokens ("'s") attach without a space.
                    separator = "" if word.startswith("'") else " "
                    locations[-1] += separator + word
                else:
                    locations.append(word)
                last_loc_pos = i
            elif word in (".", ",") and mergeable:
                # Punctuation is attached directly (no space) and keeps the
                # merge window open for the token that follows it.
                locations[-1] += word
                last_loc_pos = i

        # Cleanup and yield
        for location in locations:
            location = strip_unicode_punctuation(location).strip(
                string.punctuation).strip(string.whitespace)
            if return_source:
                yield location, sentence
            else:
                yield location
def get_persons(text,
                strict=False,
                return_source=False,
                window=2) -> Generator:
    """
    Get persons from text using Stanford libraries.

    The text is split into sentences, each sentence is tokenized and tagged
    with the Stanford NER tagger, and tokens labelled ``PERSON`` are
    collected. Unless ``strict`` is set, a ``PERSON`` token (or a "." / ","
    token) that follows the previous person token by fewer than ``window``
    positions is appended to the previous name instead of starting a new one.
    :param text: input text to search for person names
    :param strict: if True, never merge tokens; every ``PERSON`` token is
        yielded as its own name
    :param return_source: if True, yield ``(name, sentence)`` tuples instead
        of bare name strings
    :param window: a token is merged into the previous name when its index
        distance from the last merged token is less than this value
    :return: generator of name strings, or ``(name, sentence)`` tuples when
        ``return_source`` is set
    """
    # Iterate through sentences
    for sentence in get_sentence_list(text):
        # Tag only the current sentence. Tagging the whole text here would
        # repeat every name once per sentence and attach it to the wrong
        # source sentence.
        sentence_pos = STANFORD_NER_TAGGER.tag(get_tokens_list(sentence))

        # Iterate through tagged tokens
        names = []
        last_person_pos = None
        for i, token in enumerate(sentence_pos):
            word, label = token[0], token[1]
            # True when this token is close enough to the last person token
            # to be glued onto it (only possible in non-strict mode).
            mergeable = (not strict and last_person_pos is not None and
                         (i - last_person_pos) < window)

            if label == 'PERSON':
                if mergeable:
                    names[-1] += " " + word
                else:
                    names.append(word)
                last_person_pos = i
            elif word in (".", ",") and mergeable:
                # Punctuation is attached directly (no space) and keeps the
                # merge window open for the token that follows it.
                names[-1] += word
                last_person_pos = i

        # Cleanup and yield
        for name in names:
            name = strip_unicode_punctuation(name).strip(
                string.punctuation).strip(string.whitespace)
            if return_source:
                yield name, sentence
            else:
                yield name