Example #1
def test_get_main_verbs_of_sent(spacy_doc):
    expected = [['going'], ['love', 'love'], ['damaged'], ['Thank']]
    observed = [
        [tok.text for tok in utils.get_main_verbs_of_sent(sent)]
        for sent in spacy_doc.sents
    ]
    for obs, exp in zip(observed, expected):
        assert obs == exp
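These tests rely on a `spacy_doc` pytest fixture defined elsewhere (typically in conftest.py). A minimal sketch of such a fixture, with illustrative text that will not reproduce the exact expected values above:

import pytest
import spacy

@pytest.fixture(scope="module")
def spacy_doc():
    # Parse a few sample sentences once per test module.
    nlp = spacy.load("en_core_web_sm")
    return nlp(
        "The tests are going well. I love Python, and I love spaCy. "
        "The update damaged nothing. Thank goodness for programmers."
    )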
Example #2
def test_get_objects_of_verb(spacy_doc):
    expected = [[], ["Python"], ["incompatibilities"], [], ["God"]]
    main_verbs = [
        tok for sent in spacy_doc.sents
        for tok in utils.get_main_verbs_of_sent(sent)
    ]
    observed = [[tok.text for tok in utils.get_objects_of_verb(main_verb)]
                for main_verb in main_verbs]
    for obs, exp in zip(observed, expected):
        assert obs == exp
Example #3
def test_get_subjects_of_verb(spacy_doc):
    expected = [["tests"], ["I"], ["I"], ["programmers"], []]
    main_verbs = [
        tok for sent in spacy_doc.sents
        for tok in utils.get_main_verbs_of_sent(sent)
    ]
    observed = [[tok.text for tok in utils.get_subjects_of_verb(main_verb)]
                for main_verb in main_verbs]
    for obs, exp in zip(observed, expected):
        assert obs == exp
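Both helpers essentially filter the verb's dependency children by label. A simplified re-implementation for intuition only (textacy's real versions also follow conjuncts and, for objects, open clausal complements):

SUBJ_DEPS = {"nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"}
OBJ_DEPS = {"dobj", "dative", "attr", "oprd"}

def simple_subjects_of_verb(verb):
    # Left-hand children of the verb with subject-like dependency labels.
    return [tok for tok in verb.lefts if tok.dep_ in SUBJ_DEPS]

def simple_objects_of_verb(verb):
    # Right-hand children of the verb with object-like dependency labels.
    return [tok for tok in verb.rights if tok.dep_ in OBJ_DEPS]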
Example #4
    def __call__(self, text):
        """"""
        doc = self.nlp(text)
        svo = []

        for sent in doc.sents:
            # Return the main (non-auxiliary) verbs in a sentence.
            verbs = utils.get_main_verbs_of_sent(sent)

            for verb in verbs:
                # Accumulate noun chunks matched to this verb's objects.
                chunks = ''
                # Collect any negation tokens attached to this verb.
                negation = " ".join(
                    t.text for t in sent if t.dep_ == 'neg' and t.head == verb
                )
                # Return all subjects of a verb according to the dependency parse.
                subj = utils.get_subjects_of_verb(verb)

                # Return all objects of a verb according to the dependency parse,
                # including open clausal complements.
                obj = utils.get_objects_of_verb(verb)

                # Return document indexes spanning all (adjacent) tokens around a verb
                # that are auxiliary verbs or negations.
                start, end = utils.get_span_for_verb_auxiliaries(verb)
                aux = doc[start:end + 1].as_doc().text
                for o in obj:
                    for nc in sent.noun_chunks:
                        # Expand a bare object token to its containing noun chunk.
                        if o.text in nc.text.split():
                            chunks += f' {nc.text}'
                snippet = " ".join(filter(None, [
                    " ".join(s.text for s in subj), negation, aux, chunks.strip(),
                ]))
                svo.append(snippet)
        return '. '.join(svo)
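For context, here is a condensed, self-contained variant of the same pipeline; the function name and model are illustrative, not part of the original class:

import spacy
from textacy.spacier import utils

def extract_svo(nlp, text):
    # Condensed sketch: subjects + main verb + objects, one snippet per verb.
    doc = nlp(text)
    snippets = []
    for sent in doc.sents:
        for verb in utils.get_main_verbs_of_sent(sent):
            subj = " ".join(t.text for t in utils.get_subjects_of_verb(verb))
            obj = " ".join(t.text for t in utils.get_objects_of_verb(verb))
            snippets.append(" ".join(filter(None, [subj, verb.text, obj])))
    return ". ".join(snippets)

nlp = spacy.load("en_core_web_sm")
print(extract_svo(nlp, "Programmers love Python."))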
Example #5
#     """Return the main (non-auxiliary) verbs in a sentence."""
#     return [tok for tok in sent
#             if tok.pos == VERB and tok.dep_ not in constants.AUX_DEPS]
# File:      d:\miniconda3\envs\nlp\lib\site-packages\textacy\spacier\utils.py
# Type:      function

toy_sentence = 'Shivangi is an engineer'
doc = nlp(toy_sentence)

"""What are the entities in this sentence?"""

displacy.render(doc, style='ent', jupyter=True)

# Let's find out the main verb in this sentence:

verbs = spacy_utils.get_main_verbs_of_sent(doc)
print(verbs)

# And what are nominal subjects of this verb?

for verb in verbs:
    print(verb, spacy_utils.get_subjects_of_verb(verb))

"""*You will notice that this has a reasonable overlap with the noun phrases which we pulled from our part-of-speech tagging but can be different as well.*"""

[(token, token.tag_) for token in doc]
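
# To see that overlap concretely, compare the subjects above against
# spaCy's noun chunks (doc.noun_chunks is standard spaCy API):
[(nc.text, nc.root.dep_) for nc in doc.noun_chunks]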

"""Tip: As an exercise, extend this approach to at least add Who, Where and When questions as practice.

## Level Up: Question and Answer