Пример #1
0
        def func(nlp):
            """Check the per-token sentence indexes produced for one pipeline."""
            parsed = nlp(doc_text)
            analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
            analyzer.initialize(parsed)
            # One entry per token, read from the data the analyzer stored on it.
            actual_sent_indexes = [
                tok._.coref_chains.temp_sent_index for tok in parsed]
            # The model name is the failure message so the pipeline is identifiable.
            self.assertEqual(
                expected_sent_indexes, actual_sent_indexes, nlp.meta['name'])
Пример #2
0
        def func(nlp):
            """Check the sentence start indexes stored on the document for one pipeline."""
            parsed = nlp(doc_text)
            analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
            analyzer.initialize(parsed)
            actual_sent_starts = parsed._.coref_chains.temp_sent_starts
            # The model name is the failure message so the pipeline is identifiable.
            self.assertEqual(
                expected_sent_starts, actual_sent_starts, nlp.meta['name'])
Пример #3
0
        def func(nlp):
            """Check ``has_morph`` without a value argument on the token of 'they'."""
            parsed = nlp('they')
            analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
            analyzer.initialize(parsed)
            model_name = nlp.meta['name']
            # Feature name paired with the result expected for it.
            for feature, expected in (('Number', True), ('Other', False)):
                self.assertEqual(
                    expected, analyzer.has_morph(parsed[0], feature), model_name)
Пример #4
0
        def func(nlp):
            """Check the quote array stored on the token at ``index`` for one pipeline."""
            parsed = nlp(doc_text)
            analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
            analyzer.initialize(parsed)
            actual_quote_array = parsed[index]._.coref_chains.temp_quote_array
            # The model name is the failure message so the pipeline is identifiable.
            self.assertEqual(
                expected_quote_array, actual_quote_array, nlp.meta['name'])
Пример #5
0
        def func(nlp):
            """Collect the indexes of tokens flagged by ``is_involved_in_non_or_conjunction``."""
            parsed = nlp(doc_text)
            analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
            analyzer.initialize(parsed)
            flagged_indexes = []
            for tok in parsed:
                if analyzer.is_involved_in_non_or_conjunction(tok):
                    flagged_indexes.append(tok.i)
            # The model name is the failure message so the pipeline is identifiable.
            self.assertEqual(expected_trues, flagged_indexes, nlp.meta['name'])
Пример #6
0
 def func(nlp):
     """Check ``is_potential_coreferring_noun_pair`` for one pipeline unless it is excluded."""
     model_name = nlp.meta['name']
     if model_name in excluded_nlps:
         # This pipeline is deliberately left out of the comparison.
         return
     parsed = nlp(doc_text)
     analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
     analyzer.initialize(parsed)
     actual_truth = analyzer.is_potential_coreferring_noun_pair(
         parsed[referred_index], parsed[referring_index])
     self.assertEqual(expected_truth, actual_truth, nlp.meta['name'])
Пример #7
0
class CommonRulesTest(unittest.TestCase):
    """Tests for the rules analyzer, run against the English pipelines.

    Most tests go through a ``compare_*`` helper that repeats the same check
    for every pipeline returned by ``get_nlps('en')``. The pipeline's model
    name is always passed as the assertion message so that a failure
    identifies the pipeline that produced it.
    """

    def setUp(self):
        """Load the English pipelines and single out the ``core_web_sm`` one."""
        self.nlps = get_nlps('en')
        self.rules_analyzers = [RulesAnalyzerFactory().get_rules_analyzer(nlp) for
            nlp in self.nlps]
        self.sm_nlp = [nlp for nlp in self.nlps if nlp.meta['name'] == 'core_web_sm'][0]
        self.sm_rules_analyzer = RulesAnalyzerFactory().get_rules_analyzer(self.sm_nlp)

    def all_nlps(self, func):
        """Call ``func`` once with each loaded pipeline, in order."""
        for nlp in self.nlps:
            func(nlp)

    @staticmethod
    def _analyze(nlp, doc_text):
        """Parse ``doc_text`` with ``nlp`` and initialize a rules analyzer on the result.

        Returns:
            The ``(doc, rules_analyzer)`` pair.
        """
        doc = nlp(doc_text)
        # Go through a factory instance everywhere: that call form works whether
        # ``get_rules_analyzer`` is an instance method or a static method.
        rules_analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
        rules_analyzer.initialize(doc)
        return doc, rules_analyzer

    def compare_sent_starts(self, doc_text, expected_sent_starts):
        """Assert the sentence starts stored on the document for every pipeline."""

        def func(nlp):
            doc, _ = self._analyze(nlp, doc_text)
            self.assertEqual(expected_sent_starts,
                doc._.coref_chains.temp_sent_starts, nlp.meta['name'])

        self.all_nlps(func)

    def test_sent_starts(self):
        self.compare_sent_starts('My name is Charles. I am here. The weather is good',
            [0, 5, 9])

    def compare_sent_indexes(self, doc_text, expected_sent_indexes):
        """Assert the per-token sentence indexes for every pipeline."""

        def func(nlp):
            doc, _ = self._analyze(nlp, doc_text)
            self.assertEqual(expected_sent_indexes,
                [token._.coref_chains.temp_sent_index for token in doc], nlp.meta['name'])

        self.all_nlps(func)

    def test_sent_index(self):
        self.compare_sent_indexes('My name is Charles. I am here. The weather is good',
            [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])

    def compare_get_dependent_sibling_info(self, doc_text, index, expected_dependent_siblings,
        expected_governing_sibling, expected_has_or_coordination):
        """Assert the sibling information stored on the token at ``index``.

        Args:
            doc_text: text to parse.
            index: index of the token under test.
            expected_dependent_siblings: expected ``str()`` of the token's
                ``temp_dependent_siblings``.
            expected_governing_sibling: index of the expected governing sibling
                token, or ``None`` when none is expected.
            expected_has_or_coordination: expected ``temp_has_or_coordination``.
        """

        def func(nlp):
            doc, _ = self._analyze(nlp, doc_text)
            model_name = nlp.meta['name']
            token = doc[index]
            self.assertEqual(expected_dependent_siblings, str(
                token._.coref_chains.temp_dependent_siblings), model_name)
            # Every dependent sibling must point back at the token under test.
            for sibling in token._.coref_chains.temp_dependent_siblings:
                if sibling.i == index:
                    continue
                self.assertEqual(token, sibling._.coref_chains.temp_governing_sibling,
                    model_name)
            if expected_governing_sibling is None:
                self.assertEqual(None, token._.coref_chains.temp_governing_sibling,
                    model_name)
            else:
                self.assertEqual(doc[expected_governing_sibling],
                    token._.coref_chains.temp_governing_sibling, model_name)
            self.assertEqual(expected_has_or_coordination,
                token._.coref_chains.temp_has_or_coordination, model_name)

        self.all_nlps(func)

    def test_get_dependent_sibling_info_no_conjunction(self):
        self.compare_get_dependent_sibling_info('Richard went home', 0, '[]', None, False)

    def test_get_dependent_sibling_info_two_member_conjunction_phrase_and(self):
        self.compare_get_dependent_sibling_info('Richard and Christine went home', 0,
            '[Christine]', None, False)

    def test_get_dependent_sibling_info_two_member_conjunction_phrase_or(self):
        self.compare_get_dependent_sibling_info('Richard or Christine went home', 0,
            '[Christine]', None, True)

    def test_get_dependent_sibling_info_three_member_conjunction_phrase_with_comma_and(self):
        self.compare_get_dependent_sibling_info('Carol, Richard and Ralf had a meeting', 0,
            '[Richard, Ralf]', None, False)

    def test_get_dependent_sibling_info_three_member_conjunction_phrase_with_comma_or(self):
        self.compare_get_dependent_sibling_info('Carol, Richard or Ralf had a meeting', 0,
            '[Richard, Ralf]', None, True)

    def test_get_dependent_sibling_info_three_member_conjunction_phrase_with_and(self):
        self.compare_get_dependent_sibling_info(
            'There was a meeting with Carol and Ralf and Richard', 5,
            '[Ralf, Richard]', None, False)

    def test_get_dependent_sibling_info_three_member_conjunction_phrase_with_or(self):
        self.compare_get_dependent_sibling_info(
            'A meeting with Carol or Ralf or Richard took place', 3,
            '[Ralf, Richard]', None, True)

    def test_get_dependent_sibling_info_three_member_conjunction_phrase_with_and_and_or(self):
        self.compare_get_dependent_sibling_info(
            'There was a meeting with Carol or Ralf and Richard', 5,
            '[Ralf, Richard]', None, True)

    def test_get_dependent_sibling_info_conjunction_itself(self):
        self.compare_get_dependent_sibling_info(
            'There was a meeting with Carol and Ralf and Richard', 6,
            '[]', None, False)

    def test_get_dependent_sibling_info_dependent_sibling(self):
        self.compare_get_dependent_sibling_info(
            'There was a meeting with Carol and Ralf and Richard', 7,
            '[]', 5, False)

    def compare_quote_array(self, doc_text, index, expected_quote_array):
        """Assert the quote array stored on the token at ``index`` for every pipeline."""

        def func(nlp):
            doc, _ = self._analyze(nlp, doc_text)
            self.assertEqual(expected_quote_array, doc[index]._.coref_chains.temp_quote_array,
                nlp.meta['name'])

        self.all_nlps(func)

    def test_quote_array_simple(self):
        self.compare_quote_array("He said 'Give it back'", 1, [0, 0, 0, 0])
        self.compare_quote_array("He said 'Give it back'", 3, [1, 0, 0, 0])

    def test_quote_array_complex(self):
        self.compare_quote_array("He said “Give it 'back'”", 1, [0, 0, 0, 0])
        self.compare_quote_array("He said “Give it 'back'”", 3, [0, 0, 1, 0])
        self.compare_quote_array("He said “Give it 'back'”", 6, [1, 0, 1, 0])

    def compare_potential_noun_pair(self, doc_text, referred_index, referring_index,
            expected_truth, *, excluded_nlps=()):
        """Assert ``is_potential_coreferring_noun_pair`` for two tokens.

        Args:
            doc_text: text to parse.
            referred_index: index of the token passed as the referred noun.
            referring_index: index of the token passed as the referring noun.
            expected_truth: expected return value.
            excluded_nlps: collection of model names for which the check is
                skipped.
        """

        def func(nlp):
            if nlp.meta['name'] in excluded_nlps:
                return
            doc, rules_analyzer = self._analyze(nlp, doc_text)
            self.assertEqual(expected_truth,
                rules_analyzer.is_potential_coreferring_noun_pair(doc[referred_index],
                doc[referring_index]), nlp.meta['name'])

        self.all_nlps(func)

    def test_potential_noun_pair_proper_noun_referred(self):
        self.compare_potential_noun_pair('This is Peter. Peter is here', 2, 4, True)

    def test_potential_noun_pair_proper_noun_referred_multiword(self):
        self.compare_potential_noun_pair('This is Peter Smith. Peter Smith is here', 3, 6, True)

    def test_potential_noun_pair_proper_noun_referred_multiword_referring_end(self):
        self.compare_potential_noun_pair('This is Peter Smith. Smith is here', 3, 5, True)

    def test_potential_noun_pair_proper_noun_referred_multiword_referring_beginning(self):
        self.compare_potential_noun_pair('This is Peter Smith. Peter is here', 3, 5, False)

    def test_potential_noun_pair_referred_proper_noun_with_child(self):
        self.compare_potential_noun_pair('I spoke to big Peter. Peter is here', 4, 6, True)

    def test_potential_noun_pair_referred_proper_noun_conjunction_first_member(self):
        self.compare_potential_noun_pair('I spoke to Peter and Jane. Peter is here',
            3, 7, True)

    def test_potential_noun_pair_referred_proper_noun_conjunction_second_member(self):
        self.compare_potential_noun_pair('I spoke to Peter and Jane. Jane is here',
            5, 7, True)

    def test_potential_noun_pair_referring_back_proper_noun_with_non_proper_noun_child(self):
        self.compare_potential_noun_pair('I spoke to Peter. Big Peter is here', 3, 6, True,
            excluded_nlps=['core_web_sm', 'core_web_trf'])

    def test_potential_noun_pair_referring_back_proper_noun_with_proper_noun_child(self):
        self.compare_potential_noun_pair('I spoke to Peter. Big Peter is here', 3, 6, False,
            excluded_nlps=['core_web_md', 'core_web_lg'])

    def test_potential_noun_pair_referring_back_proper_noun_conjunction_first_member(self):
        self.compare_potential_noun_pair('I spoke to Peter. Peter and Jane are here',
            3, 5, True)

    def test_potential_noun_pair_referring_back_proper_noun_conjunction_second_member(self):
        self.compare_potential_noun_pair('I spoke to Jane. Peter and Jane are here',
            3, 7, True)

    def test_potential_noun_pair_definite_common_noun_referred(self):
        self.compare_potential_noun_pair('This is a man. The man is here', 3, 6, True)

    def test_potential_noun_pair_indefinite_common_noun_referred(self):
        self.compare_potential_noun_pair('This is the man. A man is here', 3, 6, False)

    def test_potential_noun_pair_definite_plural_common_noun_referred_1(self):
        self.compare_potential_noun_pair('These are men. The men is here', 2, 5, False)

    def test_potential_noun_pair_definite_plural_common_noun_referred_2(self):
        self.compare_potential_noun_pair('These are some men. The men are here', 3, 6, True)

    def test_potential_noun_pair_definite_plural_common_noun_referred_3(self):
        self.compare_potential_noun_pair('These are the men. The men are here', 3, 6, True)

    def test_potential_noun_pair_indefinite_plural_common_noun_referred(self):
        self.compare_potential_noun_pair('These are men. Men is here', 2, 4, False)

    def test_potential_noun_pair_numbers_do_not_match(self):
        self.compare_potential_noun_pair('This is a man. The men are here.', 3, 6, False)

    def test_potential_noun_pair_entity_labels_referred(self):
        self.compare_potential_noun_pair(
            'I spoke to the boss of Lehman Brothers. The company went bust', 7, 10, True)

    def test_potential_noun_pair_entity_labels_referred_not_definite_control(self):
        self.compare_potential_noun_pair(
            'I spoke to the boss of Lehman Brothers. A company went bust', 7, 10, False)

    def test_potential_noun_pair_wrong_entity_label_referred(self):
        self.compare_potential_noun_pair(
            'I spoke to the boss of Lehman Brothers. The person went bust', 7, 10, False)

    def test_potentially_independent_nouns_stored_on_token(self):
        doc = self.sm_nlp('They went to look at the space suits')
        self.sm_rules_analyzer.initialize(doc)
        self.assertFalse(doc[3]._.coref_chains.temp_potentially_referring)
        self.assertFalse(doc[6]._.coref_chains.temp_potentially_referring)
        self.assertTrue(doc[7]._.coref_chains.temp_potentially_referring)

    def compare_potential_pair(self, doc_text, referred_index, include_dependent_siblings,
        referring_index, expected_truth, consider_syntax=True, *, excluded_nlps=()):
        """Assert the anaphoric-pair result for a referred mention and a referring token.

        Args:
            doc_text: text to parse.
            referred_index: index of the root token of the referred mention.
            include_dependent_siblings: second argument passed to ``Mention``.
            referring_index: index of the referring token.
            expected_truth: expected return value of the analyzer method (the
                tests in this class use 0, 1 and 2).
            consider_syntax: when true,
                ``language_independent_is_potential_anaphoric_pair`` is checked;
                otherwise ``is_potential_anaphoric_pair`` is called with
                ``False`` as its third argument.
            excluded_nlps: collection of model names for which the check is
                skipped.
        """

        def func(nlp):
            model_name = nlp.meta['name']
            if model_name in excluded_nlps:
                return
            doc, rules_analyzer = self._analyze(nlp, doc_text)
            # Preconditions on the chosen tokens. ``assertTrue`` is used rather
            # than a bare ``assert`` so the checks survive ``python -O`` and
            # report the model name on failure.
            self.assertTrue(
                rules_analyzer.is_independent_noun(doc[referred_index]) or
                rules_analyzer.is_potential_anaphor(doc[referred_index]),
                model_name)
            self.assertTrue(
                rules_analyzer.is_potential_anaphor(doc[referring_index]), model_name)
            referred_mention = Mention(doc[referred_index], include_dependent_siblings)
            if consider_syntax:
                self.assertEqual(expected_truth,
                    rules_analyzer.language_independent_is_potential_anaphoric_pair(
                    referred_mention, doc[referring_index]), model_name)
            else:
                self.assertEqual(expected_truth, rules_analyzer.is_potential_anaphoric_pair(
                    referred_mention, doc[referring_index], False), model_name)

        self.all_nlps(func)

    def test_closer_within_structure_propn(self):
        self.compare_potential_pair('Richard arrived. Richard saw him.', 0, False, 5, 1)

    def test_closer_within_structure_propn_conjunction_first(self):
        self.compare_potential_pair('Richard and Peter arrived. Richard saw him.', 0, False, 7, 1)

    def test_closer_within_structure_propn_conjunction_second(self):
        self.compare_potential_pair('Richard and Peter arrived. Peter saw him.', 2, False, 7, 1)

    def test_closer_within_structure_child(self):
        self.compare_potential_pair('The dog arrived. The big dog saw him.', 1, False, 8, 2)

    def test_closer_within_structure_only_determiner(self):
        self.compare_potential_pair('The dog arrived. The dog saw him.', 1, False, 7, 1)

    def test_closer_within_structure_only_determiner_conjunction_first(self):
        self.compare_potential_pair('The dog and the cat arrived. The dog saw him.', 1, False,
            10, 1)

    def test_closer_within_structure_only_determiner_conjunction_second(self):
        self.compare_potential_pair('The dog and the cat arrived. The cat saw him.', 4, False,
            10, 1)

    def test_closer_within_structure_all_pronouns_control(self):
        self.compare_potential_pair('He arrived. He saw him.', 0, False, 5, 2)

    def test_consider_syntax_false(self):
        self.compare_potential_pair('He saw him.', 0, False, 2, 2, False)

    def test_consider_syntax_false_control(self):
        self.compare_potential_pair('He saw him', 0, False, 2, 0, True)

    def test_quotes(self):
        self.compare_potential_pair('"Richard is here", he said.', 1, False, 6, 1)

    def test_propn_subtree_beginning(self):
        doc = self.sm_nlp('Richard Hudson is here')
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[0])])
        self.assertEqual([0, 1], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[1])])
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[3])])

    def test_propn_subtree_middle(self):
        doc = self.sm_nlp('He spoke to Richard Hudson yesterday')
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[0])])
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[3])])
        self.assertEqual([3, 4], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[4])])
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[5])])

    def test_propn_subtree_end(self):
        doc = self.sm_nlp('He spoke to Richard Hudson')
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[0])])
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[3])])
        self.assertEqual([3, 4], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[4])])

    def test_propn_subtree_with_coordination(self):
        doc = self.sm_nlp('Richard Hudson and Peter Jones are here')
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[0])])
        self.assertEqual([0, 1], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[1])])
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[2])])
        self.assertEqual([], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[3])])
        self.assertEqual([3, 4], [t.i for t in self.sm_rules_analyzer.get_propn_subtree(doc[4])])

    def compare_potentially_referring(self, doc_text, expected_per_indexes, *,
        excluded_nlps=()):
        """Assert which token indexes satisfy ``is_independent_noun``.

        Args:
            doc_text: text to parse.
            expected_per_indexes: expected list of token indexes.
            excluded_nlps: collection of model names for which the check is
                skipped.
        """

        def func(nlp):
            if nlp.meta['name'] in excluded_nlps:
                return
            doc, rules_analyzer = self._analyze(nlp, doc_text)
            per_indexes = [token.i for token in doc if
                    rules_analyzer.is_independent_noun(token)]
            self.assertEqual(expected_per_indexes, per_indexes, nlp.meta['name'])

        self.all_nlps(func)

    def test_has_morph_without_value(self):

        def func(nlp):
            doc, rules_analyzer = self._analyze(nlp, 'they')
            self.assertEqual(True, rules_analyzer.has_morph(doc[0], 'Number'), nlp.meta['name'])
            self.assertEqual(False, rules_analyzer.has_morph(doc[0], 'Other'), nlp.meta['name'])

        self.all_nlps(func)

    def test_has_morph_with_value(self):

        def func(nlp):
            doc, rules_analyzer = self._analyze(nlp, 'they')
            self.assertEqual(True, rules_analyzer.has_morph(
                doc[0], 'Number', 'Plur'), nlp.meta['name'])
            self.assertEqual(False, rules_analyzer.has_morph(
                doc[0], 'Number', 'Other'), nlp.meta['name'])
            self.assertEqual(False, rules_analyzer.has_morph(
                doc[0], 'Other', 'Other'), nlp.meta['name'])

        self.all_nlps(func)

    def compare_non_or_truths(self, doc_text, expected_trues):
        """Assert which token indexes satisfy ``is_involved_in_non_or_conjunction``."""

        def func(nlp):
            doc, rules_analyzer = self._analyze(nlp, doc_text)
            non_or_truths = [token.i for token in doc
                if rules_analyzer.is_involved_in_non_or_conjunction(token)]
            self.assertEqual(expected_trues, non_or_truths, nlp.meta['name'])

        self.all_nlps(func)

    def test_is_involved_in_non_or_conjunction_and_conjunction(self):
        self.compare_non_or_truths('Richard and Christine went home', [0, 2])

    def test_is_involved_in_non_or_conjunction_or_conjunction(self):
        self.compare_non_or_truths('Richard or Christine went home', [])

    def test_is_involved_in_non_or_conjunction_mixed_conjunction(self):
        self.compare_non_or_truths('Richard or Christine and Peter went home', [])

    def compare_potentially_introducing(self, doc_text, index, expected_truth, *,
            excluded_nlps=()):
        """Assert ``is_potentially_introducing_noun`` for the token at ``index``.

        Args:
            doc_text: text to parse.
            index: index of the token under test.
            expected_truth: expected return value.
            excluded_nlps: collection of model names for which the check is
                skipped.
        """

        def func(nlp):
            if nlp.meta['name'] in excluded_nlps:
                return
            doc, rules_analyzer = self._analyze(nlp, doc_text)
            self.assertEqual(expected_truth,
                rules_analyzer.is_potentially_introducing_noun(doc[index]),
                nlp.meta['name'])

        self.all_nlps(func)

    def test_potentially_introducing_not_noun(self):
        self.compare_potentially_introducing('I spoke to Peter', 2, False)

    def test_potentially_introducing_definite_noun(self):
        self.compare_potentially_introducing('I spoke to the man', 4, False)

    def test_potentially_introducing_indefinite_noun(self):
        self.compare_potentially_introducing('I spoke to a man', 4, True)

    def test_potentially_introducing_definite_noun_with_adjective(self):
        self.compare_potentially_introducing('I spoke to the big man', 5, True)

    def test_potentially_introducing_definite_noun_with_dependent_phrase(self):
        # A list, not a bare string: ``in`` on a string would match substrings
        # of the model name instead of testing membership.
        self.compare_potentially_introducing('I saw the man whom we had discussed', 3, True,
            excluded_nlps=['core_web_sm'])

    def test_potentially_introducing_common_noun_conjunction_first_member(self):
        self.compare_potentially_introducing('I spoke to a man and a woman', 4, True)

    def test_potentially_introducing_common_noun_conjunction_second_member(self):
        self.compare_potentially_introducing('I spoke to a man and a woman', 7, True)

    def compare_potentially_referring_back_noun(self, doc_text, index, expected_truth, *,
            excluded_nlps=()):
        """Assert ``is_potentially_referring_back_noun`` for the token at ``index``.

        Args:
            doc_text: text to parse.
            index: index of the token under test.
            expected_truth: expected return value.
            excluded_nlps: collection of model names for which the check is
                skipped.
        """

        def func(nlp):
            if nlp.meta['name'] in excluded_nlps:
                return
            doc, rules_analyzer = self._analyze(nlp, doc_text)
            self.assertEqual(expected_truth,
                rules_analyzer.is_potentially_referring_back_noun(doc[index]),
                nlp.meta['name'])

        self.all_nlps(func)

    def test_potentially_referring_back_noun_not_noun(self):
        self.compare_potentially_referring_back_noun('I spoke to Peter', 2, False)

    def test_potentially_referring_back_noun_definite_noun(self):
        self.compare_potentially_referring_back_noun('I spoke to the man', 4, True)

    def test_potentially_referring_back_noun_indefinite_noun(self):
        self.compare_potentially_referring_back_noun('I spoke to a man', 4, False)

    def test_potentially_referring_back_noun_definite_noun_with_adjective(self):
        self.compare_potentially_referring_back_noun('I spoke to the big man', 5, False)

    def test_potentially_referring_back_noun_definite_noun_with_dependent_phrase(self):
        # A list, not a bare string: ``in`` on a string would match substrings
        # of the model name instead of testing membership.
        self.compare_potentially_referring_back_noun(
            'I saw the man whom we had discussed', 3, False,
            excluded_nlps=['core_web_sm'])

    def test_potentially_referring_back_noun_common_noun_conjunction_first_member(self):
        self.compare_potentially_referring_back_noun('I spoke to the man and the woman', 4, True)

    def test_potentially_referring_back_noun_common_noun_conjunction_second_member(self):
        self.compare_potentially_referring_back_noun('I spoke to the man and the woman', 7, True)
Пример #8
0
class CommonTendenciesTest(unittest.TestCase):
    def setUp(self):
        """Build a rules analyzer, feature table and tendencies analyzer for sm and lg.

        The feature tables are generated from a single sample sentence parsed
        by the respective pipeline.
        """
        warnings.filterwarnings("ignore",
                                message=r"\[W007\]",
                                category=UserWarning)

        pipelines = get_nlps('en')
        sample_text = 'Richard said he was entering the big house'

        # Small model.
        for pipeline in pipelines:
            if pipeline.meta['name'] == 'core_web_sm':
                self.sm_nlp = pipeline
        self.sm_rules_analyzer = RulesAnalyzerFactory().get_rules_analyzer(
            self.sm_nlp)
        sm_generator = ModelGenerator(self.sm_rules_analyzer,
                                      self.sm_nlp, self.sm_nlp)
        self.sm_feature_table = sm_generator.generate_feature_table(
            [self.sm_nlp(sample_text)])
        self.sm_tendencies_analyzer = TendenciesAnalyzer(
            self.sm_rules_analyzer, self.sm_nlp, self.sm_feature_table)

        # Large model.
        for pipeline in pipelines:
            if pipeline.meta['name'] == 'core_web_lg':
                self.lg_nlp = pipeline
        self.lg_rules_analyzer = RulesAnalyzerFactory().get_rules_analyzer(
            self.lg_nlp)
        lg_generator = ModelGenerator(self.lg_rules_analyzer,
                                      self.lg_nlp, self.lg_nlp)
        self.lg_feature_table = lg_generator.generate_feature_table(
            [self.lg_nlp(sample_text)])
        self.lg_tendencies_analyzer = TendenciesAnalyzer(
            self.lg_rules_analyzer, self.lg_nlp, self.lg_feature_table)

    def test_generate_feature_table(self):
        """The feature table generated from the sample sentence has the expected content."""
        expected_table = {
            'tags': ['NN', 'NNP', 'PRP'],
            'morphs': [
                'Case=Nom', 'Gender=Masc', 'NounType=Prop', 'Number=Sing',
                'Person=3', 'PronType=Prs'
            ],
            'ent_types': ['', 'PERSON'],
            'lefthand_deps_to_children': ['amod', 'det'],
            'righthand_deps_to_children': [],
            'lefthand_deps_to_parents': ['nsubj'],
            'righthand_deps_to_parents': ['dobj'],
            'parent_tags': ['VBD', 'VBG'],
            'parent_morphs': [
                'Aspect=Prog', 'Tense=Past', 'Tense=Pres', 'VerbForm=Fin',
                'VerbForm=Part'
            ],
            'parent_lefthand_deps_to_children': ['aux', 'nsubj'],
            'parent_righthand_deps_to_children': ['ccomp', 'dobj']
        }

        parsed = self.sm_nlp('Richard said he was entering the big house')
        generator = ModelGenerator(self.sm_rules_analyzer, self.sm_nlp,
                                   self.sm_nlp)
        table = generator.generate_feature_table([parsed])

        self.assertEqual(expected_table, table.__dict__)
        self.assertEqual(26, len(table))

    def test_get_feature_map_simple_mention(self):
        """Feature maps for mentions built from ``doc[0]`` and ``doc[2]``."""
        expected_for_0 = [
            0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
            0, 1, 1, 0
        ]
        expected_for_2 = [
            0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
            1, 1, 0, 1
        ]
        parsed = self.sm_nlp('Richard said he was entering the big house')
        self.sm_rules_analyzer.initialize(parsed)

        first_mention = Mention(parsed[0], False)
        first_map = self.sm_tendencies_analyzer.get_feature_map(
            first_mention, parsed)
        self.assertEqual(len(self.sm_feature_table), len(first_map))
        # The map is also expected to be stored on the mention itself.
        self.assertEqual(first_mention.temp_feature_map, first_map)
        self.assertEqual(expected_for_0, first_map)

        second_map = self.sm_tendencies_analyzer.get_feature_map(
            Mention(parsed[2], False), parsed)
        self.assertEqual(len(self.sm_feature_table), len(second_map))
        self.assertEqual(expected_for_2, second_map)

    def test_get_feature_map_simple_token(self):
        """Feature maps for the tokens ``doc[0]`` and ``doc[2]`` passed directly."""
        expected_for_0 = [
            0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
            0, 1, 1, 0
        ]
        expected_for_2 = [
            0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
            1, 1, 0, 1
        ]
        parsed = self.sm_nlp('Richard said he was entering the big house')
        self.sm_rules_analyzer.initialize(parsed)

        first_map = self.sm_tendencies_analyzer.get_feature_map(
            parsed[0], parsed)
        self.assertEqual(len(self.sm_feature_table), len(first_map))
        # The map is also expected to be stored on the token's coref data.
        self.assertEqual(parsed[0]._.coref_chains.temp_feature_map, first_map)
        self.assertEqual(expected_for_0, first_map)

        second_map = self.sm_tendencies_analyzer.get_feature_map(
            parsed[2], parsed)
        self.assertEqual(len(self.sm_feature_table), len(second_map))
        self.assertEqual(expected_for_2, second_map)

    def test_get_feature_map_conjunction(self):
        """Feature maps for mentions in a sentence containing a conjunction.

        Each case is the token index, the second ``Mention`` argument and the
        expected feature map.
        """
        cases = (
            (0, False, [
                0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
                0, 1, 1, 0
            ]),
            (0, True, [
                0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
                0, 1, 1, 0
            ]),
            (5, False, [
                0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
                1, 1, 0, 1
            ]),
        )
        parsed = self.sm_nlp(
            'Richard and the man said they were entering the big house')
        self.sm_rules_analyzer.initialize(parsed)

        for token_index, with_siblings, expected_map in cases:
            actual_map = self.sm_tendencies_analyzer.get_feature_map(
                Mention(parsed[token_index], with_siblings), parsed)
            self.assertEqual(len(self.sm_feature_table), len(actual_map))
            self.assertEqual(expected_map, actual_map)

    def test_get_position_map_first_sentence_token(self):
        """Position maps for tokens 0, 2 and 6 of a single-sentence document."""
        parsed = self.sm_nlp('Richard said he was entering the big house')
        self.sm_rules_analyzer.initialize(parsed)

        first_map = self.sm_tendencies_analyzer.get_position_map(
            parsed[0], parsed)
        # The map is also expected to be stored on the token's coref data.
        self.assertEqual(parsed[0]._.coref_chains.temp_position_map, first_map)
        self.assertEqual([0, 1, 1, 0, 0, 0, 0], first_map)

        remaining_cases = (
            (2, [2, 2, 2, 0, 0, 0, 0]),
            (6, [6, 3, 2, 1, 1, 0, 0]),
        )
        for token_index, expected_map in remaining_cases:
            actual_map = self.sm_tendencies_analyzer.get_position_map(
                parsed[token_index], parsed)
            self.assertEqual(expected_map, actual_map)

    def test_get_position_map_first_sentence_mention(self):
        """Position maps for mentions built from tokens 0, 2 and 6 of one sentence."""
        parsed = self.sm_nlp('Richard said he was entering the big house')
        self.sm_rules_analyzer.initialize(parsed)

        first_mention = Mention(parsed[0], False)
        first_map = self.sm_tendencies_analyzer.get_position_map(
            first_mention, parsed)
        # The map is also expected to be stored on the mention itself.
        self.assertEqual(first_mention.temp_position_map, first_map)
        self.assertEqual([0, 1, 1, 0, 0, 0, 0], first_map)

        remaining_cases = (
            (2, [2, 2, 2, 0, 0, 0, 0]),
            (6, [6, 3, 2, 1, 1, 0, 0]),
        )
        for token_index, expected_map in remaining_cases:
            actual_map = self.sm_tendencies_analyzer.get_position_map(
                Mention(parsed[token_index], False), parsed)
            self.assertEqual(expected_map, actual_map)

    def test_get_position_map_second_sentence_token(self):
        """Position maps for tokens 6 and 8 when another sentence precedes them."""
        text = 'This is a preceding sentence. Richard said he was entering the big house'
        parsed = self.sm_nlp(text)
        self.sm_rules_analyzer.initialize(parsed)

        cases = (
            (6, [0, 1, 1, 0, 0, 0, 0]),
            (8, [2, 2, 2, 0, 0, 0, 0]),
        )
        for token_index, expected_map in cases:
            actual_map = self.sm_tendencies_analyzer.get_position_map(
                parsed[token_index], parsed)
            self.assertEqual(expected_map, actual_map)

    def test_get_position_map_second_sentence_mention(self):
        """Position maps for mentions at tokens 6 and 8 when another sentence precedes them."""
        text = 'This is a preceding sentence. Richard said he was entering the big house'
        parsed = self.sm_nlp(text)
        self.sm_rules_analyzer.initialize(parsed)

        cases = (
            (6, [0, 1, 1, 0, 0, 0, 0]),
            (8, [2, 2, 2, 0, 0, 0, 0]),
        )
        for token_index, expected_map in cases:
            actual_map = self.sm_tendencies_analyzer.get_position_map(
                Mention(parsed[token_index], False), parsed)
            self.assertEqual(expected_map, actual_map)

    def test_get_position_map_root_token(self):
        # Position map for the plain token at index 1 ('said'), which the
        # test name identifies as the root; the fifth entry is expected to
        # be -1 here.
        doc = self.sm_nlp('Richard said he was entering the big house')
        self.sm_rules_analyzer.initialize(doc)
        root_token = doc[1]
        self.assertEqual(
            [1, 0, 0, 0, -1, 0, 0],
            self.sm_tendencies_analyzer.get_position_map(root_token, doc))

    def test_get_position_map_root_mention(self):
        # Position map for a Mention wrapping the token at index 1 ('said'),
        # which the test name identifies as the root; the fifth entry is
        # expected to be -1 here.
        doc = self.sm_nlp('Richard said he was entering the big house')
        self.sm_rules_analyzer.initialize(doc)
        root_mention = Mention(doc[1], False)
        self.assertEqual(
            [1, 0, 0, 0, -1, 0, 0],
            self.sm_tendencies_analyzer.get_position_map(root_mention, doc))

    def test_get_position_map_conjunction_first_sentence_tokens(self):
        # Position maps for the four plain tokens that take part in the two
        # coordinations ('Peter and Jane', 'him and her').
        doc = self.sm_nlp('Peter and Jane spoke to him and her.')
        self.sm_rules_analyzer.initialize(doc)

        # (token index, expected position map), checked in document order.
        cases = (
            (0, [0, 1, 1, 0, 0, -1, 0]),
            (2, [2, 2, 1, 1, 1, -1, 1]),
            (5, [5, 2, 1, 2, 0, -1, 0]),
            (7, [7, 3, 1, 1, 1, -1, 1]),
        )
        for token_index, expected_map in cases:
            actual_map = self.sm_tendencies_analyzer.get_position_map(
                doc[token_index], doc)
            self.assertEqual(expected_map, actual_map)

    def test_get_position_map_conjunction_first_sentence_mentions_false(self):
        # Position maps for the coordinated tokens when each is wrapped in a
        # Mention whose second constructor argument is False.
        doc = self.sm_nlp('Peter and Jane spoke to him and her.')
        self.sm_rules_analyzer.initialize(doc)
        analyzer = self.sm_tendencies_analyzer

        expected_by_index = (
            (0, [0, 1, 1, 0, 0, -1, 0]),
            (2, [2, 2, 1, 1, 1, -1, 1]),
            (5, [5, 2, 1, 2, 0, -1, 0]),
            (7, [7, 3, 1, 1, 1, -1, 1]),
        )
        for token_index, expected_map in expected_by_index:
            mention = Mention(doc[token_index], False)
            self.assertEqual(expected_map,
                             analyzer.get_position_map(mention, doc))

    def test_get_position_map_conjunction_second_sentence_mentions_false(self):
        # Same coordinated sentence, now preceded by another sentence; the
        # Mentions (second argument False) use document indexes 4, 6, 9, 11.
        text = 'A preceding sentence. Peter and Jane spoke to him and her.'
        doc = self.sm_nlp(text)
        self.sm_rules_analyzer.initialize(doc)

        # (token index, expected position map), checked in document order.
        cases = (
            (4, [0, 1, 1, 0, 0, -1, 0]),
            (6, [2, 2, 1, 1, 1, -1, 1]),
            (9, [5, 2, 1, 2, 0, -1, 0]),
            (11, [7, 3, 1, 1, 1, -1, 1]),
        )
        for token_index, expected_map in cases:
            actual_map = self.sm_tendencies_analyzer.get_position_map(
                Mention(doc[token_index], False), doc)
            self.assertEqual(expected_map, actual_map)

    def test_get_position_map_conjunction_first_sentence_mentions_true(self):
        # Position maps for Mentions built with second argument True on the
        # first member of each coordination; the sixth entry is expected to
        # be 1 in both cases.
        doc = self.sm_nlp('Peter and Jane spoke to him and her.')
        self.sm_rules_analyzer.initialize(doc)
        get_position_map = self.sm_tendencies_analyzer.get_position_map

        peter_map = get_position_map(Mention(doc[0], True), doc)
        self.assertEqual([0, 1, 1, 0, 0, 1, 0], peter_map)

        him_map = get_position_map(Mention(doc[5], True), doc)
        self.assertEqual([5, 2, 1, 2, 0, 1, 0], him_map)

    def test_get_position_map_conjunction_second_sentence_mentions_true(self):
        # Mentions built with second argument True on document indexes 4 and
        # 9, in a sentence that follows a preceding one.
        text = 'A preceding sentence. Peter and Jane spoke to him and her.'
        doc = self.sm_nlp(text)
        self.sm_rules_analyzer.initialize(doc)

        # (token index, expected position map)
        cases = (
            (4, [0, 1, 1, 0, 0, 1, 0]),
            (9, [5, 2, 1, 2, 0, 1, 0]),
        )
        for token_index, expected_map in cases:
            actual_map = self.sm_tendencies_analyzer.get_position_map(
                Mention(doc[token_index], True), doc)
            self.assertEqual(expected_map, actual_map)

    def test_get_compatibility_map_simple(self):
        # Compares the string form of the compatibility map between a Mention
        # on token 0 ('Richard') and token 2 ('he') with a recorded value.
        doc = self.sm_nlp('Richard said he was entering the big house')
        self.sm_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], False)
        compatibility_map = self.sm_tendencies_analyzer.get_compatibility_map(
            mention, doc[2])
        self.assertEqual('[2, 0, 1, 0.30341318, 3]', str(compatibility_map))

    def test_get_compatibility_map_coordination(self):
        # Compatibility map between a Mention on token 0 built with second
        # argument True (the token heads 'Richard and Jane') and token 4
        # ('he'), compared in string form with a recorded value.
        doc = self.sm_nlp(
            'Richard and Jane said he was entering the big house')
        self.sm_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], True)
        compatibility_map = self.sm_tendencies_analyzer.get_compatibility_map(
            mention, doc[4])
        self.assertEqual('[4, 0, 1, 0.23422348, 3]', str(compatibility_map))

    def test_get_compatibility_map_different_sentences(self):
        # Compatibility map where the Mention (token 0, 'Richard') and the
        # other token (index 3, 'He') sit in different sentences.
        text = 'Richard called. He said he was entering the big house'
        doc = self.sm_nlp(text)
        self.sm_rules_analyzer.initialize(doc)
        actual = str(
            self.sm_tendencies_analyzer.get_compatibility_map(
                Mention(doc[0], False), doc[3]))
        self.assertEqual('[3, 1, 0, 0.507851, 6]', actual)

    def test_get_compatibility_map_same_sentence_no_governance(self):
        # Compatibility map between a Mention on token 0 and token 4 of the
        # same sentence, compared in string form with a recorded value.
        # NOTE(review): under the usual tokenization doc[0] is 'After', not
        # 'Richard' -- confirm this is intended before changing the index,
        # since the expected value was recorded for the current input.
        doc = self.sm_nlp(
            'After Richard arrived, he said he was entering the big house')
        self.sm_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], False)
        compatibility_map = self.sm_tendencies_analyzer.get_compatibility_map(
            mention, doc[4])
        self.assertEqual('[4, 0, 0, 0.045050576, 5]', str(compatibility_map))

    def test_get_compatibility_map_same_sentence_lefthand_sibling_governance(
            self):
        # Compatibility map between a Mention on token 0 ('Richard') and
        # token 4 ('he'), where 'he' is coordinated with 'Peter' to its left.
        # NOTE(review): the document is parsed and initialized with the lg
        # pipeline but scored with sm_tendencies_analyzer.  The expected
        # value was presumably recorded with this combination -- confirm
        # whether lg_tendencies_analyzer was intended before changing it.
        text = 'Richard said Peter and he were entering the big house'
        doc = self.lg_nlp(text)
        self.lg_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], False)
        compatibility_map = self.sm_tendencies_analyzer.get_compatibility_map(
            mention, doc[4])
        self.assertEqual('[4, 0, 1, 0.15999001, 3]', str(compatibility_map))

    def test_get_compatibility_map_same_sentence_lefthand_sibling_no_governance(
            self):
        # Compatibility map between a Mention on token 1 ('Richard') and
        # token 6 ('he', coordinated with 'Peter' to its left), compared in
        # string form with a recorded value.
        text = 'After Richard arrived, Peter and he said he was entering the big house'
        doc = self.sm_nlp(text)
        self.sm_rules_analyzer.initialize(doc)
        actual = str(
            self.sm_tendencies_analyzer.get_compatibility_map(
                Mention(doc[1], False), doc[6]))
        self.assertEqual('[5, 0, 0, 0.41453412, 1]', actual)

    def test_get_cosine_similarity_lg(self):
        # Compatibility map computed with the lg pipeline; going by the test
        # name, the fourth entry is presumably the cosine similarity.
        # NOTE(review): under the usual tokenization doc[0] is 'After', not
        # 'Richard' -- confirm this is intended; the expected value was
        # recorded for the current input.
        text = 'After Richard arrived, he said he was entering the big house'
        doc = self.lg_nlp(text)
        self.lg_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], False)
        compatibility_map = self.lg_tendencies_analyzer.get_compatibility_map(
            mention, doc[4])
        self.assertEqual('[4, 0, 0, 0.3336621, 5]', str(compatibility_map))

    def test_get_cosine_similarity_lg_no_vector_1(self):
        # lg compatibility map for a sentence containing the nonsense word
        # 'arfewfewfrived' -- presumably a word without a vector, going by
        # the test name.
        # NOTE(review): under the usual tokenization doc[0] is 'After', not
        # 'Richard' -- confirm this is intended; the expected value was
        # recorded for the current input.
        text = 'After Richard arfewfewfrived, he said he was entering the big house'
        doc = self.lg_nlp(text)
        self.lg_rules_analyzer.initialize(doc)
        actual = str(
            self.lg_tendencies_analyzer.get_compatibility_map(
                Mention(doc[0], False), doc[4]))
        self.assertEqual('[4, 0, 0, 0.59521705, 5]', actual)

    def test_get_cosine_similarity_lg_no_vector_2(self):
        # lg compatibility map for a sentence containing the nonsense word
        # 'saifefefwefefd' -- presumably a word without a vector, going by
        # the test name.
        # NOTE(review): under the usual tokenization doc[0] is 'After', not
        # 'Richard' -- confirm this is intended; the expected value was
        # recorded for the current input.
        text = 'After Richard arrived, he saifefefwefefd he was entering the big house'
        doc = self.lg_nlp(text)
        self.lg_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], False)
        compatibility_map = self.lg_tendencies_analyzer.get_compatibility_map(
            mention, doc[4])
        self.assertEqual('[4, 0, 0, 0.4391515, 2]', str(compatibility_map))

    def test_get_cosine_similarity_sm_root_1(self):
        # sm compatibility map where the Mention is the one-word sentence
        # 'Richard'; the fourth entry of the expected map is -1.
        doc = self.sm_nlp('Richard. He said he was entering the big house')
        self.sm_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], False)
        compatibility_map = self.sm_tendencies_analyzer.get_compatibility_map(
            mention, doc[2])
        self.assertEqual('[2, 1, 0, -1, 1]', str(compatibility_map))

    def test_get_cosine_similarity_sm_root_2(self):
        # sm compatibility map where the second token (index 3, 'He') forms
        # a one-word sentence; the fourth entry of the expected map is -1.
        doc = self.sm_nlp('Richard arrived. He.')
        self.sm_rules_analyzer.initialize(doc)
        actual = str(
            self.sm_tendencies_analyzer.get_compatibility_map(
                Mention(doc[0], False), doc[3]))
        self.assertEqual('[3, 1, 0, -1, 1]', actual)

    def test_get_cosine_similarity_lg_root_1(self):
        # lg compatibility map where the Mention is the one-word sentence
        # 'Richard'; the fourth entry of the expected map is -1.
        doc = self.lg_nlp('Richard. He said he was entering the big house')
        self.lg_rules_analyzer.initialize(doc)
        mention = Mention(doc[0], False)
        compatibility_map = self.lg_tendencies_analyzer.get_compatibility_map(
            mention, doc[2])
        self.assertEqual('[2, 1, 0, -1, 1]', str(compatibility_map))

    def test_get_cosine_similarity_lg_root_2(self):
        # lg compatibility map where the second token (index 3, 'He') forms
        # a one-word sentence; the fourth entry of the expected map is -1.
        doc = self.lg_nlp('Richard arrived. He.')
        self.lg_rules_analyzer.initialize(doc)
        actual = str(
            self.lg_tendencies_analyzer.get_compatibility_map(
                Mention(doc[0], False), doc[3]))
        self.assertEqual('[3, 1, 0, -1, 1]', actual)

    def test_get_vectors_token_with_head_sm(self):
        # get_vectors on the plain token 'He' (sm pipeline): both returned
        # vectors must contain a non-zero entry, have equal length, and the
        # result must equal the token's coref_chains.temp_vectors.
        doc = self.sm_nlp('He arrived')
        self.sm_rules_analyzer.initialize(doc)
        token = doc[0]
        vectors = self.sm_tendencies_analyzer.get_vectors(token, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertTrue(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))
        self.assertEqual(vectors, doc[0]._.coref_chains.temp_vectors)

    def test_get_vectors_token_without_head_sm(self):
        # get_vectors on the plain token 'arrived' (sm pipeline): the first
        # vector has a non-zero entry, the second has none, both have equal
        # length, and the result equals the token's temp_vectors.
        doc = self.sm_nlp('He arrived')
        self.sm_rules_analyzer.initialize(doc)
        token = doc[1]
        vectors = self.sm_tendencies_analyzer.get_vectors(token, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertFalse(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))
        self.assertEqual(vectors, doc[1]._.coref_chains.temp_vectors)

    def test_get_vectors_token_with_head_lg(self):
        # get_vectors on the plain token 'He' (lg pipeline): both returned
        # vectors must contain a non-zero entry, have equal length, and the
        # result must equal the token's coref_chains.temp_vectors.
        doc = self.lg_nlp('He arrived')
        self.lg_rules_analyzer.initialize(doc)
        token = doc[0]
        vectors = self.lg_tendencies_analyzer.get_vectors(token, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertTrue(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))
        self.assertEqual(vectors, doc[0]._.coref_chains.temp_vectors)

    def test_get_vectors_token_without_head_lg(self):
        # get_vectors on the plain token 'arrived' (lg pipeline): the first
        # vector has a non-zero entry, the second has none, both have equal
        # length, and the result equals the token's temp_vectors.
        doc = self.lg_nlp('He arrived')
        self.lg_rules_analyzer.initialize(doc)
        token = doc[1]
        vectors = self.lg_tendencies_analyzer.get_vectors(token, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertFalse(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))
        self.assertEqual(vectors, doc[1]._.coref_chains.temp_vectors)

    def test_get_vectors_mention_with_head_sm(self):
        # get_vectors on a Mention wrapping 'He' (sm pipeline): both vectors
        # must contain a non-zero entry, have equal length, and the result
        # must equal the mention's temp_vectors attribute.
        doc = self.sm_nlp('He arrived')
        self.sm_rules_analyzer.initialize(doc)
        he_mention = Mention(doc[0], False)
        vectors = self.sm_tendencies_analyzer.get_vectors(he_mention, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertTrue(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))
        self.assertEqual(vectors, he_mention.temp_vectors)

    def test_get_vectors_mention_without_head_sm(self):
        # get_vectors on a Mention wrapping 'arrived' (sm pipeline): the
        # first vector has a non-zero entry, the second has none, and both
        # have the same length.
        doc = self.sm_nlp('He arrived')
        self.sm_rules_analyzer.initialize(doc)
        arrived_mention = Mention(doc[1], False)
        vectors = self.sm_tendencies_analyzer.get_vectors(arrived_mention, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertFalse(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))

    def test_get_vectors_mention_with_head_lg(self):
        # get_vectors on a Mention wrapping 'He' (lg pipeline): both vectors
        # must contain a non-zero entry and have the same length.
        doc = self.lg_nlp('He arrived')
        self.lg_rules_analyzer.initialize(doc)
        he_mention = Mention(doc[0], False)
        vectors = self.lg_tendencies_analyzer.get_vectors(he_mention, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertTrue(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))

    def test_get_vectors_mention_without_head_lg(self):
        # get_vectors on a Mention wrapping 'arrived' (lg pipeline): the
        # first vector has a non-zero entry, the second has none, and both
        # have the same length.
        doc = self.lg_nlp('He arrived')
        self.lg_rules_analyzer.initialize(doc)
        arrived_mention = Mention(doc[1], False)
        vectors = self.lg_tendencies_analyzer.get_vectors(arrived_mention, doc)
        first_vector = vectors[0]
        self.assertTrue(first_vector.any())
        # Presumably the head's vector, going by the test name.
        second_vector = vectors[1]
        self.assertFalse(second_vector.any())
        self.assertEqual(len(first_vector), len(second_vector))

    def test_vectors_twoway_coordination_sm(self):
        # For 'Peter and Jane' (sm pipeline), each component of the first
        # vector of Mention(doc[0], True) must almost equal the mean of the
        # matching components of the individual 'Peter' and 'Jane' mentions.
        doc = self.sm_nlp('Peter and Jane arrived')
        self.sm_rules_analyzer.initialize(doc)
        get_vectors = self.sm_tendencies_analyzer.get_vectors
        # Call order (Peter, Jane, combined) is kept as in the original.
        peter_vector = get_vectors(Mention(doc[0], False), doc)[0]
        jane_vector = get_vectors(Mention(doc[2], False), doc)[0]
        combined_vector = get_vectors(Mention(doc[0], True), doc)[0]
        for index, peter_component in enumerate(peter_vector):
            expected_component = (peter_component + jane_vector[index]) / 2
            self.assertAlmostEqual(expected_component, combined_vector[index])

    def test_vectors_twoway_coordination_lg(self):
        # For 'Peter and Jane' (lg pipeline), each component of the first
        # vector of Mention(doc[0], True) must almost equal the mean of the
        # matching components of the individual 'Peter' and 'Jane' mentions.
        doc = self.lg_nlp('Peter and Jane arrived')
        self.lg_rules_analyzer.initialize(doc)
        get_vectors = self.lg_tendencies_analyzer.get_vectors
        # Call order (Peter, Jane, combined) is kept as in the original.
        peter_vector = get_vectors(Mention(doc[0], False), doc)[0]
        jane_vector = get_vectors(Mention(doc[2], False), doc)[0]
        combined_vector = get_vectors(Mention(doc[0], True), doc)[0]
        for index, peter_component in enumerate(peter_vector):
            expected_component = (peter_component + jane_vector[index]) / 2
            self.assertAlmostEqual(expected_component, combined_vector[index])

    def test_vectors_threeway_coordination_sm(self):
        # For 'Richard, Peter and Jane' (sm pipeline), each component of the
        # first vector of Mention(doc[0], True) must equal, to 3 places, the
        # mean of the matching components of the three individual mentions.
        doc = self.sm_nlp('Richard, Peter and Jane arrived')
        self.sm_rules_analyzer.initialize(doc)
        get_vectors = self.sm_tendencies_analyzer.get_vectors
        # Call order (Richard, Peter, Jane, combined) is kept as in the
        # original.
        richard_vector = get_vectors(Mention(doc[0], False), doc)[0]
        peter_vector = get_vectors(Mention(doc[2], False), doc)[0]
        jane_vector = get_vectors(Mention(doc[4], False), doc)[0]
        combined_vector = get_vectors(Mention(doc[0], True), doc)[0]
        for index, peter_component in enumerate(peter_vector):
            # Summation order (Peter + Jane + Richard) is kept so that the
            # floating-point result is unchanged.
            component_sum = (peter_component + jane_vector[index] +
                             richard_vector[index])
            self.assertAlmostEqual(
                component_sum / 3, combined_vector[index], places=3)

    def test_vectors_threeway_coordination_lg(self):
        # For '... Richard, Peter and Jane.' (lg pipeline), each component
        # of the first vector of Mention(doc[3], True) must equal, to 3
        # places, the mean of the matching components of the three
        # individual mentions (document indexes 3, 5 and 7).
        doc = self.lg_nlp('They spoke to Richard, Peter and Jane.')
        self.lg_rules_analyzer.initialize(doc)
        get_vectors = self.lg_tendencies_analyzer.get_vectors
        # Call order (Richard, Peter, Jane, combined) is kept as in the
        # original.
        richard_vector = get_vectors(Mention(doc[3], False), doc)[0]
        peter_vector = get_vectors(Mention(doc[5], False), doc)[0]
        jane_vector = get_vectors(Mention(doc[7], False), doc)[0]
        combined_vector = get_vectors(Mention(doc[3], True), doc)[0]
        for index, peter_component in enumerate(peter_vector):
            # Summation order (Peter + Jane + Richard) is kept so that the
            # floating-point result is unchanged.
            component_sum = (peter_component + jane_vector[index] +
                             richard_vector[index])
            self.assertAlmostEqual(
                component_sum / 3, combined_vector[index], places=3)