Пример #1
0
    def test_1(self):
        """Score an 'NP V NP with NP' occurrence against 'for' and 'with' frames.

        The 'for' frame is expected to score int(100 * 4 / 3). The two 'with'
        frames are expected to score 200 and to leave Role2 and Role3 as the
        candidates for the last slot.
        """
        def official_frame(vn_class, preposition, final_role):
            # Fresh dicts on every call, so no syntax element is shared
            # between two official frames.
            return VerbnetOfficialFrame(vn_class, [
                {'elem': 'NP', 'role': 'Agent'},
                {'elem': 'V'},
                {'elem': 'NP', 'role': 'Patient'},
                {'elem': preposition},
                {'elem': 'NP', 'role': final_role},
            ])

        occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in ('NP', 'V', 'NP', 'with', 'NP')],
            3, 'a predicate')
        for_frame = official_frame('Class 1', 'for', 'Role1')
        with_frame_class_1 = official_frame('Class 1', 'with', 'Role2')
        with_frame_class_2 = official_frame('Class 2', 'with', 'Role3')
        expected_roles = [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}]

        matcher = FrameMatcher(occurrence, 'sync_predicates')

        score = matcher.perform_frame_matching([for_frame])
        self.assertEqual(score, int(100 * 4 / 3))

        score = matcher.perform_frame_matching(
            [with_frame_class_1, with_frame_class_2])
        self.assertEqual(score, 200)
        self.assertEqual(occurrence.possible_roles(), expected_roles)
        self.assertEqual(occurrence.roles, expected_roles)
    def test_annotated_chunks(self):
        """Check annotated_chunks() with and without an annotated subject."""
        def arg_chunk(phrase_type, text):
            return {'phrase_type': phrase_type, 'type': 'arg', 'text': text}

        def text_chunk(text=''):
            return {'text': text, 'type': 'text'}

        def verb_chunk():
            return {'type': 'verb', 'text': 'urges'}

        infinitive = 'to allow a freer flow of food and medicine into Iraq'

        # First case: the gold frame, whose subject is an annotated argument.
        gold_frame = tony_hall_frame_instances[0]
        expected_gold_chunks = [
            arg_chunk('NP', 'Rep . Tony Hall , D- Ohio'),
            text_chunk(),
            verb_chunk(),
            text_chunk(),
            arg_chunk('NP', 'the United Nations'),
            text_chunk(),
            arg_chunk('to S', infinitive),
        ]
        actual_gold_chunks = list(VerbnetFrameOccurrence.annotated_chunks(
            gold_frame, gold_frame.sentence))
        self.assertEqual(actual_gold_chunks, expected_gold_chunks)

        # Second case: same sentence, but the subject is not an argument, so
        # it is expected to come back as plain text before the verb.
        sentence = (
            "Rep . Tony Hall , D- Ohio , urges the United Nations to allow"
            " a freer flow of food and medicine into Iraq .")
        predicate = Predicate(28, 32, "urges", "urge")
        arguments = [
            Arg(34, 51, "the United Nations", "Addressee", True, "NP"),
            Arg(53, 104,
                "to allow a freer flow of food and medicine into Iraq",
                "Content", True, "VPto"),
        ]
        without_subject = FrameInstance(sentence, predicate, arguments, [], "XXX")
        expected_chunks_without_subject = [
            text_chunk('Rep . Tony Hall , D- Ohio ,'),
            verb_chunk(),
            text_chunk(),
            arg_chunk('NP', 'the United Nations'),
            text_chunk(),
            arg_chunk('to S', infinitive),
        ]
        actual_chunks_without_subject = list(
            VerbnetFrameOccurrence.annotated_chunks(
                without_subject, without_subject.sentence))
        self.assertEqual(actual_chunks_without_subject,
                         expected_chunks_without_subject)
    def test_conversion(self):
        """Check build_from_frame() on the first two Tony Hall frame instances.

        For each instance, the built occurrence, its slot types and its slot
        prepositions are compared with hand-written expectations.
        """
        expected_frames = [
            VerbnetFrameOccurrence(
                [{'elem': elem} for elem in ('NP', 'V', 'NP', 'to', 'S')],
                3, predicate="urge"),
            VerbnetFrameOccurrence(
                [{'elem': elem} for elem in ('NP', 'V', 'NP')],
                2, predicate="allow"),
        ]
        # The third row of each expectation table is not checked below.
        expected_preps = [
            [None, None, "to"],
            [None, None],
            [None, None, "in", None, "for", None, "after"],
        ]
        type_of = ComputeSlotTypeMixin.slot_types
        expected_types = [
            [type_of[name] for name in ("subject", "object", "prep_object")],
            [type_of[name] for name in ("subject", "object")],
            [type_of[name] for name in (
                "subject", "subject", "prep_object", "object",
                "prep_object", "indirect_object", "prep_object")],
        ]

        def check(position, built):
            self.assertEqual(expected_frames[position], built)
            self.assertEqual(built.slot_types, expected_types[position])
            self.assertEqual(built.slot_preps, expected_preps[position])

        check(0, VerbnetFrameOccurrence.build_from_frame(
            tony_hall_frame_instances[0], None))
        check(1, VerbnetFrameOccurrence.build_from_frame(
            tony_hall_frame_instances[1], conll_frame_instance=None))
Пример #4
0
    def _matching_sync_predicates(self, verbnet_frame, slots_associations):
        """ Match the occurrence against verbnet_frame, synchronising on the verb

        Both sequences are walked in lockstep. A mismatch found before the
        verb has been reached in both sequences restarts the walk right after
        the verb; a mismatch found later stops the algorithm.

        slots_associations is updated in place (occurrence slot index ->
        official frame slot index) and also returned.

        Returns the number of matched slots and slots_associations.
        """
        occurrence_parts = self.frame_occurrence.structure
        verb_pos_occurrence = occurrence_parts.index({'elem': 'V'})
        official_parts = verbnet_frame.syntax
        verb_pos_official = official_parts.index({'elem': 'V'})

        # Number of slots located before the verb on each side; used to
        # reposition the slot counters when jumping to the verb.
        slots_before_verb_occurrence = 0
        for element in occurrence_parts:
            if not VerbnetFrameOccurrence._is_a_slot(element):
                if element['elem'] == "V":
                    break
                continue
            slots_before_verb_occurrence += 1

        slots_before_verb_official = 0
        for element in official_parts:
            if 'role' not in element:
                if element['elem'] == "V":
                    break
                continue
            slots_before_verb_official += 1

        matched = 0
        pos_occurrence = pos_official = 0
        slot_occurrence = slot_official = 0

        while (pos_occurrence < len(occurrence_parts)
               and pos_official < len(official_parts)):
            occurrence_part = occurrence_parts[pos_occurrence]
            official_part = official_parts[pos_official]

            if not FrameMatcher._is_a_match(occurrence_part, official_part):
                verb_reached_on_both_sides = (
                    pos_occurrence >= verb_pos_occurrence
                    and pos_official >= verb_pos_official)
                if verb_reached_on_both_sides:
                    break
                # The verb has not been seen everywhere yet: carry on with
                # what follows the verb, so that e.g. a "NP NP V" construct
                # does not interrupt the matching early. The increment at the
                # end of the loop then steps past the verb itself.
                pos_occurrence = verb_pos_occurrence
                pos_official = verb_pos_official
                slot_occurrence = slots_before_verb_occurrence
                slot_official = slots_before_verb_official
            elif VerbnetFrameOccurrence._is_a_slot(occurrence_part):
                matched += 1
                # TODO this is probably fixed with the SYNTAX-based VN reader
                # verbnet_frame can have more syntax than roles. This will
                # for instance happen in the "NP V NP S_INF" syntax of
                # want-32.1, where S_INF is given no role since it's part
                # of the NP
                if slot_official < verbnet_frame.num_slots:
                    slots_associations[slot_occurrence] = slot_official
                    slot_occurrence += 1
                    slot_official += 1

            pos_occurrence += 1
            pos_official += 1

        return matched, slots_associations
Пример #5
0
    def _matching_sync_predicates(self, verbnet_frame, slots_associations):
        """ Lockstep matching that re-synchronises on the verb

        The walk stops at the first mismatch met once the verb has been
        reached in both the occurrence and the official frame. A mismatch met
        earlier moves both cursors to the verb and the walk goes on from
        there.

        slots_associations is filled in place and returned together with the
        number of matched slots.
        """
        structure = self.frame_occurrence.structure
        verb_at_occ = structure.index({'elem': 'V'})
        syntax = verbnet_frame.syntax
        verb_at_off = syntax.index({'elem': 'V'})

        # Slots seen before the verb in the occurrence
        pre_verb_slots_occ = 0
        for element in structure:
            if VerbnetFrameOccurrence._is_a_slot(element):
                pre_verb_slots_occ += 1
                continue
            if element['elem'] == "V":
                break

        # Elements carrying a role before the verb in the official frame
        pre_verb_slots_off = 0
        for element in syntax:
            if 'role' in element:
                pre_verb_slots_off += 1
                continue
            if element['elem'] == "V":
                break

        matches = 0
        pos_occ, pos_off = 0, 0
        next_slot_occ, next_slot_off = 0, 0

        while True:
            if pos_occ >= len(structure) or pos_off >= len(syntax):
                break

            seen = structure[pos_occ]
            expected = syntax[pos_off]

            if FrameMatcher._is_a_match(seen, expected):
                if VerbnetFrameOccurrence._is_a_slot(seen):
                    matches += 1
                    # TODO this is probably fixed with the SYNTAX-based VN reader
                    # verbnet_frame can have more syntax than roles. This will
                    # for instance happen in the "NP V NP S_INF" syntax of
                    # want-32.1, where S_INF is given no role since it's part
                    # of the NP
                    if next_slot_off < verbnet_frame.num_slots:
                        slots_associations[next_slot_occ] = next_slot_off
                        next_slot_occ += 1
                        next_slot_off += 1
            elif pos_occ < verb_at_occ or pos_off < verb_at_off:
                # No match, but the verb has not been seen everywhere yet:
                # continue with everything that follows the verb. This is for
                # instance to prevent a "NP NP V" construct from interrupting
                # the matching early.
                pos_occ, pos_off = verb_at_occ, verb_at_off
                next_slot_occ = pre_verb_slots_occ
                next_slot_off = pre_verb_slots_off
            else:
                break

            pos_occ += 1
            pos_off += 1

        return matches, slots_associations
Пример #6
0
    def test_present_that(self):
        """An 'NP V that S' occurrence is matched against the same syntax.

        The expected score is 200 and the two slots are expected to receive
        Agent and Patient.
        """
        occurrence = VerbnetFrameOccurrence(
            [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'that'}, {'elem': 'S'}],
            2, 'consider')
        matcher = FrameMatcher(occurrence, 'sync_predicates')

        consider_frame = VerbnetOfficialFrame('consider-29.9-1', [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'that'},
            {'elem': 'S', 'role': 'Patient'},
        ])
        score = matcher.perform_frame_matching([consider_frame])

        self.assertEqual(score, 200)
        self.assertEqual(occurrence.roles, [{'Agent'}, {'Patient'}])
Пример #7
0
    def test_3(self):
        """Score 'NP V with NP' against an 'NP V NP with NP' official frame."""
        occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in ('NP', 'V', 'with', 'NP')],
            2, 'a predicate')
        official = VerbnetOfficialFrame('c', [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'Patient'},
            {'elem': 'with'},
            {'elem': 'NP', 'role': 'Role3'},
        ])
        expected_score = int(100 / 2 + 100 / 3)

        matcher = FrameMatcher(occurrence, 'sync_predicates')
        score = matcher.perform_frame_matching([official])
        self.assertEqual(score, expected_score)
Пример #8
0
    def test_conversion(self):
        """Compare build_from_frame() results with hand-built occurrences.

        Covers tony_hall_frame_instances[0] ("urge") and
        tony_hall_frame_instances[1] ("allow"): the occurrence itself, its
        slot_types and its slot_preps.
        """
        def occurrence(elems, slot_count, predicate):
            return VerbnetFrameOccurrence(
                [{'elem': elem} for elem in elems],
                slot_count,
                predicate=predicate)

        urge_frame = occurrence(('NP', 'V', 'NP', 'to', 'S'), 3, "urge")
        allow_frame = occurrence(('NP', 'V', 'NP'), 2, "allow")
        vn_frames = [urge_frame, allow_frame]

        # Only the first two rows of the tables below are asserted on.
        slot_preps = [
            [None, None, "to"],
            [None, None],
            [None, None, "in", None, "for", None, "after"],
        ]

        st = ComputeSlotTypeMixin.slot_types

        def types_for(*names):
            return [st[name] for name in names]

        slot_types = [
            types_for("subject", "object", "prep_object"),
            types_for("subject", "object"),
            types_for("subject", "subject", "prep_object", "object",
                      "prep_object", "indirect_object", "prep_object"),
        ]

        built = VerbnetFrameOccurrence.build_from_frame(
            tony_hall_frame_instances[0], None)
        self.assertEqual(vn_frames[0], built)
        self.assertEqual(built.slot_types, slot_types[0])
        self.assertEqual(built.slot_preps, slot_preps[0])

        built = VerbnetFrameOccurrence.build_from_frame(
            tony_hall_frame_instances[1], conll_frame_instance=None)
        self.assertEqual(vn_frames[1], built)
        self.assertEqual(built.slot_types, slot_types[1])
        self.assertEqual(built.slot_preps, slot_preps[1])
Пример #9
0
    def _matching_stop_on_fail(self, verbnet_frame, slots_associations):
        """ Walk both frames in lockstep and stop at the first mismatch

        Every matched slot of the occurrence is counted; the n-th matched slot
        is associated with the n-th slot of verbnet_frame as long as that
        index is below verbnet_frame.num_slots.

        slots_associations is updated in place and returned along with the
        number of matched slots.
        """
        matched = 0
        pairs = zip(self.frame_occurrence.structure, verbnet_frame.syntax)

        for occurrence_part, official_part in pairs:
            if not FrameMatcher._is_a_match(occurrence_part, official_part):
                break
            if not VerbnetFrameOccurrence._is_a_slot(occurrence_part):
                continue

            slot_index = matched
            matched += 1
            if slot_index < verbnet_frame.num_slots:
                slots_associations[slot_index] = slot_index

        return matched, slots_associations
Пример #10
0
    def _matching_stop_on_fail(self, verbnet_frame, slots_associations):
        """ Count matching slots until the first mismatch

        The occurrence structure and the official syntax are compared element
        by element, up to the length of the shorter one. Matched slots are
        mapped to the official slot with the same rank, provided that rank
        exists in verbnet_frame (rank < num_slots).

        Returns (number of matched slots, slots_associations); the mapping
        passed in is modified in place.
        """
        structure = self.frame_occurrence.structure
        syntax = verbnet_frame.syntax
        num_match = 0

        for position in range(min(len(structure), len(syntax))):
            occured_part = structure[position]
            official_part = syntax[position]

            if not FrameMatcher._is_a_match(occured_part, official_part):
                break

            if VerbnetFrameOccurrence._is_a_slot(occured_part):
                rank = num_match
                num_match = rank + 1
                if rank < verbnet_frame.num_slots:
                    slots_associations[rank] = rank

        return num_match, slots_associations
Пример #11
0
    def test_baseline_alg(self):
        """Run the 'baseline' algorithm on an 'NP V NP NP for NP' occurrence.

        Two official frames are offered, one with 'by' and one whose
        preposition element is the set {'for', 'as'}. The expected roles are
        R1, R4, nothing for the third slot, and R5.
        """
        occurrence = VerbnetFrameOccurrence(
            [{'elem': elem} for elem in ('NP', 'V', 'NP', 'NP', 'for', 'NP')],
            4, 'a predicate')

        by_frame = VerbnetOfficialFrame('XX', [
            {'elem': 'NP', 'role': 'R1'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'R2'},
            {'elem': 'by'},
            {'elem': 'NP', 'role': 'R3'},
        ])
        for_or_as_frame = VerbnetOfficialFrame('XX', [
            {'elem': 'NP', 'role': 'R1'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'R4'},
            {'elem': {'for', 'as'}},
            {'elem': 'NP', 'role': 'R5'},
        ])
        candidates = [by_frame, for_or_as_frame]

        matcher = FrameMatcher(occurrence, 'baseline')
        matcher.perform_frame_matching(candidates)

        expected_roles = [{'R1'}, {'R4'}, set(), {'R5'}]
        self.assertEqual(occurrence.roles, expected_roles)
Пример #12
0
    def test_2(self):
        """An occurrence built with a slot count of 0 reports num_slots == 0."""
        occurrence = VerbnetFrameOccurrence(
            [{'elem': 'to'}, {'elem': 'be'}], 0, 'a predicate')

        # This official frame is constructed but not referenced by the
        # assertion below.
        frame = VerbnetOfficialFrame('c', [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'Patient'},
            {'elem': 'with'},
            {'elem': 'NP', 'role': 'Role3'},
        ])

        self.assertEqual(occurrence.num_slots, 0)
Пример #13
0
    def test_4(self):
        """Match an 'NP V NP' occurrence against seven official frames.

        The first slot is expected to end up with both Agent and Instrument,
        the second one with Theme only.
        """
        def official(*parts):
            # Each part is an (elem, role) pair; a role of None stands for a
            # syntax element without a role (the verb or a preposition).
            syntax = []
            for elem, role in parts:
                entry = {'elem': elem}
                if role is not None:
                    entry['role'] = role
                syntax.append(entry)
            return VerbnetOfficialFrame('XX', syntax)

        verb = ('V', None)
        with_prep = ('with', None)

        occurrence = VerbnetFrameOccurrence(
            [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'}], 2, 'a predicate')
        matcher = FrameMatcher(occurrence, 'sync_predicates')

        candidates = [
            official(('NP', 'Agent'), verb, ('NP', 'Theme')),
            official(('NP', 'Agent'), verb, ('NP', 'Theme')),
            official(('NP', 'Theme'), verb),
            official(('NP', 'Agent'), verb, ('NP', 'Theme')),
            official(('NP', 'Theme'), verb, with_prep, ('NP', 'Instrument')),
            official(('NP', 'Agent'), verb, ('NP', 'Theme'),
                     with_prep, ('NP', 'Instrument')),
            official(('NP', 'Instrument'), verb, ('NP', 'Theme')),
        ]
        matcher.perform_frame_matching(candidates)

        expected_roles = [{'Agent', 'Instrument'}, {'Theme'}]
        self.assertEqual(occurrence.roles, expected_roles)
Пример #14
0
def get_frames(corpus, verbnet_classes, frameNet, argid=False):
    """ Yield, file by file, the annotated frames and their VerbNet occurrences

    This is a generator. For every (annotation file, parsed CoNLL file) pair
    it yields two lists of the same size, annotated_frames and vn_frames,
    where vn_frames[i] is built from annotated_frames[i] with
    VerbnetFrameOccurrence.build_from_frame.

    The input files depend on options.Options:
    * conll_input set: that single CoNLL file, with no annotation file
    * corpus 'FrameNet': the fulltext annotations and their parses
    * corpus 'dicoinfo_fr': accepted, but nothing is yielded for it here

    Frames are only produced when options.Options.corpus is 'FrameNet'.

    :param corpus: corpus name, only used in the debug and error messages
        (the selection itself reads options.Options.corpus)
    :param verbnet_classes: passed to the argument guesser and to
        roleextractor.fill_gold_roles when argid is set
    :param frameNet: passed to rolematcher.VnFnRoleMatcher
    :param argid: when true, arguments are guessed from the parses and then
        given gold roles; otherwise gold arguments are read with FNAllReader
    :raises Exception: when no CoNLL input is given and options.Options.corpus
        is neither 'FrameNet' nor 'dicoinfo_fr'
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(options.Options.loglevel)
    logger.debug("get_frames corpus=%s input=%s",
                 corpus, options.Options.conll_input)

    if options.Options.conll_input is not None:
        annotation_list = [None]
        parsed_conll_list = [Path(options.Options.conll_input)]
    elif options.Options.corpus == 'FrameNet':
        annotation_list = options.Options.fulltext_annotations
        parsed_conll_list = options.Options.fulltext_parses
    elif options.Options.corpus == 'dicoinfo_fr':
        # Known corpus, but no file list is set up for it in this function.
        pass
    else:
        raise Exception('Unknown corpus {}'.format(corpus))

    if options.Options.corpus != 'FrameNet':
        return

    logger.info("Loading FrameNet and VerbNet role mappings %s ...",
                paths.Paths.VNFN_MATCHING)
    role_matcher = rolematcher.VnFnRoleMatcher(paths.Paths.VNFN_MATCHING,
                                               frameNet)

    for annotation_file, parsed_conll_file in zip(annotation_list,
                                                  parsed_conll_list):
        logger.debug("Handling %s %s", annotation_file, parsed_conll_file)
        annotated_frames = []
        vn_frames = []

        if argid:
            logger.debug("Argument identification")
            #
            # Argument identification
            #
            # The CoNLL reader is only needed on this path.
            conllparsed_reader = ConllParsedReader()
            arg_guesser = argguesser.ArgGuesser(verbnet_classes)

            # Many instances are not actually FrameNet frames
            new_frame_instances = list(
                arg_guesser.frame_instances_from_file(
                    conllparsed_reader.sentence_trees(parsed_conll_file),
                    parsed_conll_file))
            new_annotated_frames = roleextractor.fill_gold_roles(
                new_frame_instances, annotation_file, parsed_conll_file,
                verbnet_classes, role_matcher)

            for gold_frame, frame_instance in zip(new_annotated_frames,
                                                  new_frame_instances):
                annotated_frames.append(gold_frame)
                vn_frames.append(
                    VerbnetFrameOccurrence.build_from_frame(
                        gold_frame, conll_frame_instance=frame_instance))
        else:
            logger.info("Load gold arguments")
            #
            # Load gold arguments
            #
            fn_reader = FNAllReader(
                add_non_core_args=options.Options.add_non_core_args)

            for framenet_instance in fn_reader.iter_frames(
                    annotation_file, parsed_conll_file):
                annotated_frames.append(framenet_instance)
                vn_frames.append(
                    VerbnetFrameOccurrence.build_from_frame(
                        framenet_instance, conll_frame_instance=None))

            stats.stats_data["files"] += fn_reader.stats["files"]

        yield annotated_frames, vn_frames
Пример #15
0
    def test_1(self):
        """Match 'NP V NP with NP' with the sync_predicates algorithm.

        A frame using 'for' instead of 'with' is expected to score
        int(100 * 4 / 3). Two frames using 'with' are expected to score 200,
        with Role2 and Role3 both kept as possible roles for the last slot.
        """
        def build_official(vn_class, spec):
            # spec is a sequence of (elem, role) pairs; role is None for
            # elements that carry no role.
            syntax = []
            for elem, role in spec:
                if role is None:
                    syntax.append({'elem': elem})
                else:
                    syntax.append({'elem': elem, 'role': role})
            return VerbnetOfficialFrame(vn_class, syntax)

        common_prefix = (('NP', 'Agent'), ('V', None), ('NP', 'Patient'))

        frame_occurrence = VerbnetFrameOccurrence(
            [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'},
             {'elem': 'with'}, {'elem': 'NP'}],
            3, 'a predicate')
        frame2 = build_official(
            'Class 1', common_prefix + (('for', None), ('NP', 'Role1')))
        frame3 = build_official(
            'Class 1', common_prefix + (('with', None), ('NP', 'Role2')))
        frame4 = build_official(
            'Class 2', common_prefix + (('with', None), ('NP', 'Role3')))

        matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

        score_for = matcher.perform_frame_matching([frame2])
        self.assertEqual(score_for, int(100 * 4 / 3))

        score_with = matcher.perform_frame_matching([frame3, frame4])
        self.assertEqual(score_with, 200)

        self.assertEqual(
            frame_occurrence.possible_roles(),
            [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}])
        self.assertEqual(
            frame_occurrence.roles,
            [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}])
Пример #16
0
def get_frames(corpus, verbnet_classes, frameNet, argid=False):
    """ Generate (annotated_frames, vn_frames) for each input file

    Nothing is returned as a whole: this function is a generator and yields
    one pair of equally sized lists per (annotation file, parsed CoNLL file)
    pair. vn_frames holds the VerbnetFrameOccurrence built from the frame at
    the same position in annotated_frames.

    Input selection, read from options.Options:
    * conll_input is set: a single CoNLL file and no annotation file
    * corpus is 'FrameNet': fulltext_annotations and fulltext_parses
    * corpus is 'dicoinfo_fr': accepted, no input is set up here

    Whatever the input, frames are only generated when options.Options.corpus
    is 'FrameNet'.

    :param corpus: corpus name shown in the debug and error messages; the
        branching itself is done on options.Options.corpus
    :param verbnet_classes: forwarded to argguesser.ArgGuesser and to
        roleextractor.fill_gold_roles (argid mode only)
    :param frameNet: forwarded to rolematcher.VnFnRoleMatcher
    :param argid: if true, perform argument identification on the parses;
        if false, load gold arguments with FNAllReader
    :raises Exception: if conll_input is None and options.Options.corpus is
        not one of 'FrameNet' or 'dicoinfo_fr'
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(options.Options.loglevel)
    logger.debug("get_frames corpus=%s input=%s",
                 corpus, options.Options.conll_input)

    if options.Options.conll_input is not None:
        annotation_list = [None]
        parsed_conll_list = [Path(options.Options.conll_input)]
    elif options.Options.corpus == 'FrameNet':
        annotation_list = options.Options.fulltext_annotations
        parsed_conll_list = options.Options.fulltext_parses
    elif options.Options.corpus == 'dicoinfo_fr':
        # Recognised corpus name; no input lists are prepared for it here.
        pass
    else:
        raise Exception('Unknown corpus {}'.format(corpus))

    # Everything below is specific to FrameNet.
    if options.Options.corpus != 'FrameNet':
        return

    logger.info("Loading FrameNet and VerbNet role mappings %s ...",
                paths.Paths.VNFN_MATCHING)
    role_matcher = rolematcher.VnFnRoleMatcher(paths.Paths.VNFN_MATCHING, frameNet)

    for annotation_file, parsed_conll_file in zip(annotation_list, parsed_conll_list):
        logger.debug("Handling %s %s", annotation_file, parsed_conll_file)
        annotated_frames = []
        vn_frames = []

        if argid:
            logger.debug("Argument identification")
            #
            # Argument identification
            #
            # Only this branch reads the parsed sentences directly.
            conllparsed_reader = ConllParsedReader()
            arg_guesser = argguesser.ArgGuesser(verbnet_classes)

            # Many instances are not actually FrameNet frames
            new_frame_instances = list(arg_guesser.frame_instances_from_file(
                conllparsed_reader.sentence_trees(parsed_conll_file), parsed_conll_file))
            new_annotated_frames = roleextractor.fill_gold_roles(
                new_frame_instances, annotation_file, parsed_conll_file,
                verbnet_classes, role_matcher)

            for gold_frame, frame_instance in zip(new_annotated_frames, new_frame_instances):
                annotated_frames.append(gold_frame)
                vn_frames.append(VerbnetFrameOccurrence.build_from_frame(
                    gold_frame, conll_frame_instance=frame_instance))
        else:
            logger.info("Load gold arguments")
            #
            # Load gold arguments
            #
            fn_reader = FNAllReader(
                add_non_core_args=options.Options.add_non_core_args)

            for framenet_instance in fn_reader.iter_frames(annotation_file, parsed_conll_file):
                annotated_frames.append(framenet_instance)
                vn_frames.append(VerbnetFrameOccurrence.build_from_frame(
                    framenet_instance, conll_frame_instance=None))

            stats.stats_data["files"] += fn_reader.stats["files"]

        yield annotated_frames, vn_frames
Пример #17
0
    def test_annotated_chunks(self):
        """Compare annotated_chunks() output with hand-written chunk lists.

        Two frames over the same sentence are checked: the gold Tony Hall
        frame, and a frame built here whose arguments do not include the
        subject.
        """
        content_text = 'to allow a freer flow of food and medicine into Iraq'

        tony_hall_gold_frame = tony_hall_frame_instances[0]
        tony_hall_gold_frame_chunks = [
            dict(phrase_type='NP', type='arg', text='Rep . Tony Hall , D- Ohio'),
            dict(text='', type='text'),
            dict(type='verb', text='urges'),
            dict(text='', type='text'),
            dict(phrase_type='NP', type='arg', text='the United Nations'),
            dict(text='', type='text'),
            dict(phrase_type='to S', type='arg', text=content_text),
        ]

        gold_chunks = VerbnetFrameOccurrence.annotated_chunks(
            tony_hall_gold_frame, tony_hall_gold_frame.sentence)
        self.assertEqual(list(gold_chunks), tony_hall_gold_frame_chunks)

        addressee = Arg(34, 51, "the United Nations", "Addressee", True, "NP")
        content = Arg(53, 104,
                      "to allow a freer flow of food and medicine into Iraq",
                      "Content", True, "VPto")
        without_subject = FrameInstance(
            "Rep . Tony Hall , D- Ohio , urges the United Nations to allow"
            " a freer flow of food and medicine into Iraq .",
            Predicate(28, 32, "urges", "urge"),
            [addressee, content],
            [],
            "XXX")
        # Without a subject argument, the text before the verb is expected as
        # a single plain-text chunk.
        without_subject_chunks = [
            dict(type='text', text='Rep . Tony Hall , D- Ohio ,'),
            dict(type='verb', text='urges'),
            dict(text='', type='text'),
            dict(phrase_type='NP', type='arg', text='the United Nations'),
            dict(text='', type='text'),
            dict(phrase_type='to S', type='arg', text=content_text),
        ]

        chunks = VerbnetFrameOccurrence.annotated_chunks(
            without_subject, without_subject.sentence)
        self.assertEqual(list(chunks), without_subject_chunks)