def test_1(self):
    """Match an "NP V NP with NP" occurrence against official frames.

    The first official frame uses the preposition 'for', the other two use
    'with' (one per VerbNet class). The test pins the returned scores and
    the roles that end up attached to the occurrence.
    """

    def official(class_name, preposition, last_role):
        # Fresh "NP V NP <preposition> NP" official frame on every call.
        return VerbnetOfficialFrame(class_name, [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'Patient'},
            {'elem': preposition},
            {'elem': 'NP', 'role': last_role},
        ])

    occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'},
         {'elem': 'with'}, {'elem': 'NP'}],
        3, 'a predicate')
    for_frame = official('Class 1', 'for', 'Role1')
    with_frame = official('Class 1', 'with', 'Role2')
    with_frame_other_class = official('Class 2', 'with', 'Role3')

    matcher = FrameMatcher(occurrence, 'sync_predicates')

    # Official frame whose preposition differs from the occurrence's.
    self.assertEqual(
        matcher.perform_frame_matching([for_frame]),
        int(100 * 4 / 3))

    # Two official frames whose syntax is identical to the occurrence's.
    self.assertEqual(
        matcher.perform_frame_matching([with_frame, with_frame_other_class]),
        200)

    expected_roles = [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}]
    self.assertEqual(occurrence.possible_roles(), expected_roles)
    self.assertEqual(occurrence.roles, expected_roles)
def test_annotated_chunks(self):
    """Check ``VerbnetFrameOccurrence.annotated_chunks`` on two frames.

    The first frame is the gold "Tony Hall" instance; the second is built
    here with the same sentence but without an argument before the verb.
    """

    def arg_chunk(phrase_type, text):
        return {'phrase_type': phrase_type, 'type': 'arg', 'text': text}

    def text_chunk(text):
        return {'type': 'text', 'text': text}

    def verb_chunk(text):
        return {'type': 'verb', 'text': text}

    gold_frame = tony_hall_frame_instances[0]
    expected_gold_chunks = [
        arg_chunk('NP', 'Rep . Tony Hall , D- Ohio'),
        text_chunk(''),
        verb_chunk('urges'),
        text_chunk(''),
        arg_chunk('NP', 'the United Nations'),
        text_chunk(''),
        arg_chunk('to S', 'to allow a freer flow of food and medicine into Iraq'),
    ]
    actual_gold_chunks = list(
        VerbnetFrameOccurrence.annotated_chunks(gold_frame, gold_frame.sentence))
    self.assertEqual(actual_gold_chunks, expected_gold_chunks)

    # Same sentence, but only the two arguments after the verb are annotated.
    no_subject_frame = FrameInstance(
        "Rep . Tony Hall , D- Ohio , urges the United Nations to allow"
        " a freer flow of food and medicine into Iraq .",
        Predicate(28, 32, "urges", "urge"),
        [
            Arg(34, 51, "the United Nations", "Addressee", True, "NP"),
            Arg(53, 104,
                "to allow a freer flow of food and medicine into Iraq",
                "Content", True, "VPto"),
        ],
        [],
        "XXX")
    expected_no_subject_chunks = [
        text_chunk('Rep . Tony Hall , D- Ohio ,'),
        verb_chunk('urges'),
        text_chunk(''),
        arg_chunk('NP', 'the United Nations'),
        text_chunk(''),
        arg_chunk('to S', 'to allow a freer flow of food and medicine into Iraq'),
    ]
    actual_no_subject_chunks = list(
        VerbnetFrameOccurrence.annotated_chunks(
            no_subject_frame, no_subject_frame.sentence))
    self.assertEqual(actual_no_subject_chunks, expected_no_subject_chunks)
def test_conversion(self):
    """Check ``VerbnetFrameOccurrence.build_from_frame`` on the gold frames.

    For each of the first two "Tony Hall" frame instances, the built
    occurrence must equal the expected one and expose the expected slot
    types and slot prepositions.
    """
    expected_frames = [
        VerbnetFrameOccurrence(
            [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'},
             {'elem': 'to'}, {'elem': 'S'}],
            3, predicate="urge"),
        VerbnetFrameOccurrence(
            [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'}],
            2, predicate="allow"),
    ]

    # The third entry of the two tables below is not read by this test.
    expected_preps = [
        [None, None, "to"],
        [None, None],
        [None, None, "in", None, "for", None, "after"],
    ]

    st = ComputeSlotTypeMixin.slot_types
    expected_types = [
        [st["subject"], st["object"], st["prep_object"]],
        [st["subject"], st["object"]],
        [st["subject"], st["subject"], st["prep_object"], st["object"],
         st["prep_object"], st["indirect_object"], st["prep_object"]],
    ]

    def check(position, built):
        self.assertEqual(expected_frames[position], built)
        self.assertEqual(built.slot_types, expected_types[position])
        self.assertEqual(built.slot_preps, expected_preps[position])

    check(0, VerbnetFrameOccurrence.build_from_frame(
        tony_hall_frame_instances[0], None))
    check(1, VerbnetFrameOccurrence.build_from_frame(
        tony_hall_frame_instances[1], conll_frame_instance=None))
def _matching_sync_predicates(self, verbnet_frame, slots_associations):
    """Match the occurrence against ``verbnet_frame``, re-syncing on the verb.

    Both element sequences are walked in lockstep. A mismatch found while
    at least one side is still before its verb makes the walk jump to the
    verb on both sides; any other mismatch ends the walk.

    Returns the number of matched slots and ``slots_associations``, which
    is updated in place (occurrence slot index -> official slot index).
    """

    def slots_until_verb(parts, is_slot):
        # Count the slots met before the first non-slot part whose
        # 'elem' is "V" (or before the end when there is none).
        count = 0
        for part in parts:
            if is_slot(part):
                count += 1
            elif part['elem'] == "V":
                break
        return count

    occurrence_parts = self.frame_occurrence.structure
    occurrence_verb_at = occurrence_parts.index({'elem': 'V'})
    official_parts = verbnet_frame.syntax
    official_verb_at = official_parts.index({'elem': 'V'})

    occurrence_slots_at_verb = slots_until_verb(
        occurrence_parts, VerbnetFrameOccurrence._is_a_slot)
    official_slots_at_verb = slots_until_verb(
        official_parts, lambda part: 'role' in part)

    matched = 0
    occurrence_pos, official_pos = 0, 0
    occurrence_slot, official_slot = 0, 0

    while (occurrence_pos < len(occurrence_parts)
           and official_pos < len(official_parts)):
        seen = occurrence_parts[occurrence_pos]
        expected = official_parts[official_pos]

        if FrameMatcher._is_a_match(seen, expected):
            if VerbnetFrameOccurrence._is_a_slot(seen):
                matched += 1
                # TODO this is probably fixed with the SYNTAX-based VN reader
                # verbnet_frame can have more syntax than roles. This will
                # for instance happen in the "NP V NP S_INF" syntax of
                # want-32.1, where S_INF is given no role since it's part
                # of the NP
                if official_slot < verbnet_frame.num_slots:
                    slots_associations[occurrence_slot] = official_slot
                occurrence_slot += 1
                official_slot += 1
        elif (occurrence_pos < occurrence_verb_at
              or official_pos < official_verb_at):
            # No match, but the verb has not been reached on both sides:
            # carry on with what follows the verb, so that e.g. an
            # "NP NP V" construct does not end the matching early.
            occurrence_pos, official_pos = occurrence_verb_at, official_verb_at
            occurrence_slot = occurrence_slots_at_verb
            official_slot = official_slots_at_verb
        else:
            break

        occurrence_pos += 1
        official_pos += 1

    return matched, slots_associations
def _matching_sync_predicates(self, verbnet_frame, slots_associations):
    """Lockstep matching that restarts at the verb on an early mismatch.

    A mismatch ends the matching unless it happens while the occurrence or
    the official frame is still before its verb; in that case both cursors
    are moved onto the verb and matching resumes with what follows it.

    Returns ``(number of matched slots, slots_associations)``;
    ``slots_associations`` is filled in place.
    """
    structure = self.frame_occurrence.structure
    verb_in_structure = structure.index({'elem': 'V'})
    syntax = verbnet_frame.syntax
    verb_in_syntax = syntax.index({'elem': 'V'})

    # Number of slots seen before the verb, on each side.
    structure_slots_before_verb = 0
    for element in structure:
        if VerbnetFrameOccurrence._is_a_slot(element):
            structure_slots_before_verb += 1
            continue
        if element['elem'] == "V":
            break

    syntax_slots_before_verb = 0
    for element in syntax:
        if 'role' in element:
            syntax_slots_before_verb += 1
            continue
        if element['elem'] == "V":
            break

    matches = 0
    structure_cursor = syntax_cursor = 0
    structure_slot = syntax_slot = 0

    while True:
        if structure_cursor >= len(structure) or syntax_cursor >= len(syntax):
            break

        occurred = structure[structure_cursor]
        official = syntax[syntax_cursor]

        if not FrameMatcher._is_a_match(occurred, official):
            before_verb = (structure_cursor < verb_in_structure
                           or syntax_cursor < verb_in_syntax)
            if not before_verb:
                break
            # The verb has not been passed everywhere yet: jump onto it so
            # that, for instance, an "NP NP V" construct does not interrupt
            # the matching early.
            structure_cursor, syntax_cursor = verb_in_structure, verb_in_syntax
            structure_slot = structure_slots_before_verb
            syntax_slot = syntax_slots_before_verb
        elif VerbnetFrameOccurrence._is_a_slot(occurred):
            matches += 1
            # TODO this is probably fixed with the SYNTAX-based VN reader
            # verbnet_frame can have more syntax than roles. This will
            # for instance happen in the "NP V NP S_INF" syntax of
            # want-32.1, where S_INF is given no role since it's part
            # of the NP
            if syntax_slot < verbnet_frame.num_slots:
                slots_associations[structure_slot] = syntax_slot
            structure_slot, syntax_slot = structure_slot + 1, syntax_slot + 1

        structure_cursor, syntax_cursor = structure_cursor + 1, syntax_cursor + 1

    return matches, slots_associations
def test_present_that(self):
    """Match an "NP V that S" occurrence against the same official syntax."""
    occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'that'}, {'elem': 'S'}],
        2, 'consider')
    matcher = FrameMatcher(occurrence, 'sync_predicates')

    official_frame = VerbnetOfficialFrame('consider-29.9-1', [
        {'elem': 'NP', 'role': 'Agent'},
        {'elem': 'V'},
        {'elem': 'that'},
        {'elem': 'S', 'role': 'Patient'},
    ])
    score = matcher.perform_frame_matching([official_frame])

    self.assertEqual(score, 200)
    self.assertEqual(occurrence.roles, [{'Agent'}, {'Patient'}])
def test_3(self):
    """Score an "NP V with NP" occurrence against "NP V NP with NP"."""
    occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'with'}, {'elem': 'NP'}],
        2, 'a predicate')
    official_frame = VerbnetOfficialFrame('c', [
        {'elem': 'NP', 'role': 'Agent'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'Patient'},
        {'elem': 'with'},
        {'elem': 'NP', 'role': 'Role3'},
    ])

    matcher = FrameMatcher(occurrence, 'sync_predicates')
    score = matcher.perform_frame_matching([official_frame])

    self.assertEqual(score, int(100 / 2 + 100 / 3))
def test_conversion(self):
    """Build occurrences from the two gold frames and compare them.

    Each built occurrence is compared with the expected occurrence, then
    its ``slot_types`` and ``slot_preps`` are compared with the expected
    tables.
    """
    urge_occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'},
         {'elem': 'to'}, {'elem': 'S'}],
        3, predicate="urge")
    allow_occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'}],
        2, predicate="allow")
    vn_frames = [urge_occurrence, allow_occurrence]

    # Only the first two rows of these tables are compared below.
    slot_preps = [
        [None, None, "to"],
        [None, None],
        [None, None, "in", None, "for", None, "after"],
    ]
    st = ComputeSlotTypeMixin.slot_types
    slot_types = [
        [st["subject"], st["object"], st["prep_object"]],
        [st["subject"], st["object"]],
        [
            st["subject"], st["subject"], st["prep_object"], st["object"],
            st["prep_object"], st["indirect_object"], st["prep_object"],
        ],
    ]

    # Builders are deferred so that each frame is built right before its
    # own assertions run.
    builders = (
        lambda: VerbnetFrameOccurrence.build_from_frame(
            tony_hall_frame_instances[0], None),
        lambda: VerbnetFrameOccurrence.build_from_frame(
            tony_hall_frame_instances[1], conll_frame_instance=None),
    )
    for row, build in enumerate(builders):
        verbnet_frame = build()
        self.assertEqual(vn_frames[row], verbnet_frame)
        self.assertEqual(verbnet_frame.slot_types, slot_types[row])
        self.assertEqual(verbnet_frame.slot_preps, slot_preps[row])
def _matching_stop_on_fail(self, verbnet_frame, slots_associations):
    """Match element by element and give up at the first mismatch.

    Returns the number of matched slots together with
    ``slots_associations``, which is filled in place: the n-th matched
    slot is associated with the n-th official slot, as long as the
    official frame has that many slots.
    """
    matched = 0
    paired_parts = zip(self.frame_occurrence.structure, verbnet_frame.syntax)

    for seen, expected in paired_parts:
        if not FrameMatcher._is_a_match(seen, expected):
            break
        if not VerbnetFrameOccurrence._is_a_slot(seen):
            # Matching non-slot element (verb, preposition, ...): move on.
            continue

        slot_index = matched
        matched += 1
        if slot_index < verbnet_frame.num_slots:
            slots_associations[slot_index] = slot_index

    return matched, slots_associations
def test_baseline_alg(self):
    """Check the roles assigned by the 'baseline' matching algorithm."""
    occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'}, {'elem': 'NP'},
         {'elem': 'for'}, {'elem': 'NP'}],
        4, 'a predicate')

    by_frame = VerbnetOfficialFrame('XX', [
        {'elem': 'NP', 'role': 'R1'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'R2'},
        {'elem': 'by'},
        {'elem': 'NP', 'role': 'R3'},
    ])
    # The preposition element of this frame is a set of alternatives.
    for_or_as_frame = VerbnetOfficialFrame('XX', [
        {'elem': 'NP', 'role': 'R1'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'R4'},
        {'elem': {'for', 'as'}},
        {'elem': 'NP', 'role': 'R5'},
    ])
    official_frames = [by_frame, for_or_as_frame]

    matcher = FrameMatcher(occurrence, 'baseline')
    matcher.perform_frame_matching(official_frames)

    self.assertEqual(occurrence.roles, [{'R1'}, {'R4'}, set(), {'R5'}])
def test_2(self):
    """An occurrence built with a slot count of 0 reports ``num_slots == 0``."""
    occurrence = VerbnetFrameOccurrence(
        [{'elem': 'to'}, {'elem': 'be'}], 0, 'a predicate')

    # The official frame is only constructed; it takes no part in the
    # assertion below.
    frame = VerbnetOfficialFrame('c', [
        {'elem': 'NP', 'role': 'Agent'},
        {'elem': 'V'},
        {'elem': 'NP', 'role': 'Patient'},
        {'elem': 'with'},
        {'elem': 'NP', 'role': 'Role3'},
    ])

    self.assertEqual(occurrence.num_slots, 0)
def test_4(self):
    """Match "NP V NP" against several official frames of one class."""

    def np(role):
        return {'elem': 'NP', 'role': role}

    def word(elem):
        return {'elem': elem}

    def official(*syntax):
        return VerbnetOfficialFrame('XX', list(syntax))

    occurrence = VerbnetFrameOccurrence(
        [word('NP'), word('V'), word('NP')], 2, 'a predicate')
    matcher = FrameMatcher(occurrence, 'sync_predicates')

    official_frames = [
        official(np('Agent'), word('V'), np('Theme')),
        official(np('Agent'), word('V'), np('Theme')),
        official(np('Theme'), word('V')),
        official(np('Agent'), word('V'), np('Theme')),
        official(np('Theme'), word('V'), word('with'), np('Instrument')),
        official(np('Agent'), word('V'), np('Theme'),
                 word('with'), np('Instrument')),
        official(np('Instrument'), word('V'), np('Theme')),
    ]
    matcher.perform_frame_matching(official_frames)

    self.assertEqual(occurrence.roles, [{'Agent', 'Instrument'}, {'Theme'}])
def get_frames(corpus, verbnet_classes, frameNet, argid=False):
    """Yield, for each input file, its annotated frames and VerbNet occurrences.

    This is a generator. The files to process are chosen from
    ``options.Options``: the single ``conll_input`` file when it is set,
    otherwise the FrameNet fulltext annotation and parse lists when
    ``options.Options.corpus`` is ``'FrameNet'``. For the ``'dicoinfo_fr'``
    corpus without ``conll_input`` there is nothing to process and nothing
    is yielded.

    :param corpus: corpus name, only used in the log and error messages
        (the selection itself reads ``options.Options.corpus``).
    :param verbnet_classes: passed to ``argguesser.ArgGuesser`` and to
        ``roleextractor.fill_gold_roles`` when ``argid`` is true.
    :param frameNet: passed to ``rolematcher.VnFnRoleMatcher`` when the
        corpus is FrameNet.
    :param argid: when true, frame instances are guessed from the parsed
        CoNLL file and then given gold roles; otherwise they are read with
        ``FNAllReader``.
    :returns: yields ``(annotated_frames, vn_frames)``, two lists of the
        same length, once per input file.
    :raises Exception: on first iteration, when ``conll_input`` is not set
        and ``options.Options.corpus`` is not a known corpus.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(options.Options.loglevel)
    logger.debug("get_frames corpus=%s input=%s",
                 corpus, options.Options.conll_input)

    # Start from "nothing to process" so that the 'dicoinfo_fr' corpus
    # without a CoNLL input yields nothing instead of failing on unbound
    # names in the loop below.
    annotation_list = []
    parsed_conll_list = []
    if options.Options.conll_input is not None:
        annotation_list = [None]
        parsed_conll_list = [Path(options.Options.conll_input)]
    elif options.Options.corpus == 'FrameNet':
        annotation_list = options.Options.fulltext_annotations
        parsed_conll_list = options.Options.fulltext_parses
    elif options.Options.corpus != 'dicoinfo_fr':
        raise Exception('Unknown corpus {}'.format(corpus))

    # NOTE(review): the role matcher is only built for FrameNet. For other
    # corpora None is passed to fill_gold_roles (previously the name was
    # unbound) -- confirm that fill_gold_roles accepts it.
    role_matcher = None
    if options.Options.corpus == 'FrameNet':
        logger.info("Loading FrameNet and VerbNet role mappings %s ...",
                    paths.Paths.VNFN_MATCHING)
        role_matcher = rolematcher.VnFnRoleMatcher(
            paths.Paths.VNFN_MATCHING, frameNet)

    for annotation_file, parsed_conll_file in zip(annotation_list,
                                                  parsed_conll_list):
        logger.debug("Handling %s %s", annotation_file, parsed_conll_file)
        annotated_frames = []
        vn_frames = []
        conllparsed_reader = ConllParsedReader()

        if argid:
            logger.debug("Argument identification")
            #
            # Argument identification
            #
            arg_guesser = argguesser.ArgGuesser(verbnet_classes)

            # Many instances are not actually FrameNet frames
            new_frame_instances = list(
                arg_guesser.frame_instances_from_file(
                    conllparsed_reader.sentence_trees(parsed_conll_file),
                    parsed_conll_file))
            new_annotated_frames = roleextractor.fill_gold_roles(
                new_frame_instances, annotation_file, parsed_conll_file,
                verbnet_classes, role_matcher)

            for gold_frame, frame_instance in zip(new_annotated_frames,
                                                  new_frame_instances):
                annotated_frames.append(gold_frame)
                vn_frames.append(
                    VerbnetFrameOccurrence.build_from_frame(
                        gold_frame, conll_frame_instance=frame_instance))
        else:
            logger.info("Load gold arguments")
            #
            # Load gold arguments
            #
            fn_reader = FNAllReader(
                add_non_core_args=options.Options.add_non_core_args)

            for framenet_instance in fn_reader.iter_frames(
                    annotation_file, parsed_conll_file):
                annotated_frames.append(framenet_instance)
                vn_frames.append(
                    VerbnetFrameOccurrence.build_from_frame(
                        framenet_instance, conll_frame_instance=None))

            stats.stats_data["files"] += fn_reader.stats["files"]

        yield annotated_frames, vn_frames
def test_1(self):
    """Check scores and roles for an "NP V NP with NP" occurrence.

    One official frame differs from the occurrence by its preposition;
    two others, from different classes, have the occurrence's exact
    syntax and differ only in the role of their last slot.
    """

    def syntax_with(preposition, final_role):
        # New list of new dicts on every call.
        return [
            {'elem': 'NP', 'role': 'Agent'},
            {'elem': 'V'},
            {'elem': 'NP', 'role': 'Patient'},
            {'elem': preposition},
            {'elem': 'NP', 'role': final_role},
        ]

    frame_occurrence = VerbnetFrameOccurrence(
        [{'elem': 'NP'}, {'elem': 'V'}, {'elem': 'NP'},
         {'elem': 'with'}, {'elem': 'NP'}],
        3, 'a predicate')
    mismatching_prep = VerbnetOfficialFrame('Class 1', syntax_with('for', 'Role1'))
    same_syntax_class_1 = VerbnetOfficialFrame('Class 1', syntax_with('with', 'Role2'))
    same_syntax_class_2 = VerbnetOfficialFrame('Class 2', syntax_with('with', 'Role3'))

    matcher = FrameMatcher(frame_occurrence, 'sync_predicates')

    score = matcher.perform_frame_matching([mismatching_prep])
    self.assertEqual(score, int(100 * 4 / 3))

    score = matcher.perform_frame_matching(
        [same_syntax_class_1, same_syntax_class_2])
    self.assertEqual(score, 200)

    self.assertEqual(
        frame_occurrence.possible_roles(),
        [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}])
    self.assertEqual(
        frame_occurrence.roles,
        [{'Agent'}, {'Patient'}, {'Role2', 'Role3'}])
def get_frames(corpus, verbnet_classes, frameNet, argid=False):
    """Generate ``(annotated_frames, vn_frames)`` for every input file.

    The input files are taken from ``options.Options``: the ``conll_input``
    file alone when it is set, otherwise the FrameNet fulltext annotations
    and parses when ``options.Options.corpus`` is ``'FrameNet'``. With the
    ``'dicoinfo_fr'`` corpus and no ``conll_input`` the generator is empty.

    :param corpus: corpus name; it only appears in log and error messages,
        the branch taken depends on ``options.Options.corpus``.
    :param verbnet_classes: given to ``argguesser.ArgGuesser`` and
        ``roleextractor.fill_gold_roles`` when ``argid`` is true.
    :param frameNet: given to ``rolematcher.VnFnRoleMatcher`` for the
        FrameNet corpus.
    :param argid: true to guess frame instances from the parsed CoNLL file
        and fill in their gold roles, false to read them with
        ``FNAllReader``.
    :returns: yields one ``(annotated_frames, vn_frames)`` pair per input
        file; both lists have the same length.
    :raises Exception: on first iteration, if ``conll_input`` is unset and
        ``options.Options.corpus`` is neither ``'FrameNet'`` nor
        ``'dicoinfo_fr'``.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(options.Options.loglevel)
    logger.debug("get_frames corpus=%s input=%s",
                 corpus, options.Options.conll_input)

    # Empty defaults: the 'dicoinfo_fr' branch selects no file, and without
    # these the zip() below would fail on unbound local names.
    annotation_list = []
    parsed_conll_list = []
    if options.Options.conll_input is not None:
        annotation_list = [None]
        parsed_conll_list = [Path(options.Options.conll_input)]
    elif options.Options.corpus == 'FrameNet':
        annotation_list = options.Options.fulltext_annotations
        parsed_conll_list = options.Options.fulltext_parses
    elif options.Options.corpus == 'dicoinfo_fr':
        pass
    else:
        raise Exception('Unknown corpus {}'.format(corpus))

    # NOTE(review): only FrameNet gets a role matcher. Other corpora now
    # pass None to fill_gold_roles instead of hitting an unbound name --
    # confirm that fill_gold_roles copes with None.
    role_matcher = None
    if options.Options.corpus == 'FrameNet':
        logger.info("Loading FrameNet and VerbNet role mappings %s ...",
                    paths.Paths.VNFN_MATCHING)
        role_matcher = rolematcher.VnFnRoleMatcher(
            paths.Paths.VNFN_MATCHING, frameNet)

    for annotation_file, parsed_conll_file in zip(annotation_list,
                                                  parsed_conll_list):
        logger.debug("Handling %s %s", annotation_file, parsed_conll_file)
        annotated_frames = []
        vn_frames = []
        conllparsed_reader = ConllParsedReader()

        if argid:
            logger.debug("Argument identification")
            #
            # Argument identification
            #
            arg_guesser = argguesser.ArgGuesser(verbnet_classes)

            # Many instances are not actually FrameNet frames
            sentence_trees = conllparsed_reader.sentence_trees(parsed_conll_file)
            new_frame_instances = list(
                arg_guesser.frame_instances_from_file(
                    sentence_trees, parsed_conll_file))
            new_annotated_frames = roleextractor.fill_gold_roles(
                new_frame_instances, annotation_file, parsed_conll_file,
                verbnet_classes, role_matcher)

            for gold_frame, frame_instance in zip(new_annotated_frames,
                                                  new_frame_instances):
                annotated_frames.append(gold_frame)
                vn_frames.append(
                    VerbnetFrameOccurrence.build_from_frame(
                        gold_frame, conll_frame_instance=frame_instance))
        else:
            logger.info("Load gold arguments")
            #
            # Load gold arguments
            #
            fn_reader = FNAllReader(
                add_non_core_args=options.Options.add_non_core_args)

            for framenet_instance in fn_reader.iter_frames(
                    annotation_file, parsed_conll_file):
                annotated_frames.append(framenet_instance)
                vn_frames.append(
                    VerbnetFrameOccurrence.build_from_frame(
                        framenet_instance, conll_frame_instance=None))

            stats.stats_data["files"] += fn_reader.stats["files"]

        yield annotated_frames, vn_frames
def test_annotated_chunks(self):
    """Compare ``annotated_chunks`` output with hand-written chunk lists.

    Two frames are checked: the gold "Tony Hall" frame, and a frame on the
    same sentence whose only arguments come after the verb.
    """
    subject_text = 'Rep . Tony Hall , D- Ohio'
    addressee_text = 'the United Nations'
    content_text = 'to allow a freer flow of food and medicine into Iraq'

    def shared_tail():
        # Chunks from the verb onwards; identical for both frames.
        return [
            {'type': 'verb', 'text': 'urges'},
            {'text': '', 'type': 'text'},
            {'phrase_type': 'NP', 'type': 'arg', 'text': addressee_text},
            {'text': '', 'type': 'text'},
            {'phrase_type': 'to S', 'type': 'arg', 'text': content_text},
        ]

    tony_hall_gold_frame = tony_hall_frame_instances[0]
    tony_hall_gold_frame_chunks = [
        {'phrase_type': 'NP', 'type': 'arg', 'text': subject_text},
        {'text': '', 'type': 'text'},
    ] + shared_tail()
    self.assertEqual(
        list(VerbnetFrameOccurrence.annotated_chunks(
            tony_hall_gold_frame, tony_hall_gold_frame.sentence)),
        tony_hall_gold_frame_chunks)

    arguments_after_verb = [
        Arg(34, 51, "the United Nations", "Addressee", True, "NP"),
        Arg(53, 104, "to allow a freer flow of food and medicine into Iraq",
            "Content", True, "VPto"),
    ]
    without_subject = FrameInstance(
        "Rep . Tony Hall , D- Ohio , urges the United Nations to allow"
        " a freer flow of food and medicine into Iraq .",
        Predicate(28, 32, "urges", "urge"),
        arguments_after_verb,
        [],
        "XXX")
    without_subject_chunks = [
        {'type': 'text', 'text': 'Rep . Tony Hall , D- Ohio ,'},
    ] + shared_tail()
    self.assertEqual(
        list(VerbnetFrameOccurrence.annotated_chunks(
            without_subject, without_subject.sentence)),
        without_subject_chunks)