def test_member_comparisons():
    """Sanity-check compare_members between VerbNet 3.2 and 3.3.

    Parses both versions, computes the member changes, and prints a
    message for every change whose member name is not found in the class
    it was recorded under, as well as for every member recorded under a
    falsy key (deletions) that can still be found by name in 3.3.
    Nothing is asserted or returned; output goes to stdout.
    """
    from compare_versions import compare_members

    old_parser = VerbNetParser(version="3.2")
    new_parser = VerbNetParser(version="3.3")
    old_parser.parse_files()
    new_parser.parse_files()

    old_members = old_parser.get_all_members()
    new_members = new_parser.get_all_members()
    changes_by_class = compare_members(old_members, new_members)

    for class_id, class_changes in changes_by_class.items():
        if not class_id:
            # Deletions: changes stored under a falsy key have no target class.
            for change in class_changes:
                candidates = search.find_members(
                    new_members, name=change.element_name)
                if candidates:
                    print("%s marked as Deleted could be in %s" %
                          (change.element_name, ', '.join(candidates)))
            continue

        for change in class_changes:
            names_in_class = [
                m.name[0] for m in new_parser.get_verb_class(class_id).members
            ]
            if change.element_name in names_in_class:
                # The member really is in the class the change points at.
                continue
            print("Failed, %s is in %s: " %
                  (change.element_name, ', '.join(
                      search.find_members(new_members,
                                          name=change.element_name))))
            print({class_id: change.__dict__})
def find_in_old_versions(ann, old_vns):
    """Look up the members an annotation may refer to in an old VerbNet version.

    :param ann: annotation object; ``exists_in``, ``vn_class`` and ``verb``
        are read from it.
    :param old_vns: iterable of old VerbNet parser objects providing
        ``get_members()``.
    :return: the result of ``search.find_members`` restricted to the
        annotation's class and verb when the annotation exists in the old
        version, otherwise restricted by verb name only. An empty list is
        returned when ``old_vns`` is empty, so callers can always iterate
        the result.
    """
    for old_vn in old_vns:
        all_old_members = old_vn.get_members()
        if ann.exists_in(old_vn):
            return search.find_members(all_old_members,
                                       class_ID=ann.vn_class,
                                       name=ann.verb)
        # NOTE(review): both branches return, so only the FIRST entry of
        # old_vns is ever consulted. Kept as-is because callers may depend on
        # it; confirm whether later versions were meant to be searched too.
        return search.find_members(all_old_members, name=ann.verb)

    # No old versions were supplied: report "nothing found" instead of
    # implicitly returning None, which callers would fail to iterate.
    return []
def update_annotation_line(ann_line, new_vn, old_vns, log):
    """Update one annotation line so it refers to the new VerbNet version.

    :param ann_line: raw annotation line, passed to ``Annotation``.
    :param new_vn: parser for the target VerbNet version.
    :param old_vns: older VerbNet parsers, passed to ``find_in_old_versions``.
    :param log: object with a ``write`` method; one message is written per call.
    :return: ``str(ann)`` when the annotation is already valid in ``new_vn``
        or maps to exactly one member there; ``None`` when the mapping is
        ambiguous or no matching member is found.
    """
    ann = Annotation(ann_line)

    # The verb is already mapped directly to the desired "new" version of VN.
    if ann.exists_in(new_vn):
        log.write("SUCCESS: %s is still a reference to %s in %s" %
                  (ann.verb, ann.verb, ann.class_ID))
        return str(ann)

    # find_in_old_versions may return None when no old versions are
    # available; treat that as "no candidates" rather than crashing below.
    vn_members = find_in_old_versions(ann, old_vns) or []
    all_new_members = new_vn.get_all_members()

    updated_vn_members = []
    for vn_member in vn_members:
        # Search the new members for the old member by name and wordnet mapping.
        updated_vn_members += search.find_members(all_new_members,
                                                  name=vn_member.name,
                                                  wn=vn_member.wn)

    # Ambiguities in previous versions may all point to the same verb in the
    # new version, so keep only the first member per (name, class id) pair.
    # Keys are compared by equality, so they do not need to be hashable.
    seen_keys = []
    unique_members = []
    for member in updated_vn_members:
        key = (member.name, member.class_id())
        if key not in seen_keys:
            seen_keys.append(key)
            unique_members.append(member)

    if len(unique_members) == 1:
        ann.update_vn_info(unique_members[0])
        # Written after the update, so ann.class_ID is read post-update.
        log.write("SUCCESS: Found %s in %s in VerbNet version %s" %
                  (ann.verb, ann.class_ID, unique_members[0].version()))
        return str(ann)

    if len(unique_members) > 1:
        # Otherwise there is ambiguity
        log.write("ERROR: %s no longer belongs to %s and could belong to %s" %
                  (ann.verb, ann.class_ID, ' OR '.join(
                      [u.class_id() for u in unique_members])))
        return None

    # Otherwise this verb no longer exists in VN
    log.write("ERROR: %s no longer exists in VerbNet" % ann.verb)
    return None
def move_member(member_name, current_classname, new_classname):
    '''
    Move the member named ``member_name`` out of the class
    ``current_classname`` and into the class ``new_classname``, using the
    module-level ``vn`` and ``search`` objects.

    Raises ``Exception`` when the member is not found in the current class,
    or when ``new_classname`` does not resolve to a class.

    :return: (current_class_xml, new_class_xml), i.e. the ``pp()`` output of
        both classes after the move
    '''
    matches = search.find_members(name=member_name,
                                  class_ID=current_classname)
    if not matches:
        raise Exception("%s is not in the class %s in VerbNet version %s" %
                        (member_name, current_classname, vn.version))

    # Should only ever be one unique member name per class
    member = matches[0]

    new_class = vn.get_verb_class(new_classname)
    current_class = vn.get_verb_class(current_classname)
    if not new_class:
        raise Exception("%s is not a class in VerbNet version %s" %
                        (new_classname, vn.version))

    current_class.remove_member(member)
    new_class.add_member(member)
    return (current_class.pp(), new_class.pp())
def generate_groupings():
    """Write one grouping inventory XML file per distinct member name.

    Parses the VerbNet directory hard-coded below and, for every distinct
    member name, writes ``groupings/<name>-v.xml``. Each file contains an
    ``inventory`` element with one ``sense`` per member of that name; a
    sense carries the first example of every frame of the member's class
    and a ``mappings`` element with the member's wordnet mapping plus empty
    ``pb``, ``vn`` and ``fn`` placeholders.

    Returns nothing; the ``groupings/`` directory is not created here.
    """
    # CHANGE DIRECTORY to desired verbnet version
    vn_directory = "../../vn_versions/verbnet3.3/"
    vnp = VerbNetParser(directory=vn_directory)

    # Loop-invariant values: fetch the member list once instead of once per
    # member name, and build the doctype string a single time.
    all_members = vnp.get_members()
    doctype_string = '<!DOCTYPE inventory SYSTEM "inventory.dtd">'

    member_names = {m.name for m in all_members}
    for member_name in member_names:
        root = etree.Element("root")
        inv = etree.Element("inventory", lemma=member_name + "-v")

        members = search.find_members(members=all_members, name=member_name)
        for i, member in enumerate(members):
            sense = etree.Element("sense",
                                  group="1",
                                  n=str(i),
                                  name=member.vnc + ".xml",
                                  type="")

            # One line per frame: the first example of each frame in the
            # member's class.
            examples = etree.Element("examples")
            examples.text = "\n".join(
                f.examples[0]
                for f in vnp.verb_classes_dict[member.vnc].frames)
            sense.append(examples)

            mappings = etree.Element("mappings")
            mappings.append(
                etree.Element("wn", version="", lemma=" ".join(member.wn)))
            mappings.append(etree.Element("pb"))
            mappings.append(etree.Element("vn"))
            mappings.append(etree.Element("fn"))
            sense.append(mappings)

            inv.append(sense)

        root.append(inv)

        with open("groupings/" + member_name + "-v.xml", "wb") as output:
            output.write(
                etree.tostring(root,
                               xml_declaration=True,
                               encoding="UTF-8",
                               doctype=doctype_string,
                               pretty_print=True))
def compare_members(from_vn_members, to_vn_members):
    '''
    returns a dict of {to_vn_class: [Change objects]}

    update: member still in class, but attributes changed
    delete: member removed from class, and not moved to a new VN class
    insert: member inserted to the class, and is newly added to VN
    move: member moved from the class to a new vn class

    Changes with no target member (deletions) are collected under the key
    None. Changes for several members that land on the same key are
    accumulated in one list.
    '''
    all_changes = {}

    for from_vn_member in from_vn_members:
        from_class_id = from_vn_member.class_id()

        # Find that member in to_vn_members, and if there are differences,
        # record them
        possible_to_vn_members = search.find_members(to_vn_members,
                                                     name=from_vn_member.name,
                                                     wn=from_vn_member.wn)
        changes = []
        to_vn_member = None
        attr_diffs = None

        # First case is that this member is in the same class in to_vn
        same_class_matches = [
            m for m in possible_to_vn_members
            if m.class_id() == from_class_id
        ]
        if same_class_matches:
            to_vn_member = same_class_matches[0]
            attr_diffs = from_vn_member.compare_attrs(to_vn_member)
        elif len(possible_to_vn_members) > 1:
            # Many possible members, none of which are in the same class.
            # A candidate counts as the move target only if from_vn has no
            # member of this name in the candidate's class, i.e. this is an
            # instance of the member in a NEW class.
            for candidate in possible_to_vn_members:
                already_in_from_vn = search.find_members(
                    from_vn_members,
                    class_ID=candidate.class_id(),
                    name=from_vn_member.name)
                if not already_in_from_vn:
                    to_vn_member = candidate
                    changes.append(
                        Change(from_vn_member.name, "member", "move",
                               from_class_id))
                    # Compare the attributes
                    attr_diffs = from_vn_member.compare_attrs(to_vn_member)
        elif len(possible_to_vn_members) == 1:
            to_vn_member = possible_to_vn_members[0]
            changes.append(
                Change(from_vn_member.name, "member", "move", from_class_id))
            # Compare the attributes
            attr_diffs = from_vn_member.compare_attrs(to_vn_member)
        else:
            changes.append(
                Change(from_vn_member.name, "member", "delete",
                       from_class_id))

        if attr_diffs:
            # If member has updates to its attributes
            changes.append(
                Change(from_vn_member.name, "member", "update", from_class_id,
                       ', '.join("%s: %s" % (attr, diff)
                                 for attr, diff in attr_diffs.items())))

        if changes:
            key = to_vn_member.class_id() if to_vn_member else None
            # Accumulate instead of assigning: plain assignment replaced the
            # changes already recorded for this key, so e.g. only the last
            # deletion (key None) survived.
            all_changes.setdefault(key, []).extend(changes)

    # Lastly, we have to get all to_vn_members that did not exist in
    # from_vn_members to record all of the rest of the insert operations.
    # Members are reduced to hashable (name, class id) pairs for the set
    # difference, then looked up again with search.find_members.
    to_keys = {(m.name, m.class_id()) for m in to_vn_members}
    from_keys = {(m.name, m.class_id()) for m in from_vn_members}
    for name, class_ID in to_keys - from_keys:
        inserted_member = search.find_members(to_vn_members,
                                              class_ID=class_ID,
                                              name=[name])
        if inserted_member:
            all_changes.setdefault(inserted_member[0].class_id(), []).append(
                Change(inserted_member[0].name,
                       "member",
                       "insert",
                       notes=inserted_member[0].pp()))

    return all_changes
def update_annotation_line(ann_line, new_vn, old_vns, log=None):
    """
    Update one annotation line so it refers to the new VerbNet version.

    Logs changes to log if one is given.
    Returns a tuple of (bool marking if the update was successful, updated_annotation)

    The module-level ``stats`` counters are incremented as a side effect:
    ``stats[4]`` on every call, ``stats[0]`` when the annotation is already
    valid in ``new_vn``, ``stats[1]`` when it maps to exactly one new member,
    ``stats[2]`` when the mapping is ambiguous and ``stats[3]`` when no
    match is found. A progress line is also printed on every call.
    """
    # Semlink annotations have mappings, and thus more attributes in a line
    if len(ann_line.strip().split()) > 5:
        ann = annotation.SemLinkAnnotation(ann_line)
    else:
        ann = annotation.VnAnnotation(ann_line)

    stats[4] += 1
    print("Searching for %s from %s...." % (ann.verb, ann.vn_class))

    # The verb is already mapped directly to the desired "new" version of VN.
    if ann.exists_in(new_vn):
        if log:
            log.write(
                "SUCCESS: %s is still a reference to %s in %s in VerbNet version %s"
                % (ann.verb, ann.verb, ann.vn_class, new_vn.version))
        stats[0] += 1
        return (True, str(ann))

    # find_in_old_versions may return None when no old versions are
    # available; treat that as "no candidates" rather than crashing below.
    possible_old_vn_members = find_in_old_versions(ann, old_vns) or []
    all_new_members = new_vn.get_members()

    updated_vn_members = []
    for vn_member in possible_old_vn_members:
        # Search the new members for the old member by name and wordnet mapping.
        updated_vn_members += search.find_members(all_new_members,
                                                  name=vn_member.name,
                                                  wn=vn_member.wn)

    # Ambiguities in previous versions may all point to the same verb in the
    # new version. I.e. one verb may appear in multiple classes in version
    # 3.2, and so this script will look for n new verbs in 3.3, where n is
    # the number of times this member appears in 3.2. So if all n of those
    # point to the same member of the same class in 3.3, then we need to
    # remove those duplicate members from this list.
    # Keys are compared by equality, so they do not need to be hashable.
    seen_keys = []
    unique_members = []
    for updated_member in updated_vn_members:
        key = (updated_member.name, updated_member.class_id())
        if key not in seen_keys:
            seen_keys.append(key)
            unique_members.append(updated_member)

    if len(unique_members) == 1:
        # The verb maps to new version in a new class
        if log:
            log.write("SUCCESS: Found %s from %s in %s in VerbNet version %s" %
                      (ann.verb, ann.vn_class, unique_members[0].class_id(),
                       unique_members[0].version))
        stats[1] += 1
        ann.update_vn_info(unique_members[0])
        return (True, str(ann))

    if len(unique_members) > 1:
        # Otherwise there is ambiguity
        if log:
            log.write(
                "ERROR: %s no longer belongs to %s and could belong to %s in VerbNet version %s"
                % (ann.verb, ann.vn_class, ' OR '.join(
                    [u.class_id() for u in unique_members]),
                   unique_members[0].version))
        stats[2] += 1
        return (False, str(ann))

    # Otherwise this verb no longer exists in VN
    if log:
        log.write(
            "ERROR: %s from %s in an old version of VerbNet does not have an exact match in version %s"
            % (ann.verb, ann.vn_class, new_vn.version))
    stats[3] += 1
    return (False, str(ann))