def resolve(self, kb, awake_db):
    """Build a new KnowledgeBase from ``kb`` and attach external identifiers.

    The contents of ``kb`` are transferred into a fresh ``KnowledgeBase``
    via the parent class's ``copy_all``.  When ``awake_db`` is the string
    ``"NA"`` that new knowledge base is returned without further changes.

    Otherwise, for every entity in ``entid_to_kb_entity``:

    * If the source string AwakeDB returns for the entity group's
      ``actor_id`` mentions ``dbpedia.org``, it is stripped of surrounding
      whitespace and of one enclosing ``<`` / ``>``, its first
      ``dbpedia.org/resource`` is rewritten to ``en.wikipedia.org/wiki``,
      and the result is stored as the ``external_uri`` property.
    * If the group has an ``actor_id`` and the entity has neither an
      ``external_uri`` nor a ``geonameid`` property, a non-blank geoname id
      returned by AwakeDB is stored (as a string) under ``geonameid``.

    :param kb: knowledge base to copy from.
    :param awake_db: value handed to ``AwakeDB.initialize_awake_db``;
        ``"NA"`` disables the lookup entirely.
    :return: the newly populated knowledge base.
    :raises KeyError: if an entity is not a member of any entity group.
    """
    print("ExternalURIResolver RESOLVE")
    resolved_kb = KnowledgeBase()
    super(ExternalURIResolver, self).copy_all(resolved_kb, kb)
    if awake_db == "NA":
        return resolved_kb

    # Reverse index: entity -> the entity group that lists it as a member.
    kb_entity_to_entity_group = dict()
    for entgroupid, kb_entity_group in resolved_kb.get_entity_groups():
        for kb_entity in kb_entity_group.members:
            kb_entity_to_entity_group[kb_entity] = kb_entity_group

    AwakeDB.initialize_awake_db(awake_db)
    for entid, kb_entity in resolved_kb.entid_to_kb_entity.items():
        kb_entity_group = kb_entity_to_entity_group[kb_entity]
        actor_id = kb_entity_group.actor_id

        source_string = AwakeDB.get_source_string(actor_id)
        if source_string is not None and "dbpedia.org" in source_string:
            # Strip first so that the bracket checks below also work when
            # the stored value carries leading/trailing whitespace.  (The
            # previous code computed the stripped value but never used it.)
            external_uri = source_string.strip()
            if external_uri.startswith("<"):
                external_uri = external_uri[1:]
            if external_uri.endswith(">"):
                external_uri = external_uri[:-1]
            external_uri = external_uri.replace(
                "dbpedia.org/resource", "en.wikipedia.org/wiki", 1)
            kb_entity.properties["external_uri"] = external_uri

        # For countries, add geoname_id to properties.  Only done when no
        # external URI was attached and no geonameid is present already.
        if (actor_id is not None
                and "external_uri" not in kb_entity.properties
                and "geonameid" not in kb_entity.properties):
            geonameid = AwakeDB.get_geonameid_from_actorid(actor_id)
            if geonameid is not None and str(geonameid).strip():
                kb_entity.properties["geonameid"] = str(geonameid)

    return resolved_kb
def resolve(self, kb):
    """Make the best entity type consistent within each entity group.

    A new ``KnowledgeBase`` is populated from ``kb`` through the parent
    class's ``copy_all``.  For every entity group, the best entity type of
    each member is tallied.  The type with the highest tally wins; ties are
    settled by ``self.get_better_entity_type``.  When the members disagree
    (more than one distinct type was seen), the winning type is added to
    every member with confidence 0.9.

    :param kb: knowledge base to copy from.
    :return: the newly populated knowledge base.
    """
    print("EntityGroupEntityTypeResolver RESOLVE")
    resolved_kb = KnowledgeBase()
    super(EntityGroupEntityTypeResolver, self).copy_all(resolved_kb, kb)

    for _group_id, group in resolved_kb.get_entity_groups():
        # entity type => number of members whose best type it is
        tallies = dict()
        for member in group.members:
            member_type = member.get_best_entity_type()
            tallies[member_type] = tallies.get(member_type, 0) + 1

        # Pick the most frequent type, deferring to get_better_entity_type
        # whenever two types are equally frequent.
        winner = None
        winner_tally = None
        for candidate, tally in tallies.items():
            if winner is None or tally > winner_tally:
                winner = candidate
                winner_tally = tally
            elif tally == winner_tally:
                winner = self.get_better_entity_type(winner, candidate)

        # Nothing to reconcile when all members already agree.
        if len(tallies) <= 1:
            continue
        for member in group.members:
            member.add_entity_type(winner, 0.9)

    return resolved_kb
def resolve(self, kb):
    """Annotate entity groups with affiliation and component-of information.

    A new ``KnowledgeBase`` is populated from ``kb`` through the parent
    class's ``copy_all``.  Two UTF-8 text files are then read from
    ``../data_files`` relative to this source file:

    * ``actor_affiliation_info.txt`` -- ``actor_id`` followed by either an
      affiliated actor id or a CAMEO code.  A later row for the same actor
      replaces an earlier one (one affiliation per actor id).
    * ``actor_component_info.txt`` -- ``actor_id`` followed by the id of an
      actor that contains it.  An actor may appear on several rows.

    Rows are space-separated; anything after the second field is treated as
    a free-text description and ignored.  Blank lines and lines starting
    with ``#`` are skipped.

    For each entity group with a non-``None`` ``actor_id``:

    * an affiliation matching ``cameo_code_re`` is stored under
      ``awake_affiliated_cameo_code``; any other affiliation is stored as an
      ``int`` under ``awake_affiliated_actor_id``;
    * containing actor ids are appended to the ``component_of_actor_ids``
      list property (created if missing).

    :param kb: knowledge base to copy from.
    :return: the newly populated knowledge base.
    :raises ValueError: if an actor id field is not an integer.
    :raises IndexError: if a data row has fewer than two fields.
    """
    print("AdditionalAffiliationResolver RESOLVE")
    resolved_kb = KnowledgeBase()
    super(AdditionalAffiliationResolver, self).copy_all(resolved_kb, kb)

    def read_rows(path):
        # Yield the split fields of every data row in ``path``.  The file is
        # closed by the ``with`` block even if a caller fails mid-iteration.
        with codecs.open(path, 'r', encoding='utf8') as handle:
            for line in handle:
                line = line.strip()
                # Blank lines would otherwise crash on int('').
                if not line or line.startswith("#"):
                    continue
                yield line.split(" ", 2)

    script_dir = os.path.dirname(os.path.realpath(__file__))

    # actor_id => affiliated_actor_id e.g. "Vladimir Putin" -> "Russia"
    actor_affiliation = dict()
    affiliation_file = os.path.join(script_dir, "..", "data_files",
                                    "actor_affiliation_info.txt")
    for pieces in read_rows(affiliation_file):
        # assumes one affiliation per actor id
        actor_affiliation[int(pieces[0])] = pieces[1]

    # actor_id => actor_ids e.g. "Estonia" -> ["Baltic States", "NATO"]
    actor_component_of = dict()
    component_file = os.path.join(script_dir, "..", "data_files",
                                  "actor_component_info.txt")
    for pieces in read_rows(component_file):
        actor_id = int(pieces[0])
        containing_actor_id = int(pieces[1])
        actor_component_of.setdefault(actor_id, []).append(
            containing_actor_id)

    # Set properties on entity groups
    for (entgroupid, entity_group) in resolved_kb.get_entity_groups():
        actor_id = entity_group.actor_id
        if actor_id is None:
            continue

        if actor_id in actor_affiliation:
            affiliated_actor_id_or_cameo_code = actor_affiliation[actor_id]
            if AdditionalAffiliationResolver.cameo_code_re.match(
                    affiliated_actor_id_or_cameo_code):
                entity_group.properties["awake_affiliated_cameo_code"] = \
                    affiliated_actor_id_or_cameo_code
            else:
                entity_group.properties["awake_affiliated_actor_id"] = int(
                    affiliated_actor_id_or_cameo_code)

        if actor_id in actor_component_of:
            if "component_of_actor_ids" not in entity_group.properties:
                entity_group.properties["component_of_actor_ids"] = []
            entity_group.properties["component_of_actor_ids"].extend(
                actor_component_of[actor_id])

    return resolved_kb
def resolve(self, kb):
    """Attach country-code properties to entity groups and entities.

    A new ``KnowledgeBase`` is populated from ``kb`` through the parent
    class's ``copy_all`` and then annotated in three passes:

    1. Entity groups whose ``country_iso_code`` property is a key of
       ``self.iso_country_codes`` receive the mapped value as
       ``geonames_country_code``.
    2. Entities whose canonical name is a key of ``self.country_codes``
       receive the mapped value as ``cameo_country_code``.
    3. For every ``GEN-AFF.Citizen-Resident-Religion-Ethnicity`` relation
       from a PER.Individual / PER.Group entity to a GPE.Nation entity with
       a known country code, the code is tallied per left-hand entity.  The
       most frequent code (smallest code on a tie) is stored as
       ``citizenship_cameo_country_code``, plus ``ethnicity`` when the code
       is a key of ``self.ethnicities``.  An entity that has a canonical
       name only gets these properties if at least one relation mention
       linking it to that code has a left-mention link confidence listed
       in ``reliable_link_confidences``.

    :param kb: knowledge base to copy from.
    :return: the newly populated knowledge base.
    """
    print("CountryCodeResolver RESOLVE")
    resolved_kb = KnowledgeBase()
    super(CountryCodePropertyResolver, self).copy_all(resolved_kb, kb)

    # Pass 1: ISO code of a geoname's country -> geonames country code.
    # Applies when the KB entity group is a city/geoname.
    for _group_id, group in resolved_kb.get_entity_groups():
        if "country_iso_code" not in group.properties:
            continue
        iso_code = group.properties["country_iso_code"]
        if iso_code in self.iso_country_codes:
            group.properties["geonames_country_code"] = \
                self.iso_country_codes[iso_code]

    # Pass 2: cameo_country_code for entities that are themselves countries.
    for _entity_id, entity in resolved_kb.entid_to_kb_entity.items():
        cameo_code = self.country_codes.get(entity.canonical_name)
        if cameo_code is not None:
            entity.properties["cameo_country_code"] = cameo_code

    # Pass 3: citizenship_cameo_country_code for PER entities.
    reliable_pairs = set()  # (entity, country_code) pairs trusted as-is
    code_tallies = dict()   # entity => {country_code => relation count}
    for _relation_id, relation in resolved_kb.relid_to_kb_relation.items():
        if (relation.relation_type
                != "GEN-AFF.Citizen-Resident-Religion-Ethnicity"):
            continue

        left_id = relation.left_argument_id
        right_id = relation.right_argument_id
        person = resolved_kb.entid_to_kb_entity[left_id]
        nation = resolved_kb.entid_to_kb_entity[right_id]

        person_types = person.entity_type_to_confidence
        if not ("PER.Individual" in person_types
                or "PER.Group" in person_types):
            continue
        if "GPE.Nation" not in nation.entity_type_to_confidence:
            continue

        nation_code = self.country_codes.get(nation.canonical_name)
        if nation_code is None:
            continue

        per_person = code_tallies.setdefault(person, dict())
        per_person[nation_code] = per_person.get(nation_code, 0) + 1

        # Record the pair if any mention makes the relation reliable on
        # its own.
        for relation_mention in relation.relation_mentions:
            confidence = relation_mention.left_mention.link_confidence
            if confidence in \
                    CountryCodePropertyResolver.reliable_link_confidences:
                reliable_pairs.add((person, nation_code))

    for entity, tallies in code_tallies.items():
        # Highest count wins; equal counts fall back to the smallest code.
        # Every tally dict holds at least one code with a count >= 1.
        top_code = min(tallies.items(),
                       key=lambda item: (-item[1], item[0]))[0]

        # If it's a named entity, require reliable match
        is_named = entity.canonical_name is not None
        if is_named and (entity, top_code) not in reliable_pairs:
            continue

        entity.properties["citizenship_cameo_country_code"] = top_code
        if top_code in self.ethnicities:
            entity.properties["ethnicity"] = self.ethnicities[top_code]

    return resolved_kb