def test_create_seedling_entity_with_alternate_names(self):
    """Create an entity carrying several names plus text/numeric values, then serialize it."""
    g = aifutils.make_graph()
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    entity = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    # To allow uncertainty about an entity's type, the type is not marked on the
    # entity directly but asserted separately.
    aifutils.mark_type(g, "http://www.test.org/assertions/1", entity,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    # Exercise every value-marking helper so validation covers them all; a real
    # entity would rarely carry all of these at once.
    for alternate_name in ("Name One", "N. One", "N-Money"):
        aifutils.mark_name(g, entity, alternate_name)
    aifutils.mark_text_value(g, entity, "TextValue")
    aifutils.mark_numeric_value_as_double(g, entity, 100)
    aifutils.mark_numeric_value_as_long(g, entity, 100)
    aifutils.mark_numeric_value_as_string(g, entity, "100")
    self.new_file(g, "test_create_a_seedling_entity_with_alternate_names.ttl")
    self.dump_graph(g, "Example of seedling entity with alternate names")
def test_create_an_entity_with_add_invalid_attribute(self):
    """Negative case: attach attributes an entity is not allowed to carry."""
    g = aifutils.make_graph()
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # Entity/event URIs only need to be unique; their exact values don't matter.
    entity = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    disallowed = (
        interchange_ontology.Irrealis,
        interchange_ontology.Negated,
        interchange_ontology.Hedged,
        interchange_ontology.VideoJustificationChannelPicture,
    )
    for attribute in disallowed:
        aifutils.mark_attribute(g, entity, attribute)
    self.new_file(g, "test_create_an_entity_with_add_invalid_attribute.ttl")
    self.dump_graph(
        g,
        "Invalid: Semantic Attribute for Entity can only be must be aida:Generic"
    )
def test_create_a_relation_argument_add_attribute(self):
    """Negative case: relation arguments must not carry aida:Attribute markings."""
    g = aifutils.make_graph()
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    bob = aifutils.make_entity(g, "http://www.test.edu/entites/person/Bob", system)
    maryland = aifutils.make_entity(g, "http://www.test.edu/entites/place/Maryland",
                                    system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/bobIsAPerson",
                       bob, ldc_ontology.PER, system, 1.0)
    aifutils.mark_type(g, "http://www.test.edu/assertions/marylandIsALocation",
                       maryland, ldc_ontology.LOC_Position_Region, system, 1.0)
    # the relation resource itself
    lives_in = aifutils.make_relation(
        g, "http://www.test.edu/relationss/bobLivesInMaryland", system)
    resident_arg = aifutils.mark_as_argument(
        g, lives_in, ldc_ontology.Physical_Resident_Resident, bob, system, 1)
    # Attaching an attribute to the argument is what makes this graph invalid.
    aifutils.mark_attribute(g, resident_arg, interchange_ontology.Generic)
    self.new_file(g, "test_create_a_relation_argument_add_attribute.ttl")
    self.dump_graph(g, "Invalid: Relation Argument cannot have aida:Attribute")
def test_create_an_entity_with_uncertainty_about_its_type(self):
    """Entity whose type is either Person or Organization, marked mutually exclusive."""
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    entity = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    is_person = aifutils.mark_type(g, "http://www.test.org/assertions/1", entity,
                                   SEEDLING_TYPES_NIST.Person, system, 0.5)
    is_organization = aifutils.mark_type(g, "http://www.test.org/assertions/2", entity,
                                         SEEDLING_TYPES_NIST.Organization, system, 0.2)
    # each type hypothesis gets its own textual evidence
    aifutils.mark_text_justification(g, [entity, is_person],
                                     "NYT_ENG_201181231", 42, 143, system, 0.6)
    aifutils.mark_text_justification(g, [entity, is_organization],
                                     "NYT_ENG_201181231", 343, 367, system, 0.3)
    # the two type assertions cannot both hold; no "none of the above"
    # probability is supplied (the trailing None)
    aifutils.mark_as_mutually_exclusive(
        g,
        {(is_person,): 0.5, (is_organization,): 0.2},
        system, None)
    self.new_file(g, "test_create_an_entity_with_uncertainty_about_its_type.ttl")
    self.dump_graph(g, "Example of entity with uncertainty about type")
def test_create_a_simple_cluster_with_handle(self):
    """Cluster two probably-coreferent people under a prototype with a handle."""
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, 'http://www.test.edu/testSystem')
    # Two people, probably the same person
    vladimir_putin = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/1", vladimir_putin,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    aifutils.mark_name(g, vladimir_putin, "Vladimir Putin")
    putin = aifutils.make_entity(g, "http://www.test.edu/entities/2", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/2", putin,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    aifutils.mark_name(g, putin, "Путин")
    # create a cluster whose prototype (and handle) is the first entity
    putin_cluster = aifutils.make_cluster_with_prototype(
        g, "http://www.test.edu/clusters/1", vladimir_putin, system, "Vladimir Putin")
    # person 1 is definitely in the cluster, person 2 is probably in the cluster
    aifutils.mark_as_possible_cluster_member(g, putin, putin_cluster, 0.71, system)
    self.new_file(g, "test_create_a_simple_cluster_with_handle.ttl")
    self.dump_graph(g, "create a simple cluster with handle")
def test_create_an_event_with_ldc_time(self):
    """Attach LDC time components (UNKNOWN/BEFORE/AFTER/ON) to two events."""
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # a start-position event with an unknown start and an end sometime before 2016
    start_position = aifutils.make_event(g, "http://www.test.edu/event/1", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/1", start_position,
                       SEEDLING_TYPES_NIST['Personnel.StartPosition'], system, 1.0)
    unknown_start = LDCTimeComponent(LDCTimeType.UNKNOWN, None, None, None)
    end_before_2016 = LDCTimeComponent(LDCTimeType.BEFORE, "2016", None, None)
    aifutils.mark_ldc_time(g, start_position, unknown_start, end_before_2016, system)
    # an attack event starting after Feb 2014 with a definite end on 2014-02-21
    attack = aifutils.make_event(g, "http://www.test.edu/event/2", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/2", attack,
                       SEEDLING_TYPES_NIST['Conflict.Attack'], system, 1.0)
    start_after = LDCTimeComponent(LDCTimeType.AFTER, "2014", "--02", None)
    end_on = LDCTimeComponent(LDCTimeType.ON, "2014", "--02", "---21")
    aifutils.mark_ldc_time(g, attack, start_after, end_on, system)
    self.new_file(g, "test_create_an_event_with_ldc_time.ttl")
    self.dump_graph(g, "create an event with LDCTime")
def test_event_missing_type(self):
    """Negative case: an event with no type assertion at all.

    Having multiple type assertions (to express uncertainty) is fine, but there
    must always be at least one; this graph should fail validation.
    """
    g = aifutils.make_graph()
    # Fixed: the system URI previously read "testSytem" — a typo inconsistent
    # with the "testSystem" URI used by every other test in this file.
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    aifutils.make_event(g, "http://www.test.edu/events/1", system)
    self.dump_graph(g, "Invalid: Event missing type")
def test_make_entity(self):
    # Create a typed, justified entity and verify get_type_assertions returns
    # exactly the one type assertion made here.
    #
    # NOTE(review): another method named test_make_entity exists later in this
    # file; inside a single class body the later definition silently shadows
    # this one, so only one of the two will actually run — confirm which is
    # intended.
    # NOTE(review): this test calls make_graph/make_entity/... without the
    # aifutils. prefix used everywhere else — presumably these names are
    # imported directly at module level; verify.
    g = make_graph()
    system = make_system_with_uri(g, "http://www.test.edu/system")
    entity = make_entity(g, "http://www.test.edu/entities/1", system)
    # type is asserted separately from the entity so uncertainty about the
    # type could be expressed
    type_assertion = mark_type(g, "http://www.test.edu/assertions/1", entity,
                               AIDA_PROGRAM_ONTOLOGY.Person, system, 1.0)
    mark_text_justification(g, [entity, type_assertion], "NYT_ENG_20181231",
                            42, 143, system, 0.973)
    self.dump_graph(g, "Example of creating an entity")
    self.assertEqual([type_assertion], get_type_assertions(g, entity))
def test_create_an_event_argument_add_invalid_attribute(self):
    """Negative case: event arguments accept only aida:Negated / aida:Hedged."""
    g = aifutils.make_graph()
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # the event resource itself; its type is encoded separately so uncertainty
    # about the type could be expressed
    event = aifutils.make_event(g, "http://www.test.edu/events/1", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/5", event,
                       ldc_ontology.Personnel_Elect, system, 1.0)
    # the two entities involved in the event
    electee = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/6", electee,
                       ldc_ontology.PER, system, 1.0)
    election_country = aifutils.make_entity(g, "http://www.test.edu/entities/2", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/7", election_country,
                       ldc_ontology.GPE, system, 1.0)
    # link the entities to the event
    candidate_arg = aifutils.mark_as_argument(
        g, event, ldc_ontology.Personnel_Elect_Candidate, electee, system, 0.785)
    place_arg = aifutils.mark_as_argument(
        g, event, ldc_ontology.Personnel_Elect_Place, election_country, system, 0.589)
    # Irrealis/Generic and the video-channel attributes are disallowed on
    # event arguments, making this graph invalid.
    aifutils.mark_attribute(g, candidate_arg, interchange_ontology.Irrealis)
    aifutils.mark_attribute(g, candidate_arg, interchange_ontology.Generic)
    aifutils.mark_attribute(g, place_arg,
                            interchange_ontology.VideoJustificationChannelPicture)
    aifutils.mark_attribute(g, place_arg,
                            interchange_ontology.VideoJustificationChannelSound)
    self.new_file(g, "test_create_an_event_argument_add_invalid_attribute.ttl")
    self.dump_graph(
        g,
        "Invalid: Semantic Attribute for Event Argument, must be aida:Negated, aida:Hedged"
    )
def test_create_an_entity_with_information_justification(self):
    """Mark informative justifications on both an entity and its cluster."""
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, 'http://www.test.edu/testSystem')
    # Two people, probably the same person
    vladimir_putin = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    aifutils.mark_name(g, vladimir_putin, "Vladimir Putin")
    type_assertion = aifutils.mark_type(g, "http://www.test.org/assertions/1",
                                        vladimir_putin, SEEDLING_TYPES_NIST.Person,
                                        system, 1.0)
    justification_1 = aifutils.mark_text_justification(
        g, [vladimir_putin, type_assertion], "HC00002Z0", 0, 10, system, 1.0)
    aifutils.mark_informative_justification(g, vladimir_putin, justification_1)
    putin = aifutils.make_entity(g, "http://www.test.edu/entities/2", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/2", putin,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    aifutils.mark_name(g, putin, "Путин")
    # create a cluster with prototype
    putin_cluster = aifutils.make_cluster_with_prototype(
        g, "http://www.test.edu/clusters/1", vladimir_putin, system, "Vladimir Putin")
    # NOTE(review): this justification pairs `putin` with the *first* entity's
    # type assertion — presumably intentional for this test; confirm.
    justification_2 = aifutils.mark_text_justification(
        g, [putin, type_assertion], "HC00002Z0", 0, 10, system, 1.0)
    aifutils.mark_informative_justification(g, putin_cluster, justification_2)
    # person 1 is definitely in the cluster, person 2 is probably in the cluster
    aifutils.mark_as_possible_cluster_member(g, putin, putin_cluster, 0.71, system)
    self.new_file(g, "test_create_an_entity_and_cluster_with_informative_mention.ttl")
    self.dump_graph(g, "create an entity and cluster with informative mention")
def test_make_entity(self):
    """Create a typed, justified entity and check get_type_assertions finds its type."""
    g = aifutils.make_graph()
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/system")
    entity = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    # type is asserted separately so uncertainty about it could be expressed
    type_assertion = aifutils.mark_type(g, "http://www.test.edu/assertions/1",
                                        entity, SEEDLING_TYPES_NIST.Person,
                                        system, 1.0)
    aifutils.mark_text_justification(g, [entity, type_assertion],
                                     "NYT_ENG_20181231", 42, 143, system, 0.973)
    self.new_file(g, "test_make_an_entity.ttl")
    self.dump_graph(g, "Example of creating an entity")
    self.assertEqual([type_assertion], aifutils.get_type_assertions(g, entity))
def test_confidence_outside_of_zero_one(self):
    """Negative case: confidence must lie in [0.0, 1.0]; 100.0 should fail validation."""
    g = aifutils.make_graph()
    system = aifutils.make_system_with_uri(g, "http://test.edu/testSystem")
    entity = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    aifutils.mark_type(g, "http://www.test.org/assertions/1", entity,
                       AIDA_PROGRAM_ONTOLOGY.Person, system,
                       100.0)  # illegal confidence value - not in [0.0, 1.0]
    self.dump_graph(g, "Invalid: Confidence outside of zero to one")
def test_non_type_used_as_type(self):
    """Negative case: a blank node is not a legal object for a type assertion."""
    g = aifutils.make_graph()
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    entity = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    bogus_type = BNode()  # a blank node stands in for the bogus entity type
    aifutils.mark_type(g, "http://www.test.edu/typeAssertion/1", entity,
                       bogus_type, system, 1.0)
    self.dump_graph(g, "Invalid: Non type used as type")
def test_create_an_entity_with_image_justification_and_vector(self):
    """Entity with image justification, an external KB link, and private vector data."""
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # entity/event URIs only need to be unique; the exact values don't matter
    entity = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    # type is asserted separately (its URI is likewise arbitrary) so
    # uncertainty about the type could be expressed
    type_assertion = aifutils.mark_type(g, "http://www.test.org/assertions/1",
                                        entity, SEEDLING_TYPES_NIST.Person,
                                        system, 1.0)
    # the image evidence supports both the entity's existence and its type, so
    # the justification is attached to both resources; in TA1 -> TA2
    # communications, confidences live at the justification level
    bounding_box = Bounding_Box((123, 45), (167, 98))
    aifutils.mark_image_justification(g, [entity, type_assertion],
                                      "NYT_ENG_20181231_03", bounding_box,
                                      system, 0.123)
    # link this entity to something in an external KB
    aifutils.link_to_external_kb(g, entity, "freebase.FOO", system, .398)
    # arbitrary system-private data can be attached to nearly anything
    vec = {
        "vector_type": "http://www.test.edu/systemX/personVector",
        "vector_data": [2.0, 7.5, 0.2, 8.1]
    }
    aifutils.mark_private_data_with_vector(g, entity, system, vec)
    self.new_file(g, "test_create_an_entity_with_image_justification_and_vector.ttl")
    self.dump_graph(g, "Example of entity with image justification and vector")
def test_create_an_event_add_invalid_attribute(self):
    """Negative case: events accept only Negated/Hedged/Irrealis/Generic attributes."""
    g = aifutils.make_graph()
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # the event resource itself
    event = aifutils.make_event(g, "http://www.test.edu/events/1", system)
    # a video-channel attribute is outside the allowed set for events
    aifutils.mark_attribute(g, event,
                            interchange_ontology.VideoJustificationChannelPicture)
    self.new_file(g, "test_create_an_event_add_invalid_attribute.ttl")
    self.dump_graph(
        g,
        "Invalid: Semantic Attribute for Event, must be aida:Negated, aida:Hedged, aida:Irrealis, aida:Generic"
    )
def test_justification_missing_confidence(self):
    """Negative case: hand-build a TextJustification without the required confidence."""
    g = aifutils.make_graph()
    system = aifutils.make_system_with_uri(g, "http://test.edu/testSystem")
    entity = aifutils.make_entity(g, "http://www.test.edu/events/1", system)
    # assemble the justification triple-by-triple, deliberately omitting the
    # confidence triple so validation should reject the graph
    justification = BNode()
    triples = (
        (justification, RDF.type, AIDA_ANNOTATION.TextJustification),
        (justification, AIDA_ANNOTATION.source,
         Literal("FOO", datatype=XSD.string)),
        (justification, AIDA_ANNOTATION.startOffset,
         Literal(14, datatype=XSD.integer)),
        (justification, AIDA_ANNOTATION.endOffsetInclusive,
         Literal(56, datatype=XSD.integer)),
        (justification, AIDA_ANNOTATION.system, system),
        (entity, AIDA_ANNOTATION.justifiedBy, justification),
    )
    for triple in triples:
        g.add(triple)
    self.dump_graph(g, "Invalid: Justification missing confidence")
def test_create_a_relation_add_invalid_attribute(self):
    """Negative case: relations accept only Negated/Hedged/Irrealis/Generic attributes."""
    g = aifutils.make_graph()
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # the relation resource itself
    relation = aifutils.make_relation(
        g,
        "https://github.com/NextCenturyCorporation/AIDA-Interchange-Format/LdcAnnotations#R779959.00004",
        system)
    # a video-channel attribute is outside the allowed set for relations
    aifutils.mark_attribute(g, relation,
                            interchange_ontology.VideoJustificationChannelPicture)
    self.new_file(g, "test_create_a_relation_add_invalid_attribute.ttl")
    self.dump_graph(
        g,
        "Invalid: Semantic Attribute for Relation, must be aida:Negated, aida:Hedged, aida:Irrealis, aida:Generic"
    )
def test_create_seedling_event_with_event_argument_uri(self):
    """Event whose argument assertions are given explicit URIs."""
    g = aifutils.make_graph()
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # the event resource; the type is asserted separately so uncertainty
    # about the type could be expressed
    event = aifutils.make_event(g, "http://www.test.edu/events/1", system)
    event_type_string = "Personnel.Elect"
    aifutils.mark_type(g, "http://www.test.edu/assertions/5", event,
                       SEEDLING_TYPES_NIST[event_type_string], system, 1.0)
    # create the two entities involved in the event
    electee = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    # Fixed: both type assertions previously reused ".../assertions/7"
    # (copy-paste); each assertion now has its own URI, matching the
    # 6/7 numbering used by the sibling event tests.
    aifutils.mark_type(g, "http://www.test.edu/assertions/6", electee,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    election_country = aifutils.make_entity(g, "http://www.test.edu/entities/2",
                                            system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/7", election_country,
                       SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
    # link the entities to the event, supplying explicit argument-assertion URIs
    aifutils.mark_as_argument(g, event,
                              SEEDLING_TYPES_NIST[event_type_string] + "_Elect",
                              electee, system, .785,
                              "http://www.test.edu/eventArgument/1")
    aifutils.mark_as_argument(g, event,
                              SEEDLING_TYPES_NIST[event_type_string] + "_Place",
                              election_country, system, .589,
                              "http://www.test.edu/eventArgument/2")
    self.new_file(g, "test_create_seedling_event_with_event_argument_uri.ttl")
    self.dump_graph(g, "Example of seedling event with event assertion URI")
def test_create_a_cluster_with_link_and_confidence(self):
    """Cluster two entities and link the cluster itself to an external KB."""
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    putin = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/1", putin,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    aifutils.mark_name(g, putin, "Путин")
    vladimir_putin = aifutils.make_entity(g, "http://www.test.edu/entities/2", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/2", vladimir_putin,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    aifutils.mark_name(g, vladimir_putin, "Vladimir Putin")
    # create a cluster with prototype
    putin_cluster = aifutils.make_cluster_with_prototype(
        g, "http://www.test.edu/clusters/1", vladimir_putin, system, "Vladimir Putin")
    # person 1 is definitely in the cluster, person 2 is probably in the cluster
    aifutils.mark_as_possible_cluster_member(g, putin, putin_cluster, 1.0, system)
    aifutils.mark_as_possible_cluster_member(g, vladimir_putin, putin_cluster,
                                             0.71, system)
    # the cluster itself can also be linked to something in an external KB
    aifutils.link_to_external_kb(g, putin_cluster, "freebase.FOO", system, .398)
    self.new_file(g, "test_create_a_cluster_with_link_and_confidence.ttl")
    self.dump_graph(g, "create a cluster with link and confidence")
def test_create_an_event(self):
    """Create a Personnel.Elect event linked to an electee and a place."""
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
    # the event resource; type is encoded separately so uncertainty about
    # the type could be expressed
    event = aifutils.make_event(g, "http://www.test.edu/events/1", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/5", event,
                       SEEDLING_TYPES_NIST['Personnel.Elect'], system, 1.0)
    # create the two entities involved in the event
    electee = aifutils.make_entity(g, "http://www.test.edu/entities/1", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/6", electee,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    election_country = aifutils.make_entity(g, "http://www.test.edu/entities/2", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/7", election_country,
                       SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
    # link the entities to the event via role URIs derived from the event type
    electee_role = URIRef(SEEDLING_TYPES_NIST['Personnel.Elect'] + "_Elect")
    aifutils.mark_as_argument(g, event, electee_role, electee, system, 0.785)
    place_role = URIRef(SEEDLING_TYPES_NIST['Personnel.Elect'] + "_Place")
    aifutils.mark_as_argument(g, event, place_role, election_country, system, 0.589)
    self.new_file(g, "test_create_an_event.ttl")
    self.dump_graph(g, "Example of creating an event")
class ScalingTest():
    """Builds increasingly large random AIF graphs and reports serialized size and time.

    Each run_scaling_test iteration populates the shared graph with random
    entities and events, serializes it to `filename`, then doubles the counts.
    """

    filename = "scalingdata.ttl"
    LDC_NS = "https://github.com/NextCenturyCorporation/AIDA-Interchange-Format/LdcAnnotations#"
    # NOTE: the graph, counters, and entity list are class-level attributes, so
    # state is shared across instances; acceptable for this one-shot script.
    g = aifutils.make_graph()
    system = aifutils.make_system_with_uri(g, 'http://www.test.edu/testSystem')
    # beginning sizes of data
    entity_count = 128000
    event_count = 38400
    relations_count = 200
    assertion_count = 1500
    entity_index = 1
    event_index = 1
    relation_index = 1
    assertion_index = 1
    # utility values, so that we can easily create random things
    abc = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    entity_resource_list = []

    def run_scaling_test(self):
        """Repeatedly build and serialize the graph, doubling counts each pass."""
        for _ in range(200):
            print("trying : Entity count: ", self.entity_count)
            start_time = int(round(time.time() * 1000))
            self.run_single_test()
            end_time = int(round(time.time() * 1000))
            # Fixed: was (start_time - end_time) / 1000, which always yielded a
            # negative duration.
            duration = (end_time - start_time) / 1000
            size = 0
            # Fixed: the file was previously opened unconditionally (leaking the
            # handle, and raising if absent) just to measure size; getsize
            # needs no open handle.
            if os.path.isfile(self.filename):
                size = os.path.getsize(self.filename) / 1000000.  # bytes -> MB
            print("Size of output: ", size, " duration: ", duration)
            # double size of entities and events after every iteration
            self.increase()

    def run_single_test(self):
        """Add the configured number of entities and events, then write to file."""
        for _ in range(self.entity_count):
            self.add_entity()
        for _ in range(self.event_count):
            self.add_event()
        self.write_to_file(self.filename)

    def add_entity(self):
        """Add one randomly typed entity, sometimes with a name/text/numeric value."""
        entity_resource = aifutils.make_entity(self.g, self.get_entity_uri(),
                                               self.system)
        self.entity_resource_list.append(entity_resource)
        # sometimes add hasName, textValue, or numericValue.
        # NOTE: this does not check that the value kind matches the entity type!
        rand = random.random()
        if rand < 0.15:
            aifutils.mark_name(self.g, entity_resource, self.get_random_string(5))
        elif rand < 0.3:
            aifutils.mark_text_value(self.g, entity_resource,
                                     self.get_random_string(7))
        elif rand < 0.4:
            aifutils.mark_numeric_value_as_double(self.g, entity_resource,
                                                  random.random())
        # set the type
        type_to_use = self.get_random_entity()
        type_assertion = aifutils.mark_type(self.g, self.get_assertion_uri(),
                                            entity_resource, type_to_use,
                                            self.system, 1.0)
        self.add_justification_and_private_data(type_assertion)

    def add_event(self):
        """Add one randomly typed event with two random arguments."""
        event_resource = aifutils.make_event(self.g, self.get_event_uri(),
                                             self.system)
        # add the type.  Fixed: the index was randint(0, len(EVENT_TYPES)) - 1,
        # which is biased — randint's upper bound is inclusive, so index -1
        # (i.e. the last element) was picked twice as often.
        type_resource = random.choice(self.EVENT_TYPES)
        type_assertion = aifutils.mark_type(self.g, self.get_assertion_uri(),
                                            event_resource, type_resource,
                                            self.system, 1.0)
        self.add_justification_and_private_data(type_assertion)
        # make two arguments
        for _ in range(2):
            arg = URIRef(type_resource + self.get_random_suffix())
            argument = aifutils.mark_as_argument(self.g, event_resource, arg,
                                                 self.get_random_entity(),
                                                 self.system, 0.785,
                                                 self.get_assertion_uri())
            self.add_justification_and_private_data(argument)

    def add_justification_and_private_data(self, resource):
        """Attach a random text justification plus provenance private data to resource."""
        docId = self.get_random_doc_id()
        # justify the type assertion
        aifutils.mark_text_justification(self.g, resource, docId, 1029, 1033,
                                         self.system, 0.973)
        # add some private data
        aifutils.mark_private_data(self.g, resource,
                                   "{ 'provenance' : '" + docId + "' }",
                                   self.system)

    def increase(self):
        """Double the entity and event counts for the next iteration."""
        self.entity_count *= 2
        self.event_count *= 2

    def get_uri(self, uri):
        """Prefix uri with the LDC namespace."""
        return self.LDC_NS + uri

    def get_entity_uri(self):
        """Return the next unique entity URI."""
        self.entity_index += 1
        return self.get_uri("entity-" + str(self.entity_index))

    def get_event_uri(self):
        """Return the next unique event URI."""
        self.event_index += 1
        return self.get_uri("event-" + str(self.event_index))

    def get_relation_uri(self):
        """Return the next unique relation URI."""
        self.relation_index += 1
        return self.get_uri("relation-" + str(self.relation_index))

    def get_assertion_uri(self):
        """Return the next unique assertion URI."""
        self.assertion_index += 1
        return self.get_uri("assertion-" + str(self.assertion_index))

    def get_test_system_uri(self):
        """Return the URI for the test system."""
        return self.get_uri("testSystem")

    def get_random_doc_id(self):
        """Build a random document id like 'IC00123ABC'."""
        s = "IC" if random.getrandbits(1) == 1 else "HC"
        s += "00"
        s += str(random.randint(0, 1000))
        for _ in range(3):
            s += self.abc[random.randint(0, len(self.abc) - 1)]
        return s

    def get_random_string(self, length):
        """Return `length` random uppercase letters."""
        s = ""
        for _ in range(0, length):
            s += self.abc[random.randint(0, len(self.abc) - 1)]
        return s

    def get_random_entity(self):
        """Return a uniformly random member of ENTITY_TYPES."""
        return self.ENTITY_TYPES[random.randint(0, len(self.ENTITY_TYPES) - 1)]

    def get_random_suffix(self):
        """Return a random role name prefixed with '_'."""
        return "_" + self.ROLES[random.randint(0, len(self.ROLES) - 1)]

    def write_to_file(self, testname):
        """Serialize the graph as Turtle into `testname`."""
        print("\n\n", testname, "\n\n")
        # Fixed: use a context manager so the handle is closed even on error.
        with open(testname, "w") as out:
            out.write(str(self.g.serialize(format='turtle')))

    ENTITY_TYPES = [
        seedling_ontology.Person, seedling_ontology.Organization,
        seedling_ontology.Location, seedling_ontology.Facility,
        seedling_ontology.GeopoliticalEntity, seedling_ontology.FillerType,
        seedling_ontology.Business_DeclareBankruptcy, seedling_ontology.Business_End,
        seedling_ontology.Business_Merge, seedling_ontology.Business_Start,
        seedling_ontology.Conflict_Attack, seedling_ontology.Conflict_Demonstrate,
        seedling_ontology.Contact_Broadcast, seedling_ontology.Contact_Contact,
        seedling_ontology.Contact_Correspondence, seedling_ontology.Contact_Meet,
        seedling_ontology.Existence_DamageDestroy,
        seedling_ontology.Government_Agreements,
        seedling_ontology.Government_Legislate, seedling_ontology.Government_Spy,
        seedling_ontology.Government_Vote, seedling_ontology.Inspection_Artifact,
        seedling_ontology.Inspection_People, seedling_ontology.Justice_Acquit,
        seedling_ontology.Justice_Appeal, seedling_ontology.Justice_ArrestJail,
        seedling_ontology.Justice_ChargeIndict, seedling_ontology.Justice_Convict,
        seedling_ontology.Justice_Execute, seedling_ontology.Justice_Extradite,
        seedling_ontology.Justice_Fine, seedling_ontology.Justice_Investigate,
        seedling_ontology.Justice_Pardon, seedling_ontology.Justice_ReleaseParole,
        seedling_ontology.Justice_Sentence, seedling_ontology.Justice_Sue,
        seedling_ontology.Justice_TrialHearing, seedling_ontology.Life_BeBorn,
        seedling_ontology.Life_Die, seedling_ontology.Life_Divorce,
        seedling_ontology.Life_Injure, seedling_ontology.Life_Marry,
        seedling_ontology.Manufacture_Artifact,
        seedling_ontology.Movement_TransportArtifact,
        seedling_ontology.Movement_TransportPerson,
        seedling_ontology.Personnel_Elect, seedling_ontology.Personnel_EndPosition,
        seedling_ontology.Personnel_Nominate,
        seedling_ontology.Personnel_StartPosition,
        seedling_ontology.Transaction_Transaction,
        seedling_ontology.Transaction_TransferControl,
        seedling_ontology.Transaction_TransferMoney,
        seedling_ontology.Transaction_TransferOwnership,
        seedling_ontology.GeneralAffiliation_APORA,
        seedling_ontology.GeneralAffiliation_MORE,
        seedling_ontology.GeneralAffiliation_OPRA,
        seedling_ontology.GeneralAffiliation_OrganizationWebsite,
        seedling_ontology.GeneralAffiliation_PersonAge,
        seedling_ontology.GeneralAffiliation_Sponsorship,
        seedling_ontology.Measurement_Count,
        seedling_ontology.OrganizationAffiliation_EmploymentMembership,
        seedling_ontology.OrganizationAffiliation_Founder,
        seedling_ontology.OrganizationAffiliation_InvestorShareholder,
        seedling_ontology.OrganizationAffiliation_Leadership,
        seedling_ontology.OrganizationAffiliation_Ownership,
        seedling_ontology.OrganizationAffiliation_StudentAlum,
        seedling_ontology.PartWhole_Membership,
        seedling_ontology.PartWhole_Subsidiary,
        seedling_ontology.PersonalSocial_Business,
        seedling_ontology.PersonalSocial_Family,
        seedling_ontology.PersonalSocial_RoleTitle,
        seedling_ontology.PersonalSocial_Unspecified,
        seedling_ontology.Physical_LocatedNear,
        seedling_ontology.Physical_OrganizationHeadquarter,
        seedling_ontology.Physical_OrganizationLocationOrigin,
        seedling_ontology.Physical_Resident
    ]

    EVENT_TYPES = [
        seedling_ontology.Business_DeclareBankruptcy, seedling_ontology.Business_End,
        seedling_ontology.Business_Merge, seedling_ontology.Business_Start,
        seedling_ontology.Conflict_Attack, seedling_ontology.Conflict_Demonstrate,
        seedling_ontology.Contact_Broadcast, seedling_ontology.Contact_Contact,
        seedling_ontology.Contact_Correspondence, seedling_ontology.Contact_Meet,
        seedling_ontology.Existence_DamageDestroy,
        seedling_ontology.Government_Agreements,
        seedling_ontology.Government_Legislate, seedling_ontology.Government_Spy,
        seedling_ontology.Government_Vote, seedling_ontology.Inspection_Artifact,
        seedling_ontology.Inspection_People, seedling_ontology.Justice_Acquit,
        seedling_ontology.Justice_Appeal, seedling_ontology.Justice_ArrestJail,
        seedling_ontology.Justice_ChargeIndict, seedling_ontology.Justice_Convict,
        seedling_ontology.Justice_Execute, seedling_ontology.Justice_Extradite,
        seedling_ontology.Justice_Fine, seedling_ontology.Justice_Investigate,
        seedling_ontology.Justice_Pardon, seedling_ontology.Justice_ReleaseParole,
        seedling_ontology.Justice_Sentence, seedling_ontology.Justice_Sue,
        seedling_ontology.Justice_TrialHearing, seedling_ontology.Life_BeBorn,
        seedling_ontology.Life_Die, seedling_ontology.Life_Divorce,
        seedling_ontology.Life_Injure, seedling_ontology.Life_Marry,
        seedling_ontology.Manufacture_Artifact,
        seedling_ontology.Movement_TransportArtifact,
        seedling_ontology.Movement_TransportPerson,
        seedling_ontology.Personnel_Elect, seedling_ontology.Personnel_EndPosition,
        seedling_ontology.Personnel_Nominate,
        seedling_ontology.Personnel_StartPosition,
        seedling_ontology.Transaction_Transaction,
        seedling_ontology.Transaction_TransferControl,
        seedling_ontology.Transaction_TransferMoney,
        seedling_ontology.Transaction_TransferOwnership
    ]

    # NOTE: duplicated role names are intentional — they weight random sampling.
    ROLES = [
        "Attacker", "Instrument", "Place", "Target", "Time", "Broadcaster",
        "Place", "Time", "Participant", "Place", "Participant", "Time",
        "Participant", "Affiliate", "Affiliation", "Affiliation", "Person",
        "Entity", "Sponsor", "Defendant", "Prosecutor", "Adjudicator",
        "Defendant", "Agent", "Instrument", "Victim", "Artifact",
        "Manufacturer", "Agent", "Artifact", "Destination", "Instrument",
        "Origin", "Time", "Agent", "Destination", "Instrument", "Origin",
        "Person", "Employee", "Organization", "Person", "Entity", "Place",
        "Beneficiary", "Giver", "Recipient", "Thing", "Time"
    ]
def add_filetype(g, one_unique_ke, filetype_str):
    """Attach a {'fileType': filetype_str} private-data blob to one_unique_ke in g."""
    system = aifutils.make_system_with_uri(g, "http://www.rpi.edu/fileType")
    payload = json.dumps({'fileType': filetype_str})
    aifutils.mark_private_data(g, one_unique_ke, payload, system)
def append_private_data(language_id, input_folder, lorelei_links, freebase_links,
                        fine_grained_entity_dict, translation_dict, offset_vec,
                        offset_entity_corefer, ltf_dir, doc_id_to_root_dict=None,
                        eng_elmo=None, ukr_elmo=None, rus_elmo=None,
                        trigger_vec=None, offset_event_vec=None):
    """Decorate every ``.ttl`` AIF graph in ``input_folder`` with private data.

    For each turtle file this function:
      * marks the language/file type on every entity, event and
        relation/statement node (``add_filetype``);
      * attaches an averaged trigger embedding to each event — read from
        ``offset_event_vec`` when ``trigger_vec`` is truthy, otherwise
        computed via ``generate_trigger_emb`` with the ELMo models;
      * for each typed entity, attaches the Freebase link, fine-grained
        type, LORELEI link confidences, an (obfuscated) coreference id,
        mention translations, and an averaged entity embedding.

    The updated graph is serialized into the module-level ``output_folder``
    under the same file name.

    NOTE(review): a second, shorter ``append_private_data`` is defined later
    in this module and shadows this one at import time — confirm which
    version is intended to be live.
    """
    for one_file in os.listdir(input_folder):
        if ".ttl" not in one_file:
            continue
        one_file_id = one_file.replace(".ttl", "")
        # root document id is prepended to coreference ids below; empty when
        # no doc->root mapping is supplied
        if doc_id_to_root_dict is not None:
            root_docid = doc_id_to_root_dict[one_file_id]
        else:
            root_docid = ""
        one_file_path = os.path.join(input_folder, one_file)
        output_file = os.path.join(output_folder, one_file)
        turtle_content = open(one_file_path).read()
        g = Graph().parse(data=turtle_content, format='ttl')
        # (removed: large commented-out legacy blocks for per-KE file type
        # and linkTarget-based fine-grained marking)

        # First pass: tag language/file type on every entity/event/statement
        # node; remember events and statements for later passes.
        events = []
        args = []
        for s, p, o in g:
            if 'type' in p and 'Entity' in o:
                add_filetype(g, s, language_id)
            elif 'type' in p and 'Event' in o:
                add_filetype(g, s, language_id)
                events.append(s)
            elif 'type' in p and ('Statement' in o or 'Relation' in o):
                add_filetype(g, s, language_id)
                args.append(s)

        # get entities without TITLE/TIME, etc — keep only entities whose
        # asserted type has one of the whitelisted coarse (parent) types.
        entity_type_ttl = defaultdict()
        for entity in g.subjects(predicate=RDF.type, object=AIDA.Entity):
            for assertion in g.subjects(object=entity, predicate=RDF.subject):
                object_assrt = g.value(subject=assertion, predicate=RDF.object)
                predicate_assrt = g.value(subject=assertion, predicate=RDF.predicate)
                # only type assertions (predicate == rdf:type) are relevant
                if predicate_assrt == RDF.type:
                    entity_type = object_assrt.split('#')[-1]
                    parent_type = entity_type.split('.')[0]
                    if parent_type in [
                            'PER', 'ORG', 'GPE', 'LOC', 'FAC', 'WEA', 'VEH',
                            'SID', 'CRM', 'BAL'
                    ]:
                        entity_type_ttl[entity] = entity_type

        # Map each kept entity / event to all of its justification nodes.
        entity_offset_map = defaultdict(list)
        event_offset_map = defaultdict(list)
        for s, p, o in g:
            if 'justifiedBy' in p:
                if s in entity_type_ttl:
                    entity_offset_map[s].append(o)
                if s in events:
                    event_offset_map[s].append(o)

        # offset_info[justification_node] -> {'startOffset': ..,
        # 'endOffsetInclusive': .., 'source': ..}
        offset_info = dict()
        for s, p, o in g:
            p = p.toPython().split('#')[-1]
            if 'startOffset' == p or 'endOffsetInclusive' == p or 'source' == p:
                if s not in offset_info:
                    offset_info[s] = dict()
                offset_info[s][p] = o

        # --- events: average the embeddings of all complete mentions ---
        for event in event_offset_map:
            event_vecs = []
            for one_offset in event_offset_map[event]:
                # a usable justification has all three of start/end/source
                if len(offset_info[one_offset]) != 3:
                    continue
                for one_offset_type in offset_info[one_offset]:
                    if 'startOffset' in one_offset_type:
                        start_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'endOffsetInclusive' in one_offset_type:
                        end_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'source' in one_offset_type:
                        docid = offset_info[one_offset][
                            one_offset_type].toPython()
                if trigger_vec:
                    # event embedding from precomputed files: any vector
                    # span lying inside this mention span counts
                    for ent_vec_type in offset_event_vec[docid]:
                        for (vec_start, vec_end,
                             vec) in offset_event_vec[docid][ent_vec_type]:
                            if vec_start >= start_offset and vec_end <= end_offset:
                                event_vecs.append(vec)
                else:
                    # event embedding computed on the fly from ELMo
                    vec = generate_trigger_emb(docid, start_offset, end_offset,
                                               ltf_dir, language_id, eng_elmo,
                                               ukr_elmo, rus_elmo)
                    if vec is not None:
                        event_vecs.append(vec)
            if len(event_vecs) > 0:
                trigger_emb_avg = np.mean(event_vecs, axis=0)
                evt_vec_json_object = {
                    'event_vec':
                    ','.join(['%0.8f' % dim for dim in trigger_emb_avg])
                }
                evt_vec_json_content = json.dumps(evt_vec_json_object)
                system = aifutils.make_system_with_uri(
                    g, "http://www.rpi.edu/event_representations")
                aifutils.mark_private_data(g, event, evt_vec_json_content,
                                           system)
        # (removed: commented-out legacy event-time block)

        # --- entities: links / coref / translation / embeddings ---
        unique_entities = set()
        # (removed: commented-out single-offset legacy block)
        for entity in entity_offset_map:
            entity_vecs = []
            entity_type = entity_type_ttl[entity]
            coarse_type = entity_type.split('.')[0]
            for one_offset in entity_offset_map[entity]:
                if len(offset_info[one_offset]) != 3:
                    continue
                for one_offset_type in offset_info[one_offset]:
                    if 'startOffset' in one_offset_type:
                        start_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'endOffsetInclusive' in one_offset_type:
                        end_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'source' in one_offset_type:
                        docid = offset_info[one_offset][
                            one_offset_type].toPython()
                search_key = "%s:%d-%d" % (docid, start_offset, end_offset)
                # entity-level data is attached only once per entity
                if entity not in unique_entities:
                    try:
                        # append Freebase linking result
                        if search_key in freebase_links:
                            freebase_link = freebase_links[search_key]
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_Freebase")
                            freebase_json_content = json.dumps(
                                {'freebase_link': freebase_link})
                            aifutils.mark_private_data(g, entity,
                                                       freebase_json_content,
                                                       system)
                            # append EDL fine_grained_data for the
                            # highest-average-score Freebase candidate
                            linking_info = sorted(
                                freebase_link.items(),
                                key=lambda x: x[1]['average_score'],
                                reverse=True)[0][0]
                            if linking_info in fine_grained_entity_dict:
                                fine_grained_json_object = fine_grained_entity_dict[
                                    linking_info]
                                fine_grained_json_content = json.dumps({
                                    'finegrained_type':
                                    fine_grained_json_object
                                })
                                system = aifutils.make_system_with_uri(
                                    g, "http://www.rpi.edu/EDL_FineGrained")
                                aifutils.mark_private_data(
                                    g, entity, fine_grained_json_content,
                                    system)
                        # append LORELEI confidence for every link target;
                        # unknown targets get a small floor confidence
                        if search_key in lorelei_links:
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_LORELEI_maxPool")
                            p_link = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#link'
                            )
                            p_link_target = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#linkTarget'
                            )
                            for lorelei_link_ttl in g.objects(
                                    subject=entity, predicate=p_link):
                                link_target = str(
                                    g.value(subject=lorelei_link_ttl,
                                            predicate=p_link_target))
                                if search_key not in lorelei_links or link_target not in lorelei_links[
                                        search_key]:
                                    confidence = 0.001
                                else:
                                    confidence = lorelei_links[search_key][
                                        link_target]
                                aifutils.mark_confidence(
                                    g, lorelei_link_ttl, confidence, system)
                        # append coreference info, keyed on the coarse type;
                        # id is base64-obfuscated with the root doc id prefix
                        if search_key in offset_entity_corefer:
                            if coarse_type in offset_entity_corefer[
                                    search_key]:
                                corefer_id = offset_entity_corefer[search_key][
                                    coarse_type]
                                system = aifutils.make_system_with_uri(
                                    g, "http://www.rpi.edu/coreference")
                                corefer_id_encoded = base64.b64encode(
                                    ('%s%s' % (root_docid, corefer_id)
                                     ).encode('utf-8')).decode("utf-8")
                                corefer_json_dict = {
                                    'coreference': corefer_id_encoded
                                }
                                corefer_json_content = json.dumps(
                                    corefer_json_dict)
                                aifutils.mark_private_data(
                                    g, entity, corefer_json_content, system)
                        # mark entity as processed
                        unique_entities.add(entity)
                    except KeyError as e:
                        traceback.print_exc()
                        pass
                # append translation (mention-level); skipped for English
                if 'en' in language_id:
                    continue
                try:
                    translation_list = translation_dict[search_key]
                    system = aifutils.make_system_with_uri(
                        g, "http://www.rpi.edu/EDL_Translation")
                    translation_json_dict = {'translation': translation_list}
                    translation_json_content = json.dumps(
                        translation_json_dict)
                    aifutils.mark_private_data(g, one_offset,
                                               translation_json_content,
                                               system)
                except KeyError:
                    pass
                # collect entity vectors (mention-level)
                for ent_vec_type in offset_vec[docid]:
                    for (vec_start, vec_end,
                         vec) in offset_vec[docid][ent_vec_type]:
                        if vec_start >= start_offset and vec_end <= end_offset:
                            entity_vecs.append(vec)
                # append entity vectors (mention-level)
                if len(entity_vecs) > 0:
                    entity_vec = np.average(entity_vecs, 0)
                    system = aifutils.make_system_with_uri(
                        g, "http://www.rpi.edu/entity_representations")
                    ent_vec_json_object = {
                        'entity_vec_space': ent_vec_type,
                        'entity_vec':
                        ','.join(['%0.8f' % dim for dim in entity_vec])
                    }
                    ent_vec_json_content = json.dumps(ent_vec_json_object)
                    aifutils.mark_private_data(g, entity, ent_vec_json_content,
                                               system)
                # NOTE(review): indentation reconstructed from collapsed
                # source — this break appears to stop after the first
                # complete mention of the entity; confirm against VCS.
                break
        g.serialize(destination=output_file, format='turtle')
    print("Now we have append the private data for %s" % language_id)
def append_private_data(language_id, input_folder, lorelei_links,
                        freebase_links, fine_grained_entity_dict,
                        translation_dict):
    """Decorate every ``.ttl`` AIF graph in ``input_folder`` with private data.

    Shorter variant: marks file type on all KEs, then per typed entity
    attaches Freebase links, fine-grained types, LORELEI link confidences
    and mention translations, serializing each result to ``output_folder``.

    NOTE(review): this definition shadows the earlier, fuller
    ``append_private_data`` in this module, yet it references several names
    that are never bound here (``one_bnode``, ``ent_vec_type``,
    ``entity_vecs``, ``offset_vec``) and will raise ``NameError`` on those
    paths — it looks like stale legacy code. Confirm which version should
    survive before relying on either.
    """
    count_flag = 0  # NOTE(review): written but never read
    for one_file in os.listdir(input_folder):
        if ".ttl" not in one_file:
            continue
        ent_json_list = dict()  # NOTE(review): populated nowhere in this version
        one_file_id = one_file.replace(".ttl", "")
        one_file_path = os.path.join(input_folder, one_file)
        output_file = os.path.join(output_folder, one_file)
        turtle_content = open(one_file_path).read()
        g = Graph().parse(data=turtle_content, format='ttl')
        # (removed: commented-out legacy linkTarget-based fine-grained block)

        # Tag language/file type on every entity/event/statement node and
        # remember each group for later passes.
        entities = []
        events = []
        args = []
        for s, p, o in g:
            if 'type' in p and 'Entity' in o:
                add_filetype(g, s, language_id)
                entities.append(s)
            elif 'type' in p and 'Event' in o:
                add_filetype(g, s, language_id)
                events.append(s)
            elif 'type' in p and ('Statement' in o or 'Relation' in o):
                add_filetype(g, s, language_id)
                args.append(s)

        # Map each entity / event to all of its justification nodes
        # (one entity may have multiple offsets).
        entity_offset_map = defaultdict(list)
        event_offset_map = defaultdict(list)
        for s, p, o in g:
            if 'justifiedBy' in p:
                if s in entities:
                    entity_offset_map[s].append(o)
                if s in events:
                    event_offset_map[s].append(o)
        # (removed: commented-out single-offset legacy maps)

        # offset_info[justification_node] -> {'startOffset': ..,
        # 'endOffsetInclusive': .., 'source': ..}
        offset_info = dict()
        for s, p, o in g:
            p = p.toPython().split('#')[-1]
            if 'startOffset' == p or 'endOffsetInclusive' == p or 'source' == p:
                if s not in offset_info:
                    offset_info[s] = dict()
                offset_info[s][p] = o
        # (removed: commented-out legacy event-time block)

        unique_entities = []
        # (removed: commented-out single-offset legacy block)
        for entity in entity_offset_map:
            for one_offset in entity_offset_map[entity]:
                # a usable justification has all three of start/end/source
                if len(offset_info[one_offset]) != 3:
                    continue
                for one_offset_type in offset_info[one_offset]:
                    if 'startOffset' in one_offset_type:
                        start_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'endOffsetInclusive' in one_offset_type:
                        end_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'source' in one_offset_type:
                        docid = offset_info[one_offset][
                            one_offset_type].toPython()
                search_key = "%s:%d-%d" % (docid, start_offset, end_offset)
                # append entity vecs
                # NOTE(review): ent_vec_type, entity_vecs and offset_vec are
                # undefined in this version — NameError if reached.
                for (vec_start, vec_end,
                     vec) in offset_vec[docid][ent_vec_type]:
                    if vec_start >= start_offset and vec_end <= end_offset:
                        entity_vecs.append(vec)
                # append links
                # NOTE(review): one_bnode is undefined here (loop variable is
                # `entity`) — NameError if reached.
                if entity_offset_map[one_bnode] not in unique_entities:
                    unique_entities.append(entity_offset_map[one_bnode])
                    # append Freebase linking result
                    try:
                        if search_key in freebase_links:
                            freebase_link = freebase_links[search_key]
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_Freebase")
                            freebase_json_content = json.dumps(
                                {'freebase_link': freebase_link})
                            aifutils.mark_private_data(
                                g, entity_offset_map[one_bnode],
                                freebase_json_content, system)
                            # append EDL fine_grained_data for the
                            # highest-average-score Freebase candidate
                            linking_info = sorted(
                                freebase_link.items(),
                                key=lambda x: x[1]['average_score'],
                                reverse=True)[0][0]
                            if linking_info in fine_grained_entity_dict:
                                fine_grained_json_object = fine_grained_entity_dict[
                                    linking_info]
                                fine_grained_json_content = json.dumps({
                                    'finegrained_type':
                                    fine_grained_json_object
                                })
                                system = aifutils.make_system_with_uri(
                                    g, "http://www.rpi.edu/EDL_FineGrained")
                                aifutils.mark_private_data(
                                    g, entity_offset_map[one_bnode],
                                    fine_grained_json_content, system)
                        # append LORELEI confidence for every link target;
                        # unknown targets get a small floor confidence
                        if search_key in lorelei_links:
                            lorelei_link_dict = lorelei_links[search_key]
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_LORELEI_maxPool")
                            p_link = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#link'
                            )
                            p_link_target = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#linkTarget'
                            )
                            for lorelei_link_ttl in g.objects(
                                    subject=entity_offset_map[one_bnode],
                                    predicate=p_link):
                                link_target = str(
                                    g.value(subject=lorelei_link_ttl,
                                            predicate=p_link_target))
                                if search_key not in lorelei_links or link_target not in lorelei_links[
                                        search_key]:
                                    confidence = 0.001
                                else:
                                    confidence = lorelei_links[search_key][
                                        link_target]
                                aifutils.mark_confidence(
                                    g, lorelei_link_ttl, confidence, system)
                    except KeyError as e:
                        traceback.print_exc()
                        pass
                # append translation; skipped for English
                if 'en' in language_id:
                    continue
                try:
                    translation_list = translation_dict[search_key]
                    system = aifutils.make_system_with_uri(
                        g, "http://www.rpi.edu/EDL_Translation")
                    translation_json_dict = {'translation': translation_list}
                    translation_json_content = json.dumps(
                        translation_json_dict)
                    aifutils.mark_private_data(g, one_bnode,
                                               translation_json_content,
                                               system)
                except KeyError:
                    pass
        g.serialize(destination=output_file, format='ttl')
    print("Now we have append the private data for %s" % language_id)
def get_initialized_graph_and_system():
    """Return a fresh AIF graph (test prefix bound) plus its system node."""
    g = aifutils.make_graph()
    g.bind('test', prefix)
    return g, aifutils.make_system_with_uri(g, "http://www.test.edu/testSystem")
def transferAIF(p_id):
    """Merge RPI text and CU vision AIF graphs for one parent document.

    Loads (if present) the RPI text-mention graph, the CU graph, and the CU
    instance-matching graph for ``p_id``, then:
      * attaches CU instance/semantic feature vectors as private data on
        image and keyframe entities;
      * for each grounded text entity, creates new bounding-box entities
        with type assertions and image/keyframe justifications, attaches
        grounding/instance features, and builds two kinds of clusters:
        text-to-bbox grounding clusters and bbox-to-detection overlap
        (instance-matching) clusters;
      * serializes the merged graph into ``merged_graph_path``.

    Relies on module-level globals: path/prefix constants, ``grounding_dict``
    and ``get_features``.
    """
    g = Graph()
    # load rpi graph if exists
    if p_id in txt_mention_ttl_list:
        turtle_path = os.path.join(txt_mention_ttl_path, p_id + '.ttl')
        turtle_content = open(turtle_path).read()
        g.parse(data=turtle_content, format='n3')
    # load and merge cu graph if exists
    if p_id in cu_ttl_list:
        turtle_path = os.path.join(cu_ttl_path, p_id + '.ttl')
        turtle_content = open(turtle_path).read()
        g.parse(data=turtle_content, format='n3')
    # load and merge cu graph for instance matching if exists
    if p_id in cu_ttl_ins_list:
        turtle_path = os.path.join(cu_ttl_ins_path, p_id + '.ttl')
        turtle_content = open(turtle_path).read()
        g.parse(data=turtle_content, format='n3')
    sys_instance_matching = aifutils.make_system_with_uri(
        g, cu_pref + 'Systems/Instance-Matching/ResNet152')
    sys_grounding = aifutils.make_system_with_uri(
        g, cu_pref + 'Systems/Grounding/ELMo-PNASNET')
    usc_sys_grounding = aifutils.make_system_with_uri(
        g, usc_pref + 'Systems/ZSGrounder')
    # find vision and text entities by URI prefix; keys are the last two
    # path segments for vision entities, the full URI for text entities
    sbj_all = set(g.subjects())
    img_entities = {}
    keyframe_entities = {}
    ltf_entities = {}
    for sbj in sbj_all:
        sbj_name = sbj.toPython()
        if cu_objdet_pref in sbj_name:
            if sbj.__class__ == rdflib.term.URIRef:
                if 'JPG' in sbj_name:
                    img_id = '/'.join(sbj_name.split('/')[-2:])
                    img_entities[img_id] = sbj
                elif 'Keyframe' in sbj_name:
                    kfrm_id = '/'.join(sbj_name.split('/')[-2:])
                    keyframe_entities[kfrm_id] = sbj
        elif rpi_entity_pref in sbj_name:
            if sbj.__class__ == rdflib.term.URIRef and rpi_entity_pref in sbj_name:
                ltf_entities[sbj_name] = sbj

    ## adding private data to entities for cu grounding
    # images: attach instance + semantic feature vectors
    for key in img_entities:
        dtype = 'jpg'
        # instance features
        ftype = 'instance'
        data_instance = get_features(key, dtype, ftype)
        # semantic features
        ftype = 'semantic'
        data_semantic = get_features(key, dtype, ftype)
        # aggregation
        j_d_i = json.dumps({'columbia_vector_instance_v1.0': data_instance})
        j_d_s = json.dumps({'columbia_vector_grounding_v1.0': data_semantic})
        entity = img_entities[key]
        aifutils.mark_private_data(g, entity, j_d_i, sys_instance_matching)
        aifutils.mark_private_data(g, entity, j_d_s, sys_grounding)
    # keyframes: same feature attachment as images
    for key in keyframe_entities:
        dtype = 'keyframe'
        # instance features
        ftype = 'instance'
        data_instance = get_features(key, dtype, ftype)
        # semantic features
        ftype = 'semantic'
        data_semantic = get_features(key, dtype, ftype)
        # aggregation
        j_d_i = json.dumps({'columbia_vector_instance_v1.0': data_instance})
        j_d_s = json.dumps({'columbia_vector_grounding_v1.0': data_semantic})
        entity = keyframe_entities[key]
        aifutils.mark_private_data(g, entity, j_d_i, sys_instance_matching)
        aifutils.mark_private_data(g, entity, j_d_s, sys_grounding)

    cnt_img = {}   # per-image count of grounding bboxes (IRI disambiguation)
    cnt_boxO = {}  # per-image count of bbox-overlap clusters
    cnt_ltf = {}   # per-name count of text entities (cluster IRI suffix)
    # add text features, grounding, linking
    for key in ltf_entities:
        if key not in grounding_dict:
            continue
        entity_name = None
        USC_GROUNDING = 'usc_vision' in grounding_dict[key][
            'grounding'].values()
        if not USC_GROUNDING:
            # text features (CU grounding only)
            j_d_t = json.dumps({
                'columbia_vector_text_v1.0':
                grounding_dict[key]['textual_features'].tolist()
            })
            entity_ltf = ltf_entities[key]
            aifutils.mark_private_data(g, entity_ltf, j_d_t, sys_grounding)
        # type and name of entity to be linked
        # NOTE(review): entity_ltf above is only bound on the CU path, yet it
        # is used below on the USC path too — confirm intended scoping.
        type_rdf = grounding_dict[key]['type_rdf']
        entity_name = grounding_dict[key]['name']
        grndg_file_type = grounding_dict[key]['source_type']
        if entity_name is None:
            continue
        # keep track of entities with same names for avoiding clustering overlap
        if entity_name in cnt_ltf:
            cnt_ltf[entity_name] += 1
        else:
            cnt_ltf[entity_name] = 1
        clstr_prot_flag = False  # cluster obj for entity_ltf not created yet
        # adding grounding bboxes as new entities
        for img_id in grounding_dict[key]['grounding']:
            if img_id == 'system':
                continue
            grnd = grounding_dict[key]['grounding'][img_id]
            for ii, bbox in enumerate(grnd['bbox']):
                # to keep track of cnt of bbox of same image
                if img_id in cnt_img:
                    cnt_img[img_id] += 1
                else:
                    cnt_img[img_id] = 1
                # add grounding bbox as entity with a typed assertion
                score = grnd['bbox_score'][ii]
                if not USC_GROUNDING:
                    type_eid = cu_grndg_type_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}/ERE"
                    ent_eid = cu_grndg_ent_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}"
                    entity_grnd = aifutils.make_entity(g, ent_eid,
                                                       sys_grounding)
                    type_assertion = aifutils.mark_type(
                        g, type_eid, entity_grnd, type_rdf, sys_grounding,
                        score)
                elif USC_GROUNDING:
                    type_eid = usc_grndg_type_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}/ERE"
                    ent_eid = usc_grndg_ent_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}"
                    entity_grnd = aifutils.make_entity(g, ent_eid,
                                                       usc_sys_grounding)
                    type_assertion = aifutils.mark_type(
                        g, type_eid, entity_grnd, type_rdf,
                        usc_sys_grounding, score)
                # justify the bbox entity against its image or keyframe
                bb = Bounding_Box((bbox[0], bbox[1]), (bbox[2], bbox[3]))
                if not USC_GROUNDING:
                    if 'JPG' in type_assertion:
                        imgid = img_id.split('.')[0]
                        justif = aifutils.mark_image_justification(
                            g, [entity_grnd, type_assertion], imgid, bb,
                            sys_grounding, score)
                    elif 'Keyframe' in type_assertion:
                        imgid = img_id.split('.')[0].split('_')[0]
                        # it should be keyframe image id or keyframe number
                        kfid = img_id.split('.')[0].split('_')[1]
                        # NOTE(review): passes `entity` (last vision entity
                        # from the feature loops) rather than `entity_grnd` —
                        # looks like a bug; confirm.
                        justif = aifutils.mark_keyframe_video_justification(
                            g, [entity, type_assertion], imgid, kfid, bb,
                            sys_grounding, score)
                elif USC_GROUNDING:
                    imgid = img_id.split('.')[0]
                    justif = aifutils.mark_image_justification(
                        g, [entity_grnd, type_assertion], imgid, bb,
                        usc_sys_grounding, score)
                else:
                    # NOTE(review): unreachable — USC_GROUNDING is boolean
                    print(
                        '[Merge Error] in Main Body: the type_assertion is wrong'
                    )
                aifutils.add_source_document_to_justification(g, justif, p_id)
                aifutils.mark_informative_justification(g, entity_grnd,
                                                        justif)
                if not USC_GROUNDING:
                    grounding_features = grnd['grounding_features'][ii].tolist(
                    )
                    instance_features = grnd['instance_features'][ii].tolist()
                    # add private data to this very bbox entity
                    j_d_g = json.dumps(
                        {'columbia_vector_grounding_v1.0': grounding_features})
                    j_d_i = json.dumps(
                        {'columbia_vector_instance_v1.0': instance_features})
                    aifutils.mark_private_data(g, entity_grnd, j_d_g,
                                               sys_grounding)
                    aifutils.mark_private_data(g, entity_grnd, j_d_i,
                                               sys_instance_matching)
                #### add clusters
                # Grounding Cluster: sanitize the entity name into an IRI
                # fragment (non-alnum/underscore chars become '-')
                entity_name_tmp = '_'.join(entity_name.split(' '))
                entity_name_in_IRI = "".join(
                    x if x.isalpha() or x.isdigit() or x == '_' else '-'
                    for x in entity_name_tmp)
                # gbbox entity to rpi entity
                if not USC_GROUNDING:
                    if not clstr_prot_flag:
                        # create cluster if not present
                        clst_eid = cu_grndg_clstr_txt_pref + f"{entity_name_in_IRI}/{cnt_ltf[entity_name]}"
                        clusterObj = aifutils.make_cluster_with_prototype(
                            g, clst_eid, entity_ltf, sys_grounding)
                        clstr_prot_flag = True
                    # cluster current bbox with current ltf_entity
                    score = grnd['men-img-score'][ii]
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_grnd, clusterObj, score, sys_grounding)
                    # add prototype as member
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_ltf, clusterObj, 1, sys_grounding)
                elif USC_GROUNDING:
                    if not clstr_prot_flag:
                        # create cluster if not present
                        clst_eid = usc_grndg_clstr_txt_pref + f"{entity_name_in_IRI}/{cnt_ltf[entity_name]}"
                        clusterObj = aifutils.make_cluster_with_prototype(
                            g, clst_eid, entity_ltf, usc_sys_grounding)
                        clstr_prot_flag = True
                    # cluster current bbox with current ltf_entity
                    score = grnd['men-img-score'][ii]
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_grnd, clusterObj, score, usc_sys_grounding)
                    # add prototype as member
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_ltf, clusterObj, 1, usc_sys_grounding)
                # BoundingBox Overlap Cluster (Instance Matching):
                # gbbox entity to objdet entity for instance matching
                if not USC_GROUNDING:
                    clstr_prot_b2b_flag = False
                    # for all objdet bboxes linked to this grounding bbox
                    for jj, img_id_link in enumerate(grnd['link_ids'][ii]):
                        if img_id_link in img_entities:
                            entity_link_img = img_entities[img_id_link]
                        elif img_id_link in keyframe_entities:
                            entity_link_img = keyframe_entities[img_id_link]
                        else:
                            continue
                        # to keep track of cnt of bbox overlap for same image
                        if img_id in cnt_boxO:
                            cnt_boxO[img_id] += 1
                        else:
                            cnt_boxO[img_id] = 1
                        if not clstr_prot_b2b_flag:
                            clst_b2b_eid = cu_grndg_clstr_img_pref + f"{img_id.split('.')[0]}/{cnt_boxO[img_id]}"
                            clusterObj_b2b = aifutils.make_cluster_with_prototype(
                                g, clst_b2b_eid, entity_grnd,
                                sys_grounding)  # sys_instance_matching
                            clstr_prot_b2b_flag = True
                        # IoU of grnd bbox and objdet bbox
                        score = grnd['link_scores'][ii][jj]
                        aifutils.mark_as_possible_cluster_member(
                            g, entity_link_img, clusterObj_b2b, score,
                            sys_grounding)  # sys_instance_matching
                        # add prototype as member
                        aifutils.mark_as_possible_cluster_member(
                            g, entity_grnd, clusterObj_b2b, 1,
                            sys_grounding)  # sys_instance_matching
    # write the merged graph out as turtle
    with open(os.path.join(merged_graph_path, p_id + '.ttl'), 'w') as fout:
        serialization = BytesIO()
        g.serialize(destination=serialization, format='turtle')
        fout.write(serialization.getvalue().decode('utf-8'))
    sys.stdout.flush()
for imgid in det_results_vid: vidid = imgid.split('_')[0] id_set_vid.add(vidid) # In[18]: kb_dict = {} entity_dict = {} event_dict = {} for root_doc in root_to_leaf: g = aifutils.make_graph() system_pa = aifutils.make_system_with_uri( g, "http://www.columbia.edu/AIDA/USC/Systems/ObjectDetection/FasterRCNN/PascalVOC" ) system_co = aifutils.make_system_with_uri( g, "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/FasterRCNN-NASNet/COCO" ) system_oi = aifutils.make_system_with_uri( g, "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/FasterRCNN-InceptionResNet/OpenImages" ) system_ws = aifutils.make_system_with_uri( g, "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/MITWeaklySupervised-ResNet/OpenImages" ) for imgid in id_set_jpg & set(root_to_leaf[root_doc]):
def test_create_hierarchical_cluster(self):
    """Build two entity clusters (presidents, Trump) and nest one in the other.

    Fixes a copy-paste defect in the original: the type assertions and
    names for entities 2-5 were all attached to ``president_usa`` instead
    of the entity each call is meant to describe.
    """
    # we want to say that the cluster of Trump entities might be the same
    # as the cluster of the president entities
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)
    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, 'http://www.test.edu/testSystem')

    # create president entities
    president_usa = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                         system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/1", president_usa,
                       SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
    aifutils.mark_name(g, president_usa, "the president")

    new_president = aifutils.make_entity(g, "http://www.test.edu/entities/2",
                                         system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/2", new_president,
                       SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
    aifutils.mark_name(g, new_president, "the newly-inaugurated president")

    president_45 = aifutils.make_entity(g, "http://www.test.edu/entities/3",
                                        system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/3", president_45,
                       SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
    aifutils.mark_name(g, president_45, "the 45th president")

    # cluster president entities
    president_cluster = aifutils.make_cluster_with_prototype(
        g, "http://www.test.edu/clusters/president", president_usa, system)
    aifutils.mark_as_possible_cluster_member(g, president_usa,
                                             president_cluster, 1, system)
    aifutils.mark_as_possible_cluster_member(g, new_president,
                                             president_cluster, .9, system)
    aifutils.mark_as_possible_cluster_member(g, president_45,
                                             president_cluster, .9, system)

    # create Trump entities
    donald_trump = aifutils.make_entity(g, "http://www.test.edu/entities/4",
                                        system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/4", donald_trump,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    aifutils.mark_name(g, donald_trump, "Donald Trump")

    trump = aifutils.make_entity(g, "http://www.test.edu/entities/5", system)
    aifutils.mark_type(g, "http://www.test.edu/assertions/5", trump,
                       SEEDLING_TYPES_NIST.Person, system, 1.0)
    aifutils.mark_name(g, trump, "Trump")

    # cluster trump entities; the president cluster itself is a possible
    # member, which is what makes the clustering hierarchical
    trump_cluster = aifutils.make_cluster_with_prototype(
        g, "http://www.test.edu/clusters/trump", donald_trump, system)
    aifutils.mark_as_possible_cluster_member(g, donald_trump, trump_cluster,
                                             1, system)
    aifutils.mark_as_possible_cluster_member(g, trump, trump_cluster, .9,
                                             system)
    aifutils.mark_as_possible_cluster_member(g, president_cluster,
                                             trump_cluster, .6, system)

    self.new_file(g, "test_create_hierarchical_cluster.ttl")
    self.dump_graph(g, "Seedling hierarchical cluster")
def test_simple_hypothesis_with_importance_with_cluster(self):
    """Exercise aida:importance markings at three levels: a negative value
    on a cluster, ordinary/large values on argument assertions, and the
    maximum representable float on a hypothesis.
    """
    g = aifutils.make_graph()
    g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

    # every AIF needs an object for the system responsible for creating it
    system = aifutils.make_system_with_uri(g, 'http://www.test.edu/testSystem')

    # the BUK missile launcher as mentioned in one document
    doc_buk = aifutils.make_entity(g, "E779954.00005", system)
    doc_buk_type = aifutils.mark_type(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-1",
        doc_buk, SEEDLING_TYPES_NIST.Weapon, system, 1.0)

    # the corresponding cross-document (KB-level) BUK entity
    kb_buk = aifutils.make_entity(
        g, "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E0084",
        system)
    kb_buk_type = aifutils.mark_type(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-2",
        kb_buk, SEEDLING_TYPES_NIST.Weapon, system, 1.0)

    # Russia as mentioned in one document
    doc_russia = aifutils.make_entity(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E779954.00004",
        system)
    doc_russia_type = aifutils.mark_type(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-3",
        doc_russia, SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)

    # cluster the document-level BUK under the KB-level prototype
    buk_cluster = aifutils.make_cluster_with_prototype(
        g, "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#cluster-1",
        kb_buk, system)
    buk_membership = aifutils.mark_as_possible_cluster_member(
        g, doc_buk, buk_cluster, .9, system)
    # importance on the cluster -- deliberately negative to test that range
    aifutils.mark_importance(g, buk_cluster, -70.234)

    # relation: the BUK is affiliated with Russia
    ownership = aifutils.make_relation(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#R779959.00004",
        system)
    aifutils.mark_type(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-4",
        ownership, SEEDLING_TYPES_NIST['GeneralAffiliation.APORA'], system,
        1.0)
    affiliate_arg = aifutils.mark_as_argument(
        g, ownership,
        SEEDLING_TYPES_NIST['GeneralAffiliation.APORA_Affiliate'], doc_buk,
        system, 1.0)
    affiliation_arg = aifutils.mark_as_argument(
        g, ownership,
        SEEDLING_TYPES_NIST['GeneralAffiliation.APORA_Affiliation'],
        doc_russia, system, 1.0)

    # importance on the argument assertions: an ordinary and a large value
    aifutils.mark_importance(g, affiliate_arg, 100.0)
    aifutils.mark_importance(g, affiliation_arg, 9.999999e6)

    # wrap the supporting statements into a "Russia owns BUK" hypothesis
    hypothesis = aifutils.make_hypothesis(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#hypothesis-1",
        [
            doc_buk, doc_buk_type, buk_membership, ownership, affiliate_arg,
            affiliation_arg
        ], system)
    # importance on the hypothesis: the highest possible float value
    aifutils.mark_importance(g, hypothesis, sys.float_info.max)

    self.new_file(g, "test_simple_hypothesis_with_importance_cluster.ttl")
    self.dump_graph(g, "Simple hypothesis with importance with cluster")
def test_create_compound_justification(self):
    """Attach a compound justification -- text, image, keyframe-video,
    shot-video, and audio evidence combined under a single confidence --
    to the argument assertions of a Personnel.Elect event.
    """
    g = aifutils.make_graph()
    system = aifutils.make_system_with_uri(g, "http://www.test.edu/system")

    # the election event itself
    election = aifutils.make_event(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#V779961.00010",
        system)
    event_type_assertion = aifutils.mark_type(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-1",
        election, SEEDLING_TYPES_NIST['Personnel.Elect'], system, 1.0)

    # create the two entities involved in the event
    electee = aifutils.make_entity(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E781167.00398",
        system)
    person_type_assertion = aifutils.mark_type(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-2",
        electee, SEEDLING_TYPES_NIST.Person, system, 1.0)

    place = aifutils.make_entity(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E779954.00004",
        system)
    gpe_type_assertion = aifutils.mark_type(
        g,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-3",
        place, SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)

    # link those entities to the event as its arguments
    electee_argument = aifutils.mark_as_argument(
        g, election, SEEDLING_TYPES_NIST['Personnel.Elect_Elect'], electee,
        system, 0.785,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-4")
    place_argument = aifutils.mark_as_argument(
        g, election, SEEDLING_TYPES_NIST['Personnel.Elect_Place'], place,
        system, 0.589,
        "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-5")

    # the justification provides the evidence for our claim about the
    # entity's type; we attach it to both the type assertion and the entity
    # object itself, since it supports the entity's existence and its type.
    # in TA1 -> TA2 communications, confidences live on the justifications
    text_j = aifutils.make_text_justification(g, "NYT_ENG_20181231", 42, 143,
                                              system, 0.973)
    aifutils.mark_justification(g, person_type_assertion, text_j)
    aifutils.mark_justification(g, electee, text_j)
    aifutils.add_source_document_to_justification(
        g, text_j, "NYT_PARENT_ENG_20181231_03")

    # let's suppose we also have evidence from an image
    image_box = Bounding_Box((123, 45), (167, 98))
    image_j = aifutils.make_image_justification(g, "NYT_ENG_20181231_03",
                                                image_box, system, 0.123)

    # and also a video where the entity appears in a keyframe
    keyframe_box = Bounding_Box((234, 56), (345, 101))
    keyframe_j = aifutils.make_keyframe_video_justification(
        g, "NYT_ENG_20181231_03", "keyframe ID", keyframe_box, system, .0234)

    # and also a video where the entity does not appear in a keyframe
    shot_j = aifutils.make_shot_video_justification(g, "SOME_VIDEO",
                                                    "some shot ID", system,
                                                    0.487)

    # and even audio!
    # NOTE(review): doc ID "NYT_ENG_201181231" differs from the others
    # ("NYT_ENG_20181231...") -- confirm this is intentional test data
    audio_j = aifutils.make_audio_justification(g, "NYT_ENG_201181231", 4.566,
                                                9.876, system, 0.789)

    # combine all justifications into a single justifiedBy triple carrying a
    # new confidence
    aifutils.mark_compound_justification(
        g, [electee_argument],
        [text_j, image_j, keyframe_j, shot_j, audio_j], system, .321)
    aifutils.mark_compound_justification(g, [place_argument],
                                         [text_j, image_j], system, 0.543)

    self.new_file(g, "test_create_compound_justification.ttl")
    self.dump_graph(g, "Example of compound justification")