def test_combination_modalities_long(self):
    """Check that two facts whose predicate/object split differs are merged.

    The first fact uses predicate "go", object "to Paris" and modality
    "TBC[many]"; the second uses predicate "go to", object "Paris" and an
    empty modality. After ``FactCombinor.process`` exactly one generated
    fact is expected: its modality contains "TBC[many]", the string form
    of its sentence source contains both raw sentences, and its predicate
    is "go to".
    """
    def single_score(value):
        # One-entry MultipleScore with no module/submodule reference.
        holder = MultipleScore()
        holder.add_score(value, None, None)
        return holder

    high_score = single_score(1)
    low_score = single_score(0.5)

    short_predicate_fact = GeneratedFact(
        "parent", "go", "to Paris", "TBC[many]", False, high_score,
        MultipleSourceOccurrence.from_raw(
            "parents have many children", None, 1))
    long_predicate_fact = GeneratedFact(
        "parent", "go to", "Paris", "", False, low_score,
        MultipleSourceOccurrence.from_raw(
            "parents have children", None, 1))

    inputs = self.empty_input.add_generated_facts(
        [short_predicate_fact, long_predicate_fact])
    inputs = FactCombinor(None).process(inputs)

    combined = inputs.get_generated_facts()
    self.assertEqual(1, len(combined))

    merged = combined[0]
    self.assertIn("TBC[many]", merged.get_modality().get())

    sources_text = str(merged.get_sentence_source())
    self.assertIn("parents have many children x#x1", sources_text)
    self.assertIn("parents have children x#x1", sources_text)

    self.assertEqual("go to", merged.get_predicate())
def test_combination(self):
    """Check that three identical triples collapse into one generated fact.

    All three facts are ("lion", "eat", "zebra") with different scores and
    sentence sources. After ``FactCombinor.process`` a single fact is
    expected that carries three scores and whose sentence source string
    mentions both raw sentences together with the markers "x#x3" and
    "x#x2".
    """
    def single_score(value):
        # One-entry MultipleScore with no module/submodule reference.
        holder = MultipleScore()
        holder.add_score(value, None, None)
        return holder

    first_score = single_score(1)
    second_score = single_score(0.5)
    third_score = single_score(0.7)

    first_fact = GeneratedFact(
        "lion", "eat", "zebra", "", False, first_score,
        MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))

    # The second fact is backed by two different sentences.
    mixed_sources = MultipleSourceOccurrence()
    mixed_sources.add_raw("lions eat zebras", None, 2)
    mixed_sources.add_raw("lions eat small zebras", None, 1)
    second_fact = GeneratedFact(
        "lion", "eat", "zebra", "", False, second_score, mixed_sources)

    third_fact = GeneratedFact(
        "lion", "eat", "zebra", "", False, third_score,
        MultipleSourceOccurrence.from_raw(
            "lions eat small zebras", None, 1))

    inputs = self.empty_input.add_generated_facts(
        [first_fact, second_fact, third_fact])
    inputs = FactCombinor(None).process(inputs)

    self.assertEqual(1, len(inputs.get_generated_facts()))
    merged = inputs.get_generated_facts()[0]
    self.assertEqual(3, len(merged.get_score().scores))

    sources_text = str(
        inputs.get_generated_facts()[0].get_sentence_source())
    for expected in ("lions eat zebras",
                     "lions eat small zebras",
                     "x#x3",
                     "x#x2"):
        self.assertIn(expected, sources_text)
def test_save(self):
    """Round-trip an ``Inputs`` object through ``save``/``load``.

    Populates seeds, patterns, subjects, generated facts and objects,
    saves them to "temp.json", loads the file back and checks that every
    collection has the same length as before saving.
    """
    inputs = Inputs()
    subjects = [Subject("baba"), Subject("coko")]
    patterns = [
        PatternGoogle("why are"),
        PatternGoogle("Why are", "hasProperty", True)
    ]

    mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
    mmr.add_reference(ModuleReferenceInterface("Module1"))
    msr = MultipleSubmoduleReference(
        SubmoduleReferenceInterface("Submodule0"))
    # NOTE(review): the same submodule name is added a second time;
    # presumably intentional to cover duplicates - confirm.
    msr.add_reference(SubmoduleReferenceInterface("Submodule0"))

    ms0 = MultipleScore()
    ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                  SubmoduleReferenceInterface("Submodule0"))
    ms1 = MultipleScore()
    ms1.add_score(1.0, mmr, msr)
    ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                  SubmoduleReferenceInterface("Submodule2"))

    mp0 = MultiplePattern()
    mp0.add_pattern(patterns[0])
    mp1 = MultiplePattern()
    mp1.add_pattern(patterns[0])
    mp1.add_pattern(patterns[1])

    gfs = [
        GeneratedFact(
            "baba", "is", "you", "sometimes", False, ms0,
            MultipleSourceOccurrence.from_raw("baba is you", msr, 1),
            mp0),
        GeneratedFact(
            "coko", "is", "dead", "always", True, ms1,
            MultipleSourceOccurrence.from_raw(
                "toto is always dead", msr, 1),
            mp1)
    ]
    seeds = [
        Fact("baba", "is", "us", None, False),
        Fact("coko", "are", "missing", "coucou", True)
    ]
    objects = [Object("missing"), Object("you")]

    inputs = inputs.replace_seeds(seeds)
    inputs = inputs.replace_patterns(patterns)
    inputs = inputs.replace_subjects(subjects)
    inputs = inputs.replace_generated_facts(gfs)
    inputs = inputs.replace_objects(objects)

    inputs.save("temp.json")
    inputs_read = inputs.load("temp.json")

    self.assertEqual(len(inputs.get_generated_facts()),
                     len(inputs_read.get_generated_facts()))
    # Compare subjects with subjects: the previous version compared them
    # against the reloaded generated facts, which only passed because both
    # collections happen to contain two elements.
    self.assertEqual(len(inputs.get_subjects()),
                     len(inputs_read.get_subjects()))
    self.assertEqual(len(inputs.get_patterns()),
                     len(inputs_read.get_patterns()))
    self.assertEqual(len(inputs.get_seeds()),
                     len(inputs_read.get_seeds()))
    self.assertEqual(len(inputs.get_objects()),
                     len(inputs_read.get_objects()))
def test_serialize_multiple_source_occurrence(self):
    """Check that ``MultipleSourceOccurrence.to_dict`` is JSON-serializable.

    Builds an occurrence for "baba is you" attached to a
    ``MultipleSubmoduleReference``, prints its dictionary form and asserts
    that ``json.dumps`` of that dictionary is not None.
    """
    submodule_refs = MultipleSubmoduleReference(
        SubmoduleReferenceInterface("Submodule0"))
    submodule_refs.add_reference(SubmoduleReferenceInterface("Submodule0"))

    occurrence = MultipleSourceOccurrence.from_raw(
        "baba is you", submodule_refs, 1)

    print(occurrence.to_dict())
    serialized = json.dumps(occurrence.to_dict())
    self.assertIsNotNone(serialized)
def test_fact_transformation(self):
    """Check that ``GeneratedFact.get_fact`` keeps the triple and polarity.

    A non-negative generated fact must yield a fact with the same subject,
    predicate and object and ``is_negative() == False``; the same triple
    built with the negative flag set must yield ``is_negative() == True``.
    """
    def fact_with_polarity(negative):
        # Same triple and source sentence each time; only the flag varies.
        generated = GeneratedFact(
            "elephant", "eat", "zebra", "", negative, 1.0,
            MultipleSourceOccurrence.from_raw(
                "elephants do not eat zebras", None, 1))
        return generated.get_fact()

    positive_fact = fact_with_polarity(False)
    self.assertEqual(positive_fact.get_subject(), "elephant")
    self.assertEqual(positive_fact.get_predicate(), "eat")
    self.assertEqual(positive_fact.get_object(), "zebra")
    self.assertEqual(positive_fact.is_negative(), False)

    negative_fact = fact_with_polarity(True)
    self.assertEqual(negative_fact.is_negative(), True)
def test_conceptual_caption(self):
    """Run ``ConceptualCaptionsComparatorSubmodule`` over a small fact set.

    Builds one generated fact per dataset row (alternating between a Bing
    and a Google autocomplete submodule as score source), adds one extra
    "elephant be big" fact, processes the inputs and checks that:

    * no generated fact was dropped or added,
    * the first fact carries two scores,
    * the value at ``scores[1][0]`` of the second fact is not within 1e-5
      of zero.
    """
    comparator = ConceptualCaptionsComparatorSubmodule(None)
    self.empty_input = Inputs()
    self.dummy_reference = ReferencableInterface("DUMMY")

    dataset = [
        ("elephant", "download", "baby", 0),
        ("elephant", "have", "tusks", 1),
        ("lion", "eat", "gazella", 0),
        ("penguin", "eat", "fish", 0),
        ("gorilla", "eat", "banana", 0),
        ("sky", "hasProperty", "blue", 0),
        ("computer", "is", "working", 1),
        ("raccoon", "hasProperty", "blue", 0),
    ]
    subjects = {
        Subject(name)
        for name in ("elephant", "penguin", "lion", "gorilla", "sky",
                     "computer", "raccoon")
    }

    generated = []
    for position, (subject, predicate, obj, truth) in enumerate(dataset,
                                                                start=1):
        row_score = MultipleScore()
        # Even 1-based positions are attributed to Google, odd ones to Bing.
        if position % 2 == 0:
            submodule_cls = GoogleAutocompleteSubmodule
        else:
            submodule_cls = BingAutocompleteSubmodule
        row_score.add_score(truth, self.dummy_reference,
                            submodule_cls(self.dummy_reference))
        generated.append(
            GeneratedFact(subject, predicate, obj, "", False, row_score,
                          MultipleSourceOccurrence()))

    extra_score = MultipleScore()
    extra_score.add_score(
        1, self.dummy_reference,
        GoogleAutocompleteSubmodule(self.dummy_reference))
    generated.append(
        GeneratedFact(
            "elephant", "be", "big", "", False, extra_score,
            MultipleSourceOccurrence.from_raw("elephants are big", None,
                                              1)))

    inputs = self.empty_input.add_generated_facts(generated)
    inputs = inputs.add_subjects(subjects)
    inputs = comparator.process(inputs)

    self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
    first_scores = inputs.get_generated_facts()[0].get_score().scores
    self.assertEqual(len(first_scores), 2)
    second_scores = inputs.get_generated_facts()[1].get_score().scores
    self.assertNotAlmostEqual(second_scores[1][0], 0, delta=1e-5)
def add_facts_to_generated_facts(self, generated_facts, subject, predicate,
                                 obj, modality, negative,
                                 score_based_on_ranking, suggestion):
    """Append a new ``GeneratedFact`` to ``generated_facts`` when relevant.

    Nothing is appended when ``suggestion[SUBJECT]`` is not contained in
    ``subject``. Otherwise the fact receives two scores: 1.0 attributed to
    ``reference_corenlp`` and ``score_based_on_ranking`` attributed to this
    submodule. ``suggestion[0]`` is used as the raw source sentence and
    ``suggestion[2]`` is forwarded as the last constructor argument.

    The list is modified in place; the method returns None.
    """
    if suggestion[SUBJECT] in subject:
        scores = MultipleScore()
        scores.add_score(1.0, self._module_reference, reference_corenlp)
        scores.add_score(score_based_on_ranking, self._module_reference,
                         self)

        source = MultipleSourceOccurrence.from_raw(suggestion[0], self, 1)
        generated_facts.append(
            GeneratedFact(subject, predicate, obj, modality, negative,
                          scores, source, suggestion[2]))
def get_fact_from_simple_extraction(self, extraction, score, suggestion):
    """Build a ``GeneratedFact`` from a manually extracted triple.

    ``extraction[0]``, ``extraction[1]`` and ``extraction[2]`` are passed
    as subject, predicate and object; the modality is None. The fact is
    negative when ``get_negativity(suggestion)`` is truthy, otherwise the
    value of ``extraction[3]`` is used. ``suggestion[0]`` is the raw source
    sentence and ``suggestion[2]`` is forwarded as the last constructor
    argument.

    Returns the newly created ``GeneratedFact``.
    """
    is_negative = get_negativity(suggestion)
    if not is_negative:
        # Fall back on the polarity carried by the extraction itself.
        is_negative = extraction[3]

    # For the score, inverse the ranking (higher is better) and add the
    # confidence of the triple.
    scores = MultipleScore()
    scores.add_score(score, self._module_reference, self)
    scores.add_score(1.0, self._module_reference, reference_manual)

    return GeneratedFact(
        extraction[0],
        extraction[1],
        extraction[2],
        None,
        is_negative,
        scores,
        MultipleSourceOccurrence.from_raw(suggestion[0], self, 1),
        suggestion[2])
def _openie_from_file(self, suggestions):
    """Turn suggestions into generated facts using ``OpenIEReader``.

    Each suggestion is first passed through
    ``transforms_suggestion_into_batch_component``, which fills the list
    of suggestions that is then processed. For every such suggestion the
    reader's facts for ``suggestion[STATEMENT]`` are filtered (non-empty,
    first three fields longer than one character), reduced through
    ``_take_earliest_predicate`` and kept only when
    ``suggestion[SUBJECT]`` occurs in the fact's first field.

    Each kept fact gets two scores: the reader's own confidence
    (``fact[3]``, with "," accepted as decimal separator) attributed to
    ``reference_openie5`` and the ranking-based score attributed to this
    submodule. Facts whose confidence cannot be parsed are logged and
    skipped.

    Returns the list of created ``GeneratedFact`` objects.
    """
    openie_reader = OpenIEReader()
    generated_facts = []
    new_suggestions = []
    for suggestion in suggestions:
        self.transforms_suggestion_into_batch_component(
            suggestion, new_suggestions)
    for suggestion in new_suggestions:
        sentence = suggestion[STATEMENT]
        facts = openie_reader.get_from_sentence(sentence)
        negative = get_negativity(suggestion)
        facts = [
            fact for fact in facts
            if len(fact) > 0 and len(fact[0]) > 1 and len(fact[1]) > 1
            and len(fact[2]) > 1
        ]
        score_based_on_ranking = self.get_score_based_on_ranking(
            suggestion)
        facts = self._take_earliest_predicate(sentence, facts)
        for fact in facts:
            if suggestion[SUBJECT] not in fact[0]:
                continue
            try:
                score = float(fact[3].replace(",", "."))
            except (ValueError, AttributeError, TypeError):
                # Only parsing problems are skipped; a bare ``except``
                # would also swallow KeyboardInterrupt/SystemExit. The
                # %s form keeps the handler from failing itself when the
                # offending value is not a string.
                logging.info(
                    "Problem in score reading in openie5 reader with %s",
                    fact[3])
                continue
            multiple_score = MultipleScore()
            multiple_score.add_score(score, self._module_reference,
                                     reference_openie5)
            multiple_score.add_score(score_based_on_ranking,
                                     self._module_reference, self)
            generated_facts.append(
                GeneratedFact(
                    fact[0], fact[1], fact[2], "", negative,
                    multiple_score,
                    MultipleSourceOccurrence.from_raw(sentence, self, 1),
                    suggestion[2]))
    # Drop the reader explicitly before returning, as the original did.
    del openie_reader
    return generated_facts
def test_combination(self):
    """Check that ``self.linear_combination`` keeps every generated fact.

    One generated fact is built per dataset row (alternating between a
    Bing and a Google autocomplete submodule as score source), plus one
    extra "elephant be big" fact. After processing, the number of
    generated facts must still be ``len(dataset) + 1``.
    """
    dataset = [
        ("elephant", "download", "baby", 0),
        ("elephant", "climb", "trunk", 0),
        ("elephant", "bear", "baby", 1),
        ("elephant", "download this cute illustration with", "baby", 0),
        ("elephant", "be", "ear", 0),
        ("elephant", "fry", "ear", 0),
        ("elephant", "trek", "travel", 0),
        ("elephant", "forbid love in", "water", 0),
        ("elephant", "eat", "bark", 1),
        ("elephant", "have", "tusks", 1),
    ]

    generated = []
    for position, (subject, predicate, obj, truth) in enumerate(dataset,
                                                                start=1):
        row_score = MultipleScore()
        # Even 1-based positions are attributed to Google, odd ones to Bing.
        if position % 2 == 0:
            submodule_cls = GoogleAutocompleteSubmodule
        else:
            submodule_cls = BingAutocompleteSubmodule
        row_score.add_score(truth, self.dummy_reference,
                            submodule_cls(self.dummy_reference))
        generated.append(
            GeneratedFact(subject, predicate, obj, "", False, row_score,
                          MultipleSourceOccurrence()))

    extra_score = MultipleScore()
    extra_score.add_score(
        1, self.dummy_reference,
        GoogleAutocompleteSubmodule(self.dummy_reference))
    generated.append(
        GeneratedFact(
            "elephant", "be", "big", "", False, extra_score,
            MultipleSourceOccurrence.from_raw("elephants are big", None,
                                              1)))

    inputs = self.empty_input.add_generated_facts(generated)
    inputs = self.linear_combination.process(inputs)

    expected_count = len(dataset) + 1
    self.assertEqual(expected_count, len(inputs.get_generated_facts()))
def test_combination_modalities(self):
    """Check that modalities of identical triples are merged.

    Two ("lion", "eat", "zebra") facts from the same sentence carry the
    modalities "some" and "often". After ``FactCombinor.process`` a single
    generated fact is expected whose modality contains both values.
    """
    def single_score(value):
        # One-entry MultipleScore with no module/submodule reference.
        holder = MultipleScore()
        holder.add_score(value, None, None)
        return holder

    high_score = single_score(1)
    low_score = single_score(0.5)

    some_fact = GeneratedFact(
        "lion", "eat", "zebra", "some", False, high_score,
        MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))
    often_fact = GeneratedFact(
        "lion", "eat", "zebra", "often", False, low_score,
        MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))

    inputs = self.empty_input.add_generated_facts([some_fact, often_fact])
    inputs = FactCombinor(None).process(inputs)

    self.assertEqual(1, len(inputs.get_generated_facts()))
    for expected_modality in ("some", "often"):
        self.assertIn(
            expected_modality,
            inputs.get_generated_facts()[0].get_modality().get())