Пример #1
0
 def test_combination_modalities_long(self):
     """Check that facts differing only in predicate/object split merge.

     The first fact is ("parent", "go", "to Paris") with modality
     "TBC[many]"; the second is ("parent", "go to", "Paris") with an
     empty modality. After ``FactCombinor.process`` the test expects a
     single fact that still carries the modality, mentions both source
     sentences and has "go to" as its predicate.
     """
     score_full = MultipleScore()
     score_full.add_score(1, None, None)
     score_half = MultipleScore()
     score_half.add_score(0.5, None, None)

     source_many = MultipleSourceOccurrence.from_raw(
         "parents have many children", None, 1)
     fact_with_modality = GeneratedFact(
         "parent", "go", "to Paris", "TBC[many]", False, score_full,
         source_many)
     source_plain = MultipleSourceOccurrence.from_raw(
         "parents have children", None, 1)
     fact_without_modality = GeneratedFact(
         "parent", "go to", "Paris", "", False, score_half, source_plain)

     inputs = self.empty_input.add_generated_facts(
         [fact_with_modality, fact_without_modality])
     inputs = FactCombinor(None).process(inputs)

     combined = inputs.get_generated_facts()
     self.assertEqual(1, len(combined))
     # Only index into the result once its size has been asserted.
     merged = combined[0]
     self.assertIn("TBC[many]", merged.get_modality().get())
     sources = str(merged.get_sentence_source())
     self.assertIn("parents have many children x#x1", sources)
     self.assertIn("parents have children x#x1", sources)
     self.assertEqual("go to", merged.get_predicate())
Пример #2
0
 def test_combination(self):
     """Check that three identical triples collapse into one fact.

     The three "lion eat zebra" facts carry different scores and
     overlapping source sentences. After ``FactCombinor.process`` the
     test expects one fact holding three scores, whose source string
     mentions both sentences together with the "x#x3" and "x#x2" markers.
     """
     score_a = MultipleScore()
     score_a.add_score(1, None, None)
     score_b = MultipleScore()
     score_b.add_score(0.5, None, None)
     score_c = MultipleScore()
     score_c.add_score(0.7, None, None)

     fact_a = GeneratedFact(
         "lion", "eat", "zebra", "", False, score_a,
         MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))

     # The second fact is backed by two different sentences.
     mixed_sources = MultipleSourceOccurrence()
     mixed_sources.add_raw("lions eat zebras", None, 2)
     mixed_sources.add_raw("lions eat small zebras", None, 1)
     fact_b = GeneratedFact(
         "lion", "eat", "zebra", "", False, score_b, mixed_sources)

     fact_c = GeneratedFact(
         "lion", "eat", "zebra", "", False, score_c,
         MultipleSourceOccurrence.from_raw(
             "lions eat small zebras", None, 1))

     inputs = self.empty_input.add_generated_facts(
         [fact_a, fact_b, fact_c])
     inputs = FactCombinor(None).process(inputs)

     self.assertEqual(1, len(inputs.get_generated_facts()))
     merged = inputs.get_generated_facts()[0]
     self.assertEqual(3, len(merged.get_score().scores))
     sentence = str(inputs.get_generated_facts()[0].get_sentence_source())
     for expected in ("lions eat zebras", "lions eat small zebras",
                      "x#x3", "x#x2"):
         self.assertIn(expected, sentence)
Пример #3
0
 def test_save(self):
     """Round-trip an ``Inputs`` object through ``save`` / ``load``.

     The inputs are populated with seeds, patterns, subjects, generated
     facts and objects, written to "temp.json" and read back. Each
     collection of the reloaded object must have the same length as the
     corresponding collection of the saved one.
     """
     inputs = Inputs()
     subjects = [Subject("baba"), Subject("coko")]
     patterns = [
         PatternGoogle("why are"),
         PatternGoogle("Why are", "hasProperty", True)
     ]
     mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
     mmr.add_reference(ModuleReferenceInterface("Module1"))
     msr = MultipleSubmoduleReference(
         SubmoduleReferenceInterface("Submodule0"))
     msr.add_reference(SubmoduleReferenceInterface("Submodule0"))
     # ms0 holds a single score, ms1 holds two (one with multi-references).
     ms0 = MultipleScore()
     ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                   SubmoduleReferenceInterface("Submodule0"))
     ms1 = MultipleScore()
     ms1.add_score(1.0, mmr, msr)
     ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                   SubmoduleReferenceInterface("Submodule2"))
     mp0 = MultiplePattern()
     mp0.add_pattern(patterns[0])
     mp1 = MultiplePattern()
     mp1.add_pattern(patterns[0])
     mp1.add_pattern(patterns[1])
     gfs = [
         GeneratedFact(
             "baba", "is", "you", "sometimes", False, ms0,
             MultipleSourceOccurrence.from_raw("baba is you", msr, 1), mp0),
         GeneratedFact(
             "coko", "is", "dead", "always", True, ms1,
             MultipleSourceOccurrence.from_raw("toto is always dead", msr,
                                               1), mp1)
     ]
     seeds = [
         Fact("baba", "is", "us", None, False),
         Fact("coko", "are", "missing", "coucou", True)
     ]
     objects = [Object("missing"), Object("you")]
     inputs = inputs.replace_seeds(seeds)
     inputs = inputs.replace_patterns(patterns)
     inputs = inputs.replace_subjects(subjects)
     inputs = inputs.replace_generated_facts(gfs)
     inputs = inputs.replace_objects(objects)
     inputs.save("temp.json")
     inputs_read = inputs.load("temp.json")
     self.assertEqual(len(inputs.get_generated_facts()),
                      len(inputs_read.get_generated_facts()))
     # Subjects must be compared with the reloaded subjects; comparing
     # with the generated facts only passed because both lists have two
     # elements.
     self.assertEqual(len(inputs.get_subjects()),
                      len(inputs_read.get_subjects()))
     self.assertEqual(len(inputs.get_patterns()),
                      len(inputs_read.get_patterns()))
     self.assertEqual(len(inputs.get_seeds()), len(inputs_read.get_seeds()))
     self.assertEqual(len(inputs.get_objects()),
                      len(inputs_read.get_objects()))
Пример #4
0
 def test_serialize_multiple_source_occurrence(self):
     """Check that ``MultipleSourceOccurrence.to_dict`` is JSON-encodable."""
     submodule_refs = MultipleSubmoduleReference(
         SubmoduleReferenceInterface("Submodule0"))
     submodule_refs.add_reference(SubmoduleReferenceInterface("Submodule0"))
     occurrence = MultipleSourceOccurrence.from_raw(
         "baba is you", submodule_refs, 1)
     print(occurrence.to_dict())
     encoded = json.dumps(occurrence.to_dict())
     self.assertIsNotNone(encoded)
Пример #5
0
 def test_fact_transformation(self):
     """Check that ``GeneratedFact.get_fact`` keeps triple and polarity.

     A positive generated fact must yield a fact with the same subject,
     predicate and object and ``is_negative() == False``; the same triple
     built with the negative flag set must yield ``is_negative() == True``.
     """
     sentence = "elephants do not eat zebras"

     positive = GeneratedFact(
         "elephant", "eat", "zebra", "", False, 1.0,
         MultipleSourceOccurrence.from_raw(sentence, None, 1))
     positive_fact = positive.get_fact()
     self.assertEqual(positive_fact.get_subject(), "elephant")
     self.assertEqual(positive_fact.get_predicate(), "eat")
     self.assertEqual(positive_fact.get_object(), "zebra")
     self.assertEqual(positive_fact.is_negative(), False)

     negated = GeneratedFact(
         "elephant", "eat", "zebra", "", True, 1.0,
         MultipleSourceOccurrence.from_raw(sentence, None, 1))
     negated_fact = negated.get_fact()
     self.assertEqual(negated_fact.is_negative(), True)
Пример #6
0
    def test_conceptual_caption(self):
        """Run ``ConceptualCaptionsComparatorSubmodule`` on a small dataset.

        Every dataset row becomes a generated fact with one score, credited
        alternately to the Bing and Google autocomplete submodules; one
        extra "elephant be big" fact is appended. After processing, the
        number of facts must be unchanged, the first fact must hold two
        scores, and the second score of the second fact must differ from 0
        by more than 1e-5.
        """
        sc = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        self.dummy_reference = ReferencableInterface("DUMMY")

        dataset = [
            ("elephant", "download", "baby", 0),
            ("elephant", "have", "tusks", 1),
            ("lion", "eat", "gazella", 0),
            ("penguin", "eat", "fish", 0),
            ("gorilla", "eat", "banana", 0),
            ("sky", "hasProperty", "blue", 0),
            ("computer", "is", "working", 1),
            ("raccoon", "hasProperty", "blue", 0),
        ]
        subjects = {
            Subject("elephant"),
            Subject("penguin"),
            Subject("lion"),
            Subject("gorilla"),
            Subject("sky"),
            Subject("computer"),
            Subject("raccoon")
        }

        generated = []
        for position, (subject, predicate, obj, truth) in enumerate(
                dataset, start=1):
            # Even positions are credited to Google, odd ones to Bing.
            if position % 2 == 0:
                submodule_class = GoogleAutocompleteSubmodule
            else:
                submodule_class = BingAutocompleteSubmodule
            row_score = MultipleScore()
            row_score.add_score(truth, self.dummy_reference,
                                submodule_class(self.dummy_reference))
            generated.append(
                GeneratedFact(subject, predicate, obj, "", False, row_score,
                              MultipleSourceOccurrence()))

        extra_score = MultipleScore()
        extra_score.add_score(
            1, self.dummy_reference,
            GoogleAutocompleteSubmodule(self.dummy_reference))
        extra_source = MultipleSourceOccurrence.from_raw(
            "elephants are big", None, 1)
        generated.append(
            GeneratedFact("elephant", "be", "big", "", False, extra_score,
                          extra_source))

        inputs = self.empty_input.add_generated_facts(generated)
        inputs = inputs.add_subjects(subjects)
        inputs = sc.process(inputs)

        self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
        first_scores = inputs.get_generated_facts()[0].get_score().scores
        self.assertEqual(len(first_scores), 2)
        second_scores = inputs.get_generated_facts()[1].get_score().scores
        self.assertNotAlmostEqual(second_scores[1][0], 0, delta=1e-5)
 def add_facts_to_generated_facts(self, generated_facts, subject, predicate,
                                  obj, modality, negative,
                                  score_based_on_ranking, suggestion):
     """Append a doubly-scored fact to ``generated_facts`` when relevant.

     Nothing is appended when ``suggestion[SUBJECT]`` does not occur in
     ``subject``. Otherwise a ``GeneratedFact`` is built from the given
     triple, modality and polarity. Its score combines a fixed 1.0
     attributed to ``reference_corenlp`` with ``score_based_on_ranking``
     attributed to this submodule. Its source is ``suggestion[0]`` with an
     occurrence count of 1.

     The list is modified in place; the method returns ``None``.
     """
     if suggestion[SUBJECT] not in subject:
         return

     combined_score = MultipleScore()
     combined_score.add_score(1.0, self._module_reference,
                              reference_corenlp)
     combined_score.add_score(score_based_on_ranking,
                              self._module_reference, self)

     source = MultipleSourceOccurrence.from_raw(suggestion[0], self, 1)
     generated_facts.append(
         GeneratedFact(subject, predicate, obj, modality, negative,
                       combined_score, source, suggestion[2]))
 def get_fact_from_simple_extraction(self, extraction, score, suggestion):
     """Build a ``GeneratedFact`` from a manually extracted triple.

     ``extraction[0:3]`` supplies subject, predicate and object. The fact
     is negative when ``get_negativity(suggestion)`` is truthy, otherwise
     ``extraction[3]`` decides. The modality is ``None``. The attached
     score combines ``score`` (attributed to this submodule) with a fixed
     1.0 attributed to ``reference_manual``. The source is
     ``suggestion[0]`` with an occurrence count of 1.
     """
     is_negative = get_negativity(suggestion)
     if not is_negative:
         # Fall back on the polarity carried by the extraction itself.
         is_negative = extraction[3]

     combined_score = MultipleScore()
     combined_score.add_score(score, self._module_reference, self)
     combined_score.add_score(1.0, self._module_reference, reference_manual)

     subject, predicate, obj = extraction[0], extraction[1], extraction[2]
     source = MultipleSourceOccurrence.from_raw(suggestion[0], self, 1)
     return GeneratedFact(subject, predicate, obj, None, is_negative,
                          combined_score, source, suggestion[2])
 def _openie_from_file(self, suggestions):
     """Turn suggestions into generated facts using ``OpenIEReader``.

     Each suggestion is first passed through
     ``transforms_suggestion_into_batch_component``, which fills the list
     of suggestions actually processed. For each of these, the facts
     returned by ``OpenIEReader.get_from_sentence`` are filtered (subject,
     predicate and object must each be longer than one character), reduced
     by ``_take_earliest_predicate`` and kept only when the suggestion's
     subject occurs in the fact's subject.

     Every kept fact gets two scores: the confidence read from
     ``fact[3]`` (attributed to ``reference_openie5``) and the
     ranking-based score of the suggestion (attributed to this
     submodule). Facts whose confidence cannot be parsed are logged and
     skipped.

     Returns:
         The list of ``GeneratedFact`` objects that were built.
     """
     openie_reader = OpenIEReader()
     generated_facts = []
     new_suggestions = []
     for suggestion in suggestions:
         self.transforms_suggestion_into_batch_component(
             suggestion, new_suggestions)
     for suggestion in new_suggestions:
         sentence = suggestion[STATEMENT]
         facts = openie_reader.get_from_sentence(sentence)
         negative = get_negativity(suggestion)
         facts = [
             fact for fact in facts if len(fact) > 0 and len(fact[0]) > 1
             and len(fact[1]) > 1 and len(fact[2]) > 1
         ]
         score_based_on_ranking = self.get_score_based_on_ranking(
             suggestion)
         facts = self._take_earliest_predicate(sentence, facts)
         for fact in facts:
             if suggestion[SUBJECT] not in fact[0]:
                 continue
             # Read the raw confidence up front so that the error path can
             # log it without indexing the fact again.
             raw_score = fact[3] if len(fact) > 3 else None
             try:
                 # The confidence may use a decimal comma.
                 score = float(raw_score.replace(",", "."))
             except (AttributeError, TypeError, ValueError):
                 # Missing, non-string or unparsable confidence: skip the
                 # fact rather than abort the whole batch.
                 logging.info(
                     "Problem in score reading in openie5 reader with %s",
                     raw_score)
                 continue
             multiple_score = MultipleScore()
             multiple_score.add_score(score, self._module_reference,
                                      reference_openie5)
             multiple_score.add_score(score_based_on_ranking,
                                      self._module_reference, self)
             generated_facts.append(
                 GeneratedFact(
                     fact[0], fact[1], fact[2], "", negative,
                     multiple_score,
                     MultipleSourceOccurrence.from_raw(sentence, self,
                                                       1), suggestion[2]))
     del openie_reader
     return generated_facts
 def test_combination(self):
     """Check that ``self.linear_combination`` keeps every generated fact.

     Each dataset row becomes a generated fact with one score, credited
     alternately to the Bing and Google autocomplete submodules; one extra
     "elephant be big" fact is appended. After processing, the number of
     generated facts must equal the dataset size plus one.
     """
     dataset = [
         ("elephant", "download", "baby", 0),
         ("elephant", "climb", "trunk", 0),
         ("elephant", "bear", "baby", 1),
         ("elephant", "download this cute illustration with", "baby", 0),
         ("elephant", "be", "ear", 0),
         ("elephant", "fry", "ear", 0),
         ("elephant", "trek", "travel", 0),
         ("elephant", "forbid love in", "water", 0),
         ("elephant", "eat", "bark", 1),
         ("elephant", "have", "tusks", 1),
     ]

     generated = []
     for position, (subject, predicate, obj, truth) in enumerate(
             dataset, start=1):
         # Even positions are credited to Google, odd ones to Bing.
         if position % 2 == 0:
             submodule_class = GoogleAutocompleteSubmodule
         else:
             submodule_class = BingAutocompleteSubmodule
         row_score = MultipleScore()
         row_score.add_score(truth, self.dummy_reference,
                             submodule_class(self.dummy_reference))
         generated.append(
             GeneratedFact(subject, predicate, obj, "", False, row_score,
                           MultipleSourceOccurrence()))

     extra_score = MultipleScore()
     extra_score.add_score(
         1, self.dummy_reference,
         GoogleAutocompleteSubmodule(self.dummy_reference))
     extra_source = MultipleSourceOccurrence.from_raw(
         "elephants are big", None, 1)
     generated.append(
         GeneratedFact("elephant", "be", "big", "", False, extra_score,
                       extra_source))

     inputs = self.empty_input.add_generated_facts(generated)
     inputs = self.linear_combination.process(inputs)
     self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
Пример #11
0
 def test_combination_modalities(self):
     """Check that merging identical triples keeps both modalities.

     Two "lion eat zebra" facts from the same sentence differ only in
     modality ("some" vs "often") and score. After
     ``FactCombinor.process`` the test expects one fact whose modality
     contains both values.
     """
     score_full = MultipleScore()
     score_full.add_score(1, None, None)
     score_half = MultipleScore()
     score_half.add_score(0.5, None, None)

     fact_some = GeneratedFact(
         "lion", "eat", "zebra", "some", False, score_full,
         MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))
     fact_often = GeneratedFact(
         "lion", "eat", "zebra", "often", False, score_half,
         MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))

     inputs = self.empty_input.add_generated_facts([fact_some, fact_often])
     inputs = FactCombinor(None).process(inputs)

     self.assertEqual(1, len(inputs.get_generated_facts()))
     for expected_modality in ("some", "often"):
         self.assertIn(
             expected_modality,
             inputs.get_generated_facts()[0].get_modality().get())