def test_combination_modalities_long(self):
    """Combine "go / to Paris" with "go to / Paris" into a single fact.

    After FactCombinor.process, exactly one fact must remain; its modality
    must contain "TBC[many]", its sentence source must mention both input
    sentences, and its predicate must equal "go to".
    """
    full_score = MultipleScore()
    full_score.add_score(1, None, None)
    half_score = MultipleScore()
    half_score.add_score(0.5, None, None)
    split_fact = GeneratedFact(
        "parent", "go", "to Paris", "TBC[many]", False, full_score,
        MultipleSourceOccurrence.from_raw(
            "parents have many children", None, 1))
    joined_fact = GeneratedFact(
        "parent", "go to", "Paris", "", False, half_score,
        MultipleSourceOccurrence.from_raw(
            "parents have children", None, 1))
    inputs = self.empty_input.add_generated_facts(
        [split_fact, joined_fact])
    combinor = FactCombinor(None)
    inputs = combinor.process(inputs)

    self.assertEqual(1, len(inputs.get_generated_facts()))
    self.assertIn(
        "TBC[many]",
        inputs.get_generated_facts()[0].get_modality().get())
    # Both original sentences must survive in the merged source.
    for expected in ("parents have many children x#x1",
                     "parents have children x#x1"):
        self.assertIn(
            expected,
            str(inputs.get_generated_facts()[0].get_sentence_source()))
    self.assertEqual(
        "go to", inputs.get_generated_facts()[0].get_predicate())
def test_conceptual_caption(self):
    """Run the conceptual-captions comparator over a small set of facts.

    Eight facts are scored alternately with a Bing (odd positions) and a
    Google (even positions) autocomplete reference, and one extra
    "elephant be big" fact is appended. After processing, every fact must
    still be present, the first fact must carry two scores, and the second
    fact's second score must not be close to zero.
    """
    sc = ConceptualCaptionsComparatorSubmodule(None)
    self.empty_input = Inputs()
    self.dummy_reference = ReferencableInterface("DUMMY")
    dataset = [
        ("elephant", "download", "baby", 0),
        ("elephant", "have", "tusks", 1),
        ("lion", "eat", "gazella", 0),
        ("penguin", "eat", "fish", 0),
        ("gorilla", "eat", "banana", 0),
        ("sky", "hasProperty", "blue", 0),
        ("computer", "is", "working", 1),
        ("raccoon", "hasProperty", "blue", 0),
    ]
    subject_names = ("elephant", "penguin", "lion", "gorilla", "sky",
                     "computer", "raccoon")
    subjects = {Subject(name) for name in subject_names}

    facts = []
    for position, row in enumerate(dataset, start=1):
        subject, predicate, obj, truth = row
        score = MultipleScore()
        if position % 2 == 0:
            submodule = GoogleAutocompleteSubmodule(self.dummy_reference)
        else:
            submodule = BingAutocompleteSubmodule(self.dummy_reference)
        score.add_score(truth, self.dummy_reference, submodule)
        facts.append(
            GeneratedFact(subject, predicate, obj, "", False, score,
                          MultipleSourceOccurrence()))

    big_score = MultipleScore()
    big_score.add_score(
        1, self.dummy_reference,
        GoogleAutocompleteSubmodule(self.dummy_reference))
    facts.append(
        GeneratedFact(
            "elephant", "be", "big", "", False, big_score,
            MultipleSourceOccurrence.from_raw("elephants are big", None, 1)))

    inputs = self.empty_input.add_generated_facts(facts)
    inputs = inputs.add_subjects(subjects)
    inputs = sc.process(inputs)

    self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
    self.assertEqual(
        len(inputs.get_generated_facts()[0].get_score().scores), 2)
    self.assertNotAlmostEqual(
        inputs.get_generated_facts()[1].get_score().scores[1][0],
        0,
        delta=1e-5)
def test_save(self):
    """Round-trip an Inputs object through save/load and compare sizes.

    Populates seeds, patterns, subjects, generated facts and objects,
    writes them to "temp.json", reads the file back, and checks that each
    collection has the same length before and after.
    """
    inputs = Inputs()
    subjects = [Subject("baba"), Subject("coko")]
    patterns = [
        PatternGoogle("why are"),
        PatternGoogle("Why are", "hasProperty", True)
    ]
    mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
    mmr.add_reference(ModuleReferenceInterface("Module1"))
    msr = MultipleSubmoduleReference(
        SubmoduleReferenceInterface("Submodule0"))
    msr.add_reference(SubmoduleReferenceInterface("Submodule0"))
    ms0 = MultipleScore()
    ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                  SubmoduleReferenceInterface("Submodule0"))
    ms1 = MultipleScore()
    ms1.add_score(1.0, mmr, msr)
    ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                  SubmoduleReferenceInterface("Submodule2"))
    mp0 = MultiplePattern()
    mp0.add_pattern(patterns[0])
    mp1 = MultiplePattern()
    mp1.add_pattern(patterns[0])
    mp1.add_pattern(patterns[1])
    gfs = [
        GeneratedFact(
            "baba", "is", "you", "sometimes", False, ms0,
            MultipleSourceOccurrence.from_raw("baba is you", msr, 1),
            mp0),
        GeneratedFact(
            "coko", "is", "dead", "always", True, ms1,
            MultipleSourceOccurrence.from_raw(
                "toto is always dead", msr, 1),
            mp1)
    ]
    seeds = [
        Fact("baba", "is", "us", None, False),
        Fact("coko", "are", "missing", "coucou", True)
    ]
    objects = [Object("missing"), Object("you")]
    inputs = inputs.replace_seeds(seeds)
    inputs = inputs.replace_patterns(patterns)
    inputs = inputs.replace_subjects(subjects)
    inputs = inputs.replace_generated_facts(gfs)
    inputs = inputs.replace_objects(objects)
    inputs.save("temp.json")
    inputs_read = inputs.load("temp.json")
    self.assertEqual(len(inputs.get_generated_facts()),
                     len(inputs_read.get_generated_facts()))
    # Compare subjects with subjects: the previous check compared against
    # the reloaded generated facts and only passed because both lists
    # happen to contain two elements.
    self.assertEqual(len(inputs.get_subjects()),
                     len(inputs_read.get_subjects()))
    self.assertEqual(len(inputs.get_patterns()),
                     len(inputs_read.get_patterns()))
    self.assertEqual(len(inputs.get_seeds()),
                     len(inputs_read.get_seeds()))
    self.assertEqual(len(inputs.get_objects()),
                     len(inputs_read.get_objects()))
def test_not_remove(self):
    """A fact with a "TBC[big bananas]" modality survives TBCCleaner."""
    occurrences = MultipleSourceOccurrence()
    occurrences.add_raw("elephants eat big bananas", None, 2)
    fact = GeneratedFact(
        "elephant", "eat", "bananas", "TBC[big bananas]", 0,
        MultipleScore(), occurrences)
    inputs = Inputs().add_generated_facts([fact])
    cleaner = TBCCleaner(None)
    cleaned = cleaner.process(inputs)
    self.assertEqual(len(cleaned.get_generated_facts()), 1)
def test_combination(self):
    """Three identical "lion eat zebra" facts are merged by FactCombinor.

    The merged fact must keep all three scores, and the string form of its
    sentence source must contain both sentences together with the
    occurrence markers "x#x3" and "x#x2".
    """
    score_high = MultipleScore()
    score_high.add_score(1, None, None)
    score_low = MultipleScore()
    score_low.add_score(0.5, None, None)
    score_mid = MultipleScore()
    score_mid.add_score(0.7, None, None)

    first = GeneratedFact(
        "lion", "eat", "zebra", "", False, score_high,
        MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))
    mixed_sources = MultipleSourceOccurrence()
    mixed_sources.add_raw("lions eat zebras", None, 2)
    mixed_sources.add_raw("lions eat small zebras", None, 1)
    second = GeneratedFact(
        "lion", "eat", "zebra", "", False, score_low, mixed_sources)
    third = GeneratedFact(
        "lion", "eat", "zebra", "", False, score_mid,
        MultipleSourceOccurrence.from_raw(
            "lions eat small zebras", None, 1))

    inputs = self.empty_input.add_generated_facts([first, second, third])
    combinor = FactCombinor(None)
    inputs = combinor.process(inputs)

    self.assertEqual(1, len(inputs.get_generated_facts()))
    self.assertEqual(
        3, len(inputs.get_generated_facts()[0].get_score().scores))
    source_text = str(
        inputs.get_generated_facts()[0].get_sentence_source())
    for fragment in ("lions eat zebras", "lions eat small zebras",
                     "x#x3", "x#x2"):
        self.assertIn(fragment, source_text)
def process(self, input_interface):
    """Add scored copies of already-known facts found in FILENAME.

    The file is read as tab-separated lines with the first line skipped.
    Columns 0-2 are taken as subject, predicate and object, and the column
    at ``self._index`` is parsed as a float score. Rows whose triple is not
    among the generated facts of ``input_interface``, or whose score is 0,
    are ignored.

    Returns the result of ``input_interface.add_generated_facts`` called
    with the newly built facts.
    """
    logging.info("Start the " + self._name + " archit submodule")
    known_triples = {
        (gf.get_subject().get(),
         gf.get_predicate().get(),
         gf.get_object().get())
        for gf in input_interface.get_generated_facts()
    }
    additions = []
    with open(FILENAME) as handle:
        # The first line is skipped unconditionally.
        next(handle, None)
        for raw in handle:
            fields = raw.strip().split("\t")
            subj, pred, obj = fields[0], fields[1], fields[2]
            if (subj, pred, obj) in known_triples:
                score = float(fields[self._index])
                if score != 0:
                    multi_score = MultipleScore()
                    multi_score.add_score(
                        score, self._module_reference, self)
                    additions.append(
                        GeneratedFact(subj, pred, obj, "", 0, multi_score,
                                      MultipleSourceOccurrence()))
    return input_interface.add_generated_facts(additions)
def test_serialize_multiple_source_occurrence(self):
    """The dict form of a MultipleSourceOccurrence is JSON-serializable."""
    references = MultipleSubmoduleReference(
        SubmoduleReferenceInterface("Submodule0"))
    references.add_reference(SubmoduleReferenceInterface("Submodule0"))
    occurrence = MultipleSourceOccurrence.from_raw(
        "baba is you", references, 1)
    print(occurrence.to_dict())
    encoded = json.dumps(occurrence.to_dict())
    self.assertIsNotNone(encoded)
def test_xbox(self):
    """A fact with predicate "xbox" is removed by cleaning_predicate."""
    fact = GeneratedFact(
        "test", "xbox", "nothing", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.cleaning_predicate.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(0, len(remaining))
def test_conjugated_verb3(self):
    """A fact with predicate "goes" is kept by cleaning_predicate."""
    fact = GeneratedFact(
        "elephant", "goes", "nowhere", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.cleaning_predicate.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
def test_empty_predicate(self):
    """A fact with an empty predicate is removed by cleaning_predicate."""
    fact = GeneratedFact(
        "elephant", "", "fruits", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.cleaning_predicate.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(0, len(remaining))
def test_ing(self):
    """A bare "adapting" predicate is removed by present_continuous."""
    fact = GeneratedFact(
        "test", "adapting", "nothing", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.present_continuous.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(0, len(remaining))
def test_has_beach(self):
    """The fact "beach has sand" is kept by cleaning_predicate."""
    fact = GeneratedFact(
        "beach", "has", "sand", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.cleaning_predicate.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
def test_fact_transformation(self):
    """get_fact() carries over the triple and the negation flag."""
    positive = GeneratedFact(
        "elephant", "eat", "zebra", "", False, 1.0,
        MultipleSourceOccurrence.from_raw(
            "elephants do not eat zebras", None, 1))
    plain = positive.get_fact()
    self.assertEqual(plain.get_subject(), "elephant")
    self.assertEqual(plain.get_predicate(), "eat")
    self.assertEqual(plain.get_object(), "zebra")
    self.assertEqual(plain.is_negative(), False)

    # Same triple, this time built with the negative flag set.
    negative = GeneratedFact(
        "elephant", "eat", "zebra", "", True, 1.0,
        MultipleSourceOccurrence.from_raw(
            "elephants do not eat zebras", None, 1))
    plain = negative.get_fact()
    self.assertEqual(plain.is_negative(), True)
def test_no_change(self):
    """cleaning_predicate keeps "test is time" with its object intact."""
    fact = GeneratedFact(
        "test", "is", "time", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.cleaning_predicate.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
    self.assertEqual("time", remaining[0].get_object().get())
def test_removal(self):
    """A fact whose subject and object are both "lion" is removed."""
    same_ends = GeneratedFact(
        "lion", "eat", "lion", "some", False, None,
        MultipleSourceOccurrence())
    inputs = self.empty_input.replace_generated_facts([same_ends])
    processed = self.identical.process(inputs)
    self.assertEqual(0, len(processed.get_generated_facts()))
def test_article(self):
    """Objects "hive" and "a hive" end up identical after processing.

    Both facts must be kept by SimilarObjectRemover, but the set of their
    object strings must contain a single value.
    """
    without_article = GeneratedFact(
        "bee", "make", "hive", "", False, 0.1,
        MultipleSourceOccurrence())
    with_article = GeneratedFact(
        "bee", "make", "a hive", "", False, 0.1,
        MultipleSourceOccurrence())
    inputs = Inputs().add_generated_facts(
        [without_article, with_article])
    remover = SimilarObjectRemover(None)
    inputs = remover.process(inputs)
    self.assertEqual(len(inputs.get_generated_facts()), 2)
    distinct_objects = {
        fact.get_object().get() for fact in inputs.get_generated_facts()
    }
    self.assertEqual(len(distinct_objects), 1)
def test_no_change(self):
    """be_normalization leaves the predicate "adapted" untouched."""
    fact = GeneratedFact(
        "test", "adapted", "nothing", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.be_normalization.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
    self.assertEqual("adapted", remaining[0].get_predicate().get())
def test_false_s(self):
    """present_conjugate keeps "pass" as is despite its trailing "s"."""
    fact = GeneratedFact(
        "test", "pass", "nothing", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.present_conjugate.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
    self.assertEqual("pass", remaining[0].get_predicate().get())
def test_no_verb2(self):
    """A fact with predicate "clock" is removed by cleaning_predicate."""
    fact = GeneratedFact(
        "wall", "clock", "yellow", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.cleaning_predicate.process(inputs)
    remaining = processed.get_generated_facts()
    print(remaining)
    self.assertEqual(0, len(remaining))
def test_be_ing(self):
    """present_continuous rewrites the predicate "is adapting" to "adapt".

    The fact must be kept and its predicate must become "adapt".
    """
    # Arguments follow the order used by every other GeneratedFact call in
    # this file: subject, predicate, object, modality, negative, score,
    # sentence source. The earlier version passed the source object in the
    # modality slot and an empty string as the sentence source.
    generated_fact = GeneratedFact("test", "is adapting", "nothing", "",
                                   False, 0.0, MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([generated_fact])
    inputs = self.present_continuous.process(inputs)
    generated_facts = inputs.get_generated_facts()
    self.assertEqual(1, len(generated_facts))
    self.assertEqual("adapt", generated_facts[0].get_predicate().get())
def test_not_digest(self):
    """ "not digests" becomes predicate "digests" on a negative fact."""
    fact = GeneratedFact(
        "elephant", "not digests", "fruits", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.cleaning_predicate.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
    cleaned = remaining[0]
    self.assertEqual(cleaned.get_predicate().get(), "digests")
    self.assertTrue(cleaned.is_negative())
def test_be_can_duplicate(self):
    """can_transformation strips a leading "can" from the object.

    For predicate "be" and object "can nothing", the fact is kept with
    predicate "be" and object "nothing".
    """
    fact = GeneratedFact(
        "test", "be", "can nothing", "", False, 0.0,
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.can_transformation.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
    self.assertEqual("be", remaining[0].get_predicate().get())
    self.assertEqual("nothing", remaining[0].get_object().get())
def initialize_for_generated_fact(self, generated_fact):
    """Create empty per-fact containers the first time a fact is seen.

    The key is the value returned by ``get_fact_without_modality`` for
    ``generated_fact``. If that key is already in ``self.found`` nothing
    is changed; otherwise an entry is added to ``found``, ``sentences``,
    ``modalities``, ``patterns``, ``modules`` and ``submodules``.
    """
    key = get_fact_without_modality(generated_fact)
    if key in self.found:
        return
    self.found[key] = None
    self.sentences[key] = MultipleSourceOccurrence()
    self.modalities[key] = dict()
    self.patterns[key] = MultiplePattern()
    self.modules[key] = MultipleModuleReference()
    self.submodules[key] = MultipleSubmoduleReference()
def test_texas(self):
    """to_singular leaves the subject "texas" unchanged."""
    fact = GeneratedFact(
        "texas", "is a", "cat", "", False, MultipleScore(),
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    inputs = inputs.add_subjects({Subject("lion")})
    processed = self.to_singular.process(inputs)
    remaining = processed.get_generated_facts()
    self.assertEqual(1, len(remaining))
    self.assertEqual("texas", remaining[0].get_subject().get())
def read_generated_fact(generated_fact):
    """Rebuild a GeneratedFact from its serialized dictionary.

    ``generated_fact`` must have a "type" entry equal to "GeneratedFact"
    and the entries "subject", "predicate", "object", "modality",
    "negative", "score", "sentence_source" and "pattern".

    Raises:
        UnknownSerializedObject: if the "type" entry has any other value.
    """
    if generated_fact["type"] != "GeneratedFact":
        raise UnknownSerializedObject("Unknown generated fact type" +
                                      json.dumps(generated_fact))
    subject = read_subject(generated_fact["subject"])
    predicate = read_predicate(generated_fact["predicate"])
    obj = read_object(generated_fact["object"])
    modality = read_modality(generated_fact["modality"])
    negative = generated_fact["negative"]
    score = read_score(generated_fact["score"])
    source = MultipleSourceOccurrence.from_dict(
        generated_fact["sentence_source"])
    pattern = read_pattern(generated_fact["pattern"])
    return GeneratedFact(subject, predicate, obj, modality, negative,
                         score, source, pattern)
def test_cache(self):
    """Process the same fact twice with a GoogleBookSubmodule using a cache.

    The submodule is built with the cache name "google-book-cache-temp"
    (NOTE(review): the second positional argument presumably switches the
    cache on - confirm against the constructor). The second run must yield
    one fact with exactly one non-zero "Google Book Submodule" score.
    """
    google_book_cache = GoogleBookSubmodule(
        None, True, cache_name="google-book-cache-temp")
    try:
        generated_fact = GeneratedFact("lion", "eat", "zebra", "", False,
                                       MultipleScore(),
                                       MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts([generated_fact])
        google_book_cache.process(inputs)

        # Second pass over an identical, freshly built fact.
        generated_fact = GeneratedFact("lion", "eat", "zebra", "", False,
                                       MultipleScore(),
                                       MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts([generated_fact])
        inputs = google_book_cache.process(inputs)
        self.assertEqual(1, len(inputs.get_generated_facts()))
        scores = inputs.get_generated_facts()[0].get_score()
        scores_google_book = [
            x for x in scores.scores
            if x[2].get_name() == "Google Book Submodule"
        ]
        self.assertEqual(1, len(scores_google_book))
        self.assertNotEqual(scores_google_book[0][0], 0)
    finally:
        # Drop the temporary cache even when an assertion fails, so a
        # failed run cannot leave stale entries behind for the next one.
        google_book_cache.cache.delete_cache()
def test_cache(self):
    """Process the same fact twice with a cached Wikipedia submodule.

    The submodule is built with the cache name "wikipedia-cache-test"
    (NOTE(review): the second positional argument presumably switches the
    cache on - confirm against the constructor). The second run must yield
    one fact with exactly one non-zero "Wikipedia Cooccurrence" score.
    """
    wikipedia_cache = WikipediaCooccurrenceSubmodule(
        None, True, "wikipedia-cache-test")
    try:
        generated_fact = GeneratedFact("lion", "is a", "cat", "", False,
                                       MultipleScore(),
                                       MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts([generated_fact])
        wikipedia_cache.process(inputs)

        # Second pass over an identical, freshly built fact.
        generated_fact = GeneratedFact("lion", "is a", "cat", "", False,
                                       MultipleScore(),
                                       MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts([generated_fact])
        inputs = wikipedia_cache.process(inputs)
        self.assertEqual(1, len(inputs.get_generated_facts()))
        scores = inputs.get_generated_facts()[0].get_score()
        scores_wikipedia = [
            x for x in scores.scores
            if x[2].get_name() == "Wikipedia Cooccurrence"
        ]
        self.assertEqual(1, len(scores_wikipedia))
        self.assertNotEqual(scores_wikipedia[0][0], 0)
    finally:
        # Drop the temporary cache even when an assertion fails, so a
        # failed run cannot leave stale entries behind for the next one.
        wikipedia_cache.cache.delete_cache()
def test_combination(self):
    """linear_combination keeps every fact it is given.

    Ten elephant facts are scored alternately with a Bing (odd positions)
    and a Google (even positions) autocomplete reference, and one extra
    "elephant be big" fact is appended; all eleven must remain after
    processing.
    """
    dataset = [
        ("elephant", "download", "baby", 0),
        ("elephant", "climb", "trunk", 0),
        ("elephant", "bear", "baby", 1),
        ("elephant", "download this cute illustration with", "baby", 0),
        ("elephant", "be", "ear", 0),
        ("elephant", "fry", "ear", 0),
        ("elephant", "trek", "travel", 0),
        ("elephant", "forbid love in", "water", 0),
        ("elephant", "eat", "bark", 1),
        ("elephant", "have", "tusks", 1),
    ]
    facts = []
    for position, row in enumerate(dataset, start=1):
        subject, predicate, obj, truth = row
        score = MultipleScore()
        if position % 2 == 0:
            submodule = GoogleAutocompleteSubmodule(self.dummy_reference)
        else:
            submodule = BingAutocompleteSubmodule(self.dummy_reference)
        score.add_score(truth, self.dummy_reference, submodule)
        facts.append(
            GeneratedFact(subject, predicate, obj, "", False, score,
                          MultipleSourceOccurrence()))

    big_score = MultipleScore()
    big_score.add_score(
        1, self.dummy_reference,
        GoogleAutocompleteSubmodule(self.dummy_reference))
    facts.append(
        GeneratedFact(
            "elephant", "be", "big", "", False, big_score,
            MultipleSourceOccurrence.from_raw("elephants are big", None, 1)))

    inputs = self.empty_input.add_generated_facts(facts)
    processed = self.linear_combination.process(inputs)
    self.assertEqual(len(dataset) + 1,
                     len(processed.get_generated_facts()))
def test_lion_eat_code(self):
    """ "lion eat code" gets a single Google Book score equal to zero."""
    fact = GeneratedFact(
        "lion", "eat", "code", "", False, MultipleScore(),
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    processed = self.google_book_no_cache.process(inputs)
    self.assertEqual(1, len(processed.get_generated_facts()))
    all_scores = processed.get_generated_facts()[0].get_score()
    # Each score entry exposes its submodule at index 2.
    google_book_scores = [
        entry for entry in all_scores.scores
        if entry[2].get_name() == "Google Book Submodule"
    ]
    self.assertEqual(1, len(google_book_scores))
    self.assertTrue(google_book_scores[0][0] == 0)
def test_panda_flickr_cluster(self):
    """ "panda live china" receives exactly one score named "Flickr"."""
    fact = GeneratedFact(
        "panda", "live", "china", "", False, MultipleScore(),
        MultipleSourceOccurrence())
    inputs = self.empty_input.add_generated_facts([fact])
    inputs = inputs.add_subjects({Subject("panda")})
    processed = self.associations_flick_cluster.process(inputs)
    self.assertEqual(1, len(processed.get_generated_facts()))
    all_scores = processed.get_generated_facts()[0].get_score()
    # Each score entry exposes its submodule at index 2.
    flickr_scores = [
        entry for entry in all_scores.scores
        if entry[2].get_name() == "Flickr"
    ]
    self.assertEqual(1, len(flickr_scores))