class TestConjugatePresent(unittest.TestCase):
    """Exercise PresentConjugateNormalization on single-fact inputs."""

    def setUp(self) -> None:
        self.present_conjugate = PresentConjugateNormalization(None)
        self.empty_input = Inputs()

    def _predicate_after_processing(self, predicate):
        """Process one fact carrying ``predicate`` and return its new predicate.

        The calling test fails unless exactly one fact comes out of the
        submodule.
        """
        fact = GeneratedFact("test", predicate, "nothing", "", False, 0.0,
                             MultipleSourceOccurrence())
        processed = self.present_conjugate.process(
            self.empty_input.add_generated_facts([fact]))
        remaining = processed.get_generated_facts()
        self.assertEqual(1, len(remaining))
        return remaining[0].get_predicate().get()

    def test_nothing(self):
        # "adapt" is expected to come back unchanged.
        self.assertEqual("adapt", self._predicate_after_processing("adapt"))

    def test_s(self):
        # The final "s" of "adapts" is expected to be removed.
        self.assertEqual("adapt", self._predicate_after_processing("adapts"))

    def test_false_s(self):
        # "pass" ends with an "s" but is expected to stay as it is.
        self.assertEqual("pass", self._predicate_after_processing("pass"))
class TestPresentContinuous(unittest.TestCase):
    """Exercise PresentContinuousSubmodule on single-fact inputs."""

    def setUp(self) -> None:
        self.present_continuous = PresentContinuousSubmodule(None)
        self.empty_input = Inputs()

    def _process_predicate(self, predicate):
        """Run the submodule on one fact and return the resulting fact list.

        Every fact is built with the same argument order (empty modality,
        not negative, score 0.0, empty source occurrence) so that the tests
        only differ by their predicate.
        """
        fact = GeneratedFact("test", predicate, "nothing", "", False, 0.0,
                             MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts([fact])
        return self.present_continuous.process(inputs).get_generated_facts()

    def test_nothing(self):
        facts = self._process_predicate("adapt")
        self.assertEqual(1, len(facts))
        self.assertEqual("adapt", facts[0].get_predicate().get())

    def test_be_ing(self):
        # "is adapting" is expected to be reduced to "adapt".
        facts = self._process_predicate("is adapting")
        self.assertEqual(1, len(facts))
        self.assertEqual("adapt", facts[0].get_predicate().get())

    def test_ing(self):
        # A bare "-ing" predicate is expected to be dropped entirely.
        facts = self._process_predicate("adapting")
        self.assertEqual(0, len(facts))
class TestSimpleWikipediaCooccurrence(unittest.TestCase):
    """Check the score added by SimpleWikipediaCooccurrenceSubmodule."""

    # Name under which the submodule's entries are looked up in the scores.
    _SUBMODULE_NAME = "Simple Wikipedia Cooccurrence"

    def setUp(self) -> None:
        self.simple_wikipedia_no_cache = SimpleWikipediaCooccurrenceSubmodule(None, False)
        self.empty_input = Inputs()

    def _lion_inputs(self):
        """Return inputs holding a fresh "lion is a cat" fact."""
        fact = GeneratedFact("lion", "is a", "cat", "", False,
                             MultipleScore(), MultipleSourceOccurrence())
        return self.empty_input.add_generated_facts([fact])

    def _assert_single_nonzero_score(self, inputs):
        """Assert one fact with exactly one non-zero score from the submodule."""
        facts = inputs.get_generated_facts()
        self.assertEqual(1, len(facts))
        scores_wikipedia = [
            entry for entry in facts[0].get_score().scores
            if entry[2].get_name() == self._SUBMODULE_NAME
        ]
        self.assertEqual(1, len(scores_wikipedia))
        self.assertNotEqual(scores_wikipedia[0][0], 0)

    def test_lion(self):
        inputs = self.simple_wikipedia_no_cache.process(self._lion_inputs())
        self._assert_single_nonzero_score(inputs)

    def test_cache(self):
        wikipedia_cache = SimpleWikipediaCooccurrenceSubmodule(
            None, True, "simple-wikipedia-cache-test")
        # Registered up front so the test cache is removed even when one of
        # the assertions below fails. The lambda defers the attribute lookup
        # until cleanup time.
        self.addCleanup(lambda: wikipedia_cache.cache.delete_cache())
        # First pass; presumably fills the cache used by the second pass.
        wikipedia_cache.process(self._lion_inputs())
        inputs = wikipedia_cache.process(self._lion_inputs())
        self._assert_single_nonzero_score(inputs)
class TestToSingular(unittest.TestCase):
    """Exercise ToSingularSubjectSubmodule with "lion" as the known subject."""

    def setUp(self) -> None:
        self.to_singular = ToSingularSubjectSubmodule(None)
        self.empty_input = Inputs()

    def _process_subject(self, subject, copies=1):
        """Process ``copies`` references to one "<subject> is a cat" fact.

        Returns the facts left after the submodule ran.
        """
        fact = GeneratedFact(subject, "is a", "cat", "", False,
                             MultipleScore(), MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts(
            [fact] * copies).add_subjects({Subject("lion")})
        return self.to_singular.process(inputs).get_generated_facts()

    def _assert_first_subject(self, facts, expected_count, expected_subject):
        self.assertEqual(expected_count, len(facts))
        self.assertEqual(expected_subject, facts[0].get_subject().get())

    def test_turn_singular(self):
        self._assert_first_subject(self._process_subject("lions"), 1, "lion")

    def test_turn_singular_duplicate(self):
        # The same fact given twice is expected to yield two facts.
        self._assert_first_subject(
            self._process_subject("lions", copies=2), 2, "lion")

    def test_do_nothing(self):
        self._assert_first_subject(self._process_subject("lion"), 1, "lion")

    def test_crisis(self):
        # Ends with "s" but is expected to be left untouched.
        self._assert_first_subject(
            self._process_subject("crisis"), 1, "crisis")

    def test_texas(self):
        # Ends with "s" but is expected to be left untouched.
        self._assert_first_subject(self._process_subject("texas"), 1, "texas")
class TestGoogleBook(unittest.TestCase):
    """Check the score added by GoogleBookSubmodule."""

    # Name under which the submodule's entries are looked up in the scores.
    _SUBMODULE_NAME = "Google Book Submodule"

    def setUp(self) -> None:
        self.google_book_no_cache = GoogleBookSubmodule(None, False)
        self.empty_input = Inputs()

    def _lion_eat(self, obj):
        """Return inputs holding a fresh "lion eat <obj>" fact."""
        fact = GeneratedFact("lion", "eat", obj, "", False,
                             MultipleScore(), MultipleSourceOccurrence())
        return self.empty_input.add_generated_facts([fact])

    def _google_book_score(self, inputs):
        """Return the single Google Book score value of the single fact.

        Fails the calling test when there is not exactly one fact or not
        exactly one score entry coming from the submodule.
        """
        facts = inputs.get_generated_facts()
        self.assertEqual(1, len(facts))
        scores_google_book = [
            entry for entry in facts[0].get_score().scores
            if entry[2].get_name() == self._SUBMODULE_NAME
        ]
        self.assertEqual(1, len(scores_google_book))
        return scores_google_book[0][0]

    def test_lion_eat_zebras(self):
        inputs = self.google_book_no_cache.process(self._lion_eat("zebra"))
        self.assertNotEqual(self._google_book_score(inputs), 0)

    def test_lion_eat_code(self):
        inputs = self.google_book_no_cache.process(self._lion_eat("code"))
        self.assertEqual(self._google_book_score(inputs), 0)

    def test_cache(self):
        google_book_cache = GoogleBookSubmodule(
            None, True, cache_name="google-book-cache-temp")
        # Registered up front so the temporary cache is removed even when one
        # of the assertions below fails. The lambda defers the attribute
        # lookup until cleanup time.
        self.addCleanup(lambda: google_book_cache.cache.delete_cache())
        # First pass; presumably fills the cache used by the second pass.
        google_book_cache.process(self._lion_eat("zebra"))
        inputs = google_book_cache.process(self._lion_eat("zebra"))
        self.assertNotEqual(self._google_book_score(inputs), 0)
class TestBasicModality(unittest.TestCase):
    """Run BasicModalitySubmodule on facts produced by the OpenIE generator."""

    def setUp(self) -> None:
        dummy_reference = ReferencableInterface("Dummy reference")
        self.openie_fact_generator = OpenIEFactGeneratorSubmodule(
            dummy_reference)
        self.openie_fact_generator._name = "OPENIE"  # Dummy name only useful for testing
        self.empty_input = Inputs()
        self.basic_modality = BasicModalitySubmodule(None)

    def _facts_for_question(self, question):
        """Generate facts from ``question`` and pass them through the submodule.

        The suggestion tuple always uses score 1.0, no source and "panda" as
        the subject, as in every test of this class.
        """
        suggestion = (question, 1.0, None, "panda")
        new_gfs = self.openie_fact_generator.get_generated_facts([suggestion])
        inputs = self.empty_input.add_generated_facts(new_gfs)
        return self.basic_modality.process(inputs).get_generated_facts()

    def _assert_fact(self, fact, subject, predicate, obj, modality):
        """Assert the triple of ``fact`` and that ``modality`` is in its modality."""
        self.assertEqual(subject, fact.get_subject().get())
        self.assertEqual(predicate, fact.get_predicate().get())
        self.assertEqual(obj, fact.get_object().get())
        self.assertIn(modality, fact.get_modality().get())

    def test_always(self):
        facts = self._facts_for_question("why does panda always climb tree")
        self.assertEqual(1, len(facts))
        self._assert_fact(facts[0], "panda", "climb", "tree", "always")

    def test_often_object(self):
        facts = self._facts_for_question("why do pandas climb in tree often")
        self.assertEqual(1, len(facts))
        self._assert_fact(facts[0], "pandas", "climb in", "tree", "often")

    def test_with_already_one_modality(self):
        facts = self._facts_for_question(
            "why do african pandas eat bananas often")
        # Compare the raw subject string, like every other assertion in this
        # file, instead of relying on the wrapper object comparing equal to
        # a str.
        gfs = [x for x in facts if x.get_subject().get() == "pandas"]
        self.assertEqual(1, len(gfs))
        self._assert_fact(gfs[0], "pandas", "eat", "bananas", "often")
class TestAssociation(unittest.TestCase):
    """Check the scores added by ImagetagSubmodule and FlickrClustersSubmodule."""

    def setUp(self) -> None:
        dummy_reference = ReferencableInterface("Dummy reference")
        self.openie_fact_generator = OpenIEFactGeneratorSubmodule(
            dummy_reference)
        self.openie_fact_generator._name = "OPENIE"  # Dummy name only useful for testing
        self.empty_input = Inputs()
        self.associations = ImagetagSubmodule(None)
        self.associations_flick_cluster = FlickrClustersSubmodule(None)

    def _panda_inputs(self, predicate, obj):
        """Return inputs with one "panda" fact and "panda" as the only subject."""
        new_gfs = [
            GeneratedFact("panda", predicate, obj, "", False,
                          MultipleScore(), MultipleSourceOccurrence())
        ]
        # Subjects are wrapped in Subject, as everywhere else in this file.
        return self.empty_input.add_generated_facts(new_gfs).add_subjects(
            {Subject("panda")})

    def _scores_named(self, inputs, name):
        """Return the score entries of the single fact whose source is ``name``."""
        facts = inputs.get_generated_facts()
        self.assertEqual(1, len(facts))
        return [
            entry for entry in facts[0].get_score().scores
            if entry[2].get_name() == name
        ]

    def test_panda_imagetag(self):
        inputs = self.associations.process(
            self._panda_inputs("climb", "tree"))
        scores_imagetag = self._scores_named(inputs, "Image Tag submodule")
        self.assertEqual(1, len(scores_imagetag))

    def test_panda_flickr_cluster(self):
        inputs = self.associations_flick_cluster.process(
            self._panda_inputs("live", "china"))
        scores_flickr = self._scores_named(inputs, "Flickr")
        self.assertEqual(1, len(scores_flickr))

    def test_panda_flickr_cluster_raw(self):
        clusters = self.associations_flick_cluster._get_clusters("panda")
        # Flatten all clusters into one list of tags.
        merge_clusters = [tag for cluster in clusters for tag in cluster]
        self.assertIn("china", merge_clusters)
class TestCanTransformation(unittest.TestCase):
    """Exercise CanTransformationSubmodule on objects starting with "can"."""

    def setUp(self) -> None:
        self.can_transformation = CanTransformationSubmodule(None)
        self.empty_input = Inputs()

    def _assert_transformed(self, predicate, obj, expected_predicate,
                            expected_object, *extra_args):
        """Process one fact and assert the predicate/object of the single result.

        ``extra_args`` are appended to the GeneratedFact constructor call
        (used to pass a pattern).
        """
        fact = GeneratedFact("test", predicate, obj, "", False, 0.0,
                             MultipleSourceOccurrence(), *extra_args)
        inputs = self.empty_input.add_generated_facts([fact])
        outcome = self.can_transformation.process(inputs).get_generated_facts()
        self.assertEqual(1, len(outcome))
        self.assertEqual(expected_predicate, outcome[0].get_predicate().get())
        self.assertEqual(expected_object, outcome[0].get_object().get())

    def test_can_duplicate(self):
        self._assert_transformed("can", "can nothing", "can", "nothing")

    def test_can_be_duplicate(self):
        self._assert_transformed("can", "can be nothing", "can be", "nothing")

    def test_be_can_duplicate(self):
        self._assert_transformed("be", "can nothing", "be", "nothing")

    def test_be_can_duplicate_pattern(self):
        # Same fact as test_be_can_duplicate, but with a "why can" pattern the
        # predicate is expected to become "can be".
        self._assert_transformed("be", "can nothing", "can be", "nothing",
                                 PatternGoogle("why can <SUBJ>"))
class TestFilterObject(unittest.TestCase):
    """Exercise FilterObjectSubmodule on single-fact inputs."""

    def setUp(self) -> None:
        self.cleaning_predicate = FilterObjectSubmodule(None)
        self.empty_input = Inputs()

    def _facts_after_filter(self, obj):
        """Process one "test is <obj>" fact and return the remaining facts."""
        fact = GeneratedFact("test", "is", obj, "", False, 0.0,
                             MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts([fact])
        return self.cleaning_predicate.process(inputs).get_generated_facts()

    def test_forbidden(self):
        self.assertEqual(0, len(self._facts_after_filter("used")))

    def test_totally_forbidden(self):
        self.assertEqual(0, len(self._facts_after_filter("useful minecraft")))

    def test_one_letter(self):
        self.assertEqual(0, len(self._facts_after_filter("a")))

    def test_dirty(self):
        # The leading "their" is expected to be stripped from the object.
        kept = self._facts_after_filter("their time")
        self.assertEqual(1, len(kept))
        self.assertEqual("time", kept[0].get_object().get())

    def test_no_change(self):
        kept = self._facts_after_filter("time")
        self.assertEqual(1, len(kept))
        self.assertEqual("time", kept[0].get_object().get())
def test_not_remove(self):
    """The fact with modality "TBC[big bananas]" must survive TBCCleaner."""
    source = MultipleSourceOccurrence()
    source.add_raw("elephants eat big bananas", None, 2)
    fact = GeneratedFact("elephant", "eat", "bananas", "TBC[big bananas]",
                         0, MultipleScore(), source)
    inputs = Inputs().add_generated_facts([fact])
    cleaned = TBCCleaner(None).process(inputs)
    self.assertEqual(len(cleaned.get_generated_facts()), 1)
class TestToSingular(unittest.TestCase):
    """Exercise ToLowerCaseSubmodule on subject, predicate and object.

    NOTE(review): the class is named after "ToSingular" although the
    submodule under test is ToLowerCaseSubmodule; consider renaming it.
    """

    def setUp(self) -> None:
        self.to_lower_case = ToLowerCaseSubmodule(None)
        self.empty_input = Inputs()

    def _lowered(self, subject, predicate, obj, copies=1):
        """Process ``copies`` references to one fact and return the result."""
        fact = GeneratedFact(subject, predicate, obj, "", False,
                             MultipleScore(), MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts(
            [fact] * copies).add_subjects({Subject("lion")})
        return self.to_lower_case.process(inputs).get_generated_facts()

    def test_subject(self):
        outcome = self._lowered("Lions", "is a", "cat")
        self.assertEqual(1, len(outcome))
        self.assertEqual("lions", outcome[0].get_subject().get())

    def test_predicate(self):
        # The same fact is given twice and two facts are expected back.
        outcome = self._lowered("lions", "is A", "cat", copies=2)
        self.assertEqual(2, len(outcome))
        self.assertEqual("is a", outcome[0].get_predicate().get())

    def test_object(self):
        outcome = self._lowered("lion", "is a", "cAt")
        self.assertEqual(1, len(outcome))
        self.assertEqual("cat", outcome[0].get_object().get())

    def test_do_nothing(self):
        outcome = self._lowered("crisis", "is a", "cat")
        self.assertEqual(1, len(outcome))
        self.assertEqual("crisis", outcome[0].get_subject().get())
def test_article(self):
    """"hive" and "a hive" must end up as two facts sharing one object string."""
    facts = [
        GeneratedFact("bee", "make", obj, "", False, 0.1,
                      MultipleSourceOccurrence())
        for obj in ("hive", "a hive")
    ]
    inputs = Inputs().add_generated_facts(facts)
    processed = SimilarObjectRemover(None).process(inputs)
    self.assertEqual(len(processed.get_generated_facts()), 2)
    distinct_objects = {
        fact.get_object().get() for fact in processed.get_generated_facts()
    }
    self.assertEqual(len(distinct_objects), 1)
class TestLinearWeightedCombination(unittest.TestCase):
    """Check that LinearCombinationWeightedSubmodule keeps every input fact."""

    def setUp(self) -> None:
        self.dummy_reference = ReferencableInterface("Dummy reference")
        self.linear_combination = LinearCombinationWeightedSubmodule(
            self.dummy_reference)
        self.empty_input = Inputs()

    def test_combination(self):
        # (subject, predicate, object, truth value used as score)
        dataset = [
            ("elephant", "download", "baby", 0),
            ("elephant", "climb", "trunk", 0),
            ("elephant", "bear", "baby", 1),
            ("elephant", "download this cute illustration with", "baby", 0),
            ("elephant", "be", "ear", 0),
            ("elephant", "fry", "ear", 0),
            ("elephant", "trek", "travel", 0),
            ("elephant", "forbid love in", "water", 0),
            ("elephant", "eat", "bark", 1),
            ("elephant", "have", "tusks", 1),
        ]
        gfs = []
        for position, (subject, predicate, obj, truth) in enumerate(dataset, start=1):
            # Even positions are scored by Google, odd positions by Bing.
            if position % 2 == 0:
                scorer_class = GoogleAutocompleteSubmodule
            else:
                scorer_class = BingAutocompleteSubmodule
            score = MultipleScore()
            score.add_score(truth, self.dummy_reference,
                            scorer_class(self.dummy_reference))
            gfs.append(
                GeneratedFact(subject, predicate, obj, "", False, score,
                              MultipleSourceOccurrence()))

        # One extra fact that comes with a raw source sentence.
        extra_score = MultipleScore()
        extra_score.add_score(
            1, self.dummy_reference,
            GoogleAutocompleteSubmodule(self.dummy_reference))
        gfs.append(
            GeneratedFact(
                "elephant", "be", "big", "", False, extra_score,
                MultipleSourceOccurrence.from_raw("elephants are big", None, 1)))

        inputs = self.empty_input.add_generated_facts(gfs)
        inputs = self.linear_combination.process(inputs)
        self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
class TestFactCombinor(unittest.TestCase):
    """Check how FactCombinor merges facts, their scores, modalities and sources."""

    def setUp(self) -> None:
        dummy_reference = ReferencableInterface("Dummy reference")
        self.openie_fact_generator = OpenIEFactGeneratorSubmodule(dummy_reference)
        self.openie_fact_generator._name = "OPENIE"  # Dummy name only useful for testing
        self.empty_input = Inputs()

    @staticmethod
    def _score(value):
        """Return a MultipleScore holding ``value`` with no reference or source."""
        score = MultipleScore()
        score.add_score(value, None, None)
        return score

    def _combine(self, facts):
        """Run a fresh FactCombinor over ``facts`` and return the resulting facts."""
        inputs = self.empty_input.add_generated_facts(facts)
        fact_combinor = FactCombinor(None)
        return fact_combinor.process(inputs).get_generated_facts()

    def test_combination(self):
        mso = MultipleSourceOccurrence()
        mso.add_raw("lions eat zebras", None, 2)
        mso.add_raw("lions eat small zebras", None, 1)
        new_gfs = [
            GeneratedFact("lion", "eat", "zebra", "", False, self._score(1),
                          MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1)),
            GeneratedFact("lion", "eat", "zebra", "", False, self._score(0.5), mso),
            GeneratedFact("lion", "eat", "zebra", "", False, self._score(0.7),
                          MultipleSourceOccurrence.from_raw("lions eat small zebras", None, 1)),
        ]
        combined = self._combine(new_gfs)
        self.assertEqual(1, len(combined))
        # The three input scores are all expected on the merged fact.
        self.assertEqual(3, len(combined[0].get_score().scores))
        sentence = str(combined[0].get_sentence_source())
        self.assertIn("lions eat zebras", sentence)
        self.assertIn("lions eat small zebras", sentence)
        # Occurrence counts are summed per sentence: 1 + 2 and 1 + 1.
        self.assertIn("x#x3", sentence)
        self.assertIn("x#x2", sentence)

    def test_combination_modalities(self):
        facts = [
            GeneratedFact("lion", "eat", "zebra", "some", False, self._score(1),
                          MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1)),
            GeneratedFact("lion", "eat", "zebra", "often", False, self._score(0.5),
                          MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1)),
        ]
        combined = self._combine(facts)
        self.assertEqual(1, len(combined))
        modality = combined[0].get_modality().get()
        self.assertIn("some", modality)
        self.assertIn("often", modality)

    def test_combination_modalities_tbc(self):
        facts = [
            GeneratedFact("parent", "have", "children", "TBC[many]", False,
                          self._score(1),
                          MultipleSourceOccurrence.from_raw(
                              "parents have many children", None, 1)),
            GeneratedFact("parent", "have", "children", "", False,
                          self._score(0.5),
                          MultipleSourceOccurrence.from_raw(
                              "parents have children", None, 1)),
        ]
        combined = self._combine(facts)
        self.assertEqual(1, len(combined))
        self.assertIn("TBC[many]", combined[0].get_modality().get())
        sentence = str(combined[0].get_sentence_source())
        self.assertIn("parents have many children x#x1", sentence)
        self.assertIn("parents have children x#x1", sentence)

    def test_combination_modalities_long(self):
        # "go" + "to Paris" and "go to" + "Paris" are expected to be merged.
        facts = [
            GeneratedFact("parent", "go", "to Paris", "TBC[many]", False,
                          self._score(1),
                          MultipleSourceOccurrence.from_raw(
                              "parents have many children", None, 1)),
            GeneratedFact("parent", "go to", "Paris", "", False,
                          self._score(0.5),
                          MultipleSourceOccurrence.from_raw(
                              "parents have children", None, 1)),
        ]
        combined = self._combine(facts)
        self.assertEqual(1, len(combined))
        self.assertIn("TBC[many]", combined[0].get_modality().get())
        sentence = str(combined[0].get_sentence_source())
        self.assertIn("parents have many children x#x1", sentence)
        self.assertIn("parents have children x#x1", sentence)
        # Compare the raw predicate string, as the other assertions in this
        # file do, rather than the wrapper object itself.
        self.assertEqual("go to", combined[0].get_predicate().get())

    def test_beach(self):
        mso = MultipleSourceOccurrence()
        mso.add_raw("beaches have sand", "Google Autocomplete", 4)
        mso.add_raw("some beaches have sand", "Google Autocomplete", 2)
        mso.add_raw("some beaches have sand and some rocks",
                    "Google Autocomplete", 1)
        mso.add_raw("all beaches have sand", "Google Autocomplete", 4)
        mso.add_raw("beach have sand", "Google Autocomplete", 1)
        generated_fact0 = GeneratedFact(
            "beach", "have", "sand",
            "some[subj/some] x#x3 // some[subj/all] x#x4",
            False, self._score(1), mso)
        combined = self._combine([generated_fact0])
        self.assertEqual(1, len(combined))
class TestSentenceComparator(unittest.TestCase):
    """Exercise ConceptualCaptionsComparatorSubmodule."""

    def test_get_content(self):
        sc = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        # Expected number of contents per subject, checked in this order.
        expected_counts = [
            ("elephant", 3748),
            ("penguin", 1273),
            ("lion", 2616),
            ("raccoon", 365),
        ]
        subjects = {
            Subject("elephant"),
            Subject("penguin"),
            Subject("lion"),
            Subject("raccoon")
        }
        sc.setup_processing(self.empty_input.add_subjects(subjects))
        for name, expected in expected_counts:
            contents = sc.get_contents(name)
            self.assertEqual(expected, len(contents))

    def test_conceptual_caption(self):
        sc = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        self.dummy_reference = ReferencableInterface("DUMMY")
        # (subject, predicate, object, truth value used as score)
        dataset = [
            ("elephant", "download", "baby", 0),
            ("elephant", "have", "tusks", 1),
            ("lion", "eat", "gazella", 0),
            ("penguin", "eat", "fish", 0),
            ("gorilla", "eat", "banana", 0),
            ("sky", "hasProperty", "blue", 0),
            ("computer", "is", "working", 1),
            ("raccoon", "hasProperty", "blue", 0),
        ]
        subjects = {
            Subject("elephant"),
            Subject("penguin"),
            Subject("lion"),
            Subject("gorilla"),
            Subject("sky"),
            Subject("computer"),
            Subject("raccoon")
        }
        gfs = []
        for position, (subject, predicate, obj, truth) in enumerate(dataset, start=1):
            # Even positions are scored by Google, odd positions by Bing.
            if position % 2 == 0:
                scorer_class = GoogleAutocompleteSubmodule
            else:
                scorer_class = BingAutocompleteSubmodule
            score = MultipleScore()
            score.add_score(truth, self.dummy_reference,
                            scorer_class(self.dummy_reference))
            gfs.append(
                GeneratedFact(subject, predicate, obj, "", False, score,
                              MultipleSourceOccurrence()))

        # One extra fact that comes with a raw source sentence.
        extra_score = MultipleScore()
        extra_score.add_score(
            1, self.dummy_reference,
            GoogleAutocompleteSubmodule(self.dummy_reference))
        gfs.append(
            GeneratedFact(
                "elephant", "be", "big", "", False, extra_score,
                MultipleSourceOccurrence.from_raw("elephants are big", None, 1)))

        inputs = self.empty_input.add_generated_facts(gfs).add_subjects(
            subjects)
        inputs = sc.process(inputs)
        self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
        # The first fact is expected to carry its initial score plus one more.
        self.assertEqual(
            len(inputs.get_generated_facts()[0].get_score().scores), 2)
        self.assertNotAlmostEqual(
            inputs.get_generated_facts()[1].get_score().scores[1][0],
            0,
            delta=1e-5)
class TestCleaningPredicate(unittest.TestCase):
    """Exercise CleaningPredicateSubmodule on single-fact inputs."""

    def setUp(self) -> None:
        self.cleaning_predicate = CleaningPredicateSubmodule(None)
        self.empty_input = Inputs()

    def _clean(self, predicate, subject="test", obj="nothing"):
        """Process one fact and return the facts left by the submodule."""
        fact = GeneratedFact(subject, predicate, obj, "", False, 0.0,
                             MultipleSourceOccurrence())
        inputs = self.empty_input.add_generated_facts([fact])
        return self.cleaning_predicate.process(inputs).get_generated_facts()

    def test_so(self):
        # The trailing "so" is expected to be removed from the predicate.
        generated_facts = self._clean("is so")
        self.assertEqual(1, len(generated_facts))
        self.assertEqual("is", generated_facts[0].get_predicate().get())

    def test_xbox(self):
        self.assertEqual(0, len(self._clean("xbox")))

    def test_no_change(self):
        generated_facts = self._clean("is")
        self.assertEqual(1, len(generated_facts))
        self.assertEqual("is", generated_facts[0].get_predicate().get())

    def test_no_verb(self):
        self.assertEqual(0, len(self._clean("table")))

    def test_no_verb2(self):
        generated_facts = self._clean("clock", subject="wall", obj="yellow")
        self.assertEqual(0, len(generated_facts))

    def test_conjugated_verb(self):
        generated_facts = self._clean("going", subject="elephant",
                                      obj="nowhere")
        self.assertEqual(1, len(generated_facts))

    def test_conjugated_verb2(self):
        generated_facts = self._clean("go", subject="elephant", obj="nowhere")
        self.assertEqual(1, len(generated_facts))

    def test_conjugated_verb3(self):
        generated_facts = self._clean("goes", subject="elephant",
                                      obj="nowhere")
        self.assertEqual(1, len(generated_facts))

    def test_not_digest(self):
        # The "not" is expected to move from the predicate to the negative flag.
        generated_facts = self._clean("not digests", subject="elephant",
                                      obj="fruits")
        self.assertEqual(1, len(generated_facts))
        self.assertEqual(generated_facts[0].get_predicate().get(), "digests")
        self.assertTrue(generated_facts[0].is_negative())

    def test_empty_predicate(self):
        generated_facts = self._clean("", subject="elephant", obj="fruits")
        self.assertEqual(0, len(generated_facts))

    def test_has_beach(self):
        generated_facts = self._clean("has", subject="beach", obj="sand")
        self.assertEqual(1, len(generated_facts))
def run_for_subject(subject):
    """Run the whole pipeline for one subject and publish a progress report.

    The subject is lower-cased, wrapped in a ``Subject`` and added to an
    empty ``Inputs``, which is then passed through the
    "manual-patterns-google" submodule.  Four stages follow, in order:

    1. "Assertion Generation": each generation submodule processes the
       seeded input and the facts it returns are accumulated.
    2. "Assertion Normalization": each normalization submodule is first run
       on every fact in isolation, to record the facts it changes, and is
       then applied to the whole input.
    3. "Assertion Normalization Global": each global normalization
       submodule is applied to the whole input.
    4. "Assertion Validation": the validation submodules are chained and
       reported together as a single "All validations" step.

    The report is a list holding one dict per stage (keys "step name" and
    "steps").  It is assigned to ``job.meta`` and saved with
    ``job.save_meta()`` once the first stage has been opened and again
    after every recorded step.

    :param subject: name of the subject to process (a string; it is
        lower-cased before use).
    :return: None; the report is only exposed through the current job.
    """
    job = get_current_job()
    factory = DefaultSubmoduleFactory()

    generation_names = [
        "google-autocomplete",
        "bing-autocomplete",
        "yahoo-questions",
        "answerscom-questions",
        "quora-questions",
        "reddit-questions",
        "fact-combinor",
    ]
    normalization_names = [
        "lower-case",
        "tbc-cleaner",
        "only-subject",
        "filter-object",
        "no-personal",
        "singular-subject",
        "cleaning-predicate",
        "basic-modality",
        "present-continuous",
        "are-transformation",
        "can-transformation",
        "be-normalization",
        "identical-subj-obj",
        "present-conjugate",
    ]
    normalization_global_names = [
        "similar-object-remover",
        "fact-combinor",
    ]
    validation_names = [
        "google-book",
        "flickr-clusters",
        "imagetag",
        "wikipedia-cooccurrence",
        "simple-wikipedia-cooccurrence",
        "conceptual-captions",
        "what-questions",
    ]

    # Seed input: the subject plus whatever the pattern submodule adds to it.
    empty_input = Inputs().add_subjects({Subject(subject.lower())})
    module_reference = ModuleReferenceInterface("")
    pattern_submodule = factory.get_submodule("manual-patterns-google",
                                              module_reference)
    empty_input = pattern_submodule.process(empty_input)

    result = []

    def open_stage(title):
        # Append a new stage to the report and hand back its list of steps.
        stage = {"step name": title, "steps": []}
        result.append(stage)
        return stage["steps"]

    def publish():
        # Expose the current state of the report through the job metadata.
        job.meta = result
        job.save_meta()

    # ---- Stage 1: generation -------------------------------------------
    steps = open_stage("Assertion Generation")
    publish()
    generated_facts = []
    for name in generation_names:
        submodule = factory.get_submodule(name, module_reference)
        begin_time = time.time()
        produced = submodule.process(empty_input).get_generated_facts()
        generated_facts.extend(produced)
        steps.append({
            "name": submodule.get_name(),
            "facts": [fact.to_dict() for fact in produced],
            "time": time.time() - begin_time,
        })
        publish()
    new_input = empty_input.add_generated_facts(generated_facts)

    # ---- Stage 2: per-submodule normalization ---------------------------
    steps = open_stage("Assertion Normalization")
    for name in normalization_names:
        submodule = factory.get_submodule(name, module_reference)
        begin_time = time.time()
        modifications = []
        step_info = {
            "name": submodule.get_name(),
            "modifications": modifications,
        }
        # Run the submodule on each fact alone so that the report can show
        # exactly which facts it rewrites, splits or drops.
        for fact in new_input.get_generated_facts():
            isolated = submodule.process(
                empty_input.add_generated_facts([fact]))
            outcome = isolated.get_generated_facts()
            if len(outcome) != 1 or outcome[0] != fact:
                modifications.append({
                    "from": fact.to_dict(),
                    "to": [changed.to_dict() for changed in outcome],
                })
        step_info["time"] = time.time() - begin_time
        steps.append(step_info)
        publish()
        # The real pass over the whole input happens after reporting.
        new_input = submodule.process(new_input)

    # ---- Stage 3: global normalization ----------------------------------
    steps = open_stage("Assertion Normalization Global")
    for name in normalization_global_names:
        submodule = factory.get_submodule(name, module_reference)
        begin_time = time.time()
        new_input = submodule.process(new_input)
        steps.append({
            "name": submodule.get_name(),
            "facts": [fact.to_dict()
                      for fact in new_input.get_generated_facts()],
            "time": time.time() - begin_time,
        })
        publish()

    # ---- Stage 4: validation (timed and reported as one step) -----------
    steps = open_stage("Assertion Validation")
    begin_time = time.time()
    for name in validation_names:
        submodule = factory.get_submodule(name, module_reference)
        new_input = submodule.process(new_input)
    steps.append({
        "name": "All validations",
        "facts": [fact.to_dict() for fact in new_input.get_generated_facts()],
        "time": time.time() - begin_time,
    })
    publish()
class TestBeNormalization(unittest.TestCase):
    """Tests of ``BeNormalizationSubmodule`` on single-fact inputs.

    The expectations encoded below: a leading "is"/"are" becomes "be", a
    leading "were" becomes "was", and a predicate without such a verb is
    left untouched.
    """

    def setUp(self) -> None:
        self.be_normalization = BeNormalizationSubmodule(None)
        self.empty_input = Inputs()

    def _check_predicate(self, raw_predicate, expected_predicate):
        """Normalize one fact and check the resulting predicate.

        Builds a fact ("test", raw_predicate, "nothing"), runs it through
        the submodule, and asserts that exactly one fact comes back whose
        predicate text equals ``expected_predicate``.
        """
        fact = GeneratedFact("test", raw_predicate, "nothing", "", False, 0.0,
                             MultipleSourceOccurrence())
        processed = self.be_normalization.process(
            self.empty_input.add_generated_facts([fact]))
        facts = processed.get_generated_facts()
        self.assertEqual(1, len(facts))
        self.assertEqual(expected_predicate, facts[0].get_predicate().get())

    def test_is_alone(self):
        self._check_predicate("is", "be")

    def test_are_alone(self):
        self._check_predicate("are", "be")

    def test_were_alone(self):
        self._check_predicate("were", "was")

    def test_is_not_alone(self):
        self._check_predicate("is adapted", "be adapted")

    def test_are_not_alone(self):
        self._check_predicate("are adapted", "be adapted")

    def test_were_not_alone(self):
        self._check_predicate("were adapted", "was adapted")

    def test_no_change(self):
        self._check_predicate("adapted", "adapted")
class TestAreTransformation(unittest.TestCase):
    """Tests of ``AreTransformationSubmodule`` on facts built from suggestions.

    Each test feeds one suggestion tuple to the OpenIE fact generator,
    passes the generated facts through the transformation, and checks the
    predicate of the single fact that is expected to remain.
    """

    def setUp(self) -> None:
        dummy_reference = ReferencableInterface("Dummy reference")
        self.openie_fact_generator = OpenIEFactGeneratorSubmodule(
            dummy_reference)
        # Dummy name only useful for testing
        self.openie_fact_generator._name = "OPENIE"
        self.empty_input = Inputs()
        self.are_transformation = AreTransformationSubmodule(None)

    def _assert_single_predicate(self, suggestion, expected_predicate):
        """Run ``suggestion`` through generator and transformation.

        Asserts that exactly one fact results and that its predicate
        compares equal to ``expected_predicate``.
        """
        new_gfs = self.openie_fact_generator.get_generated_facts([suggestion])
        transformed = self.are_transformation.process(
            self.empty_input.add_generated_facts(new_gfs))
        facts = transformed.get_generated_facts()
        self.assertEqual(1, len(facts))
        self.assertEqual(facts[0].get_predicate(), expected_predicate)

    def test_color(self):
        self._assert_single_predicate(
            ("why are pandas black", 1.0, None, "panda"),
            "has_color")

    def test_color1(self):
        self._assert_single_predicate(
            ("why are pandas white", 1.0, None, "panda"),
            "has_color")

    def test_body(self):
        self._assert_single_predicate(
            ("why do pandas have hands", 1.0, None, "panda"),
            "has_body_part")

    def test_body2(self):
        self._assert_single_predicate(
            ("why do elephants have trunks", 1.0, None, "elephant"),
            "has_body_part")

    def test_trait(self):
        pattern = PatternGoogle("why are", "has_property", 1.0)
        self._assert_single_predicate(
            ("why are pandas nice", 1.0, pattern, "panda"),
            "has_trait")

    def test_property(self):
        pattern = PatternGoogle("why are", "has_property", 1.0)
        self._assert_single_predicate(
            ("why are pandas dead", 1.0, pattern, "panda"),
            "has_property")

    def test_property_no_relation(self):
        # Same question as test_property, but the pattern carries no relation.
        pattern = PatternGoogle("why are")
        self._assert_single_predicate(
            ("why are pandas dead", 1.0, pattern, "panda"),
            "has_property")

    def test_quick(self):
        pattern = PatternGoogle("why are", "has_property", 1.0)
        self._assert_single_predicate(
            ("why are pandas quick", 1.0, pattern, "panda"),
            "has_movement")

    def test_ignore(self):
        # A "why do" question is expected to keep its own verb as predicate.
        pattern = PatternGoogle("why do", "has_property", 1.0)
        self._assert_single_predicate(
            ("why do pandas eat bamboo", 1.0, pattern, "panda"),
            "eat")