def setUp(self) -> None:
    """Prepare an OpenIE generator, empty inputs and the basic-modality submodule."""
    ref = ReferencableInterface("Dummy reference")
    self.openie_fact_generator = OpenIEFactGeneratorSubmodule(ref)
    # Dummy name only useful for testing
    self.openie_fact_generator._name = "OPENIE"
    self.empty_input = Inputs()
    self.basic_modality = BasicModalitySubmodule(None)
def setUp(self) -> None:
    """Prepare an OpenIE generator, empty inputs and the are-transformation submodule."""
    ref = ReferencableInterface("Dummy reference")
    self.openie_fact_generator = OpenIEFactGeneratorSubmodule(ref)
    # Dummy name only useful for testing
    self.openie_fact_generator._name = "OPENIE"
    self.empty_input = Inputs()
    self.are_transformation = AreTransformationSubmodule(None)
def setUp(self) -> None:
    """Prepare an OpenIE generator plus the imagetag and Flickr-cluster submodules."""
    ref = ReferencableInterface("Dummy reference")
    self.openie_fact_generator = OpenIEFactGeneratorSubmodule(ref)
    # Dummy name only useful for testing
    self.openie_fact_generator._name = "OPENIE"
    self.empty_input = Inputs()
    self.associations = ImagetagSubmodule(None)
    self.associations_flick_cluster = FlickrClustersSubmodule(None)
def test_conceptual_caption(self):
    """The conceptual-captions comparator must add a score to every fact.

    Builds a small dataset of facts whose scores alternate between a
    Google and a Bing autocomplete source, runs the comparator, and
    checks fact count and score attachment.
    """
    sc = ConceptualCaptionsComparatorSubmodule(None)
    self.empty_input = Inputs()
    self.dummy_reference = ReferencableInterface("DUMMY")
    dataset = [
        ("elephant", "download", "baby", 0),
        ("elephant", "have", "tusks", 1),
        ("lion", "eat", "gazella", 0),
        ("penguin", "eat", "fish", 0),
        ("gorilla", "eat", "banana", 0),
        ("sky", "hasProperty", "blue", 0),
        ("computer", "is", "working", 1),
        ("raccoon", "hasProperty", "blue", 0),
    ]
    subjects = {
        Subject(name)
        for name in ("elephant", "penguin", "lion", "gorilla",
                     "sky", "computer", "raccoon")
    }
    gfs = []
    for position, (subject, predicate, obj, truth) in enumerate(dataset, start=1):
        score = MultipleScore()
        # Alternate the submodule attached to the score between rows.
        if position % 2 == 0:
            source = GoogleAutocompleteSubmodule(self.dummy_reference)
        else:
            source = BingAutocompleteSubmodule(self.dummy_reference)
        score.add_score(truth, self.dummy_reference, source)
        gfs.append(GeneratedFact(subject, predicate, obj, "", False, score,
                                 MultipleSourceOccurrence()))
    score2 = MultipleScore()
    score2.add_score(1, self.dummy_reference,
                     GoogleAutocompleteSubmodule(self.dummy_reference))
    gfs.append(GeneratedFact(
        "elephant", "be", "big", "", False, score2,
        MultipleSourceOccurrence.from_raw("elephants are big", None, 1)))
    inputs = self.empty_input.add_generated_facts(gfs).add_subjects(subjects)
    inputs = sc.process(inputs)
    self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
    self.assertEqual(
        len(inputs.get_generated_facts()[0].get_score().scores), 2)
    self.assertNotAlmostEqual(
        inputs.get_generated_facts()[1].get_score().scores[1][0],
        0, delta=1e-5)
def setUp(self) -> None:
    """Create two Bing autocomplete submodules: one cache-less, one cached."""
    self.autocomplete = BingAutocompleteSubmodule(
        None, use_cache=False, look_new=True)
    # NOTE(review): the cache name says "google" for a Bing submodule —
    # looks like a copy-paste; confirm the intended cache file name.
    self.autocomplete_cache = BingAutocompleteSubmodule(
        None, use_cache=True, cache_name="google-cache-test", look_new=True)
    self.empty_input = Inputs()
def test_save(self):
    """Round-trip Inputs through save/load and compare collection sizes.

    Fixes a copy-paste bug: the subject count of the original inputs was
    compared against the generated-fact count of the reloaded inputs
    instead of its subject count.
    """
    inputs = Inputs()
    subjects = [Subject("baba"), Subject("coko")]
    patterns = [
        PatternGoogle("why are"),
        PatternGoogle("Why are", "hasProperty", True)
    ]
    mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
    mmr.add_reference(ModuleReferenceInterface("Module1"))
    msr = MultipleSubmoduleReference(
        SubmoduleReferenceInterface("Submodule0"))
    msr.add_reference(SubmoduleReferenceInterface("Submodule0"))
    ms0 = MultipleScore()
    ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                  SubmoduleReferenceInterface("Submodule0"))
    ms1 = MultipleScore()
    ms1.add_score(1.0, mmr, msr)
    ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                  SubmoduleReferenceInterface("Submodule2"))
    mp0 = MultiplePattern()
    mp0.add_pattern(patterns[0])
    mp1 = MultiplePattern()
    mp1.add_pattern(patterns[0])
    mp1.add_pattern(patterns[1])
    gfs = [
        GeneratedFact(
            "baba", "is", "you", "sometimes", False, ms0,
            MultipleSourceOccurrence.from_raw("baba is you", msr, 1),
            mp0),
        GeneratedFact(
            "coko", "is", "dead", "always", True, ms1,
            MultipleSourceOccurrence.from_raw("toto is always dead", msr, 1),
            mp1)
    ]
    seeds = [
        Fact("baba", "is", "us", None, False),
        Fact("coko", "are", "missing", "coucou", True)
    ]
    objects = [Object("missing"), Object("you")]
    inputs = inputs.replace_seeds(seeds)
    inputs = inputs.replace_patterns(patterns)
    inputs = inputs.replace_subjects(subjects)
    inputs = inputs.replace_generated_facts(gfs)
    inputs = inputs.replace_objects(objects)
    inputs.save("temp.json")
    inputs_read = inputs.load("temp.json")
    self.assertEqual(len(inputs.get_generated_facts()),
                     len(inputs_read.get_generated_facts()))
    # BUG FIX: compare subjects with subjects, not with generated facts.
    self.assertEqual(len(inputs.get_subjects()),
                     len(inputs_read.get_subjects()))
    self.assertEqual(len(inputs.get_patterns()),
                     len(inputs_read.get_patterns()))
    self.assertEqual(len(inputs.get_seeds()),
                     len(inputs_read.get_seeds()))
    self.assertEqual(len(inputs.get_objects()),
                     len(inputs_read.get_objects()))
def test_not_remove(self):
    """A fact carrying a TBC marker in its modality must survive the cleaner."""
    mso = MultipleSourceOccurrence()
    mso.add_raw("elephants eat big bananas", None, 2)
    fact = GeneratedFact("elephant", "eat", "bananas", "TBC[big bananas]",
                         0, MultipleScore(), mso)
    inputs = Inputs().add_generated_facts([fact])
    inputs = TBCCleaner(None).process(inputs)
    self.assertEqual(len(inputs.get_generated_facts()), 1)
def process(self, input_interface):
    """Run every pattern-generation submodule and merge their patterns.

    Each submodule is given the same input; the patterns they produce are
    appended to the input's existing patterns. All other fields (seeds,
    generated facts, subjects, objects) are passed through unchanged.

    Returns a new Inputs instance carrying the merged pattern list.
    """
    logging.info("Start the Pattern Generation module")
    submodule_outputs = [submodule.process(input_interface)
                         for submodule in self._submodules]
    # BUG FIX: copy before extending. get_patterns() may expose the
    # input's internal list, and += would then mutate it in place.
    new_patterns = list(input_interface.get_patterns())
    for output in submodule_outputs:
        new_patterns += output.get_patterns()
    return Inputs(input_interface.get_seeds(),
                  new_patterns,
                  input_interface.get_generated_facts(),
                  input_interface.get_subjects(),
                  input_interface.get_objects())
def test_article(self):
    """'hive' and 'a hive' should end up with a single shared object value."""
    facts = [
        GeneratedFact("bee", "make", "hive", "", False, 0.1,
                      MultipleSourceOccurrence()),
        GeneratedFact("bee", "make", "a hive", "", False, 0.1,
                      MultipleSourceOccurrence()),
    ]
    inputs = Inputs().add_generated_facts(facts)
    inputs = SimilarObjectRemover(None).process(inputs)
    generated = inputs.get_generated_facts()
    # Both facts remain, but their objects have been unified.
    self.assertEqual(len(generated), 2)
    distinct_objects = set(fact.get_object().get() for fact in generated)
    self.assertEqual(len(distinct_objects), 1)
def test_get_content(self):
    """Check the number of captions retrieved for each subject."""
    sc = ConceptualCaptionsComparatorSubmodule(None)
    self.empty_input = Inputs()
    subjects = {Subject(name)
                for name in ("elephant", "penguin", "lion", "raccoon")}
    sc.setup_processing(self.empty_input.add_subjects(subjects))
    # Expected caption counts per subject (order matches the original
    # assertion order).
    expected_counts = {
        "elephant": 3748,
        "penguin": 1273,
        "lion": 2616,
        "raccoon": 365,
    }
    for name, expected in expected_counts.items():
        self.assertEqual(expected, len(sc.get_contents(name)))
def setUp(self) -> None:
    """Instantiate the Simple Wikipedia co-occurrence submodule without cache."""
    self.simple_wikipedia_no_cache = SimpleWikipediaCooccurrenceSubmodule(
        None, False)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Prepare an OpenIE generator backed by a cache-less statement maker."""
    ref = ReferencableInterface("Dummy reference")
    self.openie_fact_generator = OpenIEFactGeneratorSubmodule(ref)
    self.openie_fact_generator.statement_maker = StatementMaker(
        use_cache=False)
    # Dummy name only useful for testing
    self.openie_fact_generator._name = "OPENIE"
    self.empty_input = Inputs()
def generate_input(self):
    """Feed a fully empty Inputs object to the seed module and return the result."""
    empty = Inputs([], [], [], [], [])
    return self._seeds.process(empty)
def setUp(self) -> None:
    """Instantiate the Google Books submodule without cache."""
    self.google_book_no_cache = GoogleBookSubmodule(None, False)
    self.empty_input = Inputs()
def generate_input(self):
    """Return an input containing only the subject 'elephant'."""
    return Inputs().add_subjects({Subject("elephant")})
def setUp(self) -> None:
    """Instantiate the lower-casing submodule and an empty input."""
    self.to_lower_case = ToLowerCaseSubmodule(None)
    self.empty_input = Inputs()
def run_for_subject(subject):
    """Run the full extraction pipeline for a single subject.

    Executes four stages — assertion generation, per-fact normalization,
    global normalization, and validation — and after every step stores a
    snapshot (submodule name, facts, elapsed time) in the current job's
    meta so a client can poll progress.
    """
    job = get_current_job()
    factory = DefaultSubmoduleFactory()
    # Factory names of the submodules used in each stage.
    submodule_generation_names = [
        "google-autocomplete",
        "bing-autocomplete",
        "yahoo-questions",
        "answerscom-questions",
        "quora-questions",
        "reddit-questions",
        "fact-combinor",
    ]
    submodule_normalization_names = [
        "lower-case", "tbc-cleaner", "only-subject", "filter-object",
        "no-personal", "singular-subject", "cleaning-predicate",
        "basic-modality", "present-continuous", "are-transformation",
        "can-transformation", "be-normalization", "identical-subj-obj",
        "present-conjugate"
    ]
    submodule_normalization_global_names = [
        "similar-object-remover", "fact-combinor"
    ]
    submodule_validation_names = [
        "google-book", "flickr-clusters", "imagetag",
        "wikipedia-cooccurrence", "simple-wikipedia-cooccurrence",
        "conceptual-captions", "what-questions"
    ]
    empty_input = Inputs()
    # The subject is lower-cased before entering the pipeline.
    empty_input = empty_input.add_subjects({Subject(subject.lower())})
    module_reference = ModuleReferenceInterface("")
    pattern_submodule = factory.get_submodule("manual-patterns-google",
                                              module_reference)
    empty_input = pattern_submodule.process(empty_input)
    # `result` accumulates one entry per stage; it is pushed to the job
    # meta after every step so progress is visible while running.
    result = []
    result.append(dict())
    result[-1]["step name"] = "Assertion Generation"
    result[-1]["steps"] = []
    job.meta = result
    job.save_meta()
    generated_facts = []
    for submodule_name in submodule_generation_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        begin_time = time.time()
        input_temp = submodule.process(empty_input)
        generated_facts += input_temp.get_generated_facts()
        step_info = dict()
        step_info["name"] = submodule.get_name()
        step_info["facts"] = [x.to_dict()
                              for x in input_temp.get_generated_facts()]
        step_info["time"] = time.time() - begin_time
        result[-1]["steps"].append(step_info)
        job.meta = result
        job.save_meta()
    new_input = empty_input.add_generated_facts(generated_facts)
    result.append(dict())
    result[-1]["step name"] = "Assertion Normalization"
    result[-1]["steps"] = []
    for submodule_name in submodule_normalization_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        step_info = dict()
        begin_time = time.time()
        step_info["name"] = submodule.get_name()
        step_info["modifications"] = []
        # Process each fact in isolation first, only to record which facts
        # this submodule would modify (for reporting).
        for generated_fact in new_input.get_generated_facts():
            input_temp = empty_input.add_generated_facts([generated_fact])
            input_temp = submodule.process(input_temp)
            if len(input_temp.get_generated_facts()) != 1 or input_temp.get_generated_facts()[0] != generated_fact:
                modification = {
                    "from": generated_fact.to_dict(),
                    "to": [x.to_dict()
                           for x in input_temp.get_generated_facts()]
                }
                step_info["modifications"].append(modification)
        step_info["time"] = time.time() - begin_time
        result[-1]["steps"].append(step_info)
        job.meta = result
        job.save_meta()
        # Then actually apply the submodule to the full input.
        new_input = submodule.process(new_input)
    result.append(dict())
    result[-1]["step name"] = "Assertion Normalization Global"
    result[-1]["steps"] = []
    for submodule_name in submodule_normalization_global_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        begin_time = time.time()
        new_input = submodule.process(new_input)
        step_info = dict()
        step_info["name"] = submodule.get_name()
        step_info["facts"] = [x.to_dict()
                              for x in new_input.get_generated_facts()]
        step_info["time"] = time.time() - begin_time
        result[-1]["steps"].append(step_info)
        job.meta = result
        job.save_meta()
    result.append(dict())
    result[-1]["step name"] = "Assertion Validation"
    result[-1]["steps"] = []
    begin_time = time.time()
    # All validation submodules are timed and reported as one single step.
    for submodule_name in submodule_validation_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        new_input = submodule.process(new_input)
    step_info = dict()
    step_info["name"] = "All validations"
    step_info["facts"] = [x.to_dict()
                          for x in new_input.get_generated_facts()]
    step_info["time"] = time.time() - begin_time
    result[-1]["steps"].append(step_info)
    job.meta = result
    job.save_meta()
def setUp(self):
    """Run the all-seeds module once and keep its output for the tests."""
    self.all_seeds_module = AllSeedsModule()
    self.inputs = self.all_seeds_module.process(Inputs())
def setUp(self) -> None:
    """Instantiate the identical subject/object submodule and an empty input."""
    self.identical = IdenticalSubjectObjectSubmodule(None)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Create the linear-combination submodule around a dummy reference."""
    self.dummy_reference = ReferencableInterface("Dummy reference")
    self.linear_combination = LinearCombinationWeightedSubmodule(
        self.dummy_reference)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Instantiate the can-transformation submodule and an empty input."""
    self.can_transformation = CanTransformationSubmodule(None)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Instantiate the animal seed module, its submodule and an empty input."""
    self.animal_submodule = AnimalSubmodule(None)
    self.animal_module = AnimalSeedModule()
    self.empty_input_interface = Inputs()
def setUp(self) -> None:
    """Instantiate the Quora questions submodule and an empty input."""
    self.quora = QuoraQuestionsSubmodule(None)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Instantiate the singular-subject submodule and an empty input."""
    self.to_singular = ToSingularSubjectSubmodule(None)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Instantiate the predicate-cleaning submodule and an empty input."""
    self.cleaning_predicate = CleaningPredicateSubmodule(None)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Instantiate the present-conjugate normalization and an empty input."""
    self.present_conjugate = PresentConjugateNormalization(None)
    self.empty_input = Inputs()
subject = second_collection_element else: pattern = second_collection_element subject = first_collection_element # Generate the query base_query = pattern.to_str_subject(subject) base_sentences = [] # Artificially add more suggestions to_process = [[]] yield base_query + " " # Generate inputs default_module_factory = DefaultModuleFactory() seeds = default_module_factory.get_module("all-seeds") inputs = Inputs([], [], [], [], []) inputs = seeds.process(inputs) patterns = default_module_factory.get_module("patterns") inputs = patterns.process(inputs) # Get query generator query_generator = get_all_queries(inputs) query_queue = Queue() for query in query_generator: query_queue.put(query) # Start Server app = Flask(__name__)
def setUp(self) -> None:
    """Instantiate the object-filtering submodule and an empty input."""
    # NOTE(review): attribute is named `cleaning_predicate` but holds a
    # FilterObjectSubmodule — name kept to preserve the test interface.
    self.cleaning_predicate = FilterObjectSubmodule(None)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Instantiate the be-normalization submodule and an empty input."""
    self.be_normalization = BeNormalizationSubmodule(None)
    self.empty_input = Inputs()
def setUp(self) -> None:
    """Instantiate the present-continuous submodule and an empty input."""
    self.present_continuous = PresentContinuousSubmodule(None)
    self.empty_input = Inputs()