Example #1
 def setUp(self) -> None:
     dummy_reference = ReferencableInterface("Dummy reference")
     self.openie_fact_generator = OpenIEFactGeneratorSubmodule(
         dummy_reference)
     self.openie_fact_generator._name = "OPENIE"  # Dummy name, used only for testing
     self.empty_input = Inputs()
     self.basic_modality = BasicModalitySubmodule(None)
Example #2
 def setUp(self) -> None:
     dummy_reference = ReferencableInterface("Dummy reference")
     self.openie_fact_generator = OpenIEFactGeneratorSubmodule(
         dummy_reference)
     self.openie_fact_generator._name = "OPENIE"  # Dummy name, used only for testing
     self.empty_input = Inputs()
     self.are_transformation = AreTransformationSubmodule(None)
Example #3
 def setUp(self) -> None:
     dummy_reference = ReferencableInterface("Dummy reference")
     self.openie_fact_generator = OpenIEFactGeneratorSubmodule(
         dummy_reference)
     self.openie_fact_generator._name = "OPENIE"  # Dummy name, used only for testing
     self.empty_input = Inputs()
     self.associations = ImagetagSubmodule(None)
     self.associations_flick_cluster = FlickrClustersSubmodule(None)
Example #4
    def test_conceptual_caption(self):
        sc = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        self.dummy_reference = ReferencableInterface("DUMMY")

        dataset = [("elephant", "download", "baby", 0),
                   ("elephant", "have", "tusks", 1),
                   ("lion", "eat", "gazella", 0),
                   ("penguin", "eat", "fish", 0),
                   ("gorilla", "eat", "banana", 0),
                   ("sky", "hasProperty", "blue", 0),
                   ("computer", "is", "working", 1),
                   ("raccoon", "hasProperty", "blue", 0)]
        subjects = {
            Subject("elephant"),
            Subject("penguin"),
            Subject("lion"),
            Subject("gorilla"),
            Subject("sky"),
            Subject("computer"),
            Subject("raccoon")
        }

        gfs = []
        pos = 0
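        # Alternate the score source between the Google and Bing autocomplete submodules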
        for subject, predicate, obj, truth in dataset:
            pos += 1
            score = MultipleScore()
            if pos % 2 == 0:
                score.add_score(
                    truth, self.dummy_reference,
                    GoogleAutocompleteSubmodule(self.dummy_reference))
            else:
                score.add_score(
                    truth, self.dummy_reference,
                    BingAutocompleteSubmodule(self.dummy_reference))
            gfs.append(
                GeneratedFact(subject, predicate, obj, "", False, score,
                              MultipleSourceOccurrence()))
        score2 = MultipleScore()
        score2.add_score(1, self.dummy_reference,
                         GoogleAutocompleteSubmodule(self.dummy_reference))
        gfs.append(
            GeneratedFact(
                "elephant", "be", "big", "", False, score2,
                MultipleSourceOccurrence.from_raw("elephants are big", None,
                                                  1)))
        inputs = self.empty_input.add_generated_facts(gfs).add_subjects(
            subjects)
        inputs = sc.process(inputs)
        self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
        self.assertEqual(
            len(inputs.get_generated_facts()[0].get_score().scores), 2)
        self.assertNotAlmostEqual(
            inputs.get_generated_facts()[1].get_score().scores[1][0],
            0,
            delta=1e-5)
Example #5
 def setUp(self) -> None:
     self.autocomplete = BingAutocompleteSubmodule(None,
                                                   use_cache=False,
                                                   look_new=True)
     self.autocomplete_cache = BingAutocompleteSubmodule(
         None,
         use_cache=True,
         cache_name="google-cache-test",
         look_new=True)
     self.empty_input = Inputs()
Example #6
 def test_save(self):
     inputs = Inputs()
     subjects = [Subject("baba"), Subject("coko")]
     patterns = [
         PatternGoogle("why are"),
         PatternGoogle("Why are", "hasProperty", True)
     ]
     mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
     mmr.add_reference(ModuleReferenceInterface("Module1"))
     msr = MultipleSubmoduleReference(
         SubmoduleReferenceInterface("Submodule0"))
     msr.add_reference(SubmoduleReferenceInterface("Submodule0"))
     ms0 = MultipleScore()
     ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                   SubmoduleReferenceInterface("Submodule0"))
     ms1 = MultipleScore()
     ms1.add_score(1.0, mmr, msr)
     ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                   SubmoduleReferenceInterface("Submodule2"))
     mp0 = MultiplePattern()
     mp0.add_pattern(patterns[0])
     mp1 = MultiplePattern()
     mp1.add_pattern(patterns[0])
     mp1.add_pattern(patterns[1])
     gfs = [
         GeneratedFact(
             "baba", "is", "you", "sometimes", False, ms0,
             MultipleSourceOccurrence.from_raw("baba is you", msr, 1), mp0),
         GeneratedFact(
             "coko", "is", "dead", "always", True, ms1,
             MultipleSourceOccurrence.from_raw("toto is always dead", msr,
                                               1), mp1)
     ]
     seeds = [
         Fact("baba", "is", "us", None, False),
         Fact("coko", "are", "missing", "coucou", True)
     ]
     objects = [Object("missing"), Object("you")]
     inputs = inputs.replace_seeds(seeds)
     inputs = inputs.replace_patterns(patterns)
     inputs = inputs.replace_subjects(subjects)
     inputs = inputs.replace_generated_facts(gfs)
     inputs = inputs.replace_objects(objects)
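     # Round-trip through JSON and compare the size of each collection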
     inputs.save("temp.json")
     inputs_read = inputs.load("temp.json")
     self.assertEqual(len(inputs.get_generated_facts()),
                      len(inputs_read.get_generated_facts()))
     self.assertEqual(len(inputs.get_subjects()),
                      len(inputs_read.get_subjects()))
     self.assertEqual(len(inputs.get_patterns()),
                      len(inputs_read.get_patterns()))
     self.assertEqual(len(inputs.get_seeds()), len(inputs_read.get_seeds()))
     self.assertEqual(len(inputs.get_objects()),
                      len(inputs_read.get_objects()))
Example #7
 def test_not_remove(self):
     inputs = Inputs()
     mso = MultipleSourceOccurrence()
     mso.add_raw("elephants eat big bananas", None, 2)
     gfs = [
         GeneratedFact("elephant", "eat", "bananas", "TBC[big bananas]", 0,
                       MultipleScore(), mso)
     ]
     inputs = inputs.add_generated_facts(gfs)
     tbc_cleaner = TBCCleaner(None)
     inputs = tbc_cleaner.process(inputs)
     self.assertEqual(len(inputs.get_generated_facts()), 1)
Example #8
 def process(self, input_interface):
     logging.info("Start the Pattern Generation module")
     new_inputs = []
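     # Each submodule processes the original input independently; only the
     # generated patterns are merged back into a single Inputs object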
     for submodule in self._submodules:
         new_inputs.append(submodule.process(input_interface))
     new_patterns = input_interface.get_patterns()
     for inputs in new_inputs:
         new_patterns += inputs.get_patterns()
     return Inputs(input_interface.get_seeds(),
                   new_patterns,
                   input_interface.get_generated_facts(),
                   input_interface.get_subjects(),
                   input_interface.get_objects())
Example #9
 def test_article(self):
     gfs = [
         GeneratedFact("bee", "make", "hive", "", False, 0.1,
                       MultipleSourceOccurrence()),
         GeneratedFact("bee", "make", "a hive", "", False, 0.1,
                       MultipleSourceOccurrence())
     ]
     inputs = Inputs()
     inputs = inputs.add_generated_facts(gfs)
     remover = SimilarObjectRemover(None)
     inputs = remover.process(inputs)
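     # Both facts are kept, but "hive" and "a hive" are normalized to a single object value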
     self.assertEqual(len(inputs.get_generated_facts()), 2)
     self.assertEqual(
         len(
             set([
                 x.get_object().get() for x in inputs.get_generated_facts()
             ])), 1)
Example #10
 def test_get_content(self):
     sc = ConceptualCaptionsComparatorSubmodule(None)
     self.empty_input = Inputs()
     subjects = {
         Subject("elephant"),
         Subject("penguin"),
         Subject("lion"),
         Subject("raccoon")
     }
     inputs = self.empty_input.add_subjects(subjects)
     sc.setup_processing(inputs)
     contents = sc.get_contents("elephant")
     self.assertEqual(3748, len(contents))
     contents = sc.get_contents("penguin")
     self.assertEqual(1273, len(contents))
     contents = sc.get_contents("lion")
     self.assertEqual(2616, len(contents))
     contents = sc.get_contents("raccoon")
     self.assertEqual(365, len(contents))
Example #11
 def setUp(self) -> None:
     self.simple_wikipedia_no_cache = SimpleWikipediaCooccurrenceSubmodule(None, False)
     self.empty_input = Inputs()
Example #12
 def setUp(self) -> None:
     dummy_reference = ReferencableInterface("Dummy reference")
     self.openie_fact_generator = OpenIEFactGeneratorSubmodule(dummy_reference)
     self.openie_fact_generator.statement_maker = StatementMaker(use_cache=False)
     self.openie_fact_generator._name = "OPENIE"  # Dummy name, used only for testing
     self.empty_input = Inputs()
Example #13
 def generate_input(self):
     # just give an empty input to the seed module
     return self._seeds.process(Inputs([], [], [], [], []))
Example #14
 def setUp(self) -> None:
     self.google_book_no_cache = GoogleBookSubmodule(None, False)
     self.empty_input = Inputs()
Example #15
 def generate_input(self):
     # just give an empty input to the seed module
     empty_input = Inputs()
     return empty_input.add_subjects({Subject("elephant")})
Example #16
 def setUp(self) -> None:
     self.to_lower_case = ToLowerCaseSubmodule(None)
     self.empty_input = Inputs()
Example #17
def run_for_subject(subject):
    job = get_current_job()

    factory = DefaultSubmoduleFactory()

    submodule_generation_names = [
        "google-autocomplete",
        "bing-autocomplete",
        "yahoo-questions",
        "answerscom-questions",
        "quora-questions",
        "reddit-questions",
        "fact-combinor",
    ]

    submodule_normalization_names = [
        "lower-case",
        "tbc-cleaner",
        "only-subject",
        "filter-object",
        "no-personal",
        "singular-subject",
        "cleaning-predicate",
        "basic-modality",
        "present-continuous",
        "are-transformation",
        "can-transformation",
        "be-normalization",
        "identical-subj-obj",
        "present-conjugate"
    ]

    submodule_normalization_global_names = [
        "similar-object-remover",
        "fact-combinor"
    ]

    submodule_validation_names = [
        "google-book",
        "flickr-clusters",
        "imagetag",
        "wikipedia-cooccurrence",
        "simple-wikipedia-cooccurrence",
        "conceptual-captions",
        "what-questions"
    ]

    empty_input = Inputs()
    empty_input = empty_input.add_subjects({Subject(subject.lower())})

    module_reference = ModuleReferenceInterface("")

    pattern_submodule = factory.get_submodule("manual-patterns-google", module_reference)
    empty_input = pattern_submodule.process(empty_input)

    result = []

    result.append(dict())
    result[-1]["step name"] = "Assertion Generation"
    result[-1]["steps"] = []
    job.meta = result
    job.save_meta()
    generated_facts = []
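    # Run every assertion-generation submodule on the input, accumulate its facts,
    # and report per-submodule facts and timing through the job metadata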
    for submodule_name in submodule_generation_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        begin_time = time.time()
        input_temp = submodule.process(empty_input)
        generated_facts += input_temp.get_generated_facts()
        step_info = dict()
        step_info["name"] = submodule.get_name()
        step_info["facts"] = [x.to_dict() for x in input_temp.get_generated_facts()]
        step_info["time"] = time.time() - begin_time
        result[-1]["steps"].append(step_info)
        job.meta = result
        job.save_meta()
    new_input = empty_input.add_generated_facts(generated_facts)

    result.append(dict())
    result[-1]["step name"] = "Assertion Normalization"
    result[-1]["steps"] = []
    for submodule_name in submodule_normalization_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        step_info = dict()
        begin_time = time.time()
        step_info["name"] = submodule.get_name()
        step_info["modifications"] = []
        for generated_fact in new_input.get_generated_facts():
            input_temp = empty_input.add_generated_facts([generated_fact])
            input_temp = submodule.process(input_temp)
            if len(input_temp.get_generated_facts()) != 1 or input_temp.get_generated_facts()[0] != generated_fact:
                modification = {
                    "from": generated_fact.to_dict(),
                    "to": [x.to_dict() for x in input_temp.get_generated_facts()]
                }
                step_info["modifications"].append(modification)
        step_info["time"] = time.time() - begin_time
        result[-1]["steps"].append(step_info)
        job.meta = result
        job.save_meta()
        new_input = submodule.process(new_input)

    result.append(dict())
    result[-1]["step name"] = "Assertion Normalization Global"
    result[-1]["steps"] = []
    for submodule_name in submodule_normalization_global_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        begin_time = time.time()
        new_input = submodule.process(new_input)
        step_info = dict()
        step_info["name"] = submodule.get_name()
        step_info["facts"] = [x.to_dict() for x in new_input.get_generated_facts()]
        step_info["time"] = time.time() - begin_time
        result[-1]["steps"].append(step_info)
        job.meta = result
        job.save_meta()

    result.append(dict())
    result[-1]["step name"] = "Assertion Validation"
    result[-1]["steps"] = []
    begin_time = time.time()
    for submodule_name in submodule_validation_names:
        submodule = factory.get_submodule(submodule_name, module_reference)
        new_input = submodule.process(new_input)
    step_info = dict()
    step_info["name"] = "All validations"
    step_info["facts"] = [x.to_dict() for x in new_input.get_generated_facts()]
    step_info["time"] = time.time() - begin_time
    result[-1]["steps"].append(step_info)
    job.meta = result
    job.save_meta()
Example #18
 def setUp(self):
     self.all_seeds_module = AllSeedsModule()
     empty_input = Inputs()
     self.inputs = self.all_seeds_module.process(empty_input)
Example #19
 def setUp(self) -> None:
     self.identical = IdenticalSubjectObjectSubmodule(None)
     self.empty_input = Inputs()
Example #20
 def setUp(self) -> None:
     self.dummy_reference = ReferencableInterface("Dummy reference")
     self.linear_combination = LinearCombinationWeightedSubmodule(
         self.dummy_reference)
     self.empty_input = Inputs()
Example #21
 def setUp(self) -> None:
     self.can_transformation = CanTransformationSubmodule(None)
     self.empty_input = Inputs()
Example #22
 def setUp(self) -> None:
     self.animal_submodule = AnimalSubmodule(None)
     self.animal_module = AnimalSeedModule()
     self.empty_input_interface = Inputs()
Example #23
 def setUp(self) -> None:
     self.quora = QuoraQuestionsSubmodule(None)
     self.empty_input = Inputs()
Example #24
 def setUp(self) -> None:
     self.to_singular = ToSingularSubjectSubmodule(None)
     self.empty_input = Inputs()
Example #25
 def setUp(self) -> None:
     self.cleaning_predicate = CleaningPredicateSubmodule(None)
     self.empty_input = Inputs()
Example #26
 def setUp(self) -> None:
     self.present_conjugate = PresentConjugateNormalization(None)
     self.empty_input = Inputs()
Example #27
                subject = second_collection_element
            else:
                pattern = second_collection_element
                subject = first_collection_element
            # Generate the query
            base_query = pattern.to_str_subject(subject)
            base_sentences = []
            # Artificially add more suggestions
            to_process = [[]]
            yield base_query + " "


# Generate inputs
default_module_factory = DefaultModuleFactory()
seeds = default_module_factory.get_module("all-seeds")
inputs = Inputs([], [], [], [], [])
inputs = seeds.process(inputs)
patterns = default_module_factory.get_module("patterns")
inputs = patterns.process(inputs)

# Get query generator
query_generator = get_all_queries(inputs)

query_queue = Queue()
for query in query_generator:
    query_queue.put(query)

# Start Server
app = Flask(__name__)

Example #28
 def setUp(self) -> None:
     self.cleaning_predicate = FilterObjectSubmodule(None)
     self.empty_input = Inputs()
Example #29
 def setUp(self) -> None:
     self.be_normalization = BeNormalizationSubmodule(None)
     self.empty_input = Inputs()
Example #30
 def setUp(self) -> None:
     self.present_continuous = PresentContinuousSubmodule(None)
     self.empty_input = Inputs()