Example #1
0
 def _init_pipeline(self, folder, subject, candidates):
     """Build a Pipeline for *subject* whose knowledge base contains
     deep copies of the subject's own facts plus the candidate facts,
     re-indexed consecutively from 1.

     The temporary embedder/entailer output paths are derived from the
     slugified subject name so concurrent subjects do not collide.
     """
     slug = slugify(subject)
     pipeline = Pipeline(
         folder, {
             "embedder_partial_output_path":
             "embedder-tmp-" + slug + ".tsv",
             "entailer_partial_output_path":
             "entailer-tmp-" + slug + ".tsv",
         })
     kb = KnowledgeBase()
     combined = list(self.subjects[subject]) + list(candidates.values())
     for fact in combined:
         clone = copy.deepcopy(fact)
         # Next free 1-based slot in the knowledge base.
         clone.index = len(kb) + 1
         kb[clone.index] = clone
     pipeline.set_kb(kb)
     return pipeline
Example #2
0
 def objective(args):
     """Hyper-parameter objective: run the step that rebuilds the
     feature under tuning on every input folder, then score the
     resulting trackers against the annotation file.

     NOTE(review): `self` and `feature` are closed over from the
     enclosing tuning method.
     """
     trackers = []
     for folder in self.folders:
         # Merge the candidate parameters with a disabled log path.
         # (Replaces a needless functools.reduce over a two-element
         # tuple — dict(x, **y) is exactly what the fold computed.)
         pipeline = Pipeline(folder, dict(args, **{"log": ""}))
         pipeline.load()
         if feature == "evidence":
             pipeline.step_detective()
         if feature == "confidence":
             pipeline.step_assigner()
         tracker = Tracker()
         tracker.build(pipeline)
         trackers.append(tracker)
     PairEvaluator.FEATURE = feature
     return PairEvaluator(self.annotation_file, *trackers).evaluate()
Example #3
0
 def extend(self, subject, folder, verbose=True):
     """Generate candidate facts for *subject*, run them through a
     dedicated pipeline, and return the resulting Tracker.

     When no candidates are found an empty Pipeline/Tracker pair is
     created instead. Temporary embedder/entailer files produced for
     the subject are removed before returning.
     """
     if verbose:
         print("Extending '{}'".format(subject))
     candidates = self._generate_candidates(subject, verbose)
     if verbose:
         print("Adding", len(candidates), "facts.")
     if len(candidates) > 0:
         pipeline = self._init_pipeline(folder, subject, candidates)
         self._extend_inputs(subject, pipeline, verbose)
         tracker = self._extract_tracker(pipeline, verbose)
     else:
         pipeline = Pipeline(folder, dict())
         tracker = Tracker()
     # Clean up the partial output files the pipeline may have written.
     # os.remove is portable and avoids spawning a shell (the previous
     # os.system("rm " + path) broke on names that need quoting and did
     # nothing on Windows).
     slug = slugify(subject)
     for prefix in ("embedder-tmp-", "entailer-tmp-"):
         path = prefix + slug + ".tsv"
         if os.path.isfile(path):
             os.remove(path)
     return tracker
Example #4
0
 def objective(args):
     """Hyper-parameter objective: run the pipeline with *args*,
     rebuild the feature under tuning, and return the pair-evaluation
     score.

     NOTE(review): `self`, `feature`, `inputs_folder`, `partition` and
     `n_jobs` are closed over from the enclosing scope — confirm
     against the outer tuning method.
     """
     pipeline = Pipeline(inputs_folder, args)
     # Push the candidate parameter values into the global Parameters.
     Parameters.process(**args)
     if feature == "evidence":
         pipeline.load_detective()
         # pipeline.step_detective()
     if feature == "confidence":
         # Confidence needs a full bulk assignment pass over the
         # partition; the bulk folder is rebuilt from scratch each trial.
         bulk_pipeline = BulkPipeline(inputs_folder, partition)
         if os.path.isdir(BulkTuner.BULK_TUNER_FOLDER):
             shutil.rmtree(BulkTuner.BULK_TUNER_FOLDER)
         bulk_pipeline.process(BulkTuner.BULK_TUNER_FOLDER, int(n_jobs))
         # Release the bulk pipeline before gathering to free memory.
         del bulk_pipeline
         assignment = BulkGatherer(
             BulkTuner.BULK_TUNER_FOLDER).gather(False)
         pipeline.set_assignment(assignment)
     tracker = Tracker()
     tracker.build(pipeline)
     PairEvaluator.FEATURE = feature
     PairEvaluator.CONFIDENCE = .5
     # Evaluated twice: once verbosely for logging, once for the value
     # returned to the optimizer.
     print(PairEvaluator(self.annotation_file, tracker).evaluate(True))
     return PairEvaluator(self.annotation_file, tracker).evaluate()
Example #5
0
 def __call__(self, worker):
     """Run the assignment step for this task's facts and persist the
     result to assignment.tsv, skipping work if that file already
     exists. Always returns None (worker-pool protocol).
     """
     if os.path.isfile(self.path("assignment.tsv")):
         return None
     if worker.verbose:
         # Bug fix: the original referenced an undefined name `facts`
         # (NameError at runtime when verbose); the list lives on the
         # task as `self.facts`.
         print(os.getpid(), self.concept, len(self.facts))
     self.parameters["log"] = False
     pipeline = Pipeline(self.path(""), self.parameters)
     # Feed the shared inputs in without re-saving them to disk.
     pipeline.set_kb(worker.inputs.get_kb().extract(self.facts), save=False)
     pipeline.set_taxonomy(worker.inputs.get_taxonomy(), save=False)
     pipeline.set_similarity_matrix(worker.inputs.get_similarity_matrix(),
                                    save=False)
     pipeline.set_detective(worker.inputs.get_detective(), save=False)
     pipeline.step_assigner()
     # Keep only the assignment entries belonging to the central facts;
     # iterate over a copy of the keys since we delete while looping.
     central_facts = self.facts[:self.n_central_facts]
     assignment = pipeline.get_assignment()
     for var in list(assignment.map.keys()):
         if var.index not in central_facts:
             del assignment.map[var]
     assignment.save(self.path("assignment.tsv"))
     # Release the pipeline before returning to free worker memory.
     del pipeline
     return None
Example #6
0
 def pre_process(self):
     """Run pipeline steps 1 through 7 (everything except the final
     dimension-assignment step) on the configured inputs folder."""
     pipeline = Pipeline(self.inputs_folder, self.parameters)
     pipeline.process([1, 2, 3, 4, 5, 6, 7])
Example #7
0
File: modules.py  Project: ychalier/dice
def pipeline(argv):
    """pipeline
    arguments:  <inputs-folder> [parameter=value]*

    options:    NAME                            DEFAULT VALUE
                log                             False
                verbose                         False
                notify                          False
                logger_path                     log
                kb_path                         data/kb.tsv
                entailer_batch_size             100
                entailer_n_jobs                 2
                steps                           1-8

    parameters:
                SIMILARITY_THRESHOLD = .75
                REMARKABLE_ALPHA = 0.2861481307916379
                TYPICAL_ALPHA = 0.9982891056446265
                TYPICAL_BETA = 0.0009590134436654157
                PLAUSIBLE_ALPHA = 0.3420266845860523
                PLAUSIBLE_BETA = 0.6786119833435241
                EVIDENCE_OFFSET = 1.3396791371188632
                ASSIGNMENT_METHOD = 1
                TAXONOMY_BUILDER = "webisalod"
                TAXONOMY_BUILDER_LOWS_THRESHOLD = 10
                TAXONOMY_BUILDER_EDGE_THRESHOLD = .4
                FUSE_ALPHA = .8
                FUSE_THRESHOLD = .1
                RULE_EXISTENCE_WEIGHT = 10.
                RULE_NOT_PLAUSIBLE_IMPLIES_REMARKABLE_SIBLINGS_WEIGHT = 0.4136813802555934
                RULE_NOT_PLAUSIBLE_IMPLIES_REMARKABLE_WEIGHT = 0.8385061349318154
                RULE_PLAUSIBILITY_INFERENCE_WEIGHT = 0.8899064124827547
                RULE_PLAUSIBILITY_INHERITANCE_WEIGHT = 0.09630439631215232
                RULE_REMARKABILITY_INHERITANCE_WEIGHT = 0
                RULE_REMARKABILITY_SIBLINGS_WEIGHT = 0.01
                RULE_SALIENT_IMPLIES_PLAUSIBLE_WEIGHT = 0.10873639531265827
                RULE_TYPICAL_AND_REMARKABLE_IMPLY_SALIENT_WEIGHT = 0
                RULE_TYPICAL_IMPLIES_PLAUSIBLE_WEIGHT = 0.5446133523460819
                RULE_TYPICAL_PREVENTS_REMARKABLE_SIBLINGS_WEIGHT = 0.0331447944410993
                RULE_TYPICAL_PREVENTS_REMARKABLE_WEIGHT = 0.05332233624133091
                RULE_TYPICALITY_INFERENCE_WEIGHT = 0.42207454477107076
                RULE_TYPICALITY_INHERITANCE_WEIGHT = 0.5266373056914903
                RULE_SIMILARITY_WEIGHT = 0.9843530983393707
                DUMMY_INJECTER_THRESHOLD = 0.0002
                DUMMY_INJECTER_SOURCE = "DUMMY"

    taxonomy_builder: {conceptnet|webisalod|wordnet|merged}

    assignment method:
                0:  Maximum Satisfiability
                1:  Integer Linear Programming

    steps:
                1. Loading knowledge base
                2. Building taxonomy
                3. Embedding facts
                4. Computing similarity matrix
                5. Computing probability
                6. Computing entailment
                7. Computing evidence
                8. Assigning dimensions
    """
    from dice import Pipeline
    inputs_folder, *parameters = argv
    # Split on the FIRST "=" only, so values may themselves contain "="
    # (e.g. kb_path=data/a=b.tsv). The previous split("=")[1] silently
    # dropped everything after a second "=".
    args = {}
    for parameter in parameters:
        key, value = parameter.split("=", 1)
        args[key] = value
    pipeline = Pipeline(inputs_folder, args)
    # Default: run every step, 1 through 8.
    steps = list(range(1, 9))
    if "steps" in args:
        # Parse a comma-separated list of step numbers and inclusive
        # "start-stop" ranges, e.g. "1-4,6,8".
        steps = []
        for isolated_range in args["steps"].split(","):
            if "-" in isolated_range:
                start, stop = map(int, isolated_range.split("-"))
                steps.extend(range(start, stop + 1))
            else:
                steps.append(int(isolated_range))
    pipeline.process(steps)