# Corpus pre-processing pipeline.
#
# Each step is registered with a key, a human-readable title and a callable.
# The callables take two arguments: `f`, from which the results of earlier
# steps are looked up by their key (and from which "set" is read), and a
# second argument from which the "config" entry is read by the first step.
# Every step after the first works on a `.copy()` of its predecessor's result.
# NOTE(review): the precise contract of both arguments is defined by Pipeline,
# which is not visible here -- confirm against its implementation.
CorpusPreProcessPipeline = Pipeline()

# Build the corpus via the reader registered in the config for this set.
CorpusPreProcessPipeline.enqueue(
    "corpus",
    "Read Corpus",
    lambda f, x: x["config"].reader[f["set"]](f["set"]),
)
CorpusPreProcessPipeline.enqueue(
    "graphify",
    "RDF to Graph",
    lambda f, _: f["corpus"].copy().generate_graphs(),
)
CorpusPreProcessPipeline.enqueue(
    "spelling",
    "Fix Spelling",
    lambda f, _: f["graphify"].copy().fix_spelling(),
)
CorpusPreProcessPipeline.enqueue(
    "entities",
    "Describe entities",
    lambda f, _: f["spelling"].copy().describe_entities(),
)

# Test does not need matching entities or plans, so the test pipeline is
# derived here, before those steps are added to the corpus pipeline.
# NOTE(review): presumably `mutate({})` returns an independent copy holding
# only the steps enqueued so far -- confirm against Pipeline.mutate.
TestCorpusPreProcessPipeline = CorpusPreProcessPipeline.mutate({})

CorpusPreProcessPipeline.enqueue(
    "match-ents",
    "Match Entities",
    lambda f, _: f["entities"].copy().match_entities(),
)
CorpusPreProcessPipeline.enqueue(
    "match-plans",
    "Match Plans",
    lambda f, _: f["match-ents"].copy().match_plans(),
)
CorpusPreProcessPipeline.enqueue(
    "tokenize",
    "Tokenize Plans & Sentences",
    lambda f, _: f["match-plans"].copy().tokenize_plans().tokenize_delex(),
)
# Final step serialises the exported corpus to a JSON string.
CorpusPreProcessPipeline.enqueue(
    "to-json",
    "Export in a readable format",
    lambda f, _: json.dumps(f["tokenize"].export()),
    ext="json",
)
# Remaining stages of the main pipeline: each stage is itself a sub-pipeline
# registered under a key and a human-readable title.
MainPipeline.enqueue(
    "test-corpus", "Pre-process test data", TestingPreProcessPipeline
)
MainPipeline.enqueue("translate", "Translate Test", TranslatePipeline)
MainPipeline.enqueue("evaluate", "Evaluate Translations", EvaluationPipeline)

if __name__ == "__main__":
    # Script entry point: run the main pipeline on WebNLG with the naive
    # planner, then print one randomly chosen translation and the
    # evaluation result.
    experts = [
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert,
    ]
    naive_planner = NaivePlanner(WeightedProductOfExperts(experts))

    # Alternative planners, currently disabled:
    # neural_planner = NeuralPlanner()
    # combined_planner = CombinedPlanner((neural_planner, naive_planner))

    config = Config(reader=WebNLGDataReader, planner=naive_planner)

    # Inject the config into a mutated copy of the pipeline and execute it.
    configured_pipeline = MainPipeline.mutate({"config": config})
    res = configured_pipeline.execute("WebNLG", cache_name="WebNLG")

    print()

    # Show a single random item from the translation step's data.
    d = random.choice(res["translate"].data)
    print("Random Sample:")
    print("Graph:", d.graph.as_rdf())
    print("Plan:", d.plan)
    print("Translation:", d.hyp)
    print("Reference: ", d.text)

    print()

    print("BLEU", res["evaluate"])
neural_planner = NeuralPlanner()

# Planner sub-pipeline: train a planner, then pre-process the test data.
# The concrete planner is supplied through the config each time this
# pipeline is mutated below.
PlanPipeline = Pipeline()
PlanPipeline.enqueue("train-planner", "Train Planner", TrainPlannerPipeline)
PlanPipeline.enqueue(
    "test-corpus", "Pre-process test data", TestingPreProcessPipeline
)

# Experiments pipeline: pre-process the training data once, then register
# one step per planner and per referring-expression generator (REG).
ExperimentsPipeline = Pipeline()
ExperimentsPipeline.enqueue(
    "pre-process", "Pre-process training data", TrainingPreProcessPipeline
)

# Train all planners
# # Naive Planner
ExperimentsPipeline.enqueue(
    "naive-planner",
    "Train Naive Planner",
    PlanPipeline.mutate(
        {"config": Config(reader=WebNLGDataReader, planner=naive_planner)}
    ),
)
# # Neural Planner
ExperimentsPipeline.enqueue(
    "neural-planner",
    "Train Neural Planner",
    PlanPipeline.mutate(
        {"config": Config(reader=WebNLGDataReader, planner=neural_planner)}
    ),
)

# REG
# # Naive REG
ExperimentsPipeline.enqueue(
    "naive-reg",
    "Train Naive Referring Expressions Generator",
    REGPipeline.mutate({"config": Config(reg=NaiveREG)}),
)
# # Bert REG
ExperimentsPipeline.enqueue(
    "bert-reg",
    "Train BERT Referring Expressions Generator",
    REGPipeline.mutate({"config": Config(reg=BertREG)}),
)