def compute_oracle_derivation(parser, dsg, mapping=id, validation_method="F1"):
    """Return the k-best derivation whose labeled frames best match ``dsg``.

    Every derivation yielded by ``parser.k_best_derivation_trees()`` is
    evaluated with ``dog_evaluation``, wrapped in a ``DeepSyntaxGraph`` over
    ``dsg.sentence`` and scored against the labeled frames of ``dsg``.

    :param parser: object providing ``k_best_derivation_trees()``, which
        yields pairs whose second component is a derivation.
    :param dsg: reference graph; supplies ``sentence`` and ``labeled_frames``.
    :param mapping: callable applied to the evaluated graph before scoring.
        NOTE(review): the default is whatever ``id`` is bound to in this
        module; if that is the builtin it returns an integer rather than a
        graph -- confirm that an identity function is in scope.
    :param validation_method: metric to maximise, one of ``"F1"``,
        ``"Precision"`` or ``"Recall"``. Defaults to ``"F1"``, the value
        that used to be hard-coded.
    :return: the best-scoring derivation (the first one on ties), or
        ``None`` if the parser yields no derivation.
    :raises ValueError: if ``validation_method`` is not a known metric.
    """
    if validation_method not in ("F1", "Precision", "Recall"):
        raise ValueError(
            "unknown validation method: %r" % (validation_method,))

    def has_arguments(frame):
        # Same guard that do_parsing applies to labeled frames. The second
        # component is sized, so it must be tested with len(); comparing it
        # to 0 directly raises TypeError on Python 3.
        return len(frame[1]) > 0

    best_der = None
    best_score = -1
    relevant = dsg.labeled_frames(guard=has_arguments)

    for _, derivation in parser.k_best_derivation_trees():
        dog, sync = dog_evaluation(derivation)
        candidate = DeepSyntaxGraph(dsg.sentence, mapping(dog), sync)
        retrieved = candidate.labeled_frames(guard=has_arguments)
        inters = retrieved & relevant

        # When nothing is retrieved, precision is counted as 0 (the
        # "everything wrong" convention) rather than 1 ("no spans at all").
        precision = float(len(inters)) / len(retrieved) if retrieved else 0
        recall = float(len(inters)) / len(relevant) if relevant else 0
        if precision + recall > 0:
            fmeasure = 2.0 * precision * recall / (precision + recall)
        else:
            fmeasure = 0

        score = {
            "F1": fmeasure,
            "Precision": precision,
            "Recall": recall,
        }[validation_method]
        # Strict comparison: earlier derivations win ties.
        if score > best_score:
            best_der, best_score = derivation, score

    return best_der
def build_dummy_dsg(sentence, label):
    """Build a DeepSyntaxGraph with one isolated node per token.

    For the token at position ``i`` a node ``i`` is added together with a
    terminal edge that has no inputs and carries the first three token
    fields followed by the placeholder ``'_'``. Each sentence position is
    synchronized with exactly its own node.

    :param sentence: sequence of tokens, each indexable at 0, 1 and 2.
    :param label: label passed on to the resulting graph.
    :return: the constructed ``DeepSyntaxGraph``.
    """
    graph = DirectedOrderedGraph()
    alignment = []
    for position, token in enumerate(sentence):
        graph.add_node(position)
        terminal = (token[0], token[1], token[2], '_')
        graph.add_terminal_edge([], terminal, position)
        alignment.append([position])
    return DeepSyntaxGraph(sentence, graph, alignment, label=label)
def generate_sdg(n_nodes, maximum_inputs=4, upward_closed=False,
                 new_output=0.1, multiple_output=0.0):
    """Generate a graph with ``generate`` and wrap it in a DeepSyntaxGraph.

    All arguments are forwarded positionally to ``generate``. The sentence
    of the result consists of the incoming edge of every node, and every
    node is synchronized with itself.

    :return: the resulting ``DeepSyntaxGraph``.
    """
    graph = generate(n_nodes, maximum_inputs, upward_closed, new_output,
                     multiple_output)
    return DeepSyntaxGraph(
        [graph.incoming_edge(vertex) for vertex in graph.nodes],
        graph,
        [[vertex] for vertex in graph.nodes]
    )
def worker(parser, graph, return_dict):
    """Run ``parser`` and store the parsed graph in ``return_dict[0]``.

    If the parser does not recognize its input, ``return_dict`` is left
    untouched. Otherwise the best derivation is evaluated and wrapped in a
    ``DeepSyntaxGraph`` that reuses the sentence and label of ``graph``.

    :param parser: parser whose input has already been set.
    :param graph: source of ``sentence`` and ``label`` for the result.
    :param return_dict: mapping that receives the result under key ``0``.
    """
    parser.parse()
    if not parser.recognized():
        return

    best = parser.best_derivation_tree()
    assert best is not None
    evaluated_dog, alignment = dog_evaluation(best)
    return_dict[0] = DeepSyntaxGraph(
        graph.sentence, evaluated_dog, alignment, label=graph.label)
def parsing_postprocess(self, sentence, derivation, label=None):
    """Turn a derivation into a DeepSyntaxGraph over ``sentence``.

    The derivation is evaluated without compression. If binarization is
    enabled in ``self.induction_settings``, the graph is debinarized.
    Graphs that are not output connected are counted in
    ``self.statistics.not_output_connected``.

    ``label`` is only used to name the rendered graph; it is not passed on
    to the returned ``DeepSyntaxGraph``.
    """
    graph, alignment = dog_evaluation(derivation, compress=False)

    settings = self.induction_settings
    if settings.binarize:
        graph = graph.debinarize(is_bin=settings.is_bin)

    if not graph.output_connected():
        self.statistics.not_output_connected += 1
        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source; rendering is assumed to apply only to graphs that are
        # not output connected -- confirm.
        if self.interactive:
            render_name = "parsed_" + str(label)
            render_and_view_dog(graph, render_name)

    return DeepSyntaxGraph(sentence, graph, alignment)
def parse_sentence(lines, label=None):
    """Build a DeepSyntaxGraph from the column-formatted lines of a sentence.

    Each line is split on whitespace and must provide at least seven
    columns:

    * 0: integer node index
    * 1, 2, 3: form, lemma and part of speech
    * 4: ``'+'`` if the node is an output of the graph
    * 5: ``'+'`` if the token is a predicate
    * 6: frame
    * 7 and following: one column per predicate, in order of first
      appearance; any value other than ``'_'`` marks this token as an
      argument of that predicate with the given function label.

    Non-predicate tokens get a terminal edge without inputs immediately.
    Predicate edges are added after all lines have been read, because
    their arguments may appear on later lines.

    :param lines: iterable of strings, one per token.
    :param label: label passed on to the resulting graph.
    :return: the constructed ``DeepSyntaxGraph``.
    """
    dog = DirectedOrderedGraph()
    arguments = defaultdict(list)  # argument column -> [(node index, function)]
    predicates = {}                # node index -> terminal label
    predicate_list = []            # predicate node indices in reading order
    sentence = []
    synchronization = []

    for line in lines:
        contents = line.split()
        assert len(contents) >= 7, "expected at least 7 columns: %r" % (line,)

        idx = int(contents[0])
        dog.add_node(idx)
        form, lemma, pos = contents[1], contents[2], contents[3]
        frame = contents[6]

        # Compare marker strings by value; identity (`is`) only happens to
        # work when the interpreter interns short strings.
        if contents[4] == '+':
            dog.add_to_outputs(idx)

        if contents[5] == '+':
            predicates[idx] = (form, lemma, pos, frame)
            predicate_list.append(idx)
        else:
            dog.add_terminal_edge([], (form, lemma, pos, frame), idx)

        sentence.append((form, lemma, pos))
        synchronization.append([idx])

        for column, arg in enumerate(contents[7:]):
            if arg != '_':
                arguments[column].append((idx, arg))

    for idx in predicates:
        # The n-th predicate owns the n-th argument column.
        predicate_args = arguments[predicate_list.index(idx)]
        edge = dog.add_terminal_edge(predicate_args, predicates[idx], idx)
        for i, arg in enumerate(predicate_args):
            edge.set_function(i, arg[1])

    return DeepSyntaxGraph(sentence, dog, synchronization, label=label)
def parsing_postprocess(self, sentence, derivation, label=None):
    """Evaluate ``derivation`` and wrap the result in a DeepSyntaxGraph.

    :param sentence: sentence stored in the resulting graph.
    :param derivation: derivation to evaluate; must not be ``None``.
    :param label: label passed on to the resulting graph.
    :return: ``DeepSyntaxGraph`` built from the evaluated derivation.
    """
    assert derivation is not None
    evaluated = dog_evaluation(derivation)
    graph, alignment = evaluated
    return DeepSyntaxGraph(sentence, graph, alignment, label=label)
def do_parsing(parser, test_dsgs, term_labeling_token, oracle=False,
               debinarize=id, interactive=True):
    """Parse every graph in ``test_dsgs``, score the results and print a report.

    For each graph the parser input is prepared from ``dsg.sentence`` and
    the labels of ``dsg.dog`` are projected in place (``ConstituentTerminal``
    labels are replaced by their ``pos()``). Recognized inputs are scored
    against the reference frames; unrecognized ones are counted as failures.

    :param parser: parser providing ``set_input``, ``parse``, ``recognized``,
        ``best_derivation_tree`` and ``clear``.
    :param test_dsgs: iterable of reference graphs.
    :param term_labeling_token: provides ``prepare_parser_input(sentence)``.
    :param oracle: if true, select the derivation with
        ``compute_oracle_derivation`` instead of the parser's best one.
    :param debinarize: callable applied to the parsed graph before scoring.
        NOTE(review): the default is whatever ``id`` is bound to in this
        module; if that is the builtin it returns an integer rather than a
        graph -- confirm that an identity function is in scope.
    :param interactive: render parsed graphs that are not output connected.
        Defaults to ``True``, the value that used to be hard-coded.
    :return: the ``PredicateArgumentScoring`` instance holding all scores.
    """
    def project_label(token):
        return token.pos() if isinstance(token, ConstituentTerminal) else token

    def has_arguments(frame):
        return len(frame[1]) > 0

    scorer = PredicateArgumentScoring()
    not_output_connected = 0
    start = time.time()

    for dsg in test_dsgs:
        parser.set_input(
            term_labeling_token.prepare_parser_input(dsg.sentence))
        parser.parse()
        dsg.dog.project_labels(project_label)

        if parser.recognized():
            if oracle:
                derivation = compute_oracle_derivation(parser, dsg, debinarize)
            else:
                derivation = parser.best_derivation_tree()
            dog, sync = dog_evaluation(derivation)

            if not dog.output_connected():
                not_output_connected += 1
                # NOTE(review): nesting reconstructed from
                # whitespace-collapsed source; rendering is assumed to
                # apply only to graphs that are not output connected.
                if interactive:
                    render_and_view_dog(dog, "parsed_" + str(dsg.label))

            dsg2 = DeepSyntaxGraph(dsg.sentence, debinarize(dog), sync)
            scorer.add_accuracy_frames(
                dsg.labeled_frames(guard=has_arguments),
                dsg2.labeled_frames(guard=has_arguments))
        else:
            scorer.add_failure(dsg.labeled_frames(guard=has_arguments))

        parser.clear()

    print("Completed parsing in", time.time() - start, "seconds.")
    print("Parse failures:", scorer.labeled_frame_scorer.n_failures())
    print("Not output connected", not_output_connected)

    report_sections = (
        ("Labeled frames:", "labeled_frame_scorer"),
        ("Unlabeled frames:", "unlabeled_frame_scorer"),
        ("Labeled dependencies:", "labeled_dependency_scorer"),
        ("Unlabeled dependencies:", "unlabeled_dependency_scorer"),
    )
    for title, attribute in report_sections:
        print(title)
        sub_scorer = getattr(scorer, attribute)
        print("P", sub_scorer.precision(),
              "R", sub_scorer.recall(),
              "F1", sub_scorer.fmeasure(),
              "EM", sub_scorer.exact_match())

    return scorer