def _test_features(config, feature_extractor_creator, filename, write_features):
    """
    Follow the oracle over one passage, collect the features extracted at every step and
    compare them with the stored reference file (optionally rewriting that file first)
    :param config: configuration handed to the extractor creator; supplies the vocab and receives the format
    :param feature_extractor_creator: callable building the feature extractor; its `annotated` attribute
                                      is passed on to load_passage
    :param filename: passage file to load
    :param write_features: if true, overwrite the reference file with the extracted features before comparing
    """
    extractor = feature_extractor_creator(config)
    passage = load_passage(filename, annotate=feature_extractor_creator.annotated)
    textutil.annotate(passage, as_array=True, as_extra=False, vocab=config.vocab())
    config.set_format(passage.extra.get("format") or "ucca")
    oracle, state, actions = Oracle(passage), State(passage), Actions()

    # Non-numeric parameters get dropout disabled and are (re-)initialized
    for name, param in extractor.params.items():
        if param.numeric:
            continue
        param.dropout = 0
        extractor.init_param(name)

    collected = [extractor.init_features(state)]
    finished = False
    while not finished:  # Always runs at least once; the finished flag is only read after a transition
        extract_features(extractor, state, collected)
        chosen = min(oracle.get_actions(state, actions).values(), key=str)
        state.transition(chosen)
        if state.need_label:
            extract_features(extractor, state, collected)
            node_label, _unused = oracle.get_label(state, chosen)
            state.label_node(node_label)
        finished = state.finished

    # One "key value" line per feature, sorted, with a " \n" separator line after every non-empty feature dict
    lines = []
    for feature_dict in collected:
        if not feature_dict:
            continue
        for pair in sorted(feature_dict.items()) + [("", "")]:
            lines.append("%s %s\n" % pair)

    reference_name = "-".join((basename(filename), str(feature_extractor_creator))) + ".txt"
    reference = os.path.join("test_files", "features", reference_name)
    if write_features:
        with open(reference, "w", encoding="utf-8") as out:
            out.writelines(lines)
    with open(reference, encoding="utf-8") as stored:
        expected = stored.readlines()
    assert expected == lines, reference
def gen_actions(passage):
    """
    Generate the oracle's action sequence for a passage, one string per transition
    :param passage: passage to create the oracle and the state from
    :return: generator of str(action), followed by a space and the node label when the
             transition required labeling a node
    """
    oracle, state, actions = Oracle(passage), State(passage), Actions()
    while True:
        chosen = min(oracle.get_actions(state, actions).values(), key=str)
        state.transition(chosen)
        parts = [str(chosen)]  # Rendered after the transition, as the state may have updated the action
        if state.need_label:
            node_label, _unused = oracle.get_label(state, chosen)
            state.label_node(node_label)
            parts.append(str(node_label))
        yield " ".join(parts)
        if state.finished:
            return
class PassageParser(AbstractParser):
    """ Parser for a single passage, has a state and optionally an oracle """

    def __init__(self, passage, *args, **kwargs):
        """
        :param passage: passage to parse; also used as the initial output until parsing replaces it
        :param args: positional arguments forwarded to the base parser
        :param kwargs: keyword arguments forwarded to the base parser
        """
        super().__init__(*args, **kwargs)
        self.passage = self.out = passage
        # When training/evaluating, the passage dictates the format; otherwise take the first (sorted) format
        # shared by the model and the command line, falling back to the model's own formats
        self.format = self.passage.extra.get("format") if self.training or self.evaluation else \
            sorted(set.intersection(*map(set, filter(None, (self.model.formats, self.config.args.formats)))) or
                   self.model.formats)[0]
        if self.training and self.config.args.verify:
            errors = list(validate(self.passage))
            assert not errors, errors
        self.in_format = self.format or "ucca"
        self.out_format = "ucca" if self.format in (None, "text") else self.format
        if self.config.args.use_bert and self.config.args.bert_multilingual is not None:
            self.lang = self.passage.attrib.get("lang")
            assert self.lang, "Attribute 'lang' is required per passage when using multilingual BERT"
        else:
            self.lang = self.passage.attrib.get("lang", self.config.args.lang)
        # Used in verify() to optionally ignore a mismatch in linkage nodes:
        self.ignore_node = None if self.config.args.linkage else lambda n: n.tag == layer1.NodeTags.Linkage
        self.state_hash_history = set()
        self.state = self.oracle = self.eval_type = None

    def init(self):
        """ Set the config format, create the state, optionally create the oracle, and initialize all models """
        self.config.set_format(self.in_format)
        WIKIFIER.enabled = self.config.args.wikification
        self.state = State(self.passage)
        # Passage is considered labeled if there are any edges or node labels in it
        edges, node_labels = map(any, zip(*[(n.outgoing, n.attrib.get(LABEL_ATTRIB))
                                            for n in self.passage.layer(layer1.LAYER_ID).all]))
        self.oracle = Oracle(self.passage) if self.training or self.config.args.verify or (
            (self.config.args.verbose > 1 or self.config.args.use_gold_node_labels or self.config.args.action_stats)
            and (edges or node_labels)) else None
        for model in self.models:
            model.init_model(self.config.format, lang=self.lang if self.config.args.multilingual else None)
            if ClassifierProperty.require_init_features in model.classifier_properties:
                model.init_features(self.state, self.training)

    def parse(self, display=True, write=False, accuracies=None):
        """
        Parse the passage in a worker thread, then finish (create output, write, evaluate, report)
        :param display: passed to finish(): whether to print the status line
        :param write: passed to finish(): whether to write the output passage
        :param accuracies: passed to finish(): optional dict to store the action accuracy in, by passage ID
        :return: whatever finish() returns
        """
        self.init()
        passage_id = self.passage.ID
        try:
            # NOTE: leaving the `with` block waits for the worker thread, so a timeout is only
            # reported once parse_internal itself has returned
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                executor.submit(self.parse_internal).result(self.config.args.timeout)
            status = "(%d tokens/s)" % self.tokens_per_second()
        except ParserException as e:
            if self.training:
                raise
            self.config.log("%s %s: %s" % (self.config.passage_word, passage_id, e))
            status = "(failed)"
        except concurrent.futures.TimeoutError:
            self.config.log("%s %s: timeout (%fs)" % (self.config.passage_word, passage_id, self.config.args.timeout))
            status = "(timeout)"
        return self.finish(status, display=display, write=write, accuracies=accuracies)

    def parse_internal(self):
        """
        Internal method to parse a single passage.
        If training, use oracle to train on given passages. Otherwise just parse with classifier.
        """
        self.config.print(" initial state: %s" % self.state)
        while True:
            if self.config.args.check_loops:
                self.check_loop()
            self.label_node()  # In case root node needs labeling
            true_actions = self.get_true_actions()
            action, predicted_action = self.choose(true_actions)
            self.state.transition(action)
            need_label, label, predicted_label, true_label = self.label_node(action)
            if self.config.args.action_stats:
                try:
                    with open(self.config.args.action_stats, "a") as f:
                        print(",".join(map(str, [predicted_action, action] + list(true_actions.values()))), file=f)
                except OSError:
                    pass  # Statistics are best-effort: failing to write them must not stop parsing
            self.config.print(lambda: "\n".join(
                [" predicted: %-15s true: %-15s taken: %-15s %s" % (
                    predicted_action, "|".join(map(str, true_actions.values())), action, self.state)
                 if self.oracle else " action: %-15s %s" % (action, self.state)] +
                ([" predicted label: %-9s true label: %s" % (predicted_label, true_label)
                  if self.oracle and not self.config.args.use_gold_node_labels else " label: %s" % label]
                 if need_label else []) +
                [" " + l for l in self.state.log]))
            if self.state.finished:
                return  # action is Finish (or early update is triggered)

    def get_true_actions(self):
        """
        :return: dict of correct actions according to the oracle; empty if there is no oracle,
                 or if the oracle failed while not training
        """
        true_actions = {}
        if self.oracle:
            try:
                true_actions = self.oracle.get_actions(self.state, self.model.actions, create=self.training)
            except (AttributeError, AssertionError) as e:
                if self.training:
                    raise ParserException("Error in getting action from oracle during training") from e
        return true_actions

    def get_true_label(self, node):
        """
        :param node: passed to the oracle's get_label along with the state
        :return: pair returned by the oracle, or (None, None) if there is no oracle
                 or if the oracle failed while not training
        """
        try:
            return self.oracle.get_label(self.state, node) if self.oracle else (None, None)
        except AssertionError as e:
            if self.training:
                raise ParserException("Error in getting label from oracle during training") from e
            return None, None

    def label_node(self, action=None):
        """
        If the state needs a node label, choose one and apply it to the state
        :param action: action just taken, passed to the oracle; if omitted, state.need_label is passed instead
        :return: tuple of (need_label, label, predicted_label, true_label); the last three are None
                 when no label was needed
        """
        true_label = label = predicted_label = None
        need_label = self.state.need_label  # Label action that requires a choice of label
        if need_label:
            true_label, raw_true_label = self.get_true_label(action or need_label)
            label, predicted_label = self.choose(true_label, NODE_LABEL_KEY, "node label")
            self.state.label_node(raw_true_label if label == true_label else label)
        return need_label, label, predicted_label, true_label

    def choose(self, true, axis=None, name="action"):
        """
        Score the current state, pick the best valid action/label, and update the classifier if training
        :param true: dict of true actions, or the true node label if axis is NODE_LABEL_KEY
        :param axis: classifier axis to use; defaults to the model's axis
        :param name: name to use in log and error messages
        :return: pair of (action/label to actually take, predicted action/label)
        :raise ParserException: if no valid action/label is available
        """
        if axis is None:
            axis = self.model.axis
        elif axis == NODE_LABEL_KEY and self.config.args.use_gold_node_labels:
            return true, true
        labels = self.model.classifier.labels[axis]
        if axis == NODE_LABEL_KEY:
            true_keys = (labels[true],) if self.oracle else ()  # Must be before score()
            is_valid = self.state.is_valid_label
        else:
            true_keys = None
            is_valid = self.state.is_valid_action
        scores, features = self.model.score(self.state, axis)
        for model in self.models[1:]:  # Ensemble if given more than one model; align label order and add scores
            # Each extra model is scored itself, so its scores match the label order they are zipped with
            label_scores = dict(zip(model.classifier.labels[axis].all, model.score(self.state, axis)[0]))
            scores += [label_scores.get(a, 0) for a in labels.all]  # Product of Experts, assuming log(softmax)
        self.config.print(lambda: " %s scores: %s" % (name, tuple(zip(labels.all, scores))), level=4)
        try:
            label = pred = self.predict(scores, labels.all, is_valid)
        except StopIteration as e:
            raise ParserException("No valid %s available\n%s" % (name, self.oracle.log if self.oracle else "")) from e
        label, is_correct, true_keys, true_values = self.correct(axis, label, pred, scores, true, true_keys)
        if self.training:
            if not (is_correct and ClassifierProperty.update_only_on_error in self.model.classifier_properties):
                assert not self.model.is_finalized, "Updating finalized model"
                self.model.classifier.update(
                    features, axis=axis, true=true_keys, pred=labels[pred] if axis == NODE_LABEL_KEY else pred.id,
                    importance=[self.config.args.swap_importance if a.is_swap else 1 for a in true_values] or None)
            if not is_correct and self.config.args.early_update:
                self.state.finished = True
        for model in self.models:
            model.classifier.finished_step(self.training)
            if axis != NODE_LABEL_KEY:
                model.classifier.transition(label, axis=axis)
        return label, pred

    def correct(self, axis, label, pred, scores, true, true_keys):
        """
        Compare the prediction to the truth, update the accuracy counters, and when training
        replace a wrong prediction by a true action/label
        :return: tuple of (label to take, is_correct, true_keys, true_values)
        """
        true_values = is_correct = ()
        if axis == NODE_LABEL_KEY:
            if self.oracle:
                is_correct = (label == true)
                if is_correct:
                    self.correct_label_count += 1
                elif self.training:
                    label = true
            self.label_count += 1
        else:  # action
            true_keys, true_values = map(list, zip(*true.items())) if true else (None, None)
            label = true.get(pred.id)
            is_correct = (label is not None)
            if is_correct:
                self.correct_action_count += 1
            else:  # When training, take the highest-scoring true action; otherwise follow the prediction
                label = true_values[scores[true_keys].argmax()] if self.training else pred
            self.action_count += 1
        return label, is_correct, true_keys, true_values

    @staticmethod
    def predict(scores, values, is_valid=None):
        """
        Choose action/label based on classifier
        Usually the best action/label is valid, so max is enough to choose it in O(n) time
        Otherwise, sorts all the other scores to choose the best valid one in O(n lg n)
        :return: valid action/label with maximum probability according to classifier
        """
        return next(filter(is_valid, (values[i] for i in PassageParser.generate_descending(scores))))

    @staticmethod
    def generate_descending(scores):
        """ Generate indices into scores: first the argmax, then all indices by descending score """
        yield scores.argmax()
        yield from scores.argsort()[::-1]  # Contains the max, but otherwise items might be missed (different order)

    def finish(self, status, display=True, write=False, accuracies=None):
        """
        Finalize the classifiers for this item, create the output passage, and optionally
        write, verify, evaluate and report it
        :param status: status string to print
        :param display: whether to print the status line
        :param write: whether to write the output passage
        :param accuracies: optional dict to store the action accuracy in, by passage ID
        :return: tuple of the output passage, followed by the evaluation score if evaluating
        """
        self.model.classifier.finished_item(self.training)
        for model in self.models[1:]:
            model.classifier.finished_item(renew=False)  # So that dynet.renew_cg happens only once
        if not self.training or self.config.args.verify:
            self.out = self.state.create_passage(verify=self.config.args.verify, format=self.out_format)
        if write:
            for out_format in self.config.args.formats or [self.out_format]:
                if self.config.args.normalize and out_format == "ucca":
                    normalize(self.out)
                ioutil.write_passage(self.out, output_format=out_format, binary=out_format == "pickle",
                                     outdir=self.config.args.outdir, prefix=self.config.args.prefix,
                                     converter=get_output_converter(out_format), verbose=self.config.args.verbose,
                                     append=self.config.args.join, basename=self.config.args.join)
        if self.oracle and self.config.args.verify:
            self.verify(self.out, self.passage)
        ret = (self.out,)
        if self.evaluation:
            ret += (self.evaluate(self.evaluation),)
            status = "%-14s %s F1=%.3f" % (status, self.eval_type, self.f1)
        if display:
            self.config.print("%s%.3fs %s" % (self.accuracy_str, self.duration, status), level=1)
        if accuracies is not None:
            accuracies[self.passage.ID] = self.correct_action_count / self.action_count if self.action_count else 0
        return ret

    @property
    def accuracy_str(self):
        """ Padded action (and label) accuracy string, or empty if there is no oracle or no action was counted """
        if self.oracle and self.action_count:
            accuracy_str = "a=%-14s" % percents_str(self.correct_action_count, self.action_count)
            if self.label_count:
                accuracy_str += " l=%-14s" % percents_str(self.correct_label_count, self.label_count)
            return "%-33s" % accuracy_str
        return ""

    def evaluate(self, mode=ParseMode.test):
        """
        Evaluate the output passage against the input passage, setting self.eval_type and self.f1
        :param mode: in dev mode only self.eval_type is evaluated, otherwise both labeled and unlabeled
        :return: score object returned by the evaluator, with its lang attribute set
        """
        if self.format:
            self.config.print("Converting to %s and evaluating..." % self.format)
        self.eval_type = UNLABELED if self.config.is_unlabeled(self.in_format) else LABELED
        evaluator = EVALUATORS.get(self.format, evaluate_ucca)
        score = evaluator(self.out, self.passage, converter=get_output_converter(self.format),
                          verbose=self.out and self.config.args.verbose > 3,
                          constructions=self.config.args.constructions,
                          eval_types=(self.eval_type,) if mode is ParseMode.dev else (LABELED, UNLABELED))
        self.f1 = average_f1(score, self.eval_type)
        score.lang = self.lang
        return score

    def check_loop(self):
        """
        Check if the current state has already occurred, indicating a loop
        """
        h = hash(self.state)
        # The conditional is parenthesized so that the state is reported even without an oracle
        assert h not in self.state_hash_history, "\n".join(
            ["Transition loop", self.state.str("\n")] + ([self.oracle.str("\n")] if self.oracle else []))
        self.state_hash_history.add(h)

    def verify(self, guessed, ref):
        """
        Compare predicted passage to true passage and raise an exception if they differ
        :param ref: true passage
        :param guessed: predicted passage to compare
        """
        assert ref.equals(guessed, ignore_node=self.ignore_node), \
            "Failed to produce true passage" + (diffutil.diff_passages(ref, guessed) if self.training else "")

    @property
    def num_tokens(self):
        """ Number of terminals in the state that are not in the buffer """
        return len(set(self.state.terminals).difference(self.state.buffer))  # To count even incomplete parses

    @num_tokens.setter
    def num_tokens(self, _):
        pass  # Computed from the state; assignments are accepted and ignored
class PassageParser(AbstractParser):
    """ Parser for a single passage, has a state and optionally an oracle """

    def __init__(self, passage, *args, **kwargs):
        """
        :param passage: passage to parse; also used as the initial output until parsing replaces it
        :param args: positional arguments forwarded to the base parser
        :param kwargs: keyword arguments forwarded to the base parser
        """
        super().__init__(*args, **kwargs)
        self.passage = self.out = passage
        # When training/evaluating, the passage dictates the format; otherwise take the first (sorted) format
        # shared by the model and the command line, falling back to the model's own formats
        self.format = self.passage.extra.get("format") if self.training or self.evaluation else \
            sorted(set.intersection(*map(set, filter(None, (self.model.formats, self.config.args.formats)))) or
                   self.model.formats)[0]
        if self.training and self.config.args.verify:
            errors = list(validate(self.passage))
            assert not errors, errors
        self.in_format = self.format or "ucca"
        self.out_format = "ucca" if self.format in (None, "text") else self.format
        self.lang = self.passage.attrib.get("lang", self.config.args.lang)
        # Used in verify() to optionally ignore a mismatch in linkage nodes:
        self.ignore_node = None if self.config.args.linkage else lambda n: n.tag == layer1.NodeTags.Linkage
        self.state_hash_history = set()
        self.state = self.oracle = self.eval_type = None

    def init(self):
        """ Set the config format, create the state, optionally create the oracle, and initialize all models """
        self.config.set_format(self.in_format)
        WIKIFIER.enabled = self.config.args.wikification
        self.state = State(self.passage)
        # Passage is considered labeled if there are any edges or node labels in it
        edges, node_labels = map(any, zip(*[(n.outgoing, n.attrib.get(LABEL_ATTRIB))
                                            for n in self.passage.layer(layer1.LAYER_ID).all]))
        self.oracle = Oracle(self.passage) if self.training or self.config.args.verify or (
            (self.config.args.verbose > 1 or self.config.args.use_gold_node_labels or self.config.args.action_stats)
            and (edges or node_labels)) else None
        for model in self.models:
            model.init_model(self.config.format, lang=self.lang if self.config.args.multilingual else None)
            if ClassifierProperty.require_init_features in model.classifier_properties:
                model.init_features(self.state, self.training)

    def parse(self, display=True, write=False, accuracies=None):
        """
        Parse the passage in a worker thread, then finish (create output, write, evaluate, report)
        :param display: passed to finish(): whether to print the status line
        :param write: passed to finish(): whether to write the output passage
        :param accuracies: passed to finish(): optional dict to store the action accuracy in, by passage ID
        :return: whatever finish() returns
        """
        self.init()
        passage_id = self.passage.ID
        try:
            # NOTE: leaving the `with` block waits for the worker thread, so a timeout is only
            # reported once parse_internal itself has returned
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                executor.submit(self.parse_internal).result(self.config.args.timeout)
            status = "(%d tokens/s)" % self.tokens_per_second()
        except ParserException as e:
            if self.training:
                raise
            self.config.log("%s %s: %s" % (self.config.passage_word, passage_id, e))
            status = "(failed)"
        except concurrent.futures.TimeoutError:
            self.config.log("%s %s: timeout (%fs)" % (self.config.passage_word, passage_id, self.config.args.timeout))
            status = "(timeout)"
        return self.finish(status, display=display, write=write, accuracies=accuracies)

    def parse_internal(self):
        """
        Internal method to parse a single passage.
        If training, use oracle to train on given passages. Otherwise just parse with classifier.
        """
        self.config.print(" initial state: %s" % self.state)
        while True:
            if self.config.args.check_loops:
                self.check_loop()
            self.label_node()  # In case root node needs labeling
            true_actions = self.get_true_actions()
            action, predicted_action = self.choose(true_actions)
            self.state.transition(action)
            need_label, label, predicted_label, true_label = self.label_node(action)
            if self.config.args.action_stats:
                try:
                    with open(self.config.args.action_stats, "a") as f:
                        print(",".join(map(str, [predicted_action, action] + list(true_actions.values()))), file=f)
                except OSError:
                    pass  # Statistics are best-effort: failing to write them must not stop parsing
            self.config.print(lambda: "\n".join(
                [" predicted: %-15s true: %-15s taken: %-15s %s" % (
                    predicted_action, "|".join(map(str, true_actions.values())), action, self.state)
                 if self.oracle else " action: %-15s %s" % (action, self.state)] +
                ([" predicted label: %-9s true label: %s" % (predicted_label, true_label)
                  if self.oracle and not self.config.args.use_gold_node_labels else " label: %s" % label]
                 if need_label else []) +
                [" " + l for l in self.state.log]))
            if self.state.finished:
                return  # action is Finish (or early update is triggered)

    def get_true_actions(self):
        """
        :return: dict of correct actions according to the oracle; empty if there is no oracle,
                 or if the oracle failed while not training
        """
        true_actions = {}
        if self.oracle:
            try:
                true_actions = self.oracle.get_actions(self.state, self.model.actions, create=self.training)
            except (AttributeError, AssertionError) as e:
                if self.training:
                    raise ParserException("Error in getting action from oracle during training") from e
        return true_actions

    def get_true_label(self, node):
        """
        :param node: passed to the oracle's get_label along with the state
        :return: pair returned by the oracle, or (None, None) if there is no oracle
                 or if the oracle failed while not training
        """
        try:
            return self.oracle.get_label(self.state, node) if self.oracle else (None, None)
        except AssertionError as e:
            if self.training:
                raise ParserException("Error in getting label from oracle during training") from e
            return None, None

    def label_node(self, action=None):
        """
        If the state needs a node label, choose one and apply it to the state
        :param action: action just taken, passed to the oracle; if omitted, state.need_label is passed instead
        :return: tuple of (need_label, label, predicted_label, true_label); the last three are None
                 when no label was needed
        """
        true_label = label = predicted_label = None
        need_label = self.state.need_label  # Label action that requires a choice of label
        if need_label:
            true_label, raw_true_label = self.get_true_label(action or need_label)
            label, predicted_label = self.choose(true_label, NODE_LABEL_KEY, "node label")
            self.state.label_node(raw_true_label if label == true_label else label)
        return need_label, label, predicted_label, true_label

    def choose(self, true, axis=None, name="action"):
        """
        Score the current state, pick the best valid action/label, and update the classifier if training
        :param true: dict of true actions, or the true node label if axis is NODE_LABEL_KEY
        :param axis: classifier axis to use; defaults to the model's axis
        :param name: name to use in log and error messages
        :return: pair of (action/label to actually take, predicted action/label)
        :raise ParserException: if no valid action/label is available
        """
        if axis is None:
            axis = self.model.axis
        elif axis == NODE_LABEL_KEY and self.config.args.use_gold_node_labels:
            return true, true
        labels = self.model.classifier.labels[axis]
        if axis == NODE_LABEL_KEY:
            true_keys = (labels[true],) if self.oracle else ()  # Must be before score()
            is_valid = self.state.is_valid_label
        else:
            true_keys = None
            is_valid = self.state.is_valid_action
        scores, features = self.model.score(self.state, axis)
        for model in self.models[1:]:  # Ensemble if given more than one model; align label order and add scores
            # Each extra model is scored itself, so its scores match the label order they are zipped with
            label_scores = dict(zip(model.classifier.labels[axis].all, model.score(self.state, axis)[0]))
            scores += [label_scores.get(a, 0) for a in labels.all]  # Product of Experts, assuming log(softmax)
        self.config.print(lambda: " %s scores: %s" % (name, tuple(zip(labels.all, scores))), level=4)
        try:
            label = pred = self.predict(scores, labels.all, is_valid)
        except StopIteration as e:
            raise ParserException("No valid %s available\n%s" % (name, self.oracle.log if self.oracle else "")) from e
        label, is_correct, true_keys, true_values = self.correct(axis, label, pred, scores, true, true_keys)
        if self.training:
            if not (is_correct and ClassifierProperty.update_only_on_error in self.model.classifier_properties):
                assert not self.model.is_finalized, "Updating finalized model"
                self.model.classifier.update(
                    features, axis=axis, true=true_keys, pred=labels[pred] if axis == NODE_LABEL_KEY else pred.id,
                    importance=[self.config.args.swap_importance if a.is_swap else 1 for a in true_values] or None)
            if not is_correct and self.config.args.early_update:
                self.state.finished = True
        for model in self.models:
            model.classifier.finished_step(self.training)
            if axis != NODE_LABEL_KEY:
                model.classifier.transition(label, axis=axis)
        return label, pred

    def correct(self, axis, label, pred, scores, true, true_keys):
        """
        Compare the prediction to the truth, update the accuracy counters, and when training
        replace a wrong prediction by a true action/label
        :return: tuple of (label to take, is_correct, true_keys, true_values)
        """
        true_values = is_correct = ()
        if axis == NODE_LABEL_KEY:
            if self.oracle:
                is_correct = (label == true)
                if is_correct:
                    self.correct_label_count += 1
                elif self.training:
                    label = true
            self.label_count += 1
        else:  # action
            true_keys, true_values = map(list, zip(*true.items())) if true else (None, None)
            label = true.get(pred.id)
            is_correct = (label is not None)
            if is_correct:
                self.correct_action_count += 1
            else:  # When training, take the highest-scoring true action; otherwise follow the prediction
                label = true_values[scores[true_keys].argmax()] if self.training else pred
            self.action_count += 1
        return label, is_correct, true_keys, true_values

    @staticmethod
    def predict(scores, values, is_valid=None):
        """
        Choose action/label based on classifier
        Usually the best action/label is valid, so max is enough to choose it in O(n) time
        Otherwise, sorts all the other scores to choose the best valid one in O(n lg n)
        :return: valid action/label with maximum probability according to classifier
        """
        return next(filter(is_valid, (values[i] for i in PassageParser.generate_descending(scores))))

    @staticmethod
    def generate_descending(scores):
        """ Generate indices into scores: first the argmax, then all indices by descending score """
        yield scores.argmax()
        yield from scores.argsort()[::-1]  # Contains the max, but otherwise items might be missed (different order)

    def finish(self, status, display=True, write=False, accuracies=None):
        """
        Finalize the classifiers for this item, create the output passage, and optionally
        write, verify, evaluate and report it
        :param status: status string to print
        :param display: whether to print the status line
        :param write: whether to write the output passage
        :param accuracies: optional dict to store the action accuracy in, by passage ID
        :return: tuple of the output passage, followed by the evaluation score if evaluating
        """
        self.model.classifier.finished_item(self.training)
        for model in self.models[1:]:
            model.classifier.finished_item(renew=False)  # So that dynet.renew_cg happens only once
        if not self.training or self.config.args.verify:
            self.out = self.state.create_passage(verify=self.config.args.verify, format=self.out_format)
        if write:
            for out_format in self.config.args.formats or [self.out_format]:
                if self.config.args.normalize and out_format == "ucca":
                    normalize(self.out)
                ioutil.write_passage(self.out, output_format=out_format, binary=out_format == "pickle",
                                     outdir=self.config.args.outdir, prefix=self.config.args.prefix,
                                     converter=get_output_converter(out_format), verbose=self.config.args.verbose,
                                     append=self.config.args.join, basename=self.config.args.join)
        if self.oracle and self.config.args.verify:
            self.verify(self.out, self.passage)
        ret = (self.out,)
        if self.evaluation:
            ret += (self.evaluate(self.evaluation),)
            status = "%-14s %s F1=%.3f" % (status, self.eval_type, self.f1)
        if display:
            self.config.print("%s%.3fs %s" % (self.accuracy_str, self.duration, status), level=1)
        if accuracies is not None:
            accuracies[self.passage.ID] = self.correct_action_count / self.action_count if self.action_count else 0
        return ret

    @property
    def accuracy_str(self):
        """ Padded action (and label) accuracy string, or empty if there is no oracle or no action was counted """
        if self.oracle and self.action_count:
            accuracy_str = "a=%-14s" % percents_str(self.correct_action_count, self.action_count)
            if self.label_count:
                accuracy_str += " l=%-14s" % percents_str(self.correct_label_count, self.label_count)
            return "%-33s" % accuracy_str
        return ""

    def evaluate(self, mode=ParseMode.test):
        """
        Evaluate the output passage against the input passage, setting self.eval_type and self.f1
        :param mode: in dev mode only self.eval_type is evaluated, otherwise both labeled and unlabeled
        :return: score object returned by the evaluator, with its lang attribute set
        """
        if self.format:
            self.config.print("Converting to %s and evaluating..." % self.format)
        self.eval_type = UNLABELED if self.config.is_unlabeled(self.in_format) else LABELED
        evaluator = EVALUATORS.get(self.format, evaluate_ucca)
        score = evaluator(self.out, self.passage, converter=get_output_converter(self.format),
                          verbose=self.out and self.config.args.verbose > 3,
                          constructions=self.config.args.constructions,
                          eval_types=(self.eval_type,) if mode is ParseMode.dev else (LABELED, UNLABELED))
        self.f1 = average_f1(score, self.eval_type)
        score.lang = self.lang
        return score

    def check_loop(self):
        """
        Check if the current state has already occurred, indicating a loop
        """
        h = hash(self.state)
        # The conditional is parenthesized so that the state is reported even without an oracle
        assert h not in self.state_hash_history, "\n".join(
            ["Transition loop", self.state.str("\n")] + ([self.oracle.str("\n")] if self.oracle else []))
        self.state_hash_history.add(h)

    def verify(self, guessed, ref):
        """
        Compare predicted passage to true passage and raise an exception if they differ
        :param ref: true passage
        :param guessed: predicted passage to compare
        """
        assert ref.equals(guessed, ignore_node=self.ignore_node), \
            "Failed to produce true passage" + (diffutil.diff_passages(ref, guessed) if self.training else "")

    @property
    def num_tokens(self):
        """ Number of terminals in the state that are not in the buffer """
        return len(set(self.state.terminals).difference(self.state.buffer))  # To count even incomplete parses

    @num_tokens.setter
    def num_tokens(self, _):
        pass  # Computed from the state; assignments are accepted and ignored