def from_lists(cls, all_list):
    """Build the triple list for a graph from parallel prediction lists.

    Expected keys in ``all_list`` (all position-aligned lists over target
    tokens -- TODO confirm against the caller):
      - 'tokens':        target-side tokens (concepts)
      - 'coref':         copy indices; coref[i] != i marks token i as a
                         coreference of an earlier token
      - 'head_indices':  head pointer per token
      - 'head_tags':     relation label per token

    NOTE(review): ``head_indices[variable]`` / ``head_tags[variable]`` index
    with a *string* variable name.  If these are position-aligned lists (as
    the other keys appear to be), this raises TypeError -- presumably they
    should be indexed by the token position instead; verify.

    NOTE(review): ``Triples`` is built but never returned or stored, so this
    classmethod currently returns None -- looks truncated; confirm.
    """
    head_tags = all_list['head_tags']
    head_indices = all_list['head_indices']
    tgt_tokens = all_list['tokens']
    tgt_copy_indices = all_list['coref']
    variables = []
    variables_count = defaultdict(int)
    for i, token in enumerate(tgt_tokens):
        if tgt_copy_indices[i] != i:
            # Coreferent token: reuse the variable of its antecedent.
            variables.append(variables[tgt_copy_indices[i]])
        else:
            # Fresh variable: first letter of the token, with a numeric
            # suffix when that letter has been used before (x, x1, x2, ...).
            if token[0] in variables_count:
                variables.append(token[0] + str(variables_count[token[0]]))
            else:
                variables.append(token[0])
            variables_count[token[0]] += 1
    Triples = []
    for variable, token in zip(variables, tgt_tokens):
        # One instance triple per token, plus one relation triple linking it
        # to its head (see the indexing caveat in the docstring above).
        Triples.append(Triple(variable, "instance", token))
        Triples.append(
            Triple(
                head_indices[variable],
                head_tags[variable],
                variable
            )
        )
def update_edge_label(self, x, y, old, new):
    """Relabel the edge from node ``x`` to node ``y``.

    Updates the networkx edge attribute and rebuilds the triple list so that
    the single matching ``(x, old, y)`` triple becomes ``(x, new, y)``; all
    other triples are kept unchanged.  The penman-side representation is then
    refreshed from the rebuilt list.
    """
    self._G[x][y]['label'] = new
    rebuilt = [
        Triple(x.identifier, new, y.identifier)
        if (t.source == x.identifier
            and t.target == y.identifier
            and t.relation == old)
        else t
        for t in self._triples
    ]
    self._update_penman_graph(rebuilt)
def _remove_wiki(graph):
    """Return a copy of ``graph`` with every ':wiki' value blanked out.

    Each triple whose relation is ':wiki' has its target replaced by the
    placeholder '+'; all other triples are carried over untouched.  The
    original graph's metadata is attached to the new graph.
    """
    scrubbed = []
    for t in graph.triples:
        source, rel, _target = t
        scrubbed.append(Triple(source, rel, '+') if rel == ':wiki' else t)
    result = Graph(scrubbed)
    result.metadata = graph.metadata
    return result
def add_node(self, instance):
    """Add an instance node for concept ``instance`` and return it.

    The variable name is the concept's first character; when that name is
    already in use, the smallest free numeric suffix (2, 3, ...) is appended
    to keep it unique.  The triple store is re-sorted into penman's
    alphanumeric order and the node is registered in the underlying graph.
    """
    identifier = instance[0]
    assert identifier.isalpha()
    # Resolve identifier clashes with the smallest unused numeric suffix.
    if identifier in self.variables():
        suffix = 2
        while identifier + str(suffix) in self.variables():
            suffix += 1
        identifier += str(suffix)
    self._triples = penman.alphanum_order(
        self._triples + [Triple(identifier, 'instance', instance)]
    )
    node = AMRNode(identifier, [('instance', instance)])
    self._G.add_node(node)
    return node
def anonymize_graph(g):
    """Anonymize graph by replacing nodes of certain named types with tokens
    like "named0".  Modifies original graph in place.
    (Gotcha: accesses penman's private ``_triples`` member.)

    Returns a list of dicts that can be used to recover the original values,
    one per replaced node: ``{'ph': placeholder token, 'span': [start, end]
    character offsets, 'value': original surface string}``.
    NOTE(review): the original docstring said "dict"; the code builds a list.
    """
    replacements = []
    id_counters = {}
    # every attribute triple carrying a constant argument (the named value)
    carg_triples = g.attributes(relation='carg')
    # anonymize each instance that has a cargs value, storing the mapping from value to token
    for carg_triple in carg_triples:
        named_triple = g.triples(
            relation='instance', source=carg_triple.source)[0]  # assumes exactly 1
        named_type = named_triple.target.replace("_", "")  # _ causes tokenization issues
        value = carg_triple.target.strip('"')
        # extract char location of the word in original (untokenized) sentence
        span_triple = g.triples(relation="lnk", source=carg_triple.source)[0]
        span = [int(pos) for pos in span_triple.target[2:-2].split(':')]  # '"<5:10>"'
        # create data struct to store mapping of this type and create an id counter
        if named_type not in id_counters:
            id_counters[named_type] = 0
        # generate anonymized token and store it with the span it should replace
        placeholder = '{}{}'.format(named_type, id_counters[named_type])
        replacements.append({'ph': placeholder, 'span': span, 'value': value})
        id_counters[named_type] += 1
        # replace the instance target with the placeholder, preserving its
        # position in the triple list, and drop the now-redundant carg triple
        new_triple = Triple(
            named_triple.source,
            named_triple.relation,
            placeholder,
            inverted=named_triple.inverted
        )
        # gotcha: accessing private member var
        g._triples.insert(g._triples.index(named_triple), new_triple)
        g._triples.remove(named_triple)
        g._triples.remove(carg_triple)
    return replacements
def combine_attributes(g):
    """Group all attribute nodes into one.  Attribute list is normalized by
    uppercasing the value and sorting the list by attribute name.

    Concatenated attributes are appended to the instance (predicate) target
    value so OpenNMT will interpret them as word features.  Note that OpenNMT
    expects all tokens to have the same number of word features, but only
    predicate tokens have attributes, so an extra step will be required to
    make sure all tokens have a feature.  (See _layout in PenmanToLinearCodec)

    Modifies ``g`` in place (gotcha: accesses penman's private ``_triples``).
    """
    for variable in g.variables():
        # all non-instance attribute triples attached to this variable
        old_attributes = [
            attr for attr in g.attributes(source=variable)
            if attr.relation != 'instance'
        ]
        new_targets = []
        for old_attr in old_attributes:
            old_relation = old_attr.relation
            # normalize string values by uppercasing; leave non-strings as-is
            old_target = old_attr.target.upper() if isinstance(old_attr.target, str) else old_attr.target
            # don't store span info (only needed for anonymization) or untensed (doesn't provide much info)
            if old_relation != 'lnk' and (old_relation, old_target) != ('tense', 'UNTENSED'):
                new_targets.append('{}={}'.format(old_relation, old_target))
            # the attribute triple is removed either way
            g._triples.remove(old_attr)
        if new_targets:
            attr_features = '|'.join(sorted(new_targets))  # sort by attribute name
            instance = g.attributes(source=variable, relation='instance')[0]
            # fold the features into the instance target, separated by a
            # box-drawing bar so it cannot collide with the '|' joiner above
            new_instance = Triple(
                source=instance.source,
                relation=instance.relation,
                target=instance.target + '│' + attr_features  # N.B. '│' not '|'
            )
            # swap in the augmented instance at the same position
            g._triples.insert(g._triples.index(instance), new_instance)
            g._triples.remove(instance)
# NOTE(review): incomplete fragment -- this span begins inside a dict literal
# whose opening (presumably `config = {`) lies outside the visible chunk, and
# then runs BLINK entity linking over collected mentions, splicing the top
# prediction back into each graph's wiki triple.  Left byte-identical; cannot
# be safely reformatted or reviewed without the preceding lines.
"top_k": 10, "show_url": False, "fast": args.fast, # set this to be true if speed is a concern "output_path": models_path + "logs/", # logging directory "faiss_index": None, #"flat", "index_path": models_path + "faiss_flat_index.pkl", } args_blink = argparse.Namespace(**config) models = main_dense.load_models(args_blink, logger=logger) _, _, _, _, _, predictions, scores, = main_dense.run(args_blink, logger, *models, test_data=for_blink, device=args.device) for s, pp in zip(for_blink, predictions): pp = [p for p in pp if not p.startswith('List of')] p = f'"{pp[0]}"' if pp else '-' p = p.replace(' ', '_') graph_n = s['graph_n'] triple_n = s['triple_n'] triples = [g for g in graphs[graph_n].triples] n, rel, w = triples[triple_n] triples[triple_n] = Triple(n, rel, p) g = Graph(triples) g.metadata = graphs[graph_n].metadata graphs[graph_n] = g write_predictions(args.out, AMRBartTokenizer, graphs)