def tgt_html(grandparent, anno, naughty=False):
    """
    Render the annotation `anno` as HTML underneath `grandparent`.

    A new span is created under `grandparent` and filled, in order, with:

    * the annotation code
    * the bracketed annotation type (given the CSS class ``naughty``
      when `naughty` is set)
    * if the annotation has an entry in `contexts`, the first element
      returned by `stac.split_turn_text` for its turn, with whatever
      precedes the first colon set in bold
    * unless the annotation is a relation instance: a bold turn range
      (CDUs only, when there is one), a text snippet and the text span

    Returns the newly created span.
    """
    parent = h.span(grandparent)
    h.span(parent, anno_code(anno))
    type_span = h.span(parent, '[%s] ' % anno.type)
    if naughty:
        type_span.attrib['class'] = 'naughty'

    if anno in contexts:
        turn = contexts[anno].turn
        turn_info = stac.split_turn_text(doc.text(turn.span))[0]
        # split once at the first colon; `colon` is empty if there is none
        head, colon, rest = turn_info.partition(":")
        if colon:
            bold = ET.SubElement(parent, 'b')
            bold.text = head + ":"
            h.span(parent, rest)
        else:
            h.span(parent, turn_info)

    # relation instances get no text snippet of their own
    if stac.is_relation_instance(anno):
        return parent

    t_text = text(anno)
    if stac.is_cdu(anno):
        trange = turn_range(anno)
        if trange:
            h.elem(parent, 'b', trange)
    h.span(parent, text=snippet(t_text, 100), attrib={'class': 'snippet'})
    h.span(parent, ' %s' % anno.text_span())
    return parent
def rough_type(anno):
    """
    Coarse label for an annotation: ``'EDU'`` for anything of type
    ``'Segment'`` or recognised as an EDU, ``'relation'`` for relation
    instances, and the annotation's own type otherwise.
    """
    if anno.type == 'Segment' or stac.is_edu(anno):
        return 'EDU'
    if stac.is_relation_instance(anno):
        return 'relation'
    return anno.type
def is_non2sided_rel(gra, _, rel):
    """
    Relation instance whose edge in the graph does not have exactly
    two links (ie. lacks the usual source/target pair).

    It is unclear how such a relation could come about.
    """
    anno = gra.annotation(rel)
    is_rel = stac.is_relation_instance(anno)
    if not is_rel:
        # not a relation: hand back the (falsy) verdict untouched and
        # do not bother looking at the links
        return is_rel
    return len(gra.links(rel)) != 2
def in_dialogue(x):
    """
    True if the annotation `x` lies within `units`:

    * an EDU must itself be in `units`
    * a relation instance must have both its source and target there
    * a CDU must have all of its terminals there

    Anything else is never considered to be in the dialogue.
    """
    if stac.is_edu(x):
        return x in units
    if stac.is_relation_instance(x):
        if x.source not in units:
            return False
        return x.target in units
    if stac.is_cdu(x):
        return all(t in units for t in x.terminals())
    return False
def is_arrow_inversion(g, contexts, r):
    """
    Relation in a graph whose first linked node has a greater text span
    than its second linked node (ie. the arrow points backwards in the
    text)

    Anything that is not a relation instance is never an inversion.

    `contexts` is not used here; it is kept so that the signature stays
    the same for existing callers.
    """
    # check the annotation type *before* unpacking the links: an edge
    # that is not a relation need not have exactly two links, and
    # unpacking it first would raise ValueError instead of saying no
    if not stac.is_relation_instance(g.annotation(r)):
        return False
    n1, n2 = g.links(r)
    span1 = g.annotation(n1).text_span()
    span2 = g.annotation(n2).text_span()
    return span1 > span2
def in_dialogue(d_annos, anno):
    """
    True if the given annotation is in the given dialogue, ie. if every
    unit it boils down to is in `d_annos`: the EDU itself, the source
    and target of a relation instance, or the terminals of a CDU.

    Any other kind of annotation is never in the dialogue.
    """
    if stac.is_edu(anno):
        needed = [anno]
    elif stac.is_relation_instance(anno):
        needed = [anno.source, anno.target]
    elif stac.is_cdu(anno):
        needed = anno.terminals()
    else:
        return False
    return all(unit in d_annos for unit in needed)
def is_dupe_rel(gra, _, rel):
    """
    Relation instance that has a rival: some other relation instance
    attached to its first linked node which connects the very same
    pair of nodes, in either direction
    """
    src, tgt = gra.links(rel)
    forwards = (src, tgt)
    backwards = (tgt, src)
    for other in gra.links(src):
        # only relation instances can be duplicates
        if not stac.is_relation_instance(gra.annotation(other)):
            continue
        if other != rel:
            ends = gra.rel_links(other)
            if ends == forwards or ends == backwards:
                return True
    return False
def is_arrow_inversion(gra, _, rel):
    """
    Relation in a graph that goes from textual right to left
    (may not be a problem)

    Concretely: a relation instance whose first linked node has a
    greater text span than its second. Anything that is not a relation
    instance is never an inversion.
    """
    # check the annotation type *before* unpacking the links: an edge
    # that is not a relation need not have exactly two links, and
    # unpacking it first would raise ValueError instead of saying no
    if not stac.is_relation_instance(gra.annotation(rel)):
        return False
    node1, node2 = gra.links(rel)
    span1 = gra.annotation(node1).text_span()
    span2 = gra.annotation(node2).text_span()
    return span1 > span2
def is_dupe_rel(gra, _, rel):
    """
    True if another relation instance attached to this relation's first
    linked node joins the same two nodes as `rel` does, whichever way
    round it points
    """
    src, tgt = gra.links(rel)
    either_way = ((src, tgt), (tgt, src))
    # edges on the first node that are themselves relation instances
    rivals = (x for x in gra.links(src)
              if stac.is_relation_instance(gra.annotation(x)))
    return any(x != rel and gra.rel_links(x) in either_way
               for x in rivals)
def is_puncture(gra, _, rel):
    """
    Relation in a graph that crosses a CDU boundary.

    The chains of containing CDUs are computed for both linked nodes.
    The relation is a puncture if the second node's chain is the longer
    of the two, or if the tail of the first node's chain (taking as many
    entries as the second chain has) differs from the second chain.

    Anything that is not a relation instance is never a puncture.
    """
    if not stac.is_relation_instance(gra.annotation(rel)):
        return False

    n_from, n_to = gra.links(rel)
    chain_from = gra.containing_cdu_chain(n_from)
    chain_to = gra.containing_cdu_chain(n_to)

    # how many more CDUs enclose the first node than the second
    surplus = len(chain_from) - len(chain_to)
    if surplus < 0:
        return True
    return chain_from[surplus:] != chain_to
def has_non_du_member(anno):
    """
    True if at least one member of `anno` satisfies `is_non_du`, where
    the members are the source and target of a relation instance, or
    the members of a CDU.

    Any other kind of annotation has no members to speak of, so the
    answer is False.
    """
    if stac.is_relation_instance(anno):
        candidates = [anno.source, anno.target]
    elif stac.is_cdu(anno):
        candidates = anno.members
    else:
        return False

    for member in candidates:
        if is_non_du(member):
            return True
    return False
def rough_type(anno):
    """
    Simplified type of an annotation, one of

    * "EDU" (type 'Segment', or recognised as an EDU)
    * "relation" (relation instances)
    * the annotation type itself (everything else)
    """
    counts_as_edu = anno.type == 'Segment' or stac.is_edu(anno)
    if counts_as_edu:
        return 'EDU'
    return 'relation' if stac.is_relation_instance(anno) else anno.type
def tgt_txt(t):
    """
    One-line plain text description of the annotation `t`.

    The description starts with the annotation code and bracketed type
    (left out when `light` is set). For anything but a relation
    instance, this is followed by a short snippet of the annotation
    text in braces and by its text span.
    """
    tag = anno_code(t)
    tagged_type = '' if light else '%s[%s]' % (tag, t.type)
    if stac.is_relation_instance(t):
        return tagged_type
    sp = t.text_span()
    return '%s {%s} %s' % (tagged_type, snippet(doc.text(sp), 20), sp)
def is_bad(anno):
    """
    True if the annotation crosses a dialogue boundary, ie. if its
    members (source/target of a relation instance, members of a CDU)
    belong to more than one dialogue.
    """
    if stac.is_relation_instance(anno):
        candidates = [anno.source, anno.target]
    elif stac.is_cdu(anno):
        candidates = list(anno.members)
    else:
        # nothing to compare
        return False

    # only keep members accepted by expect_dialogue (the point being
    # not to worry about members which are relations)
    relevant = [m for m in candidates if expect_dialogue(m)]
    return len({dialogue(m) for m in relevant}) > 1
def is_bad(anno):
    """
    True if the members of a relation instance (source and target) or
    of a CDU are spread over more than one dialogue. Annotations of any
    other kind, or without relevant members, are never bad.
    """
    if stac.is_relation_instance(anno):
        members = [anno.source, anno.target]
    elif stac.is_cdu(anno):
        members = list(anno.members)
    else:
        members = []

    # don't worry about members which are relations
    kept = [m for m in members if expect_dialogue(m)]
    if not kept:
        return False
    return len(frozenset(dialogue(m) for m in kept)) > 1
def tgt_txt(anno):
    """
    Summarise the given annotation in a short string: its code and
    bracketed type (omitted when `light` is set) and, unless it is a
    relation instance, a brief snippet of its text in braces followed
    by its text span
    """
    code = anno_code(anno)
    label = '%s[%s]' % (code, anno.type) if not light else ''
    if not stac.is_relation_instance(anno):
        where = anno.text_span()
        excerpt = snippet(doc.text(where), 20)
        return '%s {%s} %s' % (label, excerpt, where)
    return label
def tgt_html(grandparent, t, naughty=False):
    """
    Describe the annotation `t` in HTML, appending that description to
    the HTML node `grandparent`.

    A new span is created under `grandparent` and filled, in order, with:

    * the annotation code
    * the bracketed annotation type (given the CSS class ``naughty``
      when `naughty` is set)
    * if the annotation has an entry in `contexts`, the first element
      returned by `stac.split_turn_text` for its turn, with whatever
      precedes the first colon set in bold
    * unless the annotation is a relation instance: for CDUs, the range
      of turn identifiers covered by its terminals (in bold), then a
      text snippet and the text span

    Returns the newly created span.
    """
    def turn_id(anno):
        """
        Turn identifier of `anno` as an int, or None if the annotation
        has no known context or its turn has an empty identifier
        """
        if anno not in contexts:
            return None
        tid_str = contexts[anno].turn.features['Identifier']
        return int(tid_str) if tid_str else None

    parent = html_span(grandparent)
    html_span(parent, anno_code(t))
    type_span = html_span(parent, '[%s] ' % t.type)
    if naughty:
        type_span.attrib['class'] = 'naughty'

    if t in contexts:
        turn = contexts[t].turn
        turn_info = stac.split_turn_text(doc.text(turn.span))[0]
        turn_splits = turn_info.split(":")
        if len(turn_splits) > 1:
            # NB: this element must not share a name with the turn_id
            # helper above; rebinding the helper here would make the
            # CDU branch below try to call an XML element
            turn_head = ET.SubElement(parent, 'b')
            turn_head.text = turn_splits[0] + ":"
            html_span(parent, ":".join(turn_splits[1:]))
        else:
            html_span(parent, turn_info)

    if not stac.is_relation_instance(t):
        t_span = t.text_span()
        t_text = doc.text(t_span)
        if stac.is_cdu(t):
            # drop terminals without a usable identifier (but keep a
            # legitimate identifier of 0)
            tids = [x for x in map(turn_id, t.terminals())
                    if x is not None]
            if tids:
                tspan = ET.SubElement(parent, 'b')
                min_tid = min(tids)
                max_tid = max(tids)
                if min_tid == max_tid:
                    tspan.text = "%d: " % min_tid
                else:
                    tspan.text = "%d-%d: " % (min_tid, max_tid)
        text_sp = html_span(parent, snippet(t_text, 100))
        text_sp.attrib['class'] = 'snippet'
        html_span(parent, ' %s' % t_span)
    return parent
def without_cdus(self, sloppy=False):
    """
    Build a deep copy of this graph from which every CDU has been
    removed. Relations that used to involve a CDU are rewired to
    point from/to the recursive head of that CDU instead.

    `sloppy` is passed on as is to `recursive_cdu_heads`.
    """
    clone = copy.deepcopy(self)
    heads = clone.recursive_cdu_heads(sloppy)
    # same mapping as `heads`, but between annotation objects
    anno_heads = {clone.annotation(key): clone.annotation(head)
                  for key, head in heads.items()}

    # point every relation that touches a CDU at the CDU's head
    for rel_edge in clone.relations():
        endpoints = clone.links(rel_edge)
        attrs = clone.edge_attributes(rel_edge)
        if not any(clone.is_cdu(node) for node in endpoints):
            continue
        # rebuild the edge from scratch, then link it up again
        clone.del_edge(rel_edge)
        clone.add_edge(rel_edge)
        clone.add_edge_attributes(rel_edge, attrs)
        for node in endpoints:
            if clone.is_cdu(node):
                new_node = heads[clone.mirror(node)]
            else:
                new_node = node
            clone.link(new_node, rel_edge)

    # nothing points at the CDUs any more, so they can go
    for cdu_edge in clone.cdus():
        clone.del_node(clone.mirror(cdu_edge))
        clone.del_edge(cdu_edge)

    # to be on the safe side, do the same rewiring on the underlying
    # educe.annotation objects layer (symptom of a yucky design)
    for rel in clone.doc.relations:
        if not stac.is_relation_instance(rel):
            continue
        new_src = anno_heads.get(rel.source, rel.source)
        new_tgt = anno_heads.get(rel.target, rel.target)
        rel.source = new_src
        rel.target = new_tgt
        rel.span = annotation.RelSpan(new_src.local_id(),
                                      new_tgt.local_id())

    # finally, drop the CDU objects themselves
    clone.doc.schemas = [s for s in clone.doc.schemas
                         if not stac.is_cdu(s)]
    return clone
def test_fake_objs():
    """
    Check that the objects `edu1`, `rel1` and `cdu1` (defined outside
    this function) are accepted by the matching `stac` predicate
    """
    # one assertion per kind of annotation, in EDU/relation/CDU order
    assert stac.is_edu(edu1)
    assert stac.is_relation_instance(rel1)
    assert stac.is_cdu(cdu1)
def is_relation(self, x):
    """
    True if `x` is a relation as far as the parent class is concerned
    *and* its annotation is a relation instance according to `stac`
    """
    base_verdict = super(Graph, self).is_relation(x)
    if not base_verdict:
        # hand back whatever falsy value the parent class gave us
        return base_verdict
    return stac.is_relation_instance(self.annotation(x))