def single_ann_measures(a, kirchhoff=False):
    """Tally summary measures for the single annotation graph ``a``.

    Measures describing the annotation as given (lexical nodes, coordination
    nodes, anaphoric links, ...) are read first.  ``simplify_coord``,
    ``upward`` and ``downward`` are then called on ``a`` (in that order,
    interleaved with further measurements), so the statement order below is
    significant.  Finally ``a`` and the counter are handed to ``promcom``
    with ``kirchhoff`` forwarded (presumably it records further measures in
    the counter -- confirm against ``promcom``).

    Returns the ``Counter`` holding the measures.
    """
    stats = Counter()

    # --- measures taken before coordination is simplified ---
    token_counts = [len(node.tokens) for node in a.lexnodes]
    stats['lexnodes'] = len(a.lexnodes)
    stats['1W'] = token_counts.count(1)                    # single-token lexical nodes
    stats['MW'] = sum(1 for k in token_counts if k > 1)    # multi-token lexical nodes
    stats['omittedtoks'] = len(a.alltokens) - sum(token_counts)
    stats['coordnodes'] = len(a.coordnodes)
    stats['anaphlinks'] = len(a.anaphlinks)
    stats['FNs'] = len(a.fenodes)
    rooted = len(a.root.children)
    stats['explicitly rooted utterances'] = rooted

    simplify_coord(a)

    stats['projective'] = int(a.isProjective)

    # Literal number of connected components in the graph (not counting the
    # root when nothing was explicitly attached to it).
    root_alone = {a.root}
    fragments = set()
    for node in a.nodes:
        if node.frag.nodes != root_alone:
            fragments.add(node.frag)
    stats['fragments'] = len(fragments)
    assert stats['fragments'] > 0, (a.nodes, a.lexnodes)

    # At least one utterance is always assumed; every fragment beyond the
    # first could be one more utterance.
    at_least = max(1, rooted)
    stats['max utterances'] = at_least + (stats['fragments'] - 1)
    stats['min utterances'] = at_least
    assert stats['max utterances'] >= stats['min utterances'], stats

    upward(a)
    downward(a)

    # Lexical nodes that have the root among their parent candidates.
    stats['possible utterance heads'] = sum(
        1 for node in a.lexnodes if a.root in node.parentcandidates
    )

    promcom(a, stats, kirchhoff=kirchhoff)
    return stats
def main(annsFF, verbose=False, simplifycoords=False, updatelex=False, escapebrackets=False):
    """Merge parallel annotation streams item by item and report on each merge.

    Parameters:
        annsFF: sequence of at least two line iterators, one per annotator.
            Each line must consist of three tab-separated fields: a location
            identifier, the sentence, and a JSON-encoded annotation.  The
            iterators are consumed in lockstep; processing stops as soon as
            any of them is exhausted.
        verbose: if true, echo inputs, merged output and per-item measures
            to stderr.
        simplifycoords: if true, each input annotation is loaded into a
            ``FUDGGraph``, passed through ``simplify_coord`` and re-serialized
            with ``to_json_simplecoord()`` before merging.
        updatelex: forwarded to ``merge``.
        escapebrackets: accepted for interface compatibility; not used here.

    Output:
        For every item whose merged annotation loads as a ``FUDGGraph``, one
        line is printed to stdout: the annotators' locations joined by ``|``,
        the sentence, and the merged JSON, tab-separated.  If loading fails
        with an error mentioning "cycle", a diagnostic goes to stderr and a
        blank line is printed to stdout instead.  Any other loading error is
        re-raised.  The measures from ``single_ann_measures`` are summed over
        all items (items that cannot be evaluated are tallied under
        ``'invalid'``) and the total is printed to stderr at the end.

    Raises:
        AssertionError: if fewer than two annotator streams are given.
    """
    assert len(annsFF)>=2

    i = 0  # index of the current item (only shown in verbose output)
    allC = Counter()
    while True:  # iterate over items
        annsJ = []  # JSON input objects, one per annotator
        locs = []
        try:
            for j, annsF in enumerate(annsFF):  # iterate over annotators
                ln = next(annsF)
                # Strip only the line terminator: blindly dropping the last
                # character would truncate the JSON field of a final line
                # that has no trailing newline.
                loc, sent, annJS = ln.rstrip('\n').split('\t')
                locs.append(loc)
                if j == 0:
                    sent0 = sent
                if sent != sent0:  # TODO: hmm, why is this failing?
                    # Warn and carry on.  An explicit test (rather than
                    # assert/except) keeps the warning alive under ``-O``.
                    print((sent0, sent), file=sys.stderr)
                annJ = json.loads(annJS)
                annsJ.append(annJ)
                if verbose:
                    print(i, loc, '<<', sent, file=sys.stderr)
                    print(annJ, file=sys.stderr)
                if simplifycoords:
                    aX = FUDGGraph(annJ)
                    simplify_coord(aX)
                    annsJ[-1] = aX.to_json_simplecoord()

            mergedJ = merge(annsJ, updatelex=updatelex)
            output = '|'.join(locs) + '\t' + sent + '\t' + json.dumps(mergedJ)
            if verbose:
                print(output, file=sys.stderr)
            try:
                a = FUDGGraph(mergedJ)
                print(output)
                try:
                    c = single_ann_measures(a)
                    if verbose:
                        print(c, file=sys.stderr)
                    allC += c
                except Exception as ex:
                    # The merge itself is valid; only its evaluation failed.
                    print('CANNOT EVALUATE MERGE',loc,'::',ex, file=sys.stderr)
                    allC['invalid'] += 1
            except Exception as ex:
                # str(ex) rather than ex.message: exceptions have no
                # ``message`` attribute on Python 3.
                if 'cycle' not in str(ex):
                    raise
                print('CANNOT MERGE',loc,'::',ex, file=sys.stderr)
                print()  # blank line--invalid merge!
            i += 1
        except StopIteration:
            # One of the annotator streams ran out: no more items.
            break
    print(allC, file=sys.stderr)