def _main_lozenge_graph(args):
    """
    Find lozenge shaped subgraphs, draw them, and print a tally

    For every discourse-stage document, each graph node is passed to
    `_maybe_lozenge`. The nodes of all lozenges found are gathered and
    the graph is reduced to just those nodes before being written out
    (only if the reduced graph has any nodes). Afterwards, one line per
    document with at least one lozenge is printed (lozenge count, then
    edge count in parentheses), followed by corpus-wide totals.
    """
    corpus = read_corpus(args, preselected={'stage': ['discourse', 'units']})
    output_dir = get_output_dir(args)

    lozenges_per_doc = Counter()
    edges_per_doc = Counter()

    for key in sorted(k for k in corpus if k.stage == 'discourse'):
        gra = stacgraph.Graph.from_doc(corpus, key)
        if args.strip_cdus:
            gra = gra.without_cdus(sloppy=True)

        # union of the nodes belonging to any lozenge in this document
        keep_nodes = set()
        for node in gra.nodes():
            found = _maybe_lozenge(gra, node)
            if found is None:
                continue
            loz_nodes, loz_links = found
            lozenges_per_doc[key] += 1
            edges_per_doc[key] += len(loz_links)
            keep_nodes |= loz_nodes

        # only draw the part of the graph that takes part in a lozenge
        dot_gra = stacgraph.DotGraph(gra.copy(keep_nodes))
        if dot_gra.get_nodes():
            write_dot_graph(key, output_dir, dot_gra,
                            run_graphviz=args.draw)

    for key in sorted(lozenges_per_doc):
        print(key, lozenges_per_doc[key], '({})'.format(edges_per_doc[key]))
    print('TOTAL lozenges:', sum(lozenges_per_doc.values()))
    print('TOTAL edges in lozenges:', sum(edges_per_doc.values()))
def generate_graphs(settings):
    """
    Draw SVG graphs for each of the documents in the corpus

    A ``.dot`` file is written for every discourse-stage document whose
    graph has at least one node. Documents that raise
    `educe.graph.DuplicateIdException` are reported on stderr and
    skipped. Afterwards, if ``settings.draw`` is set, graphviz (``dot``)
    is run on every ``.dot`` file that exists to produce an ``.svg``
    next to it.

    :param settings: object providing ``corpus``, ``report`` (which must
                     offer ``subreport_path``) and ``draw``
    """
    discourse_only = [k for k in settings.corpus if k.stage == 'discourse']
    report = settings.report

    # generate dot files
    for k in discourse_only:
        # Work out the path before entering the try block: the warning
        # below refers to it, and graph construction (the step that
        # raises) would otherwise leave it unset or stale.
        dot_file = report.subreport_path(k, '.dot')
        try:
            gra = egr.DotGraph(egr.Graph.from_doc(settings.corpus, k))
            create_dirname(dot_file)
            if gra.get_nodes():
                with codecs.open(dot_file, 'w', encoding='utf-8') as fout:
                    print(gra.to_string(), file=fout)
        except educe.graph.DuplicateIdException:
            warning = ("Couldn't graph %s because it has duplicate "
                       "annotation ids") % dot_file
            print(warning, file=sys.stderr)

    # attempt to graphviz them
    try:
        print("Generating graphs... (you can safely ^-C here)",
              file=sys.stderr)
        for k in discourse_only:
            dot_file = report.subreport_path(k, '.dot')
            svg_file = report.subreport_path(k, '.svg')
            if settings.draw and fp.exists(dot_file):
                # Pass an argument list rather than a shell string so
                # that paths containing spaces or shell metacharacters
                # are handled correctly, and so that a missing `dot`
                # executable surfaces as the OSError handled below.
                subprocess.call(['dot', '-T', 'svg',
                                 '-o', svg_file, dot_file])
    except OSError as oops:
        print("Couldn't run graphviz. (%s)" % oops, file=sys.stderr)
        print("You should install it for easier sanity check debugging.",
              file=sys.stderr)
def main(args):
    """
    Subcommand main.

    Reads the discourse and units stages of the corpus and writes one
    dot graph per discourse-stage document, restricted by the predicate
    built from `args.rel_types`. Documents with an empty graph or with
    duplicate annotation ids are reported on stderr and skipped.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    args.stage = 'discourse|units'
    corpus = read_corpus(args, verbose=True)
    output_dir = get_output_dir(args)

    for k in sorted(key for key in corpus if key.stage == 'discourse'):
        try:
            keep_pred = _keep(corpus[k], args.rel_types)
            dot_gra = stacgraph.DotGraph(
                stacgraph.Graph.from_doc(corpus, k, pred=keep_pred))
            if not dot_gra.get_nodes():
                print("Skipping %s (empty graph)" % k, file=sys.stderr)
                continue
            write_dot_graph(k, output_dir, dot_gra,
                            run_graphviz=args.draw)
        except graph.DuplicateIdException:
            warning = "WARNING: %s has duplicate annotation ids" % k
            print(warning, file=sys.stderr)
def dump_graph(dump_filename, graph):
    """
    Write a dot graph and run graphviz on it

    The graph is written to ``dump_filename + '.dot'`` (parent
    directories are created via `mk_parent_dirs`), then ``dot`` is
    invoked to render ``dump_filename + '.svg'``. Failure to launch
    graphviz is reported on stderr rather than raised.

    :param dump_filename: output path without extension
    :param graph: graph object accepted by `stac_gr.DotGraph`
    """
    dot_graph = stac_gr.DotGraph(graph)
    dot_file = dump_filename + '.dot'
    svg_file = dump_filename + '.svg'
    mk_parent_dirs(dot_file)
    with codecs.open(dot_file, 'w', encoding='utf-8') as dotf:
        print(dot_graph.to_string(), file=dotf)
    print("Creating %s" % svg_file, file=sys.stderr)
    try:
        # Argument list instead of a shell string: filenames with
        # spaces or shell metacharacters reach `dot` unmodified.
        subprocess.call(['dot', '-T', 'svg', '-o', svg_file, dot_file])
    except OSError as oops:
        # Without a shell, a missing `dot` executable raises instead of
        # merely printing a shell error; keep it non-fatal as before.
        print("Couldn't run graphviz. (%s)" % oops, file=sys.stderr)
def _main_rel_graph(args):
    """
    Draw graphs showing relation instances between EDUs

    Options read from `args`:

    * ``live``: graph every key in the corpus instead of only the
      discourse-stage ones
    * ``highlight``: tuples converted with `anno_id_from_tuple`; any
      annotation whose local id matches gets its ``highlight`` feature
      set to ``'orange'``
    * ``strip_cdus`` / ``strip_mode``: if set, CDUs are removed from the
      graph using the given mode
    * ``split``: additionally write one graph per connected component,
      numbered from 1
    * ``draw``: forwarded to `write_dot_graph` as ``run_graphviz``

    Documents with an empty graph or duplicate annotation ids are
    reported on stderr and skipped.
    """
    args.stage = 'discourse|units'
    corpus = _read_corpus(args)
    output_dir = get_output_dir(args)

    if args.live:
        keys = corpus
    else:
        keys = [k for k in corpus if k.stage == 'discourse']

    # The ids to highlight do not depend on the document, so convert
    # them once here rather than once per document.
    if args.highlight:
        highlights = [anno_id_from_tuple(x) for x in args.highlight]
    else:
        highlights = []

    for k in sorted(keys):
        if highlights:
            for anno in corpus[k].annotations():
                if anno.local_id() in highlights:
                    anno.features['highlight'] = 'orange'
        try:
            gra = stacgraph.Graph.from_doc(corpus, k)
            if args.strip_cdus:
                gra = gra.without_cdus(mode=args.strip_mode)
            dot_gra = stacgraph.DotGraph(gra)
            if dot_gra.get_nodes():
                write_dot_graph(k, output_dir, dot_gra,
                                run_graphviz=args.draw)
                if args.split:
                    # one extra graph per connected component
                    ccs = gra.connected_components()
                    for part, nodes in enumerate(ccs, 1):
                        gra2 = gra.copy(nodes)
                        write_dot_graph(k, output_dir,
                                        stacgraph.DotGraph(gra2),
                                        part=part,
                                        run_graphviz=args.draw)
            else:
                print("Skipping %s (empty graph)" % k,
                      file=sys.stderr)
        except graph.DuplicateIdException:
            warning = "WARNING: %s has duplicate annotation ids" % k
            print(warning, file=sys.stderr)
def _main_rfc_graph(args):
    """
    Draw graphs with frontier nodes and RFC violations highlighted

    The RFC implementation is chosen by ``args.rfc`` (``'basic'`` or
    ``'mlast'``; anything else raises `NotImplementedError`). For each
    document, the frontier of every connected component is marked
    green, and every link reported as a violation on the whole graph is
    marked red. Documents with an empty graph are reported on stderr
    and skipped.
    """
    args.stage = 'discourse|units'
    corpus = _read_corpus(args)
    output_dir = get_output_dir(args)

    rfc_classes = {'basic': BasicRfc, 'mlast': ThreadedRfc}
    mk_rfc = rfc_classes.get(args.rfc)
    if mk_rfc is None:
        raise NotImplementedError

    keys = corpus if args.live else [k for k in corpus
                                     if k.stage == 'discourse']

    for key in sorted(keys):
        gra = stacgraph.Graph.from_doc(corpus, key)

        # frontier is computed per connected component
        for component in gra.connected_components():
            component_rfc = mk_rfc(gra.copy(component))
            for node in component_rfc.frontier():
                gra.annotation(node).features['highlight'] = 'green'

        # violations are computed on the graph as a whole
        for link in mk_rfc(gra).violations():
            gra.annotation(link).features['highlight'] = 'red'

        dot_gra = stacgraph.DotGraph(gra)
        if not dot_gra.get_nodes():
            print("Skipping %s (empty graph)" % key, file=sys.stderr)
            continue
        write_dot_graph(key, output_dir, dot_gra,
                        run_graphviz=args.draw)