def run(args):
    """Annotate a corpus with a saved parser flow and optionally evaluate it.

    Loads the flow from ``args.parser``, builds a ``Spec`` and a ``Caspar``
    model from it, runs the model over every document in ``args.input`` and
    writes each encoded result to the record file ``args.output``, keyed by
    the document's zero-based position in the corpus.

    Args:
      args: object with the attributes ``input``, ``parser``, ``output``,
        ``trace`` and ``evaluate``. The first three are verified with
        ``check_present``; ``input`` and ``parser`` must be existing paths.

    Returns:
      The value returned by ``frame_evaluation`` when ``args.evaluate`` is
      truthy, otherwise ``None``.

    Raises:
      AssertionError: if ``args.input`` or ``args.parser`` does not exist.
    """
    check_present(args, ["input", "parser", "output"])
    assert os.path.exists(args.input), args.input
    assert os.path.exists(args.parser), args.parser

    # Read parser flow.
    flow = Flow()
    flow.load(args.parser)

    # Initialize the spec from the flow.
    spec = Spec()
    spec.from_flow(flow)

    # Initialize the model from the flow.
    caspar = Caspar(spec)
    caspar.from_flow(flow)

    corpus = Corpora(args.input, caspar.spec.commons)
    writer = sling.RecordWriter(args.output)
    count = 0
    try:
        for document in corpus:
            state, _, _, trace = caspar.forward(
                document, train=False, debug=args.trace)
            state.write()
            if trace:
                trace.write()
            writer.write(str(count), state.encoded())
            count += 1
            if count % 100 == 0:
                # Single-argument print gives the same text on Python 2 and 3.
                print("Annotated %s documents %s %s" % (count, now(), mem()))
    finally:
        # Close the writer even when annotation fails part-way through, so
        # the output handle is never leaked.
        writer.close()

    print("Annotated %s documents %s %s" % (count, now(), mem()))
    print("Wrote annotated documents to %s" % (args.output,))

    if not args.evaluate:
        return None

    # NOTE(review): the commons store is saved to a temporary file, but that
    # file is not passed to frame_evaluation in this block. The save is kept
    # to preserve existing behavior -- confirm whether it is still needed.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        caspar.spec.commons.save(tmp.name, binary=True)
        tmp.close()
        return frame_evaluation(
            gold_corpus_path=args.input,
            test_corpus_path=args.output,
            commons=caspar.spec.commons)
    finally:
        # delete=False means nothing removes the file for us; always clean it
        # up, including when saving or evaluation raises.
        tmp.close()
        os.unlink(tmp.name)