def main(args):
    """Run the pipeline stages selected by the attributes of ``args``.

    The stages run in this order, each only when its switch is truthy:

    * ``args.corpus``: count unigrams in the corpus, decide the vocabulary
      (from ``args.cutoff``, ``args.vocab`` and ``args.want``), then either
      rewrite the corpus (``args.rewrite``) or extract statistics from it
      using ``args.window``.
    * ``args.stat``: process the statistics either in-process through a
      ``canon`` object (``args.no_matlab``) or by delegating to
      ``call_matlab``. Both paths need ``args.m`` and ``args.kappa``.
    * ``args.clean``: call ``clean()``.

    Args:
        args: Namespace-like object exposing the attributes read above.

    Raises:
        AssertionError: If ``args.stat`` is set while ``args.m`` or
            ``args.kappa`` is ``None``.
    """
    set_quiet(args.quiet)

    if args.corpus:
        unigrams = count_unigrams(args.corpus)
        vocab, outfname = decide_vocab(
            unigrams, args.cutoff, args.vocab, args.want
        )
        if args.rewrite:
            rewrite_corpus(args.corpus, vocab, outfname)
        else:
            extract_stat(args.corpus, vocab, outfname, args.window)

    if args.stat:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is kept so code
        # that caught the old assertion failure still works.
        if args.m is None or args.kappa is None:
            raise AssertionError(
                "args.m and args.kappa must both be set when args.stat is given"
            )
        if args.no_matlab:
            solver = canon()
            solver.set_params(args.m, args.kappa)
            solver.get_stat(args.stat)
            solver.start_logging()
            solver.approx_cca()
            solver.end_logging()
            solver.write_result()
        else:
            call_matlab(args.stat, args.m, args.kappa)

    if args.clean:
        clean()
def main(args):
    """Dispatch to the processing stages requested through ``args``.

    ``set_quiet(args.quiet)`` is always applied first. After that, each of
    the following stages runs only when its switch is truthy:

    * ``args.corpus``: count unigrams, pick the vocabulary and output file
      name via ``decide_vocab``, then rewrite the corpus when
      ``args.rewrite`` is set, otherwise extract statistics with a context
      window of ``args.window``.
    * ``args.stat``: requires ``args.m`` and ``args.kappa``. With
      ``args.no_matlab`` the work is done by a ``canon`` object; otherwise
      it is handed to ``call_matlab``.
    * ``args.clean``: call ``clean()``.

    Args:
        args: Namespace-like object exposing the attributes read above.

    Raises:
        AssertionError: If ``args.stat`` is set while ``args.m`` or
            ``args.kappa`` is ``None``.
    """
    set_quiet(args.quiet)

    if args.corpus:
        unigrams = count_unigrams(args.corpus)
        vocab, outfname = decide_vocab(
            unigrams, args.cutoff, args.vocab, args.want
        )
        if args.rewrite:
            rewrite_corpus(args.corpus, vocab, outfname)
        else:
            extract_stat(args.corpus, vocab, outfname, args.window)

    if args.stat:
        # An ``assert`` statement would vanish under ``python -O`` and let
        # ``None`` parameters through, so the failure is raised explicitly
        # with the same exception type.
        if args.m is None or args.kappa is None:
            raise AssertionError(
                "args.m and args.kappa must both be set when args.stat is given"
            )
        if args.no_matlab:
            solver = canon()
            solver.set_params(args.m, args.kappa)
            solver.get_stat(args.stat)
            solver.start_logging()
            solver.approx_cca()
            solver.end_logging()
            solver.write_result()
        else:
            call_matlab(args.stat, args.m, args.kappa)

    if args.clean:
        clean()
def check():
    """Recompute the statistics and assert the counts match the gold counts.

    ``corpus``, ``cutoff``, ``window`` and the ``gold_*`` mappings are free
    variables resolved from a scope outside this function.

    Returns:
        The fourth value returned by ``extract_stat``.
    """
    unigram_counts = count_unigrams(corpus)
    vocabulary, out_name = decide_vocab(unigram_counts, cutoff, None, None)
    pair_counts, x_counts, y_counts, stat = extract_stat(
        corpus, vocabulary, out_name, window
    )

    # Every key produced by the extraction must agree with its gold count.
    for x_key in x_counts:
        assert x_counts[x_key] == gold_Xcount[x_key]

    for y_key in y_counts:
        assert y_counts[y_key] == gold_Ycount[y_key]

    for x_key, y_key in pair_counts:
        pair = (x_key, y_key)
        assert pair_counts[pair] == gold_XYcount[pair]

    return stat
def check():
    """Run the extraction and compare every resulting count to the gold data.

    Reads ``corpus``, ``cutoff``, ``window``, ``gold_Xcount``,
    ``gold_Ycount`` and ``gold_XYcount`` from a scope outside this function.

    Returns:
        The ``stat`` value produced by ``extract_stat``.
    """
    unigram_counts = count_unigrams(corpus)
    vocabulary, out_name = decide_vocab(unigram_counts, cutoff, None, None)
    extracted = extract_stat(corpus, vocabulary, out_name, window)
    joint_counts, left_counts, right_counts, stat = extracted

    # Marginal counts on each side first, then the joint counts.
    for left in left_counts:
        assert left_counts[left] == gold_Xcount[left]

    for right in right_counts:
        assert right_counts[right] == gold_Ycount[right]

    for left, right in joint_counts:
        assert joint_counts[left, right] == gold_XYcount[left, right]

    return stat