def main():
    """Merge every input tome and write its first ``n`` triples to one file.

    The input directory, output directory, group-by members and ``n`` are
    all taken from ``utils.argsdirs``; ``n`` is converted to ``int`` before
    use.  The joined tome is grouped/summed, sorted and truncated with
    ``first(n)``, then written through the writer of the tome created for
    ``"most_frequent.gz"`` in the output directory.
    """
    src_dir, dst_dir, group_members, raw_count = utils.argsdirs(
        "Most frequent triples", ["n"])
    count = int(raw_count)

    # One Tome per input file, in the order utils.filenames yields them.
    sources = []
    for path in utils.filenames(src_dir):
        sources.append(triple.Tome(path))

    target = triple.Tome(utils.new_filename(dst_dir, "most_frequent.gz"))

    # A single name is rebound at every stage so that earlier intermediate
    # tomes are not kept alive longer than necessary.
    print("joining the tomes..")
    merged = triple.Tome(sources)
    print("grouping/summing (again)..")
    merged = merged.group_sum(group_members)
    print("sorting the tomes (again)..")
    merged = merged.sort()
    print("getting the first %d.." % count)
    merged = merged.first(count)

    print("writing everything down..")
    emit = target.writer()
    for record in merged:
        emit(record)
    print("done.")
def main():
    """Write a sorted version of each input tome into the output directory.

    Directories come from ``utils.argsdirs``; the third returned value is
    not used.  For every file reported by ``utils.filenames`` the triples
    produced by ``Tome.sort()`` are passed, one by one, to the writer of
    the matching output tome.
    """
    src_dir, dst_dir, _ = utils.argsdirs("Sorting")
    for path in utils.filenames(src_dir):
        source = triple.Tome(path)
        target = triple.Tome(utils.new_filename(dst_dir, path))
        emit = target.writer()
        for record in source.sort():
            emit(record)
def main():
    """Group/sum the triples of each input tome and write the result out.

    Directories and the group-by members come from ``utils.argsdirs``.
    Each input file is processed independently: the triples yielded by
    ``Tome.group_sum(members_groupby)`` are handed to the writer of an
    output tome whose name is derived with ``utils.new_filename``.
    """
    src_dir, dst_dir, members_groupby = utils.argsdirs("Counting the triples")
    for path in utils.filenames(src_dir):
        print("processing file %s.." % path)
        source = triple.Tome(path)

        out_path = utils.new_filename(dst_dir, path)
        print("writing to %s.." % out_path)
        target = triple.Tome(out_path)

        emit = target.writer()
        for record in source.group_sum(members_groupby):
            emit(record)
def main():
    """Run the "Expectation Maximization (Model 0)" pipeline end to end.

    The input and output directories are taken from ``utils.argsdirs``
    (its third returned value is not needed here).  The tomes prepared
    from the input directory are passed to ``em``, and both the prepared
    data and the values returned by ``em`` are handed to ``write_results``
    together with the output directory.
    """
    in_d, out_d, _ = utils.argsdirs("Expectation Maximization (Model 0)")
    tv = prepare_tomes(in_d)
    mus = em(tv)
    # The leftover ``ipdb.set_trace()`` that used to follow has been removed:
    # it stopped every run in an interactive debugger and made the
    # third-party ``ipdb`` package a hard requirement of the script.
    write_results(tv, mus, out_d)