示例#1
0
def main():
    """Select the first ``n`` triples of the joined, summed and sorted tomes.

    Directories, the group-by members and ``n`` come from
    ``utils.argsdirs``; ``n`` is converted to ``int`` before use.  One
    ``triple.Tome`` is built per name yielded by ``utils.filenames(in_d)``,
    the list of them is wrapped in a single ``triple.Tome``, and that is
    passed through ``group_sum``, ``sort`` and ``first(n)`` in turn.  Each
    resulting triple is handed to the writer of the output tome, whose name
    comes from ``utils.new_filename(out_d, "most_frequent.gz")``.
    """
    in_d, out_d, m, n = utils.argsdirs("Most frequent triples", ["n"])
    limit = int(n)

    # One Tome per input file, collected in the order utils.filenames yields.
    sources = []
    for path in utils.filenames(in_d):
        sources.append(triple.Tome(path))

    # The destination tome is created before any processing starts.
    destination = triple.Tome(utils.new_filename(out_d, "most_frequent.gz"))

    # A single name is rebound at every stage so intermediate results are
    # released as soon as the next stage has been derived from them.
    print("joining the tomes..")
    stage = triple.Tome(sources)
    print("grouping/summing (again)..")
    stage = stage.group_sum(m)
    print("sorting the tomes (again)..")
    stage = stage.sort()
    print("getting the first %d.." % limit)
    stage = stage.first(limit)

    print("writing everything down..")
    emit = destination.writer()
    for record in stage:
        emit(record)
    print("done.")
示例#2
0
def main():
    """Write a sorted counterpart of every input tome.

    For each name yielded by ``utils.filenames(in_d)`` the tome is opened,
    an output tome is created under the name returned by
    ``utils.new_filename(out_d, name)``, and every triple produced by
    ``sort()`` on the input is passed to the output tome's writer.
    The third value returned by ``utils.argsdirs`` is not used here.
    """
    in_d, out_d, _ = utils.argsdirs("Sorting")

    for source_name in utils.filenames(in_d):
        source = triple.Tome(source_name)
        target = triple.Tome(utils.new_filename(out_d, source_name))
        emit = target.writer()
        for record in source.sort():
            emit(record)
示例#3
0
def main():
    """Group and sum the triples of every input tome, one output per input.

    The group-by members come from ``utils.argsdirs``.  For each name
    yielded by ``utils.filenames(in_d)`` a progress line is printed, the
    tome is opened, the output name is obtained from
    ``utils.new_filename(out_d, name)`` and announced, and every triple
    produced by ``group_sum(members_groupby)`` is passed to the output
    tome's writer.
    """
    in_d, out_d, members_groupby = utils.argsdirs("Counting the triples")

    for source_name in utils.filenames(in_d):
        print("processing file %s.." % source_name)
        source = triple.Tome(source_name)

        target_name = utils.new_filename(out_d, source_name)
        print("writing to %s.." % target_name)
        target = triple.Tome(target_name)

        emit = target.writer()
        for record in source.group_sum(members_groupby):
            emit(record)
示例#4
0
def main():
    """Run Expectation Maximization (Model 0) over the input directory.

    The input and output directories come from ``utils.argsdirs``; the
    third value it returns is not needed here.  The result of
    ``prepare_tomes(in_d)`` is fed to ``em``, and both are handed to
    ``write_results`` together with the output directory.
    """
    in_d, out_d, _ = utils.argsdirs("Expectation Maximization (Model 0)")
    tv = prepare_tomes(in_d)
    mus = em(tv)
    # The leftover ``ipdb.set_trace()`` breakpoint was removed: it stopped
    # every run in an interactive debugger after the results were written
    # and made the script depend on the third-party ``ipdb`` package.
    write_results(tv, mus, out_d)