def test_singletons(self):
    """Singleton trees must produce no recommendations from either recommender.

    Bug fix: the original did ``raise "This shouldn't ever happen"`` — raising
    a plain string is a TypeError in Python 3 (exceptions must derive from
    BaseException). The intent is a test failure, so use ``self.fail``.
    """
    tr = TreeFile(SINGLETONS)
    for recs in make_expert_rec(tr):
        self.fail("This shouldn't ever happen")
    # Re-open the fixture: the first pass consumed the TreeFile stream.
    tr = TreeFile(SINGLETONS)
    for recs in make_classic_recs(tr):
        self.fail("This shouldn't ever happen")
def make_classic(args, b, t):
    """Stream classic recommendations from args.tree and store them in table t.

    Rewinds the tree file, parses it, and writes each processed record via the
    table's batch-put context; b counts stored records.
    """
    with t.get_batch_put_context() as batch:
        args.tree.seek(0)
        tree_parser = TreeFile(args.tree)
        record_stream = process_record_stream(make_classic_recs(tree_parser))
        for record in record_stream:
            if args.verbose:
                print(record)
            if not args.dryrun:
                batch.put_item(record)
            b.increment()
# Two-pass loader: build expert and classic recommendation lists from the
# tree file and persist them to LevelDB, optionally batching writes.
tf = TreeFile(args.infile)
if args.batch_size:
    # Accumulate puts in a batch; flushed every args.batch_size increments.
    writer = leveldb.WriteBatch()
else:
    # No batching: Put() goes straight to the database handle.
    writer = db
# First pass: expert recommendations, keyed "<target_pid>|expert".
for recs in make_expert_rec(tf, args.limit):
    recd = [r.pid for r in recs]  # recommended paper ids, in rank order
    key = recs[0].target_pid + "|expert"
    writer.Put(key.encode(), msgpack.packb(recd))
    b.increment()
    if args.batch_size and b.count % args.batch_size == 0:
        # NOTE(review): leveldb's WriteBatch is not cleared by db.Write(), so
        # each flush presumably re-applies every earlier op in the batch —
        # confirm whether the batch should be recreated after each Write.
        db.Write(writer)
# Rewind the input so the second pass re-reads the same tree.
args.infile.seek(0)
tf = TreeFile(args.infile)
# Second pass: classic recommendations, keyed "<target_pid>|classic".
for recs in make_classic_recs(tf, args.limit):
    recd = [r.pid for r in recs]
    key = recs[0].target_pid + "|classic"
    writer.Put(key.encode(), msgpack.packb(recd))
    b.increment()
    if args.batch_size and b.count % args.batch_size == 0:
        db.Write(writer)
if args.batch_size:
    # Final synchronous flush of any remaining batched operations.
    db.Write(writer, sync=True)
b.print_freq()
print(db.GetStats())
def test_classic_limit(self):
    """Classic recommendations honor the per-paper limit of 5."""
    for recs in make_classic_recs(self.tr, 5):
        got = [rec.pid for rec in recs]
        expected = make_answer(CLASSIC, recs[0].target_pid)[:5]
        self.assertListEqual(got, expected)
def test_classic(self):
    """Classic recommendations match the expected answer list exactly."""
    for recs in make_classic_recs(self.tr):
        expected = make_answer(CLASSIC, recs[0].target_pid)
        self.assertListEqual([rec.pid for rec in recs], expected)
# NOTE(review): this fragment continues a loop over expert recommendations
# that begins before this chunk — the statements up to the first
# sys.stdout.flush() are that loop's body; confirm nesting against the
# original file.
if args.verbose:
    print(expert_rec)
if not args.dryrun:
    batch.put_item(expert_rec)
entries += 1
if entries % 50000 == 0:
    # Periodic progress report every 50k entries.
    current_time = time.time()
    # NOTE(review): current_rate is computed but never used; the print below
    # recomputes the rate inline.
    current_rate = entries/(current_time - start)
    print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, time.time()-start, entries/(time.time()-start)))
    sys.stdout.flush()
# Reset for the second pass
print("Generating classic recommendations...")
args.tree.seek(0)
parser = TreeFile(args.tree)
# Second pass: classic recommendations through the same batch writer.
for classic_rec in process_record_stream(make_classic_recs(parser)):
    if args.verbose:
        print(classic_rec)
    if not args.dryrun:
        batch.put_item(classic_rec)
    entries += 1
    if entries % 50000 == 0:
        current_time = time.time()
        current_rate = entries/(current_time - start)
        print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, time.time()-start, entries/(time.time()-start)))
        sys.stdout.flush()
# Final summary after both passes.
end = time.time()
print("\nProcessed {0:,} entries in {1:.0f} seconds: {2:.2f} entries/sec".format(entries, end-start, entries/(end-start)))
if not args.dryrun:
    # Persist the updated throughput configuration on the table handle.
    t.update_throughput()
import pickle
import shelve

from recommenders.ef import make_classic_recs, make_expert_rec
from util.misc import Benchmark

# NOTE(review): TreeFile is used below but no import for it is visible in this
# file's import block — confirm where it lives and add the project import.

if __name__ == "__main__":
    import argparse
    import sys

    # Bug fix: shelve and pickle were used below but never imported, which
    # raises NameError at runtime; both are stdlib and imported above.
    parser = argparse.ArgumentParser(description="Creates EF recommendations and store them in a DBM")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('shelf')
    parser.add_argument('--benchmark-freq', default=10000, type=int)
    parser.add_argument('--toint', help="Convert scores to integers, larger is better", action='store_true', default=False)
    parser.add_argument('-l', '--limit', type=int, help="Max number of recommendations to generate per-paper", default=10)
    args = parser.parse_args()

    tf = TreeFile(args.infile)
    b = Benchmark(args.benchmark_freq)
    # flag='n' always creates a fresh shelf, clobbering any existing file.
    with shelve.open(args.shelf, flag='n', protocol=pickle.HIGHEST_PROTOCOL) as s:
        # First pass: expert recommendations, keyed "expert|<target_pid>".
        for recs in make_expert_rec(tf, args.limit):
            recd = [r.pid for r in recs]
            s['expert|'+recs[0].target_pid] = recd
            b.increment()
        # Rewind for the second pass (will fail if infile is stdin — the
        # default — since stdin is not seekable).
        args.infile.seek(0)
        tf = TreeFile(args.infile)
        # Second pass: classic recommendations, keyed "classic|<target_pid>".
        for recs in make_classic_recs(tf, args.limit):
            recd = [r.pid for r in recs]
            s['classic|'+recs[0].target_pid] = recd
            b.increment()
    b.print_freq()