def _test_random(self):
    """Time ``RandomSampling`` against readers of increasing size ``N``.

    For each ``N`` in 10, 1e2, ..., 1e6 this method:

    1. builds ``PPReader('population.csv', 'dummy.csv', N=N)`` and times
       its ``init()`` call;
    2. times the construction of ``RandomSampling(dist_func, K=5)``
       together with ``sample(pr)``.

    Both timings are written to stdout. Nothing is asserted or returned.
    """
    # Use sys.stdout.write rather than the Python 2-only ``print``
    # statement: same output, consistent with the rest of this method,
    # and valid syntax on both Python 2 and Python 3.
    sys.stdout.write('Single Scan Random Sample algorithm\n')

    for N in map(int, [10, 1e2, 1e3, 1e4, 1e5, 1e6]):
        pr = PPReader('population.csv', 'dummy.csv', N=N)

        # Read dummy files into buffer
        start_time = time.time()
        pr.init()
        elapsed_time = time.time() - start_time
        sys.stdout.write('init done: %f seconds taken.\t' % elapsed_time)
        sys.stdout.flush()

        # Actual test: sampler construction is included in the timed span.
        start_time = time.time()
        r = RandomSampling(dist_func, K=5)
        r.sample(pr)
        elapsed_time = time.time() - start_time

        # The sampled result is fetched outside the timed span and is not
        # inspected; the call is kept in case it has side effects.
        r.get_sampled()

        sys.stdout.write('%d: %f\n' % (N, elapsed_time))
        sys.stdout.flush()
def large_sample(self):
    """Return all items yielded by a ``PPReader`` on 'test_pp.csv' as an array.

    The same file name is passed for both ``PPReader`` arguments; every
    item produced by ``itr()`` is collected and handed to ``np.array``.
    """
    reader = PPReader('test_pp.csv', 'test_pp.csv')
    items = list(reader.itr())
    return np.array(items)