def test_estimate_functions_1():
    """Exercise both estimators and check their results by position.

    Each result is indexed 0..3; the first three entries are compared
    exactly and the fourth is compared exactly or within a tolerance,
    depending on the case.
    """

    def check_leading(result, expected):
        # Compare result[0], result[1], ... in order against `expected`,
        # reporting the offending entry on failure.
        for position, wanted in enumerate(expected):
            assert result[position] == wanted, result[position]

    res = functions.estimate_optimal_with_N_and_M(99, 1024)
    check_leading(res, (7, 146, 1022))
    assert abs(.008 - res[3]) < .001, res[3]

    res = functions.estimate_optimal_with_N_and_f(99, 0.00701925498897)
    check_leading(res, (7, 145, 1015))
    assert abs(.008 - res[3]) < .002, res[3]

    # Here the fourth entry is compared exactly as well.
    res = functions.estimate_optimal_with_N_and_M(1024, 2)
    check_leading(res, (1, 2, 2, 1.0))

    # using a crazy high FP rate just for coverage
    res = functions.estimate_optimal_with_N_and_f(1024, 0.7)
    check_leading(res, (1, 850, 850))
    deviation = abs(.7 - res[3])
    assert deviation < 0.0022, deviation
def main():
    """Print optimal hash table settings for the requested constraint.

    Parses the command line, then calls ``estimate_optimal_with_N_and_M``
    when ``args.M`` is set, or ``estimate_optimal_with_N_and_f`` when
    ``args.f`` is set, and writes the inputs and the estimate to stderr.
    If neither is set, the parser's ``error`` method is called with a
    message asking for ``-M`` or ``-f``.
    """
    info('estimate_optimal_hash.py', ['counting'])
    # Build the parser once so the instance that parsed the arguments is
    # also the one that reports a usage error.
    parser = get_parser()
    args = parser.parse_args()
    N = args.N

    # NOTE(review): these are truthiness checks, so a value of 0 for M or f
    # is treated the same as the option being absent.
    if args.M:
        M = args.M
        result = estimate_optimal_with_N_and_M(N, M)
        constraint = ("size of memory available to use: ", M)
    elif args.f:
        f = args.f
        result = estimate_optimal_with_N_and_f(N, f)
        constraint = ("desired maximum false positive rate: ", f)
    else:
        parser.error(
            'No action requested, add -M (size of memory available to use) '
            'or -f (desired maximum false positive rate)'
        )
        # Presumably error() exits (as argparse's does); returning keeps the
        # original "nothing else happens" behaviour if it ever comes back.
        return

    report = (
        ("number of estimated distinct k-mers: ", N),
        constraint,
        ("optimal number of hash tables: ", result.num_htables),
        ("optimal size of hash tables: ", result.htable_size),
        ("estimated false positive rate: ", result.fp_rate),
        ("estimated usage of memory: ", result.mem_use),
    )
    for label, value in report:
        print(label, value, file=sys.stderr)
def test_estimate_functions_namedtup():
    """Check that estimator results expose their fields by attribute name.

    Each named field is compared against its expected value; on failure the
    positional entry at the same index is reported, as in the index-based
    variant of this test.
    """
    exact_fields = ("num_htables", "htable_size", "mem_use")

    def check_named(result, expected_values):
        # Field i is read by name but reported by position on failure.
        for position, (field, wanted) in enumerate(
                zip(exact_fields, expected_values)):
            assert getattr(result, field) == wanted, result[position]

    res = functions.estimate_optimal_with_N_and_M(99, 1024)
    check_named(res, (7, 146, 1022))
    assert abs(.008 - res.fp_rate) < .001, res[3]

    res = functions.estimate_optimal_with_N_and_f(99, 0.00701925498897)
    check_named(res, (7, 145, 1015))
    assert abs(.008 - res.fp_rate) < .002, res[3]
def main():
    """Estimate and report optimal hash table parameters on stderr.

    After parsing the command line, one of two estimators is used:
    ``estimate_optimal_with_N_and_M`` when ``args.M`` is truthy, otherwise
    ``estimate_optimal_with_N_and_f`` when ``args.f`` is truthy. When neither
    is truthy the parser's ``error`` method is invoked with a message asking
    for ``-M`` or ``-f``.
    """
    info('estimate_optimal_hash.py', ['counting'])
    # Keep a single parser instance: it both parses the arguments and, if
    # needed, reports the usage error.
    parser = get_parser()
    args = parser.parse_args()
    N = args.N

    # Guard clause for the "nothing requested" case.
    # NOTE(review): truthiness means an explicit 0 counts as "not given".
    if not (args.M or args.f):
        parser.error(
            'No action requested, add -M (size of memory available to use) '
            'or -f (desired maximum false positive rate)'
        )
        # Presumably unreachable because error() exits; kept so that nothing
        # further runs if error() returns.
        return

    # -M takes precedence over -f when both are supplied.
    if args.M:
        M = args.M
        result = estimate_optimal_with_N_and_M(N, M)
        given_label, given_value = "size of memory available to use: ", M
    else:
        f = args.f
        result = estimate_optimal_with_N_and_f(N, f)
        given_label, given_value = "desired maximum false positive rate: ", f

    def emit(label, value):
        # All report lines go to stderr in "label value" form.
        print(label, value, file=sys.stderr)

    emit("number of estimated distinct k-mers: ", N)
    emit(given_label, given_value)
    emit("optimal number of hash tables: ", result.num_htables)
    emit("optimal size of hash tables: ", result.htable_size)
    emit("estimated false positive rate: ", result.fp_rate)
    emit("estimated usage of memory: ", result.mem_use)