def create_countgraph(args, ksize=None, multiplier=1.0, fp_rate=0.1):
    """Build a countgraph from parsed command-line style arguments.

    ``args`` is first passed through ``_check_fp_rate`` together with
    ``fp_rate``; the object returned by that call is used from then on.

    Parameters:
        args: namespace-like object. Reads ``n_tables``, ``small_count``
            and (when ``ksize`` is None) ``ksize``; ``force`` and
            ``bigcount`` are consulted only if present.
        ksize: k-mer size; falls back to ``args.ksize`` when None.
        multiplier: forwarded to ``calculate_graphsize``.
        fp_rate: forwarded to ``_check_fp_rate``.

    Returns:
        ``khmer.SmallCountgraph`` when ``args.small_count`` is truthy,
        otherwise ``khmer.Countgraph`` (with bigcount applied if ``args``
        carries a ``bigcount`` attribute).

    Exits the process with status 1 when more than 20 tables are requested
    without ``force`` (only checked if ``args`` has a ``force`` attribute),
    or when the k-mer size exceeds 32.
    """
    args = _check_fp_rate(args, fp_rate)

    # The table-count limit is only enforced for callers exposing --force.
    if hasattr(args, 'force') and args.n_tables > 20:
        if args.force:
            log_warn("\n*** Warning: Maximum recommended number of "
                     "tables is 20, discarded by force nonetheless!\n")
        else:
            print_error("\n** ERROR: khmer only supports number "
                        "of tables <= 20.\n")
            sys.exit(1)

    ksize = args.ksize if ksize is None else ksize
    if ksize > 32:
        print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
        sys.exit(1)

    n_tables_attr = 'bigcount'
    if args.small_count:
        small_size = calculate_graphsize(args, 'smallcountgraph',
                                         multiplier=multiplier)
        return khmer.SmallCountgraph(ksize, small_size, args.n_tables)

    full_size = calculate_graphsize(args, 'countgraph',
                                    multiplier=multiplier)
    countgraph = khmer.Countgraph(ksize, full_size, args.n_tables)
    if hasattr(args, n_tables_attr):
        countgraph.set_use_bigcount(args.bigcount)
    return countgraph
def test_smallcountgraph_vs_table():
    """Check which methods the small table and small graph types expose.

    Both must offer ``add``; only the graph must offer ``consume_and_tag``.
    """
    counttable = khmer.SmallCounttable(4, 21, 3)
    countgraph = khmer.SmallCountgraph(4, 21, 3)

    # Shared counting interface.
    assert hasattr(counttable, 'add')
    assert hasattr(countgraph, 'add')

    # Tagging is expected on the graph only.
    table_can_tag = hasattr(counttable, 'consume_and_tag')
    graph_can_tag = hasattr(countgraph, 'consume_and_tag')
    assert not table_can_tag
    assert graph_can_tag
def test_get_raw_tables_smallcountgraph():
    """Each raw table is a memoryview of ``hashsize // 2 + 1`` items."""
    # For the same number of entries a SmallCountgraph uses ~half the
    # memory of a normal Countgraph.
    graph = khmer.SmallCountgraph(20, 1e5, 4)
    raw_tables = graph.get_raw_tables()

    for n_entries, raw in zip(graph.hashsizes(), raw_tables):
        assert isinstance(raw, memoryview)
        expected_len = n_entries // 2 + 1
        assert expected_len == len(raw)
def test_get_raw_tables_view_smallcountgraph():
    """Raw tables fetched before ``consume`` must show the later update.

    The tables are fetched once, checked to be all zero, and then checked
    again after one k-mer is consumed, without being fetched a second time.
    """
    graph = khmer.SmallCountgraph(4, 1e5, 4)
    views = graph.get_raw_tables()

    # Nothing has been counted yet.
    for view in views:
        assert sum(view.tolist()) == 0

    graph.consume('AAAA')

    # The actual count is 1 but stored in the first 4 bits of a byte,
    # and so becomes 16.
    packed_one = int('00010000', 2)
    for view in views:
        assert sum(view.tolist()) == packed_one
def test_save_load_occupied_small(ctfile):
    """Saving then loading a SmallCountgraph must preserve ``n_occupied``.

    ``ctfile`` is the name of the temporary file to save to.
    """
    print('working with', ctfile)

    seq_path = utils.get_test_data('random-20-a.fa')
    out_path = utils.get_temp_filename(ctfile)

    # Populate a graph from the test sequences and write it to disk.
    original = khmer.SmallCountgraph(12, 1e5, 4)
    original.consume_seqfile(seq_path)
    original.save(out_path)

    reloaded = khmer.load_countgraph(out_path, small=True)

    n_original = original.n_occupied()
    n_reloaded = reloaded.n_occupied()
    assert n_original == 3886, n_original
    assert n_reloaded == n_original, n_reloaded