def test_lca_index_signatures_method():
    # 'signatures' is inherited from the base Index class; it should
    # yield every signature stored in the LCA database.
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    sigs = [s for s in db.signatures()]
    assert len(sigs) == 2
def test_load_single_db():
    # loading a single LCA database should report its ksize and scaled.
    db_path = utils.get_test_data('lca/delmont-1.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)
    print(db)

    assert ksize == 31
    assert scaled == 10000
def test_search_db_scaled_lt_sig_scaled():
    # a query downsampled to a coarser scaled than the database's must be
    # rejected by search() with a ValueError.
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    query = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'))
    query.minhash = query.minhash.downsample_scaled(100000)

    with pytest.raises(ValueError) as e:
        db.search(query, threshold=.01, ignore_abundance=True)
def test_lca_index_find_method():
    # the low-level 'find' API (from the base Index class) is not
    # implemented for LCA databases and should raise.
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    sig = next(iter(db.signatures()))

    with pytest.raises(NotImplementedError) as e:
        db.find(None)
def test_gather_db_scaled_gt_sig_scaled():
    # gather with a query at a finer scaled than the database: the match
    # is returned at the database's scaled, so downsample the query minhash
    # before comparing.
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    query = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'))
    results = db.gather(query, threshold=.01, ignore_abundance=True)
    match = results[0][1]

    query.minhash = query.minhash.downsample_scaled(10000)
    assert query.minhash == match.minhash
def test_db_idx_to_ident():
    # idx_to_ident should map internal integer indices back to identifiers.
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    mapping = sorted(db.idx_to_ident.items())
    assert len(mapping) == 2
    print(mapping)
    assert mapping == [(32, 'NC_009665'), (48, 'NC_011663')]
def test_db_lid_to_idx():
    # lid_to_idx should map each lineage id to the set of signature indices
    # carrying that lineage.
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    mapping = sorted(db.lid_to_idx.items())
    assert len(mapping) == 2
    print(mapping)
    assert mapping == [(32, {32}), (48, {48})]
def main():
    """Fragment input genomes and classify each fragment against an LCA DB.

    Splits every contig of every input genome into fixed-size fragments,
    sketches each fragment, assigns it an LCA lineage via ``summarize``,
    and writes one CSV row per (genome, contig, fragment, lineage).

    Returns 0 on success.
    """
    p = argparse.ArgumentParser()
    p.add_argument('lca_db')
    p.add_argument('genome', nargs='+')
    p.add_argument('output')
    p.add_argument('--fragment', default=100000, type=int)
    args = p.parse_args()

    db, ksize, scaled = lca_utils.load_single_database(args.lca_db)
    # empty MinHash template; each fragment gets a fresh copy of it.
    mh_factory = sourmash.MinHash(n=0, ksize=ksize, scaled=scaled)
    print('**', ksize, scaled)

    n = 0                # fragments examined
    m = 0                # fragments that produced hashes / were classified
    sum_bp = 0           # total bp examined
    sum_missed_bp = 0    # bp in fragments that produced no hashes

    # use a context manager so the CSV is flushed and closed on every exit
    # path (the previous version leaked the file handle).
    with open(args.output, 'wt') as outfp:
        w = csv.writer(outfp)
        w.writerow(['filename', 'contig', 'begin', 'end', 'lca', 'lca_rank'])

        # iterate over all contigs in each genome file
        for genome in args.genome:
            for record in screed.open(genome):
                # fragment longer contigs into smaller regions
                for start in range(0, len(record.sequence), args.fragment):
                    seq = record.sequence[start:start + args.fragment]
                    n += 1
                    sum_bp += len(seq)

                    mh = mh_factory.copy_and_clear()
                    mh.add_sequence(seq, force=True)
                    if not mh:
                        # fragment produced no hashes; cannot classify it.
                        sum_missed_bp += len(seq)
                        continue

                    lineage_counts = summarize(mh.get_mins(), [db], 1)
                    for k in lineage_counts:
                        lca = lca_utils.display_lineage(k, truncate_empty=False)
                        try:
                            lca_rank = k[-1].rank
                        except IndexError:
                            # empty lineage -> no rank to report
                            lca_rank = "none"

                        w.writerow((genome, record.name, start,
                                    start + args.fragment, lca, lca_rank))

                    m += 1

    return 0
def test_db_lineage_to_lids():
    # lineage_to_lids should map each full lineage tuple to its lid set;
    # check the terminal (strain-level) node of both stored lineages.
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    entries = sorted(db.lineage_to_lids.items())
    assert len(entries) == 2
    print(entries)

    strain_a = entries[0][0][-1]
    assert strain_a.rank == 'strain'
    assert strain_a.name == 'Shewanella baltica OS185'

    strain_b = entries[1][0][-1]
    assert strain_b.rank == 'strain'
    assert strain_b.name == 'Shewanella baltica OS223'
def test_db_repr():
    # repr() of a loaded database should show the class and source filename.
    db_path = utils.get_test_data('lca/delmont-1.lca.json')
    db, ksize, scaled = lca_utils.load_single_database(db_path)

    assert repr(db) == "LCA_Database('{}')".format(db_path)