Пример #1
0
def test_lca_index_signatures_method():
    """Exercise the 'signatures' method inherited from the base class Index.

    The 47+63 LCA database is expected to yield exactly two signatures.
    """
    lca_path = utils.get_test_data('lca/47+63.lca.json')
    loaded = lca_utils.load_single_database(lca_path)
    db = loaded[0]

    all_sigs = [ss for ss in db.signatures()]
    assert len(all_sigs) == 2
Пример #2
0
def test_load_single_db():
    """Loading delmont-1.lca.json reports ksize 31 and scaled 10000."""
    lca_path = utils.get_test_data('lca/delmont-1.lca.json')
    loaded = lca_utils.load_single_database(lca_path)
    db, ksize, scaled = loaded

    # Shown in the captured output when the test fails.
    print(db)

    assert (ksize, scaled) == (31, 10000)
Пример #3
0
def test_search_db_scaled_lt_sig_scaled():
    """Searching with a query downsampled to scaled=100000 raises ValueError.

    Per the test name, that query scaled value is larger than the scaled
    value of the 47+63 LCA database being searched.
    """
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, _ksize, _scaled = lca_utils.load_single_database(db_path)

    query_path = utils.get_test_data('47.fa.sig')
    sig = sourmash.load_one_signature(query_path)
    coarse_mh = sig.minhash.downsample_scaled(100000)
    sig.minhash = coarse_mh

    with pytest.raises(ValueError):
        db.search(sig, threshold=.01, ignore_abundance=True)
Пример #4
0
def test_lca_index_find_method():
    """Check that 'find' is not implemented on a loaded LCA database.

    Loads the 47+63 LCA database and asserts that ``db.find(None)`` raises
    ``NotImplementedError``.
    """
    filename = utils.get_test_data('lca/47+63.lca.json')
    db, _ksize, _scaled = lca_utils.load_single_database(filename)

    # No signature is needed here: find() is expected to raise before it
    # looks at its argument, so the previously fetched (and unused) first
    # signature has been dropped.
    with pytest.raises(NotImplementedError):
        db.find(None)
Пример #5
0
def test_gather_db_scaled_gt_sig_scaled():
    """The first gather match equals the query downsampled to scaled=10000.

    Per the test name, the database's scaled value is larger than that of
    the query signature loaded from 47.fa.sig.
    """
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, _ksize, _scaled = lca_utils.load_single_database(db_path)

    query_path = utils.get_test_data('47.fa.sig')
    sig = sourmash.load_one_signature(query_path)

    # Gather with the query at its original resolution.
    hits = db.gather(sig, threshold=.01, ignore_abundance=True)
    top_hit = hits[0]
    match_sig = top_hit[1]

    # Only now bring the query down to scaled=10000 for the comparison.
    sig.minhash = sig.minhash.downsample_scaled(10000)
    assert sig.minhash == match_sig.minhash
Пример #6
0
def test_db_idx_to_ident():
    """The idx_to_ident mapping of the 47+63 database has the two expected entries."""
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, _ksize, _scaled = lca_utils.load_single_database(db_path)

    items = sorted(db.idx_to_ident.items())
    assert len(items) == 2

    # Shown in the captured output when the comparison below fails.
    print(items)
    expected = [(32, 'NC_009665'), (48, 'NC_011663')]
    assert items == expected
Пример #7
0
def test_db_lid_to_idx():
    """The lid_to_idx mapping of the 47+63 database has the two expected entries."""
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, _ksize, _scaled = lca_utils.load_single_database(db_path)

    items = sorted(db.lid_to_idx.items())
    assert len(items) == 2

    # Shown in the captured output when the comparison below fails.
    print(items)
    expected = [(32, {32}), (48, {48})]
    assert items == expected
Пример #8
0
def main():
    """Summarize lineages for fragments of genome contigs and write them to CSV.

    Command-line arguments:
        lca_db      path passed to ``lca_utils.load_single_database``
        genome      one or more sequence files opened with ``screed.open``
        output      path of the CSV file to write
        --fragment  fragment length in bases (default 100000)

    Each contig is cut into consecutive fragments of at most ``--fragment``
    bases. For every fragment that produces a non-empty MinHash, one CSV row
    is written per lineage returned by ``summarize``, with the columns
    ``filename, contig, begin, end, lca, lca_rank``.

    Returns:
        0 on completion.
    """
    p = argparse.ArgumentParser()
    p.add_argument('lca_db')
    p.add_argument('genome', nargs='+')
    p.add_argument('output')
    p.add_argument('--fragment', default=100000, type=int)
    args = p.parse_args()

    db, ksize, scaled = lca_utils.load_single_database(args.lca_db)
    # Template MinHash using the database's ksize/scaled; copied per fragment.
    mh_factory = sourmash.MinHash(n=0, ksize=ksize, scaled=scaled)
    print('**', ksize, scaled)

    # The context manager guarantees the CSV is flushed and closed even if
    # an error occurs mid-run; newline='' is what the csv module expects so
    # that it controls line endings itself.
    with open(args.output, 'wt', newline='') as outfp:
        w = csv.writer(outfp)
        w.writerow(['filename', 'contig', 'begin', 'end', 'lca', 'lca_rank'])

        #
        # iterate over all contigs in genome file
        #
        for genome in args.genome:
            for record in screed.open(genome):
                # fragment longer contigs into smaller regions
                for start in range(0, len(record.sequence), args.fragment):
                    seq = record.sequence[start:start + args.fragment]

                    mh = mh_factory.copy_and_clear()
                    mh.add_sequence(seq, force=True)
                    if not mh:
                        # nothing was sketched for this fragment; no rows
                        continue

                    # NOTE(review): the trailing 1 is presumably a minimum
                    # count threshold for summarize -- confirm at its
                    # definition.
                    lineage_counts = summarize(mh.get_mins(), [db], 1)

                    # Use the real fragment length so that the final, shorter
                    # fragment of a contig does not report an end coordinate
                    # past the end of the sequence.
                    end = start + len(seq)

                    for k in lineage_counts:
                        lca = lca_utils.display_lineage(k,
                                                        truncate_empty=False)
                        try:
                            lca_rank = k[-1].rank
                        except IndexError:
                            # empty lineage: there is no last rank to report
                            lca_rank = "none"
                        w.writerow((genome, record.name, start, end,
                                    lca, lca_rank))

    return 0
Пример #9
0
def test_db_lineage_to_lids():
    """The lineage_to_lids mapping of the 47+63 database has two strain-level lineages.

    After sorting, the last entry of each lineage key must be a 'strain'
    rank named for Shewanella baltica OS185 and OS223 respectively.
    """
    db_path = utils.get_test_data('lca/47+63.lca.json')
    db, _ksize, _scaled = lca_utils.load_single_database(db_path)

    items = sorted(db.lineage_to_lids.items())
    assert len(items) == 2

    # Shown in the captured output when an assertion below fails.
    print(items)

    expected_names = ('Shewanella baltica OS185', 'Shewanella baltica OS223')
    for (lineage, _lids), strain_name in zip(items, expected_names):
        leaf = lineage[-1]
        assert leaf.rank == 'strain'
        assert leaf.name == strain_name
Пример #10
0
def test_db_repr():
    """repr() of a loaded database is LCA_Database('<path it was loaded from>')."""
    lca_path = utils.get_test_data('lca/delmont-1.lca.json')
    db, _ksize, _scaled = lca_utils.load_single_database(lca_path)

    expected = "LCA_Database('{}')".format(lca_path)
    assert repr(db) == expected