示例#1
0
def test_calculate_containment_at_rank_4():
    """Containment per rank when half of the query hashes match no lineage.

    Lineage 'a;b;c' is given one hash and lineage 'a;d' another; the query
    holds those two hashes plus two that are never added to any lineage.
    """
    hashval1 = 12345678
    hashval2 = 87654321
    match_rank = "genus"
    # only the lineage is needed from the helper here
    _, _, lin1 = make_sig_and_lin([hashval1], 'uniq', 'a;b;c')
    lin2 = lca_utils.make_lineage('a;d')

    # make lineage hashD
    lineage_hashD = defaultdict(test_gen_mh)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval1], lin1,
                                        match_rank)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval2], lin2,
                                        match_rank)

    # make query sig: the two lineage hashvals plus two (nonmatching) extras
    mh = make_mh([hashval1, hashval2, 33333333, 44444444])
    query_sig = sourmash.SourmashSignature(mh, name='query')

    # calculate containment
    containmentD = calculate_containment_at_rank(lineage_hashD, query_sig,
                                                 match_rank)

    # the second item of each per-rank entry is compared as the containment
    # superkingdom lineage should have 50% containment
    assert containmentD["superkingdom"][0][1] == 0.5
    # the class entry and both phylum entries should have 25% containment
    assert containmentD["class"][0][1] == 0.25
    assert [containmentD["phylum"][0][1],
            containmentD["phylum"][1][1]] == [0.25, 0.25]
示例#2
0
def test_calculate_containment_at_rank_3():
    """Containment per rank when every query hash was added to a lineage.

    Lineage 'a;b;c' is given one hash and lineage 'a;d' another; the query
    holds exactly those two hashes.
    """
    hashval1 = 12345678
    hashval2 = 87654321
    match_rank = "genus"
    # only the lineage is needed from the helper here
    _, _, lin1 = make_sig_and_lin([hashval1], 'uniq', 'a;b;c')
    lin2 = lca_utils.make_lineage('a;d')

    # make lineage hashD
    lineage_hashD = defaultdict(test_gen_mh)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval1], lin1,
                                        match_rank)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval2], lin2,
                                        match_rank)

    # make query sig
    mh = make_mh([hashval1, hashval2])
    query_sig = sourmash.SourmashSignature(mh, name='query')

    # calculate containment
    containmentD = calculate_containment_at_rank(lineage_hashD, query_sig,
                                                 match_rank)

    # the second item of each per-rank entry is compared as the containment
    # superkingdom lineage should have 100% containment
    assert containmentD["superkingdom"][0][1] == 1.0
    # class should have 50% containment
    assert containmentD["class"][0][1] == 0.5
    # both phylum entries should have 50% containment
    phylum_containment = [
        containmentD["phylum"][0][1], containmentD["phylum"][1][1]
    ]
    assert [0.5, 0.5] == phylum_containment
示例#3
0
def test_count_lca_for_assignments_abund_4():
    """Abundance-weighted LCA counts with three lineages and three hashvals."""
    hashval = 12345678
    hashval2 = 87654321
    hashval3 = 34567891
    # abundance recorded for each hashval
    hashval_counts = {hashval: 2, hashval2: 5, hashval3: 3}

    lin = lca_utils.make_lineage('a;b;c')
    lin2 = lca_utils.make_lineage('a;b;d')
    lin3 = lca_utils.make_lineage('a;b;d;e')

    db = FakeLCA_Database()
    db._set_lineage_assignment(hashval, {lin, lin2})      # lca: a;b
    db._set_lineage_assignment(hashval2, {lin})           # lca: a;b;c
    db._set_lineage_assignment(hashval3, {lin2, lin3})    # a;b;d;e

    assignments = lca_utils.gather_assignments(hashval_counts, [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 3
    # hashval2 is assigned to lin alone, so lin gets its abundance
    assert counts[lin] == 5
    # a;b;d (lin2) + a;b;d;e (lin3) is counted under a;b;d;e (lin3) only
    assert counts[lin2] == 0
    # hashval3's abundance
    assert counts[lin3] == 3

    # hashval is split between lin and lin2, so it is counted at a;b
    lca_lin = lca_utils.make_lineage('a;b')
    assert counts[lca_lin] == 2
示例#4
0
def test_sort_by_rank_and_containment_2():
    """Results sort by rank first, then by containment within a rank.

    Four results are checked: one superkingdom, two phylum, and one class
    entry.
    """
    hashval1 = 12345678
    hashval2 = 87654321
    hashval3 = 33333333
    match_rank = "genus"
    # only the lineage is needed from the helper here
    _, _, lin1 = make_sig_and_lin([hashval1], 'uniq', 'a;b;c')
    lin2 = lca_utils.make_lineage('a;d')

    # make lineage hashD: lin1 gets two hashes, lin2 gets one
    lineage_hashD = defaultdict(test_gen_mh)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval1, hashval3],
                                        lin1, match_rank)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval2], lin2,
                                        match_rank)

    # make query sig: the three lineage hashvals plus one extra hashval
    mh = make_mh([hashval1, hashval2, hashval3, 44444444])
    query_sig = sourmash.SourmashSignature(mh, name='query')
    superK_lin = lca_utils.make_lineage('a')

    # calculate containment
    containmentD = calculate_containment_at_rank(lineage_hashD, query_sig,
                                                 match_rank)
    sorted_results = sort_by_rank_and_containment(containmentD, match_rank)

    # superkingdom result comes first
    assert sorted_results[0].lineage == superK_lin
    assert sorted_results[0].contained_at_rank == 0.75
    # phylum results should also be sorted by containment
    assert sorted_results[1].lineage[-1].rank == "phylum"
    assert sorted_results[1].contained_at_rank == 0.5
    assert sorted_results[2].lineage[-1].rank == "phylum"
    assert sorted_results[2].contained_at_rank == 0.25
    # class results
    assert sorted_results[3].lineage[-1].rank == "class"
    assert sorted_results[3].contained_at_rank == 0.5
示例#5
0
def test_count_lca_for_assignments_abund_3():
    """Abundance-weighted LCA counts with two lineages and two hashvals."""
    hashval = 12345678
    hashval2 = 87654321
    # abundance recorded for each hashval
    hashval_counts = {hashval: 2, hashval2: 5}

    lin = lca_utils.make_lineage('a;b;c')
    lin2 = lca_utils.make_lineage('a;b;d')

    db = FakeLCA_Database()
    db._set_lineage_assignment(hashval, {lin, lin2})
    db._set_lineage_assignment(hashval2, {lin})

    assignments = lca_utils.gather_assignments(hashval_counts, [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 2
    # hashval2 is assigned to lin alone
    assert counts[lin] == 5
    # lin2 only appears together with lin, so nothing is counted under it
    assert counts[lin2] == 0

    # hashval is shared by lin and lin2, so it is counted at a;b
    lca_lin = lca_utils.make_lineage('a;b')
    assert counts[lca_lin] == 2
示例#6
0
def test_contain_at_rank_1():
    """Containment search at class rank with one minhash and one lineage."""
    ident = 'uniq'
    mh1, sig1, lin1 = make_sig_and_lin([12345678], ident, 'a;b;c')

    lca_db = LCA_Database(scaled=1, ksize=3)
    lca_db.insert(sig1, ident=ident)

    lin_db = LineageDB()
    lin_db.insert(ident, lin1)

    results, rank_results = search_containment_at_rank(
        mh1, lca_db, lin_db, "class")

    # a single search hit that mirrors the inserted signature
    assert len(results) == 1
    hit = results[0]
    assert hit.lineage == lin1
    assert hit.name == ident
    assert hit.similarity == 1.0

    # one rank result per rank, each fully contained
    expected_lineages = [
        lca_utils.make_lineage('a'),
        lca_utils.make_lineage('a;b'),
        lin1,
    ]
    assert len(rank_results) == 3
    for rank_result, expected in zip(rank_results, expected_lineages):
        assert rank_result.lineage == expected
        assert rank_result.contained_at_rank == 1.0
示例#7
0
def test_calculate_containment_at_rank_2():
    """Containment per rank for a query holding only one lineage's hash.

    Lineage 'a;b;c' is given one hash and lineage 'a;d' another; the query
    is the signature built from the first hash alone.
    """
    hashval1 = 12345678
    hashval2 = 87654321
    match_rank = "genus"
    # sig1 doubles as the query signature below
    _, sig1, lin1 = make_sig_and_lin([hashval1], 'uniq', 'a;b;c')
    lin2 = lca_utils.make_lineage('a;d')

    # make lineage hashD
    lineage_hashD = defaultdict(test_gen_mh)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval1], lin1,
                                        match_rank)
    lineage_hashD = add_hashes_at_ranks(lineage_hashD, [hashval2], lin2,
                                        match_rank)

    # calculate containment
    containmentD = calculate_containment_at_rank(lineage_hashD, sig1,
                                                 match_rank)

    # the second item of each per-rank entry is compared as the containment
    # superkingdom and class entries should have 100% containment
    assert containmentD["superkingdom"][0][1] == 1.0
    assert containmentD["class"][0][1] == 1.0
    # one phylum entry is fully contained, the other not at all; the order
    # of the two entries is not checked
    phylum_containment = {
        containmentD["phylum"][0][1], containmentD["phylum"][1][1]
    }
    assert {0.0, 1.0} == phylum_containment
示例#8
0
def test_find_lca_3():
    """find_lca on a lineage plus its own prefix yields the longer lineage."""
    longer = lca_utils.make_lineage('a;b;c')
    prefix = lca_utils.make_lineage('a;b')

    lca, _reason = find_lca(build_tree([longer, prefix]))

    # the most specific leaf node is expected
    assert lca == longer
示例#9
0
def test_pop_to_rank_1():
    """Popping a family-level lineage to 'order' gives the order-level one."""
    order_level = make_lineage('d__a;p__b;c__c;o__d')
    family_level = make_lineage('d__a;p__b;c__c;o__d;f__f')

    # printed for inspection when the test fails
    print(order_level)
    print(pop_to_rank(family_level, 'order'))

    assert pop_to_rank(family_level, 'order') == order_level
示例#10
0
def test_contain_at_rank_3():
    """Containment search at class rank with two totally distinct lineages."""
    hashval1, hashval2 = 12345678, 87654321
    ident1, ident2 = 'first', 'second'
    _, sig1, lin1 = make_sig_and_lin([hashval1], ident1, 'a;b;c')
    _, sig2, lin2 = make_sig_and_lin([hashval2], ident2, 'd;e;f')

    # lca_db holding both signatures
    lca_db = LCA_Database(scaled=1, ksize=3)
    lca_db.insert(sig1, ident=ident1)
    lca_db.insert(sig2, ident=ident2)

    # lin_db mapping each ident to its lineage
    lin_db = LineageDB()
    lin_db.insert(ident1, lin1)
    lin_db.insert(ident2, lin2)

    # search with combined hashvals
    search_mh = make_mh([hashval1, hashval2])
    results, rank_results = search_containment_at_rank(
        search_mh, lca_db, lin_db, "class")

    # two hits, compared as sets so their order does not matter
    assert len(results) == 2
    first_hit, second_hit = results[0], results[1]
    assert {first_hit.lineage, second_hit.lineage} == {lin1, lin2}
    assert {first_hit.similarity, second_hit.similarity} == {0.5}
    assert {first_hit.name, second_hit.name} == {ident1, ident2}

    # rank results come in pairs: superkingdom, then phylum, then class
    expected_pairs = [
        {lca_utils.make_lineage('a'), lca_utils.make_lineage('d')},
        {lca_utils.make_lineage('a;b'), lca_utils.make_lineage('d;e')},
        {lin1, lin2},
    ]
    assert len(rank_results) == 6

    for pair_index, expected_lineages in enumerate(expected_pairs):
        left = rank_results[2 * pair_index]
        right = rank_results[2 * pair_index + 1]
        assert {left.lineage, right.lineage} == expected_lineages
        assert {left.contained_at_rank, right.contained_at_rank} == {0.5}
示例#11
0
def test_is_lineage_match_3():
    """An empty lineage matches a populated one at no rank."""
    empty = make_lineage('')
    populated = make_lineage('d__a;p__b;c__c;o__d;f__f')

    ranks = ('superkingdom', 'family', 'order', 'class', 'phylum', 'genus',
             'species')
    for rank in ranks:
        assert not is_lineage_match(empty, populated, rank)
示例#12
0
def test_is_lineage_match_2():
    """Identical family-level lineages match at family and above only."""
    left = make_lineage('d__a;p__b;c__c;o__d;f__f')
    right = make_lineage('d__a;p__b;c__c;o__d;f__f')

    for rank in ('superkingdom', 'phylum', 'class', 'order', 'family'):
        assert is_lineage_match(left, right, rank)

    # neither lineage has a genus or species entry to match on
    for rank in ('genus', 'species'):
        assert not is_lineage_match(left, right, rank)
示例#13
0
def test_is_lineage_match_1():
    """Lineages differing at family match at order and above, not below."""
    left = make_lineage('d__a;p__b;c__c;o__d;f__e')
    right = make_lineage('d__a;p__b;c__c;o__d;f__f')

    for rank in ('superkingdom', 'phylum', 'class', 'order'):
        assert is_lineage_match(left, right, rank)

    # the two lineages diverge at family (f__e vs f__f)
    for rank in ('family', 'genus', 'species'):
        assert not is_lineage_match(left, right, rank)
示例#14
0
def test_gather_assignments_2():
    """gather_assignments reports both lineages assigned to one hashval."""
    hashval = 12345678
    lin = lca_utils.make_lineage('a;b;c')
    lin2 = lca_utils.make_lineage('a;b;d')

    db = FakeLCA_Database()
    db._set_lineage_assignment(hashval, {lin, lin2})

    assignments = lca_utils.gather_assignments([hashval], [db])
    print(assignments)

    # compared against a fresh set, not the object handed to the fake db
    assert assignments[hashval] == {lin, lin2}
示例#15
0
def test_get_lineage_at_match_rank():
    """get_lineage_at_match_rank returns the lineage cut at the given rank.

    A signature with lineage 'a;b;c' is looked up at superkingdom, phylum,
    and class rank.
    """
    ident1 = 'first'
    # the minhash itself is not needed here
    _, sig1, lin1 = make_sig_and_lin([12345678], ident1, 'a;b;c')

    # make lin_db
    lin_db = LineageDB()
    lin_db.insert(ident1, lin1)

    superkingdom = lca_utils.make_lineage('a')
    phylum = lca_utils.make_lineage('a;b')

    assert get_lineage_at_match_rank(lin_db, sig1, "phylum") == phylum
    assert get_lineage_at_match_rank(lin_db, sig1,
                                     "superkingdom") == superkingdom
    # at class rank the full inserted lineage is expected back
    assert get_lineage_at_match_rank(lin_db, sig1, "class") == lin1
示例#16
0
def test_add_hashes_at_ranks_1():
    """Hashes added for 'a;b' also land on the pre-populated lineage 'a'."""
    hashval1 = 12345678
    hashval2 = 87654321
    lin1 = lca_utils.make_lineage('a')
    lin2 = lca_utils.make_lineage('a;b')

    lineage_hashD = defaultdict(test_gen_mh)
    # seed lineage 'a' with hashval1 by hand
    lineage_hashD[lin1].add_many([hashval1])

    # add hashval2 under 'a;b'
    lineage_hashD = add_hashes_at_ranks(
        lineage_hashD, [hashval2], lin2, "genus")

    # 'a' now holds both hashes; 'a;b' holds only the new one
    assert set(lineage_hashD[lin1].hashes) == {hashval1, hashval2}
    assert set(lineage_hashD[lin2].hashes) == {hashval2}
示例#17
0
def test_gather_guess_tax_at_rank_1():
    """Guess taxonomy at phylum rank from one minhash with one lineage."""
    ident = 'uniq'
    mh1, sig1, lin1 = make_sig_and_lin([12345678], ident, 'a;b;c')

    lca_db = LCA_Database(scaled=1, ksize=3)
    lca_db.insert(sig1, ident=ident)

    lin_db = LineageDB()
    lin_db.insert(ident, lin1)

    num_hashes = 1
    expected_lineage = lca_utils.make_lineage('a;b')

    gather_results = list(gather_at_rank(mh1, lca_db, lin_db, "class"))
    phylum_results = gather_guess_tax_at_rank(
        gather_results, num_hashes, "phylum", minimum_matches=1)

    # three items come back; only the first two are checked here
    assert len(phylum_results) == 3

    assert phylum_results[0] == expected_lineage
    assert phylum_results[1] == 1.0
示例#18
0
def test_gather_guess_tax_at_each_rank_1():
    """Guess taxonomy at each rank for two minhashes sharing one lineage."""
    hashval, hashval2 = 12345678, 87654321
    ident1, ident2 = 'first', 'second'
    _, sig1, lin1 = make_sig_and_lin([hashval], ident1, 'a;b;c')
    _, sig2, lin2 = make_sig_and_lin([hashval2], ident2, 'a;b;c')

    # lca_db holding both signatures
    lca_db = LCA_Database(scaled=1, ksize=3)
    lca_db.insert(sig1, ident=ident1)
    lca_db.insert(sig2, ident=ident2)

    # lin_db mapping each ident to its lineage
    lin_db = LineageDB()
    lin_db.insert(ident1, lin1)
    lin_db.insert(ident2, lin2)

    num_hashes = 2
    superk_lin = lca_utils.make_lineage('a')
    phylum_lin = lca_utils.make_lineage('a;b')

    # search with combined hashvals
    search_mh = make_mh([hashval, hashval2])
    gather_results = list(gather_at_rank(search_mh, lca_db, lin_db, "class"))
    rank_results = gather_guess_tax_at_each_rank(
        gather_results,
        num_hashes,
        minimum_matches=1,
        lowest_rank="class",
        taxlist=lca_utils.taxlist(include_strain=False))

    assert len(rank_results) == 3

    # one result per rank, each fully identified and fully in the majority
    for position, lineage in enumerate((superk_lin, phylum_lin, lin1)):
        assert rank_results[position] == RankSumGatherResult(
            lineage=lineage, f_ident=1.0, f_major=1.0)
示例#19
0
def test_gather_guess_tax_at_each_rank_3():
    """Guess taxonomy at each rank for two minhashes with distinct lineages."""
    hashval1, hashval2 = 12345678, 87654321
    ident1, ident2 = 'first', 'second'
    _, sig1, lin1 = make_sig_and_lin([hashval1], ident1, 'a;b;c')
    _, sig2, lin2 = make_sig_and_lin([hashval2], ident2, 'd;e;f')

    # lca_db holding both signatures
    lca_db = LCA_Database(scaled=1, ksize=3)
    lca_db.insert(sig1, ident=ident1)
    lca_db.insert(sig2, ident=ident2)

    # lin_db mapping each ident to its lineage
    lin_db = LineageDB()
    lin_db.insert(ident1, lin1)
    lin_db.insert(ident2, lin2)

    num_hashes = 2
    # NOTE(review): the 'd;e;f' lineage is the expected winner here; whether
    # that tie-break is guaranteed is an open question from the original
    # author.
    superk_lin = lca_utils.make_lineage('d')
    phylum_lin = lca_utils.make_lineage('d;e')

    # search with combined hashvals
    search_mh = make_mh([hashval1, hashval2])
    gather_results = list(gather_at_rank(search_mh, lca_db, lin_db, "class"))
    rank_results = gather_guess_tax_at_each_rank(
        gather_results,
        num_hashes,
        minimum_matches=1,
        lowest_rank="class",
        taxlist=lca_utils.taxlist(include_strain=False))
    assert len(rank_results) == 3

    # one result per rank; the winning lineage accounts for half the matches
    for position, lineage in enumerate((superk_lin, phylum_lin, lin2)):
        assert rank_results[position] == RankSumGatherResult(
            lineage=lineage, f_ident=1.0, f_major=0.5)
示例#20
0
def test_count_lca_for_assignments_2():
    """One hashval shared by two lineages is counted once, at their LCA."""
    hashval = 12345678
    lin = lca_utils.make_lineage('a;b;c')
    lin2 = lca_utils.make_lineage('a;b;d')

    db = FakeLCA_Database()
    db._set_lineage_assignment(hashval, {lin, lin2})

    assignments = lca_utils.gather_assignments([hashval], [db])
    counts = count_lca_for_assignments(assignments)
    print(counts)

    # neither full lineage is counted on its own
    for full_lineage in (lin, lin2):
        assert counts[full_lineage] == 0

    # NOTE: this length check passes only if the zero lookups above did not
    # add entries to counts
    assert len(counts) == 1
    lca_lin = lca_utils.make_lineage('a;b')
    assert counts[lca_lin] == 1
示例#21
0
def test_add_hashes_at_ranks_4():
    """Hashes added for 'a;d' reach 'a' but leave the sibling 'a;b' alone."""
    hashval1 = 12345678
    hashval2 = 87654321
    lin1 = lca_utils.make_lineage('a;b')
    lin2 = lca_utils.make_lineage('a')

    lineage_hashD = defaultdict(test_gen_mh)
    # seed both 'a;b' and 'a' with hashval1 by hand
    for seeded in (lin1, lin2):
        lineage_hashD[seeded].add_many([hashval1])

    # add hashval2 under a new lineage 'a;d'
    lin3 = lca_utils.make_lineage('a;d')
    lineage_hashD = add_hashes_at_ranks(
        lineage_hashD, [hashval2], lin3, "genus")

    assert set(lineage_hashD[lin1].hashes) == {hashval1}
    assert set(lineage_hashD[lin2].hashes) == {hashval1, hashval2}
    assert set(lineage_hashD[lin3].hashes) == {hashval2}
示例#22
0
def test_contain_at_rank_4():
    """Containment search with two lineages sharing superkingdom and phylum."""
    hashval, hashval2 = 12345678, 87654321
    ident1, ident2 = 'first', 'second'
    _, sig1, lin1 = make_sig_and_lin([hashval], ident1, 'a;b;c')
    _, sig2, lin2 = make_sig_and_lin([hashval2], ident2, 'a;b;f')

    # lca_db holding both signatures
    lca_db = LCA_Database(scaled=1, ksize=3)
    lca_db.insert(sig1, ident=ident1)
    lca_db.insert(sig2, ident=ident2)

    # lin_db mapping each ident to its lineage
    lin_db = LineageDB()
    lin_db.insert(ident1, lin1)
    lin_db.insert(ident2, lin2)

    # search with combined hashvals
    search_mh = make_mh([hashval, hashval2])
    results, rank_results = search_containment_at_rank(
        search_mh, lca_db, lin_db, "class")

    # two hits, compared as sets so their order does not matter
    assert len(results) == 2
    first_hit, second_hit = results[0], results[1]
    assert {first_hit.lineage, second_hit.lineage} == {lin1, lin2}
    assert {first_hit.similarity, second_hit.similarity} == {0.5}
    assert {first_hit.name, second_hit.name} == {ident1, ident2}

    shared_lineages = [
        lca_utils.make_lineage('a'),
        lca_utils.make_lineage('a;b'),
    ]

    assert len(rank_results) == 4
    # superkingdom and phylum aggregate both hashes
    for rank_result, shared in zip(rank_results, shared_lineages):
        assert rank_result.lineage == shared
        assert rank_result.contained_at_rank == 1.0

    # the two class-level results differ and each holds half the query
    class_a, class_b = rank_results[2], rank_results[3]
    assert {class_a.lineage, class_b.lineage} == {lin1, lin2}
    assert {class_a.contained_at_rank, class_b.contained_at_rank} == {0.5}
示例#23
0
def test_count_lca_for_assignments_abund_5():
    """Abundance-weighted LCA counts when one lineage extends the other."""
    hashval = 12345678
    hashval2 = 87654321
    # abundance recorded for each hashval
    hashval_counts = {hashval: 2, hashval2: 5}

    # lin2 is lin with one more level of taxonomic detail
    lin = lca_utils.make_lineage('a;b;d')
    lin2 = lca_utils.make_lineage('a;b;d;e')

    db = FakeLCA_Database()
    db._set_lineage_assignment(hashval, {lin, lin2})
    db._set_lineage_assignment(hashval2, {lin})

    assignments = lca_utils.gather_assignments(hashval_counts, [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 2
    # hashval2 is assigned to lin alone
    assert counts[lin] == 5
    # lin + lin2 together are counted under lin2 only
    assert counts[lin2] == 2
示例#24
0
def test_count_lca_for_assignments_abund_1():
    """A single hashval with abundance 3 counts 3 for its only lineage."""
    hashval = 12345678
    hashval_counts = {hashval: 3}

    lin = lca_utils.make_lineage('a;b;c')

    db = FakeLCA_Database()
    db._set_lineage_assignment(hashval, {lin})

    # the keys view is passed here, not the dict itself
    assignments = lca_utils.gather_assignments(hashval_counts.keys(), [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 1
    assert counts[lin] == 3
示例#25
0
def make_sig_and_lin(hashvals, ident, lin, ksize=3, scaled=1):
    """Build a minhash, a signature named `ident`, and a lineage from `lin`.

    Returns a `(minhash, signature, lineage)` tuple.

    NOTE(review): `ksize` and `scaled` are accepted but never used in this
    body -- confirm whether they were meant to be forwarded to `make_mh`.
    """
    minhash = make_mh(hashvals)
    return (
        minhash,
        sourmash.SourmashSignature(minhash, name=ident),
        lca_utils.make_lineage(lin),
    )
示例#26
0
def test_pop_to_rank_2():
    """Popping to a rank below the lineage's last entry changes nothing."""
    # the lineage stops at family, so it is already above 'species'
    family_level = make_lineage('d__a;p__b;c__c;o__d;f__f')

    print(pop_to_rank(family_level, 'species'))

    assert pop_to_rank(family_level, 'species') == family_level