示例#1
0
def test_count_lca_for_assignments_abund_3():
    """Abundance-weighted LCA counting with two hashvals and two lineages.

    One hashval (abundance 2) is assigned to both a;b;c and a;b;d; the
    other (abundance 5) is assigned to a;b;c only.
    """
    shared_hash = 12345678
    solo_hash = 87654321
    hashval_counts = {shared_hash: 2, solo_hash: 5}

    lin_c = lca_utils.make_lineage('a;b;c')
    lin_d = lca_utils.make_lineage('a;b;d')

    db = FakeLCA_Database()
    db._set_lineage_assignment(shared_hash, {lin_c, lin_d})
    db._set_lineage_assignment(solo_hash, {lin_c})

    assignments = lca_utils.gather_assignments(hashval_counts, [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 2
    assert counts[lin_c] == 5    # abundance of solo_hash
    assert counts[lin_d] == 0    # a;b;d is never an LCA by itself here

    # the hashval assigned to both lineages is expected at their shared prefix
    common_prefix = lca_utils.make_lineage('a;b')
    assert counts[common_prefix] == 2
示例#2
0
def test_count_lca_for_assignments_abund_4():
    """Abundance-weighted LCA counting with three hashvals, three lineages.

    Expected LCA per hashval:
      * first  (abundance 2): {a;b;c, a;b;d}   -> a;b
      * second (abundance 5): {a;b;c}          -> a;b;c
      * third  (abundance 3): {a;b;d, a;b;d;e} -> a;b;d;e
    """
    first_hash = 12345678
    second_hash = 87654321
    third_hash = 34567891
    hashval_counts = {first_hash: 2, second_hash: 5, third_hash: 3}

    lin_c = lca_utils.make_lineage('a;b;c')
    lin_d = lca_utils.make_lineage('a;b;d')
    lin_de = lca_utils.make_lineage('a;b;d;e')

    db = FakeLCA_Database()
    db._set_lineage_assignment(first_hash, {lin_c, lin_d})
    db._set_lineage_assignment(second_hash, {lin_c})
    db._set_lineage_assignment(third_hash, {lin_d, lin_de})

    assignments = lca_utils.gather_assignments(hashval_counts, [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 3
    assert counts[lin_c] == 5     # from second_hash
    assert counts[lin_d] == 0     # a;b;d together with a;b;d;e counts as a;b;d;e
    assert counts[lin_de] == 3    # from third_hash

    common_prefix = lca_utils.make_lineage('a;b')
    assert counts[common_prefix] == 2    # from first_hash
示例#3
0
def summarize(hashvals, dblist, threshold):
    """
    Classify 'hashvals' using the given list of databases.

    Insist on at least 'threshold' counts of a given lineage before taking
    it seriously.

    Return a defaultdict(int) mapping each lineage (a tuple of LineagePairs)
    to its aggregated count. The count of every LCA that meets the
    threshold is added to that LCA and to each of its ancestors (every
    non-empty prefix of the lineage tuple). An empty lineage is counted
    once under the empty key.
    """

    # gather assignments from across all the databases
    assignments = lca_utils.gather_assignments(hashvals, dblist)

    # now convert to trees -> do LCA & counts
    counts = lca_utils.count_lca_for_assignments(assignments)

    # ok, we now have the LCAs for each hashval, and their number
    # of counts. Now aggregate counts across the tree, going up from
    # the leaves.
    aggregated_counts = defaultdict(int)

    # NOTE(review): assumes 'counts' is a collections.Counter, so that
    # most_common() yields entries in descending count order and it is
    # safe to stop at the first entry below the threshold.
    for lca, count in counts.most_common():
        if count < threshold:
            break

        if not lca:
            # The empty lineage has no ancestors to climb through; record
            # it exactly once and move on (previously it was added twice).
            aggregated_counts[lca] += count
            continue

        # climb from the lca to the root, crediting every ancestor.
        while lca:
            aggregated_counts[lca] += count
            lca = lca[:-1]

    return aggregated_counts
示例#4
0
def test_gather_assignments_1():
    """gather_assignments: one hashval mapped to one lineage in one database."""
    query_hash = 12345678
    lineage = lca_utils.make_lineage('a;b;c')

    db = FakeLCA_Database()
    db._set_lineage_assignment(query_hash, {lineage})

    assignments = lca_utils.gather_assignments([query_hash], [db])
    print(assignments)

    # the single assignment must come back unchanged
    assert assignments[query_hash] == {lineage}
示例#5
0
def test_count_lca_for_assignments_abund_1():
    """Abundance-weighted LCA counting: one hashval, one lineage.

    The lone lineage is expected to receive the hashval's abundance (3).
    """
    query_hash = 12345678
    hashval_counts = {query_hash: 3}

    lineage = lca_utils.make_lineage('a;b;c')

    db = FakeLCA_Database()
    db._set_lineage_assignment(query_hash, {lineage})

    # only the hashvals themselves (the keys) are handed to gather_assignments
    query_hashes = hashval_counts.keys()
    assignments = lca_utils.gather_assignments(query_hashes, [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 1
    assert counts[lineage] == 3
示例#6
0
def test_gather_assignments_3():
    """gather_assignments: two hashvals, two lineages, one database.

    The first hashval is assigned to both lineages, the second to only one.
    """
    shared_hash = 12345678
    solo_hash = 87654321
    lin_c = lca_utils.make_lineage('a;b;c')
    lin_d = lca_utils.make_lineage('a;b;d')

    db = FakeLCA_Database()
    db._set_lineage_assignment(shared_hash, {lin_c, lin_d})
    db._set_lineage_assignment(solo_hash, {lin_c})

    query_hashes = [shared_hash, solo_hash]
    assignments = lca_utils.gather_assignments(query_hashes, [db])
    print(assignments)

    # each hashval must come back with exactly the lineages it was given
    assert assignments[shared_hash] == {lin_c, lin_d}
    assert assignments[solo_hash] == {lin_c}
示例#7
0
def test_count_lca_for_assignments_2():
    """LCA counting (no abundances): one hashval assigned to two lineages.

    Neither a;b;c nor a;b;d is expected to be counted directly; the single
    hashval is expected under their shared prefix a;b.
    """
    query_hash = 12345678
    lin_c = lca_utils.make_lineage('a;b;c')
    lin_d = lca_utils.make_lineage('a;b;d')

    db = FakeLCA_Database()
    db._set_lineage_assignment(query_hash, {lin_c, lin_d})

    assignments = lca_utils.gather_assignments([query_hash], [db])
    counts = count_lca_for_assignments(assignments)
    print(counts)

    assert counts[lin_c] == 0
    assert counts[lin_d] == 0

    assert len(counts) == 1
    common_prefix = lca_utils.make_lineage('a;b')
    assert counts[common_prefix] == 1
示例#8
0
def test_count_lca_for_assignments_abund_5():
    """Abundance-weighted LCA counting when one lineage extends the other.

    a;b;d and a;b;d;e lie on the same path and differ only in depth. The
    hashval assigned to both is expected under the deeper lineage.
    """
    both_hash = 12345678
    shallow_hash = 87654321
    hashval_counts = {both_hash: 2, shallow_hash: 5}

    shallow_lin = lca_utils.make_lineage('a;b;d')
    deep_lin = lca_utils.make_lineage('a;b;d;e')

    db = FakeLCA_Database()
    db._set_lineage_assignment(both_hash, {shallow_lin, deep_lin})
    db._set_lineage_assignment(shallow_hash, {shallow_lin})

    assignments = lca_utils.gather_assignments(hashval_counts, [db])
    counts = count_lca_for_assignments(assignments, hashval_counts)
    print(counts)

    assert len(counts) == 2
    assert counts[shallow_lin] == 5    # abundance of shallow_hash
    assert counts[deep_lin] == 2       # both_hash counts as the deeper lineage