Exemplo n.º 1
0
def test_split_byte_murmur():
    """Counting one k-mer must leave its neighbouring k-mer at zero.

    Checks that the byte is split correctly when the murmur hash is used:
    a ``SmallCounttable(4, 4, 1)`` gets a single ``add`` for one 4-mer,
    and the other 4-mer must still report a count of 0.
    """
    table = SmallCounttable(4, 4, 1)

    counted, neighbour = "AAAC", "AAAG"

    # Per the original author, these k-mers were carefully chosen so that
    # their hash values produce consecutive indices in the count table.
    # The exact hash values are pinned so a hashing change is noticed.
    pinned_hashes = (
        (counted, [11898086063751343884]),
        (neighbour, [10548630838975263317]),
    )
    for kmer, hashes in pinned_hashes:
        assert table.get_kmer_hashes(kmer) == hashes

    table.add(counted)

    assert table.get(counted) == 1
    assert table.get(neighbour) == 0
Exemplo n.º 2
0
def test_save_load(Tabletype):
    """Round-trip a table through ``save`` and the loader for its type.

    A single k-mer is counted, the table is written to a temporary file,
    read back with the loader that matches the table's concrete type, and
    the count is checked again on the loaded copy.

    Args:
        Tabletype: callable invoked as ``Tabletype(5)`` to build the table
            under test (presumably supplied by a pytest fixture; its
            definition is not visible here -- TODO confirm).

    Raises:
        Exception: if the concrete table type has no known loader.
    """
    kh = Tabletype(5)
    ttype = type(kh)
    savefile = utils.get_temp_filename('tablesave.out')

    # test add(dna); the return value of add() is not needed here
    kh.add("ATGGC")
    assert kh.get("ATGGC") == 1

    kh.save(savefile)

    # should we provide a single load function here? yes, probably. @CTB
    # The dispatch is on the exact concrete type, so the classes are
    # compared by identity; isinstance() is deliberately not used because
    # any subclass relationship between the table classes could then pick
    # the wrong loader.
    if ttype is _Countgraph:
        loaded = khmer.load_countgraph(savefile)
    elif ttype is Counttable:
        loaded = Counttable.load(savefile)
    elif ttype is _SmallCountgraph:
        loaded = khmer.load_countgraph(savefile, small=True)
    elif ttype is SmallCounttable:
        loaded = SmallCounttable.load(savefile)
    elif ttype is _Nodegraph:
        loaded = khmer.load_nodegraph(savefile)
    elif ttype is Nodetable:
        loaded = Nodetable.load(savefile)
    else:
        raise Exception("unknown tabletype")

    # The loaded copy must report the same count as the saved table.
    assert loaded.get('ATGGC') == 1
Exemplo n.º 3
0
def test_read_write():
    """Counts and k-size must survive a ``save`` / ``load`` round trip.

    400 seeded pseudo-random 20-mers are counted in a
    ``SmallCounttable(20, 1e2, 4)``, the table is saved to a temporary
    file, and a second table loaded from that file must agree with the
    first on ``ksize()`` and on the count of every k-mer.
    """
    rng = random.Random(1)

    original = SmallCounttable(20, 1e2, 4)

    # Fixed seed, so the same k-mers are generated on every run.
    kmers = []
    for _ in range(400):
        kmers.append("".join(rng.choice("ACGT") for _ in range(20)))

    for kmer in kmers:
        original.add(kmer)

    path = utils.get_temp_filename('zzz')
    original.save(path)

    # load() is called on the class with only the file name; no table
    # parameters are passed in here.
    restored = SmallCounttable.load(path)

    assert original.ksize() == restored.ksize()
    for kmer in kmers:
        assert original.get(kmer) == restored.get(kmer)
Exemplo n.º 4
0
def test_overflow():
    """A saturated counter must stop at 15 and not spill into its neighbour.

    Checks that we do not overflow into the other part of the byte: one
    k-mer is added 17 times to a fresh ``SmallCounttable(4, 1e6, 4)`` and
    must read 15, while the other k-mer must still read 0.
    """
    # The scenario is run twice with the roles of the two k-mers swapped.
    # Per the original author, the second k-mer hashes to the other half
    # of the byte.
    scenarios = (
        ("AAAA", "AAAT"),
        ("AAAT", "AAAA"),
    )
    for saturated, untouched in scenarios:
        table = SmallCounttable(4, 1e6, 4)

        # try to overflow our 4bit counter
        for _ in range(17):
            table.add(saturated)

        assert table.get(saturated) == 15
        assert table.get(untouched) == 0
Exemplo n.º 5
0
def test_random_kmers():
    """Smoke test: add and look up 400 seeded random 20-mers.

    No counts are asserted; per the original author the aim is to check
    for out-of-bounds errors and similar problems.
    """
    rng = random.Random(1)

    table = SmallCounttable(20, 1e2, 4)

    alphabet = "ACGT"
    kmers = []
    for _ in range(400):
        letters = [rng.choice(alphabet) for _ in range(20)]
        kmers.append("".join(letters))

    for kmer in kmers:
        table.add(kmer)

    # The returned counts are deliberately ignored.
    for kmer in kmers:
        table.get(kmer)
Exemplo n.º 6
0
def test_split_byte_murmur():
    """Counting one k-mer must leave its neighbouring k-mer at zero.

    Checks that the byte is split correctly when the murmur hash is used:
    a ``SmallCounttable(4, 4, 1)`` gets a single ``add`` for one 4-mer,
    and the other 4-mer must still report a count of 0.
    """
    table = SmallCounttable(4, 4, 1)

    counted, neighbour = "AAAC", "AAAG"

    # Per the original author, these k-mers were carefully chosen so that
    # their hash values produce consecutive indices in the count table.
    # The exact hash values are pinned so a hashing change is noticed.
    pinned_hashes = (
        (counted, [11898086063751343884]),
        (neighbour, [10548630838975263317]),
    )
    for kmer, hashes in pinned_hashes:
        assert table.get_kmer_hashes(kmer) == hashes

    table.add(counted)

    assert table.get(counted) == 1
    assert table.get(neighbour) == 0
Exemplo n.º 7
0
def test_overflow():
    """A saturated counter must stop at 15 and not spill into its neighbour.

    Checks that we do not overflow into the other part of the byte: one
    k-mer is added 17 times to a fresh ``SmallCounttable(4, 1e6, 4)`` and
    must read 15, while the other k-mer must still read 0.
    """
    # The scenario is run twice with the roles of the two k-mers swapped.
    # Per the original author, the second k-mer hashes to the other half
    # of the byte.
    scenarios = (
        ("AAAA", "AAAT"),
        ("AAAT", "AAAA"),
    )
    for saturated, untouched in scenarios:
        table = SmallCounttable(4, 1e6, 4)

        # try to overflow our 4bit counter
        for _ in range(17):
            table.add(saturated)

        assert table.get(saturated) == 15
        assert table.get(untouched) == 0
Exemplo n.º 8
0
def test_single_add():
    """One ``add`` of a k-mer must give that k-mer a count of exactly 1."""
    kmer = "AAAA"
    table = SmallCounttable(4, 1e6, 4)

    table.add(kmer)

    assert table.get(kmer) == 1
Exemplo n.º 9
0
def test_single_add():
    """One ``add`` of a k-mer must give that k-mer a count of exactly 1."""
    kmer = "AAAA"
    table = SmallCounttable(4, 1e6, 4)

    table.add(kmer)

    assert table.get(kmer) == 1