Пример #1
0
def test_count_first_pass():
    """Check the table written after the first counting pass.

    Runs ``count_first_pass`` on the proband reads with the stored mask,
    saves the resulting count table to a temporary ``.ct`` file, and
    compares that file against the reference table shipped with the test
    data.
    """
    nodemask = Nodetable.load(data_file('minitrio/mask.nt'))
    table = Counttable(31, 1e4, 4)
    reads = data_file('minitrio/trio-proband.fq.gz')
    count_first_pass([reads], table, nodemask)

    # The temporary file is removed automatically when the block exits, so
    # the comparison has to happen while it is still open.
    with NamedTemporaryFile(suffix='.ct') as tmp:
        table.save(tmp.name)
        expected = data_file('minitrio/trio-proband-mask-counts.ct')
        identical = filecmp.cmp(expected, tmp.name)
    assert identical is True
Пример #2
0
def get_unique_seeds(recordstream, seedsize):
    """Yield each seed from the record stream the first time it is seen.

    Every record's ``sequence`` is split into k-mers of length ``seedsize``
    via ``Counttable.get_kmers``. Uniqueness is decided on the value
    returned by ``kevlar.revcommin`` (presumably the canonical form shared
    by a k-mer and its reverse complement -- confirm against kevlar), but
    the k-mer that is yielded is the one as it appeared in the sequence.
    """
    # The table is only used as a k-mer splitter, hence the minimal size.
    table = Counttable(seedsize, 1, 1)
    seen = set()
    for record in recordstream:
        for seed in table.get_kmers(record.sequence):
            canonical = kevlar.revcommin(seed)
            if canonical in seen:
                continue
            seen.add(canonical)
            yield seed
Пример #3
0
def test_save_load(Tabletype):
    """Round-trip a table through save/load and check the count survives.

    A table of the given type is created with k=5, one k-mer is added, the
    table is saved to a temporary file and reloaded with the loader that
    matches its concrete type. The reloaded table must report the same
    count for the k-mer.
    """

    def reload(table_class, path):
        # Each table flavour has its own loader; the comparison is on the
        # exact type, not isinstance.
        # should we provide a single load function here? yes, probably. @CTB
        if table_class == _Countgraph:
            return khmer.load_countgraph(path)
        if table_class == Counttable:
            return Counttable.load(path)
        if table_class == _SmallCountgraph:
            return khmer.load_countgraph(path, small=True)
        if table_class == SmallCounttable:
            return SmallCounttable.load(path)
        if table_class == _Nodegraph:
            return khmer.load_nodegraph(path)
        if table_class == Nodetable:
            return Nodetable.load(path)
        raise Exception("unknown tabletype")

    table = Tabletype(5)
    table_class = type(table)
    path = utils.get_temp_filename('tablesave.out')

    # test add(dna)
    table.add("ATGGC")
    assert table.get("ATGGC") == 1

    table.save(path)

    restored = reload(table_class, path)
    assert restored.get('ATGGC') == 1
Пример #4
0
def test_count_second_pass():
    """Check the abundance histogram returned by the second counting pass.

    Loads the reference count table, runs ``count_second_pass`` on the
    proband reads, and compares the returned mapping against the expected
    values.
    """
    # The mask is loaded but not passed to anything below; the load is kept
    # so the test still touches the same data files.
    nodemask = Nodetable.load(data_file('minitrio/mask.nt'))
    table = Counttable.load(
        data_file('minitrio/trio-proband-mask-counts.ct')
    )
    reads = data_file('minitrio/trio-proband.fq.gz')

    expected = {
        10: 6,
        11: 10,
        12: 12,
        13: 18,
        14: 16,
        15: 11,
        16: 9,
        17: 9,
        18: 11,
        19: 8,
        20: 9,
        21: 7,
        22: 3,
    }
    observed = count_second_pass([reads], table)
    assert observed == expected
Пример #5
0
def test_assumptions(kmer):
    """Check that a k-mer and its ``kevlar.revcom`` counterpart hash alike.

    Both ``Counttable.hash`` and the first entry of
    ``Counttable.get_kmer_hashes`` must give the same value for ``kmer``
    and for the sequence returned by ``kevlar.revcom(kmer)``.
    """
    table = Counttable(27, 1e5, 2)
    revcomp = kevlar.revcom(kmer)

    forward_hash = table.hash(kmer)
    reverse_hash = table.hash(revcomp)
    assert forward_hash == reverse_hash

    forward_first = table.get_kmer_hashes(kmer)[0]
    reverse_first = table.get_kmer_hashes(revcomp)[0]
    assert forward_first == reverse_first