Пример #1
0
def test_n_labels():
    """Check the label count after consuming a labelled FASTA file.

    Builds a ``LabelHash(20, 1e7, 4)``, feeds it ``test-labels.fa`` through
    ``consume_fasta_and_tag_with_labels`` and asserts that ``n_labels()``
    reports exactly 4.
    """
    lh = LabelHash(20, 1e7, 4)
    filename = utils.get_test_data('test-labels.fa')
    lh.consume_fasta_and_tag_with_labels(filename)

    # Call form of print: with a single argument it parses and prints the
    # same under Python 2's print statement and Python 3's print function.
    print(lh.n_labels())
    assert lh.n_labels() == 4
Пример #2
0
def test_n_labels():
    """Verify that ``test-labels.fa`` produces four labels.

    A ``LabelHash(20, 1e7, 4)`` consumes the file via
    ``consume_fasta_and_tag_with_labels``; afterwards ``n_labels()`` must
    equal 4.
    """
    lh = LabelHash(20, 1e7, 4)
    filename = utils.get_test_data('test-labels.fa')
    lh.consume_fasta_and_tag_with_labels(filename)

    # Single-argument print(...) is valid and prints identically on both
    # Python 2 (statement with a parenthesised expression) and Python 3.
    print(lh.n_labels())
    assert lh.n_labels() == 4
Пример #3
0
def test_consume_fasta_and_tag_with_labels():
    """Exercise ``consume_fasta_and_tag_with_labels`` on ``test-transcript.fa``.

    Asserts that:
      * the first 20 characters of ``read_1`` give a truthy ``get()`` result
        after consumption,
      * the reported total read count is 3,
      * ``n_labels()`` is 3 once the tag/label sweeps have run.

    The remaining calls (label dict, tagset, forward hashes, neighborhood
    sweeps) are only printed as diagnostics; their values are not asserted.
    """
    lb = LabelHash(20, 1e7, 4)
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    filename = utils.get_test_data('test-transcript.fa')

    # The call returns a pair; only the first element is checked here.
    total_reads, _ = lb.consume_fasta_and_tag_with_labels(filename)
    print("doing get")
    assert lb.get(read_1[:20])
    assert total_reads == 3
    print("doing n_labels")
    print(lb.n_labels())
    print("doing label dict")
    print(lb.get_label_dict())
    print("get tagset")
    for tag in lb.get_tagset():
        print("forward hash")
        # Format explicitly instead of print(a, b): on Python 2 the latter
        # would print a tuple repr rather than the two space-separated items.
        print("%s %s" % (tag, khmer.forward_hash(tag, 20)))
    for record in screed.open(filename):
        print("Sweeping tags")
        print(lb.sweep_tag_neighborhood(record.sequence, 40))
        print("Sweeping labels...")
        print(lb.sweep_label_neighborhood(record.sequence, 40))
    assert lb.n_labels() == 3
Пример #4
0
def test_consume_fasta_and_tag_with_labels():
    """Consume ``test-transcript.fa`` with labels and check the results.

    Checks performed:
      * ``get()`` on the first 20 characters of ``read_1`` is truthy,
      * the total read count returned by the consume call is 3,
      * ``n_labels()`` equals 3 at the end of the test.

    The label dict, tagset, forward hashes and neighborhood sweeps are
    printed for diagnostics only and are not asserted on.
    """
    lb = LabelHash(20, 1e7, 4)
    read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
    filename = utils.get_test_data('test-transcript.fa')

    # Second element of the returned pair is not used by this test.
    total_reads, _ = lb.consume_fasta_and_tag_with_labels(filename)
    print("doing get")
    assert lb.get(read_1[:20])
    assert total_reads == 3
    print("doing n_labels")
    print(lb.n_labels())
    print("doing label dict")
    print(lb.get_label_dict())
    print("get tagset")
    for tag in lb.get_tagset():
        print("forward hash")
        # %-formatting keeps the "a b" output identical on Python 2 and 3;
        # print(a, b) would emit a tuple repr under Python 2.
        print("%s %s" % (tag, khmer.forward_hash(tag, 20)))
    for record in screed.open(filename):
        print("Sweeping tags")
        print(lb.sweep_tag_neighborhood(record.sequence, 40))
        print("Sweeping labels...")
        print(lb.sweep_label_neighborhood(record.sequence, 40))
    assert lb.n_labels() == 3
Пример #5
0
def test_consume_partitioned_fasta_and_tag_with_labels():
    """Consume a partitioned FASTA file and verify its single label.

    After ``consume_partitioned_fasta_and_tag_with_labels`` has processed
    ``real-partition-small.fa``, sweeping every record's sequence must yield
    exactly one distinct label, that label must equal 2, and ``n_labels()``
    must report 1.
    """
    labeler = LabelHash(20, 1e7, 4)
    fasta_path = utils.get_test_data('real-partition-small.fa')

    # The returned pair is unpacked but its values are not inspected.
    outcome = labeler.consume_partitioned_fasta_and_tag_with_labels(fasta_path)
    _reads, _consumed = outcome

    found = set()
    for rec in screed.open(fasta_path):
        swept = labeler.sweep_label_neighborhood(
            rec.sequence, 0, False, False)
        found.update(swept)

    assert len(found) == 1
    assert found.pop() == 2
    assert labeler.n_labels() == 1
Пример #6
0
def test_consume_partitioned_fasta_and_tag_with_labels():
    """Check labels produced from the partitioned ``real-partition-small.fa``.

    The file is consumed with
    ``consume_partitioned_fasta_and_tag_with_labels``; the labels collected
    by ``sweep_label_neighborhood`` over every record must form a set with
    the single member 2, and ``n_labels()`` must be 1.
    """
    lb = LabelHash(20, 1e7, 4)
    filename = utils.get_test_data('real-partition-small.fa')

    # The returned pair is unpacked but its values are not checked here.
    _total_reads, _n_consumed = (
        lb.consume_partitioned_fasta_and_tag_with_labels(filename))
    labels = set()
    for record in screed.open(filename):
        seq = record.sequence
        labels.update(lb.sweep_label_neighborhood(seq, 0, False, False))
    assert len(labels) == 1
    # Plain int literal: the Python 2 ``2L`` suffix is a SyntaxError on
    # Python 3, and ``2`` compares equal to a long 2 on Python 2.
    assert labels.pop() == 2
    assert lb.n_labels() == 1