Exemplo n.º 1
0
def test_lossless_reads():
    """Exercise ``rand_read`` when sampling noise-free reads.

    Checks, in order: argument validation (``AssertionError`` is expected for
    a too-large ``len_mean`` and for giving both ``num`` and
    ``expected_coverage``), the number of reads produced, that a read equals
    the slice of the source sequence at its reported position, that read
    lengths vary, and sampling from an alphabet with two-character letters.
    """
    A = Alphabet('ACGT')
    S = rand_seq(A, 100)
    with pytest.raises(AssertionError):
        next(rand_read(S, len_mean=200, num=1))  # len_mean must be < len(S)
    with pytest.raises(AssertionError):
        # at most one of num or expected_coverage given
        next(rand_read(S, len_mean=50, num=1, expected_coverage=1))

    assert sum(1 for _ in rand_read(S, len_mean=50, num=10)) == 10, \
        'The number of sampled reads should be controllable'
    assert sum(1 for _ in rand_read(S, len_mean=50)) == 1, \
        'If neither num or expected coverage is given only one sample is read'

    # there should be no noise added
    read, pos = next(rand_read(S, len_mean=40, num=1))
    assert S[pos:pos+len(read)] == read

    S = A.parse('ACT' * 100)
    reads = list(rand_read(S, len_mean=100, len_sd=0.01, num=100))
    # Compare the *number* of distinct lengths; comparing the set itself to an
    # int is a TypeError on Python 3 and vacuously true on Python 2.
    assert len(set(len(read) for read, _ in reads)) > 1, \
        'Read lengths should be randomly chosen'
    observed_mean = sum(len(read) for read, _ in reads) / float(len(reads))
    assert 50 < observed_mean < 150, \
        'Normal distribution of read lengths works'

    # index edge cases
    A = Alphabet(['00', '01'])
    S = A.parse('01' * 10)
    # Force the length sampler to return [1]. patch.object restores the real
    # np.random.normal on exit even if the assertion below fails, so the mock
    # cannot leak into other tests.
    with mock.patch.object(np.random, 'normal',
                           mock.Mock(return_value=[1])):
        assert next(rand_read(S, len_mean=1, num=1))[0] == A.parse('01'), \
            'sequences in alphabets with > 1 long letters can be sampled too'
Exemplo n.º 2
0
def test_expected_coverage():
    """Check that ``expected_coverage`` controls how many reads are sampled.

    With a mean read length of half the sequence length, the test expects
    twice as many reads as the requested coverage.
    """
    alphabet = Alphabet('ACGT')
    sequence = rand_seq(alphabet, 100)
    coverage = 10
    half_length = len(sequence)/2
    sampled = list(
        rand_read(sequence, len_mean=half_length, expected_coverage=coverage)
    )
    assert len(sampled) == 2 * coverage