Exemplo n.º 1
0
def test_database_insert():
    """Check that ``DB.insert()`` stores sequences and returns populated records.

    A first sequence is inserted together with a source file, a source
    position and custom attributes; the returned record and the stored
    ``content_id`` are verified. A second sequence is then inserted to make
    sure it receives a different id and its own ``content_id``.
    """
    A = Alphabet('ACGT')
    S = A.parse('AACT', name='foo')
    db = DB(':memory:', A)
    db.initialize()

    def stored_content_ids(record_id):
        """Return all ``(content_id,)`` rows stored for the given record id."""
        with db.connection() as conn:
            cursor = conn.cursor()
            cursor.execute('SELECT content_id FROM sequence WHERE id = ?',
                           (record_id,))
            # NOTE for some reason if we just say next(cursor) the cursor
            # remains open after the context is over (which should not
            # happen as per docs). This leads to BusyError further down, so
            # the cursor is always drained with fetchall().
            return cursor.fetchall()

    attrs = {'key': 'value'}
    rec = db.insert(S, source_file='source.fa', source_pos=10, attrs=attrs)
    assert isinstance(rec.id, int)
    assert rec.content_id == S.content_id
    assert rec.source_pos == 10
    assert rec.source_file == 'source.fa'
    assert 'key' in rec.attrs and rec.attrs['key'] == 'value', \
        'attributes must be populated correctly'
    assert stored_content_ids(rec.id) == [(S.content_id,)], \
        'content identifier is properly populated'

    # add a second sequence
    T = A.parse('GCTG', name='bar')
    new_rec = db.insert(T)
    assert new_rec.id != rec.id, 'new ids are assigned to new sequences'
    assert stored_content_ids(new_rec.id) == [(T.content_id,)], \
        'correct id must be populated'
Exemplo n.º 2
0
def sequencing_sample(request):
    """Create a random genome, overlapping mutated reads and a seed index.

    A random sequence of length 2000 over the ``ACGT`` alphabet is generated
    and reads of length 500 are cut from it starting at whole multiples of
    half the read length, so successive reads overlap starting at their
    halfway position. Every read is passed through a ``MutationProcess``
    whose probabilities come from ``request.param`` and is then inserted into
    an in-memory database.

    Args:
        request: Object whose ``param`` attribute unpacks into
            ``(gap_prob, subst_prob, wordlen)``. ``gap_prob`` is used as both
            the gap-open and the gap-extend probability, ``subst_prob`` as
            the substitution probability and ``wordlen`` is handed to
            ``KmerIndex``.

    Returns:
        tuple:
            A 5-tuple ``(seq, reads, records, gap_prob, seed_index)``: the
            full genome, the list of named reads, the values returned by
            ``db.insert()`` for each read (in the same order), the gap
            probability and the seed index.
    """
    A = Alphabet('ACGT')
    gap_prob, subst_prob, wordlen = request.param
    seq_len, read_len = 2000, 500
    seq = rand_seq(A, seq_len).to_named('genome')
    mutation_process = MutationProcess(A, subst_probs=subst_prob,
                                       go_prob=gap_prob, ge_prob=gap_prob)

    # NOTE(review): the range stops before ``seq_len - read_len``, so no read
    # starts at the last full-length position (1500); the final read starts
    # at 1250. Kept as is -- confirm whether this is intended.
    step = read_len // 2
    reads = []
    for start in range(0, seq_len - read_len, step):
        read, _ = mutation_process.mutate(seq[start: start + read_len])
        reads.append(read.to_named('read#%d' % start))

    db = DB(':memory:', A)
    kmer_index = KmerIndex(db, wordlen)
    seed_index = SeedIndex(kmer_index)
    # The database must be initialized before the reads are inserted.
    seed_index.db.initialize()
    records = [db.insert(r) for r in reads]
    return seq, reads, records, gap_prob, seed_index
Exemplo n.º 3
0
def test_database_overwrite():
    """Check that re-inserting known content does not overwrite the old row.

    The same sequence is inserted twice with different source files; only
    one row carrying the first source file is expected for its content id.
    """
    alphabet = Alphabet('ACGT')
    sequence = alphabet.parse('AACT', name='foo')
    db = DB(':memory:', alphabet)
    db.initialize()

    # Insert identical content twice, only the source file differs.
    for source in ('old_source.fa', 'new_source.fa'):
        db.insert(sequence, source_file=source)

    query = 'SELECT source_file FROM sequence WHERE content_id = ?'
    with db.connection() as conn:
        cursor = conn.cursor()
        cursor.execute(query, (sequence.content_id,))
        source_files = [row[0] for row in cursor]
        assert source_files == ['old_source.fa'], \
            'Sequences with observed content id should be ignored'
Exemplo n.º 4
0
def test_database_find():
    """Check ``DB.find()`` with a raw SQL condition and with a callable.

    Two sequences named ``foo`` and ``bar`` are inserted; each lookup style
    must return exactly the one matching record.
    """
    alphabet = Alphabet('ACGT')
    foo_seq = alphabet.parse('AACT', name='foo')
    bar_seq = alphabet.parse('GGCT', name='bar')
    db = DB(':memory:', alphabet)
    db.initialize()
    for sequence in (foo_seq, bar_seq):
        db.insert(sequence)

    # Lookup through an SQL LIKE pattern applied to the attrs column.
    like_pattern = '%"name": "bar"%'
    sql_condition = "attrs LIKE '%s'" % like_pattern
    by_sql = [rec.content_id for rec in db.find(sql_condition=sql_condition)]
    assert by_sql == [bar_seq.content_id], \
        'find() should work with sql_condition'

    # Lookup through a Python predicate evaluated on each record.
    def named_foo(rec):
        return rec.attrs['name'] == 'foo'

    by_callable = [rec.content_id for rec in db.find(condition=named_foo)]
    assert by_callable == [foo_seq.content_id], \
        'find() should work with callable condition'
Exemplo n.º 5
0
def test_database_events():
    """Check that DB event listeners fire on initialization and insertion.

    One listener is registered for ``db-initialized`` and one for
    ``sequence-inserted``. Each listener records the name of the event it was
    registered for, so the test verifies not only how many callbacks ran but
    also which event triggered them.
    """
    A = Alphabet('ACGT')
    S = A.parse('AACT', name='S')

    # Names of the events fired so far, in order. NOTE python 2 does not
    # support non-local, non-global variables, so the closures mutate this
    # list instead of rebinding a counter.
    fired = []

    def make_listener(event):
        """Return a callback that records ``event`` whenever it is invoked."""
        def listener(self, *args):
            # First positional argument is presumably the emitting DB --
            # TODO confirm; the arguments are not inspected here.
            fired.append(event)
        return listener

    db = DB(':memory:', A)
    for event in ('db-initialized', 'sequence-inserted'):
        db.add_event_listener(event, make_listener(event))

    db.initialize()
    assert fired == ['db-initialized'], \
        'event callbacks for "db-initialized" should be executed'

    db.insert(S)
    assert fired == ['db-initialized', 'sequence-inserted'], \
        'event callbacks for "sequence-inserted" should be executed'