Exemplo n.º 1
0
def mutate_genome(infile, mutations):
    """
    Stream the records of `infile`, yielding each one as a `khmer.Read`.

    `mutations` is looked up by record name. When a record's name is present,
    the associated mutations are ordered by their `pos` attribute (largest
    first) and handed to `mutate_sequence` together with the record's
    sequence; the returned sequence replaces the original in the yielded
    read. Records whose name is absent from `mutations` pass through with
    their sequence unchanged.
    """
    for entry in khmer.ReadParser(infile):
        seq = entry.sequence
        if entry.name in mutations:
            # Highest position first -- presumably so that applying one
            # mutation cannot shift the coordinates of those still pending
            # (TODO confirm against mutate_sequence).
            ordered = list(mutations[entry.name])
            ordered.sort(key=lambda mut: mut.pos, reverse=True)
            seq = mutate_sequence(seq, ordered)
        yield khmer.Read(name=entry.name, sequence=seq)
Exemplo n.º 2
0
def test_BrokenPairedReader_lowercase_khmer_Read():
    """
    Check `broken_paired_reader` output for mixed-case `khmer.Read` input.

    Uses khmer.Read objects, which should automatically have a `cleaned_seq`
    attribute. The first two reads are expected to come back together as one
    result, and the third on its own with `None` as its partner.
    """
    reads = [
        khmer.Read(name='seq1/1', sequence='acgtn'),
        khmer.Read(name='seq1/2', sequence='AcGtN'),
        khmer.Read(name='seq1/2', sequence='aCgTn'),
    ]

    # Only the two read slots of each emitted tuple matter for this test.
    pairs = [(first, second)
             for _, _, first, second in broken_paired_reader(reads)]

    # First result: both reads present; `sequence` keeps the input casing
    # while `cleaned_seq` is expected to be 'ACGTA' for each.
    left, right = pairs[0]
    assert (left.sequence, left.cleaned_seq) == ('acgtn', 'ACGTA')
    assert (right.sequence, right.cleaned_seq) == ('AcGtN', 'ACGTA')

    # Second result: the remaining read has no partner.
    single, partner = pairs[1]
    assert (single.sequence, single.cleaned_seq) == ('aCgTn', 'ACGTA')
    assert partner is None
Exemplo n.º 3
0
def localize(contigstream, refrfile, ksize=31, delta=25):
    """
    Generator wrapper around the `kevlar localize` task.

    Takes an iterable of contigs (assembled by `kevlar assemble`) stored as
    khmer or screed sequence records, the filename of the reference genome
    sequence, and the desired k-size. `delta` is forwarded unchanged to
    `extract_regions`.

    Every region produced by `extract_regions` is yielded as a `khmer.Read`
    whose name and sequence are the two values of that region.

    Raises `KevlarNoReferenceMatchesError` when `get_exact_matches` produces
    no matches. Because this is a generator, the error surfaces on the first
    iteration rather than at call time.
    """
    matches = KmerMatchSet(ksize)
    exact_hits = get_exact_matches(contigstream, refrfile, ksize)
    for refr_id, offset in exact_hits:
        matches.add(refr_id, offset)
    if len(matches) == 0:
        raise KevlarNoReferenceMatchesError()

    # NOTE(review): the handle returned by kevlar.open is never explicitly
    # closed here -- confirm whether that is intentional.
    reference = kevlar.open(refrfile, 'r')
    regions = extract_regions(reference, matches, delta=delta)
    for region_id, region_seq in regions:
        yield khmer.Read(name=region_id, sequence=region_seq)
Exemplo n.º 4
0
def test_clean_input_reads():
    """
    `clean_input_reads` must fail with AttributeError on `khmer.Read` input.

    All Read attributes are read only.
    """
    reads = [khmer.Read(name='seq1/1', sequence='ACGT')]
    # Keep the call itself inside the context manager so an error raised at
    # call time is caught as well as one raised on the first `next`.
    with pytest.raises(AttributeError):
        next(clean_input_reads(reads))