Exemplo n.º 1
0
def test_check_is_pair_4b():
    """check_is_pair must raise ValueError when only one read has quality.

    'read1' is built without a quality string while 'read2' carries one.
    """
    read1 = Sequence(name='seq/1', sequence='AAA')
    read2 = Sequence(name='seq/2', quality='###', sequence='AAA')

    # Keep only the call under test inside the try body so that nothing else
    # can be mistaken for the expected failure.
    try:
        check_is_pair(read1, read2)
    except ValueError:
        pass
    else:
        # An explicit raise (rather than `assert False`) still fails the test
        # when Python runs with -O, and carries a diagnostic message.
        raise AssertionError("check_is_pair should fail here.")
Exemplo n.º 2
0
def test_check_is_pair_4b():
    """check_is_pair must raise ValueError when only one read has quality.

    'read1' is built without a quality string while 'read2' carries one.
    """
    read1 = Sequence(name='seq/1', sequence='AAA')
    read2 = Sequence(name='seq/2', quality='###', sequence='AAA')

    # Keep only the call under test inside the try body so that nothing else
    # can be mistaken for the expected failure.
    try:
        check_is_pair(read1, read2)
    except ValueError:
        pass
    else:
        # An explicit raise (rather than `assert False`) still fails the test
        # when Python runs with -O, and carries a diagnostic message.
        raise AssertionError("check_is_pair should fail here.")
Exemplo n.º 3
0
def test_check_is_pair_7():
    """Reads named 'seq/2' then 'seq/1' (in that order) are not a pair."""
    leading = Sequence(name='seq/2', sequence='AAA')
    trailing = Sequence(name='seq/1', sequence='AAA')

    paired = check_is_pair(leading, trailing)
    assert not paired
Exemplo n.º 4
0
def test_check_is_pair_3_fa():
    """Quality-less reads named 'seq 1::' and 'seq 2::' count as a pair."""
    left = Sequence(name='seq 1::', sequence='AAA')
    right = Sequence(name='seq 2::', sequence='AAA')

    paired = check_is_pair(left, right)
    assert paired
Exemplo n.º 5
0
def test_check_is_pair_3_broken_fq_2():
    """A 'seq 1::' read followed by a bare 'seq' read is not a pair."""
    left = Sequence(name='seq 1::', quality='###', sequence='AAA')
    right = Sequence(name='seq', quality='###', sequence='AAA')

    paired = check_is_pair(left, right)
    assert not paired
Exemplo n.º 6
0
def test_check_is_pair_2():
    """Reads 'seq/1' and 'seq/2', both with quality, count as a pair."""
    left = Sequence(name='seq/1', quality='###', sequence='AAA')
    right = Sequence(name='seq/2', quality='###', sequence='AAA')

    paired = check_is_pair(left, right)
    assert paired
Exemplo n.º 7
0
def broken_paired_reader(screed_iter,
                         min_length=None,
                         force_single=False,
                         require_paired=False):
    """Read pairs from a stream.

    A generator that yields singletons and pairs from a stream of FASTA/FASTQ
    records (yielded by 'screed_iter').  Yields (n, is_pair, r1, r2) where
    'r2' is None if is_pair is False.

    The input stream can be fully single-ended reads, interleaved paired-end
    reads, or paired-end reads with orphans, a.k.a. "broken paired".

    Usage::

       for n, is_pair, read1, read2 in broken_paired_reader(...):
          ...

    Note that 'n' behaves like enumerate() and starts at 0; it counts the
    records yielded so far, so it is incremented by 2 for a pair of reads.
    Reads skipped by the 'min_length' filter are not counted.

    If 'min_length' is set, all reads under this length are ignored (even
    if they are pairs).

    If 'force_single' is True, all reads are returned as singletons.

    If 'require_paired' is True, any orphaned read raises
    UnpairedReadsError instead of being yielded.

    Because this is a generator, the exceptions below are raised when the
    generator is iterated, not when this function is called.

    Raises:
        ValueError: if both 'force_single' and 'require_paired' are set.
        UnpairedReadsError: if 'require_paired' is set and an orphan is seen.
    """
    if force_single and require_paired:
        raise ValueError("force_single and require_paired cannot both be set!")

    def _too_short(read):
        # A falsy min_length (None or 0) disables length filtering entirely.
        return bool(min_length) and len(read.sequence) < min_length

    prev_record = None
    num = 0

    # handle the majority of the stream.
    for record in screed_iter:
        # Compare against None explicitly: a record type that defines
        # __len__ (e.g. one with an empty sequence) may be falsy and would
        # otherwise be dropped silently.
        if prev_record is not None:
            if check_is_pair(prev_record, record) and not force_single:
                if _too_short(prev_record) or _too_short(record):
                    # The first read of the pair is dropped.  With
                    # require_paired the mate is dropped too; otherwise the
                    # mate stays pending and is considered again against
                    # the next record.
                    if require_paired:
                        record = None
                else:
                    yield num, True, prev_record, record  # it's a pair!
                    num += 2
                    record = None  # both reads consumed; nothing pending
            else:  # orphan.
                if require_paired:
                    raise UnpairedReadsError(
                        "Unpaired reads when require_paired is set!",
                        prev_record, record)

                # ignore short reads
                if not _too_short(prev_record):
                    yield num, False, prev_record, None
                    num += 1

        prev_record = record

    # handle the last record, if it exists (i.e. last two records not a pair)
    if prev_record is not None:
        if require_paired:
            raise UnpairedReadsError(
                "Unpaired reads when require_paired "
                "is set!", prev_record, None)
        if not _too_short(prev_record):
            yield num, False, prev_record, None
Exemplo n.º 8
0
def test_check_is_pair_7():
    """Reads named 'seq/2' then 'seq/1' (in that order) are not a pair."""
    leading = Sequence(name='seq/2', sequence='AAA')
    trailing = Sequence(name='seq/1', sequence='AAA')

    paired = check_is_pair(leading, trailing)
    assert not paired
Exemplo n.º 9
0
def test_check_is_pair_3_fa():
    """Quality-less reads named 'seq 1::' and 'seq 2::' count as a pair."""
    left = Sequence(name='seq 1::', sequence='AAA')
    right = Sequence(name='seq 2::', sequence='AAA')

    paired = check_is_pair(left, right)
    assert paired
Exemplo n.º 10
0
def test_check_is_pair_3_broken_fq_2():
    """A 'seq 1::' read followed by a bare 'seq' read is not a pair."""
    left = Sequence(name='seq 1::', quality='###', sequence='AAA')
    right = Sequence(name='seq', quality='###', sequence='AAA')

    paired = check_is_pair(left, right)
    assert not paired
Exemplo n.º 11
0
def test_check_is_pair_2():
    """Reads 'seq/1' and 'seq/2', both with quality, count as a pair."""
    left = Sequence(name='seq/1', quality='###', sequence='AAA')
    right = Sequence(name='seq/2', quality='###', sequence='AAA')

    paired = check_is_pair(left, right)
    assert paired
Exemplo n.º 12
0
def broken_paired_reader(screed_iter, min_length=None,
                         force_single=False, require_paired=False):
    """Read pairs from a stream.

    A generator that yields singletons and pairs from a stream of FASTA/FASTQ
    records (yielded by 'screed_iter').  Yields (n, is_pair, r1, r2) where
    'r2' is None if is_pair is False.

    The input stream can be fully single-ended reads, interleaved paired-end
    reads, or paired-end reads with orphans, a.k.a. "broken paired".

    Usage::

       for n, is_pair, read1, read2 in broken_paired_reader(...):
          ...

    Note that 'n' behaves like enumerate() and starts at 0; it counts the
    records yielded so far, so it is incremented by 2 for a pair of reads.
    Reads skipped by the 'min_length' filter are not counted.

    If 'min_length' is set, all reads under this length are ignored (even
    if they are pairs).

    If 'force_single' is True, all reads are returned as singletons.

    If 'require_paired' is True, any orphaned read raises
    UnpairedReadsError instead of being yielded.

    Because this is a generator, the exceptions below are raised when the
    generator is iterated, not when this function is called.

    Raises:
        ValueError: if both 'force_single' and 'require_paired' are set.
        UnpairedReadsError: if 'require_paired' is set and an orphan is seen.
    """
    if force_single and require_paired:
        raise ValueError("force_single and require_paired cannot both be set!")

    def _too_short(read):
        # A falsy min_length (None or 0) disables length filtering entirely.
        return bool(min_length) and len(read.sequence) < min_length

    prev_record = None
    num = 0

    # handle the majority of the stream.
    for record in screed_iter:
        # Compare against None explicitly: a record type that defines
        # __len__ (e.g. one with an empty sequence) may be falsy and would
        # otherwise be dropped silently.
        if prev_record is not None:
            if check_is_pair(prev_record, record) and not force_single:
                if _too_short(prev_record) or _too_short(record):
                    # The first read of the pair is dropped.  With
                    # require_paired the mate is dropped too; otherwise the
                    # mate stays pending and is considered again against
                    # the next record.
                    if require_paired:
                        record = None
                else:
                    yield num, True, prev_record, record  # it's a pair!
                    num += 2
                    record = None  # both reads consumed; nothing pending
            else:                                   # orphan.
                if require_paired:
                    raise UnpairedReadsError(
                        "Unpaired reads when require_paired is set!",
                        prev_record, record)

                # ignore short reads
                if not _too_short(prev_record):
                    yield num, False, prev_record, None
                    num += 1

        prev_record = record

    # handle the last record, if it exists (i.e. last two records not a pair)
    if prev_record is not None:
        if require_paired:
            raise UnpairedReadsError("Unpaired reads when require_paired "
                                     "is set!", prev_record, None)
        if not _too_short(prev_record):
            yield num, False, prev_record, None