def test_check_is_pair_4b():
    """check_is_pair must raise ValueError for a quality-less/quality mix.

    The first record is built without a ``quality`` argument and the second
    with one; the test passes only if ``check_is_pair`` raises ValueError.
    """
    without_quality = Sequence(name='seq/1', sequence='AAA')
    with_quality = Sequence(name='seq/2', quality='###', sequence='AAA')

    try:
        check_is_pair(without_quality, with_quality)
    except ValueError:
        # Expected outcome: the mismatch was rejected.
        return

    assert False  # check_is_pair should fail here.
def test_check_is_pair_4b():
    """Expect ValueError when only one of the two records has a quality.

    ``seq/1`` is created without ``quality`` and ``seq/2`` with it; a normal
    return from ``check_is_pair`` is treated as a test failure.
    """
    plain = Sequence(name='seq/1', sequence='AAA')
    scored = Sequence(name='seq/2', quality='###', sequence='AAA')

    try:
        check_is_pair(plain, scored)
    except ValueError:
        pass
    else:
        assert False  # check_is_pair should fail here.
def test_check_is_pair_7():
    """Records named 'seq/2' then 'seq/1' must not be reported as a pair."""
    first = Sequence(name='seq/2', sequence='AAA')
    second = Sequence(name='seq/1', sequence='AAA')

    paired = check_is_pair(first, second)
    assert not paired
def test_check_is_pair_3_fa():
    """Quality-less records named 'seq 1::' and 'seq 2::' must pair up."""
    first = Sequence(name='seq 1::', sequence='AAA')
    second = Sequence(name='seq 2::', sequence='AAA')

    paired = check_is_pair(first, second)
    assert paired
def test_check_is_pair_3_broken_fq_2():
    """'seq 1::' followed by a bare 'seq' name must not count as a pair.

    Both records carry a quality string; only the second name lacks the
    ' 2::' style suffix.
    """
    tagged = Sequence(name='seq 1::', quality='###', sequence='AAA')
    untagged = Sequence(name='seq', quality='###', sequence='AAA')

    paired = check_is_pair(tagged, untagged)
    assert not paired
def test_check_is_pair_2():
    """Records with quality named 'seq/1' and 'seq/2' must pair up."""
    first = Sequence(name='seq/1', quality='###', sequence='AAA')
    second = Sequence(name='seq/2', quality='###', sequence='AAA')

    paired = check_is_pair(first, second)
    assert paired
def broken_paired_reader(screed_iter, min_length=None,
                         force_single=False, require_paired=False):
    """Read pairs from a stream.

    A generator that yields singletons and pairs from a stream of FASTA/FASTQ
    records (yielded by 'screed_iter').  Yields (n, is_pair, r1, r2) where
    'r2' is None if is_pair is False.

    The input stream can be fully single-ended reads, interleaved paired-end
    reads, or paired-end reads with orphans, a.k.a. "broken paired".

    Usage::

       for n, is_pair, read1, read2 in broken_paired_reader(...):
          ...

    Note that 'n' behaves like enumerate() and starts at 0, but counts
    records rather than yielded items, so it is incremented by 2 for a pair
    of reads and by 1 for a singleton.  Reads dropped because of
    'min_length' do not advance 'n'.

    If 'min_length' is set, all reads under this length are ignored (even
    if they are pairs).  When a pair fails the length check and
    'require_paired' is False, the first read is discarded and the second
    is carried forward to be considered as an orphan; when 'require_paired'
    is True, both reads are discarded.

    If 'force_single' is True, all reads are returned as singletons.

    If 'require_paired' is True, any record that cannot be paired with its
    neighbour raises UnpairedReadsError.

    Raises:
        ValueError: if both 'force_single' and 'require_paired' are set.
            Because this is a generator, the error surfaces on the first
            iteration, not at call time.
        UnpairedReadsError: if 'require_paired' is set and an unpaired
            record is encountered.

    Exceptions raised by check_is_pair() are not caught here.
    """
    if force_single and require_paired:
        raise ValueError("force_single and require_paired cannot both be set!")

    def too_short(read):
        # A falsy min_length (None or 0) disables length filtering.
        return bool(min_length) and len(read.sequence) < min_length

    prev_record = None
    num = 0

    # handle the majority of the stream.
    for record in screed_iter:
        # Compare against None explicitly: a record object may be falsy
        # (e.g. one whose __len__ is 0) and must still be processed rather
        # than being mistaken for "no previous record".
        if prev_record is not None:
            if check_is_pair(prev_record, record) and not force_single:
                if too_short(prev_record) or too_short(record):
                    # NOTE(review): without require_paired, prev_record is
                    # dropped here even if it alone meets min_length; only
                    # 'record' is carried forward as a potential orphan.
                    if require_paired:
                        record = None
                else:
                    yield num, True, prev_record, record  # it's a pair!
                    num += 2
                    # Both reads are consumed; do not carry 'record' over.
                    record = None
            else:  # orphan.
                if require_paired:
                    raise UnpairedReadsError(
                        "Unpaired reads when require_paired is set!",
                        prev_record, record)

                # ignore short reads
                if not too_short(prev_record):
                    yield num, False, prev_record, None
                    num += 1

        prev_record = record

    # handle the last record, if it exists (i.e. last two records not a pair)
    if prev_record is not None:
        if require_paired:
            raise UnpairedReadsError(
                "Unpaired reads when require_paired is set!",
                prev_record, None)
        if not too_short(prev_record):
            yield num, False, prev_record, None
def test_check_is_pair_7():
    """A '/2' record followed by a '/1' record is not accepted as a pair."""
    reads = (
        Sequence(name='seq/2', sequence='AAA'),
        Sequence(name='seq/1', sequence='AAA'),
    )
    assert not check_is_pair(*reads)
def test_check_is_pair_3_fa():
    """'seq 1::' and 'seq 2::' records without quality are a valid pair."""
    reads = (
        Sequence(name='seq 1::', sequence='AAA'),
        Sequence(name='seq 2::', sequence='AAA'),
    )
    assert check_is_pair(*reads)
def test_check_is_pair_3_broken_fq_2():
    """A 'seq 1::' record and a plain 'seq' record are not a pair.

    Both records are constructed with a quality string.
    """
    reads = (
        Sequence(name='seq 1::', quality='###', sequence='AAA'),
        Sequence(name='seq', quality='###', sequence='AAA'),
    )
    assert not check_is_pair(*reads)
def test_check_is_pair_2():
    """'seq/1' and 'seq/2' records with quality strings are a valid pair."""
    reads = (
        Sequence(name='seq/1', quality='###', sequence='AAA'),
        Sequence(name='seq/2', quality='###', sequence='AAA'),
    )
    assert check_is_pair(*reads)
def broken_paired_reader(screed_iter, min_length=None,
                         force_single=False, require_paired=False):
    """Read pairs from a stream.

    A generator that yields singletons and pairs from a stream of FASTA/FASTQ
    records (yielded by 'screed_iter').  Yields (n, is_pair, r1, r2) where
    'r2' is None if is_pair is False.

    The input stream can be fully single-ended reads, interleaved paired-end
    reads, or paired-end reads with orphans, a.k.a. "broken paired".

    Usage::

       for n, is_pair, read1, read2 in broken_paired_reader(...):
          ...

    Note that 'n' behaves like enumerate() and starts at 0, but counts
    records rather than yielded items, so it is incremented by 2 for a pair
    of reads and by 1 for a singleton.  Reads dropped because of
    'min_length' do not advance 'n'.

    If 'min_length' is set, all reads under this length are ignored (even
    if they are pairs).  When a pair fails the length check and
    'require_paired' is False, the first read is discarded and the second
    is carried forward to be considered as an orphan; when 'require_paired'
    is True, both reads are discarded.

    If 'force_single' is True, all reads are returned as singletons.

    If 'require_paired' is True, any record that cannot be paired with its
    neighbour raises UnpairedReadsError.

    Raises:
        ValueError: if both 'force_single' and 'require_paired' are set.
            Because this is a generator, the error surfaces on the first
            iteration, not at call time.
        UnpairedReadsError: if 'require_paired' is set and an unpaired
            record is encountered.

    Exceptions raised by check_is_pair() are not caught here.
    """
    if force_single and require_paired:
        raise ValueError("force_single and require_paired cannot both be set!")

    def too_short(read):
        # A falsy min_length (None or 0) disables length filtering.
        return bool(min_length) and len(read.sequence) < min_length

    prev_record = None
    num = 0

    # handle the majority of the stream.
    for record in screed_iter:
        # Compare against None explicitly: a record object may be falsy
        # (e.g. one whose __len__ is 0) and must still be processed rather
        # than being mistaken for "no previous record".
        if prev_record is not None:
            if check_is_pair(prev_record, record) and not force_single:
                if too_short(prev_record) or too_short(record):
                    # NOTE(review): without require_paired, prev_record is
                    # dropped here even if it alone meets min_length; only
                    # 'record' is carried forward as a potential orphan.
                    if require_paired:
                        record = None
                else:
                    yield num, True, prev_record, record  # it's a pair!
                    num += 2
                    # Both reads are consumed; do not carry 'record' over.
                    record = None
            else:  # orphan.
                if require_paired:
                    raise UnpairedReadsError(
                        "Unpaired reads when require_paired is set!",
                        prev_record, record)

                # ignore short reads
                if not too_short(prev_record):
                    yield num, False, prev_record, None
                    num += 1

        prev_record = record

    # handle the last record, if it exists (i.e. last two records not a pair)
    if prev_record is not None:
        if require_paired:
            raise UnpairedReadsError(
                "Unpaired reads when require_paired is set!",
                prev_record, None)
        if not too_short(prev_record):
            yield num, False, prev_record, None