def group_pairs(seqs, n_seqs_in_pair=None, check_all_same_n_seqs=True,
                check_name_matches=True):
    '''It yields the given reads grouped in pairs.

    The reads are consumed in order and split into consecutive packets of
    n_seqs_in_pair reads; each packet is one "pair".

    seqs -- an iterable with the reads.
    n_seqs_in_pair -- number of reads per pair. When it is None the size is
        taken from the first pair located by _get_first_pair_by_name; that
        first pair is yielded exactly as that helper returns it. If the
        helper finds no first pair, or the size is 0, nothing more is
        yielded.
    check_all_same_n_seqs -- when True an InterleaveError is raised if a
        packet ends up with fewer reads than n_seqs_in_pair.
    check_name_matches -- when True every packet is passed to
        _check_name_and_direction_match.

    Both checks are skipped for pairs made of a single read.

    The packets built here are yielded as tuples.
    '''
    seqs = iter(seqs)

    if n_seqs_in_pair is None:
        first_pair, next_read = _get_first_pair_by_name(seqs)
        if first_pair is None:
            # No pair size could be determined, so there is nothing to group
            return
        yield first_pair
        n_seqs_in_pair = len(first_pair)
        # The helper read one item past the first pair, put it back in front
        seqs = chain([next_read], seqs)

    if not n_seqs_in_pair:
        return

    if n_seqs_in_pair == 1:
        # No need to check anything, a pair cannot have less than one read
        # or more than one name
        check_all_same_n_seqs = False
        check_name_matches = False

    pairs = group_in_packets_fill_last(seqs, packet_size=n_seqs_in_pair)
    for pair in pairs:
        # A short trailing packet comes padded with None, drop the padding.
        # The result is built explicitly as a tuple so that len() works and
        # the caller gets a real sequence on any Python version (the filter
        # builtin only returns a sized sequence on Python 2).
        # NOTE(review): a genuine None item in seqs is dropped as well.
        pair = tuple(seq for seq in pair if seq is not None)
        if check_all_same_n_seqs and n_seqs_in_pair != len(pair):
            msg = 'The last pair has fewer reads'
            raise InterleaveError(msg)
        if check_name_matches:
            _check_name_and_direction_match(*pair)
        yield pair
def test_group_in_packets(self):
    '''It groups an iterator in packets of items

    group_in_packets leaves a short trailing packet as it is, whereas
    group_in_packets_fill_last pads it with None up to the packet size.
    '''
    # An even number of items is split in full packets
    full_packets = list(group_in_packets(range(4), 2))
    assert full_packets == [(0, 1), (2, 3)]

    # With an odd number of items the last packet is shorter
    ragged_packets = list(group_in_packets(range(5), 2))
    assert ragged_packets == [(0, 1), (2, 3), (4,)]

    # The fill_last flavour completes the last packet with None
    padded_packets = list(group_in_packets_fill_last(range(5), 2))
    assert padded_packets == [(0, 1), (2, 3), (4, None)]

    # Nothing in, nothing out
    no_packets = list(group_in_packets([], 2))
    assert no_packets == []
def _itemize_fastq(fhand):
    '''It splits the lines of fhand in chunks of four and wraps each chunk.

    The lines rejected by _line_is_not_empty are skipped. The remaining ones
    are grouped in packets of four lines (presumably the four lines of a
    single-line fastq record) and a SeqItem is lazily built for every packet
    with the name obtained by _get_name_from_lines and the packet itself.

    NOTE(review): group_in_packets_fill_last pads a short last packet, so a
    file with a line count that is not a multiple of four would hand the
    padded packet to _get_name_from_lines; confirm that this is handled.
    '''
    non_empty_lines = ifilter(_line_is_not_empty, fhand)
    # According to the original author group_in_packets_fill_last is faster
    # than group_in_packets
    records = group_in_packets_fill_last(non_empty_lines, 4)
    return (SeqItem(_get_name_from_lines(record), record)
            for record in records)
def _itemize_fastq_singleline(fhand):
    '''It yields one SeqItem for every four non-empty lines of fhand.

    fhand is filtered with _line_is_not_empty, the surviving lines are
    packed in groups of four by group_in_packets_fill_last and every group
    becomes a SeqItem whose name comes from _get_name_from_lines. The
    SeqItems are created lazily, as the returned generator is consumed.

    NOTE(review): the fixed packet size of four presumably relies on every
    record taking exactly four lines; a trailing incomplete record would
    arrive padded by group_in_packets_fill_last, verify how it is handled.
    '''
    lines_with_content = ifilter(_line_is_not_empty, fhand)
    # group_in_packets_fill_last is preferred over group_in_packets because,
    # as noted by the original author, it is faster
    four_line_chunks = group_in_packets_fill_last(lines_with_content, 4)
    return (SeqItem(_get_name_from_lines(chunk), chunk)
            for chunk in four_line_chunks)