def __iter__(self):
    """Scan the FASTQ file from the start, yielding one index entry per record.

    Each yielded item is a tuple ``(identifier, start_offset, length)``:

    - ``identifier`` is the text after the leading "@" of the title line
      with trailing whitespace removed, converted via ``_bytes_to_string``.
    - ``start_offset`` is the value of ``handle.tell()`` at the start of
      the record.
    - ``length`` is the summed ``len()`` of every line belonging to the
      record (title, sequence, "+" and quality lines).

    Sequence and quality data may be wrapped over several lines; the end of
    the quality section is detected when as many quality characters as
    sequence characters have been read.  A record with an empty sequence is
    expected to have a blank quality line, which is counted as part of the
    record.

    Raises ValueError if the file does not start with "@", ends in the
    middle of a record, has mismatched sequence/quality lengths, or if the
    line following a record does not start with "@".

    NOTE(review): the markers are built with ``_as_bytes``, so the handle is
    presumably opened in binary mode - confirm against the constructor.
    """
    handle = self._handle
    handle.seek(0)
    start_offset = handle.tell()
    line = handle.readline()
    if not line:
        # Empty file!
        return
    at_char = _as_bytes("@")
    plus_char = _as_bytes("+")
    if line[0:1] != at_char:
        raise ValueError("Problem with FASTQ @ line:\n%s" % repr(line))
    while line:
        # Here line is always a title line starting with "@".
        record_id = line[1:].rstrip()
        # Find the seq line(s)
        seq_len = 0
        length = len(line)
        while line:
            line = handle.readline()
            length += len(line)
            if line.startswith(plus_char):
                break
            seq_len += len(line.strip())
        if not line:
            raise ValueError("Premature end of file in seq section")
        # Here line is the "+" separator; find the qual line(s)
        qual_len = 0
        while line:
            if seq_len == qual_len:
                if seq_len == 0:
                    # Special case: an empty sequence still has a (blank)
                    # quality line which belongs to this record.
                    line = handle.readline()
                    if line.strip():
                        raise ValueError(
                            "Expected blank quality line, not %s" % repr(line)
                        )
                    length += len(line)
                # Should be end of record...
                end_offset = handle.tell()
                line = handle.readline()
                if line and line[0:1] != at_char:
                    # Previously this exception was constructed but never
                    # raised, letting malformed input through silently.
                    raise ValueError("Problem with line %s" % repr(line))
                break
            else:
                line = handle.readline()
                qual_len += len(line.strip())
                length += len(line)
        if seq_len != qual_len:
            # Covers both truncated and over-long quality sections.
            raise ValueError("Problem with quality section")
        yield _bytes_to_string(record_id), start_offset, length
        start_offset = end_offset
def get_raw(self, offset):
    """Similar to the get method, but returns the record as a raw string.

    Seeks the underlying handle to ``offset`` (which must be the start of a
    FASTQ record, i.e. a line beginning with "@") and returns every line of
    that record concatenated exactly as read from the handle: the title
    line, the sequence line(s), the "+" line and the quality line(s).  For
    a record with an empty sequence the blank quality line is included.

    Raises ValueError if the line at ``offset`` does not start with "@", if
    the file ends inside the record, if the sequence and quality lengths
    differ, or if the line following the record does not start with "@".
    """
    # TODO - Refactor this and the __iter__ method to reduce code
    # duplication?
    handle = self._handle
    handle.seek(offset)
    line = handle.readline()
    data = line
    at_char = _as_bytes("@")
    plus_char = _as_bytes("+")
    if line[0:1] != at_char:
        raise ValueError("Problem with FASTQ @ line:\n%s" % repr(line))
    # Find the seq line(s)
    seq_len = 0
    while line:
        line = handle.readline()
        data += line
        if line.startswith(plus_char):
            break
        seq_len += len(line.strip())
    if not line:
        raise ValueError("Premature end of file in seq section")
    # Here line is guaranteed to be the "+" separator: the loop above only
    # exits on the "+" line or at end of file, which was just rejected.
    # Find the qual line(s)
    qual_len = 0
    while line:
        if seq_len == qual_len:
            if seq_len == 0:
                # Special case: an empty sequence still has a (blank)
                # quality line which belongs to this record.
                line = handle.readline()
                if line.strip():
                    raise ValueError(
                        "Expected blank quality line, not %s" % repr(line)
                    )
                data += line
            # Should be end of record...
            line = handle.readline()
            if line and line[0:1] != at_char:
                # Previously this exception was constructed but never
                # raised, letting malformed input through silently.
                raise ValueError("Problem with line %s" % repr(line))
            break
        else:
            line = handle.readline()
            data += line
            qual_len += len(line.strip())
    if seq_len != qual_len:
        # Covers both truncated and over-long quality sections.
        raise ValueError("Problem with quality section")
    return data