示例#1
0
 def __iter__(self):
     """Scan the FASTQ file from the start, yielding one tuple per record.

     Each yielded tuple is ``(identifier, start_offset, length)``:

     - ``identifier`` is the "@" title line without its leading "@" and
       without trailing whitespace, passed through ``_bytes_to_string``.
     - ``start_offset`` is the handle position (``handle.tell()``) of the
       record's "@" line.
     - ``length`` is the summed length of the record's lines as returned
       by ``readline()`` (title, sequence, "+" and quality lines).

     Sequence and quality may each span several lines.  The end of the
     quality section is located by comparing its stripped length with the
     stripped length of the sequence, because a quality line may itself
     begin with "@".  An empty file yields nothing.

     Raises:
         ValueError: if the first line does not start with "@", if the
             file ends before a "+" line is seen, if the quality length
             does not match the sequence length, or if the line after a
             record is neither end of file nor an "@" line.
     """
     handle = self._handle
     handle.seek(0)
     start_offset = handle.tell()
     line = handle.readline()
     if not line:
         # Empty file, nothing to index.
         return
     at_char = _as_bytes("@")
     plus_char = _as_bytes("+")
     if line[0:1] != at_char:
         raise ValueError("Problem with FASTQ @ line:\n%s" % repr(line))
     while line:
         # At this point line is always the "@" title line of a record.
         record_id = line[1:].rstrip()
         length = len(line)
         # Sequence line(s): everything up to the "+" separator line.
         seq_len = 0
         while line:
             line = handle.readline()
             length += len(line)
             if line.startswith(plus_char):
                 break
             seq_len += len(line.strip())
         if not line:
             raise ValueError("Premature end of file in seq section")
         # Quality line(s): read until as many characters as the sequence
         # have been seen.  Stop at once on overshoot or end of file;
         # both are reported as a bad quality section below.
         qual_len = 0
         while line and qual_len < seq_len:
             line = handle.readline()
             qual_len += len(line.strip())
             length += len(line)
         if seq_len != qual_len:
             raise ValueError("Problem with quality section")
         # Should be end of record: the next line must be the following
         # record's "@" line, or empty at end of file.
         end_offset = handle.tell()
         line = handle.readline()
         if line and line[0:1] != at_char:
             raise ValueError("Problem with line %s" % repr(line))
         yield _bytes_to_string(record_id), start_offset, length
         start_offset = end_offset
示例#2
0
 def get_raw(self, offset):
     """Return the FASTQ record starting at ``offset`` as a raw string.

     Similar to the get method, but the record is returned exactly as
     read from the handle (title, sequence, "+" and quality lines
     concatenated) rather than parsed.

     Sequence and quality may each span several lines.  The end of the
     quality section is located by comparing its stripped length with the
     stripped length of the sequence.

     Args:
         offset: position in the handle of the record's "@" title line.

     Raises:
         ValueError: if the line at ``offset`` does not start with "@",
             if the file ends before a "+" line is seen, if the quality
             length does not match the sequence length, or if the line
             after the record is neither end of file nor an "@" line.
     """
     # TODO - Refactor this and the __init__ method to reduce code
     # duplication?
     handle = self._handle
     handle.seek(offset)
     line = handle.readline()
     data = line
     at_char = _as_bytes("@")
     plus_char = _as_bytes("+")
     if line[0:1] != at_char:
         raise ValueError("Problem with FASTQ @ line:\n%s" % repr(line))
     # Sequence line(s): everything up to and including the "+" line.
     seq_len = 0
     while line:
         line = handle.readline()
         data += line
         if line.startswith(plus_char):
             break
         seq_len += len(line.strip())
     if not line:
         raise ValueError("Premature end of file in seq section")
     # The loop above can only be left with a non-empty line via the
     # break, so line is the "+" separator here.
     # Quality line(s): read until as many characters as the sequence
     # have been seen.  Stop at once on overshoot or end of file; both
     # are reported as a bad quality section below.
     qual_len = 0
     while line and qual_len < seq_len:
         line = handle.readline()
         data += line
         qual_len += len(line.strip())
     if seq_len != qual_len:
         raise ValueError("Problem with quality section")
     # Should be end of record: peek at the next line (it is not part of
     # the returned data) to check it is an "@" line or end of file.
     line = handle.readline()
     if line and line[0:1] != at_char:
         raise ValueError("Problem with line %s" % repr(line))
     return data
示例#3
0
 def get_raw(self, offset):
     """Return the FASTQ record starting at ``offset`` as a raw string.

     Similar to the get method, but the record is returned exactly as
     read from the handle (title, sequence, "+" and quality lines
     concatenated) rather than parsed.

     Sequence and quality may each span several lines.  The end of the
     quality section is located by comparing its stripped length with the
     stripped length of the sequence.

     Args:
         offset: position in the handle of the record's "@" title line.

     Raises:
         ValueError: if the line at ``offset`` does not start with "@",
             if the file ends before a "+" line is seen, if the quality
             length does not match the sequence length, or if the line
             after the record is neither end of file nor an "@" line.
     """
     # TODO - Refactor this and the __init__ method to reduce code
     # duplication?
     handle = self._handle
     handle.seek(offset)
     line = handle.readline()
     data = line
     at_char = _as_bytes("@")
     plus_char = _as_bytes("+")
     if line[0:1] != at_char:
         raise ValueError("Problem with FASTQ @ line:\n%s" % repr(line))
     # Sequence line(s): everything up to and including the "+" line.
     seq_len = 0
     while line:
         line = handle.readline()
         data += line
         if line.startswith(plus_char):
             break
         seq_len += len(line.strip())
     if not line:
         raise ValueError("Premature end of file in seq section")
     # The loop above can only be left with a non-empty line via the
     # break, so line is the "+" separator here.
     # Quality line(s): read until as many characters as the sequence
     # have been seen.  Stop at once on overshoot or end of file; both
     # are reported as a bad quality section below.
     qual_len = 0
     while line and qual_len < seq_len:
         line = handle.readline()
         data += line
         qual_len += len(line.strip())
     if seq_len != qual_len:
         raise ValueError("Problem with quality section")
     # Should be end of record: peek at the next line (it is not part of
     # the returned data) to check it is an "@" line or end of file.
     line = handle.readline()
     if line and line[0:1] != at_char:
         raise ValueError("Problem with line %s" % repr(line))
     return data