def chunk_fastq_file(fastq_filename, new_filename, parse_rec):
    """
    Create a new FASTQ file from an existing one.

    :param str fastq_filename: the name of the original FASTQ file
    :param str new_filename: the name of the new FASTQ file
    :param ParseRecord parse_rec: the information describing where to extract
    :return:
    """
    try:
        os.remove(new_filename)
    except OSError:
        # the new file may not exist yet; that is fine
        pass

    # copy the header from the original FASTQ file to the new one
    bytes_from_file(fastq_filename, new_filename, 0, parse_rec.header_size)

    if parse_rec.begin_read_offset > 0:
        # if there are reads before a chunk offset, we need to extract them
        b = bgzf.BgzfReader(fastq_filename)
        b2 = bgzf.BgzfWriter(new_filename, mode="a")
        b.seek(parse_rec.begin_read_offset)
        b2.write(b.read(parse_rec.begin_read_size))
        b2.close()

    # grab bgzf chunks from the old FASTQ file and append to the new FASTQ file
    bytes_from_file(fastq_filename, new_filename, parse_rec.file_offset,
                    parse_rec.file_bytes)

    if parse_rec.end_read_offset > 0:
        # if there are reads after a chunk offset, we need to extract them
        b = bgzf.BgzfReader(fastq_filename)
        b2 = bgzf.BgzfWriter(new_filename, mode="a")
        b.seek(parse_rec.end_read_offset)
        b2.write(b.read(parse_rec.end_read_size))
        b2.close()
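The chunking above hinges on BGZF virtual offsets: on a BgzfReader, tell() returns a packed (block start, within-block) pair rather than a plain byte count, and seek() accepts the same. A minimal self-contained sketch of that round trip; the file name example.fastq.bgz is hypothetical and the file is created on the spot:

from Bio import bgzf

# write a tiny BGZF file to demonstrate against (hypothetical example file)
out = bgzf.BgzfWriter("example.fastq.bgz", "wb")
out.write(b"@read1\nACGT\n+\nFFFF\n")
out.close()

handle = bgzf.BgzfReader("example.fastq.bgz", "rb")
handle.read(7)                    # consume "@read1\n"
voffset = handle.tell()           # a virtual offset, not a byte count
coffset, within = bgzf.split_virtual_offset(voffset)
handle.seek(bgzf.make_virtual_offset(coffset, within))
assert handle.read(4) == b"ACGT"
handle.close()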
def __init__(self, pcons_file):
    try:
        self.pcons_file_handle = bgzf.BgzfReader(pcons_file, 'r',
                                                 max_cache=5000)
    except IOError as e:
        msg = "Could not read Pcons file!\nI/O error({0}): {1}\n".format(
            e.errno, e.strerror)
        raise FileException(msg)
    self.filename = pcons_file
    try:
        index_file = open(pcons_file + '.idx3', 'rb')
        self.uid_index = pickle.load(index_file)
    except IOError as e:
        msg = "Could not read index file!\nI/O error({0}): {1}\n".format(
            e.errno, e.strerror)
        raise FileException(msg)
    except pickle.UnpicklingError as e:
        msg = "Could not unpickle the index file - possibly wrong format!"
        msg += "\nUnpickling error: {}\n".format(e)
        raise FileException(msg)
    except Exception as e:
        msg = "Could not read/unpickle the index file - unknown error!: "
        msg += "{}\n".format(repr(e))
        raise FileException(msg)
def __build_index__(self):
    numbytes = 0
    self._bcode_off_map = {}
    num_pe = 0

    if self.fq_path.endswith('.gz'):
        index_name = self.fq_path + "i"
        if not os.path.exists(index_name):
            raise Exception("Only BGZF compression is supported")
        handle = bgzf.BgzfReader(self.fq_path)
    else:
        handle = open(self.fq_path)

    seen_set = set()
    for bcode, reads_iter in groupby(
        util.fastq_iter_pos(handle),
        lambda x: x[0],
    ):
        assert bcode is None or bcode not in seen_set, \
            "fastq {} NOT in barcode sorted order. Ensure reads that share " \
            "barcodes are in a block together".format(self.fq_path)
        seen_set.add(bcode)
        for _, qname, file_pos, lines in reads_iter:
            if bcode is not None and bcode not in self._bcode_off_map:
                self._bcode_off_map[bcode] = file_pos
            num_pe += 1
def check_by_char(self, old_file, new_file, old_gzip=False):
    for mode in ["r", "rb"]:
        if old_gzip:
            h = gzip.open(old_file, mode)
        else:
            h = open(old_file, mode)
        old = h.read()
        # Seems gzip can return bytes even if mode="r",
        # perhaps a bug in Python 3.2?
        if "b" in mode:
            old = _as_bytes(old)
        else:
            old = _as_string(old)
        h.close()

        for cache in [1, 10]:
            h = bgzf.BgzfReader(new_file, mode, max_cache=cache)
            temp = []
            while True:
                char = h.read(1)
                if not char:
                    break
                temp.append(char)
            if "b" in mode:
                new = _empty_bytes_string.join(temp)
            else:
                new = "".join(temp)
            del temp
            h.close()
            self.assertEqual(len(old), len(new))
            # If bytes vs unicode mismatch, give a short error message:
            self.assertEqual(old[:10], new[:10],
                             "%r vs %r, mode %r" % (old[:10], new[:10], mode))
            self.assertEqual(old, new)
def merge_vcf_split_chr(vcf_file, split_chr_inf):
    split_bed_df = pd.read_csv(split_chr_inf, index_col=3, header=None,
                               names=['chrom', 'start', 'end'], sep='\t')
    merge_chr_size_str = merged_chr_size_inf(split_bed_df)
    vcf_file = PurePath(vcf_file)
    is_gz_file = vcf_file.suffix == '.gz'
    if is_gz_file:
        split_vcf_inf = bgzf.BgzfReader(vcf_file)
    else:
        split_vcf_inf = open(vcf_file)
    chr_header_flag = 1
    # TODO add merge chr command information in vcf header
    for eachline in split_vcf_inf:
        eachline = eachline.strip()
        eachline_inf = eachline.split('\t')
        chrom = eachline_inf[0]
        # split chrom size -> merged chrom size
        if eachline.startswith('##contig='):
            if chr_header_flag:
                print(merge_chr_size_str)
                chr_header_flag = 0
            continue
        elif chrom in split_bed_df.index:
            new_chrom, offset = split_bed_df.loc[chrom, ['chrom', 'start']]
            eachline_inf[0] = new_chrom
            eachline_inf[1] = str(offset + int(eachline_inf[1]))
            eachline = '\t'.join(eachline_inf)
        print(eachline)
    split_vcf_inf.close()
def _is_sorted_bam(bam):
    """Check if a BAM file is sorted by coordinate."""
    with bgzf.BgzfReader(bam, "rb") as fin:
        bam_header = fin.readline().strip()
        return b"SO:coordinate" in bam_header
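This works because a BAM file's plain-text SAM header sits at the start of the decompressed stream, so the coordinate-sort tag is visible in the first line BgzfReader hands back. A hedged companion check in the same style, using the fact that decompressed BAM data begins with the 4-byte magic "BAM\x01" (the helper name is hypothetical):

def _is_bam(path):
    # decompressed BAM data starts with the magic bytes "BAM\x01"
    with bgzf.BgzfReader(path, "rb") as fin:
        return fin.read(4) == b"BAM\x01"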
def check_by_line(self, old_file, new_file, old_gzip=False):
    for mode in ["r", "rb"]:
        if old_gzip:
            h = gzip.open(old_file, mode)
        else:
            h = open(old_file, mode)
        old = h.read()
        if "b" in mode:
            old = _as_bytes(old)
        else:
            old = _as_string(old)
        h.close()

        for cache in [1, 10]:
            h = bgzf.BgzfReader(new_file, mode, max_cache=cache)
            if "b" in mode:
                new = b"".join(line for line in h)
            else:
                new = "".join(line for line in h)
            h.close()
            self.assertEqual(len(old), len(new))
            self.assertEqual(old[:10], new[:10],
                             "%r vs %r, mode %r" % (old[:10], new[:10], mode))
            self.assertEqual(old, new)
def check_by_line(self, old_file, new_file, old_gzip=False):
    if old_gzip:
        with gzip.open(old_file) as handle:
            old = handle.read()
    else:
        with open(old_file, "rb") as handle:
            old = handle.read()

    for mode in ["rb", "r"]:
        if "b" in mode:
            assert isinstance(old, bytes)
        else:
            # BGZF text mode is hard coded as latin1
            # and does not do universal new line mode
            old = old.decode("latin1")

        for cache in [1, 10]:
            with bgzf.BgzfReader(new_file, mode, max_cache=cache) as h:
                if "b" in mode:
                    new = b"".join(line for line in h)
                else:
                    new = "".join(line for line in h)
            self.assertEqual(len(old), len(new))
            self.assertEqual(
                old[:10], new[:10],
                "%r vs %r, mode %r" % (old[:10], new[:10], mode)
            )
            self.assertEqual(old, new)
def check_by_line(self, old_file, new_file, old_gzip=False):
    for mode in ["r", "rb"]:
        if old_gzip:
            h = gzip.open(old_file, mode)
        else:
            h = open(old_file, mode)
        old = h.read()
        # Seems gzip can return bytes even if mode="r",
        # perhaps a bug in Python 3.2?
        if "b" in mode:
            old = _as_bytes(old)
        else:
            old = _as_string(old)
        h.close()

        for cache in [1, 10]:
            h = bgzf.BgzfReader(new_file, mode, max_cache=cache)
            if "b" in mode:
                new = _empty_bytes_string.join(line for line in h)
            else:
                new = "".join(line for line in h)
            h.close()
            self.assertEqual(len(old), len(new))
            self.assertEqual(old[:10], new[:10],
                             "%r vs %r, mode %r" % (old[:10], new[:10], mode))
            self.assertEqual(old, new)
def __init__(self, path, mode='r'):
    """
    Store tabular information tied to genomic locations in a bgzipped file

    Args:
        path (str) : path to file
        mode (str) : mode, r: read, w: write
    """
    self.path = path
    self.index_path = f'{path}.idx'
    self.prev_contig = None
    self.mode = mode
    self.index = {}
    if self.mode == 'w':
        self.bgzf_handle = bgzf.BgzfWriter(self.path, 'w')
        self.index_handle = open(self.index_path, 'wt')
    elif self.mode == 'r':
        if not os.path.exists(self.path):
            raise ValueError(f'BGZIP file missing at {self.path}')
        self.bgzf_handle = bgzf.BgzfReader(self.path, 'rt')
        if not os.path.exists(self.index_path):
            raise ValueError(
                f'BGZIP index file missing at {self.index_path}')
        self.index_handle = open(self.index_path, 'rt')
        for line in self.index_handle:
            contig, start = line.strip().split()
            self.index[contig] = int(start)
    else:
        raise ValueError('Mode can be r or w')
    self.cache = {}
def __init__(self, filename, **kwargs):
    h = open(filename, 'rb')
    try:
        self._handle = bgzf.BgzfReader(mode="rb", fileobj=h)
    except ValueError as e:
        assert "BGZF" in str(e)
        # Not a BGZF file, rewind and use the plain handle
        h.seek(0)
        self._handle = h
def __init__(self, filename, format, alphabet):
    h = open(filename, "rb")
    try:
        self._handle = bgzf.BgzfReader(mode="rb", fileobj=h)
    except ValueError as e:
        assert "BGZF" in str(e)
        # Not a BGZF file, rewind and use the plain handle
        h.seek(0)
        self._handle = h
def open(self, fn):
    try:
        from Bio import bgzf
    except ImportError:
        print("Cannot import Bio.bgzf, need to check the installation",
              file=sys.stderr)
    self.handle = bgzf.BgzfReader(fn)
    # read the fai index: name -> [length, offset, linebases, linewidth]
    for ln in myopen(fn + '.fai'):
        fd = ln.strip().split()
        self.index[fd[0].replace('chr', '')] = [int(i) for i in fd[1:]]
    return len(self.index)
def check_text_with(self, old_file, new_file):
    """Check text mode using context manager (with statement)."""
    with open(old_file) as h:  # text mode!
        old_line = h.readline()
        old = old_line + h.read()

    with bgzf.BgzfReader(new_file, "r") as h:  # text mode!
        new_line = h.readline()
        new = new_line + h.read(len(old))

    self.assertEqual(old_line, new_line)
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)
def __init__(self, path, mode='r', read_all=False):
    """
    Store tabular information tied to genomic locations in a bgzipped file

    Args:
        path (str) : path to file
        mode (str) : mode, r: read, w: write
        read_all (bool) : when enabled all data is read from the file and
            the handles are closed
    """
    self.path = path
    self.index_path = f'{path}.idx'
    self.prev_contig = None
    self.mode = mode
    self.index = {}
    self.cache = {}
    if self.mode == 'w':
        self.bgzf_handle = bgzf.BgzfWriter(self.path, 'w')
        self.index_handle = open(self.index_path, 'wt')
    elif self.mode == 'r':
        if not os.path.exists(self.path):
            raise ValueError(f'BGZIP file missing at {self.path}')
        self.bgzf_handle = bgzf.BgzfReader(self.path, 'rt')
        if not os.path.exists(self.index_path):
            raise ValueError(
                f'BGZIP index file missing at {self.index_path}')
        self.index_handle = open(self.index_path, 'rt')
        for line in self.index_handle:
            contig, start = line.strip().split()
            self.index[contig] = int(start)
        if read_all:
            for line in self.bgzf_handle:
                if len(line) == 0:
                    continue
                line_contig, line_pos, line_strand, rest = \
                    self.read_file_line(line)
                if line_contig not in self.cache:
                    self.cache[line_contig] = {}
                self.cache[line_contig][(line_pos, line_strand)] = rest
            self.bgzf_handle.close()
            self.bgzf_handle = None
            self.index_handle.close()
            self.index_handle = None
    else:
        raise ValueError('Mode can be r or w')
def check_text(self, old_file, new_file):
    """Check text mode using explicit open/close."""
    with open(old_file) as h:  # text mode!
        old_line = h.readline()
        old = old_line + h.read()

    h = bgzf.BgzfReader(new_file, "r")  # text mode!
    new_line = h.readline()
    new = new_line + h.read(len(old))
    h.close()

    self.assertEqual(old_line, new_line)
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)
def check_text(self, old_file, new_file):
    h = open(old_file)  # text mode!
    old_line = h.readline()
    old = old_line + h.read()
    h.close()

    h = bgzf.BgzfReader(new_file, "r")  # text mode!
    new_line = h.readline()
    new = new_line + h.read(len(old))
    h.close()

    self.assertEqual(old_line, new_line)
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)
def open_haps(pathname):
    # http://stackoverflow.com/questions/21529163/python-gzipped-fileinput-returns-binary-string-instead-of-text-string/21529243
    ext = os.path.splitext(pathname)[1]
    if ext == '.gz':
        f = gzip.open(pathname, 'rt')
    elif ext == '.bgz':
        f = bgzf.BgzfReader(pathname)
    else:
        f = open(pathname)
    return f
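A hedged aside on the extension dispatch above: BGZF is itself valid gzip (each block is a complete gzip member with an extra field), so gzip.open can also read a .bgz file sequentially; BgzfReader is only needed when you want random access via virtual offsets. For example (the file name is hypothetical):

import gzip

# works even on BGZF data, because BGZF is a conforming gzip variant
with gzip.open("panel.haps.bgz", "rt") as f:
    first_line = f.readline()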
def _open_for_random_access(filename):
    """Open a file in binary mode, spot if it is BGZF format etc (PRIVATE).

    This functionality is used by the Bio.SeqIO and Bio.SearchIO index
    and index_db functions.
    """
    handle = open(filename, "rb")
    from Bio import bgzf
    try:
        return bgzf.BgzfReader(mode="rb", fileobj=handle)
    except ValueError as e:
        assert "BGZF" in str(e)
        # Not a BGZF file after all, rewind to start:
        handle.seek(0)
        return handle
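Usage sketch (hedged; the file name is hypothetical): the helper hands back either a BgzfReader or the raw binary handle, and both support seek()/tell(), so index-building code does not need to care which one it received.

handle = _open_for_random_access("example.fasta.bgz")
offset = handle.tell()   # virtual offset for BGZF, plain byte offset otherwise
handle.seek(offset)
first_byte = handle.read(1)
handle.close()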
def sideEffect(self, filename, *args, **kwargs):
    if self.count <= 1:
        self.test.assertEqual('filename.fasta.bgz', filename)
        self.count += 1
        writerIO = BytesIO()
        writer = bgzf.BgzfWriter(fileobj=writerIO)
        writer.write(b'>id0\nAC\n')
        writer.flush()
        fileobj = BytesIO(writerIO.getvalue())
        fileobj.mode = 'rb'
        return bgzf.BgzfReader(fileobj=fileobj)
    else:
        self.test.fail(
            'Open called too many times. Filename: %r, Args: %r, '
            'Keyword args: %r.' % (filename, args, kwargs))
def open(self):
    assert self.f_map is None, "fp map already populated"
    self.f_map = {}
    if self.fq_path.endswith('.gz'):
        index_name = self.fq_path + "i"
        if not os.path.exists(index_name):
            raise Exception("Only BGZF compression is supported")
        handle = bgzf.BgzfReader(self.fq_path)
        self.gzipped = True
    else:
        handle = open(self.fq_path)
        self.gzipped = False
    self.f_map[self.fq_path] = handle
    return self
def dinucleotide_to_count(logger, dinucleotide_file, count_file):
    logger.info("Counting %s ..." % dinucleotide_file)

    count_map = OrderedDict()
    lineCount = 0
    with bgzf.BgzfReader(dinucleotide_file, "r") as fin:
        for line in fin:
            lineCount += 1
            if lineCount % 100000 == 0:
                logger.info(lineCount)
            parts = line.rstrip().split('\t')
            chrom = parts[0]
            dinucleotide = parts[3]
            count = int(parts[4])
            chrom_map = count_map.setdefault(chrom, {})
            countVec = chrom_map.setdefault(dinucleotide, [0, 0])
            countVec[0] = countVec[0] + count
            countVec[1] = countVec[1] + 1

    write_count_file(logger, count_file, count_map)
def read_sites(self, logger, item):
    logger.info("Reading %s ..." % item.dinucleotide_file)
    curSites = {}
    line_count = 0
    with bgzf.BgzfReader(item.dinucleotide_file, "r") as fin:
        for line in fin:
            line_count += 1
            if line_count % 1000000 == 0:
                logger.info(line_count)
            parts = line.rstrip().split('\t')
            # ignore the chromosome contigs
            if is_contig_reference(parts[0]):
                continue
            key = "%s_%s_%s" % (parts[0], parts[1], parts[3])
            curSites[key] = parts[4]
    return curSites
def __call__(self, string):
    # the special argument "-" means sys.std{in,out}
    if string == '-':
        if 'r' in self._mode:
            return sys.stdin
        elif 'w' in self._mode:
            return sys.stdout
        else:
            raise ValueError('argument "-" with mode %r' % self._mode)

    # all other arguments are used as file names
    try:
        if string.endswith(".gz"):
            from Bio import bgzf
            if 'r' in self._mode:
                return bgzf.BgzfReader(string, self._mode)
            elif 'w' in self._mode or 'a' in self._mode:
                return bgzf.BgzfWriter(string, self._mode)
        else:
            return open(string, self._mode, self._bufsize)
    except OSError as e:
        raise ArgumentTypeError("can't open '%s': %s" % (string, e))
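This class is shaped like argparse.FileType, so an instance can be passed as an argument type and will transparently pick BgzfReader or BgzfWriter by file extension. A hedged wiring sketch; the class name BgzfFileType and its constructor signature are assumptions, since the snippet only shows __call__:

import argparse

parser = argparse.ArgumentParser()
# hypothetical: assumes the class above is named BgzfFileType and its
# constructor takes the mode string
parser.add_argument("vcf", type=BgzfFileType("r"))
# args = parser.parse_args(["calls.vcf.gz"])  # would open via bgzf.BgzfReader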
def check_by_char(self, old_file, new_file, old_gzip=False):
    if old_gzip:
        with gzip.open(old_file) as handle:
            old = handle.read()
    else:
        with open(old_file, "rb") as handle:
            old = handle.read()

    for mode in ["rb", "r"]:
        if "b" in mode:
            assert isinstance(old, bytes)
        else:
            # BGZF text mode is hard coded as latin1
            # and does not do universal new line mode
            old = old.decode("latin1")

        for cache in [1, 10]:
            h = bgzf.BgzfReader(new_file, mode, max_cache=cache)
            temp = []
            while True:
                char = h.read(1)
                if not char:
                    break
                temp.append(char)
            if "b" in mode:
                new = b"".join(temp)
            else:
                new = "".join(temp)
            del temp
            h.close()
            self.assertEqual(len(old), len(new))
            # If bytes vs unicode mismatch, give a short error message:
            self.assertEqual(
                old[:10], new[:10],
                "%r vs %r, mode %r" % (old[:10], new[:10], mode)
            )
            self.assertEqual(old, new)
def get_sequence(self, contig, start, end, strand=1, all_upper=False):
    """
    Return the genomic DNA sequence spanning [start, end) on contig.

    :param contig: The name of the contig on which the start and end
        coordinates are located
    :param start: The start location of the sequence to be returned
        (this endpoint is included in the interval)
    :param end: The end location of the sequence to be returned
        (this endpoint is not included in the interval)
    :param strand: The DNA strand of the sequence to be returned
        (-1 for negative strand, 1 for positive strand)
    :param all_upper: If true, return the sequence in all uppercase
        letters. Otherwise return lowercase letters for positions that
        are "soft-masked"
        (see https://genomevolution.org/wiki/index.php/Masked).
    :return: A string of DNA nucleotides of length end-start
    """
    if contig not in self._index:
        raise ContigNotFound(message='Contig {} not found'.format(contig),
                             requested_contig=contig,
                             valid_contigs=list(self._index.keys()))
    if start < 0:
        raise CoordinateOutOfBounds(
            message='Start coordinate below 0',
            problematic_coordinate=start,
            problem_with_start=True,
            coordinate_too_small=True,
            valid_coordinate_range=(0, self.contig_lengths[contig]),
            current_contig=contig)
    if start > self.contig_lengths[contig]:
        raise CoordinateOutOfBounds(
            message='Start coordinate past end of contig',
            problematic_coordinate=start,
            problem_with_start=True,
            coordinate_too_small=False,
            valid_coordinate_range=(0, self.contig_lengths[contig]),
            current_contig=contig)
    if end > self.contig_lengths[contig]:
        raise CoordinateOutOfBounds(
            message='End coordinate past end of contig',
            problematic_coordinate=end,
            problem_with_start=False,
            coordinate_too_small=False,
            valid_coordinate_range=(0, self.contig_lengths[contig]),
            current_contig=contig)
    if end < 0:
        raise CoordinateOutOfBounds(
            message='End coordinate below 0',
            problematic_coordinate=end,
            problem_with_start=False,
            coordinate_too_small=True,
            valid_coordinate_range=(0, self.contig_lengths[contig]),
            current_contig=contig)
    if start >= end:
        raise InvalidCoordinates(start=start, end=end)

    query_length = end - start
    start_pos_file_distance = self._text_distance_to_file_distance(start)

    start_block = sorted(
        self._index[contig].search(start_pos_file_distance))[0]
    start_block_offset = start_block.data

    verbose_print('Retrieving sequence for {} [{},{}) ...'.format(
        contig, start, end))

    sequence_start_offset = start_pos_file_distance - start_block.begin
    retrieved_sequence = ''
    with bgzf.BgzfReader(self.bgzipped_fasta_filename, 'rt') as fasta_file:
        fasta_file.seek(
            bgzf.make_virtual_offset(start_block_offset,
                                     sequence_start_offset))
        while len(retrieved_sequence) < query_length:
            retrieved_sequence += fasta_file.readline().rstrip()
    trimmed_sequence = retrieved_sequence[:query_length]

    if all_upper:
        trimmed_sequence = trimmed_sequence.upper()

    if strand == -1:
        return reverse_complement(trimmed_sequence)
    else:
        return trimmed_sequence
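The seek above relies on how a virtual offset is packed: the compressed block's file offset occupies the high 48 bits and the within-block offset (< 65536) the low 16 bits. A small self-contained check of that arithmetic:

from Bio import bgzf

block_start = 12345    # byte offset of a BGZF block in the compressed file
within_block = 67      # offset into that block's decompressed data

voffset = bgzf.make_virtual_offset(block_start, within_block)
assert voffset == (block_start << 16) | within_block
assert bgzf.split_virtual_offset(voffset) == (block_start, within_block)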
def check_random(self, filename):
    """Check BGZF random access by reading blocks in forward & reverse order."""
    h = gzip.open(filename, "rb")
    old = h.read()
    h.close()

    h = open(filename, "rb")
    blocks = list(bgzf.BgzfBlocks(h))
    h.close()

    # Forward
    new = _empty_bytes_string
    h = bgzf.BgzfReader(filename, "rb")
    self.assertTrue(h.seekable())
    self.assertFalse(h.isatty())
    self.assertEqual(h.fileno(), h._handle.fileno())
    for start, raw_len, data_start, data_len in blocks:
        h.seek(bgzf.make_virtual_offset(start, 0))
        data = h.read(data_len)
        self.assertEqual(len(data), data_len)
        # self.assertEqual(start + raw_len, h._handle.tell())
        self.assertEqual(len(new), data_start)
        new += data
    h.close()
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)

    # Reverse
    new = _empty_bytes_string
    h = bgzf.BgzfReader(filename, "rb")
    for start, raw_len, data_start, data_len in blocks[::-1]:
        h.seek(bgzf.make_virtual_offset(start, 0))
        data = h.read(data_len)
        self.assertEqual(len(data), data_len)
        # self.assertEqual(start + raw_len, h._handle.tell())
        new = data + new
    h.close()
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)

    # Jump back - non-sequential seeking
    if len(blocks) >= 3:
        h = bgzf.BgzfReader(filename, "rb", max_cache=1)
        # Seek to a late block in the file,
        # half way into the third last block
        start, raw_len, data_start, data_len = blocks[-3]
        voffset = bgzf.make_virtual_offset(start, data_len // 2)
        h.seek(voffset)
        self.assertEqual(voffset, h.tell())
        data = h.read(1000)
        self.assertTrue(data in old)
        self.assertEqual(old.find(data), data_start + data_len // 2)

        # Now seek to an early block in the file,
        # half way into the second block
        start, raw_len, data_start, data_len = blocks[1]
        voffset = bgzf.make_virtual_offset(start, data_len // 2)
        h.seek(voffset)
        self.assertEqual(voffset, h.tell())

        # Now read all rest of this block and start of next block
        data = h.read(data_len + 1000)
        self.assertTrue(data in old)
        self.assertEqual(old.find(data), data_start + data_len // 2)
        h.close()

    # Check seek/tell at block boundaries
    v_offsets = []
    for start, raw_len, data_start, data_len in blocks:
        for within_offset in [0, 1, data_len // 2, data_len - 1]:
            if within_offset < 0 or data_len <= within_offset:
                continue
            voffset = bgzf.make_virtual_offset(start, within_offset)
            real_offset = data_start + within_offset
            v_offsets.append((voffset, real_offset))
    shuffle(v_offsets)
    h = bgzf.BgzfReader(filename, "rb", max_cache=1)
    for voffset, real_offset in v_offsets:
        h.seek(0)
        assert voffset >= 0 and real_offset >= 0
        self.assertEqual(h.read(real_offset), old[:real_offset])
        self.assertEqual(h.tell(), voffset)
    for voffset, real_offset in v_offsets:
        h.seek(voffset)
        self.assertEqual(h.tell(), voffset)
    h.close()
def unzip(file_in):
    # read the first 100000 decompressed characters of a BGZF file
    handle = bgzf.BgzfReader(file_in, "r")
    aa = handle.read(100000)
    print(aa)
def calculate_chunks(filename, num_chunks):
    """
    Calculate the boundaries in the BGZF FASTQ file and partition into chunks.

    :param str filename: name of the FASTQ file
    :param int num_chunks: number of chunks to partition the boundaries into
    :return: a list of ParseRecord tuples containing the start and end
        boundaries
    """
    if num_chunks <= 0:
        raise ValueError("The number of chunks to calculate should be >= 1")

    if num_chunks == 1:
        pr = ParseRecord(0, 0, 0, 0, -1, 0, 0)
        return [pr]

    try:
        f = open(filename, 'rb')  # BGZF block headers are binary data

        # get all the block start offsets and decompressed lengths
        block_offsets = []
        decompressed_lengths = []
        for values in FastBgzfBlocks(f):
            block_offsets.append(values[0])
            decompressed_lengths.append(values[3])

        # partition the starts into manageable chunks
        div, mod = divmod(len(block_offsets), num_chunks)

        fastq_fh = bgzf.BgzfReader(filename, 'r')
        header_size = 0
        partitioned_offsets = [(header_size, 0)]

        for i in range(1, num_chunks):
            index = div * i + min(i, mod)
            virtual_offset = bgzf.make_virtual_offset(block_offsets[index], 0)
            fastq_fh.seek(virtual_offset)

            # skip to the '+' separator so a chunk never starts mid-record
            line = fastq_fh.readline().strip()
            while line != '+':
                line = fastq_fh.readline().strip()
            quality_line = fastq_fh.readline()
            virtual_offset = fastq_fh.tell()

            # block start & within block offset
            partitioned_offsets.append(
                bgzf.split_virtual_offset(virtual_offset))

        fastq_fh.close()

        # now let's calculate beginnings and ends
        params = []
        for i, offset in enumerate(partitioned_offsets):
            index = block_offsets.index(partitioned_offsets[i][0])

            begin_read_offset = 0
            begin_read_size = 0
            file_offset = 0
            file_bytes = 0
            end_read_offset = 0
            end_read_size = 0

            if i == 0:
                # first chunk
                begin_read_offset = 0
                begin_read_size = 0
                file_offset = block_offsets[index]
                file_bytes = partitioned_offsets[i + 1][0] - file_offset
                end_read_offset = bgzf.make_virtual_offset(
                    partitioned_offsets[i + 1][0], 0)
                end_read_size = partitioned_offsets[i + 1][1]
            elif i == num_chunks - 1:
                # last chunk
                begin_read_offset = bgzf.make_virtual_offset(
                    partitioned_offsets[i][0], partitioned_offsets[i][1])
                begin_read_size = decompressed_lengths[index] - \
                    partitioned_offsets[i][1]
                file_offset = block_offsets[index + 1]
                file_bytes = -1
                end_read_offset = 0
                end_read_size = 0
            else:
                # all other chunks
                if offset[1] == 0:
                    # chunk boundary fell exactly on a BGZF block boundary,
                    # which this partitioning scheme does not expect
                    print('Unexpected BGZF block boundary while partitioning')
                    return
                begin_read_offset = bgzf.make_virtual_offset(
                    partitioned_offsets[i][0], partitioned_offsets[i][1])
                begin_read_size = decompressed_lengths[index] - \
                    partitioned_offsets[i][1]
                file_offset = block_offsets[index + 1]
                file_bytes = partitioned_offsets[i + 1][0] - file_offset
                end_read_offset = bgzf.make_virtual_offset(
                    partitioned_offsets[i + 1][0], 0)
                end_read_size = partitioned_offsets[i + 1][1]

            pr = ParseRecord(header_size, begin_read_offset, begin_read_size,
                             file_offset, file_bytes, end_read_offset,
                             end_read_size)
            params.append(pr)

        return params
    except Exception as e:
        print('calculate_chunks error: {}'.format(str(e)))
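A hedged end-to-end sketch tying calculate_chunks to chunk_fastq_file from earlier in this section; the input file name and chunk naming scheme are hypothetical, and both helpers plus ParseRecord are assumed to come from the same module:

num_chunks = 4
parse_recs = calculate_chunks("input.fastq.gz", num_chunks)
for i, parse_rec in enumerate(parse_recs):
    # write each chunk as a standalone BGZF FASTQ file (hypothetical naming)
    chunk_fastq_file("input.fastq.gz",
                     "input.chunk{}.fastq.gz".format(i),
                     parse_rec)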