def from_gff3(path, attributes=None, region=None, score_fill=-1,
              phase_fill=-1, attributes_fill=b'.', dtype=None, blen=None,
              storage=None, create='table', expectedlen=200000,
              **storage_kwargs):
    """Load the records yielded by ``iter_gff3`` into a chunked feature table.

    The first 1000 records are read eagerly and turned into a record array;
    that array fixes the dtype and seeds the output table. All remaining
    records are then appended to the table block by block.

    Parameters
    ----------
    path, attributes, region, score_fill, phase_fill, attributes_fill
        Forwarded unchanged to ``iter_gff3``. When `attributes` is truthy its
        items are also appended to the eight fixed field names.
    dtype : optional
        Passed to ``np.rec.array`` when building the sample array; the dtype
        of that array is then used for every subsequent block.
    blen : optional
        Block length, resolved through ``_chunked.get_blen_table``.
    storage : optional
        Storage specification, resolved through ``_chunked.get_storage``.
    create : str, optional
        Name of the method on the storage object used to create the output.
    expectedlen : int, optional
        Forwarded to the storage creation method.
    **storage_kwargs
        Forwarded to the storage creation method.

    Returns
    -------
    FeatureChunkedTable

    """

    # lazily iterate over the records in the file
    records = iter_gff3(path, attributes=attributes, region=region,
                        score_fill=score_fill, phase_fill=phase_fill,
                        attributes_fill=attributes_fill)

    # materialise a leading sample; it determines the dtype and seeds the
    # output table
    sample = list(itertools.islice(records, 1000))
    names = ('seqid', 'source', 'type', 'start', 'end', 'score', 'strand',
             'phase')
    if attributes:
        names += tuple(attributes)
    sample_array = np.rec.array(sample, names=names, dtype=dtype)
    dtype = sample_array.dtype

    # create the output table from the sample
    storage = _chunked.get_storage(storage)
    make_table = getattr(storage, create)
    table = make_table(sample_array, expectedlen=expectedlen,
                       **storage_kwargs)
    blen = _chunked.get_blen_table(table, blen=blen)

    # drain the rest of the iterator one block at a time
    while True:
        chunk = list(itertools.islice(records, 0, blen))
        if not chunk:
            break
        table.append(np.asarray(chunk, dtype=dtype))

    return FeatureChunkedTable(table)
def to_vcf(self, path, rename=None, number=None, description=None,
           fill=None, blen=None, write_header=True):
    """Write this table to a VCF file, one block of rows at a time.

    Parameters
    ----------
    path : str
        File to write; opened in text mode with ``'w'``, so any existing
        content is replaced.
    rename, number, description : optional
        Forwarded to ``write_vcf_header`` (`rename` is also forwarded to
        ``write_vcf_data``).
    fill : optional
        Forwarded to ``write_vcf_data``.
    blen : optional
        Block length, resolved through ``_chunked.get_blen_table``.
    write_header : bool, optional
        If false, the call to ``write_vcf_header`` is skipped.

    """
    names, callset = normalize_callset(self)

    with open(path, 'w') as vcf_file:

        if write_header:
            write_vcf_header(vcf_file, names, callset, rename=rename,
                             number=number, description=description)

        # resolve the block length, then emit the rows in consecutive slices
        blen = _chunked.get_blen_table(self, blen)
        n_rows = len(self)
        for start in range(0, n_rows, blen):
            stop = min(start + blen, n_rows)
            write_vcf_data(vcf_file, names, self[start:stop],
                           rename=rename, fill=fill)
def to_vcf(self, path, rename=None, number=None, description=None,
           fill=None, blen=None, write_header=True):
    """Write this table to a VCF file, one block of rows at a time.

    Parameters
    ----------
    path : str
        File to write; opened in text mode with ``'w'``, so any existing
        content is replaced.
    rename, number, description : optional
        Forwarded to ``write_vcf_header`` (`rename` is also forwarded to
        ``write_vcf_data``).
    fill : optional
        Forwarded to ``write_vcf_data``.
    blen : optional
        Block length, i.e. the number of rows written per call to
        ``write_vcf_data``. Resolved through ``_chunked.get_blen_table``.
    write_header : bool, optional
        If false, the call to ``write_vcf_header`` is skipped.

    """
    with open(path, 'w') as vcf_file:

        if write_header:
            write_vcf_header(vcf_file, self, rename=rename, number=number,
                             description=description)

        # Forward the caller's `blen` so an explicit block length is
        # respected rather than silently dropped.
        # NOTE(review): presumably get_blen_table only guesses a value when
        # blen is None -- confirm against its definition.
        blen = _chunked.get_blen_table(self, blen=blen)

        n_rows = len(self)
        for i in range(0, n_rows, blen):
            j = min(i + blen, n_rows)
            block = self[i:j]
            write_vcf_data(vcf_file, block, rename=rename, fill=fill)