Пример #1
0
    def from_gff3(path, attributes=None, region=None, score_fill=-1,
                  phase_fill=-1, attributes_fill=b'.', dtype=None,
                  blen=None, storage=None, create='table', expectedlen=200000,
                  **storage_kwargs):
        """Load records produced by ``iter_gff3`` into a chunked feature table.

        The first 1000 records are converted to a NumPy record array, which
        fixes the dtype and seeds the output container; all remaining records
        are then appended to that container one block at a time.

        Parameters
        ----------
        path, region, score_fill, phase_fill, attributes_fill
            Forwarded unchanged to ``iter_gff3``.
        attributes
            Forwarded to ``iter_gff3``. When truthy, its items are also used
            as extra column names after the eight fixed columns.
        dtype
            Passed to ``np.rec.array`` for the sample; the dtype of the
            resulting record array is used for every later block.
        blen
            Passed to ``_chunked.get_blen_table`` to obtain the block length.
        storage
            Passed to ``_chunked.get_storage`` to obtain the storage object.
        create
            Name of the method on the storage object that builds the output
            container.
        expectedlen, **storage_kwargs
            Forwarded to that container-building method.

        Returns
        -------
        FeatureChunkedTable
            Wrapper around the populated container.
        """
        fixed_columns = ('seqid', 'source', 'type', 'start', 'end', 'score',
                         'strand', 'phase')

        # Record stream; everything below consumes it strictly in order.
        records = iter_gff3(path, attributes=attributes, region=region,
                            score_fill=score_fill, phase_fill=phase_fill,
                            attributes_fill=attributes_fill)

        # The leading sample decides the dtype and seeds the output.
        head = list(itertools.islice(records, 1000))
        if attributes:
            columns = fixed_columns + tuple(attributes)
        else:
            columns = fixed_columns
        seed = np.rec.array(head, names=columns, dtype=dtype)
        record_dtype = seed.dtype

        # Build the output container from the sample.
        backend = _chunked.get_storage(storage)
        make_container = getattr(backend, create)
        table = make_container(seed, expectedlen=expectedlen,
                               **storage_kwargs)
        block_size = _chunked.get_blen_table(table, blen=blen)

        # Drain the rest of the stream block by block until it runs dry.
        while True:
            chunk = list(itertools.islice(records, 0, block_size))
            if not chunk:
                break
            table.append(np.asarray(chunk, dtype=record_dtype))

        return FeatureChunkedTable(table)
Пример #2
0
 def to_vcf(self,
            path,
            rename=None,
            number=None,
            description=None,
            fill=None,
            blen=None,
            write_header=True):
     """Write this table to the text file at ``path`` in blocks.

     The table is first passed through ``normalize_callset``. An optional
     header is emitted with ``write_vcf_header``, then rows are sliced out
     block by block and handed to ``write_vcf_data``.

     Parameters
     ----------
     path
         File to open for writing (mode ``'w'``).
     rename, number, description
         Forwarded to ``write_vcf_header``; ``rename`` is also forwarded to
         ``write_vcf_data``.
     fill
         Forwarded to ``write_vcf_data``.
     blen
         Passed to ``_chunked.get_blen_table`` to obtain the block length.
     write_header
         When truthy, the header is written before any data.
     """
     names, callset = normalize_callset(self)
     header_options = dict(rename=rename,
                           number=number,
                           description=description)
     with open(path, 'w') as vcf_file:
         if write_header:
             write_vcf_header(vcf_file, names, callset, **header_options)

         # Resolve the block length only once the file is open, as before.
         step = _chunked.get_blen_table(self, blen)
         for start in range(0, len(self), step):
             # Clamp the final block to the end of the table.
             stop = min(start + step, len(self))
             write_vcf_data(vcf_file,
                            names,
                            self[start:stop],
                            rename=rename,
                            fill=fill)
Пример #3
0
 def to_vcf(self, path, rename=None, number=None, description=None,
            fill=None, blen=None, write_header=True):
     """Write this table to the text file at ``path`` in blocks.

     An optional header is emitted with ``write_vcf_header``, then rows are
     sliced out block by block and handed to ``write_vcf_data``.

     Parameters
     ----------
     path
         File to open for writing (mode ``'w'``).
     rename, number, description
         Forwarded to ``write_vcf_header``; ``rename`` is also forwarded to
         ``write_vcf_data``.
     fill
         Forwarded to ``write_vcf_data``.
     blen
         Requested block length, passed to ``_chunked.get_blen_table``.
         With the default ``None`` the choice is left to that helper.
     write_header
         When truthy, the header is written before any data.
     """
     with open(path, 'w') as vcf_file:
         if write_header:
             write_vcf_header(vcf_file, self, rename=rename, number=number,
                              description=description)
         # Forward the caller's block length; it was previously dropped, so
         # an explicit ``blen`` argument had no effect.
         blen = _chunked.get_blen_table(self, blen=blen)
         for i in range(0, len(self), blen):
             # Clamp the final block to the end of the table.
             j = min(i+blen, len(self))
             block = self[i:j]
             write_vcf_data(vcf_file, block, rename=rename, fill=fill)
Пример #4
0
    def from_gff3(path,
                  attributes=None,
                  region=None,
                  score_fill=-1,
                  phase_fill=-1,
                  attributes_fill=b'.',
                  dtype=None,
                  blen=None,
                  storage=None,
                  create='table',
                  expectedlen=200000,
                  **storage_kwargs):
        """Build a chunked feature table from the records of ``iter_gff3``.

        A sample of up to 1000 leading records is turned into a NumPy record
        array that determines the dtype and initialises the output container.
        The rest of the record stream is appended in fixed-size blocks.

        Parameters
        ----------
        path, region, score_fill, phase_fill, attributes_fill
            Forwarded unchanged to ``iter_gff3``.
        attributes
            Forwarded to ``iter_gff3``. When truthy, its items are appended
            to the eight fixed column names.
        dtype
            Passed to ``np.rec.array`` when building the sample array; the
            sample's resulting dtype is reused for all later blocks.
        blen
            Passed to ``_chunked.get_blen_table`` to obtain the block length.
        storage
            Passed to ``_chunked.get_storage`` to obtain the storage object.
        create
            Name of the storage-object method used to create the container.
        expectedlen, **storage_kwargs
            Forwarded to that method.

        Returns
        -------
        FeatureChunkedTable
            Wrapper around the populated container.
        """
        # Record stream shared by the sampling step and the block loop.
        stream = iter_gff3(path,
                           attributes=attributes,
                           region=region,
                           score_fill=score_fill,
                           phase_fill=phase_fill,
                           attributes_fill=attributes_fill)

        # Sample the head of the stream to settle dtype and seed the output.
        sample = list(itertools.islice(stream, 1000))
        field_names = ('seqid', 'source', 'type', 'start', 'end', 'score',
                       'strand', 'phase')
        if attributes:
            field_names = field_names + tuple(attributes)
        sample_array = np.rec.array(sample, names=field_names, dtype=dtype)
        row_dtype = sample_array.dtype

        # Create the output container through the requested storage method.
        store = _chunked.get_storage(storage)
        container = getattr(store, create)(sample_array,
                                           expectedlen=expectedlen,
                                           **storage_kwargs)
        rows_per_block = _chunked.get_blen_table(container, blen=blen)

        def next_rows():
            # One block of records; an empty list means the stream is done.
            return list(itertools.islice(stream, 0, rows_per_block))

        # Two-argument iter() keeps calling next_rows() until it returns [].
        for rows in iter(next_rows, []):
            container.append(np.asarray(rows, dtype=row_dtype))

        return FeatureChunkedTable(container)