Python asinterval示例，gffutils.helpers.asinterval Python示例

示例#1

0

显示文件

def slop_list2gff():
    """
    Yield one `asinterval`-converted feature per protein ID in `slop_list`.

    IDs whose first three characters are "MGG" are looked up in `MGG_db`
    using their first nine characters. Every other ID is parsed with
    `strain_protein_id_pattern`: capture group 1 selects the module-level
    database named "<group1>_db" and capture group 2 is appended to "gene_"
    to form the lookup key in that database.
    """
    # Strain databases are resolved by name from this module's globals.
    module_globals = globals()
    for protein_id in slop_list:
        if protein_id[0:3] == "MGG":
            yield asinterval(MGG_db[protein_id[0:9]])
            continue
        match = strain_protein_id_pattern.search(protein_id)
        strain_id, protein_ordinal = match.group(1, 2)
        strain_db = module_globals.get(strain_id + "_db")
        yield asinterval(strain_db["gene_" + protein_ordinal])

示例#2

0

显示文件

文件： atf3_peaks_helpers.py 项目： Al3n70rn/metaseq

def intron_generator():
    """
    Yield the intervals left over after subtracting every exon from every
    gene, considering only features whose chromosome is in `settings.CHROMS`.
    """
    def _as_bedtool(featuretype):
        # Wrap a lazy stream of converted features of one type in a BedTool.
        return pybedtools.BedTool(
            asinterval(feat)
            for feat in settings.G.features_of_type(featuretype)
            if feat.chrom in settings.CHROMS)

    genes = _as_bedtool('gene')
    exons = _as_bedtool('exon')
    introns = genes.subtract(exons).saveas()
    for intron in introns:
        yield intron

示例#3

0

显示文件

文件： atf3_peaks_helpers.py 项目： woodhaha/metaseq

def intron_generator():
    """
    Produce intron intervals: gene intervals with all exon intervals
    subtracted, restricted to chromosomes listed in `settings.CHROMS`.
    """
    gene_intervals = (
        asinterval(gene)
        for gene in settings.G.features_of_type('gene')
        if gene.chrom in settings.CHROMS
    )
    genes = pybedtools.BedTool(gene_intervals)

    exon_intervals = (
        asinterval(exon)
        for exon in settings.G.features_of_type('exon')
        if exon.chrom in settings.CHROMS
    )
    exons = pybedtools.BedTool(exon_intervals)

    # saveas() is called on the subtraction result before iterating over it.
    subtracted = genes.subtract(exons).saveas()
    for remaining in subtracted:
        yield remaining

示例#4

0

显示文件

文件： results_table.py 项目： tanglingfung/metaseq

    def features(self, ignore_unknown=False):
        """
        Generator of currently-selected features.

        Looks up each ID from `self.data[self.id_column]` in the attached
        `gffutils.FeatureDB` (`self.gffdb`) and yields the result of passing
        it through `asinterval`, for use with `pybedtools`.

        :param ignore_unknown: If `ignore_unknown=False` then a
            `gffutils.FeatureNotFoundError` naming the missing ID is raised
            when a feature cannot be found; if `ignore_unknown=True`
            then silently ignore these cases. Consider using the
            `strip_unknown_features()` method to handle these cases up front.
        :raises ValueError: if `self.gffdb` is not set (falsy).
        """
        if not self.gffdb:
            raise ValueError('Please attach a GFF database created by '
                             'gffutils by setting the .gffdb attribute to the '
                             'database\'s path.')

        for i in self.data[self.id_column]:
            try:
                yield asinterval(self.gffdb[i])
            except gffutils.FeatureNotFoundError:
                if ignore_unknown:
                    continue
                # `i` comes straight from the ID column, so it may be a plain
                # value with no `.id` attribute; fall back to the value itself
                # so the intended error is raised instead of AttributeError.
                missing = getattr(i, 'id', i)
                raise gffutils.FeatureNotFoundError(
                    '%s not found' % (missing,))

示例#5

0

显示文件

文件： results_table.py 项目： hjanime/metaseq

    def features(self, ignore_unknown=False):
        """
        Generator of currently-selected features.

        Each ID in `self.data[self.id_column]` is looked up in `self.gffdb`
        (the attached `gffutils.FeatureDB`) and the result is converted with
        `asinterval` for use with `pybedtools`.

        :param ignore_unknown: If `ignore_unknown=False` then a
            `gffutils.FeatureNotFoundError` naming the missing ID is raised
            when a feature cannot be found; if `ignore_unknown=True`
            then silently ignore these cases. Consider using the
            `strip_unknown_features()` method to handle these cases up front.
        :raises ValueError: if `self.gffdb` is not set (falsy).
        """
        if not self.gffdb:
            raise ValueError('Please attach a GFF database created by '
                             'gffutils by setting the .gffdb attribute to the '
                             'database\'s path.')

        for i in self.data[self.id_column]:
            try:
                yield asinterval(self.gffdb[i])
            except gffutils.FeatureNotFoundError:
                if ignore_unknown:
                    continue
                # The loop value is an ID from the table and may not have an
                # `.id` attribute; use it directly in that case so the lookup
                # failure is reported rather than masked by AttributeError.
                missing = getattr(i, 'id', i)
                raise gffutils.FeatureNotFoundError(
                    '%s not found' % (missing,))

示例#6

0

显示文件

 def _make_track(self, d, cls):
     """
     Build a `Track` from the items in `d[cls]`, each converted with
     `asinterval`.

     The track's height is `self.heights[cls]`; its base is `self.ybase`
     offset by half the difference between the 'full' height and that height.
     Remaining options come from `self.kwargs`.
     """
     track_height = self.heights[cls]
     half_gap = (self.heights['full'] - track_height) * 0.5
     track_base = self.ybase + half_gap
     intervals = (asinterval(item) for item in d[cls])
     return Track(intervals,
                  ybase=track_base,
                  yheight=track_height,
                  **self.kwargs)

示例#7

0

显示文件

文件： pybedtools_integration.py 项目： daler/gffutils

 def gen():
     """
     Generator of pybedtools.Intervals representing TSSes.

     Every level-1 child of every 'gene' feature in `db` is collapsed to a
     single position (its stop on the '-' strand, its start otherwise) and
     has '_TSS' appended to its featuretype before conversion.
     """
     for gene in db.features_of_type('gene'):
         for tx in db.children(gene, level=1):
             if tx.strand != '-':
                 tx.stop = tx.start
             else:
                 tx.start = tx.stop
             tx.featuretype += '_TSS'
             yield helpers.asinterval(tx)

示例#8

0

显示文件

文件： pybedtools_integration.py 项目： zorrodong/gffutils

 def gen():
     """
     Generator of pybedtools.Intervals representing TSSes.

     Walks the level-1 children of each 'gene' feature in `db`, shrinks each
     one to a single coordinate chosen by strand, tags its featuretype with
     a '_TSS' suffix and yields it through `helpers.asinterval`.
     """
     for parent_gene in db.features_of_type('gene'):
         children = db.children(parent_gene, level=1)
         for child in children:
             on_minus = child.strand == '-'
             if on_minus:
                 # minus strand: collapse onto the stop coordinate
                 child.start = child.stop
             else:
                 # any other strand: collapse onto the start coordinate
                 child.stop = child.start
             child.featuretype = child.featuretype + '_TSS'
             yield helpers.asinterval(child)

示例#9

0

显示文件

文件： atf3_peaks_helpers.py 项目： woodhaha/metaseq

def gene_generator():
    """
    Yield every 'gene' feature from `settings.G` whose chromosome is listed
    in `settings.CHROMS`, converted with `asinterval`.
    """
    for gene in settings.G.features_of_type('gene'):
        if gene.chrom in settings.CHROMS:
            yield asinterval(gene)

示例#10

0

显示文件

文件： atf3_peaks_helpers.py 项目： Al3n70rn/metaseq

def gene_generator():
    """
    Iterate over the 'gene' features stored in `settings.G` and yield the
    `asinterval` conversion of each one located on a chromosome from
    `settings.CHROMS`; genes on other chromosomes are skipped.
    """
    all_genes = settings.G.features_of_type('gene')
    for candidate in all_genes:
        wanted = candidate.chrom in settings.CHROMS
        if not wanted:
            continue
        interval = asinterval(candidate)
        yield interval

示例#11

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_pbt_interval_conversion():
    """
    Check that `helpers.asinterval` carries chromosome, coordinates and name
    over from a feature parsed from a GFF line. Returns without checking
    anything when pybedtools cannot be imported.
    """
    try:
        import pybedtools
    except ImportError:
        # pybedtools is unavailable, so there is nothing to compare against.
        return
    gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    gff_feature = feature.feature_from_line(
        gff_line, strict=False, keep_order=True)
    interval = helpers.asinterval(gff_feature)
    assert interval.chrom == gff_feature.chrom == gff_feature.seqid
    # The interval's start is expected to be one less than the feature's.
    assert interval.start == gff_feature.start - 1
    assert interval.stop == gff_feature.stop == gff_feature.end
    interval_name = interval.name
    feature_name = gff_feature.attributes['Name'][0]
    assert interval_name == feature_name, '%s, %s' % (
        interval_name, feature_name)

示例#12

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_pbt_interval_conversion():
    """
    Parse one GFF line into a feature, convert it with `helpers.asinterval`
    and compare chromosome, start, stop and name between the two objects.
    The test is a no-op if pybedtools is not importable.
    """
    try:
        import pybedtools
    except ImportError:
        # Nothing to verify without pybedtools.
        return
    source_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    parsed = feature.feature_from_line(
        source_line, strict=False, keep_order=True)
    converted = helpers.asinterval(parsed)
    assert converted.chrom == parsed.chrom == parsed.seqid
    # Converted start is expected to equal the parsed start minus one.
    assert converted.start == parsed.start - 1
    assert converted.stop == parsed.stop == parsed.end
    converted_name = converted.name
    parsed_name = parsed.attributes['Name'][0]
    assert converted_name == parsed_name, '%s, %s' % (
        converted_name, parsed_name)

示例#13

0

显示文件

    def plot(self, feature):
        """
        Spawns a new figure showing data for `feature`.

        :param feature: A `pybedtools.Interval` object; a `gffutils.Feature`
            is also accepted and is converted with `asinterval` first.

        Calls `self.make_fig()`, then runs each `(ax, method)` pair from
        `self.panels()` in order. Each method receives the axes and the
        current feature, and whatever it returns is the feature handed to
        the next method.

        :return: list of the axes, in panel order
        """
        needs_conversion = isinstance(feature, gffutils.Feature)
        if needs_conversion:
            feature = asinterval(feature)
        self.make_fig()
        plotted_axes = []
        for panel_ax, panel_method in self.panels():
            # The panel's return value replaces the feature for later panels.
            feature = panel_method(panel_ax, feature)
            plotted_axes.append(panel_ax)
        return plotted_axes

示例#14

0

显示文件

文件： minibrowser.py 项目： lingdudefeiteng/metaseq

    def plot(self, feature):
        """
        Spawns a new figure showing data for `feature`.

        :param feature: A `pybedtools.Interval` object. If a
            `gffutils.Feature` is given instead, it is passed through
            `asinterval` before plotting.

        After `self.make_fig()`, every `(ax, method)` pair yielded by
        `self.panels()` is invoked as `method(ax, feature)`; the returned
        value becomes the `feature` used for the following panel.

        :return: the axes of all panels, collected in iteration order
        """
        if isinstance(feature, gffutils.Feature):
            feature = asinterval(feature)
        self.make_fig()
        collected = []
        for axis, draw in self.panels():
            feature = draw(axis, feature)
            collected += [axis]
        return collected

示例#15

0

显示文件

文件： atf3_peaks_helpers.py 项目： Al3n70rn/metaseq

def TSS(feature, upstream=1000, downstream=1000):
    """
    Turn `feature` (a pybedtools.Interval) into a window around its TSS.

    On the '-' strand the window is anchored at `feature.stop` and spans
    `downstream` below to `upstream` above it; on any other strand it is
    anchored at `feature.start` and spans `upstream` below to `downstream`
    above it. The window is clipped to 0 and to `chromsizes[chrom][1]`.

    Field 2 (the feature type) is set to "TSS", the feature's start/stop are
    overwritten in place, and the result of `asinterval(feature)` is
    returned.
    """
    chrom_size = chromsizes[feature.chrom][1]
    if feature.strand == '-':
        anchor = feature.stop
        below, above = downstream, upstream
    else:
        anchor = feature.start
        below, above = upstream, downstream

    window_start = max(0, anchor - below)
    window_stop = min(anchor + above, chrom_size)

    # Modify featuretype
    feature[2] = 'TSS'
    feature.start = window_start
    feature.stop = window_stop
    return asinterval(feature)

示例#16

0

显示文件

文件： atf3_peaks_helpers.py 项目： woodhaha/metaseq

def TSS(feature, upstream=1000, downstream=1000):
    """
    Replace the coordinates of `feature` (a pybedtools.Interval) with a
    strand-aware window around its TSS and relabel it as "TSS".

    The reference position is `feature.stop` for '-' strand features and
    `feature.start` otherwise. `upstream`/`downstream` bases are added on
    the appropriate side for the strand, then the window is limited to the
    range from 0 to `chromsizes[feature.chrom][1]`.

    :return: `asinterval(feature)` after the in-place modifications
    """
    upper_limit = chromsizes[feature.chrom][1]
    minus_strand = feature.strand == '-'

    if minus_strand:
        new_start = max(0, feature.stop - downstream)
        new_stop = min(feature.stop + upstream, upper_limit)
    else:
        new_start = max(0, feature.start - upstream)
        new_stop = min(feature.start + downstream, upper_limit)

    # Field 2 holds the feature type.
    feature[2] = 'TSS'
    feature.start, feature.stop = new_start, new_stop
    return asinterval(feature)

示例#17

0

显示文件

文件： results_table.py 项目： tanglingfung/metaseq

 def scored_feature_generator(d):
     """
     Yield one interval per row of `d`, scored from its adjusted p-value.

     For each index `i` in `range(len(d))` the feature `db[d.id[i]]` is
     given the score `-10 * log10(padj)`, set to 0 when the log2 fold change
     is NaN and negated when the fold change is negative. The feature is then
     run through `asinterval`, `gff2bed` and `extend_fields(..., 9)`; fields
     6 and 7 are overwritten with fields 1 and 2; and padj, pval,
     log2foldchange, basemeana and basemeanb are appended as extra fields.

     :raises gffutils.FeatureNotFoundError: carrying the missing ID
     """
     for row in range(len(d)):
         try:
             feature = db[d.id[row]]
         except gffutils.FeatureNotFoundError:
             raise gffutils.FeatureNotFoundError(d.id[row])
         score = -10 * np.log10(d.padj[row])
         lfc = d.log2foldchange[row]
         if np.isnan(lfc):
             score = 0
         if lfc < 0:
             score *= -1
         feature.score = str(score)
         bed_like = extend_fields(gff2bed(asinterval(feature)), 9)
         fields = bed_like.fields[:]
         fields[6], fields[7] = fields[1], fields[2]
         fields.extend([
             str(d.padj[row]),
             str(d.pval[row]),
             '%.3f' % d.log2foldchange[row],
             '%.3f' % d.basemeana[row],
             '%.3f' % d.basemeanb[row],
         ])
         yield pybedtools.create_interval_from_list(fields)

示例#18

0

显示文件

 def scored_feature_generator(d):
     """
     Generate scored intervals, one for each index in `range(len(d))`.

     The feature for `d.id[i]` is fetched from `db` and its score is set to
     the string form of `-10 * log10(d.padj[i])`, replaced by 0 if the log2
     fold change is NaN and sign-flipped if the fold change is below zero.
     After conversion via `asinterval`, `gff2bed` and `extend_fields(..., 9)`,
     fields 6 and 7 are copied from fields 1 and 2, and five statistics
     (padj, pval, log2foldchange, basemeana, basemeanb) are appended before
     the final interval is created from the field list.

     :raises gffutils.FeatureNotFoundError: carrying the missing ID
     """
     for idx in range(len(d)):
         try:
             feature = db[d.id[idx]]
         except gffutils.FeatureNotFoundError:
             raise gffutils.FeatureNotFoundError(d.id[idx])
         score = -10 * np.log10(d.padj[idx])
         lfc = d.log2foldchange[idx]
         if np.isnan(lfc):
             score = 0
         if lfc < 0:
             score *= -1
         feature.score = str(score)
         converted = gff2bed(asinterval(feature))
         feature = extend_fields(converted, 9)
         fields = feature.fields[:]
         fields[6], fields[7] = fields[1], fields[2]
         stats = [
             str(d.padj[idx]),
             str(d.pval[idx]),
             '%.3f' % d.log2foldchange[idx],
             '%.3f' % d.basemeana[idx],
             '%.3f' % d.basemeanb[idx],
         ]
         fields += stats
         yield pybedtools.create_interval_from_list(fields)

示例#19

0

显示文件

文件： example.py 项目： lingdudefeiteng/metaseq

def tss_generator():
    """
    Yield `TSS(...)` with 1000 upstream and 1000 downstream for each
    "transcript" feature in `db`, after converting it with `asinterval`.
    """
    transcripts = db.features_of_type("transcript")
    for tx in transcripts:
        interval = asinterval(tx)
        yield TSS(interval, upstream=1000, downstream=1000)

示例#20

0

显示文件

文件： tables.py 项目： olgabot/metaseq

 def generator():
     """
     Yield `asinterval(db[gene_id])` for every `gene_id` in `df.index`.
     """
     for index_label in df.index:
         db_feature = db[index_label]
         yield asinterval(db_feature)

示例#21

0

显示文件

            try:  # if this transcript has an entry for 'transcript_support_level'
                #   and if the level is below the acceptable threshold:
                #   keep that transcript
                if int([
                        i[1] for i in t.attributes.items()
                        if i[0] == 'transcript_support_level'
                ][0][0]) <= max_TSL:
                    temp_txpts.append(t)
            except:
                pass

        txpts = temp_txpts
        if (len(txpts) > 0
            ):  # if there are any transcripts with a sufficiently low TSL:
            all_exons = (pybedtools.BedTool([
                helpers.asinterval(i)
                for i in db.children(gene, featuretype='exon')
            ]))
            all_exons = all_exons.sort().merge(
            )  # define the ends of the genic region by using the first
            #   and last exon in the annotation as the limits
            gene_extent = pybedtools.BedTool([
                pybedtools.cbedtools.Interval(chrom=chrom,
                                              start=min(i.start
                                                        for i in all_exons),
                                              end=max(i.end
                                                      for i in all_exons))
            ])
            t_introns = []
            for t in txpts:  # for each transcript, get all of the exons
                t_exons = (pybedtools.BedTool([

示例#22

0

显示文件

def generate_interval(category_fl):
    """
    Yield a `gff2bed(..., name_field=2)` record for each ID in `category_fl`.

    Each item has newline characters stripped from both ends. Items whose
    first three characters are not "MGG" are skipped; the rest are looked up
    in `MGG_db` and converted with `asinterval` before `gff2bed`.
    """
    for raw_entry in category_fl:
        strain_id = raw_entry.strip('\n')
        if strain_id[0:3] == "MGG":
            interval = asinterval(MGG_db[strain_id])
            yield gff2bed(interval, name_field=2)

示例#23

0

显示文件

文件： chipseq.py 项目： Al3n70rn/metaseq

 def generator():
     """
     Yield up to 5000 'gene' features from the database at `dbfn`, each
     converted with `asinterval`.
     """
     # Function-scope import so this block does not rely on file-level imports.
     from itertools import islice

     G = gffutils.FeatureDB(dbfn)
     genes = G.features_of_type('gene')
     # islice replaces the Python-2-only `genes.next()` call and ends cleanly
     # when fewer than 5000 genes exist; a StopIteration escaping inside a
     # generator body would otherwise surface as RuntimeError (PEP 479).
     for gene in islice(genes, 5000):
         yield asinterval(gene)

示例#24

0

显示文件

文件： plotting.py 项目： YeoLab/gffutils

 def _make_track(self, d, cls):
     """
     Create a `Track` for the items stored under `d[cls]`.

     Items are converted lazily with `asinterval`. The track uses
     `self.heights[cls]` as its height and a base equal to `self.ybase` plus
     half of (`self.heights['full']` minus that height); `self.kwargs` is
     forwarded as extra keyword arguments.
     """
     height = self.heights[cls]
     base = self.ybase + (self.heights['full'] - height) * 0.5
     converted = (asinterval(entry) for entry in d[cls])
     track_options = dict(ybase=base, yheight=height)
     return Track(converted, **dict(track_options, **self.kwargs)) \
         if not set(track_options) & set(self.kwargs) \
         else Track(converted, ybase=base, yheight=height, **self.kwargs)

示例#25

0

显示文件

文件： metaseq (2).py 项目： zhimenggan/Amassing

def tss_generator():
    """
    Yield `TSS(...)` with upstream=1 and downstream=0 for every 'mRNA'
    feature in `db`, after `asinterval` conversion.
    """
    # Other feature types (CDS/gene/mRNA...) could be selected here instead.
    featuretype = 'mRNA'
    for record in db.features_of_type(featuretype):
        interval = asinterval(record)
        yield TSS(interval, upstream=1, downstream=0)

示例#26

0

显示文件

 def generator():
     """
     Open the gffutils database at `dbfn` and yield at most 5000 of its
     'gene' features, each passed through `asinterval`.
     """
     # Imported locally so the block carries its own dependency.
     from itertools import islice

     G = gffutils.FeatureDB(dbfn)
     genes = G.features_of_type('gene')
     # `genes.next()` only exists on Python 2 iterators, and running out of
     # genes inside a generator raises RuntimeError under PEP 479; islice
     # caps the count and stops cleanly on exhaustion.
     for gene in islice(genes, 5000):
         yield asinterval(gene)

示例#27

0

显示文件

文件： compare_tracks.py 项目： Benja1972/bioinformatics

def tss_generator():
    """
    Generator function to yield the TSS of each annotated transcript.

    Each 'transcript' feature in `db` is converted with `asinterval` and
    passed to `TSS` with upstream=1 and downstream=0.
    """
    annotated = db.features_of_type('transcript')
    for tx in annotated:
        tx_interval = asinterval(tx)
        yield TSS(tx_interval, upstream=1, downstream=0)

示例#28

0

显示文件

文件： alignment_extract_orthologs_2.py 项目： WangZhe-1/RiceBlastPanGenome

def generate_bed(gff_feature_item):
    """
    Single-item generator: yield `asinterval(gff_feature_item)` once.
    """
    interval = asinterval(gff_feature_item)
    yield interval

示例#29

0

显示文件

文件： pybedtools_integration.py 项目： zorrodong/gffutils

 def gen():
     """
     Yield `helpers.asinterval(item)` for each item of `iterator`, a name
     taken from the surrounding scope.
     """
     for item in iterator:
         interval = helpers.asinterval(item)
         yield interval

示例#30

0

显示文件

文件： tables.py 项目： tanglingfung/metaseq

 def generator():
     """
     For each label in `df.index`, look it up in `db` and yield the
     `asinterval` conversion of the result.
     """
     for label in df.index:
         looked_up = db[label]
         yield asinterval(looked_up)

示例#31

0

显示文件

def tss_generator():
    """
    Yield `TSS(...)` with 1000 upstream and 1000 downstream for every
    'transcript' feature in `db`, each converted with `asinterval` first.
    """
    for tx in db.features_of_type('transcript'):
        tx_interval = asinterval(tx)
        yield TSS(tx_interval, upstream=1000, downstream=1000)

示例#32

0

显示文件

文件： test_gffutils.py 项目： WangZhe-1/RiceBlastPanGenome

def fsdu(which_id):
    """
    Single-item generator: look up `which_id` in `db` and yield its
    `asinterval` conversion.
    """
    found = db[which_id]
    yield asinterval(found)

示例#33

0

显示文件

文件： pybedtools_integration.py 项目： daler/gffutils

 def gen():
     """
     Convert every element of `iterator` (defined in the surrounding scope)
     with `helpers.asinterval` and yield the results one at a time.
     """
     for element in iterator:
         converted = helpers.asinterval(element)
         yield converted

示例#34

0

显示文件

文件： metaseq_heatmaps_tutorial.py 项目： pdl30/pyngsplot

def tss_generator(gtf):
	"""
	Generator function to yield TSS +/- 1kb of each annotated transcript.

	The `gtf` argument is not used by the body; 'transcript' features are
	read from the `db` name in the surrounding scope.
	"""
	transcripts = db.features_of_type('transcript')
	for tx in transcripts:
		tx_interval = asinterval(tx)
		yield TSS(tx_interval, upstream=1000, downstream=1000)