示例#1
0
 def main(self):
     """Read GFF3 models from stdin, run the per-model checks, and print the features.

     The raw stdin stream is first passed through self.pre(); every model
     produced by gff3.models() is handed to checkMiRnas, checkPseudogene and
     checkTranscriptNames (in that order) before its flattened features are
     written to stdout.
     """
     preprocessed = self.pre(sys.stdin)
     for model in gff3.models(preprocessed):
         self.checkMiRnas(model)
         self.checkPseudogene(model)
         self.checkTranscriptNames(model)
         for feature in gff3.flattenModel(model):
             # No terminator is added here; presumably str(feature) already
             # ends with a newline -- TODO confirm against gff3.Feature.
             print(str(feature), end='')
示例#2
0
def main():
    """Write the output header, then process every GFF3 model read from stdin.

    The header records the data source, the production timestamp, and the
    assembly/annotation versions taken from the ENSEMBLbuild, NCBIver and
    ENSEMBLver environment variables. A missing variable raises KeyError.
    """
    log("Starting trimForAgr...")

    # SO term lookup handed to processModel for every model.
    soterm2id = loadSOTerms()

    environment = os.environ
    headerFields = [
        ('data-source', 'MGI'),
        ('date-produced', time.asctime()),
        ('assembly', environment["ENSEMBLbuild"]),
        ('annotationSource RefSeq', environment["NCBIver"]),
        ('annotationSource ENSEMBL', environment["ENSEMBLver"]),
    ]
    writeHeader(headerFields)

    for model in gff3.models(sys.stdin):
        processModel(model, soterm2id)
示例#3
0
def main(featureSources):
    """Tally every model from the given feature sources and print the count tables.

    Args:
        featureSources: iterable of inputs, each accepted by gff3.models().

    Each model is passed to count() under a label made of its type plus,
    when the model has one, its so_term_name in square brackets. Afterwards
    the root/mid/leaf tables are printed, each path entry gets a
    tab-separated, comma-joined sample of its exemplars appended, and the
    path table is printed.
    """
    for source in featureSources:
        for model in gff3.models(source):
            soTermName = model.attributes.get("so_term_name", None)
            featureType = model.type
            suffix = "[%s]" % soTermName if soTermName else ""
            count(model, [featureType + suffix], model)

    pcounts("root", roots)
    pcounts("mid", mids)
    pcounts("leaf", leaves)

    for key in exemplars:
        sample = sorted(exemplars[key])
        if len(sample) > 5:
            # Thin out long exemplar lists by keeping every (len // 5)-th entry.
            sample = sample[::len(sample) // 5]
        paths[key] = str(paths[key]) + "\t" + ",".join(sample)
    pcounts("path", paths)
示例#4
0
 def loadPslFile(self):
     """Attach PSL-derived match features to their genes and count matches per sequence.

     psl.toGff(self.pslFile) is fed through gff3.models(); per the original
     notes, each PSL line becomes a match -> match_part* hierarchy and this
     loop visits the match roots (the match_parts hang below them).

     A match whose pctLength is below MIN_PCT_LENGTH is reported through
     self.logRejects and skipped. Every other match is appended to its
     gene's feature list in self.mgi2feats, and self.counts (reset on each
     call) is incremented for the sequence id.

     The gene lookups use plain indexing and happen before the length
     check, so an unknown sequence or gene id propagates as a lookup error
     even for matches that would have been rejected.
     """
     self.counts = {}
     for match in gff3.models(psl.toGff(self.pslFile)):
         # Sequence id with everything from the first DOT onward removed
         # (drops the version number).
         seqid = match.qName.split(DOT)[0]
         # MGI id of the gene this sequence belongs to.
         mgiid = self.seqid2gene[seqid]
         # List holding the gene followed by its match features.
         geneFeats = self.mgi2feats[mgiid]
         if match.pctLength < MIN_PCT_LENGTH:
             rejection = (
                 "REJECTING SEQUENCE (%s) for GENE (%s) - pctLength (%1.2f) less than minimum (%1.2f)"
                 % (seqid, mgiid, match.pctLength, MIN_PCT_LENGTH))
             self.logRejects(rejection)
             self.logRejects(str(match))
         else:
             geneFeats.append(match)
             self.counts[seqid] = self.counts.get(seqid, 0) + 1
示例#5
0
文件: pmods.py 项目: mgijax/mgigff3
import sys
import gff3

# Print every model read from stdin, one print() call per model.
# With flatten=True each item is presumably the model's flattened feature
# list -- TODO confirm against gff3.models.
for model in gff3.models(sys.stdin, flatten=True):
    print(model)
示例#6
0
import sys
import gff3
from OrderedSet import OrderedSet

# Features whose source column is listed here are dropped before models are built.
EXCLUDE_SOURCES = OrderedSet(["NCBI"])

# Feature types that are dropped before models are built.
EXCLUDE_TYPES = OrderedSet([
    "chromosome", "biological_region", "supercontig", "three_prime_UTR",
    "five_prime_UTR"
])


def filtFcn(f):
    """Return True when feature f has neither an excluded type nor an excluded source."""
    return not (f.type in EXCLUDE_TYPES or f.source in EXCLUDE_SOURCES)


feats = filter(filtFcn, gff3.iterate(sys.stdin))

# Normalize every remaining feature in place.
# NOTE(review): nothing in this visible span writes the features back out;
# presumably that happens elsewhere -- confirm.
for model in gff3.models(feats):
    for feat in gff3.flattenModel(model):
        # Transcripts take their transcript_id as Name.
        if feat.attributes.get("ID", "").startswith("transcript:"):
            feat.Name = feat.transcript_id
        feat.source = "ENSEMBL"

        # Features without parents are the model roots.
        isRoot = len(feat.parents) == 0
        if isRoot:
            # The curie is built from the part of the ID after the first colon.
            feat.attributes["curie"] = "ENSEMBL:" + feat.ID.split(":")[1]

        # Read the biotype before it is stripped below; on roots it becomes
        # the so_term_name, with "protein_coding" mapped to "protein_coding_gene".
        biotype = feat.attributes.get("biotype", None)
        if biotype and isRoot:
            feat.attributes["so_term_name"] = (
                "protein_coding_gene" if biotype == "protein_coding" else biotype)

        # Strip the attributes that are not carried forward.
        for dropped in ("biotype", "version", "description", "logic_name",
                        "gene_id", "transcript_support_level"):
            feat.attributes.pop(dropped, None)
示例#7
0
# canonicalize.py
#
# Turns tree-shaped models into canonical-form (DAG-shaped) models by merging identical subfeatures.
#

import gff3
import sys


def mergeExon(ex, f):
    """Fold exon f into the previously seen exon ex (both are modified/read in place).

    Args:
        ex: the exon being kept; its Parent list and source string are updated.
        f: the duplicate exon; its Parent entries and source(s) are merged into ex.

    f's parents are appended to ex.Parent. ex.source is a comma-separated
    list of sources; every source named by f that is not already one of
    ex's sources is appended. Sources are compared as whole comma-separated
    tokens, not as substrings, so "NCBI" is not mistaken for being present
    in "NCBI_Gene". ex.source is only reassigned when something was added.
    """
    ex.Parent.extend(f.Parent)

    mergedSources = ex.source.split(",")
    changed = False
    # f.source may itself be comma-joined (e.g. already-merged input), so
    # merge it token by token; empty tokens never add anything.
    for src in f.source.split(","):
        if src and src not in mergedSources:
            mergedSources.append(src)
            changed = True
    if changed:
        ex.source = ",".join(mergedSources)


for feats in gff3.models(sys.stdin, flatten=True):
    exons = {}  # (start,end) -> index into ofeats
    ofeats = []  # list of feats in current model. Root is 0th item.
    # merge exons. exons merge if they have the same coordinates.
    for f in feats:
        if f.type == "exon":
            k = (f.start, f.end)
            if k in exons:
                # merge f with a previously seen exon, ex
                #
                i = exons[k]
                ex = ofeats[i]
                mergeExon(ex, f)
                #
                # ...and move ex to end of the list
                ofeats[i] = None