Примеры использования GffTranscriptReader на Python

Язык программирования: Python

Пространство имен/Пакет: GffTranscriptReader

Класс/Тип: GffTranscriptReader

Примеров на hotexamples.com: 5

Python GffTranscriptReader — найдено 5 примеров. Это лучшие примеры кода на Python для GffTranscriptReader.GffTranscriptReader, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам повысить качество подборки.

Основные методы

Показать Скрыть

nextSequence(2)

GffTranscriptReader(1)

close(1)

hashBySubstrate(1)

loadGFF(1)

loadGeneIdHash(1)

loadGenes(1)

Пример #1

Показать файл

Файл: test-transcript-reader.py Проект: bmajoros/python

# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
from GffTranscriptReader import GffTranscriptReader

# GFF file used by this smoke test.  Alternatives tried previously:
#   /home/bmajoros/1000G/assembly/local-genes.gff
#   /home/bmajoros/1000G/assembly/tmp.gff
#   test/data/tmp.gff
filename = "test/data/local-genes.gff"

# Only the reader is constructed here; both usage demos below are disabled.
reader = GffTranscriptReader()

# Disabled demo 1: load transcripts, print each ID and its GFF rendering.
#transcripts = reader.loadGFF(filename)
#for transcript in transcripts:
#    print(transcript.getID())
#    gff = transcript.toGff()
#    print(gff)

# Disabled demo 2: load genes, then list every transcript of each gene
# with its begin/end coordinates.
#genes = reader.loadGenes(filename)
#for gene in genes:
#    print("gene", gene.getID())
#    n = gene.getNumTranscripts()
#    for i in range(n):
#        transcript = gene.getIthTranscript(i)
#        transID = transcript.getID()
#        print("\t" + transID + "\t" + str(transcript.getBegin()) + "\t"
#              + str(transcript.getEnd()))

Пример #2

Показать файл

Файл: revisions-get-junctions-nonbroken.py Проект: ReddyLab/1000Genomes

# Load per-gene read counts.  A line matching "TOTAL MAPPED READS: <n>"
# supplies the overall total; every other non-blank line must consist of
# exactly two whitespace-separated fields, <gene> <count>.  The count is
# stored as the string token read from the file (no numeric conversion).
readCounts={}
with open(readCountsFile,"rt") as IN:
    for line in IN:
        # Raw string: "\s" and "\d" are regex escapes, not string escapes
        if(rex.find(r"TOTAL MAPPED READS:\s*(\d+)",line)):
            totalMappedReads=rex[1]
            continue
        fields=line.split()
        if(not fields): continue # blank line: nothing to unpack
        (gene,count)=fields # still raises on lines without two fields
        readCounts[gene]=count

# Read GFF file to find annotated sites to exclude
gff={} # ID prefix captured by the regex below -> transcript
exclude={} # substrate -> {"<exonEnd>-<nextExonBegin>": True}
reader=GffTranscriptReader()
transcripts=reader.loadGFF(gffFile)
for transcript in transcripts:
    transcriptID=transcript.getID()
    if(transcriptID.startswith("ALT")): continue
    if(rex.find(r"(\S+)_\d",transcriptID)): gff[rex[1]]=transcript
    substrate=transcript.getSubstrate()
    exclusions=exclude.setdefault(substrate,{})
    exons=transcript.getRawExons()
    # Sorts the list returned by getRawExons() in place, by begin coordinate
    exons.sort(key=lambda exon:exon.begin)
    # Each pair of consecutive exons yields one "end-begin" key
    for (upstream,downstream) in zip(exons,exons[1:]):
        key=str(upstream.getEnd())+"-"+str(downstream.getBegin())
        exclusions[key]=True

# Read broken-sites file

Пример #3

Показать файл

Файл: ice9-subset-fasta.py Проект: ReddyLab/1000Genomes

from __future__ import (absolute_import, division, print_function, 
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader

# Usage check: exactly three positional arguments are required.
# sys.exit is used instead of the site-provided exit(), which is not
# guaranteed to exist (e.g. when Python runs with -S).
if(len(sys.argv)!=4):
    sys.exit(sys.argv[0]+" <in.fasta> <in.gff> <out.fasta>")
(fastaFile,gffFile,outFile)=sys.argv[1:]

# Collect the substrates of all transcripts whose ID starts with "ALT"
reader=GffTranscriptReader()
transcripts=reader.loadGFF(gffFile)
keep={transcript.getSubstrate() for transcript in transcripts
      if transcript.getID()[:3]=="ALT"}

# Copy to the output only those FASTA records whose parsed ID is in `keep`.
# The with-block guarantees the output file is flushed and closed, even if
# reading or writing raises.
reader=FastaReader(fastaFile)
writer=FastaWriter()
with open(outFile,"wt") as fh:
    while(True):
        (defline,seq)=reader.nextSequence()
        if(not defline): break # a falsy defline ends the loop
        # Second element of the parsed defline is not needed here
        (seqID,_)=FastaReader.parseDefline(defline)
        if(seqID not in keep): continue
        writer.addToFasta(defline,seq,fh)

Пример #4

Показать файл

Файл: get-transcripts.py Проект: ReddyLab/1000Genomes

   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader
from Rex import Rex
rex=Rex()

# Process command line: exactly three positional arguments are required.
# sys.exit is used instead of the site-provided exit(), which is not
# guaranteed to exist (e.g. when Python runs with -S).
if(len(sys.argv)!=4): sys.exit(sys.argv[0]+" <in.fasta> <in.gff> <out.fasta>")
(fastaFile,gffFile,outFile)=sys.argv[1:]

# Read GFF; the result is queried by substrate ID below.
# NOTE(review): `hash` and `id` shadow builtins; the names are kept because
# code following this excerpt may refer to them.
reader=GffTranscriptReader()
hash=reader.hashBySubstrate(gffFile)

# Open output file
OUT=open(outFile,"wt")
writer=FastaWriter()

# Process each substrate in the FASTA file
reader=FastaReader(fastaFile)
while(True):
    [defline,seq]=reader.nextSequence()
    if(not defline): break # a falsy defline ends the loop
    # Raw string: "\s" and "\S" are regex escapes, not string escapes
    if(not rex.find(r"^\s*>\s*(\S+)",defline)): 
        sys.exit("Can't parse defline: "+defline)
    id=rex[1]
    # None when the GFF has no entry for this substrate
    transcripts=hash.get(id,None)

Пример #5

Показать файл

                        unicode_literals, generators, nested_scopes,
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
from GffTranscriptReader import GffTranscriptReader

#filename="/home/bmajoros/1000G/assembly/local-genes.gff"
#filename="/home/bmajoros/1000G/assembly/tmp.gff"
#filename="test/data/tmp.gff"
#filename="test/data/local-genes.gff"
# Annotation file summarized by this script.
filename = "/home/bmajoros/ensembl/protein-coding.gff"

# For every gene, report the number of raw exons summed over its
# transcripts next to the number of exons in the gene's merged set.
reader = GffTranscriptReader()
genes = reader.loadGenes(filename)
for gene in genes:
    exons = gene.getMergedExons()
    unmerged = sum(len(transcript.getRawExons())
                   for transcript in gene.transcripts)
    print(unmerged, "exons merged to", len(exons))
    # Disabled debug output: coordinates of each merged exon.
    #for i in range(len(exons)):
    #    print("MERGED TO:",exons[i].begin,exons[i].end)
    #    print()

#transcripts=reader.loadGFF(filename)
#for transcript in transcripts:
#print(transcript.getID())
#gff=transcript.toGff()