Exemplo n.º 1
0
def runGenomescan(features, debug=False):
    """Run GenomeScan for each feature and save its annotation and peptides.

    For every feature, three files named after ``feature.domain`` are used:

    * ``gsOutput/<domain>.html``     -- GenomeScan output.  If the file
      already exists it is read back instead of calling
      ``genomescanFromFeature`` again.
    * ``gsAnnotations/<domain>.txt`` -- the annotation returned by
      ``parseGenomeScanOutput`` (overwritten on every run).
    * ``gsPeptides/<domain>.fa``     -- the predicted peptides, with headers
      rewritten to ``<domain>.<n> <block>`` where ``n`` counts from 1
      (overwritten on every run).

    Arguments:
        features -- iterable of objects exposing a ``domain`` attribute; each
                    one is handed to ``genomescanFromFeature``.
        debug    -- accepted for interface compatibility; not used here.

    Uses the module-level name ``blastDb``.  The three output directories are
    not created here.  Returns None.
    """
    for i, feature in enumerate(features):
        print('%s %s' % (i + 1, feature))
        oFilename = 'gsOutput/%s.html' % feature.domain
        annotFilename = 'gsAnnotations/%s.txt' % feature.domain
        pepFilename = 'gsPeptides/%s.fa' % feature.domain

        if not os.path.exists(oFilename):
            html = genomescanFromFeature(feature, blastDb,
                                         oFileHandle=oFilename)
        else:
            # Reuse the cached output; close the handle deterministically.
            with open(oFilename) as oFile:
                html = oFile.read()

        annotation, peptides = parseGenomeScanOutput(html.split('\n'))

        # Explicit handle so the annotation is flushed and closed before
        # moving on, instead of whenever the file object gets collected.
        with open(annotFilename, 'w') as annotFile:
            annotFile.write('%s\n' % (annotation,))

        # The peptides arrive as one FASTA-formatted string; wrap it in a
        # file-like object so the regular FASTA iterator can parse it.
        faIter = fasta.load_iter(StringIO.StringIO(peptides))
        writer = fasta.MfaWriter(pepFilename)
        try:
            for j, (h, s) in enumerate(faIter):
                block = h.split('|')[0]
                if j == 1:
                    # A second peptide showed up for this feature: echo the
                    # feature again so multi-peptide cases stand out.
                    print(feature)
                h = '%s.%i %s' % (feature.domain, j + 1, block)
                writer.write(h, s + '\n')
        finally:
            # Close the peptide file even if a record fails to parse/write.
            writer.close()
Exemplo n.º 2
0
def runGenomescan(features, debug=False):
    """Run GenomeScan for each feature and save its annotation and peptides.

    For every feature, three files named after ``feature.domain`` are used:

    * ``gsOutput/<domain>.html``     -- GenomeScan output.  If the file
      already exists it is read back instead of calling
      ``genomescanFromFeature`` again.
    * ``gsAnnotations/<domain>.txt`` -- the annotation returned by
      ``parseGenomeScanOutput`` (overwritten on every run).
    * ``gsPeptides/<domain>.fa``     -- the predicted peptides, with headers
      rewritten to ``<domain>.<n> <block>`` where ``n`` counts from 1
      (overwritten on every run).

    Arguments:
        features -- iterable of objects exposing a ``domain`` attribute; each
                    one is handed to ``genomescanFromFeature``.
        debug    -- accepted for interface compatibility; not used here.

    Uses the module-level name ``blastDb``.  The three output directories are
    not created here.  Returns None.
    """
    for i, feature in enumerate(features):
        print('%s %s' % (i + 1, feature))
        oFilename = 'gsOutput/%s.html' % feature.domain
        annotFilename = 'gsAnnotations/%s.txt' % feature.domain
        pepFilename = 'gsPeptides/%s.fa' % feature.domain

        if not os.path.exists(oFilename):
            html = genomescanFromFeature(feature,
                                         blastDb,
                                         oFileHandle=oFilename)
        else:
            # Reuse the cached output; close the handle deterministically.
            with open(oFilename) as oFile:
                html = oFile.read()

        annotation, peptides = parseGenomeScanOutput(html.split('\n'))

        # Explicit handle so the annotation is flushed and closed before
        # moving on, instead of whenever the file object gets collected.
        with open(annotFilename, 'w') as annotFile:
            annotFile.write('%s\n' % (annotation,))

        # The peptides arrive as one FASTA-formatted string; wrap it in a
        # file-like object so the regular FASTA iterator can parse it.
        faIter = fasta.load_iter(StringIO.StringIO(peptides))
        writer = fasta.MfaWriter(pepFilename)
        try:
            for j, (h, s) in enumerate(faIter):
                block = h.split('|')[0]
                if j == 1:
                    # A second peptide showed up for this feature: echo the
                    # feature again so multi-peptide cases stand out.
                    print(feature)
                h = '%s.%i %s' % (feature.domain, j + 1, block)
                writer.write(h, s + '\n')
        finally:
            # Close the peptide file even if a record fails to parse/write.
            writer.close()
Exemplo n.º 3
0
#!/usr/bin/env python

"""
orfTest.py

Author: Tony Papenfuss
Date: Tue Aug 22 20:14:57 EST 2006

"""

import os, sys
import fasta, sequence


# Reference sequence that the ORF coordinates index into, the ORF records
# themselves, and the output file for the re-derived ORFs.
header, seq = fasta.load('NKC.fa')
orfIterator = fasta.load_iter('ORFs.fa')
writer = fasta.MfaWriter('ORFs2.fa')

for h, orf in orfIterator:
    # The first whitespace-delimited header token is split on '.' into
    # chrom, block, orfId and a 'start-end' coordinate pair.
    orfName = h.split()[0]
    chrom, block, orfId, limits = orfName.split('.')
    start, end = limits.split('-')
    start, end = int(start), int(end)

    # start > end is treated as a reverse-strand ORF: put the coordinates in
    # ascending order and reverse-complement the slice before translating.
    strand = '+'
    if start > end:
        strand = '-'
        start, end = end, start

    # Coordinates are used as 1-based, inclusive positions into seq.
    dna = seq[start-1:end]
    if strand == '-':
        dna = sequence.reverseComplement(dna)
    s = sequence.translate(dna)
Exemplo n.º 4
0
Author: Tony Papenfuss
Date: Wed Aug 23 08:52:58 EST 2006

"""

import os, sys
import re, copy
import fasta, sequence, hmmer3
from hmmer3 import hmmer2frame

# ORF delimiters: a stop codon ('*') or a run of 200 or more 'X'.
# This has to be an alternation.  Written inside [...] the '|' and the
# '{200,}' are literal characters, so every single 'X' (and '2', '0', ...)
# would be treated as a delimiter.
pattern = re.compile(r'\*|X{200,}')
minLen = 20

i = 0
writer = fasta.MfaWriter('ORFs.fa')
faFile = fasta.load_iter('6frames.fa')

for header, seq in faFile:
    header = header.strip()
    sys.stderr.write('%s\n' % header)  # progress trace
    # The header must be exactly '<block>:<hmmerFrame>'.
    block, hmmerFrame = header.split(':')
    frame = hmmer2frame[int(hmmerFrame)]

    matchIter = pattern.finditer(seq)
    try:
        match = matchIter.next()
    except StopIteration:
        # No delimiter anywhere in this frame.  `match` is not bound for
        # this record (it would be unbound or left over from the previous
        # one), so report the header and the sequence, then stop.
        print('No stop codon or X-run found in %s' % header)
        print(seq)
        sys.exit()
    start = match.start()
Exemplo n.º 5
0
def getSizes(filenames):
    """Print the name and length of every sequence in the given files.

    Each file is read with ``fasta.load_iter``, which yields
    ``(header, sequence)`` pairs.  One line is printed per record: the first
    whitespace-delimited token of the header, a tab, and the sequence length.

    Arguments:
        filenames -- iterable of paths to read.
    """
    for path in filenames:
        for header, residues in fasta.load_iter(path):
            seqId = header.split()[0]
            print('%s\t%s' % (seqId, len(residues)))
Exemplo n.º 6
0
        writer.write('%s:%i' % (header,frame),p)
    writer.close()
    sys.exit()


# Initialize()

# Reference sequence, its length, and the first token of its header.
header, seq = fasta.load('MHC_hg18.fa')
L = len(seq)
hstart = header.split()[0]

# ORF delimiters: a stop codon ('*') or a run of 200 or more 'X'.
minLen = 20
pattern = re.compile(r'\*|X{200,}')

# The six-frame translations are read from a precomputed file; the
# on-the-fly alternative was sequence.sixFrameTranslationIter(seq).
sixFrameIter = fasta.load_iter('6frames.fa')

writer = fasta.MfaWriter('ORFs.fa')
i = 0
for h, p in sixFrameIter:
    # The text after the last ':' in the header is the HMMER frame number.
    frameField = h.split(':')[-1]
    hmmerFrame = int(frameField)
    frame = hmmer.hmmer2frame[hmmerFrame]
    sys.stderr.write('Frame: %s\n' % (frame,))

    # Positive frames are reported as '+', everything else as '-'.
    strand = '-'
    if frame > 0:
        strand = '+'

    # First delimiter in this frame; StopIteration propagates if none exists.
    matchIter = pattern.finditer(p)
    match = matchIter.next()
    start = match.start()
Exemplo n.º 7
0
        writer.write('%s:%i' % (header, frame), p)
    writer.close()
    sys.exit()


# Initialize()

# Reference sequence, its length, and the first token of its header.
header, seq = fasta.load('MHC_hg18.fa')
L = len(seq)
hstart = header.split()[0]

# ORF delimiters: a stop codon ('*') or a run of 200 or more 'X'.
minLen = 20
pattern = re.compile(r'\*|X{200,}')

# The six-frame translations are read from a precomputed file; the
# on-the-fly alternative was sequence.sixFrameTranslationIter(seq).
sixFrameIter = fasta.load_iter('6frames.fa')

writer = fasta.MfaWriter('ORFs.fa')
i = 0
for h, p in sixFrameIter:
    # The text after the last ':' in the header is the HMMER frame number.
    frameField = h.split(':')[-1]
    hmmerFrame = int(frameField)
    frame = hmmer.hmmer2frame[hmmerFrame]
    sys.stderr.write('Frame: %s\n' % (frame,))

    # Positive frames are reported as '+', everything else as '-'.
    strand = '-'
    if frame > 0:
        strand = '+'

    # First delimiter in this frame; StopIteration propagates if none exists.
    matchIter = pattern.finditer(p)
    match = matchIter.next()
    start = match.start()
Exemplo n.º 8
0
Date: Wed Aug 23 08:52:58 EST 2006

"""

import os, sys
import re, copy
import fasta, sequence, hmmer3
from hmmer3 import hmmer2frame


# ORF delimiters: a stop codon ('*') or a run of 200 or more 'X'.
# This has to be an alternation.  Written inside [...] the '|' and the
# '{200,}' are literal characters, so every single 'X' (and '2', '0', ...)
# would be treated as a delimiter.
pattern = re.compile(r'\*|X{200,}')
minLen = 20

i = 0
writer = fasta.MfaWriter('ORFs.fa')
faFile = fasta.load_iter('6frames.fa')

for header, seq in faFile:
    header = header.strip()
    sys.stderr.write('%s\n' % header)  # progress trace
    # The header must be exactly '<block>:<hmmerFrame>'.
    block, hmmerFrame = header.split(':')
    frame = hmmer2frame[int(hmmerFrame)]

    matchIter = pattern.finditer(seq)
    try:
        match = matchIter.next()
    except StopIteration:
        # No delimiter anywhere in this frame.  `match` is not bound for
        # this record (it would be unbound or left over from the previous
        # one), so report the header and the sequence, then stop.
        print('No stop codon or X-run found in %s' % header)
        print(seq)
        sys.exit()
    start = match.start()
Exemplo n.º 9
0
def getSizes(filenames):
    """Print the name and length of every sequence in the given files.

    Each file is read with ``fasta.load_iter``, which yields
    ``(header, sequence)`` pairs.  One line is printed per record: the first
    whitespace-delimited token of the header, a tab, and the sequence length.

    Arguments:
        filenames -- iterable of paths to read.
    """
    for path in filenames:
        for header, residues in fasta.load_iter(path):
            seqId = header.split()[0]
            print('%s\t%s' % (seqId, len(residues)))