Пример #1
0

# Output sinks are opened up front; nothing is written to either of them
# within the visible part of this script.
writer = fasta.MfaWriter('peptides.fa')
annotFile = open('annot.txt', 'w')

# Walk over every annotation text file found under gsAnnotations/.
annotFilenames = glob.glob('gsAnnotations/*.txt')
for annotFilename in annotFilenames:
    # presumably the file name without directory/extension -- verify
    # against extractRootName
    name = extractRootName(annotFilename)
    print '>>>', name + '\n'
    
    # Rebuild the annotation path from the root name, read all of its lines
    # and hand them to the genscan parser.
    annotFilename = 'gsAnnotations/%s.txt' % name
    annotation = open(annotFilename).readlines()
    predictions = parseGenscan(annotation)
    
    # Load the matching peptide file; load_mfa yields (header, sequence)
    # pairs, which are re-keyed by the first whitespace-delimited token of
    # each header.
    pepFilename = 'gsPeptides/%s.fa' % name
    peptides = fasta.load_mfa(pepFilename)
    peptides = [(h.split()[0],s) for h,s in peptides]
    peptides = dict(peptides)
    
    # Report the length of every peptide sequence.
    print 'Lengths'
    for h in peptides:
        print h, len(peptides[h])
    print
    
    # Counter initialised here; its use lies outside the visible excerpt.
    i = 1
    for prediction in predictions.values():
        # Dump each exon of the prediction, followed by a blank line.
        for exon in prediction:
            print exon
        print
        
        # testAnnotation returns a 3-tuple that is unpacked here.
        isCoding,warnings,errors = testAnnotation(prediction)
Пример #2
0
def load_full(iFileHandle):
    """Load genscan predictions.

    Arguments:
    iFileHandle -- Input file or filename (handed to smartopen).

    Return values:
    data -- Annotation data (a list of lists, each inner list is one gene;
            genes are ordered by ascending gene number)
    proteins -- Predicted proteins as returned by fasta.load_mfa
                (a list of tuples (header, sequence)); empty when the
                peptide section is never reached
    meta -- Non-blank lines read while in the meta-data section, starting
            with the line containing 'GENSCAN 1.0'

    If the 'NO EXONS/GENES PREDICTED IN SEQUENCE' marker is seen, the
    function returns ([], [], '') when the next ordinary (non-marker) line
    is read. Note that meta is an empty string, not a list, in that case;
    this is kept as-is for compatibility with existing callers.

    """
    iFile = smartopen(iFileHandle)
    data = {}
    proteins = []
    meta = []

    # Marker lines that switch the parser from one section to the next.
    startPredState = '----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------'
    endPredState = 'Predicted peptide sequence(s):'
    skipState = 'Slice no. '
    metaState = 'GENSCAN 1.0'

    state = None
    for line in iFile:
        line = line.strip()

        # Deliberately a separate `if`: the banner line falls through to
        # the chain below and is itself recorded as the first meta line.
        if metaState in line:
            state = 'meta'
        if line == startPredState:
            state = 'pred'
        elif line == 'NO EXONS/GENES PREDICTED IN SEQUENCE':
            state = 'fail'
        elif line == endPredState:
            state = 'prot'
        elif skipState in line:
            state = 'skip'
        else:
            if state == 'meta':
                if line:
                    meta.append(line)
            elif state == 'pred':
                if line:
                    d = Predicted(line.split())
                    # gene_exon looks like '<gene>.<exon>'; group by gene.
                    gene = int(d.gene_exon.split('.')[0])
                    data.setdefault(gene, []).append(d)
            elif state == 'prot':
                # The line right after the peptide header is dropped; the
                # rest of the stream is left for fasta.load_mfa below.
                break
            elif state == 'fail':
                return [], [], ''

    if state == 'prot':
        proteins = fasta.load_mfa(iFile)

    # sorted() over the keys works on both Python 2 and 3, unlike calling
    # .sort() on the result of dict.items().
    data = [data[gene] for gene in sorted(data)]

    return data, proteins, meta
Пример #3
0
def load_full(iFileHandle):
    """Load genscan predictions.

    Arguments:
    iFileHandle -- Input file or filename (handed to smartopen).

    Return values:
    data -- Annotation data (a list of lists, each inner list is one gene;
            genes are ordered by ascending gene number)
    proteins -- Predicted proteins as returned by fasta.load_mfa
                (a list of tuples (header, sequence)); empty when the
                peptide section is never reached
    meta -- Non-blank lines read while in the meta-data section, starting
            with the line containing 'GENSCAN 1.0'

    If the 'NO EXONS/GENES PREDICTED IN SEQUENCE' marker is seen, the
    function returns ([], [], '') when the next ordinary (non-marker) line
    is read. Note that meta is an empty string, not a list, in that case;
    this is kept as-is for compatibility with existing callers.

    """
    iFile = smartopen(iFileHandle)
    data = {}
    proteins = []
    meta = []

    # Marker lines that switch the parser from one section to the next.
    startPredState = '----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------'
    endPredState = 'Predicted peptide sequence(s):'
    skipState = 'Slice no. '
    metaState = 'GENSCAN 1.0'

    state = None
    for line in iFile:
        line = line.strip()

        # Deliberately a separate `if`: the banner line falls through to
        # the chain below and is itself recorded as the first meta line.
        if metaState in line:
            state = 'meta'
        if line == startPredState:
            state = 'pred'
        elif line == 'NO EXONS/GENES PREDICTED IN SEQUENCE':
            state = 'fail'
        elif line == endPredState:
            state = 'prot'
        elif skipState in line:
            state = 'skip'
        else:
            if state == 'meta':
                if line:
                    meta.append(line)
            elif state == 'pred':
                if line:
                    d = Predicted(line.split())
                    # gene_exon looks like '<gene>.<exon>'; group by gene.
                    gene = int(d.gene_exon.split('.')[0])
                    data.setdefault(gene, []).append(d)
            elif state == 'prot':
                # The line right after the peptide header is dropped; the
                # rest of the stream is left for fasta.load_mfa below.
                break
            elif state == 'fail':
                return [], [], ''

    if state == 'prot':
        proteins = fasta.load_mfa(iFile)

    # sorted() over the keys works on both Python 2 and 3, unlike calling
    # .sort() on the result of dict.items().
    data = [data[gene] for gene in sorted(data)]

    return data, proteins, meta
Пример #4
0
}[ioDir]

# Output sinks are opened up front; nothing is written to either of them
# within the visible part of this script.
writer = fasta.MfaWriter('peptides.fa')
annotFile = open('annot.txt', 'w')

# Walk over every annotation text file found under gsAnnotations/.
annotFilenames = glob.glob('gsAnnotations/*.txt')
for annotFilename in annotFilenames:
    # presumably the file name without directory/extension -- verify
    # against extractRootName
    name = extractRootName(annotFilename)
    print '>>>', name + '\n'

    # Rebuild the annotation path from the root name, read all of its lines
    # and hand them to the genscan parser.
    annotFilename = 'gsAnnotations/%s.txt' % name
    annotation = open(annotFilename).readlines()
    predictions = parseGenscan(annotation)

    # Load the matching peptide file; load_mfa yields (header, sequence)
    # pairs, which are re-keyed by the first whitespace-delimited token of
    # each header.
    pepFilename = 'gsPeptides/%s.fa' % name
    peptides = fasta.load_mfa(pepFilename)
    peptides = [(h.split()[0], s) for h, s in peptides]
    peptides = dict(peptides)

    # Report the length of every peptide sequence.
    print 'Lengths'
    for h in peptides:
        print h, len(peptides[h])
    print

    # Counter initialised here; its use lies outside the visible excerpt.
    i = 1
    for prediction in predictions.values():
        # Dump each exon of the prediction, followed by a blank line.
        for exon in prediction:
            print exon
        print

        # testAnnotation returns a 3-tuple that is unpacked here.
        isCoding, warnings, errors = testAnnotation(prediction)