# Script section: for every genscan annotation file, print its predicted
# exons and the lengths of the matching peptides, then test each prediction.

# Output handles; neither is used in the part of the script visible here.
writer = fasta.MfaWriter('peptides.fa')
annotFile = open('annot.txt', 'w')
annotFilenames = glob.glob('gsAnnotations/*.txt')
for annotFilename in annotFilenames:
    # NOTE(review): extractRootName presumably strips the directory and
    # extension -- confirm against its definition.
    name = extractRootName(annotFilename)
    print '>>>', name + '\n'
    # The annotation path is rebuilt from the root name rather than reusing
    # the globbed path.
    annotFilename = 'gsAnnotations/%s.txt' % name
    annotation = open(annotFilename).readlines()
    # parseGenscan's result is used as a mapping below (via .values()).
    predictions = parseGenscan(annotation)
    pepFilename = 'gsPeptides/%s.fa' % name
    peptides = fasta.load_mfa(pepFilename)
    # Key each sequence by the first whitespace-delimited token of its header.
    peptides = [(h.split()[0],s) for h,s in peptides]
    peptides = dict(peptides)
    print 'Lengths'
    for h in peptides:
        print h, len(peptides[h])
    print
    # NOTE(review): i is not used in the visible part of the script.
    i = 1
    for prediction in predictions.values():
        # Each prediction is iterated as a sequence of exons.
        for exon in prediction:
            print exon
        print
        isCoding,warnings,errors = testAnnotation(prediction)
def load_full(iFileHandle):
    """Load genscan predictions.

    Arguments:
    iFileHandle -- Input file or filename.

    Return values:
    data -- Annotation data (a list of lists, one inner list per gene,
            ordered by gene number)
    proteins -- Predicted proteins (a list of tuples (header, sequence))
    meta -- Non-empty lines collected while in the header section, which
            starts at the line containing 'GENSCAN 1.0'

    If the output contains 'NO EXONS/GENES PREDICTED IN SEQUENCE' the
    result is ([], [], ''). Note that meta is an empty string, not a list,
    in that case; this is kept for backward compatibility.
    """
    iFile = smartopen(iFileHandle)
    genes = {}  # gene number -> list of Predicted rows, in file order
    proteins = []
    meta = []

    # Marker lines that switch the parser from one section to the next.
    startPredState = '----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------'
    endPredState = 'Predicted peptide sequence(s):'
    failState = 'NO EXONS/GENES PREDICTED IN SEQUENCE'
    skipState = 'Slice no. '
    metaState = 'GENSCAN 1.0'

    state = None
    for line in iFile:
        line = line.strip()

        # Deliberately a separate `if`: the header line itself must still
        # fall through the chain below so that it is recorded in meta.
        if metaState in line:
            state = 'meta'

        if line == startPredState:
            state = 'pred'
        elif line == failState:
            state = 'fail'
        elif line == endPredState:
            state = 'prot'
        elif skipState in line:
            state = 'skip'
        elif state == 'meta':
            if line:
                meta.append(line)
        elif state == 'pred':
            if line:
                d = Predicted(line.split())
                # gene_exon looks like '<gene>.<exon>'; group rows by gene.
                gene = int(d.gene_exon.split('.')[0])
                genes.setdefault(gene, []).append(d)
        elif state == 'prot':
            # Stop line-wise parsing; the rest of the stream is handed to
            # the FASTA loader below.
            break
        elif state == 'fail':
            return [], [], ''

    # The failure marker may be the very last line of the input, in which
    # case the loop ends before the 'fail' branch above can run.
    if state == 'fail':
        return [], [], ''

    if state == 'prot':
        proteins = fasta.load_mfa(iFile)

    # sorted() works whether or not dict.items()/keys() return lists, unlike
    # calling .sort() on the result of .items().
    data = [genes[gene] for gene in sorted(genes)]
    return data, proteins, meta
def load_full(iFileHandle):
    """Load genscan predictions.

    Arguments:
    iFileHandle -- Input file or filename.

    Return values:
    data -- Annotation data (a list of lists, one inner list per gene,
            ordered by gene number)
    proteins -- Predicted proteins (a list of tuples (header, sequence))
    meta -- Non-empty lines collected while in the header section, which
            starts at the line containing 'GENSCAN 1.0'

    If the output contains 'NO EXONS/GENES PREDICTED IN SEQUENCE' the
    result is ([], [], ''). Note that meta is an empty string, not a list,
    in that case; this is kept for backward compatibility.
    """
    iFile = smartopen(iFileHandle)
    genes = {}  # gene number -> list of Predicted rows, in file order
    proteins = []
    meta = []

    # Marker lines that switch the parser from one section to the next.
    startPredState = '----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------'
    endPredState = 'Predicted peptide sequence(s):'
    failState = 'NO EXONS/GENES PREDICTED IN SEQUENCE'
    skipState = 'Slice no. '
    metaState = 'GENSCAN 1.0'

    state = None
    for line in iFile:
        line = line.strip()

        # Deliberately a separate `if`: the header line itself must still
        # fall through the chain below so that it is recorded in meta.
        if metaState in line:
            state = 'meta'

        if line == startPredState:
            state = 'pred'
        elif line == failState:
            state = 'fail'
        elif line == endPredState:
            state = 'prot'
        elif skipState in line:
            state = 'skip'
        elif state == 'meta':
            if line:
                meta.append(line)
        elif state == 'pred':
            if line:
                d = Predicted(line.split())
                # gene_exon looks like '<gene>.<exon>'; group rows by gene.
                gene = int(d.gene_exon.split('.')[0])
                genes.setdefault(gene, []).append(d)
        elif state == 'prot':
            # Stop line-wise parsing; the rest of the stream is handed to
            # the FASTA loader below.
            break
        elif state == 'fail':
            return [], [], ''

    # The failure marker may be the very last line of the input, in which
    # case the loop ends before the 'fail' branch above can run.
    if state == 'fail':
        return [], [], ''

    if state == 'prot':
        proteins = fasta.load_mfa(iFile)

    # sorted() works whether or not dict.items()/keys() return lists, unlike
    # calling .sort() on the result of .items().
    data = [genes[gene] for gene in sorted(genes)]
    return data, proteins, meta
}[ioDir]
# NOTE(review): the line above closes an expression that begins before the
# visible part of the file; it is left untouched.

# Script section: for every genscan annotation file, print its predicted
# exons and the lengths of the matching peptides, then test each prediction.

# Output handles; neither is used in the part of the script visible here.
writer = fasta.MfaWriter('peptides.fa')
annotFile = open('annot.txt', 'w')
annotFilenames = glob.glob('gsAnnotations/*.txt')
for annotFilename in annotFilenames:
    # NOTE(review): extractRootName presumably strips the directory and
    # extension -- confirm against its definition.
    name = extractRootName(annotFilename)
    print '>>>', name + '\n'
    # The annotation path is rebuilt from the root name rather than reusing
    # the globbed path.
    annotFilename = 'gsAnnotations/%s.txt' % name
    annotation = open(annotFilename).readlines()
    # parseGenscan's result is used as a mapping below (via .values()).
    predictions = parseGenscan(annotation)
    pepFilename = 'gsPeptides/%s.fa' % name
    peptides = fasta.load_mfa(pepFilename)
    # Key each sequence by the first whitespace-delimited token of its header.
    peptides = [(h.split()[0], s) for h, s in peptides]
    peptides = dict(peptides)
    print 'Lengths'
    for h in peptides:
        print h, len(peptides[h])
    print
    # NOTE(review): i is not used in the visible part of the script.
    i = 1
    for prediction in predictions.values():
        # Each prediction is iterated as a sequence of exons.
        for exon in prediction:
            print exon
        print
        isCoding, warnings, errors = testAnnotation(prediction)