示例#1
0
def trim_ambiguous_nucleotides(sequence):
	"""Return `sequence` with every match of `ambiguous_pattern` removed.

	`ambiguous_pattern` is a module-level pattern object defined outside this
	function; each of its matches is replaced with the empty string.
	"""
	cleaned = ambiguous_pattern.sub("", sequence)
	return cleaned

def longest_ORF(sequence):
	"""Return the longest `orf_pattern` match in `sequence`, ends trimmed.

	All matches of the module-level `orf_pattern` are collected and the
	longest one is selected (the first one wins on ties). Its first and last
	characters are dropped before it is returned. When nothing matches, the
	empty string is returned.
	"""
	candidates = orf_pattern.findall(sequence)
	if not candidates:
		return ""

	longest = max(candidates, key=len)
	# Trim the leading M and trailing X, as noted by the original author.
	return longest[1:-1]

if __name__=="__main__":
	# Command-line entry point: for every record in the input fasta file,
	# print its header followed by the longest ORF found among the
	# translations produced by forward_frame_translate.

	# The first positional parameter of ArgumentParser is `prog`, so the
	# module docstring has to be passed explicitly as the description.
	parser = argparse.ArgumentParser(description=__doc__)
	parser.add_argument("input_file",type=str,help="Target fasta file")
	args = parser.parse_args()

	for seq in split_fasta(args.input_file):
		data = trim_ambiguous_nucleotides(seq.data.lower())
		# Keep track of the longest ORF for each forward-frame translation.
		peptides = [
			longest_ORF(translation)
			for translation in forward_frame_translate(data)
		]
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print(seq.header)
		print(max(peptides,key=len))
	
示例#2
0
"""
Given a fasta reference, only print requested sequences (search by identifiers)
"""

import argparse
import fastaparse

# Parse the command line: a reference fasta file and a file of identifiers.
parser = argparse.ArgumentParser()

parser.add_argument("reference_file",type=str,help="Reference fasta database")
parser.add_argument("id_file",type=str,help="File containing references to sequences")
args = parser.parse_args()

# Materialize the records once so they can be rescanned for every identifier;
# if split_fasta returns a one-shot iterator it would otherwise be exhausted
# after the first identifier. NOTE(review): this holds the whole reference in
# memory -- confirm that is acceptable for the databases in use.
sequences = list(fastaparse.split_fasta(args.reference_file))

# Read the identifiers (one per line) and close the file promptly.
with open(args.id_file) as id_handle:
	seq_ids = [line.strip() for line in id_handle]

results = list()

for seq_id in seq_ids:
	# A blank line strips to "", which is a substring of every header and
	# would select every record in the reference.
	if not seq_id:
		continue
	for seq in sequences:
		if seq_id in seq.header:
			results.append(seq.fasta())

# Single-argument print(...) behaves the same on Python 2 and 3.
for seq in results:
	print(seq)