def __main__():
    """Command-line entry point: write the sequence of each predicted ORF as FASTA.

    Reads three positional arguments from ``sys.argv``:

    1. path of the glimmer prediction file,
    2. path of the FASTA file the predictions refer to,
    3. path of the FASTA file to write.

    A prediction line starting with ``>`` selects the FASTA record whose
    description equals the rest of that line; the header is also echoed to
    stdout.  Every other line is read as a fixed-width row: ORF name in
    characters 0-7, start in 8-16 and end in 18-25.  When start <= end the
    slice ``[start - 1:end]`` of the selected record is written; otherwise
    the slice ``[end - 1:start]`` is reverse-complemented first.

    Exits with status 1 and a message on stderr when fewer than three
    arguments are given.  Raises ``KeyError`` when a header does not match any
    FASTA description and ``ValueError`` when a coordinate field is not an
    integer.
    """
    if len(sys.argv) < 4:
        # sys.exit(<str>) prints the message to stderr and exits with status 1,
        # so calling scripts can detect the usage error.
        sys.exit("Missing input values.")

    # The context managers close all three files even if parsing fails midway.
    with open(sys.argv[1], "r") as glimmerfile, \
            open(sys.argv[2]) as sequence, \
            open(sys.argv[3], "w") as orf2seq:
        # Index the FASTA records by their full description line.
        sequences = {}
        for entry in Bio.SeqIO.parse(sequence, "fasta"):
            sequences[entry.description] = entry

        # NOTE(review): `entry` is reused below, so an ORF row that precedes
        # every '>' header is resolved against the last FASTA record read
        # above -- confirm whether that should be reported as an error.
        for line in glimmerfile:
            if line.startswith('>'):
                header = line[1:].strip()
                sys.stdout.write(header + "\n")
                entry = sequences[header]
                continue

            orf_name = line[0:8]
            orf_start = int(line[8:17])
            orf_end = int(line[18:26])

            if orf_start <= orf_end:
                orf_seq = entry.seq[orf_start - 1:orf_end]
            else:
                # start > end: take the span and reverse-complement it.
                orf_seq = entry.seq[orf_end - 1:orf_start].reverse_complement()

            # `record` comes from the enclosing module (presumably
            # Bio.SeqRecord.SeqRecord -- verify against the file's imports).
            orf_record = record(orf_seq, id=orf_name,
                                description=entry.description)
            orf2seq.write(orf_record.format("fasta") + "\n")
def glimmer2sequence(sequence_path, glimmer_path, output_path, to_protein=False, translation_table=11):
    """Write the sequence of every ORF listed in a glimmer prediction file.

    Args:
        sequence_path: Path of the FASTA file the predictions refer to.
        glimmer_path: Path of the glimmer prediction file.  Lines starting
            with ``>`` select the FASTA record whose description equals the
            rest of the line; other lines are whitespace-separated rows of
            ``name start end ...``.
        output_path: Path of the FASTA file to write (opened in ``"w"`` mode).
        to_protein: When true, each extracted sequence is translated with
            ``translate(to_stop=True, table=translation_table)`` before it is
            written.
        translation_table: Value passed as ``table`` to ``translate``.

    When start <= end the slice ``[start - 1:end]`` of the selected record is
    used; otherwise the slice ``[end - 1:start]`` is reverse-complemented.

    Blank lines are skipped.  Rows whose start/end columns are missing or not
    integers are reported on stderr and skipped.

    Raises:
        KeyError: A ``>`` header does not match any FASTA description.
    """
    # The context managers close all three files even if parsing fails midway.
    with open(sequence_path) as fasta_handle, \
            open(glimmer_path, "r") as glimmerfile, \
            open(output_path, "w") as orf2seq:
        # Index the FASTA records by their full description line.
        sequences = {}
        for entry in Bio.SeqIO.parse(fasta_handle, "fasta"):
            sequences[entry.description] = entry

        # NOTE(review): `entry` is reused below, so an ORF row that precedes
        # every '>' header is resolved against the last FASTA record read
        # above -- confirm whether that should be reported as an error.
        for line in glimmerfile:
            if line.startswith('>'):
                entry = sequences[line[1:].strip()]
                continue

            columns = line.split()
            if not columns:
                continue

            try:
                orf_start = int(columns[1])
                orf_end = int(columns[2])
            except (IndexError, ValueError):
                # Pad so the message can be built even when the row has fewer
                # than three columns (indexing them here would raise again).
                shown = (columns[1:3] + ["<missing>", "<missing>"])[:2]
                sys.stderr.write(
                    "Error: Failed to convert %s or %s to an integer. Is the input really a glimmer prediction file?\n"
                    % tuple(shown))
                continue
            orf_name = columns[0]

            if orf_start <= orf_end:
                sequence = entry.seq[orf_start - 1:orf_end]
            else:
                # start > end: take the span and reverse-complement it.
                sequence = entry.seq[orf_end - 1:orf_start].reverse_complement()

            if to_protein:
                sequence = sequence.translate(to_stop=True, table=translation_table)

            # `record` comes from the enclosing module (presumably
            # Bio.SeqRecord.SeqRecord -- verify against the file's imports).
            orf_record = record(sequence, id=orf_name,
                                description=entry.description)
            orf2seq.write(orf_record.format("fasta") + "\n")
def glimmer2sequence(sequence_path, glimmer_path, output_path, to_protein=False, translation_table=11):
    """Write the sequence of every ORF listed in a glimmer prediction file.

    Args:
        sequence_path: Path of the FASTA file the predictions refer to.
        glimmer_path: Path of the glimmer prediction file.  Lines starting
            with ``>`` select the FASTA record whose description equals the
            rest of the line; other lines are whitespace-separated rows of
            ``name start end ...``.
        output_path: Path of the FASTA file to write (opened in ``"w"`` mode).
        to_protein: When true, each extracted sequence is translated with
            ``translate(to_stop=True, table=translation_table)`` before it is
            written.
        translation_table: Value passed as ``table`` to ``translate``.

    When start <= end the slice ``[start - 1:end]`` of the selected record is
    used; otherwise the slice ``[end - 1:start]`` is reverse-complemented.

    Blank lines are skipped.  Rows whose start/end columns are missing or not
    integers are reported on stderr and skipped.

    Raises:
        KeyError: A ``>`` header does not match any FASTA description.
    """
    # The context managers close all three files even if parsing fails midway.
    with open(sequence_path) as fasta_handle, \
            open(glimmer_path, "r") as glimmerfile, \
            open(output_path, "w") as orf2seq:
        # Index the FASTA records by their full description line.
        sequences = {}
        for entry in Bio.SeqIO.parse(fasta_handle, "fasta"):
            sequences[entry.description] = entry

        # NOTE(review): `entry` is reused below, so an ORF row that precedes
        # every '>' header is resolved against the last FASTA record read
        # above -- confirm whether that should be reported as an error.
        for line in glimmerfile:
            if line.startswith('>'):
                entry = sequences[line[1:].strip()]
                continue

            columns = line.split()
            if not columns:
                continue

            try:
                orf_start = int(columns[1])
                orf_end = int(columns[2])
            except (IndexError, ValueError):
                # Pad so the message can be built even when the row has fewer
                # than three columns (indexing them here would raise again).
                shown = (columns[1:3] + ["<missing>", "<missing>"])[:2]
                sys.stderr.write(
                    "Error: Failed to convert %s or %s to an integer. Is the input really a glimmer prediction file?\n"
                    % tuple(shown))
                continue
            orf_name = columns[0]

            if orf_start <= orf_end:
                sequence = entry.seq[orf_start - 1:orf_end]
            else:
                # start > end: take the span and reverse-complement it.
                sequence = entry.seq[orf_end - 1:orf_start].reverse_complement()

            if to_protein:
                sequence = sequence.translate(to_stop=True, table=translation_table)

            # `record` comes from the enclosing module (presumably
            # Bio.SeqRecord.SeqRecord -- verify against the file's imports).
            orf_record = record(sequence, id=orf_name,
                                description=entry.description)
            orf2seq.write(orf_record.format("fasta") + "\n")