def main():
    """Write a copy of a GFF3 file in which every parentless RNA gets a gene.

    Comment lines (starting with '#') and lines that do not have exactly nine
    tab-separated columns are copied through (trailing whitespace is stripped
    from the latter).  For each feature whose type ends in 'RNA' and which has
    no Parent attribute, a 'gene' row is written first -- a copy of the RNA
    row whose ID is '<RNA ID>.gene' -- followed by the RNA row with its Parent
    set to that new gene ID.  All other rows are written unchanged.

    Input and output paths come from the -i/--input and -o/--output
    command-line options.
    """
    parser = argparse.ArgumentParser(
        description='Adds gene features for RNAs which lack them')

    ## output file to be written
    parser.add_argument('-i', '--input', type=str, required=True,
                        help='Path to the input GFF3 file')
    parser.add_argument('-o', '--output', type=str, required=True,
                        help='Output GFF3 file to write')
    args = parser.parse_args()

    # Context managers guarantee both handles are flushed and closed, even if
    # a malformed row raises part-way through the file.
    with open(args.input) as infile, open(args.output, 'wt') as ofh:
        for line in infile:
            if line.startswith('#'):
                ofh.write(line)
                continue

            line = line.rstrip()
            cols = line.split("\t")

            if len(cols) != 9:
                ofh.write("{0}\n".format(line))
                continue

            feat_id = gff.column_9_value(cols[8], 'ID')
            parent = gff.column_9_value(cols[8], 'Parent')

            if cols[2].endswith('RNA') and parent is None:
                gene_id = "{0}.gene".format(feat_id)

                # The gene row is cloned from the RNA row so that it spans
                # the same coordinates on the same molecule and strand.
                gene_cols = list(cols)
                gene_cols[2] = 'gene'
                gene_cols[8] = gff.set_column_9_value(gene_cols[8], 'ID', gene_id)
                ofh.write("{0}\n".format("\t".join(gene_cols)))

                cols[8] = gff.set_column_9_value(cols[8], 'Parent', gene_id)
                ofh.write("{0}\n".format("\t".join(cols)))
            else:
                ofh.write("{0}\n".format(line))
def main():
    """Write a copy of a GFF3 file with exon Parents pointing at the last RNA.

    Comment lines (starting with '#') and lines that do not have exactly nine
    tab-separated columns are copied through (trailing whitespace is stripped
    from the latter).  The ID of the most recent feature whose type ends in
    'RNA' is remembered; every later 'exon' row whose Parent differs from that
    ID has its Parent rewritten to it and an INFO message is printed.  All
    other rows are written unchanged.

    An exon seen before any RNA ID is known is written unchanged, since there
    is nothing valid to re-parent it to.

    Input and output paths come from the -i/--input and -o/--output
    command-line options.
    """
    parser = argparse.ArgumentParser(
        description='Updates exon Parent attributes to point at the correct RNA feature')

    ## output file to be written
    parser.add_argument('-i', '--input', type=str, required=True,
                        help='Path to the input GFF3 file')
    parser.add_argument('-o', '--output', type=str, required=True,
                        help='Output GFF3 file to write')
    args = parser.parse_args()

    last_rna_id = None

    # Context managers guarantee both handles are flushed and closed, even if
    # a malformed row raises part-way through the file.
    with open(args.input) as infile, open(args.output, 'wt') as ofh:
        for line in infile:
            if line.startswith('#'):
                ofh.write(line)
                continue

            line = line.rstrip()
            cols = line.split("\t")

            if len(cols) != 9:
                ofh.write("{0}\n".format(line))
                continue

            feat_id = gff.column_9_value(cols[8], 'ID')
            parent = gff.column_9_value(cols[8], 'Parent')

            if cols[2].endswith('RNA'):
                last_rna_id = feat_id
            elif (cols[2] == 'exon' and last_rna_id is not None
                    and parent != last_rna_id):
                # Guarding on last_rna_id avoids passing None as the new
                # Parent value when no RNA ID has been seen yet.
                print("INFO: correcting unexpected parentage for feature ({0}) type {2}. Expected ({1})"
                      .format(feat_id, last_rna_id, cols[2]))
                cols[8] = gff.set_column_9_value(cols[8], 'Parent', last_rna_id)
                line = "\t".join(cols)

            ofh.write("{0}\n".format(line))
def main():
    """Write a copy of a GFF3 file in which every parentless RNA gets a gene.

    Comment lines (starting with '#') and lines that do not have exactly nine
    tab-separated columns are copied through (trailing whitespace is stripped
    from the latter).  For each feature whose type ends in 'RNA' and which has
    no Parent attribute, a 'gene' row is written first -- a copy of the RNA
    row whose ID is '<RNA ID>.gene' -- followed by the RNA row with its Parent
    set to that new gene ID.  All other rows are written unchanged.

    Input and output paths come from the -i/--input and -o/--output
    command-line options.
    """
    parser = argparse.ArgumentParser(
        description='Adds gene features for RNAs which lack them')

    ## output file to be written
    parser.add_argument('-i', '--input', type=str, required=True,
                        help='Path to the input GFF3 file')
    parser.add_argument('-o', '--output', type=str, required=True,
                        help='Output GFF3 file to write')
    args = parser.parse_args()

    # Context managers guarantee both handles are flushed and closed, even if
    # a malformed row raises part-way through the file.
    with open(args.input) as infile, open(args.output, 'wt') as ofh:
        for line in infile:
            if line.startswith('#'):
                ofh.write(line)
                continue

            line = line.rstrip()
            cols = line.split("\t")

            if len(cols) != 9:
                ofh.write("{0}\n".format(line))
                continue

            feat_id = gff.column_9_value(cols[8], 'ID')
            parent = gff.column_9_value(cols[8], 'Parent')

            if cols[2].endswith('RNA') and parent is None:
                gene_id = "{0}.gene".format(feat_id)

                # The gene row is cloned from the RNA row so that it spans
                # the same coordinates on the same molecule and strand.
                gene_cols = list(cols)
                gene_cols[2] = 'gene'
                gene_cols[8] = gff.set_column_9_value(gene_cols[8], 'ID', gene_id)
                ofh.write("{0}\n".format("\t".join(gene_cols)))

                cols[8] = gff.set_column_9_value(cols[8], 'Parent', gene_id)
                ofh.write("{0}\n".format("\t".join(cols)))
            else:
                ofh.write("{0}\n".format(line))
def main():
    """Write a copy of a GFF3 file with exon Parents pointing at the last RNA.

    Comment lines (starting with '#') and lines that do not have exactly nine
    tab-separated columns are copied through (trailing whitespace is stripped
    from the latter).  The ID of the most recent feature whose type ends in
    'RNA' is remembered; every later 'exon' row whose Parent differs from that
    ID has its Parent rewritten to it and an INFO message is printed.  All
    other rows are written unchanged.

    An exon seen before any RNA ID is known is written unchanged, since there
    is nothing valid to re-parent it to.

    Input and output paths come from the -i/--input and -o/--output
    command-line options.
    """
    parser = argparse.ArgumentParser(
        description='Updates exon Parent attributes to point at the correct RNA feature')

    ## output file to be written
    parser.add_argument('-i', '--input', type=str, required=True,
                        help='Path to the input GFF3 file')
    parser.add_argument('-o', '--output', type=str, required=True,
                        help='Output GFF3 file to write')
    args = parser.parse_args()

    last_rna_id = None

    # Context managers guarantee both handles are flushed and closed, even if
    # a malformed row raises part-way through the file.
    with open(args.input) as infile, open(args.output, 'wt') as ofh:
        for line in infile:
            if line.startswith('#'):
                ofh.write(line)
                continue

            line = line.rstrip()
            cols = line.split("\t")

            if len(cols) != 9:
                ofh.write("{0}\n".format(line))
                continue

            feat_id = gff.column_9_value(cols[8], 'ID')
            parent = gff.column_9_value(cols[8], 'Parent')

            if cols[2].endswith('RNA'):
                last_rna_id = feat_id
            elif (cols[2] == 'exon' and last_rna_id is not None
                    and parent != last_rna_id):
                # Guarding on last_rna_id avoids passing None as the new
                # Parent value when no RNA ID has been seen yet.
                print("INFO: correcting unexpected parentage for feature ({0}) type {2}. Expected ({1})"
                      .format(feat_id, last_rna_id, cols[2]))
                cols[8] = gff.set_column_9_value(cols[8], 'Parent', last_rna_id)
                line = "\t".join(cols)

            ofh.write("{0}\n".format(line))
def main():
    """Add a locus_tag attribute to the gene and RNA rows of a GFF3 file.

    Rows that do not have exactly nine tab-separated columns are copied
    through (trailing whitespace stripped).  For each 'gene' row a locus tag
    is chosen: the value from --id_file if the gene's ID is listed there,
    otherwise an identifier generated from --prefix, a zero-padded counter
    and, when --molecule_map is given, the molecule's token.  Rows whose type
    ends in 'RNA' inherit the locus tag of their Parent gene.  Output goes to
    --output_file, or STDOUT when that option is omitted.

    Raises:
        Exception: if a --custom mode is used without --molecule_map and
            --id_file; if a mapped ID does not follow the 'bmicroti'
            convention; if a molecule is missing from --molecule_map; if a
            locus tag taken from --id_file has already been assigned; or if
            an RNA's parent gene has not been seen yet.
    """
    parser = argparse.ArgumentParser(
        description='Adds locus tag identifiers to GFF3 features')

    ## output file to be written
    parser.add_argument('-i', '--input_file', type=str, required=True,
                        help='TA file of source molecules')
    parser.add_argument('-o', '--output_file', type=str, required=False,
                        help='Optional output file path (else STDOUT)')
    parser.add_argument('-p', '--prefix', type=str, required=True,
                        help='The prefix portion of IDs to be generated')
    parser.add_argument('-a', '--padding', type=int, required=True,
                        help='Specify the minimum with to reserve for the numeric portion of the IDs. Smaller numbers will be zero-padded.')
    parser.add_argument('-n', '--interval', type=int, required=False, default=1,
                        help='Interval between generated identifiers')
    parser.add_argument('-s', '--starting_id', type=int, required=False, default=0,
                        help='Initial numeric portion of IDs to be generated (do not zero-pad)')
    parser.add_argument('-d', '--id_file', type=str, required=False,
                        help='Pass a 2-column file of IDs to retain (in case you have mapped genes, for example)')
    parser.add_argument('-m', '--molecule_map', type=str, required=False,
                        help='Pass a 2-column file of molecule->token identifiers (see documentation)')
    parser.add_argument('-c', '--custom', type=str, required=False,
                        help='For custom parsing steps. Most should ignore this.')
    args = parser.parse_args()
    check_arguments(args)

    # used to store locus_tags associated with each gene (so children can inherit)
    gene_loci = dict()
    next_id = args.starting_id
    last_molecule = None
    id_mapping = parse_mapping_file(args.id_file)
    mol_mapping = parse_mapping_file(args.molecule_map)
    # A set keeps the per-gene duplicate check constant-time.
    loci_assigned = set()

    ## if using Joana's custom options, check assumptions
    if args.custom == 'joana':
        if args.molecule_map is None or args.id_file is None:
            raise Exception("ERROR: Expected --molecule_map and --id_file options when using --custom=joana")

        ## need to process the ID map to reformat IDs
        for gene_id in id_mapping:
            # TP05_0002 -> TpMuguga_05g00002
            m = re.match(r'TP(\d\d)_(\d+)', id_mapping[gene_id])
            if m:
                id_mapping[gene_id] = "{0}_{1}g0{2}".format(args.prefix, m.group(1), m.group(2))

    elif args.custom == 'bmicroti':
        microti_map = {'I': '01', 'II': '02', 'III': '03', 'IV': '04'}

        if args.molecule_map is None or args.id_file is None:
            raise Exception("ERROR: Expected --molecule_map and --id_file options when using --custom=bmicroti")

        for gene_id in id_mapping:
            m = re.match(r'BBM_(\D+)(\d+)', id_mapping[gene_id])
            if m:
                print("Changing id from {0} to ".format(gene_id))
                id_mapping[gene_id] = "{0}_{1}g{2}".format(args.prefix, microti_map[m.group(1)], m.group(2))
                print(id_mapping[gene_id])
            else:
                raise Exception("ERROR: id ({0}) didn't match expected convention.".format(id_mapping[gene_id]))

    ## output will either be a file or STDOUT
    fout = sys.stdout
    if args.output_file is not None:
        fout = open(args.output_file, 'wt')

    # Numeric suffix of the most recently assigned locus tag; the 'bmicroti'
    # mode numbers new genes from this rather than from next_id.
    last_number_portion_assigned = 0

    try:
        with open(args.input_file) as infile:
            for line in infile:
                line = line.rstrip()
                cols = line.split("\t")

                if len(cols) != 9:
                    fout.write(line + "\n")
                    continue

                if last_molecule is None or (
                        args.molecule_map is not None
                        and mol_mapping[cols[0]] != mol_mapping[last_molecule]):
                    print("Found molecule {0}, resetting id counter from {1}".format(cols[0], next_id))
                    next_id = args.starting_id
                    last_molecule = cols[0]

                # grab the ID column if any
                feat_id = gff.column_9_value(cols[8], 'ID')
                parent = gff.column_9_value(cols[8], 'Parent')
                feat_type = cols[2]

                # issue
                #  66F4EEF2E3C863C251F831817FF71233
                #  7F1917E4D81A959078C9A38E15488BC0
                #  E22888670919A4A888572155F40F2654
                #  B9D9CF1F7A8E5A2E1124F0A6C68840DC -> BBM_I00232
                #  gene before is: 6DE6BCCE69CCDC39994A0940B2ED524A - novel
                #  errors on: BmicrotiR1_01g00233 -> BBM_I00233
                #  5800A4110A62E4EAE57AFAD1F8D65CB3 BBM_I00233

                if feat_type == 'gene':
                    if feat_id in id_mapping:
                        locus_id = id_mapping[feat_id]

                        # A mapped ID is fixed, so retrying can never resolve
                        # a collision; fail loudly instead of looping forever.
                        if locus_id in loci_assigned:
                            raise Exception("ERROR: locus tag {0} mapped for gene {1} was already assigned".format(locus_id, feat_id))
                    else:
                        if args.molecule_map is not None and cols[0] not in mol_mapping:
                            raise Exception("ERROR: --molecule_map passed but {0} wasn't found in it.".format(cols[0]))

                        # Candidate suffix for 'bmicroti' mode; advanced on
                        # every retry so a collision cannot repeat forever.
                        bmicroti_number = last_number_portion_assigned + 1

                        while True:
                            if args.molecule_map is None:
                                locus_id = "{0}_{1}".format(
                                    args.prefix, str(next_id).zfill(args.padding))
                            elif args.custom == 'bmicroti':
                                locus_id = "{0}_{2}g{1}".format(
                                    args.prefix,
                                    str(bmicroti_number).zfill(args.padding),
                                    mol_mapping[cols[0]])
                                bmicroti_number += 1
                            else:
                                locus_id = "{0}_{2}g{1}".format(
                                    args.prefix,
                                    str(next_id).zfill(args.padding),
                                    mol_mapping[cols[0]])

                            # NOTE(review): the counter is advanced only for
                            # auto-generated IDs -- confirm against the
                            # original indentation.
                            next_id += args.interval

                            ## make sure this wasn't generated already (possibly conflict
                            #   between --id_file and an auto-generated ID?
                            if locus_id not in loci_assigned:
                                break

                            print("DEBUG: Duplicate ID assigned ({0}), trying again.".format(locus_id))

                    cols[8] = gff.set_column_9_value(cols[8], 'locus_tag', locus_id)
                    loci_assigned.add(locus_id)
                    gene_loci[feat_id] = locus_id

                    m = re.search(r"(\d+)$", locus_id)
                    if m:
                        last_number_portion_assigned = int(m.group(1))

                elif feat_type.endswith('RNA'):
                    if parent in gene_loci:
                        cols[8] = gff.set_column_9_value(cols[8], 'locus_tag', gene_loci[parent])
                    else:
                        raise Exception("ERROR: found RNA {0} whose parent {1} wasn't found yet".format(feat_id, parent))

                fout.write("\t".join(cols) + "\n")
    finally:
        # Only close what this function opened; STDOUT belongs to the caller.
        if fout is not sys.stdout:
            fout.close()
def main():
    """Convert glimmerHMM GFF output into gene/mRNA/exon/CDS GFF3 rows.

    Comment lines (starting with '#') are copied through; lines without
    exactly nine tab-separated columns are dropped.  Each 'mRNA' row produces
    a 'gene' row (an unmodified copy with the type changed) followed by the
    mRNA row with ID and Name set to '<ID>.mRNA'.  Each 'CDS' row produces an
    'exon' row (phase '.', ID/Name '<Parent>.exon.<n>' with n counting from 0
    per parent) followed by the CDS row with ID/Name '<Parent>.cds'; both get
    Parent '<Parent>.mRNA'.  Rows of any other type are not written.

    Input and output paths come from the -i/--input_file and -o/--output_file
    command-line options.

    Raises:
        ValueError: if the start or end column of a nine-column row is not an
            integer.
    """
    parser = argparse.ArgumentParser(
        description='Converts glimmerHMM GFF output to GFF3')

    # output file to be written
    parser.add_argument('-i', '--input_file', type=str, required=True,
                        help='Path to an input file to parse')
    parser.add_argument('-o', '--output_file', type=str, required=True,
                        help='Path to an output file to be created')
    args = parser.parse_args()

    # Next exon index to hand out, keyed by the CDS row's Parent value.
    next_exon_num = defaultdict(int)

    # Context managers guarantee both handles are flushed and closed, even if
    # a malformed row raises part-way through the file.
    with open(args.output_file, 'w') as fout, open(args.input_file, 'r') as infile:
        for line in infile:
            if line.startswith('#'):
                fout.write(line)
                continue

            line = line.rstrip()
            cols = line.split("\t")

            if len(cols) != 9:
                continue

            feat_type = cols[2]

            # The coordinates are not otherwise used, but parsing them keeps
            # the ValueError on non-numeric start/end columns.
            int(cols[3])
            int(cols[4])

            feat_id = gff.column_9_value(cols[8], 'ID')
            parent = gff.column_9_value(cols[8], 'Parent')

            if feat_type == 'mRNA':
                # Copy before cols[8] is rewritten so the gene keeps the
                # original attributes.
                gene_cols = list(cols)
                gene_cols[2] = 'gene'

                mrna_id = "{0}.mRNA".format(feat_id)
                cols[8] = gff.set_column_9_value(cols[8], 'ID', mrna_id)
                cols[8] = gff.set_column_9_value(cols[8], 'Name', mrna_id)
                cols[8] = gff.order_column_9(cols[8])

                # print the gene and mRNA
                fout.write("{0}\n".format("\t".join(gene_cols)))
                fout.write("{0}\n".format("\t".join(cols)))

            elif feat_type == 'CDS':
                # Copy before cols[8] is rewritten; the exon shares the CDS
                # coordinates.
                exon_cols = list(cols)

                cds_id = "{0}.cds".format(parent)
                mrna_id = "{0}.mRNA".format(parent)

                cols[8] = gff.set_column_9_value(cols[8], 'ID', cds_id)
                cols[8] = gff.set_column_9_value(cols[8], 'Name', cds_id)
                cols[8] = gff.set_column_9_value(cols[8], 'Parent', mrna_id)
                cols[8] = gff.order_column_9(cols[8])

                exon_id = "{0}.exon.{1}".format(parent, next_exon_num[parent])
                next_exon_num[parent] += 1

                exon_cols[2] = 'exon'
                exon_cols[7] = '.'
                exon_cols[8] = gff.set_column_9_value(exon_cols[8], 'ID', exon_id)
                exon_cols[8] = gff.set_column_9_value(exon_cols[8], 'Name', exon_id)
                exon_cols[8] = gff.set_column_9_value(exon_cols[8], 'Parent', mrna_id)
                exon_cols[8] = gff.order_column_9(exon_cols[8])

                fout.write("{0}\n".format("\t".join(exon_cols)))
                fout.write("{0}\n".format("\t".join(cols)))
def main():
    """Add a locus_tag attribute to the gene and RNA rows of a GFF3 file.

    Rows that do not have exactly nine tab-separated columns are copied
    through (trailing whitespace stripped).  For each 'gene' row a locus tag
    is chosen: the value from --id_file if the gene's ID is listed there,
    otherwise an identifier generated from --prefix, a zero-padded counter
    and, when --molecule_map is given, the molecule's token.  Rows whose type
    ends in 'RNA' inherit the locus tag of their Parent gene.  Output goes to
    --output_file, or STDOUT when that option is omitted.

    Raises:
        Exception: if a --custom mode is used without --molecule_map and
            --id_file; if a mapped ID does not follow the 'bmicroti'
            convention; if a molecule is missing from --molecule_map; if a
            locus tag taken from --id_file has already been assigned; or if
            an RNA's parent gene has not been seen yet.
    """
    parser = argparse.ArgumentParser(
        description='Adds locus tag identifiers to GFF3 features')

    ## output file to be written
    parser.add_argument('-i', '--input_file', type=str, required=True,
                        help='TA file of source molecules')
    parser.add_argument('-o', '--output_file', type=str, required=False,
                        help='Optional output file path (else STDOUT)')
    parser.add_argument('-p', '--prefix', type=str, required=True,
                        help='The prefix portion of IDs to be generated')
    parser.add_argument('-a', '--padding', type=int, required=True,
                        help='Specify the minimum with to reserve for the numeric portion of the IDs. Smaller numbers will be zero-padded.')
    parser.add_argument('-n', '--interval', type=int, required=False, default=1,
                        help='Interval between generated identifiers')
    parser.add_argument('-s', '--starting_id', type=int, required=False, default=0,
                        help='Initial numeric portion of IDs to be generated (do not zero-pad)')
    parser.add_argument('-d', '--id_file', type=str, required=False,
                        help='Pass a 2-column file of IDs to retain (in case you have mapped genes, for example)')
    parser.add_argument('-m', '--molecule_map', type=str, required=False,
                        help='Pass a 2-column file of molecule->token identifiers (see documentation)')
    parser.add_argument('-c', '--custom', type=str, required=False,
                        help='For custom parsing steps. Most should ignore this.')
    args = parser.parse_args()
    check_arguments(args)

    # used to store locus_tags associated with each gene (so children can inherit)
    gene_loci = dict()
    next_id = args.starting_id
    last_molecule = None
    id_mapping = parse_mapping_file(args.id_file)
    mol_mapping = parse_mapping_file(args.molecule_map)
    # A set keeps the per-gene duplicate check constant-time.
    loci_assigned = set()

    ## if using Joana's custom options, check assumptions
    if args.custom == 'joana':
        if args.molecule_map is None or args.id_file is None:
            raise Exception("ERROR: Expected --molecule_map and --id_file options when using --custom=joana")

        ## need to process the ID map to reformat IDs
        for gene_id in id_mapping:
            # TP05_0002 -> TpMuguga_05g00002
            m = re.match(r'TP(\d\d)_(\d+)', id_mapping[gene_id])
            if m:
                id_mapping[gene_id] = "{0}_{1}g0{2}".format(args.prefix, m.group(1), m.group(2))

    elif args.custom == 'bmicroti':
        microti_map = {'I': '01', 'II': '02', 'III': '03', 'IV': '04'}

        if args.molecule_map is None or args.id_file is None:
            raise Exception("ERROR: Expected --molecule_map and --id_file options when using --custom=bmicroti")

        for gene_id in id_mapping:
            m = re.match(r'BBM_(\D+)(\d+)', id_mapping[gene_id])
            if m:
                print("Changing id from {0} to ".format(gene_id))
                id_mapping[gene_id] = "{0}_{1}g{2}".format(args.prefix, microti_map[m.group(1)], m.group(2))
                print(id_mapping[gene_id])
            else:
                raise Exception("ERROR: id ({0}) didn't match expected convention.".format(id_mapping[gene_id]))

    ## output will either be a file or STDOUT
    fout = sys.stdout
    if args.output_file is not None:
        fout = open(args.output_file, 'wt')

    # Numeric suffix of the most recently assigned locus tag; the 'bmicroti'
    # mode numbers new genes from this rather than from next_id.
    last_number_portion_assigned = 0

    try:
        with open(args.input_file) as infile:
            for line in infile:
                line = line.rstrip()
                cols = line.split("\t")

                if len(cols) != 9:
                    fout.write(line + "\n")
                    continue

                if last_molecule is None or (
                        args.molecule_map is not None
                        and mol_mapping[cols[0]] != mol_mapping[last_molecule]):
                    print("Found molecule {0}, resetting id counter from {1}".format(cols[0], next_id))
                    next_id = args.starting_id
                    last_molecule = cols[0]

                # grab the ID column if any
                feat_id = gff.column_9_value(cols[8], 'ID')
                parent = gff.column_9_value(cols[8], 'Parent')
                feat_type = cols[2]

                if feat_type == 'gene':
                    if feat_id in id_mapping:
                        locus_id = id_mapping[feat_id]

                        # A mapped ID is fixed, so retrying can never resolve
                        # a collision; fail loudly instead of looping forever.
                        if locus_id in loci_assigned:
                            raise Exception("ERROR: locus tag {0} mapped for gene {1} was already assigned".format(locus_id, feat_id))
                    else:
                        if args.molecule_map is not None and cols[0] not in mol_mapping:
                            raise Exception("ERROR: --molecule_map passed but {0} wasn't found in it.".format(cols[0]))

                        # Candidate suffix for 'bmicroti' mode; advanced on
                        # every retry so a collision cannot repeat forever.
                        bmicroti_number = last_number_portion_assigned + 1

                        while True:
                            if args.molecule_map is None:
                                locus_id = "{0}_{1}".format(
                                    args.prefix, str(next_id).zfill(args.padding))
                            elif args.custom == 'bmicroti':
                                locus_id = "{0}_{2}g{1}".format(
                                    args.prefix,
                                    str(bmicroti_number).zfill(args.padding),
                                    mol_mapping[cols[0]])
                                bmicroti_number += 1
                            else:
                                locus_id = "{0}_{2}g{1}".format(
                                    args.prefix,
                                    str(next_id).zfill(args.padding),
                                    mol_mapping[cols[0]])

                            # NOTE(review): the counter is advanced only for
                            # auto-generated IDs -- confirm against the
                            # original indentation.
                            next_id += args.interval

                            ## make sure this wasn't generated already (possibly conflict
                            #   between --id_file and an auto-generated ID?
                            if locus_id not in loci_assigned:
                                break

                            print("DEBUG: Duplicate ID assigned ({0}), trying again.".format(locus_id))

                    cols[8] = gff.set_column_9_value(cols[8], 'locus_tag', locus_id)
                    loci_assigned.add(locus_id)
                    gene_loci[feat_id] = locus_id

                    m = re.search(r"(\d+)$", locus_id)
                    if m:
                        last_number_portion_assigned = int(m.group(1))

                elif feat_type.endswith('RNA'):
                    if parent in gene_loci:
                        cols[8] = gff.set_column_9_value(cols[8], 'locus_tag', gene_loci[parent])
                    else:
                        raise Exception("ERROR: found RNA {0} whose parent {1} wasn't found yet".format(feat_id, parent))

                fout.write("\t".join(cols) + "\n")
    finally:
        # Only close what this function opened; STDOUT belongs to the caller.
        if fout is not sys.stdout:
            fout.close()
def main():
    """Convert glimmerHMM GFF output into gene/mRNA/exon/CDS GFF3 rows.

    Comment lines (starting with '#') are copied through; lines without
    exactly nine tab-separated columns are dropped.  Each 'mRNA' row produces
    a 'gene' row (an unmodified copy with the type changed) followed by the
    mRNA row with ID and Name set to '<ID>.mRNA'.  Each 'CDS' row produces an
    'exon' row (phase '.', ID/Name '<Parent>.exon.<n>' with n counting from 0
    per parent) followed by the CDS row with ID/Name '<Parent>.cds'; both get
    Parent '<Parent>.mRNA'.  Rows of any other type are not written.

    Input and output paths come from the -i/--input_file and -o/--output_file
    command-line options.

    Raises:
        ValueError: if the start or end column of a nine-column row is not an
            integer.
    """
    parser = argparse.ArgumentParser(
        description='Converts glimmerHMM GFF output to GFF3')

    # output file to be written
    parser.add_argument('-i', '--input_file', type=str, required=True,
                        help='Path to an input file to parse')
    parser.add_argument('-o', '--output_file', type=str, required=True,
                        help='Path to an output file to be created')
    args = parser.parse_args()

    # Next exon index to hand out, keyed by the CDS row's Parent value.
    next_exon_num = defaultdict(int)

    # Context managers guarantee both handles are flushed and closed, even if
    # a malformed row raises part-way through the file.
    with open(args.output_file, 'w') as fout, open(args.input_file, 'r') as infile:
        for line in infile:
            if line.startswith('#'):
                fout.write(line)
                continue

            line = line.rstrip()
            cols = line.split("\t")

            if len(cols) != 9:
                continue

            feat_type = cols[2]

            # The coordinates are not otherwise used, but parsing them keeps
            # the ValueError on non-numeric start/end columns.
            int(cols[3])
            int(cols[4])

            feat_id = gff.column_9_value(cols[8], 'ID')
            parent = gff.column_9_value(cols[8], 'Parent')

            if feat_type == 'mRNA':
                # Copy before cols[8] is rewritten so the gene keeps the
                # original attributes.
                gene_cols = list(cols)
                gene_cols[2] = 'gene'

                mrna_id = "{0}.mRNA".format(feat_id)
                cols[8] = gff.set_column_9_value(cols[8], 'ID', mrna_id)
                cols[8] = gff.set_column_9_value(cols[8], 'Name', mrna_id)
                cols[8] = gff.order_column_9(cols[8])

                # print the gene and mRNA
                fout.write("{0}\n".format("\t".join(gene_cols)))
                fout.write("{0}\n".format("\t".join(cols)))

            elif feat_type == 'CDS':
                # Copy before cols[8] is rewritten; the exon shares the CDS
                # coordinates.
                exon_cols = list(cols)

                cds_id = "{0}.cds".format(parent)
                mrna_id = "{0}.mRNA".format(parent)

                cols[8] = gff.set_column_9_value(cols[8], 'ID', cds_id)
                cols[8] = gff.set_column_9_value(cols[8], 'Name', cds_id)
                cols[8] = gff.set_column_9_value(cols[8], 'Parent', mrna_id)
                cols[8] = gff.order_column_9(cols[8])

                exon_id = "{0}.exon.{1}".format(parent, next_exon_num[parent])
                next_exon_num[parent] += 1

                exon_cols[2] = 'exon'
                exon_cols[7] = '.'
                exon_cols[8] = gff.set_column_9_value(exon_cols[8], 'ID', exon_id)
                exon_cols[8] = gff.set_column_9_value(exon_cols[8], 'Name', exon_id)
                exon_cols[8] = gff.set_column_9_value(exon_cols[8], 'Parent', mrna_id)
                exon_cols[8] = gff.order_column_9(exon_cols[8])

                fout.write("{0}\n".format("\t".join(exon_cols)))
                fout.write("{0}\n".format("\t".join(cols)))
def main():
    """Add a locus_tag attribute to the gene and RNA rows of a GFF3 file.

    Rows that do not have exactly nine tab-separated columns are copied
    through (trailing whitespace stripped).  For each "gene" row a locus tag
    is chosen: the value from --id_file if the gene's ID is listed there,
    otherwise an identifier generated from --prefix, a zero-padded counter
    and, when --molecule_map is given, the molecule's token.  Rows whose type
    ends in "RNA" inherit the locus tag of their Parent gene.  Output goes to
    --output_file, or STDOUT when that option is omitted.

    Raises:
        Exception: if a --custom mode is used without --molecule_map and
            --id_file; if a mapped ID does not follow the "bmicroti"
            convention; if a molecule is missing from --molecule_map; if a
            locus tag taken from --id_file has already been assigned; or if
            an RNA's parent gene has not been seen yet.
    """
    parser = argparse.ArgumentParser(description="Adds locus tag identifiers to GFF3 features")

    ## output file to be written
    parser.add_argument("-i", "--input_file", type=str, required=True,
                        help="TA file of source molecules")
    parser.add_argument("-o", "--output_file", type=str, required=False,
                        help="Optional output file path (else STDOUT)")
    parser.add_argument("-p", "--prefix", type=str, required=True,
                        help="The prefix portion of IDs to be generated")
    parser.add_argument("-a", "--padding", type=int, required=True,
                        help="Specify the minimum with to reserve for the numeric portion of the IDs. Smaller numbers will be zero-padded.")
    parser.add_argument("-n", "--interval", type=int, required=False, default=1,
                        help="Interval between generated identifiers")
    parser.add_argument("-s", "--starting_id", type=int, required=False, default=0,
                        help="Initial numeric portion of IDs to be generated (do not zero-pad)")
    parser.add_argument("-d", "--id_file", type=str, required=False,
                        help="Pass a 2-column file of IDs to retain (in case you have mapped genes, for example)")
    parser.add_argument("-m", "--molecule_map", type=str, required=False,
                        help="Pass a 2-column file of molecule->token identifiers (see documentation)")
    parser.add_argument("-c", "--custom", type=str, required=False,
                        help="For custom parsing steps. Most should ignore this.")
    args = parser.parse_args()
    check_arguments(args)

    # used to store locus_tags associated with each gene (so children can inherit)
    gene_loci = dict()
    next_id = args.starting_id
    last_molecule = None
    id_mapping = parse_mapping_file(args.id_file)
    mol_mapping = parse_mapping_file(args.molecule_map)
    # A set keeps the per-gene duplicate check constant-time.
    loci_assigned = set()

    ## if using Joana's custom options, check assumptions
    if args.custom == "joana":
        if args.molecule_map is None or args.id_file is None:
            raise Exception("ERROR: Expected --molecule_map and --id_file options when using --custom=joana")

        ## need to process the ID map to reformat IDs
        for gene_id in id_mapping:
            # TP05_0002 -> TpMuguga_05g00002
            m = re.match(r"TP(\d\d)_(\d+)", id_mapping[gene_id])
            if m:
                id_mapping[gene_id] = "{0}_{1}g0{2}".format(args.prefix, m.group(1), m.group(2))

    elif args.custom == "bmicroti":
        microti_map = {"I": "01", "II": "02", "III": "03", "IV": "04"}

        if args.molecule_map is None or args.id_file is None:
            raise Exception("ERROR: Expected --molecule_map and --id_file options when using --custom=bmicroti")

        for gene_id in id_mapping:
            m = re.match(r"BBM_(\D+)(\d+)", id_mapping[gene_id])
            if m:
                print("Changing id from {0} to ".format(gene_id))
                id_mapping[gene_id] = "{0}_{1}g{2}".format(args.prefix, microti_map[m.group(1)], m.group(2))
                print(id_mapping[gene_id])
            else:
                raise Exception("ERROR: id ({0}) didn't match expected convention.".format(id_mapping[gene_id]))

    ## output will either be a file or STDOUT
    fout = sys.stdout
    if args.output_file is not None:
        fout = open(args.output_file, "wt")

    # Numeric suffix of the most recently assigned locus tag; the "bmicroti"
    # mode numbers new genes from this rather than from next_id.
    last_number_portion_assigned = 0

    try:
        with open(args.input_file) as infile:
            for line in infile:
                line = line.rstrip()
                cols = line.split("\t")

                if len(cols) != 9:
                    fout.write(line + "\n")
                    continue

                if last_molecule is None or (
                    args.molecule_map is not None
                    and mol_mapping[cols[0]] != mol_mapping[last_molecule]
                ):
                    print("Found molecule {0}, resetting id counter from {1}".format(cols[0], next_id))
                    next_id = args.starting_id
                    last_molecule = cols[0]

                # grab the ID column if any
                feat_id = gff.column_9_value(cols[8], "ID")
                parent = gff.column_9_value(cols[8], "Parent")
                feat_type = cols[2]

                # issue
                #  66F4EEF2E3C863C251F831817FF71233
                #  7F1917E4D81A959078C9A38E15488BC0
                #  E22888670919A4A888572155F40F2654
                #  B9D9CF1F7A8E5A2E1124F0A6C68840DC -> BBM_I00232
                #  gene before is: 6DE6BCCE69CCDC39994A0940B2ED524A - novel
                #  errors on: BmicrotiR1_01g00233 -> BBM_I00233
                #  5800A4110A62E4EAE57AFAD1F8D65CB3 BBM_I00233

                if feat_type == "gene":
                    if feat_id in id_mapping:
                        locus_id = id_mapping[feat_id]

                        # A mapped ID is fixed, so retrying can never resolve
                        # a collision; fail loudly instead of looping forever.
                        if locus_id in loci_assigned:
                            raise Exception("ERROR: locus tag {0} mapped for gene {1} was already assigned".format(locus_id, feat_id))
                    else:
                        if args.molecule_map is not None and cols[0] not in mol_mapping:
                            raise Exception("ERROR: --molecule_map passed but {0} wasn't found in it.".format(cols[0]))

                        # Candidate suffix for "bmicroti" mode; advanced on
                        # every retry so a collision cannot repeat forever.
                        bmicroti_number = last_number_portion_assigned + 1

                        while True:
                            if args.molecule_map is None:
                                locus_id = "{0}_{1}".format(
                                    args.prefix, str(next_id).zfill(args.padding)
                                )
                            elif args.custom == "bmicroti":
                                locus_id = "{0}_{2}g{1}".format(
                                    args.prefix,
                                    str(bmicroti_number).zfill(args.padding),
                                    mol_mapping[cols[0]],
                                )
                                bmicroti_number += 1
                            else:
                                locus_id = "{0}_{2}g{1}".format(
                                    args.prefix,
                                    str(next_id).zfill(args.padding),
                                    mol_mapping[cols[0]],
                                )

                            # NOTE(review): the counter is advanced only for
                            # auto-generated IDs -- confirm against the
                            # original indentation.
                            next_id += args.interval

                            ## make sure this wasn't generated already (possibly conflict
                            #   between --id_file and an auto-generated ID?
                            if locus_id not in loci_assigned:
                                break

                            print("DEBUG: Duplicate ID assigned ({0}), trying again.".format(locus_id))

                    cols[8] = gff.set_column_9_value(cols[8], "locus_tag", locus_id)
                    loci_assigned.add(locus_id)
                    gene_loci[feat_id] = locus_id

                    m = re.search(r"(\d+)$", locus_id)
                    if m:
                        last_number_portion_assigned = int(m.group(1))

                elif feat_type.endswith("RNA"):
                    if parent in gene_loci:
                        cols[8] = gff.set_column_9_value(cols[8], "locus_tag", gene_loci[parent])
                    else:
                        raise Exception("ERROR: found RNA {0} whose parent {1} wasn't found yet".format(feat_id, parent))

                fout.write("\t".join(cols) + "\n")
    finally:
        # Only close what this function opened; STDOUT belongs to the caller.
        if fout is not sys.stdout:
            fout.close()