def main():
    """Rename rows of one annotation table using coordinates from another.

    Command line::

        <gff_to_rename> <gff_with_perfect_names> <output_gff> [name_suffix]

    Every row of ``gff_to_rename`` is compared with every row of
    ``gff_with_perfect_names``.  Two rows match when their first column is
    equal and either their fourth or their fifth column is equal.  For each
    match, the first eight columns of the row are written to ``output_gff``
    followed by the ninth column of the matching reference row plus
    ``name_suffix`` (default ``_15V-P4``).  A row with several matches is
    therefore written several times.

    A row without any match is written once with the attribute
    ``ID=new_gene_<n>``.  When such a row has ``gene`` in its third column
    and no ``trna`` in its ninth column, the same line is also printed to
    standard output.

    Exits with a usage message when the argument count is wrong.
    """
    args = sys.argv[1:]
    if len(args) not in (3, 4):
        sys.exit('Usage: \n' + sys.argv[0] +
                 ' <gff_to_rename> <gff_with_perfect_names> <output_gff>'
                 ' [name_suffix]')
    gff_to_rename, gff_with_perfect_names, output_gff = args[:3]
    # The suffix used to be hard-coded; the old value stays the default so
    # three-argument invocations produce the same output as before.
    name_suffix = args[3] if len(args) == 4 else '_15V-P4'

    perfect_names = io.read_tsv(gff_with_perfect_names)
    to_rename = io.read_tsv(gff_to_rename)
    new_gene_count = 0
    with open(output_gff, 'w') as out:
        for line_to_rename in to_rename:
            prefix = '\t'.join(line_to_rename[:8]) + '\t'
            found = False
            for perfect_line in perfect_names:
                # the same contig, the same start or end
                same_contig = line_to_rename[0] == perfect_line[0]
                if same_contig and (line_to_rename[3] == perfect_line[3]
                                    or line_to_rename[4] == perfect_line[4]):
                    out.write(prefix + perfect_line[8] + name_suffix + '\n')
                    found = True
            if found:
                continue
            new_line = prefix + 'ID=new_gene_' + str(new_gene_count)
            out.write(new_line + '\n')
            # we don't need 'new' tRNA genes in the printed report
            if line_to_rename[2] == 'gene' and 'trna' not in line_to_rename[8]:
                print(new_line)
            # Counted per unmatched row so every placeholder ID is distinct.
            new_gene_count += 1
def main():
    """Rename rows of one annotation table using coordinates from another.

    Command line::

        <gff_to_rename> <gff_with_perfect_names> <output_gff> [name_suffix]

    Every row of ``gff_to_rename`` is compared with every row of
    ``gff_with_perfect_names``.  Two rows match when their first column is
    equal and either their fourth or their fifth column is equal.  For each
    match, the first eight columns of the row are written to ``output_gff``
    followed by the ninth column of the matching reference row plus
    ``name_suffix`` (default ``_15V-P4``).  A row with several matches is
    therefore written several times.

    A row without any match is written once with the attribute
    ``ID=new_gene_<n>``.  When such a row has ``gene`` in its third column
    and no ``trna`` in its ninth column, the same line is also printed to
    standard output.

    Exits with a usage message when the argument count is wrong.
    """
    args = sys.argv[1:]
    if len(args) not in (3, 4):
        sys.exit('Usage: \n' + sys.argv[0] +
                 ' <gff_to_rename> <gff_with_perfect_names> <output_gff>'
                 ' [name_suffix]')
    gff_to_rename, gff_with_perfect_names, output_gff = args[:3]
    # The suffix used to be hard-coded; the old value stays the default so
    # three-argument invocations produce the same output as before.
    name_suffix = args[3] if len(args) == 4 else '_15V-P4'

    perfect_names = io.read_tsv(gff_with_perfect_names)
    to_rename = io.read_tsv(gff_to_rename)
    new_gene_count = 0
    with open(output_gff, 'w') as out:
        for line_to_rename in to_rename:
            prefix = '\t'.join(line_to_rename[:8]) + '\t'
            found = False
            for perfect_line in perfect_names:
                # the same contig, the same start or end
                same_contig = line_to_rename[0] == perfect_line[0]
                if same_contig and (line_to_rename[3] == perfect_line[3]
                                    or line_to_rename[4] == perfect_line[4]):
                    out.write(prefix + perfect_line[8] + name_suffix + '\n')
                    found = True
            if found:
                continue
            new_line = prefix + 'ID=new_gene_' + str(new_gene_count)
            out.write(new_line + '\n')
            # we don't need 'new' tRNA genes in the printed report
            if line_to_rename[2] == 'gene' and 'trna' not in line_to_rename[8]:
                print(new_line)
            # Counted per unmatched row so every placeholder ID is distinct.
            new_gene_count += 1
def main():
    """
    Run one of the format-conversion functions selected on the command line.

    Inputs: input file, output file, conversion function, other arguments

    The input file is read with ``io.read_tsv`` and handed to
    ``gtf_to_gff3``, ``gff_to_bed`` or ``bed_to_gff3`` according to
    ``--function``.  Any other function name only prints a notice.

    ``--cds_only`` may be given bare (meaning true) or with an explicit
    boolean word such as ``True`` / ``False``.
    """

    def parse_bool(text):
        # argparse's ``type=bool`` calls bool() on the raw string, so every
        # non-empty value -- including "False" -- would come out as True.
        lowered = text.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays false, as bool('') was before.
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got %r" % text)

    parser = argparse.ArgumentParser()
    required = parser.add_argument_group('required arguments')
    required.add_argument("-f", "--function",
                          help="function name "
                               "(currently implemented functions: "
                               "\n gtf_to_gff3, gff_to_bed, bed_to_gff3)",
                          required=True)
    required.add_argument("-i", "--input_filename",
                          help="input file to work with",
                          required=True)
    required.add_argument("-o", "--output_filename",
                          help="output file to write to",
                          required=True)
    parser.add_argument("--source", default="bed2gff",
                        help="source information in gtf "
                             "(program that generated this file, "
                             "database or project name)")
    # nargs='?' + const=True lets a bare ``--cds_only`` switch the option on
    # while ``--cds_only True`` / ``--cds_only False`` keep working.
    parser.add_argument("--cds_only", type=parse_bool, nargs='?',
                        const=True, default=False,
                        help="generate only CDS lines "
                             "(default is FALSE)")
    args = parser.parse_args()

    input_list_of_lists = io.read_tsv(args.input_filename)
    if args.function == 'gtf_to_gff3':
        gtf_to_gff3(input_list_of_lists, args.output_filename, args.cds_only)
    elif args.function == "gff_to_bed":
        gff_to_bed(input_list_of_lists, args.output_filename)
    elif args.function == "bed_to_gff3":
        bed_to_gff3(input_list_of_lists, args.output_filename, args.source)
    else:
        print(args.function, ': This function is not implemented')
        print("Currently implemented functions: gtf_to_gff3, gff_to_bed, bed_to_gff3")
def main():
    """Write the contigs flagged by ``check_for_peculiarities`` to a file.

    Expects exactly two command-line arguments: the table to read with
    ``read_tsv`` and the path of the report to create.  The flagged items
    are written one per line, with no newline after the last one.
    """
    source_path, report_path = sys.argv[1:]
    flagged = check_for_peculiarities(read_tsv(source_path))
    with open(report_path, 'w') as report:
        report.write('\n'.join(flagged))
def main():
    """Compare a reference table with a mutant table and write the result.

    Command line::

        compare_vcf.py <reference_filename> <mutant_filename> [output_filename]

    Both inputs are read with ``read_tsv`` and passed to ``compare``; the
    result goes to ``write_tsv``.  When no output filename is given,
    ``write_tsv`` receives ``sys.stdout`` instead of a path.

    Prints the usage text and exits with status 2 unless two or three
    arguments are supplied.
    """
    arguments = sys.argv[1:]
    # Extra arguments used to be accepted, but then the requested output
    # file was silently ignored in favour of stdout; reject them instead.
    if len(arguments) not in (2, 3):
        print('Usage: \n'
              'compare_vcf.py <reference_filename> <mutant_filename> [output_filename]')
        sys.exit(2)

    reference_filename, mutant_filename = arguments[:2]
    output_filename = arguments[2] if len(arguments) == 3 else sys.stdout

    reference = read_tsv(reference_filename)
    mutant = read_tsv(mutant_filename)
    result = compare(reference, mutant)
    write_tsv(result, output_filename)
def main():
    """
    Run one of the format-conversion functions selected on the command line.

    Inputs: input file, output file, conversion function, other arguments

    The input file is read with ``io.read_tsv`` and handed to
    ``gtf_to_gff3``, ``gff_to_bed`` or ``bed_to_gff3`` according to
    ``--function``.  Any other function name only prints a notice.

    ``--cds_only`` may be given bare (meaning true) or with an explicit
    boolean word such as ``True`` / ``False``.
    """

    def parse_bool(text):
        # argparse's ``type=bool`` calls bool() on the raw string, so every
        # non-empty value -- including "False" -- would come out as True.
        lowered = text.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays false, as bool('') was before.
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got %r" % text)

    parser = argparse.ArgumentParser()
    required = parser.add_argument_group('required arguments')
    required.add_argument("-f", "--function",
                          help="function name "
                               "(currently implemented functions: "
                               "\n gtf_to_gff3, gff_to_bed, bed_to_gff3)",
                          required=True)
    required.add_argument("-i", "--input_filename",
                          help="input file to work with",
                          required=True)
    required.add_argument("-o", "--output_filename",
                          help="output file to write to",
                          required=True)
    parser.add_argument("--source", default="bed2gff",
                        help="source information in gtf "
                             "(program that generated this file, "
                             "database or project name)")
    # nargs='?' + const=True lets a bare ``--cds_only`` switch the option on
    # while ``--cds_only True`` / ``--cds_only False`` keep working.
    parser.add_argument("--cds_only", type=parse_bool, nargs='?',
                        const=True, default=False,
                        help="generate only CDS lines "
                             "(default is FALSE)")
    args = parser.parse_args()

    input_list_of_lists = io.read_tsv(args.input_filename)
    if args.function == 'gtf_to_gff3':
        gtf_to_gff3(input_list_of_lists, args.output_filename, args.cds_only)
    elif args.function == "gff_to_bed":
        gff_to_bed(input_list_of_lists, args.output_filename)
    elif args.function == "bed_to_gff3":
        bed_to_gff3(input_list_of_lists, args.output_filename, args.source)
    else:
        print(args.function, ': This function is not implemented')
        print(
            "Currently implemented functions: gtf_to_gff3, gff_to_bed, bed_to_gff3"
        )
def main():
    """Rename mRNA rows of an annotation table using a lookup table.

    Command line::

        <gff_to_rename> <table_for_rename> <output_gff>

    Only rows whose third column is ``mRNA`` are processed; all other rows
    are left out of the output.  For every row of ``table_for_rename`` whose
    fifth column occurs as a substring of the mRNA row's ninth column, the
    first eight columns are written to ``output_gff`` followed by
    ``ID=<fourth table column>``.  A row matching several table rows is
    written once per match.

    An mRNA row without any match is written once with
    ``ID=new_gene_<n>``; unless its ninth column contains ``trna`` the same
    line is also printed to standard output.

    Exits with a usage message when the argument count is wrong.
    """
    args = sys.argv[1:]
    if len(args) != 3:
        sys.exit('Usage: \n' + sys.argv[0] +
                 ' <gff_to_rename> <table_for_rename> <output_gff>')
    gff_to_rename, table_for_rename, output_gff = args

    table = io.read_tsv(table_for_rename)
    to_rename = io.read_tsv(gff_to_rename)
    new_gene_count = 0
    with open(output_gff, 'w') as out:
        for line_to_rename in to_rename:
            if line_to_rename[2] != 'mRNA':
                continue
            prefix = '\t'.join(line_to_rename[:8]) + '\t'
            attributes = line_to_rename[8]
            found = False
            for cluster in table:
                # Which column holds the MAKER name depends on the order in
                # the table; earlier variants used cluster[3][:-27], or
                # cluster[3][:-7] when using augustus.  A [:7] prefix is not
                # truly unique because there might be '01' and '013'.
                # NOTE(review): the substring test below can presumably
                # confuse such names as well -- confirm against real data.
                maker_name = cluster[4]
                proper_name = cluster[3]
                if maker_name in attributes:
                    out.write(prefix + 'ID=' + proper_name + '\n')
                    found = True
            if found:
                continue
            new_line = prefix + 'ID=new_gene_' + str(new_gene_count)
            out.write(new_line + '\n')
            # we don't need 'new' tRNA genes in the printed report
            if 'trna' not in attributes:
                print(new_line)
            # Counted per unmatched row so every placeholder ID is distinct.
            new_gene_count += 1
def main():
    """
    Inputs: input file, output file name

    Reads the input with ``io.read_tsv`` and passes the rows, the input
    file's extension and the output name to ``correct_coord``.  Inputs whose
    extension is neither ``.bed`` nor ``.gff`` are rejected after reading.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-i", "--input_filename",
                     required=True,
                     help="input file to work with")
    cli.add_argument("-o", "--out",
                     default="corrected.gff",
                     help="output gff / bed file")
    options = cli.parse_args()

    rows = io.read_tsv(options.input_filename)
    _, extension = os.path.splitext(options.input_filename)
    if extension in (".bed", ".gff"):
        correct_coord(rows, extension, options.out)
    else:
        print("Unknown file extension. Please make sure you have a .bed or .gff file")
        sys.exit("Unknown file type")