Пример #1
0
def main():
    """Copy a GFF file, replacing the attribute column with reference names.

    Expects exactly three command-line arguments: the GFF to rename, the GFF
    that carries the reference names, and the output path.

    A record matches a reference line when both share the contig (column 0)
    and either the start (column 3) or the end (column 4).  One output line
    is written for EVERY matching reference line, using the reference
    attribute column followed by the hard-coded '_15V-P4' suffix.

    Records without any match get a placeholder 'ID=new_gene_<n>'.  The
    counter <n> only advances for unmatched 'gene' records whose attribute
    column does not contain 'trna'; those records are also echoed to stdout.
    """
    gff_to_rename, gff_with_perfect_names, output_gff = sys.argv[1:]
    perfect_names = io.read_tsv(gff_with_perfect_names)
    strain = '15V'  # TODO: take the strain from a command-line parser
    to_rename = io.read_tsv(gff_to_rename)
    new_genes = []
    new_gene_count = 0

    def leading_columns(fields):
        # First eight columns, tab-joined, with the trailing separator.
        return '\t'.join(fields[:8]) + '\t'

    with open(output_gff, 'w') as out:
        for record in to_rename:
            matched = False
            for reference in perfect_names:
                if record[0] != reference[0]:
                    continue  # different contig
                if record[3] != reference[3] and record[4] != reference[4]:
                    continue  # neither start nor end agrees
                # TODO: make the suffix configurable instead of hard-coded
                out.write(leading_columns(record) + reference[8] + '_15V-P4'
                          + '\n')
                matched = True
            if matched:
                continue

            placeholder = (leading_columns(record) + 'ID=new_gene_'
                           + str(new_gene_count))
            out.write(placeholder + '\n')
            # 'New' tRNA genes are not of interest, so they are not counted.
            if record[2] == 'gene' and 'trna' not in record[8]:
                print(placeholder)
                new_genes.append(record)
                new_gene_count += 1
def main():
    """Rename the features of a GFF using a second GFF with trusted names.

    Command line: <gff_to_rename> <gff_with_perfect_names> <output_gff>.

    For each feature of the first file, every line of the trusted file that
    lies on the same contig and shares its start or its end produces one
    output line: the feature's first eight columns plus the trusted
    attribute column with '_15V-P4' appended.  Features with no such line
    are written with 'ID=new_gene_<n>' instead; unmatched 'gene' features
    that are not tRNAs are additionally printed and advance <n>.
    """
    gff_to_rename, gff_with_perfect_names, output_gff = sys.argv[1:]
    perfect_names = io.read_tsv(gff_with_perfect_names)
    strain = '15V'  # TODO: add a parser for this value
    to_rename = io.read_tsv(gff_to_rename)
    new_genes = []
    new_gene_count = 0

    with open(output_gff, 'w') as out:
        for feature in to_rename:
            hits = 0
            for trusted in perfect_names:
                same_contig = feature[0] == trusted[0]
                if same_contig and (feature[3] == trusted[3]
                                    or feature[4] == trusted[4]):
                    # TODO: replace the fixed suffix with something flexible
                    renamed = '\t'.join(feature[:8]) + '\t' + trusted[8]
                    out.write(renamed + '_15V-P4' + '\n')
                    hits += 1

            if hits:
                continue

            fallback = '\t'.join(feature[:8]) + '\t' + 'ID=new_gene_'
            fallback += str(new_gene_count)
            out.write(fallback + '\n')
            # tRNA genes are deliberately excluded from the 'new gene' list
            if feature[2] == 'gene' and 'trna' not in feature[8]:
                print(fallback)
                new_genes.append(feature)
                new_gene_count += 1
Пример #3
0
def main():
    """Command-line entry point for the annotation format converters.

    Options:
        -f/--function         converter to run: gtf_to_gff3, gff_to_bed or
                              bed_to_gff3 (required)
        -i/--input_filename   input file, read with io.read_tsv (required)
        -o/--output_filename  output file passed to the converter (required)
        --source              source string forwarded to bed_to_gff3
        --cds_only            boolean forwarded to gtf_to_gff3; accepts
                              true/false style values, or no value at all
                              (meaning true)

    An unknown function name is reported on stdout after the input has been
    read; no converter is run in that case.
    """

    def str_to_bool(value):
        # argparse's type=bool calls bool() on the raw string, so
        # "--cds_only False" used to evaluate to True.  Parse explicitly.
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', 'on', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', 'off', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (true/false), got %r' % value)

    parser = argparse.ArgumentParser()

    required = parser.add_argument_group('required arguments')
    required.add_argument("-f", "--function", help="function name "
                                                   "(currently implemented functions: "
                                                   "\n gtf_to_gff3, gff_to_bed, bed_to_gff3)", required=True)
    required.add_argument("-i", "--input_filename", help="input file to work with", required=True)
    required.add_argument("-o", "--output_filename", help="output file to write to", required=True)

    parser.add_argument("--source", default="bed2gff", help="source information in gtf "
                                                            "(program that generated this file, "
                                                            "database or project name)")
    # nargs='?' with const=True lets a bare "--cds_only" act as a flag while
    # still accepting the explicit "--cds_only <value>" form.
    parser.add_argument("--cds_only", type=str_to_bool, nargs='?', const=True, default=False,
                        help="generate only CDS lines "
                             "(default is FALSE)")

    args = parser.parse_args()

    input_list_of_lists = io.read_tsv(args.input_filename)

    if args.function == 'gtf_to_gff3':
        gtf_to_gff3(input_list_of_lists, args.output_filename, args.cds_only)
    elif args.function == "gff_to_bed":
        gff_to_bed(input_list_of_lists, args.output_filename)
    elif args.function == "bed_to_gff3":
        bed_to_gff3(input_list_of_lists, args.output_filename, args.source)
    else:
        print(args.function, ': This function is not implemented')
        print("Currently implemented functions: gtf_to_gff3, gff_to_bed, bed_to_gff3")
Пример #4
0
def main():
    """Write the contigs flagged by check_for_peculiarities to a file.

    Expects exactly two command-line arguments: the input table (read with
    read_tsv) and the output path.  The values returned by
    check_for_peculiarities are written one per line, with no trailing
    newline after the last one.
    """
    input_name, output_name = sys.argv[1:]

    strange_contigs = check_for_peculiarities(read_tsv(input_name))

    with open(output_name, 'w') as report:
        report.write('\n'.join(strange_contigs))
Пример #5
0
def main():
    """Compare a reference table with a mutant table and write the result.

    Command line: <reference_filename> <mutant_filename> [<output_filename>].
    With fewer than two file arguments a usage message is printed and the
    process exits with status 2.  The output target handed to write_tsv is
    the third argument when exactly three are given, otherwise sys.stdout.
    """
    argv = sys.argv
    if len(argv) < 3:
        print('Usage: \n'
              'compare_vcf.py <reference_filename> <mutant_filename> <output_filename>')
        sys.exit(2)

    reference_filename, mutant_filename = argv[1:3]
    # write_tsv receives either a path string or the stdout stream.
    output_filename = argv[3] if len(argv) == 4 else sys.stdout

    reference = read_tsv(reference_filename)
    mutant = read_tsv(mutant_filename)

    write_tsv(compare(reference, mutant), output_filename)
Пример #6
0
def main():
    """Parse the command line and run the requested format conversion.

    Required options: -f/--function (gtf_to_gff3, gff_to_bed or
    bed_to_gff3), -i/--input_filename and -o/--output_filename.
    Optional: --source (forwarded to bed_to_gff3) and --cds_only (boolean
    forwarded to gtf_to_gff3).

    The input is read with io.read_tsv before the function name is checked;
    an unknown name only prints a message and runs no converter.
    """

    def parse_bool(text):
        # type=bool would treat every non-empty string, including "False",
        # as True, so the value is interpreted explicitly instead.
        normalized = text.strip().lower()
        if normalized in ('true', 't', 'yes', 'y', 'on', '1'):
            return True
        if normalized in ('false', 'f', 'no', 'n', 'off', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (true/false), got %r' % text)

    parser = argparse.ArgumentParser()

    required = parser.add_argument_group('required arguments')
    required.add_argument("-f",
                          "--function",
                          help="function name "
                          "(currently implemented functions: "
                          "\n gtf_to_gff3, gff_to_bed, bed_to_gff3)",
                          required=True)
    required.add_argument("-i",
                          "--input_filename",
                          help="input file to work with",
                          required=True)
    required.add_argument("-o",
                          "--output_filename",
                          help="output file to write to",
                          required=True)

    parser.add_argument("--source",
                        default="bed2gff",
                        help="source information in gtf "
                        "(program that generated this file, "
                        "database or project name)")
    # A bare "--cds_only" (no value) is accepted and means True; the
    # explicit "--cds_only <value>" form keeps working.
    parser.add_argument("--cds_only",
                        type=parse_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="generate only CDS lines "
                        "(default is FALSE)")

    args = parser.parse_args()

    input_list_of_lists = io.read_tsv(args.input_filename)

    if args.function == 'gtf_to_gff3':
        gtf_to_gff3(input_list_of_lists, args.output_filename, args.cds_only)
    elif args.function == "gff_to_bed":
        gff_to_bed(input_list_of_lists, args.output_filename)
    elif args.function == "bed_to_gff3":
        bed_to_gff3(input_list_of_lists, args.output_filename, args.source)
    else:
        print(args.function, ': This function is not implemented')
        print(
            "Currently implemented functions: gtf_to_gff3, gff_to_bed, bed_to_gff3"
        )
def main():
    """Rename the mRNA records of a GFF according to a lookup table.

    Command line: <gff_to_rename> <table_for_rename> <output_gff>.

    Only records whose type column (index 2) is 'mRNA' are written; all
    other records are skipped.  For each table row, column 4 is taken as the
    MAKER name and column 3 as the proper name; whenever the MAKER name
    occurs in the record's attribute column an output line with
    'ID=<proper name>' is written (one line per matching row).

    mRNA records with no matching row are written with 'ID=new_gene_<n>'.
    Those that do not contain 'trna' in the attribute column are also
    printed and advance the counter <n>.
    """
    gff_to_rename, table_for_rename, output_gff = sys.argv[1:]
    table = io.read_tsv(table_for_rename)
    strain = ''  # TODO: add a parser for this value
    to_rename = io.read_tsv(gff_to_rename)
    new_genes = []
    new_gene_count = 0

    with open(output_gff, 'w') as out:
        for record in to_rename:
            if record[2] != 'mRNA':
                continue

            prefix_matched = False
            for row in table:
                # Swap these two columns if the table is ordered differently.
                # The full name is used because a truncated prefix is not
                # unique (e.g. '01' versus '013').
                maker_name = row[4]
                proper_name = row[3]
                if maker_name not in record[8]:
                    continue
                out.write('\t'.join(record[:8]) + '\t' + 'ID=' + proper_name
                          + '\n')
                prefix_matched = True

            if prefix_matched:
                continue

            placeholder = ('\t'.join(record[:8]) + '\t' + 'ID=new_gene_'
                           + str(new_gene_count))
            out.write(placeholder + '\n')
            # The record is already known to be an mRNA here; only 'new'
            # tRNA entries are left out of the report.
            if 'trna' not in record[8]:
                print(placeholder)
                new_genes.append(record)
                new_gene_count += 1
def main():
    """Correct the coordinates of a .bed or .gff file.

    Options:
        -i/--input_filename  input file to work with (required); its
                             extension must be exactly '.bed' or '.gff'
        -o/--out             output gff / bed file (default 'corrected.gff')

    Exits with the message 'Unknown file type' when the extension is not
    supported.  Otherwise the parsed rows, the extension and the output
    name are handed to correct_coord.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-i", "--input_filename", help="input file to work with", required=True)
    parser.add_argument("-o", "--out", default="corrected.gff", help="output gff / bed file")

    args = parser.parse_args()

    # Check the extension before touching the file so that an unsupported
    # input fails immediately instead of after it has been read and parsed.
    filetype = os.path.splitext(args.input_filename)[1]
    if filetype not in (".bed", ".gff"):
        print("Unknown file extension. Please make sure you have a .bed or .gff file")
        sys.exit("Unknown file type")

    input_list_of_lists = io.read_tsv(args.input_filename)

    correct_coord(input_list_of_lists, filetype, args.out)