Пример #1
0
def compute_tss_type(args_srna, cdss, genes, wigs_f, wigs_r):
    '''Classify the TSSs and write them to <prefix>_TSS.gff.

    The TSSs read from ``args_srna.tss_file`` are compared with the
    annotation via ``compare_tss_cds``, numbered ``tss0``, ``tss1``, ...,
    passed through ``fix_primary_type`` together with the wiggle data and
    finally written as tab-separated lines to
    ``<out_folder>/TSS_class/<prefix>_TSS.gff``.

    Args:
        args_srna: option container; ``tss_file``, ``out_folder`` and
            ``prefix`` are read here.
        cdss: forwarded unchanged to ``compare_tss_cds``.
        genes: forwarded unchanged to ``compare_tss_cds``.
        wigs_f: wiggle data handed to ``modify_wigs_for_tss_type``
            with strand "+".
        wigs_r: wiggle data handed to ``modify_wigs_for_tss_type``
            with strand "-".

    Returns:
        None. The result is the file written to disk.
    '''
    # Only the TSS list is needed; the IDs are renumbered from 0 below.
    tsss, _ = read_tss(args_srna.tss_file)
    class_dir = os.path.join(args_srna.out_folder, "TSS_class")
    # exist_ok avoids listing the whole output folder and the
    # check-then-create race of a "not in os.listdir()" test.
    os.makedirs(class_dir, exist_ok=True)
    new_tss_file = os.path.join(class_dir,
                                "_".join([args_srna.prefix, "TSS.gff"]))
    # The context manager closes the output even if a helper raises.
    with open(new_tss_file, "w") as new_tss_fh:
        for num_tss, tss in enumerate(tsss):
            # tss_type[0] is the attribute string, tss_type[1] the
            # matching attribute dict.
            tss_type = compare_tss_cds(tss, cdss, genes)
            tss.attributes = tss_type[1]
            tss.attributes["ID"] = "tss" + str(num_tss)
            tss.attribute_string = "".join(
                [tss_type[0], ";ID=tss", str(num_tss)])
        wigs_fm = modify_wigs_for_tss_type(wigs_f, "+")
        wigs_rm = modify_wigs_for_tss_type(wigs_r, "-")
        final_tsss = fix_primary_type(tsss, wigs_fm, wigs_rm)
        for tss in final_tsss:
            # Rebuild the attribute column from the dict, which
            # fix_primary_type may have modified. "=".join requires every
            # attribute value to be a string.
            tss.attribute_string = ";".join(
                ["=".join(items) for items in tss.attributes.items()])
            columns = [tss.seq_id, tss.source, tss.feature, tss.start,
                       tss.end, tss.score, tss.strand, tss.phase,
                       tss.attribute_string]
            new_tss_fh.write(
                "\t".join([str(field) for field in columns]) + "\n")
Пример #2
0
def compute_tss_type(args_srna, cdss, genes, wigs_f, wigs_r):
    '''Assign a type to every TSS and store the result as a GFF file.

    Steps:
      1. read the TSSs from ``args_srna.tss_file``;
      2. classify each one with ``compare_tss_cds`` and give it the ID
         ``tss<N>`` (N counts from 0);
      3. run ``fix_primary_type`` on the TSSs using the wiggle data
         prepared by ``modify_wigs_for_tss_type``;
      4. write one tab-separated line per TSS to
         ``<out_folder>/TSS_class/<prefix>_TSS.gff``.

    Args:
        args_srna: option container; ``tss_file``, ``out_folder`` and
            ``prefix`` are used.
        cdss: passed through to ``compare_tss_cds``.
        genes: passed through to ``compare_tss_cds``.
        wigs_f: wiggle data prepared with strand "+".
        wigs_r: wiggle data prepared with strand "-".

    Returns:
        None.
    '''
    # The count returned by read_tss is discarded; numbering restarts at 0.
    tsss, _ = read_tss(args_srna.tss_file)
    out_dir = os.path.join(args_srna.out_folder, "TSS_class")
    # Create the folder if needed without a separate existence check.
    os.makedirs(out_dir, exist_ok=True)
    new_tss_file = os.path.join(
        out_dir, "_".join([args_srna.prefix, "TSS.gff"]))
    # "with" guarantees the handle is closed on every exit path.
    with open(new_tss_file, "w") as new_tss_fh:
        for num_tss, tss in enumerate(tsss):
            # compare_tss_cds yields (attribute string, attribute dict).
            tss_type = compare_tss_cds(tss, cdss, genes)
            tss.attributes = tss_type[1]
            tss.attributes["ID"] = "tss" + str(num_tss)
            tss.attribute_string = "".join(
                [tss_type[0], ";ID=tss", str(num_tss)])
        wigs_fm = modify_wigs_for_tss_type(wigs_f, "+")
        wigs_rm = modify_wigs_for_tss_type(wigs_r, "-")
        final_tsss = fix_primary_type(tsss, wigs_fm, wigs_rm)
        for tss in final_tsss:
            # The dict is the source of truth after fix_primary_type, so
            # the string form is regenerated from it (values must be str).
            tss.attribute_string = ";".join(
                ["=".join(items) for items in tss.attributes.items()])
            new_tss_fh.write("\t".join([str(field) for field in [
                tss.seq_id, tss.source, tss.feature, tss.start,
                tss.end, tss.score, tss.strand, tss.phase,
                tss.attribute_string]]) + "\n")
Пример #3
0
def upstream(tss_file, fasta_file, gff_file, out_class, args_pro, prefix):
    '''Get the upstream sequence of TSS.

    Two modes, selected by ``args_pro.source``:

    * truthy: the TSS entries must already carry a "type" attribute; the
      upstream sequence of each one is written via ``print_fasta``.
    * falsy: every TSS is classified with ``compare_tss_cds``, given the
      ID ``<seq_id>_tss<N>``, run through ``fix_primary_type`` and written
      to ``out_class`` as GFF3; the upstream sequences are written as well.

    FASTA output goes to fixed files under ``tmp/`` and only happens when
    ``fasta_file`` is not None.

    Args:
        tss_file: TSS file handed to ``read_data``.
        fasta_file: genome FASTA handed to ``read_data``; None disables
            all FASTA output.
        gff_file: annotation handed to ``read_gff`` (classification mode).
        out_class: path of the GFF3 file written in classification mode.
        args_pro: option container; ``source``, ``nt_before``,
            ``tex_wigs``, ``input_libs`` and ``wig_path`` are read.
        prefix: file name prefix of the ``_forward.wig`` /
            ``_reverse.wig`` files inside ``args_pro.wig_path``.

    Returns:
        None.
    '''
    from contextlib import ExitStack

    fasta_paths = (("pri", "tmp/primary.fa"),
                   ("sec", "tmp/secondary.fa"),
                   ("inter", "tmp/internal.fa"),
                   ("anti", "tmp/antisense.fa"),
                   ("orph", "tmp/orphan.fa"))
    # Every handle opened below is registered on the stack, so all of them
    # are flushed and closed when the function leaves, also on errors.
    with ExitStack() as stack:
        files = None
        if fasta_file is not None:
            files = {key: stack.enter_context(open(path, "w"))
                     for key, path in fasta_paths}
        tsss, seq = read_data(tss_file, fasta_file)
        num_tss = 0
        if not args_pro.source:
            out = stack.enter_context(open(out_class, "w"))
            out.write("##gff-version 3\n")
            cdss, genes = read_gff(gff_file)
        for tss in tsss:
            if args_pro.source and ("type" not in tss.attributes):
                print("Error: The TSS gff file may not generated from "
                      "ANNOgesic. Please run with --tss_source!")
                # NOTE(review): sys.exit() without an argument reports
                # success (status 0) to the shell - confirm this is wanted.
                sys.exit()
            if args_pro.source:
                # Without a FASTA file there are no output handles, so the
                # sequence export is skipped (same guard as further down).
                if fasta_file is not None:
                    name = ">" + "_".join(
                        [str(tss.start), tss.strand, tss.seq_id])
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
            else:
                # tss_type[0] is the attribute string, tss_type[1] the
                # matching attribute dict.
                tss_type = compare_tss_cds(tss, cdss, genes)
                tss.attributes = tss_type[1]
                tss.attributes["ID"] = tss.seq_id + "_tss" + str(num_tss)
                tss.attribute_string = "".join(
                    [tss_type[0], ";ID=", tss.seq_id, "_tss",
                     str(num_tss)])
                num_tss += 1
        if not args_pro.source:
            if args_pro.tex_wigs is not None:
                libs, _ = read_libs(args_pro.input_libs, args_pro.tex_wigs)
                wigs_f = read_wig(
                    os.path.join(args_pro.wig_path,
                                 prefix + "_forward.wig"), "+", libs)
                # NOTE(review): the reverse wiggle is also read with
                # strand "+"; kept as is - confirm against read_wig.
                wigs_r = read_wig(
                    os.path.join(args_pro.wig_path,
                                 prefix + "_reverse.wig"), "+", libs)
            else:
                wigs_f = None
                wigs_r = None
            sort_tsss = sorted(
                tsss, key=lambda k: (k.seq_id, k.start, k.end, k.strand))
            final_tsss = fix_primary_type(sort_tsss, wigs_f, wigs_r)
            for tss in final_tsss:
                name = ">" + "_".join(
                    [str(tss.start), tss.strand, tss.seq_id])
                # Regenerate the attribute column from the dict; "=".join
                # requires every attribute value to be a string.
                tss.attribute_string = ";".join(
                    ["=".join(items) for items in tss.attributes.items()])
                columns = [tss.seq_id, tss.source, tss.feature, tss.start,
                           tss.end, tss.score, tss.strand, tss.phase,
                           tss.attribute_string]
                out.write(
                    "\t".join([str(field) for field in columns]) + "\n")
                if fasta_file is not None:
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
Пример #4
0
def upstream(tss_file, fasta_file, gff_file, out_class, args_pro, prefix):
    '''Get the upstream sequence of TSS.

    With ``args_pro.source`` set, the TSS entries are expected to contain
    a "type" attribute already and only their upstream sequences are
    exported through ``print_fasta``. Otherwise each TSS is classified by
    ``compare_tss_cds``, receives the ID ``<seq_id>_tss<N>``, is corrected
    by ``fix_primary_type`` and written to ``out_class`` (GFF3) before its
    upstream sequence is exported.

    Sequences are exported to fixed files below ``tmp/`` and only when
    ``fasta_file`` is not None.

    Args:
        tss_file: TSS file passed to ``read_data``.
        fasta_file: genome FASTA passed to ``read_data``, or None to skip
            the FASTA export.
        gff_file: annotation passed to ``read_gff`` when classifying.
        out_class: output path of the classified TSS GFF3 file.
        args_pro: option container (``source``, ``nt_before``,
            ``tex_wigs``, ``input_libs``, ``wig_path``).
        prefix: prefix of the wiggle file names in ``args_pro.wig_path``.

    Returns:
        None.
    '''
    from contextlib import ExitStack

    # ExitStack owns every file opened here and closes them all on exit,
    # whether the function finishes normally or an exception escapes.
    with ExitStack() as stack:
        files = None
        if fasta_file is not None:
            files = {}
            for key, path in (("pri", "tmp/primary.fa"),
                              ("sec", "tmp/secondary.fa"),
                              ("inter", "tmp/internal.fa"),
                              ("anti", "tmp/antisense.fa"),
                              ("orph", "tmp/orphan.fa")):
                files[key] = stack.enter_context(open(path, "w"))
        tsss, seq = read_data(tss_file, fasta_file)
        num_tss = 0
        if not args_pro.source:
            out = stack.enter_context(open(out_class, "w"))
            out.write("##gff-version 3\n")
            cdss, genes = read_gff(gff_file)
        for tss in tsss:
            if args_pro.source and ("type" not in tss.attributes):
                print("Error: The TSS gff file may not generated from "
                      "ANNOgesic. Please run with --tss_source!")
                # NOTE(review): exits with status 0 although this is an
                # error path - confirm whether callers rely on that.
                sys.exit()
            if args_pro.source:
                # No FASTA input means no output handles were opened;
                # skip the export just like the classification branch.
                if fasta_file is not None:
                    name = ">" + "_".join([str(tss.start), tss.strand,
                                           tss.seq_id])
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
            else:
                # compare_tss_cds yields (attribute string, attribute dict).
                tss_type = compare_tss_cds(tss, cdss, genes)
                tss.attributes = tss_type[1]
                tss.attributes["ID"] = tss.seq_id + "_tss" + str(num_tss)
                tss.attribute_string = "".join([
                    tss_type[0], ";ID=", tss.seq_id, "_tss", str(num_tss)])
                num_tss += 1
        if not args_pro.source:
            if args_pro.tex_wigs is not None:
                libs, _ = read_libs(args_pro.input_libs, args_pro.tex_wigs)
                wigs_f = read_wig(os.path.join(
                    args_pro.wig_path, prefix + "_forward.wig"), "+", libs)
                # NOTE(review): strand "+" is used for the reverse file as
                # well; left unchanged - verify against read_wig.
                wigs_r = read_wig(os.path.join(
                    args_pro.wig_path, prefix + "_reverse.wig"), "+", libs)
            else:
                wigs_f = None
                wigs_r = None
            sort_tsss = sorted(tsss, key=lambda k: (k.seq_id, k.start,
                                                    k.end, k.strand))
            final_tsss = fix_primary_type(sort_tsss, wigs_f, wigs_r)
            for tss in final_tsss:
                name = ">" + "_".join([str(tss.start), tss.strand,
                                       tss.seq_id])
                # The attribute dict may have been changed by
                # fix_primary_type, so the string is rebuilt from it
                # (all values must be strings for "=".join).
                tss.attribute_string = ";".join(
                    ["=".join(items) for items in tss.attributes.items()])
                out.write("\t".join([str(field) for field in [
                    tss.seq_id, tss.source, tss.feature, tss.start,
                    tss.end, tss.score, tss.strand, tss.phase,
                    tss.attribute_string]]) + "\n")
                if fasta_file is not None:
                    print_fasta(seq, tss, files, name, args_pro.nt_before)
Пример #5
0
def upstream(tss_file, fasta_file, gff_file, out_class, args_pro):
    '''Get the upstream sequence of TSS.

    With ``args_pro.source`` set, the upstream sequence of every TSS is
    exported directly through ``print_fasta``. Otherwise each TSS is first
    classified by ``compare_tss_cds``, receives the ID ``tss<N>``, is
    corrected by ``fix_primary_type`` using ``tmp/merge_forward.wig`` and
    ``tmp/merge_reverse.wig``, and is written to ``out_class`` (GFF3)
    before its upstream sequence is exported.

    The sequences go to five fixed FASTA files below ``tmp/``.

    Args:
        tss_file: TSS file passed to ``read_data``.
        fasta_file: genome FASTA passed to ``read_data``.
        gff_file: annotation passed to ``read_gff`` when classifying.
        out_class: output path of the classified TSS GFF3 file.
        args_pro: option container; ``source`` and ``nt_before`` are read.

    Returns:
        None.
    '''
    from contextlib import ExitStack

    fasta_paths = (("pri", "tmp/primary.fa"),
                   ("sec", "tmp/secondary.fa"),
                   ("inter", "tmp/internal.fa"),
                   ("anti", "tmp/antisense.fa"),
                   ("orph", "tmp/orphan.fa"))
    # All handles are registered on the stack so they are flushed and
    # closed when the function leaves, including on exceptions.
    with ExitStack() as stack:
        files = {key: stack.enter_context(open(path, "w"))
                 for key, path in fasta_paths}
        tsss, seq = read_data(tss_file, fasta_file)
        num_tss = 0
        if not args_pro.source:
            out = stack.enter_context(open(out_class, "w"))
            out.write("##gff-version 3\n")
            cdss, genes = read_gff(gff_file)
        for tss in tsss:
            # Plain truthiness, matching the "not args_pro.source" tests
            # around this loop. An identity test against True would send
            # other truthy values into the classification branch, where
            # cdss/genes were never loaded.
            if args_pro.source:
                name = ">" + "_".join(
                    [str(tss.start), tss.strand, tss.seq_id])
                print_fasta(seq, tss, files, name, args_pro.nt_before)
            else:
                # tss_type[0] is the attribute string, tss_type[1] the
                # matching attribute dict.
                tss_type = compare_tss_cds(tss, cdss, genes)
                tss.attributes = tss_type[1]
                tss.attributes["ID"] = "tss" + str(num_tss)
                tss.attribute_string = "".join(
                    [tss_type[0], ";ID=tss", str(num_tss)])
                num_tss += 1
        if not args_pro.source:
            wigs_f = read_wig("tmp/merge_forward.wig", "+")
            wigs_r = read_wig("tmp/merge_reverse.wig", "-")
            sort_tsss = sorted(
                tsss, key=lambda k: (k.seq_id, k.start, k.end, k.strand))
            final_tsss = fix_primary_type(sort_tsss, wigs_f, wigs_r)
            for tss in final_tsss:
                name = ">" + "_".join(
                    [str(tss.start), tss.strand, tss.seq_id])
                # Regenerate the attribute column from the dict; "=".join
                # requires every attribute value to be a string.
                tss.attribute_string = ";".join(
                    ["=".join(items) for items in tss.attributes.items()])
                columns = [tss.seq_id, tss.source, tss.feature, tss.start,
                           tss.end, tss.score, tss.strand, tss.phase,
                           tss.attribute_string]
                out.write(
                    "\t".join([str(field) for field in columns]) + "\n")
                print_fasta(seq, tss, files, name, args_pro.nt_before)