Пример #1
0
    def gen_html_distribution(self, outputname, title, align=50):
        """Write the ``distribution.html`` report page.

        The page contains one ``distribution_test_<i>.png`` figure per entry of
        ``self.fig``, one heading per key of ``self.disperDict``, a single zebra
        table with one row per (type, chromosome) pair, and links to the
        parameter and experimental-matrix text files.

        :param outputname: path component joined between ``dir`` and ``title``.
        :param title: sub-directory name; also used as the navigation link label.
        :param align: alignment value forwarded to the table; ``align + 150`` is
            used as the left margin of the separator paragraph.
        """
        # NOTE(review): ``dir`` is not defined in this method -- presumably a
        # module-level base path that shadows the builtin; confirm.
        fp = os.path.join(dir, outputname, title)
        link_d = {title: "distribution.html"}
        html = Html(name="Viz", links_dict=link_d, fig_dir=os.path.join(dir, outputname, "fig"),
                    other_logo="viz", homepage="../index.html")
        # Only the position of each figure is needed to build its file name.
        for i, _ in enumerate(self.fig):
            html.add_figure("distribution_test_" + str(i) + ".png", align="center")

        html.add_free_content(['<p style=\"margin-left: ' + str(align + 150) + '">' +
                               '** </p>'])

        type_list = 'ssssssssssssssssssssssssssssssssssssssssssssss'
        col_size_list = [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                         10, 10]
        data_table = []
        # Defined up front so the table call below never hits an unbound name
        # when ``self.disperDict`` is empty.
        header_list = ["Chromosome"]
        for ty in self.disperDict:
            per_reference = self.disperDict[ty]
            # ``keys()`` is a view on Python 3 and cannot be added to a list
            # directly; materialise it first.
            references = list(per_reference.keys())
            header_list = ["Chromosome"] + references
            html.add_heading(ty, size=4, bold=False)
            for i, ch in enumerate(self.chrom_list):
                data_table.append(
                    [ch] + ["{:.3f} %".format(100 * per_reference[r][i]) for r in references])

        # NOTE(review): rows of every type accumulate into one table that is
        # emitted once, using the header of the last type iterated -- confirm
        # this is intended rather than one table per heading.
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align)

        html.add_free_content(['<a href="parameters.txt" style="margin-left:100">See parameters</a>'])
        html.add_free_content([
            '<a href="reference_experimental_matrix.txt" style="margin-left:100">See reference experimental matrix</a>'])
        html.add_free_content(
            ['<a href="query_experimental_matrix.txt" style="margin-left:100">See query experimental matrix</a>'])
        html.write(os.path.join(fp, "distribution.html"))
Пример #2
0
    def gen_htmlhm(self, outputname, title, align=50):
        """Write the heatmap report: ``index.html`` and ``parameters.html``.

        Both pages are written under ``<outputname>/<title>/``. The index page
        shows ``<name>.png`` for every name in ``self.hmfiles``; the parameters
        page links to the parameter and experimental-matrix text files.
        ``align`` is accepted but not used by this method.
        """
        base_name = os.path.basename(outputname)  # not referenced again below
        nav_links = OrderedDict()
        nav_links["Lineplot"] = "index.html"
        nav_links["Parameters"] = "parameters.html"

        def new_page():
            # Both pages share the same header, navigation and style settings.
            return Html(name=title,
                        links_dict=nav_links,
                        fig_rpath="../style",
                        RGT_header=False,
                        other_logo="viz",
                        homepage="../index.html")

        # Index page: one centred figure per heatmap file.
        index_page = new_page()
        for hm_name in self.hmfiles:
            index_page.add_figure(hm_name + ".png", align="center")
        index_page.write(os.path.join(outputname, title, "index.html"))

        # Parameters page: plain links to the text dumps.
        param_page = new_page()
        for anchor in (
                '<a href="parameters.txt" style="margin-left:100">See parameters</a>',
                '<a href="experimental_matrix.txt" style="margin-left:100">See experimental matrix</a>',
        ):
            param_page.add_free_content([anchor])
        param_page.write(os.path.join(outputname, title, "parameters.html"))
Пример #3
0
    def gen_html(self, directory, title, align=50):
        """Write the lineplot report: ``index.html`` and ``parameters.html``.

        Both pages are written under ``<directory>/<title>/``. The index page
        shows ``lineplot.png``; the parameters page lists the run settings read
        from ``self`` in a one-column table and links to the parameter and
        experimental-matrix text files.

        :param directory: output directory; its base name prefixes the header.
        :param title: sub-directory name and second part of the page header.
        :param align: alignment value forwarded to the parameters table.
        """
        page_header = os.path.basename(directory) + " / " + title
        nav_links = OrderedDict()
        nav_links["Lineplot"] = "index.html"
        nav_links["Parameters"] = "parameters.html"
        # Settings shared by both pages.
        page_options = dict(name=page_header,
                            links_dict=nav_links,
                            fig_rpath="../style",
                            RGT_header=False,
                            other_logo="viz",
                            homepage="../index.html")

        # Index page.
        plot_page = Html(**page_options)
        plot_page.add_figure("lineplot.png", align="center", width="80%")
        plot_page.write(os.path.join(directory, title, "index.html"))

        # Parameters page.
        param_page = Html(**page_options)
        rows = []
        if self.annotation:
            rows.append([
                "Genomic annotation: TSS - Transcription Start Site; TTS - Transcription Termination Site."
            ])
        rows.extend([
            ["Directory:      " + directory.rpartition("/")[2]],
            ["Title:          " + title],
            ["Extend length:  " + str(self.extend)],
            ["Read size:      " + str(self.rs)],
            ["Bin size:       " + str(self.bs)],
            ["Step size:      " + str(self.ss)],
            ["Center mode:    " + self.center],
        ])

        param_page.add_zebra_table(["Assumptions and hypothesis"],
                                   [20, 20, 20, 20, 20, 20, 20, 20, 20],
                                   'ssssssssss',
                                   rows,
                                   align=align,
                                   cell_align="left")

        for anchor in (
                '<a href="parameters.txt" style="margin-left:100">See parameters</a>',
                '<a href="experimental_matrix.txt" style="margin-left:100">See experimental matrix</a>',
        ):
            param_page.add_free_content([anchor])

        param_page.write(os.path.join(directory, title, "parameters.html"))
Пример #4
0
    def gen_html(self, directory, title, align=50):
        """Write the grouped lineplot report: ``index.html`` and ``parameters.html``.

        Both pages are written under ``<directory>/<title>/``. The index page
        has a heading and a ``lineplot_<tag>.png`` figure for every tag in
        ``self.group_tags``; the parameters page lists the run settings read
        from ``self`` and links to the parameter and experimental-matrix files.

        :param directory: output directory; its base name prefixes the header.
        :param title: sub-directory name and second part of the page header.
        :param align: alignment value forwarded to the parameters table.
        """
        page_header = os.path.basename(directory) + " / " + title
        nav_links = OrderedDict()
        nav_links["Lineplot"] = "index.html"
        nav_links["Parameters"] = "parameters.html"

        def new_page():
            # Both pages share the same header, navigation and style settings.
            return Html(name=page_header, links_dict=nav_links,
                        fig_rpath="../style", RGT_header=False, other_logo="viz", homepage="../index.html")

        # Index page: one section per group tag.
        plot_page = new_page()
        for tag in self.group_tags:
            plot_page.add_heading(heading=tag)
            plot_page.add_figure("lineplot_" + tag + ".png", align="center", width="80%")
        plot_page.write(os.path.join(directory, title, "index.html"))

        # Parameters page.
        param_page = new_page()
        column_types = 'ssssssssss'
        column_sizes = [20, 20, 20, 20, 20, 20, 20, 20, 20]
        table_header = ["Assumptions and hypothesis"]
        rows = []
        if self.annotation:
            rows.append(
                ["Genomic annotation: TSS - Transcription Start Site; TTS - Transcription Termination Site."])
        rows += [
            ["Directory:      " + directory.rpartition("/")[2]],
            ["Title:          " + title],
            ["Extend length:  " + str(self.extend)],
            ["Read size:      " + str(self.rs)],
            ["Bin size:       " + str(self.bs)],
            ["Step size:      " + str(self.ss)],
            ["Center mode:    " + self.center],
        ]

        param_page.add_zebra_table(table_header, column_sizes, column_types, rows,
                                   align=align, cell_align="left")

        param_page.add_free_content(
            ['<a href="parameters.txt" style="margin-left:100">See parameters</a>'])
        param_page.add_free_content(
            ['<a href="experimental_matrix.txt" style="margin-left:100">See experimental matrix</a>'])

        param_page.write(os.path.join(directory, title, "parameters.html"))
Пример #5
0
    def gen_htmlhm(self, outputname, title, align=50):
        """Write the heatmap report: ``index.html`` and ``parameters.html``.

        Both pages go under ``<outputname>/<title>/``. The index page shows
        ``<name>.png`` for each name in ``self.hmfiles``; the parameters page
        links to the parameter and experimental-matrix text files.
        ``align`` is accepted but not used by this method.
        """
        out_base = os.path.basename(outputname)  # not referenced again below
        nav_links = OrderedDict()
        nav_links["Lineplot"] = "index.html"
        nav_links["Parameters"] = "parameters.html"
        # Constructor arguments shared by the two pages.
        page_options = dict(name=title, links_dict=nav_links,
                            fig_rpath="../style", RGT_header=False,
                            other_logo="viz", homepage="../index.html")

        # Index page: each heatmap image centred on its own row.
        figures_page = Html(**page_options)
        for hm_file in self.hmfiles:
            figures_page.add_figure(hm_file + ".png", align="center")
        figures_page.write(os.path.join(outputname, title, "index.html"))

        # Parameters page: links only.
        links_page = Html(**page_options)
        links_page.add_free_content(
            ['<a href="parameters.txt" style="margin-left:100">See parameters</a>'])
        links_page.add_free_content(
            ['<a href="experimental_matrix.txt" style="margin-left:100">See experimental matrix</a>'])
        links_page.write(os.path.join(outputname, title, "parameters.html"))
Пример #6
0
    def gen_html(self, directory, title, args, align=50):
        """Write the projection-test report: ``index.html`` and ``parameters.html``.

        For every type key in ``self.plist`` the index page gets a heading and a
        sortable table with one row per (reference, query) pair, and the rows
        collected so far are dumped through ``output_array`` to
        ``statistics<type>.txt``. Pairs whose p-value is the string ``"na"`` are
        skipped and their reference is listed in a warning row; pairs whose
        proportion is below ``args.cfp`` are skipped silently. P-values below
        0.05 are rendered in red in the positive- or negative-association
        column, depending on whether the proportion exceeds the background.

        :param directory: output directory; its base name goes into the header.
        :param title: sub-directory (under ``directory``) that receives the pages.
        :param args: object providing ``cfp``, ``r``, ``q``, ``o``, ``t`` and
            ``organism``, which are read here.
        :param align: alignment value forwarded to every table.
        """
        dir_name = os.path.basename(directory)
        html_header = "Projection Test: " + dir_name
        link_d = OrderedDict()
        link_d["Projection test"] = "index.html"
        link_d["Parameters"] = "parameters.html"

        html = Html(name=html_header, links_dict=link_d,
                    fig_rpath="../style", RGT_header=False, other_logo="viz", homepage="../index.html")
        html.add_figure("projection_test.png", align="center")

        header_list = ["No.",
                       "Reference<br>name",
                       "Query<br>name",
                       "Reference<br>number",
                       "Query<br>number",
                       "Proportion",
                       "Background<br>proportion",
                       "Positive<br>association<br>p-value",
                       "Negative<br>association<br>p-value"]
        # NOTE(review): this table is never reset inside the loop below, so each
        # statistics<type>.txt also contains the rows of the types processed
        # before it -- confirm this is intended.
        statistic_table = [["Reference_name", "Query_name", "Reference_number",
                            "Query_number", "Proportion", "Background_proportion",
                            "Positive_association_p-value", "Negative_association_p-value"]]
        type_list = 'ssssssssssssssss'
        col_size_list = [5, 10, 10, 10, 10, 10, 10, 15, 15]

        # References for which no p-value could be computed; a set, so each
        # name is reported once.
        na_refs = set()
        for ind_ty, ty in enumerate(self.plist):
            html.add_heading(ty, size=4, bold=False)
            data_table = []
            for r in self.plist[ty]:
                rlen = str(self.lenlist[r])
                for q in self.plist[ty][r]:
                    qlen = str(self.lenlist[q])
                    background = self.qlist[ty][r]['Background']
                    proportion = self.qlist[ty][r][q]
                    backv = value2str(background)
                    propor = value2str(proportion)
                    pv = self.plist[ty][r][q]
                    if pv == "na":
                        na_refs.add(r)
                        continue
                    if proportion < args.cfp:
                        continue

                    pvn = 1 - pv
                    pv_str = value2str(pv)
                    pvn_str = value2str(pvn)
                    if pv < 0.05:
                        highlighted = "<font color=\"red\">" + pv_str + "</font>"
                        if background < proportion:
                            # Significant positive association.
                            html_cells = [highlighted, pvn_str]
                            stat_cells = [pv_str, pvn_str]
                        else:
                            # Significant negative association: the two
                            # p-value columns swap.
                            html_cells = [pvn_str, highlighted]
                            stat_cells = [pvn_str, pv_str]
                    else:
                        html_cells = [pv_str, pvn_str]
                        stat_cells = [pv_str, pvn_str]

                    data_table.append([str(ind_ty), r, q, rlen, qlen, propor, backv] + html_cells)
                    statistic_table.append([r, q, rlen, qlen, propor, backv] + stat_cells)

            html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, sortable=True)
            output_array(statistic_table, directory=directory, folder=title, filename="statistics" + ty + ".txt")

        header_list = ["Assumptions and hypothesis"]
        data_table = [['If the background proportion is too small, it may cause bias in p value.'],
                      [
                          'For projection test, the reference GenomicRegionSet should have non-zero length in order to calculate its background proportion.'],
                      ['P values are corrected by multiple test correction.'],
                      ['Positive association is defined by: Proportion > Background.'],
                      ['Negative association is defined by: Proportion < Background.']]

        if na_refs:
            # Sorted so the generated HTML is identical from run to run; plain
            # set iteration order of strings depends on hash randomisation.
            data_table.append([
                'The following references contain zero-length region which cause error in proportion calculation, please check it:<br>' +
                '     <font color=\"red\">' + ', '.join(sorted(na_refs)) + '</font></p>'])
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left")
        html.add_fixed_rank_sortable()

        html.write(os.path.join(directory, title, "index.html"))

        # Parameters page
        html = Html(name=html_header, links_dict=link_d,
                    fig_rpath="../style", RGT_header=False, other_logo="viz", homepage="../index.html")
        header_list = ["Description", "Argument", "Value"]
        data_table = [["Reference", "-r", args.r],
                      ["Query", "-q", args.q],
                      ["Output directory", "-o", os.path.basename(args.o)],
                      ["Experiment title", "-t", args.t],
                      ["Organism", "-organism", args.organism],
                      ["Cutoff of proportion", "-cfp", str(args.cfp)]]

        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left")
        html.add_free_content([
            '<a href="reference_experimental_matrix.txt" style="margin-left:100">See reference experimental matrix</a>'])
        html.add_free_content(
            ['<a href="query_experimental_matrix.txt" style="margin-left:100">See query experimental matrix</a>'])
        html.add_free_content(['<a href="parameters.txt" style="margin-left:100">See details</a>'])
        html.write(os.path.join(directory, title, "parameters.html"))
Пример #7
0
    def gen_html(self,
                 directory,
                 parameters,
                 obed,
                 align=50,
                 alpha=0.05,
                 score=False):
        """Generate the HTML file"""
        dir_name = os.path.basename(directory)
        html_header = "Genomic Region Test: " + dir_name
        link_ds = OrderedDict()
        link_ds["RNA"] = "index.html"
        link_ds["Sig Target Regions"] = "starget_regions.html"
        link_ds["Target Regions"] = "target_regions.html"
        link_ds["Parameters"] = "parameters.html"

        ##################################################
        # index.html

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")
        # Plots
        html.add_figure("lineplot_region.png",
                        align="left",
                        width="45%",
                        more_images=["boxplot_regions.png"])
        if self.showdbs:
            html.add_figure("lineplot_dbs.png",
                            align="left",
                            width="45%",
                            more_images=["boxplot_dbs.png"])

        if self.showdbs:
            header_list = [[
                "#", "DBD", "Target Regions", None, "Non-target Regions", None,
                "Statistics", "Target Regions", "Non-target Regions", None,
                "Statistics"
            ],
                           [
                               "", "", "with DBS", "without DBS",
                               "with DBS (average)", "s.d.", "<i>p</i>-value",
                               "NO. DBSs", "NO. DBSs (average)", "s.d.",
                               "<i>p</i>-value"
                           ]]
            header_titles = [
                [
                    "Rank", "DNA Binding Domain",
                    "Given target regions on DNA", None,
                    "Regions from randomization", None,
                    "Statistics based on target regions",
                    "Given target regions on DNA",
                    "Regions from randomization", None,
                    "Statistics based on DNA Binding Sites"
                ],
                [
                    "", "", "Number of target regions with DBS binding",
                    "Number of target regions without DBS binding",
                    "Average number of regions from randomization with DBS binding",
                    "Standard deviation", "P value",
                    "Number of related DNA Binding Sites binding to target regions",
                    "Average number of DNA Binding Sites binding to random regions",
                    "Standard deviation", "P-value"
                ]
            ]
            border_list = [
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:2pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\""
            ]
        else:
            header_list = [[
                "#", "DBD", "Target Regions", None, "Non-target Regions", None,
                "Statistics", None
            ],
                           [
                               "", "", "with DBS", "without DBS",
                               "with DBS (average)", "s.d.", "<i>p</i>-value",
                               "z-score"
                           ]]
            header_titles = [
                [
                    "Rank", "DNA Binding Domain",
                    "Given target regions on DNA", None,
                    "Regions from randomization", None,
                    "Statistics based on target regions", None
                ],
                [
                    "", "", "Number of target regions with DBS binding",
                    "Number of target regions without DBS binding",
                    "Average number of regions from randomization with DBS binding",
                    "Standard deviation", "P value", "Z-score"
                ]
            ]
            border_list = [
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"", "",
                " style=\"border-right:1pt solid gray\"",
                " style=\"border-right:1pt solid gray\"", ""
            ]

        type_list = 'ssssssssssssssss'
        col_size_list = [
            50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50
        ]
        data_table = []

        for i, rbs in enumerate(self.rbss):
            if self.data["region"]["p"][i] < alpha:
                p_region = "<font color=\"red\">" + value2str(
                    self.data["region"]["p"][i]) + "</font>"

            else:
                p_region = value2str(self.data["region"]["p"][i])
            zs = (self.counts_tr[rbs][0] -
                  self.data["region"]["ave"][i]) / self.data["region"]["sd"][i]
            new_line = [
                str(i + 1),
                rbs.str_rna(pa=False), '<a href="dbd_region.html#' +
                rbs.str_rna() + '" style="text-align:left">' +
                str(self.counts_tr[rbs][0]) + '</a>',
                str(self.counts_tr[rbs][1]),
                value2str(self.data["region"]["ave"][i]),
                value2str(self.data["region"]["sd"][i]), p_region,
                value2str(zs)
            ]
            if self.showdbs:
                if self.data["dbs"]["p"][i] < alpha:
                    p_dbs = "<font color=\"red\">" + value2str(
                        self.data["dbs"]["p"][i]) + "</font>"
                else:
                    p_dbs = value2str(self.data["dbs"]["p"][i])

                new_line += [
                    str(self.counts_dbs[rbs]),
                    value2str(self.data["dbs"]["ave"][i]),
                    value2str(self.data["dbs"]["sd"][i]), p_dbs
                ]
            data_table.append(new_line)

        data_table = natsort.natsorted(data_table, key=lambda x: x[6])
        html.add_zebra_table(header_list,
                             col_size_list,
                             type_list,
                             data_table,
                             align=align,
                             cell_align="left",
                             auto_width=True,
                             header_titles=header_titles,
                             border_list=border_list,
                             sortable=True)

        html.add_heading("Notes")
        html.add_list([
            "RNA name: " + self.rna_name,
            "Randomization is performed for " + str(self.repeats) + " times.",
            "DBD stands for DNA Binding Domain on RNA.",
            "DBS stands for DNA Binding Site on DNA."
        ])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "index.html"))

        #############################################################
        # RNA subpage: Profile of targeted regions for each merged DNA Binding Domain
        #############################################################

        header_list = [
            "#", "Target Region", "Associated Gene", "No. of DBSs",
            "DBS coverage"
        ]
        header_titles = [
            "Rank", "Given target regions from BED files",
            "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
            "Number of DNA Binding Sites locate within the region",
            "The proportion of the region covered by DBS binding"
        ]

        #########################################################
        # dbd_region.html
        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        for rbsm in self.rbss:
            html.add_heading("DNA Binding Domain: " + rbsm.str_rna(),
                             idtag=rbsm.str_rna())
            data_table = []
            for i, region in enumerate(self.txp.merged_dict[rbsm]):
                # Add information
                data_table.append([
                    str(i + 1),
                    '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' +
                    self.organism + "&position=" + region.chrom + "%3A" +
                    str(region.initial) + "-" + str(region.final) +
                    '" style="text-align:left">' +
                    region.toString(space=True) + '</a>',
                    split_gene_name(gene_name=region.name, org=self.organism),
                    str(len(self.region_dbs[region.toString()])),
                    value2str(self.region_coverage[region.toString()])
                ])

            html.add_zebra_table(header_list,
                                 col_size_list,
                                 type_list,
                                 data_table,
                                 align=align,
                                 cell_align="left",
                                 auto_width=True,
                                 header_titles=header_titles,
                                 sortable=True)
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "dbd_region.html"))

        #############################################################
        # Targeted regions centered
        #############################################################

        ##############################################################################################
        # target_regions.html
        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        if score:
            header_list = [
                "#", "Target region", "Associated Gene", "DBSs Count",
                "DBS coverage", "Score", "Sum of ranks"
            ]
            header_titles = [
                "Rank", "Target regions loaded from the given BED file",
                "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                "Number of DNA Binding Sites within the region",
                "The proportion of the region covered by DBS binding",
                "Scores from BED file", "Sum of all the left-hand-side ranks"
            ]
        else:
            header_list = [
                "#", "Target region", "Associated Gene", "DBSs Count",
                "DBS coverage", "Sum of ranks"
            ]
            header_titles = [
                "Rank", "Target regions loaded from the given BED file",
                "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                "Number of DNA Binding Sites within the region",
                "The proportion of the region covered by DBS binding",
                "Sum of all the left-hand-side ranks"
            ]
        html.add_heading("Target Regions")
        data_table = []

        if not self.dna_region.sorted: self.dna_region.sort()

        # Calculate the ranking
        rank_count = len(self.dna_region) - rank_array(
            [len(self.region_dbs[p.toString()]) for p in self.dna_region])
        rank_coverage = len(self.dna_region) - rank_array(
            [self.region_coverage[p.toString()] for p in self.dna_region])

        if score:
            try:
                score_list = [
                    float(p.data.split("\t")[0]) for p in self.dna_region
                ]
                rank_score = len(self.dna_region) - rank_array(
                    [abs(s) for s in score_list])
                rank_sum = [
                    x + y + z
                    for x, y, z in zip(rank_count, rank_coverage, rank_score)
                ]
                # sum_rank = rank_array(rank_sum)  # method='min'
            except ImportError:
                print(
                    "There is no score in BED file, please don't use '-score' argument."
                )
        else:
            rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
            sum_rank = rank_array(rank_sum)

        for i, region in enumerate(self.dna_region):
            dbs_counts = str(len(self.region_dbs[region.toString()]))
            dbs_cover = value2str(self.region_coverage[region.toString()])

            newline = [
                str(i + 1),
                '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' +
                self.organism + "&position=" + region.chrom + "%3A" +
                str(region.initial) + "-" + str(region.final) +
                '" style="text-align:left">' + region.toString(space=True) +
                '</a>',
                split_gene_name(gene_name=region.name, org=self.organism),
                '<a href="region_dbs.html#' + region.toString() +
                '" style="text-align:left">' + dbs_counts + '</a>', dbs_cover
            ]

            if score:
                dbs_score = value2str(score_list[i])
                region.data = "\t".join(
                    [dbs_counts, dbs_cover, dbs_score,
                     str(rank_sum[i])])
                newline.append(dbs_score)
                newline.append(str(rank_sum[i]))
            else:
                region.data = "\t".join(
                    [dbs_counts, dbs_cover,
                     str(rank_sum[i])])
                newline.append(str(rank_sum[i]))
            data_table.append(newline)

        data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
        # data_table = sorted(data_table, key=lambda x: x[-1])
        html.add_zebra_table(header_list,
                             col_size_list,
                             type_list,
                             data_table,
                             align=align,
                             cell_align="left",
                             auto_width=True,
                             header_titles=header_titles,
                             sortable=True)
        html.add_heading("Notes")
        html.add_list(["All target regions without any bindings are ignored."])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "target_regions.html"))

        self.dna_region.sort_score()
        self.dna_region.write_bed(
            os.path.join(directory, obed + "_target_regions.bed"))

        ##############################################################################################
        # starget_regions.html    for significant target regions

        stargets = GenomicRegionSet("sig_targets")
        sig_dbs = {}
        sig_dbs_coverage = {}
        for i, r in enumerate(self.dna_region):
            sig_bindings = self.region_dbs[r.toString()].overlap_rbss(
                rbss=self.data["region"]["sig_region"])
            dbs = sig_bindings.get_dbs()
            if len(dbs) > 0:
                stargets.add(r)
                m_dbs = dbs.merge(w_return=True)
                sig_dbs[r] = len(dbs)
                # self.promoter["de"]["merged_dbs"][promoter.toString()] = len(m_dbs)
                sig_dbs_coverage[r] = float(m_dbs.total_coverage()) / len(r)

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        # Select promoters in sig DBD
        if len(self.data["region"]["sig_region"]) == 0:
            html.add_heading("There is no significant DBD.")
        else:
            html.add_heading("Target regions bound by significant DBD")
            data_table = []
            # Calculate the ranking
            rank_count = len(stargets) - rank_array(
                [sig_dbs[p] for p in stargets])
            rank_coverage = len(stargets) - rank_array(
                [sig_dbs_coverage[p] for p in stargets])
            if score:
                score_list = [float(p.data.split("\t")[0]) for p in stargets]
                rank_score = len(stargets) - rank_array(
                    [abs(s) for s in score_list])
                rank_sum = [
                    x + y + z
                    for x, y, z in zip(rank_count, rank_coverage, rank_score)
                ]
                sum_rank = rank_array(rank_sum)  # method='min'
            else:
                rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
                sum_rank = rank_array(rank_sum)

            for i, region in enumerate(stargets):
                dbssount = '<a href="region_dbs.html#' + region.toString() + \
                           '" style="text-align:left">' + str(sig_dbs[region]) + '</a>'

                region_link = region_link_internet(self.organism, region)

                newline = [
                    str(i + 1), region_link,
                    split_gene_name(gene_name=region.name, org=self.organism),
                    dbssount,
                    value2str(sig_dbs_coverage[region])
                ]
                if score:
                    dbs_score = value2str(score_list[i])
                    # region.data = "\t".join([dbs_counts, dbs_cover, dbs_score, str(sum_rank[i])])
                    newline.append(dbs_score)
                    newline.append(str(rank_sum[i]))
                    # print([dbs_score, str(sum_rank[i])])
                else:
                    # region.data = "\t".join([dbs_counts, dbs_cover, str(sum_rank[i])])
                    newline.append(str(rank_sum[i]))

                # newline += ["<i>" + str(rank_sum[i]) + "</i>"]
                # print(newline)
                data_table.append(newline)

            # print(data_table)
            # data_table = sorted(data_table, key=lambda x: x[-1])
            data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
            html.add_zebra_table(header_list,
                                 col_size_list,
                                 type_list,
                                 data_table,
                                 align=align,
                                 cell_align="left",
                                 header_titles=header_titles,
                                 border_list=None,
                                 sortable=True)
            html.add_heading("Notes")
            html.add_list([
                "DBS stands for DNA Binding Site on DNA.",
                "DBS coverage is the proportion of the region where has potential to form triple helices with the given RNA."
            ])
            html.add_fixed_rank_sortable()
            html.write(os.path.join(directory, "starget_regions.html"))

        ############################
        # Subpages for targeted region centered page
        # region_dbs.html
        header_list = ["RBS", "DBS", "Strand", "Score", "Motif", "Orientation"]

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")

        for i, region in enumerate(self.dna_region):
            if len(self.region_dbs[region.toString()]) == 0:
                continue
            else:
                html.add_heading(
                    "Associated gene: " +
                    split_gene_name(gene_name=region.name, org=self.organism),
                    idtag=region.toString())
                html.add_free_content([
                    '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' +
                    self.organism + "&position=" + region.chrom + "%3A" +
                    str(region.initial) + "-" + str(region.final) +
                    '" style="margin-left:50">' + region.toString(space=True) +
                    '</a>'
                ])
                data_table = []
                for rd in self.region_dbs[region.toString()]:
                    rbs = rd.rna.str_rna(pa=False)
                    for rbsm in self.data["region"]["sig_region"]:
                        # rbsm = rbsm.partition(":")[2].split("-")
                        if rd.rna.overlap(rbsm):
                            rbs = "<font color=\"red\">" + rbs + "</font>"
                    data_table.append([
                        rbs,
                        '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db='
                        + self.organism + "&position=" + rd.dna.chrom + "%3A" +
                        str(rd.dna.initial) + "-" + str(rd.dna.final) +
                        '" style="text-align:left">' +
                        rd.dna.toString(space=True) + '</a>',
                        rd.dna.orientation, rd.score, rd.motif, rd.orient
                    ])
                html.add_zebra_table(header_list,
                                     col_size_list,
                                     type_list,
                                     data_table,
                                     align=align,
                                     cell_align="left",
                                     auto_width=True)
        html.write(os.path.join(directory, "region_dbs.html"))

        ###############################################################################33
        ################ Parameters.html

        html = Html(
            name=html_header,
            links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
            fig_rpath="../style",
            RGT_header=False,
            other_logo="TDF",
            homepage="../index.html")
        html.add_heading("Parameters")
        header_list = ["Description", "Arguments", "Value"]

        data_table = [
            ["RNA sequence name", "-rn", parameters.rn],
            ["Input RNA sequence file", "-r",
             os.path.basename(parameters.r)],
            ["Input BED file", "-bed",
             os.path.basename(parameters.bed)],
            ["Output directory", "-o",
             os.path.basename(parameters.o)],
            ["Organism", "-organism", parameters.organism],
            ["Number of repitetion of andomization", "-n",
             str(parameters.n)],
            ["Alpha level for rejection p value", "-a",
             str(parameters.a)],
            [
                "Cut off value for filtering out the low counts of DBSs",
                "-ccf",
                str(parameters.ccf)
            ], ["Remove temporary files", "-rt",
                str(parameters.rt)],
            [
                "Input BED file for masking in randomization", "-f",
                str(parameters.f)
            ], ["Input file for RNA accecibility", "-ac",
                str(parameters.ac)],
            [
                "Cut off value for RNA accecibility", "-accf",
                str(parameters.accf)
            ],
            [
                "Output the BED files for DNA binding sites.", "-obed",
                str(parameters.obed)
            ],
            [
                "Show parallel and antiparallel bindings in the plot separately.",
                "-showpa",
                str(parameters.showpa)
            ], ["Minimum length", "-l",
                str(self.triplexator_p[0])],
            ["Maximum error rate", "-e",
             str(self.triplexator_p[1])],
            [
                "Tolerated number of consecutive errors", "-c",
                str(self.triplexator_p[2])
            ], ["Filtering repeats", "-fr",
                str(self.triplexator_p[3])],
            ["Filtering mode", "-fm",
             str(self.triplexator_p[4])],
            ["Output format", "-of",
             str(self.triplexator_p[5])],
            ["Merge features", "-mf",
             str(self.triplexator_p[6])]
        ]
        html.add_zebra_table(header_list,
                             col_size_list,
                             type_list,
                             data_table,
                             align=align,
                             cell_align="left",
                             auto_width=True)
        html.add_free_content(
            ['<a href="summary.txt" style="margin-left:100">See details</a>'])
        html.write(os.path.join(directory, "parameters.html"))
Пример #8
0
    def make_html(self):
        """Build the THOR HTML report and write it to FOLDER_REPORT/index.html.

        The navigation links are assembled first, then the sections are
        appended in this order: experimental configuration, pre-/post-
        processing features, sample information, HMM information,
        mean-variance function, fragment size estimate, housekeeping gene
        normalization, references and contact.  The figure and table sections
        are only emitted when their input file exists under FOLDER_REPORT.

        Every section wrapped in ``try`` is best effort: if building it raises
        an ``Exception``, a ``RuntimeWarning`` is issued and the report
        continues with the next section.  ``KeyboardInterrupt`` and
        ``SystemExit`` are not intercepted.
        """
        import warnings
        from rgt.THOR.dpc_help import FOLDER_REPORT

        html_header = "THOR"

        def _warn_incomplete(section, err):
            # A failing optional section must not abort the report, but the
            # failure should leave a trace instead of vanishing silently.
            warnings.warn(
                "THOR report: section '%s' could not be completed: %s" % (section, err),
                RuntimeWarning, stacklevel=2)

        # Links
        links_dict = OrderedDict()
        links_dict['Experimental Configuration'] = 'index.html#extinfo'
        links_dict['Sample Information'] = 'index.html#sampleinfo'
        links_dict['HMM Information'] = 'index.html#hmminfo'
        # NOTE(review): this link is added unconditionally, while the heading
        # carrying idtag 'mvfunction' is only added when the picture exists.
        links_dict['Mean Variance Function Estimate'] = 'index.html#mvfunction'

        p = path.join(FOLDER_REPORT, 'pics/fragment_size_estimate.png')
        if path.isfile(p):
            links_dict['Fragment Size Estimate'] = 'index.html#fsestimate'

        # NOTE(review): the link is keyed on sample.data, but the heading with
        # idtag 'norm' below is only added when gene.data exists -- confirm.
        p = path.join(FOLDER_REPORT, 'pics/data/sample.data')
        if path.isfile(p):
            links_dict['Housekeeping Gene Normalization'] = 'index.html#norm'

        links_dict['References'] = 'index.html#ref'
        links_dict['Contact'] = 'index.html#contact'

        # copy basic rgt logo, style etc to local directory inside report
        fig_path = path.join(FOLDER_REPORT, "fig")
        html = Html(name=html_header,
                    links_dict=links_dict,
                    fig_dir=fig_path,
                    fig_rpath="fig")

        try:
            html.add_heading("Experimental Configuration", idtag='extinfo')
            self.make_ext_config(html)
        except Exception as err:
            _warn_incomplete("Experimental Configuration", err)

        # This section is mandatory: errors here propagate to the caller.
        html.add_heading("Pre- and post-processing Features",
                         idtag='prepostinfo')
        self.make_pre_post(html)

        try:
            html.add_heading("Sample Information", idtag='sampleinfo')
            self.make_ext_scaling_table(html)
        except Exception as err:
            _warn_incomplete("Sample Information", err)

        # Run Info
        try:
            html.add_heading("HMM Information", idtag='hmminfo')
            self.make_hmm(html)
        except Exception as err:
            _warn_incomplete("HMM Information", err)

        # Mean Variance Function
        try:
            p = path.join(FOLDER_REPORT,
                          'pics/mean_variance_func_cond_0_original.png')
            if path.isfile(p):
                html.add_heading("Mean Variance Function", idtag='mvfunction')
                # Figures are referenced relative to the report folder.
                html.add_figure(
                    path.relpath(p, FOLDER_REPORT),
                    align="left",
                    width="45%",
                    more_images=[
                        'pics/mean_variance_func_cond_1_original.png'
                    ])
                info = "THOR uses a polynomial function to empirically describe the relationship between mean and variance in the data.\
                The data the plot is based on can be found at report/pics/data for further downstream analysis."

                self._write_text(html, info)
        except Exception as err:
            _warn_incomplete("Mean Variance Function", err)

        # Fragment Size Estimate
        try:
            p = path.join(FOLDER_REPORT, 'pics/fragment_size_estimate.png')
            if path.isfile(p):
                html.add_heading("Fragment Size Estimate", idtag='fsestimate')
                html.add_figure(path.relpath(p, FOLDER_REPORT),
                                align="left",
                                width="45%")
                info = "THOR estimates the fragmentation sizes of each sample's reads. Here, the cross-correlation function [1] is shown. Their maxima give the\
                fragmentation extension sizes.<br> The data the plot is based on can be found at report/pics/data for further downstream analysis."

                self._write_text(html, info)
        except Exception as err:
            _warn_incomplete("Fragment Size Estimate", err)

        # HK normalization: one table per data file that is present.
        try:
            p = path.join(FOLDER_REPORT, 'pics/data/gene.data')
            if path.isfile(p):
                d = self._read_hk(p)
                html.add_heading("Housekeeping Gene Normalization",
                                 idtag='norm')
                html.add_zebra_table(header_list=['gene', 'quality q'],
                                     col_size_list=[1, 150],
                                     type_list='s' * len(d),
                                     data_table=d)
                info = "For active histone marks, housekeeping genes given by [4] can be used for normalization [1]. Here, the genes for the experiments are\
                evaluated. For each gene i, we estimate the normalization factors with gene i and without gene i and compute the sums of squared deviations q.\
                High values (higher than 2) indicate striking genes which should be considered to be left our for normalization.,<br> One can also \
                use other genes or regions for normalization.<br> The data the plot is based on can be found at report/pics/data for further downstream analysis."

                self._write_text(html, info)

            p = path.join(FOLDER_REPORT, 'pics/data/sample.data')
            if path.isfile(p):
                d = self._read_hk(p)
                html.add_zebra_table(header_list=['sample', 'quality p'],
                                     col_size_list=[1, 150],
                                     type_list='s' * len(d),
                                     data_table=d)
                info = "We evaluate the effect of samples to the normalization factors. For sample j, we estimate the normalization factors with sample j\
                and without sample j and compute the sums of squared deviations p. High values (higher than 2) indicate striking samples which should be\
                considered to be left out for the analysis.<br> The data the plot is based on can be found at report/pics/data for further downstream analysis."

                self._write_text(html, info)
        except Exception as err:
            _warn_incomplete("Housekeeping Gene Normalization", err)

        html.add_heading("References", idtag='ref')
        info = "[1] M. Allhoff, J. F. Pires, K. Ser&eacute;, M. Zenke, and I. G. Costa. Differential Peak Calling of ChIP-Seq \
        Signals with Replicates with THOR. <i>submitted.</i> <br>\
        [2] A. Mammana, M. Vingron, and H.-R. Chung. Inferring nucleosome positions with their histone mark annotation from chip data. \
        Bioinformatics, 29(20):2547-2554, 2013. <br>\
        [3] M. D. Robinson and A. Oshlack. A scaling normalization method for differential expression analysis of RNA-seq data. \
        Genome Biology, 11(3):R25, 2010. <br>\
        [4] E. Eisenberg and E. Y. Levanon. Human housekeeping genes, revisited. Trends in genetics: TIG, 29(10):569-574, 2013."

        self._write_text(html, info)

        html.add_heading("Contact", idtag='contact')
        info = "If you have any questions, please don't hesitate to contact us: [email protected]"
        self._write_text(html, info)

        html.write(path.join(FOLDER_REPORT, "index.html"))
Пример #9
0
 def make_html(self):
     """Build the THOR HTML report and write it to FOLDER_REPORT/index.html.

     The navigation links are assembled first, then the sections are
     appended in this order: experimental configuration, pre-/post-
     processing features, sample information, HMM information,
     mean-variance function, fragment size estimate, housekeeping gene
     normalization, references and contact.  The figure and table sections
     are only emitted when their input file exists under FOLDER_REPORT.

     Every section wrapped in ``try`` is best effort: if building it raises
     an ``Exception``, a ``RuntimeWarning`` is issued and the report
     continues with the next section.  ``KeyboardInterrupt`` and
     ``SystemExit`` are not intercepted.
     """
     import warnings
     from rgt.THOR.dpc_help import FOLDER_REPORT

     html_header = "THOR"

     def _warn_incomplete(section, err):
         # A failing optional section must not abort the report, but the
         # failure should leave a trace instead of vanishing silently.
         warnings.warn(
             "THOR report: section '%s' could not be completed: %s" % (section, err),
             RuntimeWarning, stacklevel=2)

     # Links
     links_dict = OrderedDict()
     links_dict['Experimental Configuration'] = 'index.html#extinfo'
     links_dict['Sample Information'] = 'index.html#sampleinfo'
     links_dict['HMM Information'] = 'index.html#hmminfo'
     # NOTE(review): this link is added unconditionally, while the heading
     # carrying idtag 'mvfunction' is only added when the picture exists.
     links_dict['Mean Variance Function Estimate'] = 'index.html#mvfunction'

     p = path.join(FOLDER_REPORT, 'pics/fragment_size_estimate.png')
     if path.isfile(p):
         links_dict['Fragment Size Estimate'] = 'index.html#fsestimate'

     # NOTE(review): the link is keyed on sample.data, but the heading with
     # idtag 'norm' below is only added when gene.data exists -- confirm.
     p = path.join(FOLDER_REPORT, 'pics/data/sample.data')
     if path.isfile(p):
         links_dict['Housekeeping Gene Normalization'] = 'index.html#norm'

     links_dict['References'] = 'index.html#ref'
     links_dict['Contact'] = 'index.html#contact'

     # Figures/styles are referenced from the 'fig' folder below the
     # configured data directory.
     config_class = ConfigurationFile()
     html = Html(name=html_header, links_dict=links_dict, fig_rpath= config_class.data_dir + '/fig/')

     try:
         html.add_heading("Experimental Configuration", idtag = 'extinfo')
         self.make_ext_config(html)
     except Exception as err:
         _warn_incomplete("Experimental Configuration", err)

     # This section is mandatory: errors here propagate to the caller.
     html.add_heading("Pre- and post-processing Features", idtag = 'prepostinfo')
     self.make_pre_post(html)

     try:
         html.add_heading("Sample Information", idtag = 'sampleinfo')
         self.make_ext_scaling_table(html)
     except Exception as err:
         _warn_incomplete("Sample Information", err)

     # Run Info
     try:
         html.add_heading("HMM Information", idtag = 'hmminfo')
         self.make_hmm(html)
     except Exception as err:
         _warn_incomplete("HMM Information", err)

     # Mean Variance Function
     try:
         p = path.join(FOLDER_REPORT, "pics/mean_variance_func_cond_0_original.png")
         if path.isfile(p):
             html.add_heading("Mean Variance Function", idtag='mvfunction')
             html.add_figure(p, align="left", width="45%", more_images=[path.join(FOLDER_REPORT, 'pics/mean_variance_func_cond_1_original.png')])
             info = "THOR uses a polynomial function to empirically describe the relationship between mean and variance in the data.\
             The data the plot is based on can be found at report/pics/data for further downstream analysis."
             self._write_text(html, info)
     except Exception as err:
         _warn_incomplete("Mean Variance Function", err)

     # Fragment Size Estimate
     try:
         p = path.join(FOLDER_REPORT, 'pics/fragment_size_estimate.png')
         if path.isfile(p):
             html.add_heading("Fragment Size Estimate", idtag = 'fsestimate')
             html.add_figure(p, align="left", width="45%")
             info = "THOR estimates the fragmentation sizes of each sample's reads. Here, the cross-correlation function [1] is shown. Their maxima give the\
             fragmentation extension sizes.<br> The data the plot is based on can be found at report/pics/data for further downstream analysis."
             self._write_text(html, info)
     except Exception as err:
         _warn_incomplete("Fragment Size Estimate", err)

     # HK normalization: one table per data file that is present.
     try:
         p = path.join(FOLDER_REPORT, 'pics/data/gene.data')
         if path.isfile(p):
             d = self._read_hk(p)
             html.add_heading("Housekeeping Gene Normalization", idtag = 'norm')
             html.add_zebra_table(header_list=['gene', 'quality q'], col_size_list=[1,150], type_list='s'*len(d), data_table=d)
             info = "For active histone marks, housekeeping genes given by [4] can be used for normalization [1]. Here, the genes for the experiments are\
             evaluated. For each gene i, we estimate the normalization factors with gene i and without gene i and compute the sums of squared deviations q.\
             High values (higher than 2) indicate striking genes which should be considered to be left our for normalization.,<br> One can also \
             use other genes or regions for normalization.<br> The data the plot is based on can be found at report/pics/data for further downstream analysis."
             self._write_text(html, info)

         p = path.join(FOLDER_REPORT, 'pics/data/sample.data')
         if path.isfile(p):
             d = self._read_hk(p)
             html.add_zebra_table(header_list=['sample', 'quality p'], col_size_list=[1,150], type_list='s'*len(d), data_table=d)
             info = "We evaluate the effect of samples to the normalization factors. For sample j, we estimate the normalization factors with sample j\
             and without sample j and compute the sums of squared deviations p. High values (higher than 2) indicate striking samples which should be\
             considered to be left out for the analysis.<br> The data the plot is based on can be found at report/pics/data for further downstream analysis."
             self._write_text(html, info)
     except Exception as err:
         _warn_incomplete("Housekeeping Gene Normalization", err)

     html.add_heading("References", idtag = 'ref')
     info = "[1] M. Allhoff, J. F. Pires, K. Ser&eacute;, M. Zenke, and I. G. Costa. Differential Peak Calling of ChIP-Seq \
     Signals with Replicates with THOR. <i>submitted.</i> <br>\
     [2] A. Mammana, M. Vingron, and H.-R. Chung. Inferring nucleosome positions with their histone mark annotation from chip data. \
     Bioinformatics, 29(20):2547-2554, 2013. <br>\
     [3] M. D. Robinson and A. Oshlack. A scaling normalization method for differential expression analysis of RNA-seq data. \
     Genome Biology, 11(3):R25, 2010. <br>\
     [4] E. Eisenberg and E. Y. Levanon. Human housekeeping genes, revisited. Trends in genetics: TIG, 29(10):569-574, 2013."
     self._write_text(html, info)

     html.add_heading("Contact", idtag = 'contact')
     info = "If you have any questions, please don't hesitate to contact us: [email protected]"
     self._write_text(html, info)

     html.write(path.join(FOLDER_REPORT, "index.html"))
     
Пример #10
0
    def gen_html(self, directory, title, align=50):
        """Write the boxplot report pages for one analysis.

        Two pages are written below ``<directory>/<title>/``:

        * ``index.html`` -- the boxplot figure followed by one table per
          group in ``self.sortDict`` holding the pairwise Mann-Whitney U
          p-values between every (sort key, color key) data set of that
          group.  After ``multiple_correction`` has been applied, p-values
          that are not greater than 0.05 are rendered in red.
        * ``parameters.html`` -- a short list of assumptions plus links to
          ``parameters.txt`` and ``experimental_matrix.txt``.

        Args:
            directory: Output directory; joined with ``title`` to form the
                folder the pages are written to.
            title: Page header, also used as the sub-folder name.
            align: Alignment value forwarded to ``Html.add_zebra_table``
                (the p-value tables use ``align + 50``).
        """
        html_header = title
        link_d = OrderedDict()
        link_d["Boxplot"] = "index.html"
        link_d["Parameters"] = "parameters.html"

        html = Html(name=html_header, links_dict=link_d,
                    fig_rpath="../style", RGT_header=False, other_logo="viz", homepage="../index.html")

        html.add_figure("boxplot.png", align="center")

        type_list = 'ssssssssssssssssssssssssssssssssssssssssssssss'

        # (sort key, color key) pairs of every group, in dictionary order.
        # Derived once so that p-values, table headers and table rows all
        # walk exactly the same cells.
        cells_by_group = {}
        for g in self.sortDict:
            cells_by_group[g] = [(s, c)
                                 for s in self.sortDict[g]
                                 for c in self.sortDict[g][s]]

        #### Calculate p value ####
        # plist[group][s1 + c1][s2 + c2] -> p-value of data set 1 vs. data set 2
        plist = {}
        for g in self.sortDict:
            plist[g] = {}
            for s1, c1 in cells_by_group[g]:
                data1 = self.sortDict[g][s1][c1]
                plist[g][s1 + c1] = {}
                for s2, c2 in cells_by_group[g]:
                    if s2 == s1 and c2 == c1:
                        # A data set is not tested against itself.
                        continue
                    data2 = self.sortDict[g][s2][c2]
                    _, p_value = mannwhitneyu(data1, data2)
                    plist[g][s1 + c1][s2 + c2] = p_value

        print("Multiple test correction.")
        # The return value is not used; plist is read again below, so the
        # correction is presumably applied in place -- verify against
        # multiple_correction.
        multiple_correction(plist)

        for g in self.sortDict:
            html.add_heading(g, size=4, bold=False)
            cells = cells_by_group[g]

            # One header column per data set.  The colors are looked up under
            # their own sort key (the previous code indexed with a stale loop
            # variable left over from the p-value computation).
            header_list = ["p-value"] + [s + "\n" + c for s, c in cells]
            col_size_list = [15] * len(header_list)

            data_table = []
            for s1, c1 in cells:
                row = [s1 + "\n" + c1]
                for s2, c2 in cells:
                    if s2 == s1 and c2 == c1:
                        row.append("-")
                        continue
                    p = plist[g][s1 + c1][s2 + c2]
                    if p > 0.05:
                        row.append(value2str(p))
                    else:
                        row.append("<font color=\"red\">" + value2str(p) + "</font>")
                data_table.append(row)

            html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align + 50)

        html.write(os.path.join(directory, title, "index.html"))

        ## Parameters
        html = Html(name=html_header, links_dict=link_d,
                    fig_rpath="../style", RGT_header=False, other_logo="viz", homepage="../index.html")

        header_list = ["Assumptions and hypothesis"]
        col_size_list = [50]
        data_table = [['All the regions among different BED files are normalized by quantile normalization.'],
                      [
                          'If there is any grouping problem, please check all the optional columns in input experimental matrix.']]
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left")

        html.add_free_content(['<a href="parameters.txt" style="margin-left:100">See parameters</a>'])
        html.add_free_content(['<a href="experimental_matrix.txt" style="margin-left:100">See experimental matrix</a>'])
        html.write(os.path.join(directory, title, "parameters.html"))
Пример #11
0
    def gen_html(self, directory, parameters, obed, align=50, alpha=0.05, score=False):
        """Generate the HTML file"""
        dir_name = os.path.basename(directory)
        html_header = "Genomic Region Test: " + dir_name
        link_ds = OrderedDict()
        link_ds["RNA"] = "index.html"
        link_ds["Sig Target Regions"] = "starget_regions.html"
        link_ds["Target Regions"] = "target_regions.html"
        link_ds["Parameters"] = "parameters.html"

        ##################################################
        # index.html

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")
        # Plots
        html.add_figure("lineplot_region.png", align="left", width="45%", more_images=["boxplot_regions.png"])
        if self.showdbs:
            html.add_figure("lineplot_dbs.png", align="left", width="45%", more_images=["boxplot_dbs.png"])

        if self.showdbs:
            header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics",
                            "Target Regions", "Non-target Regions", None, "Statistics"],
                           ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
                            "NO. DBSs", "NO. DBSs (average)", "s.d.", "<i>p</i>-value"]]
            header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
                              "Regions from randomization", None, "Statistics based on target regions",
                              "Given target regions on DNA", "Regions from randomization", None,
                              "Statistics based on DNA Binding Sites"],
                             ["", "",
                              "Number of target regions with DBS binding",
                              "Number of target regions without DBS binding",
                              "Average number of regions from randomization with DBS binding",
                              "Standard deviation", "P value",
                              "Number of related DNA Binding Sites binding to target regions",
                              "Average number of DNA Binding Sites binding to random regions",
                              "Standard deviation", "P-value"]]
            border_list = [" style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:2pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\""]
        else:
            header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics", None],
                           ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "<i>p</i>-value",
                            "z-score"]]
            header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None,
                              "Regions from randomization", None, "Statistics based on target regions", None],
                             ["", "",
                              "Number of target regions with DBS binding",
                              "Number of target regions without DBS binding",
                              "Average number of regions from randomization with DBS binding",
                              "Standard deviation", "P value", "Z-score"]]
            border_list = [" style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"", "",
                           " style=\"border-right:1pt solid gray\"",
                           " style=\"border-right:1pt solid gray\"", ""]

        type_list = 'ssssssssssssssss'
        col_size_list = [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
        data_table = []

        for i, rbs in enumerate(self.rbss):
            if self.data["region"]["p"][i] < alpha:
                p_region = "<font color=\"red\">" + value2str(self.data["region"]["p"][i]) + "</font>"

            else:
                p_region = value2str(self.data["region"]["p"][i])
            zs = (self.counts_tr[rbs][0] - self.data["region"]["ave"][i]) / self.data["region"]["sd"][i]
            new_line = [str(i + 1),
                        rbs.str_rna(pa=False),
                        '<a href="dbd_region.html#' + rbs.str_rna() +
                        '" style="text-align:left">' + str(self.counts_tr[rbs][0]) + '</a>',
                        str(self.counts_tr[rbs][1]),
                        value2str(self.data["region"]["ave"][i]),
                        value2str(self.data["region"]["sd"][i]),
                        p_region,
                        value2str(zs)]
            if self.showdbs:
                if self.data["dbs"]["p"][i] < alpha:
                    p_dbs = "<font color=\"red\">" + value2str(self.data["dbs"]["p"][i]) + "</font>"
                else:
                    p_dbs = value2str(self.data["dbs"]["p"][i])

                new_line += [str(self.counts_dbs[rbs]),
                             value2str(self.data["dbs"]["ave"][i]),
                             value2str(self.data["dbs"]["sd"][i]),
                             p_dbs]
            data_table.append(new_line)

        data_table = natsort.natsorted(data_table, key=lambda x: x[6])
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                             auto_width=True, header_titles=header_titles, border_list=border_list, sortable=True)

        html.add_heading("Notes")
        html.add_list(["RNA name: " + self.rna_name,
                       "Randomization is performed for " + str(self.repeats) + " times.",
                       "DBD stands for DNA Binding Domain on RNA.",
                       "DBS stands for DNA Binding Site on DNA."])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "index.html"))

        #############################################################
        # RNA subpage: Profile of targeted regions for each merged DNA Binding Domain
        #############################################################

        header_list = ["#", "Target Region",
                       "Associated Gene",
                       "No. of DBSs",
                       "DBS coverage"]
        header_titles = ["Rank", "Given target regions from BED files",
                         "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                         "Number of DNA Binding Sites locate within the region",
                         "The proportion of the region covered by DBS binding"]

        #########################################################
        # dbd_region.html
        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        for rbsm in self.rbss:
            html.add_heading("DNA Binding Domain: " + rbsm.str_rna(),
                             idtag=rbsm.str_rna())
            data_table = []
            for i, region in enumerate(self.txp.merged_dict[rbsm]):
                # Add information
                data_table.append([str(i + 1),
                                   '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                                   "&position=" + region.chrom + "%3A" + str(region.initial) + "-" + str(region.final) +
                                   '" style="text-align:left">' + region.toString(space=True) + '</a>',
                                   split_gene_name(gene_name=region.name, org=self.organism),
                                   str(len(self.region_dbs[region.toString()])),
                                   value2str(self.region_coverage[region.toString()])
                                   ])

            html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                                 auto_width=True, header_titles=header_titles, sortable=True)
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "dbd_region.html"))

        #############################################################
        # Targeted regions centered
        #############################################################

        ##############################################################################################
        # target_regions.html
        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        if score:
            header_list = ["#", "Target region", "Associated Gene", "DBSs Count",
                           "DBS coverage", "Score", "Sum of ranks"]
            header_titles = ["Rank",
                             "Target regions loaded from the given BED file",
                             "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                             "Number of DNA Binding Sites within the region",
                             "The proportion of the region covered by DBS binding",
                             "Scores from BED file",
                             "Sum of all the left-hand-side ranks"]
        else:
            header_list = ["#", "Target region", "Associated Gene", "DBSs Count",
                           "DBS coverage", "Sum of ranks"]
            header_titles = ["Rank",
                             "Target regions loaded from the given BED file",
                             "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)",
                             "Number of DNA Binding Sites within the region",
                             "The proportion of the region covered by DBS binding",
                             "Sum of all the left-hand-side ranks"]
        html.add_heading("Target Regions")
        data_table = []

        if not self.dna_region.sorted: self.dna_region.sort()

        # Calculate the ranking
        rank_count = len(self.dna_region) - rank_array([len(self.region_dbs[p.toString()]) for p in self.dna_region])
        rank_coverage = len(self.dna_region) - rank_array([self.region_coverage[p.toString()] for p in self.dna_region])

        if score:
            try:
                score_list = [float(p.data.split("\t")[0]) for p in self.dna_region]
                rank_score = len(self.dna_region) - rank_array([abs(s) for s in score_list])
                rank_sum = [x + y + z for x, y, z in zip(rank_count, rank_coverage, rank_score)]
                # sum_rank = rank_array(rank_sum)  # method='min'
            except ImportError:
                print("There is no score in BED file, please don't use '-score' argument.")
        else:
            rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
            sum_rank = rank_array(rank_sum)

        for i, region in enumerate(self.dna_region):
            dbs_counts = str(len(self.region_dbs[region.toString()]))
            dbs_cover = value2str(self.region_coverage[region.toString()])

            newline = [str(i + 1),
                       '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                       "&position=" + region.chrom + "%3A" + str(region.initial) + "-" + str(region.final) +
                       '" style="text-align:left">' + region.toString(space=True) + '</a>',
                       split_gene_name(gene_name=region.name, org=self.organism),
                       '<a href="region_dbs.html#' + region.toString() +
                       '" style="text-align:left">' + dbs_counts + '</a>',
                       dbs_cover]

            if score:
                dbs_score = value2str(score_list[i])
                region.data = "\t".join([dbs_counts, dbs_cover, dbs_score, str(rank_sum[i])])
                newline.append(dbs_score)
                newline.append(str(rank_sum[i]))
            else:
                region.data = "\t".join([dbs_counts, dbs_cover, str(rank_sum[i])])
                newline.append(str(rank_sum[i]))
            data_table.append(newline)

        data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
        # data_table = sorted(data_table, key=lambda x: x[-1])
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                             auto_width=True, header_titles=header_titles, sortable=True)
        html.add_heading("Notes")
        html.add_list(["All target regions without any bindings are ignored."])
        html.add_fixed_rank_sortable()
        html.write(os.path.join(directory, "target_regions.html"))

        self.dna_region.sort_score()
        self.dna_region.write_bed(os.path.join(directory, obed + "_target_regions.bed"))



        ##############################################################################################
        # starget_regions.html    for significant target regions

        stargets = GenomicRegionSet("sig_targets")
        sig_dbs = {}
        sig_dbs_coverage = {}
        for i, r in enumerate(self.dna_region):
            sig_bindings = self.region_dbs[r.toString()].overlap_rbss(rbss=self.data["region"]["sig_region"])
            dbs = sig_bindings.get_dbs()
            if len(dbs) > 0:
                stargets.add(r)
                m_dbs = dbs.merge(w_return=True)
                sig_dbs[r] = len(dbs)
                # self.promoter["de"]["merged_dbs"][promoter.toString()] = len(m_dbs)
                sig_dbs_coverage[r] = float(m_dbs.total_coverage()) / len(r)

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        # Select promoters in sig DBD
        if len(self.data["region"]["sig_region"]) == 0:
            html.add_heading("There is no significant DBD.")
        else:
            html.add_heading("Target regions bound by significant DBD")
            data_table = []
            # Calculate the ranking
            rank_count = len(stargets) - rank_array([sig_dbs[p] for p in stargets])
            rank_coverage = len(stargets) - rank_array([sig_dbs_coverage[p] for p in stargets])
            if score:
                score_list = [float(p.data.split("\t")[0]) for p in stargets]
                rank_score = len(stargets) - rank_array([abs(s) for s in score_list])
                rank_sum = [x + y + z for x, y, z in zip(rank_count, rank_coverage, rank_score)]
                sum_rank = rank_array(rank_sum)  # method='min'
            else:
                rank_sum = [x + y for x, y in zip(rank_count, rank_coverage)]
                sum_rank = rank_array(rank_sum)

            for i, region in enumerate(stargets):
                dbssount = '<a href="region_dbs.html#' + region.toString() + \
                           '" style="text-align:left">' + str(sig_dbs[region]) + '</a>'

                region_link = region_link_internet(self.organism, region)

                newline = [str(i + 1), region_link,
                           split_gene_name(gene_name=region.name, org=self.organism),
                           dbssount, value2str(sig_dbs_coverage[region]) ]
                if score:
                    dbs_score = value2str(score_list[i])
                    # region.data = "\t".join([dbs_counts, dbs_cover, dbs_score, str(sum_rank[i])])
                    newline.append(dbs_score)
                    newline.append(str(rank_sum[i]))
                    # print([dbs_score, str(sum_rank[i])])
                else:
                    # region.data = "\t".join([dbs_counts, dbs_cover, str(sum_rank[i])])
                    newline.append(str(rank_sum[i]))

                # newline += ["<i>" + str(rank_sum[i]) + "</i>"]
                # print(newline)
                data_table.append(newline)

            # print(data_table)
            # data_table = sorted(data_table, key=lambda x: x[-1])
            data_table = natsort.natsorted(data_table, key=lambda x: x[-1])
            html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                                 header_titles=header_titles, border_list=None, sortable=True)
            html.add_heading("Notes")
            html.add_list(["DBS stands for DNA Binding Site on DNA.",
                           "DBS coverage is the proportion of the region where has potential to form triple helices with the given RNA."])
            html.add_fixed_rank_sortable()
            html.write(os.path.join(directory, "starget_regions.html"))

        ############################
        # Subpages for targeted region centered page
        # region_dbs.html
        header_list = ["RBS", "DBS", "Strand", "Score", "Motif", "Orientation"]

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")

        for i, region in enumerate(self.dna_region):
            if len(self.region_dbs[region.toString()]) == 0:
                continue
            else:
                html.add_heading("Associated gene: " + split_gene_name(gene_name=region.name, org=self.organism),
                                 idtag=region.toString())
                html.add_free_content(['<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                                       "&position=" + region.chrom + "%3A" + str(region.initial) +
                                       "-" + str(region.final) + '" style="margin-left:50">' +
                                       region.toString(space=True) + '</a>'])
                data_table = []
                for rd in self.region_dbs[region.toString()]:
                    rbs = rd.rna.str_rna(pa=False)
                    for rbsm in self.data["region"]["sig_region"]:
                        # rbsm = rbsm.partition(":")[2].split("-")
                        if rd.rna.overlap(rbsm):
                            rbs = "<font color=\"red\">" + rbs + "</font>"
                    data_table.append([rbs,
                                       '<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=' + self.organism +
                                       "&position=" + rd.dna.chrom + "%3A" + str(rd.dna.initial) + "-" + str(
                                           rd.dna.final) +
                                       '" style="text-align:left">' + rd.dna.toString(space=True) + '</a>',
                                       rd.dna.orientation, rd.score, rd.motif, rd.orient])
                html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                                     auto_width=True)
        html.write(os.path.join(directory, "region_dbs.html"))

        ###############################################################################33
        ################ Parameters.html

        html = Html(name=html_header, links_dict=link_ds,  # fig_dir=os.path.join(directory,"style"),
                    fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html")
        html.add_heading("Parameters")
        header_list = ["Description", "Arguments", "Value"]

        data_table = [["RNA sequence name", "-rn", parameters.rn],
                      ["Input RNA sequence file", "-r", os.path.basename(parameters.r)],
                      ["Input BED file", "-bed", os.path.basename(parameters.bed)],
                      ["Output directory", "-o", os.path.basename(parameters.o)],
                      ["Organism", "-organism", parameters.organism],
                      ["Number of repitetion of andomization", "-n", str(parameters.n)],
                      ["Alpha level for rejection p value", "-a", str(parameters.a)],
                      ["Cut off value for filtering out the low counts of DBSs", "-ccf", str(parameters.ccf)],
                      ["Remove temporary files", "-rt", str(parameters.rt)],
                      ["Input BED file for masking in randomization", "-f", str(parameters.f)],
                      ["Input file for RNA accecibility", "-ac", str(parameters.ac)],
                      ["Cut off value for RNA accecibility", "-accf", str(parameters.accf)],
                      ["Output the BED files for DNA binding sites.", "-obed", str(parameters.obed)],
                      ["Show parallel and antiparallel bindings in the plot separately.", "-showpa",
                       str(parameters.showpa)],
                      ["Minimum length", "-l", str(self.triplexator_p[0])],
                      ["Maximum error rate", "-e", str(self.triplexator_p[1])],
                      ["Tolerated number of consecutive errors", "-c", str(self.triplexator_p[2])],
                      ["Filtering repeats", "-fr", str(self.triplexator_p[3])],
                      ["Filtering mode", "-fm", str(self.triplexator_p[4])],
                      ["Output format", "-of", str(self.triplexator_p[5])],
                      ["Merge features", "-mf", str(self.triplexator_p[6])]]
        html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left",
                             auto_width=True)
        html.add_free_content(['<a href="summary.txt" style="margin-left:100">See details</a>'])
        html.write(os.path.join(directory, "parameters.html"))
Пример #12
0
    def gen_html(self, directory, title, align=50):
        """Write the boxplot report pages ``index.html`` and ``parameters.html``.

        Both pages are written into ``os.path.join(directory, title)``.

        ``index.html`` shows ``boxplot.png`` followed by one table per key of
        ``self.sortDict``. Each table holds the pairwise Mann-Whitney U
        p-values between every ``(s, c)`` entry of ``self.sortDict[g][s][c]``.
        P-values that are not greater than 0.05 are rendered in red.

        ``parameters.html`` lists the assumptions of the analysis and links to
        ``parameters.txt`` and ``experimental_matrix.txt``.

        Args:
            directory: Output directory that contains the ``title`` folder.
            title: Report title; also the sub-folder the pages are written to.
            align: Left alignment passed to the HTML tables (the p-value
                tables use ``align + 50``).
        """
        html_header = title
        link_d = OrderedDict()
        link_d["Boxplot"] = "index.html"
        link_d["Parameters"] = "parameters.html"

        html = Html(name=html_header,
                    links_dict=link_d,
                    fig_rpath="../style",
                    RGT_header=False,
                    other_logo="viz",
                    homepage="../index.html")

        html.add_figure("boxplot.png", align="center")

        type_list = 'ssssssssssssssssssssssssssssssssssssssssssssss'

        #### Calculate p value ####
        # For every group, enumerate its (s, c) key pairs once so that the
        # p-value computation, the table header and the table rows all share
        # exactly the same ordering.
        pairs_by_group = {}
        plist = {}
        for g in self.sortDict.keys():
            pairs = [(s, c)
                     for s in self.sortDict[g].keys()
                     for c in self.sortDict[g][s].keys()]
            pairs_by_group[g] = pairs
            plist[g] = {}
            for s1, c1 in pairs:
                data1 = self.sortDict[g][s1][c1]
                plist[g][s1 + c1] = {}
                for s2, c2 in pairs:
                    if s2 == s1 and c2 == c1:
                        # No test of a sample against itself.
                        continue
                    data2 = self.sortDict[g][s2][c2]
                    _, p_value = mannwhitneyu(data1, data2)
                    plist[g][s1 + c1][s2 + c2] = p_value

        print("Multiple test correction.")
        # The return value is not used; presumably plist is corrected in
        # place -- confirm against multiple_correction().
        multiple_correction(plist)

        for g in self.sortDict.keys():
            html.add_heading(g, size=4, bold=False)
            pairs = pairs_by_group[g]

            # One leading label column plus one column per (s, c) pair. Each
            # header is built from the categories of its own `s`; the previous
            # code looked them up under a stale `s1` left over from an earlier
            # loop, which mislabelled the columns.
            header_list = ["p-value"] + [s + "\n" + c for s, c in pairs]
            col_size_list = [15] * len(header_list)

            data_table = []
            for s1, c1 in pairs:
                row = [s1 + "\n" + c1]
                for s2, c2 in pairs:
                    if s2 == s1 and c2 == c1:
                        row.append("-")
                        continue
                    p = plist[g][s1 + c1][s2 + c2]
                    if p > 0.05:
                        row.append(value2str(p))
                    else:
                        # Highlight p-values at or below the 0.05 level.
                        row.append("<font color=\"red\">" +
                                   value2str(p) + "</font>")
                data_table.append(row)

            html.add_zebra_table(header_list,
                                 col_size_list,
                                 type_list,
                                 data_table,
                                 align=align + 50)

        html.write(os.path.join(directory, title, "index.html"))

        ## Parameters
        html = Html(name=html_header,
                    links_dict=link_d,
                    fig_rpath="../style",
                    RGT_header=False,
                    other_logo="viz",
                    homepage="../index.html")

        header_list = ["Assumptions and hypothesis"]
        col_size_list = [50]
        data_table = [
            [
                'All the regions among different BED files are normalized by quantile normalization.'
            ],
            [
                'If there is any grouping problem, please check all the optional columns in input experimental matrix.'
            ]
        ]
        html.add_zebra_table(header_list,
                             col_size_list,
                             type_list,
                             data_table,
                             align=align,
                             cell_align="left")

        html.add_free_content([
            '<a href="parameters.txt" style="margin-left:100">See parameters</a>'
        ])
        html.add_free_content([
            '<a href="experimental_matrix.txt" style="margin-left:100">See experimental matrix</a>'
        ])
        html.write(os.path.join(directory, title, "parameters.html"))