示例#1
0
def df2html(df, name=None, dom='Brt', show_index=False, pageLength=15):
    """Render a dataframe as a DataTable (JavaScript followed by HTML).

    A column whose label ends in ``_links`` is taken as the link targets of
    the column named by the part before that suffix, provided such a column
    exists; the pair is registered with ``set_links_to_column``.

    :param df: dataframe to render.
    :param name: identifier given to the DataTable. When None, one is
        derived from ``uuid.uuid1().time_low``.
    :param dom: value of the ``dom`` DataTables option.
    :param show_index: forwarded to DataTable as its ``index`` argument.
    :param pageLength: value of the ``pageLength`` DataTables option.
    :return: the JavaScript function concatenated with the HTML table.
    """
    suffix = '_links'

    if name is None:
        # looks like datatable does not like ID made of numbers, even in
        # string, so each digit is mapped onto a letter (0 -> A ... 9 -> J)
        digits = str(uuid.uuid1().time_low)
        name = "".join(chr(65 + int(digit)) for digit in digits)

    datatable = DataTable(df, name, index=show_index)
    datatable.datatable.datatable_options = {
        'pageLength': pageLength,
        'scrollCollapse': 'false',
        'dom': dom,
        'buttons': ['copy', 'csv']
    }

    # identify links (columns ending in _links)
    for column in df.columns:
        # Column labels are not necessarily strings (e.g. integer labels);
        # those can never carry the suffix.
        if not isinstance(column, str) or not column.endswith(suffix):
            continue
        # Strip the trailing suffix only: str.replace would also remove any
        # earlier occurrence of '_links' inside the label.
        prefix = column[:-len(suffix)]
        if prefix in df.columns:
            datatable.datatable.set_links_to_column(column, prefix)

    js = datatable.create_javascript_function()
    html = datatable.create_datatable(float_format='%.6g')
    return js + html
示例#2
0
文件: fastqc.py 项目: sequana/sequana
    def add_main_section(self):
        """Append the "FastQC report(s)" section to ``self.sections``.

        The files matching ``self.pattern`` are listed in a two-column table
        (``names`` / ``links``) sorted by name; the ``links`` column is
        registered as the link target of the ``names`` column.
        """
        links = glob.glob("{}".format(self.pattern))
        names = [filename.rsplit('/',1)[1].split('.html')[0] for filename in links]

        df = pd.DataFrame({
            "names": names,
            # drop the leading path component of each link
            "links": [link.split(os.sep,1)[1] for link in links]
        })
        # sort_values returns a new frame unless inplace=True is given; the
        # bare call used previously left the table unsorted.
        df.sort_values(by='names', inplace=True)

        datatable = DataTable(df, "fastqc", index=False)
        datatable.datatable.set_links_to_column("links", "names")

        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable()

        html = "{} {}".format(html_tab, js)

        self.sections.append({
             "name": "FastQC report(s)",
             "anchor": "fastqc",
             "content": "<p> Here below are link(s) to original FastQC report. "
                        "Please click on one of the links to jump to the main "
                        "report.  {} </p>".format(html)
        })
示例#3
0
    def add_adapters_section(self):
        """Append the "Adapters" section to ``self.sections``.

        One table row is built per entry of ``self.data['adapters']``,
        indexed by the adapter name, with the Length, Trimmed, Type and
        Sequence values read from the entry's ``info``. The ``Trimmed``
        values are converted to integers (their "times." text is removed)
        and the rows are sorted by decreasing ``Trimmed``.
        """
        columns = ['Length', 'Trimmed', 'Type', 'Sequence']
        # The column order is given explicitly so that the positional row
        # values below always line up with the intended column.
        df = pd.DataFrame(
            {'Length': [], 'Trimmed': [], 'Type': [], 'Sequence': []},
            columns=columns)

        for adapter in self.data['adapters']:
            info = adapter['info']
            # .loc replaces the .ix indexer, which was removed from pandas
            df.loc[adapter['name']] = [info[column] for column in columns]
        df['Trimmed'] = df.Trimmed.map(lambda x: int(x.replace("times.", "")))

        # df.to_json(self.sample_name + "/cutadapt/cutadapt_stats2.json")
        df.sort_values(by="Trimmed", ascending=False, inplace=True)

        datatable = DataTable(df, "adapters", index=True)
        datatable.datatable.datatable_options = {
            'scrollX': 'true',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'frtipB',
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')
        self.jinja['adapters'] = ""
        self.sections.append({
            "name": "Adapters",
            "anchor": "adapters",
            "content": "<p>{} {}</p>".format(html_tab, js)
        })
示例#4
0
    def get_table_dependencies(self):
        """Build the table of the packages reported as Sequana dependencies.

        :return: the JavaScript and HTML of the table joined by a newline,
            or an empty string when no dependency is reported.
        """
        dependencies = easydev.get_dependencies('sequana')
        # if installed with conda, this will be empty
        if len(dependencies) == 0:
            return ""

        pypi = 'https://pypi.python.org/pypi/{0}'
        df = pd.DataFrame({
            'package': [dep.project_name for dep in dependencies],
            'version': [dep.version for dep in dependencies],
            'link': [pypi.format(dep.project_name) for dep in dependencies],
        })
        # case-insensitive ordering through a temporary lower-cased column
        df = df.assign(sort=df['package'].str.lower())
        df = df.sort_values(by='sort', axis=0)
        df = df.drop('sort', axis=1)

        options = {
            'paging': 'false',
            'bFilter': 'false',
            'bInfo': 'false',
            'bSort': 'false',
        }
        table = DataTable(df, 'dep')
        table.datatable.datatable_options = options
        table.datatable.set_links_to_column('link', 'package')
        script = table.create_javascript_function()
        markup = table.create_datatable()
        return script + '\n' + markup
示例#5
0
文件: phix.py 项目: sequana/sequana
    def _get_html_stats_section(self):
        df = self._get_stats()
        datatable = DataTable(df, "phix_stats", index=True)
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'tpB',
            "paging": "false",
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        # Important that the columns of type integer are indeed in integer type
        # otherwise the %.3g herebelow would round integers. For instance 123456
        # would appear as 123000. The dtypes must be taken care in _get_stats()
        # method
        html_tab = datatable.create_datatable(float_format='%.3g')
        html = """<p>We mapped the raw reads on a reference (see config file).
The reads mapped are removed and the unmapped reads are kept for further
cleaning (adapter removal). Here below are some statistics about the mapped and unmapped reads.
</p><p>
The A, C, G, T, N columns report the percentage of each bases in the overall
sequences. The GC content column is in percentage. Finally, note that for paired
data, the number of reads in the mapped files (R1 and R2) may differ due to . However,
the unmapped reads must agree. </p>""" 
        html += "{} {}".format(html_tab, js)
        return html
示例#6
0
    def _get_html_stats(self):
        """Return a short HTML summary of the Phix mapping statistics.

        The figures are read from the ``data`` attribute of the
        ``StatsBAM2Mapped`` object created on ``bwa_mem_stats.json`` (the
        file name is appended directly to ``self.directory``). Three lines
        are reported: the contamination percentage (formatted with
        ``precision(..., 3)``), the unpaired count and the duplicated count.

        No table is rendered: the mapped/unmapped DataTable was never
        appended to the returned HTML, so building it was dead work and has
        been dropped.
        """
        from sequana.tools import StatsBAM2Mapped
        from easydev import precision

        data = StatsBAM2Mapped(self.directory + "bwa_mem_stats.json").data

        html = "Reads with Phix: %s %%<br>" % precision(data['contamination'], 3)
        html += "Unpaired: %s <br>" % data['unpaired']
        html += "duplicated: %s <br>" % data['duplicated']
        return html
示例#7
0
    def _get_summary_section(self):

        df = self._get_stats()
        if len(df) == 1 and df.iloc[0]['taxon'] == -1:
            pngimage = sequana_data("no_data.jpg")
            extra = "<p> no reads could be identified with the given the database(s)."
        else:
            pngimage = self.directory + os.sep + "kraken.png"
            extra = """<p>The following <b>clickable image</b> is a simplified 
version (only genus are shown) of an interactive and more detailled version 
based on Krona. Finally, note that the unclassified species in the pie plot 
may correspond to species not present in the data base or adapters (if not 
removed).</p>"""

        html = """
    <p>Overview of the Taxonomic content of the filtered reads. </p>
    <p>The taxonomic analysis is performed with Kraken (see database name in 
the configuration file. The analysis is performed with a Kmer
approach.
The details about the database itself are available in the <a
href="http://sequana.readthedocs.io">Sequana documentation</a>.
The taxonomic analysis should give a good idea of the content of the FastQ
files but should be used as a sanity check. Indeed, species absent
from the database won't be detected leading to false detection (close species 
may be detected instead). 
Besides, be aware that closely related species may not be classified precisely.
</p>

    {0}
    <div style="text-align:center"><a href="./{1}/kraken.html"> {2} </a></div>
    <br>
""".format(extra,
           self.directory.split(os.sep, 1)[1],
           self.png_to_embedded_png(pngimage))

        datatable = DataTable(df, "kraken", index=False)
        # add links
        if "ena" in df.columns:
            urlena = "http://www.ebi.ac.uk/ena/data/view/"
            datatable.datatable.set_links_to_column(
                "ena", [urlena + this for this in df['ena']])
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 30,
            'scrollCollapse': 'true',
            'dom': 'irtpB',
            "paging": "false",
            "order": [[2, "desc"]],
            'buttons': ['copy', 'csv']
        }
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')

        html += "{} {}".format(html_tab, js)
        """# Rounding and convert in string to avoid exp notation
        df['percentage']  = df['percentage'].apply(lambda x: str(round(x,4)))
        #self.jinja['kraken_json'] = df.to_json()"""

        return html
示例#8
0
    def _get_stats_section(self, tablename="stats"):
        self.df_stats = self.get_stats()
        filenames, mode = self._get_files("*boxplot.png")

        datatable = DataTable(self.df_stats, tablename, index=True)
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']
        }
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')

        html = """<p>The following table gives some basic statistics about the data before any filtering.
   The A, C, G, T, N columns report the percentage of each bases in the overall sequences.
   The GC content is provided in percentage as well. </p>
   <div>{} {}</div>
   <div>""".format(html_tab, js)

        html += """
   <p>The following figure(s) gives the average quality (red line) of raw reads
   (500,000 at max). The x-axis being the length of the reads. The yellow
   enveloppe gives the variation of the quality (1 standard deviation).</p>
   <p> Click on the image to jump to a full FastQC report.</p>"""

        if len(filenames) == 2: width = "49"
        else: width = "65"

        filename = os.path.split(filenames[0])[1].replace(
            "_boxplot.png", "_fastqc.html")
        href = self.path_to_fastqc + os.sep + filename
        html += """
   <figure style="float:left; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig1: R1 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[0]))

        if len(filenames) == 2:
            filename = os.path.split(filenames[1])[1].replace(
                "_boxplot.png", "_fastqc.html")
            href = self.path_to_fastqc + os.sep + filename
            html += """
   <figure style="float:right; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig2: R2 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[1]))

        return html
示例#9
0
    def _get_stats_section(self, tablename="stats"):
        self.df_stats = self.get_stats()
        filenames, mode = self._get_files("*boxplot.png")

        datatable = DataTable(self.df_stats, tablename, index=True)
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')

        html = """<p>The following table gives some basic statistics about the data before any filtering.
   The A, C, G, T, N columns report the percentage of each bases in the overall sequences.
   The GC content is provided in percentage as well. </p>
   <div>{} {}</div>
   <div>""".format(html_tab, js)

        html += """
   <p>The following figure(s) gives the average quality (red line) of raw reads
   (500,000 at max). The x-axis being the length of the reads. The yellow
   enveloppe gives the variation of the quality (1 standard deviation).</p>
   <p> Click on the image to jump to a full FastQC report.</p>"""

        if len(filenames)==2: width="49"
        else: width="65"

        filename = os.path.split(filenames[0])[1].replace("_boxplot.png", "_fastqc.html")
        href = self.path_to_fastqc + os.sep + filename
        html += """
   <figure style="float:left; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig1: R1 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[0]))

        if len(filenames) == 2:
            filename = os.path.split(filenames[1])[1].replace("_boxplot.png", "_fastqc.html")
            href = self.path_to_fastqc + os.sep + filename
            html += """
   <figure style="float:right; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig2: R2 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[1]))


        return html
示例#10
0
 def create_datatable(self):
     """Return the variants table markup together with its options.

     The table is built from ``self.vcf.df``. Each sample of
     ``self.vcf.vcf.samples`` is set as tooltip of the ``info_<i>`` column
     carrying its position. The markup is the hidden CSV followed by the
     HTML table.
     """
     table = DataTable(self.vcf.df, 'jc')
     settings = {
         'scrollX': 'true',
         'pageLength': 15,
         'scrollCollapse': 'true',
         'dom': 'Bfrtip',
         'buttons': ['copy', 'csv'],
     }
     table.datatable.datatable_options = settings
     for position, sample in enumerate(self.vcf.vcf.samples):
         column = 'info_{0}'.format(position)
         table.datatable.set_tooltips_to_column(column, sample)
     options = table.datatable._create_datatable_option()
     hidden_csv = table._create_hidden_csv(float_format='%.3f')
     markup = hidden_csv + table._create_html_table(style='width: 100%;')
     return markup, options
示例#11
0
 def _get_stat_section(self):
     """Return the introduction text followed by the statistics table.

     The table is built from ``self._get_stats()`` and wrapped, with its
     JavaScript function, in a paragraph.
     """
     table = DataTable(self._get_stats(), "cutadapt", index=True)
     settings = {
         'scrollX': '300px',
         'pageLength': 15,
         'scrollCollapse': 'true',
         'dom': 'rtpB',
         "paging": "false",
         'buttons': ['copy', 'csv'],
     }
     table.datatable.datatable_options = settings
     script = table.create_javascript_function()
     rendered = table.create_datatable(float_format='%.3g')
     intro = ("Reads statistics after trimming and adapter removal. The "
              "A, C, G, T, N columns report the percentage of each bases in "
              "the overall sequences")
     return intro + "<p>{} {}</p>".format(rendered, script)
示例#12
0
def test_datatables():
    """Smoke test: build a DataTable of the ROIs of the first chromosome."""
    coverage = bedtools.GenomeCov(
        sequana_data("JB409847.bed"), sequana_data("JB409847.gbk"))
    coverage.compute_gc_content(sequana_data("JB409847.fasta"))

    chromosome = coverage.chr_list[0]
    chromosome.run(4001)
    rois = chromosome.get_rois()
    rois.df['link'] = 'test'

    options = {
        'scrollX': 'true',
        'pageLength': 15,
        'scrollCollapse' : 'true',
        'dom': 'Bfrtip',
        'buttons': ['copy', 'csv'],
    }
    function = DataTableFunction(rois.df, 'roi')
    function.set_links_to_column('link', 'start')
    function.datatable_options = options

    # only checks that the table can be rendered without raising
    DataTable(rois.df, 'rois', function).create_datatable(float_format='%.3g')
示例#13
0
def test_datatables():
    """Smoke test: build a DataTable of the ROIs of the first chromosome."""
    bed_file = sequana_data("JB409847.bed")
    genbank_file = sequana_data("JB409847.gbk")
    genome = bedtools.GenomeCov(bed_file, genbank_file)
    genome.compute_gc_content(sequana_data("JB409847.fasta"))

    first = genome.chr_list[0]
    first.run(4001)
    regions = first.get_rois()
    regions.df['link'] = 'test'

    js_function = DataTableFunction(regions.df, 'roi')
    js_function.set_links_to_column('link', 'start')
    js_function.datatable_options = dict([
        ('scrollX', 'true'),
        ('pageLength', 15),
        ('scrollCollapse', 'true'),
        ('dom', 'Bfrtip'),
        ('buttons', ['copy', 'csv']),
    ])

    # only checks that the table can be rendered without raising
    table = DataTable(regions.df, 'rois', js_function)
    table.create_datatable(float_format='%.3g')
示例#14
0
文件: bamqc.py 项目: sequana/sequana
    def add_flag_section(self):
        """Append the "Flags information" section to ``self.sections``.

        The section content is the table built from the ``flags`` entry of
        ``self._computation()``, followed by its JavaScript function.
        """
        flags = self._computation()['flags']

        table = DataTable(flags, "flags", index=True)
        settings = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'tB',
            "paging": "false",
            'buttons': ['copy', 'csv'],
        }
        table.datatable.datatable_options = settings
        script = table.create_javascript_function()
        rendered = table.create_datatable(float_format='%.3g')

        section = {
            "name": "Flags information",
            "anchor": "flags",
            "content": "{} {}".format(rendered, script),
        }
        self.sections.append(section)
示例#15
0
    def add_flag_section(self):
        """Append the "Flags information" section to ``self.sections``.

        The section content is the table built from the ``flags`` entry of
        ``self._computation()``, followed by its JavaScript function.
        """
        computed = self._computation()
        table = DataTable(computed['flags'], "flags", index=True)
        table.datatable.datatable_options = dict([
            ('scrollX', '300px'),
            ('pageLength', 15),
            ('scrollCollapse', 'true'),
            ('dom', 'tB'),
            ("paging", "false"),
            ('buttons', ['copy', 'csv']),
        ])
        script = table.create_javascript_function()
        rendered = table.create_datatable(float_format='%.3g')

        content = "{} {}".format(rendered, script)
        self.sections.append(
            {"name": "Flags information", "anchor": "flags", "content": content}
        )
示例#16
0
    def add_table(self):
        """Append the "Table" section to ``self.sections``.

        A copy of ``self.summary`` gets its columns renamed to ``data`` and a
        constant ``url`` column, which is registered as the link target of
        the ``data`` column.
        """
        frame = self.summary.copy()
        frame.columns = ['data']
        frame['url'] = ['http://sequana.readthedocs.org' for _ in range(len(frame))]

        settings = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'tB',
            "paging": "false",
            'buttons': ['copy', 'csv'],
        }
        table = DataTable(frame, "table", index=True)
        table.datatable.datatable_options = settings
        table.datatable.set_links_to_column('url', 'data')

        script = table.create_javascript_function()
        rendered = table.create_datatable(float_format='%.3g')

        section = {
            "name": "Table",
            "anchor": "table",
            "content": "{} {}".format(rendered, script),
        }
        self.sections.append(section)
示例#17
0
    def add_stats(self):
        """Append the "Basic stats on read length" section.

        ``self.summary['read_stats']`` is laid out as a single row labelled
        'read length stats' and rendered as a DataTable.
        """
        stats = pd.Series(self.summary['read_stats'])
        frame = stats.to_frame().T
        frame.index = ['read length stats']

        settings = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 't',
            "paging": "false",
            'buttons': ['copy', 'csv'],
        }
        table = DataTable(frame, "table", index=True)
        table.datatable.datatable_options = settings
        script = table.create_javascript_function()
        # IMPORTANT: the %d format rounds floats and shows them as integers,
        # so the GC value is shown as an integer as well.
        rendered = table.create_datatable(float_format='%d')

        section = {
            "name": "Basic stats on read length",
            "anchor": "table",
            "content": "{} {}".format(rendered, script),
        }
        self.sections.append(section)