Exemplo n.º 1
0
    def __repr__(self):
        ''' Build the plain-text console rendering of this DataFrame '''
        out = six.StringIO('')

        # `_info_repr` is a private DataFrame method, so tolerate its absence.
        wants_info_repr = getattr(self, '_info_repr', lambda: False)
        if wants_info_repr():
            self.info(buf=out)
            return out.getvalue()

        if self.label:
            out.write('%s\n\n' % self.label)

        # With a renderer available (pandas 1.2.0+), the line width is given
        # to the renderer's `to_string` rather than to the formatter itself.
        has_renderer = hasattr(pdfmt, 'DataFrameRenderer')

        fmt_args = {}
        try:
            fmt_args['min_rows'] = pd.get_option('display.min_rows')
        except Exception:
            # Best effort: the option lookup may fail, in which case
            # `min_rows` is simply not passed along.
            pass
        fmt_args['max_rows'] = pd.get_option('display.max_rows')
        fmt_args['max_cols'] = pd.get_option('display.max_columns')
        fmt_args['show_dimensions'] = pd.get_option('display.show_dimensions')

        width = None
        if pd.get_option('display.expand_frame_repr'):
            width, _ = get_console_size()
        if not has_renderer:
            fmt_args['line_width'] = width

        if get_option('display.apply_formats'):
            fmt_args['formatters'] = self._get_formatters()
            fmt_args.setdefault('na_rep', '.')

        printer = pdfmt.DataFrameFormatter(self, **fmt_args)
        # NOTE: Patch for bug in pandas DataFrameFormatter when using
        #       formatters on a DataFrame that is truncated in the console.
        printer.columns = printer.tr_frame.columns

        if has_renderer:
            printer = pdfmt.DataFrameRenderer(printer)
            rendered = printer.to_string(line_width=width)
        else:
            rendered = printer.to_string()

        if rendered is not None:
            out.write(rendered)
        else:
            # Nothing was returned; fall back to the printer's own buffer
            # when it has one.
            own_buf = getattr(printer, 'buf', None)
            if own_buf is not None:
                out.write(own_buf.getvalue())

        return out.getvalue()
Exemplo n.º 2
0
def patchPandasHTMLrepr(self, **kwargs):
  '''
  Render the frame as HTML with escaping of HTML control characters
  switched off, so that markup stored in cells (molecule images) is
  emitted as-is.

  Extra keyword arguments are accepted but not used.
  '''
  options = dict(
    buf=None, columns=None, col_space=None, colSpace=None,
    header=True, index=True, na_rep='NaN', formatters=None,
    float_format=None, sparsify=None, index_names=True, justify=None,
    force_unicode=None, bold_rows=True, classes=None,
    escape=False,  # the actual patch: leave HTML in cells unescaped
  )
  renderer = fmt.DataFrameFormatter(self, **options)
  # to_html() fills the formatter's own buffer; the markup is read from it.
  renderer.to_html()
  return renderer.buf.getvalue()
Exemplo n.º 3
0
    def to_string(self, apply_formats=None, **kwargs):
        '''
        Return a string representation of a DataFrame

        If the DataFrame has a label, it is written first, followed by
        a blank line, and then the rendered table.

        Parameters
        ----------
        apply_formats : bool or None, optional
            Should SAS formats be applied to the data values in the
            rendered output?  If None, the `display.apply_formats`
            option value will be used.
        **kwargs : keyword-parameters, optional
            All keyword parameters for the `pandas.DataFrame.to_string`
            method are accepted here as well.  When formats are applied,
            any `formatters` given are handed to `_get_formatters`, and
            `na_rep` defaults to ``'.'`` unless specified.

        See Also
        --------
        :meth:`pandas.DataFrame.to_string`

        Returns
        -------
        string

        '''
        buf = six.StringIO('')

        if self.label:
            buf.write('%s\n\n' % self.label)

        if apply_formats or (apply_formats is None
                             and get_option('display.apply_formats')):
            kwargs['formatters'] = self._get_formatters(
                kwargs.get('formatters', None))
            kwargs.setdefault('na_rep', '.')

        # pandas 1.2.0 uses a renderer instead of just a formatter.  With
        # the renderer, `line_width` is a parameter of the renderer's
        # `to_string` and is no longer accepted by the DataFrameFormatter
        # constructor, so it has to be split off from the other options.
        has_renderer = hasattr(pdfmt, 'DataFrameRenderer')
        render_kwargs = {}
        if has_renderer and 'line_width' in kwargs:
            render_kwargs['line_width'] = kwargs.pop('line_width')

        formatter = pdfmt.DataFrameFormatter(self, **kwargs)
        # NOTE: Patch for bug in pandas DataFrameFormatter when using
        #       formatters on a DataFrame that is truncated in the console.
        formatter.columns = formatter.tr_frame.columns

        if has_renderer:
            formatter = pdfmt.DataFrameRenderer(formatter)

        txt = formatter.to_string(**render_kwargs)
        if txt is None:
            # Nothing was returned; fall back to the formatter's own
            # buffer when it has one.
            if getattr(formatter, 'buf', None) is not None:
                buf.write(formatter.buf.getvalue())
        else:
            buf.write(txt)

        return buf.getvalue()
Exemplo n.º 4
0
    def _repr_html_(self):
        ''' Return a html representation for a particular DataFrame

        Returns
        -------
        string or None
            The `info` summary wrapped in ``<pre>`` tags when the
            DataFrame's `_info_repr` says so; otherwise the rendered table
            passed through `_post_process_html`.  None is returned when the
            pandas `display.notebook_repr_html` option is off or no HTML
            could be obtained from the formatter.

        '''

        # Calling a private DataFrame method here, so protect it.
        if getattr(self, '_info_repr', lambda: False)():
            buf = six.StringIO('')
            self.info(buf=buf)
            # need to escape the <class>, should be the first line.
            val = buf.getvalue().replace('<', r'&lt;', 1)
            val = val.replace('>', r'&gt;', 1)
            return '<pre>' + val + '</pre>'

        kwargs = {}
        if get_option('display.apply_formats'):
            kwargs['formatters'] = self._get_formatters()
            kwargs['na_rep'] = '.'

        if not pd.get_option('display.notebook_repr_html'):
            return None

        try:
            kwargs['min_rows'] = pd.get_option('display.min_rows')
        except Exception:
            # Best effort: if the option lookup fails, `min_rows` is simply
            # not passed along.  Only ordinary errors are swallowed, so that
            # KeyboardInterrupt / SystemExit still propagate.
            pass
        max_rows = pd.get_option('display.max_rows')
        max_cols = pd.get_option('display.max_columns')
        show_dimensions = pd.get_option('display.show_dimensions')

        formatter = pdfmt.DataFrameFormatter(
            self,
            max_rows=max_rows,
            max_cols=max_cols,
            show_dimensions=show_dimensions,
            **kwargs)
        # NOTE: Patch for bug in pandas DataFrameFormatter when using
        #       formatters on a DataFrame that is truncated in the console.
        formatter.columns = formatter.tr_frame.columns

        # pandas 1.2.0 uses a renderer instead of just a formatter
        if hasattr(pdfmt, 'DataFrameRenderer'):
            formatter = pdfmt.DataFrameRenderer(formatter)

        # NOTE(review): `notebook_opts` is not defined in this method; it is
        #               presumably a module-level dict -- confirm.
        html = formatter.to_html(**notebook_opts)
        if html is None:
            # Nothing was returned; fall back to the formatter's own buffer
            # when it has one.
            if getattr(formatter, 'buf', None) is not None:
                html = formatter.buf.getvalue()
            else:
                return None
        return self._post_process_html(html)
# Write one HTML page per topic.  Pages are grouped into "<c>-<n>/"
# sub-directories of the web output directory, where c and n are the first-
# and second-level keys of `predominant_doc_dfd`.
out_topics_html_dir = web_out_dir
os.makedirs(out_topics_html_dir, mode=out_path_mode, exist_ok=True)
for c, frames_by_n in predominant_doc_dfd.items():
    for n, frames_by_topic in frames_by_n.items():
        ofdir = out_topics_html_dir + f'{c}-{n}/'
        os.makedirs(ofdir, mode=out_path_mode, exist_ok=True)
        print(ofdir)
        for i, topic_df in frames_by_topic.items():
            ofname = ofdir + f'Topic_{i+1:02d}.html'
            with open(ofname, 'w') as ofp:
                # Columns left out of the page: fixed bookkeeping columns
                # plus the similarity columns, whose frame labels use
                # spaces where `sims_columns` uses underscores.
                hidden_columns = [
                    'sha', 'major topics', 'abstract clean',
                    'predominant topic', 'predominant topic num'
                ]
                hidden_columns.extend(
                    name.replace('_', ' ') for name in sims_columns)

                # Number the remaining rows 1..N instead of keeping the
                # original index.
                row_numbers = np.arange(1, len(topic_df) + 1)
                html_df = (topic_df.drop(columns=hidden_columns)
                           .copy()
                           .set_index(row_numbers))

                # escape=False keeps HTML stored in the cells intact.
                df_formatter = fmt.DataFrameFormatter(frame=html_df,
                                                      index=True,
                                                      bold_rows=True,
                                                      escape=False)
                html_formatter = MyHTMLFormatter('cord uid',
                                                 formatter=df_formatter)
                html_table = html_formatter.get_result()

                html_str = html_template.format(f'Topic {i+1:02d}', html_style,
                                                html_table)
                ofp.write(html_str)