Python generate_html_associations示例

编程语言: Python

命名空间/包名称: sweetviz.sv_html

方法/功能: generate_html_associations

hotexamples.com的示例: 6

Python generate_html_associations - 已找到6个示例。这些是从开源项目中提取的最受好评的sweetviz.sv_html.generate_html_associations现实Python示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

    def show_html(self, filepath='SWEETVIZ_REPORT.html', open_browser=True, layout='widescreen', scale=None):
        """Generate the report page, write it to ``filepath`` and optionally open it.

        Args:
            filepath: Path of the HTML file to write (UTF-8 encoded).
            open_browser: If true, the written file is opened via ``webbrowser``.
            layout: ``'widescreen'`` or ``'vertical'``; first passed through
                ``use_config_if_none`` with the ``"html_layout"`` key.
            scale: Passed through ``use_config_if_none`` with the
                ``"html_scale"`` key, then converted to ``float``.

        Raises:
            ValueError: If the resolved layout is neither ``'widescreen'`` nor
                ``'vertical'``.
        """
        scale = float(self.use_config_if_none(scale, "html_scale"))
        layout = self.use_config_if_none(layout, "html_layout")
        if layout not in ['widescreen', 'vertical']:
            raise ValueError("'layout' parameter must be either 'widescreen' or 'vertical'")
        sv_html.load_layout_globals_from_config()
        self.page_layout = layout
        self.scale = scale
        sv_html.set_summary_positions(self)
        sv_html.generate_html_detail(self)
        # Association sections are regenerated only when they currently hold
        # a truthy value.
        if self.associations_html_source:
            self.associations_html_source = sv_html.generate_html_associations(self, "source")
        if self.associations_html_compare:
            self.associations_html_compare = sv_html.generate_html_associations(self, "compare")
        self._page_html = sv_html.generate_html_dataframe_page(self)

        # Context manager guarantees the handle is closed even if write() raises.
        with open(filepath, 'w', encoding="utf-8") as report_file:
            report_file.write(self._page_html)

        if open_browser:
            print(f"Report {filepath} was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.")
            # Not sure how to work around this: not fatal but annoying...Notebook/colab
            # https://bugs.python.org/issue5993
            webbrowser.open('file://' + os.path.realpath(filepath))
        else:
            print(f"Report {filepath} was generated.")

示例#2

显示文件

    def show_notebook(self,
                      w=None,
                      h=None,
                      scale=None,
                      layout=None,
                      filepath=None):
        """Generate the report page and display it inline in an IPython iframe.

        Args:
            w: iframe width; passed through ``use_config_if_none`` with the
                ``"notebook_width"`` key.
            h: iframe height; passed through ``use_config_if_none`` with the
                ``"notebook_height"`` key. The string ``"full"``
                (case-insensitive) is replaced by ``self.page_height``.
            scale: Passed through ``use_config_if_none`` with the
                ``"notebook_scale"`` key, then converted to ``float``.
            layout: ``'widescreen'`` or ``'vertical'``; passed through
                ``use_config_if_none`` with the ``"notebook_layout"`` key.
            filepath: If not ``None``, ``self._page_html`` is also written to
                this path (UTF-8).

        Raises:
            ValueError: If the resolved layout is neither ``'widescreen'`` nor
                ``'vertical'``.
        """
        w = self.use_config_if_none(w, "notebook_width")
        h = self.use_config_if_none(h, "notebook_height")
        scale = float(self.use_config_if_none(scale, "notebook_scale"))
        layout = self.use_config_if_none(layout, "notebook_layout")
        if layout not in ['widescreen', 'vertical']:
            raise ValueError(
                "'layout' parameter must be either 'widescreen' or 'vertical'"
            )

        sv_html.load_layout_globals_from_config()
        self.page_layout = layout
        self.scale = scale
        sv_html.set_summary_positions(self)
        sv_html.generate_html_detail(self)
        # Association sections are regenerated only when they currently hold
        # a truthy value.
        if self.associations_html_source:
            self.associations_html_source = sv_html.generate_html_associations(
                self, "source")
        if self.associations_html_compare:
            self.associations_html_compare = sv_html.generate_html_associations(
                self, "compare")
        self._page_html = sv_html.generate_html_dataframe_page(self)

        width = w
        height = h
        if str(height).lower() == "full":
            height = self.page_height

        # Output to iFrame: the page is escaped so it can be embedded in the
        # double-quoted srcdoc attribute.
        import html
        # IPython.display is the public import location; importing `display`
        # from IPython.core.display is deprecated.
        from IPython.display import HTML, display
        self._page_html = html.escape(self._page_html)
        iframe = f' <iframe width="{width}" height="{height}" srcdoc="{self._page_html}" frameborder="0" allowfullscreen></iframe>'
        display(HTML(iframe))

        if filepath is not None:
            # NOTE(review): self._page_html was HTML-escaped just above, so the
            # escaped text is what gets saved here -- confirm this is intended.
            # Context manager guarantees the handle is closed even if write() raises.
            with open(filepath, 'w', encoding="utf-8") as report_file:
                report_file.write(self._page_html)
            print(f"Report '{filepath}' was saved to storage.")

        if len(self.corr_warning):
            print(
                "WARNING: one or more correlations had an edge-case/error and a 1.0 correlation was assigned\n"
                "(likely due to only a single row containing non-NaN values for both correlated features)\n"
                "Affected correlations:" + str(self.corr_warning))

        # Auto-log to comet_ml if desired & present
        self._comet_ml_logger = comet_ml_logger.CometLogger()
        if self._comet_ml_logger._logging:
            # Re-renders self._page_html before it is logged.
            self.generate_comet_friendly_html()
            self._comet_ml_logger.log_html(self._page_html)
            self._comet_ml_logger.end()

示例#3

显示文件

 def generate_comet_friendly_html(self):
     """Re-render ``self._page_html`` using the ``comet_ml_defaults`` config section.

     The layout and scale are taken from ``config["comet_ml_defaults"]``
     (``html_layout`` and ``html_scale``), after which the summary positions,
     detail HTML, any non-empty association sections and the final page are
     regenerated through ``sv_html``.
     """
     comet_defaults = config["comet_ml_defaults"]
     self.page_layout = comet_defaults["html_layout"]
     self.scale = float(comet_defaults["html_scale"])

     sv_html.set_summary_positions(self)
     sv_html.generate_html_detail(self)

     # Refresh each association section, but only if it currently holds content.
     for which in ("source", "compare"):
         attr_name = "associations_html_" + which
         if getattr(self, attr_name):
             refreshed = sv_html.generate_html_associations(self, which)
             setattr(self, attr_name, refreshed)

     self._page_html = sv_html.generate_html_dataframe_page(self)

示例#4

显示文件

    def show_html(self,
                  filepath='SWEETVIZ_REPORT.html',
                  open_browser=True,
                  layout='widescreen',
                  scale=None):
        """Generate the report page, write it to ``filepath`` and optionally open it.

        After writing, any collected correlation warnings are printed and, if
        the comet_ml logger reports that logging is active, a comet-friendly
        version of the page is rendered and logged.

        Args:
            filepath: Path of the HTML file to write (UTF-8 encoded).
            open_browser: If true, the written file is opened via ``webbrowser``.
            layout: ``'widescreen'`` or ``'vertical'``; first passed through
                ``use_config_if_none`` with the ``"html_layout"`` key.
            scale: Passed through ``use_config_if_none`` with the
                ``"html_scale"`` key, then converted to ``float``.

        Raises:
            ValueError: If the resolved layout is neither ``'widescreen'`` nor
                ``'vertical'``.
        """
        scale = float(self.use_config_if_none(scale, "html_scale"))
        layout = self.use_config_if_none(layout, "html_layout")
        if layout not in ['widescreen', 'vertical']:
            raise ValueError(
                "'layout' parameter must be either 'widescreen' or 'vertical'"
            )
        sv_html.load_layout_globals_from_config()
        self.page_layout = layout
        self.scale = scale
        sv_html.set_summary_positions(self)
        sv_html.generate_html_detail(self)
        # Association sections are regenerated only when they currently hold
        # a truthy value.
        if self.associations_html_source:
            self.associations_html_source = sv_html.generate_html_associations(
                self, "source")
        if self.associations_html_compare:
            self.associations_html_compare = sv_html.generate_html_associations(
                self, "compare")
        self._page_html = sv_html.generate_html_dataframe_page(self)

        # Context manager guarantees the handle is closed even if write() raises.
        with open(filepath, 'w', encoding="utf-8") as report_file:
            report_file.write(self._page_html)

        if open_browser:
            print(
                f"Report {filepath} was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files."
            )
            # Not sure how to work around this: not fatal but annoying...Notebook/colab
            # https://bugs.python.org/issue5993
            webbrowser.open('file://' + os.path.realpath(filepath))
        else:
            print(f"Report {filepath} was generated.")
        if len(self.corr_warning):
            print(
                "---\nWARNING: one or more correlations had an edge-case/error and a 1.0 correlation was assigned\n"
                "(likely due to only a single row containing non-NaN values for both correlated features)\n"
                "Affected correlations:" + str(self.corr_warning))

        # Auto-log to comet_ml if desired & present
        self._comet_ml_logger = comet_ml_logger.CometLogger()
        if self._comet_ml_logger._logging:
            # Re-renders self._page_html before it is logged.
            self.generate_comet_friendly_html()
            self._comet_ml_logger.log_html(self._page_html)
            self._comet_ml_logger.end()

示例#5

显示文件

文件： dataframe_report.py 项目： lqgblozs/sweetviz

    def show_notebook(self,
                      w=None,
                      h=None,
                      scale=None,
                      layout='widescreen',
                      filepath=None):
        """Generate the report page and display it inline in an IPython iframe.

        Args:
            w: iframe width; passed through ``use_config_if_none`` with the
                ``"notebook_width"`` key.
            h: iframe height; passed through ``use_config_if_none`` with the
                ``"notebook_height"`` key. The string ``"full"``
                (case-insensitive) is replaced by ``self.page_height``.
            scale: Passed through ``use_config_if_none`` with the
                ``"notebook_scale"`` key, then converted to ``float``.
            layout: ``'widescreen'`` or ``'vertical'``; passed through
                ``use_config_if_none`` with the ``"notebook_layout"`` key.
            filepath: If not ``None``, ``self._page_html`` is also written to
                this path (UTF-8).

        Raises:
            ValueError: If the resolved layout is neither ``'widescreen'`` nor
                ``'vertical'``.
        """
        w = self.use_config_if_none(w, "notebook_width")
        h = self.use_config_if_none(h, "notebook_height")
        scale = float(self.use_config_if_none(scale, "notebook_scale"))
        layout = self.use_config_if_none(layout, "notebook_layout")
        if layout not in ['widescreen', 'vertical']:
            raise ValueError(
                "'layout' parameter must be either 'widescreen' or 'vertical'"
            )

        sv_html.load_layout_globals_from_config()
        self.page_layout = layout
        self.scale = scale
        sv_html.set_summary_positions(self)
        sv_html.generate_html_detail(self)
        # Association sections are regenerated only when they currently hold
        # a truthy value.
        if self.associations_html_source:
            self.associations_html_source = sv_html.generate_html_associations(
                self, "source")
        if self.associations_html_compare:
            self.associations_html_compare = sv_html.generate_html_associations(
                self, "compare")
        self._page_html = sv_html.generate_html_dataframe_page(self)

        width = w
        height = h
        if str(height).lower() == "full":
            height = self.page_height

        # Output to iFrame: the page is escaped so it can be embedded in the
        # double-quoted srcdoc attribute.
        import html
        # IPython.display is the public import location; importing `display`
        # from IPython.core.display is deprecated.
        from IPython.display import HTML, display
        self._page_html = html.escape(self._page_html)
        iframe = f' <iframe width="{width}" height="{height}" srcdoc="{self._page_html}" frameborder="0" allowfullscreen></iframe>'
        display(HTML(iframe))

        if filepath is not None:
            # NOTE(review): self._page_html was HTML-escaped just above, so the
            # escaped text is what gets saved here -- confirm this is intended.
            # Context manager guarantees the handle is closed even if write() raises.
            with open(filepath, 'w', encoding="utf-8") as report_file:
                report_file.write(self._page_html)
            print(f"Report '{filepath}' was saved to storage.")

示例#6

显示文件

    def __init__(self,
                 source: Union[pd.DataFrame, Tuple[pd.DataFrame, str]],
                 target_feature_name: str = None,
                 compare: Union[pd.DataFrame, Tuple[pd.DataFrame, str]] = None,
                 pairwise_analysis: str = 'auto',
                 fc: FeatureConfig = None):
        """Analyze ``source`` (and optionally ``compare``) and build the report data.

        Args:
            source: Either a dataframe, or a 2-element list/tuple
                ``[dataframe, "Name"]``. A bare dataframe is named
                ``"DataFrame"``.
            target_feature_name: Optional name of the column in ``source`` to
                treat as the target feature.
            compare: Optional second dataset, in the same forms as ``source``.
                A bare dataframe is named ``"Compared"``.
            pairwise_analysis: ``"on"``, ``"auto"`` or ``"off"``
                (case-insensitive). With ``"auto"``, if the number of
                non-skipped source columns exceeds the
                ``association_auto_threshold`` value of the ``Processing``
                config section, a warning is printed and initialization stops
                early. With ``"off"``, association data is set to ``None``.
            fc: Feature configuration; a default ``FeatureConfig()`` is used
                when ``None``.

        Raises:
            ValueError: For an invalid ``pairwise_analysis`` value, a
                malformed ``source``/``compare`` argument, duplicate column
                names, a target that is also skipped, or configured/skipped
                feature names that are not present in the dataframe(s).
            KeyError: If ``target_feature_name`` is not among the non-skipped
                source columns.
        """
        pairwise_analysis = pairwise_analysis.lower()
        if pairwise_analysis not in ["on", "auto", "off"]:
            raise ValueError(
                '"pairwise_analysis" parameter should be one of: "on", "auto", "off"'
            )

        sv_html.load_layout_globals_from_config()

        self._jupyter_html = ""
        self._page_html = ""
        self._features = dict()
        self.compare_name = None
        self._target = None
        self.test_mode = False
        if fc is None:
            fc = FeatureConfig()

        # Associations: _associations[FEATURE][GIVES INFORMATION ABOUT THIS FEATURE]
        self._associations = dict()
        self._associations_compare = dict()
        self._association_graphs = dict()
        self._association_graphs_compare = dict()

        # Handle source and compare dataframes and names.
        # isinstance() is used so that the tuple form advertised by the type
        # hints (and DataFrame subclasses) are accepted as well as plain lists.
        if isinstance(source, pd.DataFrame):
            source_df = source
            self.source_name = "DataFrame"
        elif isinstance(source, (list, tuple)):
            if len(source) != 2:
                raise ValueError(
                    '"source" parameter should either be a string or a list of 2 elements: [dataframe, "Name"].'
                )
            source_df = source[0]
            self.source_name = source[1]
        else:
            raise ValueError(
                '"source" parameter should either be a string or a list of 2 elements: [dataframe, "Name"].'
            )
        if len(su.get_duplicate_cols(source_df)) > 0:
            raise ValueError(
                'Duplicate column names detected in "source"; this is not supported.'
            )

        # Column labels are read from `.columns`; DataFrame.iteritems() was
        # removed in pandas 2.0.
        all_source_names = list(source_df.columns)
        if compare is None:
            compare_df = None
            self.compare_name = None
            all_compare_names = list()
        elif isinstance(compare, pd.DataFrame):
            compare_df = compare
            self.compare_name = "Compared"
            all_compare_names = list(compare_df.columns)
        elif isinstance(compare, (list, tuple)):
            if len(compare) != 2:
                raise ValueError(
                    '"compare" parameter should either be a string or a list of 2 elements: [dataframe, "Name"].'
                )
            compare_df = compare[0]
            self.compare_name = compare[1]
            all_compare_names = list(compare_df.columns)
        else:
            raise ValueError(
                '"compare" parameter should either be a string or a list of 2 elements: [dataframe, "Name"].'
            )

        # Validate some params
        if compare_df is not None and len(
                su.get_duplicate_cols(compare_df)) > 0:
            raise ValueError(
                'Duplicate column names detected in "compare"; this is not supported.'
            )

        if target_feature_name in fc.skip:
            raise ValueError(
                f'"{target_feature_name}" was also specified as "skip". Target cannot be skipped.'
            )

        for key in fc.get_all_mentioned_features():
            if key not in all_source_names:
                raise ValueError(
                    f'"{key}" was specified in "feature_config" but is not found in source dataframe (watch case-sensitivity?).'
                )

        # Find Features and Target (FILTER SKIPPED)
        filtered_series_names_in_source = [
            cur_name for cur_name in all_source_names
            if cur_name not in fc.skip
        ]
        for skipped in fc.skip:
            if skipped not in all_source_names and skipped not in all_compare_names:
                raise ValueError(
                    f'"{skipped}" was marked as "skip" but is not in any provided dataframe (watch case-sensitivity?).'
                )

        # Progress bar setup: one chunk for the dataframe summary plus one per
        # non-skipped source column (the target, if any, is still in that
        # list at this point).
        ratio_progress_of_df_summary_vs_feature = 1.0
        number_features = len(filtered_series_names_in_source)
        progress_chunks = ratio_progress_of_df_summary_vs_feature + number_features

        self.progress_bar = tqdm(
            total=progress_chunks,
            bar_format='{desc:35}|{bar}| [{percentage:3.0f}%]   {elapsed}  -> ({remaining} left)',
            ascii=False, ncols=90)

        # Summarize dataframe
        self.progress_bar.set_description("Summarizing dataframe")
        self.summary_source = dict()
        self.summarize_dataframe(source_df, self.source_name,
                                 self.summary_source, fc.skip)
        if target_feature_name:
            # The target column is not counted among the summarized columns.
            self.summary_source[
                "num_columns"] = self.summary_source["num_columns"] - 1
        if compare_df is not None:
            self.summary_compare = dict()
            self.summarize_dataframe(compare_df, self.compare_name,
                                     self.summary_compare, fc.skip)
            if target_feature_name:
                if target_feature_name in compare_df.columns:
                    self.summary_compare["num_columns"] = self.summary_compare[
                        "num_columns"] - 1
        else:
            self.summary_compare = None
        self.progress_bar.update(ratio_progress_of_df_summary_vs_feature)

        self.num_summaries = number_features

        # Association check
        if pairwise_analysis == 'auto' and \
                number_features > config["Processing"].getint("association_auto_threshold"):
            print(
                f"PAIRWISE CALCULATION LENGTH WARNING: There are {number_features} features in "
                f"this dataframe and the "
                f"'pairwise_analysis' parameter is set to 'auto'.\nPairwise analysis is exponential in "
                f"length: {number_features} features will cause ~"
                f"{number_features * number_features} pairs to be "
                f"evaluated, which could take a long time.\n\nYou must call the function with the "
                f"parameter pairwise_analysis='on' or 'off' to explicitly select desired behavior."
            )
            self.progress_bar.close()
            # NOTE(review): returning here skips every assignment made further
            # down in this method (e.g. associations_html_source) -- confirm
            # callers handle such a partially initialized object.
            return

        # Validate and process TARGET
        target_to_process = None
        target_type = None
        if target_feature_name:
            self.progress_bar.set_description(":TARGET:")
            targets_found = [
                item for item in filtered_series_names_in_source
                if item == target_feature_name
            ]
            if len(targets_found) == 0:
                raise KeyError(f"Feature '{target_feature_name}' was "
                               f"specified as TARGET, but is NOT FOUND in "
                               f"the dataframe (watch case-sensitivity?).")
            compare_target_series = None
            if compare_df is not None:
                if target_feature_name in compare_df.columns:
                    compare_target_series = compare_df[target_feature_name]

            # TARGET processed HERE with COMPARE if present
            target_to_process = FeatureToProcess(
                -1, source_df[targets_found[0]], compare_target_series, None,
                None, fc.get_predetermined_type(targets_found[0]))
            self._target = sa.analyze_feature_to_dictionary(target_to_process)
            # The target is handled separately from the regular features.
            filtered_series_names_in_source.remove(targets_found[0])
            target_type = self._target["type"]
            self.progress_bar.update(1)

        # Set final target series and sanitize targets (e.g. bool->truly bool)
        source_target_series = None
        compare_target_series = None
        if target_feature_name:
            if target_feature_name not in source_df.columns:
                raise ValueError
            target_is_bool = self._target["type"] == sa.FeatureType.TYPE_BOOL
            if target_is_bool:
                source_target_series = self.get_sanitized_bool_series(
                    source_df[target_feature_name])
            else:
                source_target_series = source_df[target_feature_name]

            if compare_df is not None:
                if target_feature_name in compare_df.columns:
                    if target_is_bool:
                        compare_target_series = self.get_sanitized_bool_series(
                            compare_df[target_feature_name])
                    else:
                        compare_target_series = compare_df[target_feature_name]

        # Create list of features to process
        features_to_process = []
        for cur_order_index, cur_series_name in enumerate(
                filtered_series_names_in_source):
            # TODO: BETTER HANDLING OF DIFFERENT COLUMNS IN SOURCE/COMPARE
            if compare_df is not None and cur_series_name in \
                    compare_df.columns:
                this_feat = FeatureToProcess(
                    cur_order_index, source_df[cur_series_name],
                    compare_df[cur_series_name], source_target_series,
                    compare_target_series,
                    fc.get_predetermined_type(cur_series_name), target_type)
            else:
                # Column absent from compare: no compare series/target passed.
                this_feat = FeatureToProcess(
                    cur_order_index, source_df[cur_series_name], None,
                    source_target_series, None,
                    fc.get_predetermined_type(cur_series_name), target_type)
            features_to_process.append(this_feat)

        # Process columns -> features
        self.run_id = hex(int(time.time()))[2:] + "_"  # removes the decimals

        for feature in features_to_process:
            # f-string formatting so that non-string column labels do not
            # raise a TypeError when building the description.
            self.progress_bar.set_description(f":{feature.source.name}")
            self._features[feature.source.name] = \
                sa.analyze_feature_to_dictionary(feature)
            self.progress_bar.update(1)
        self.progress_bar.set_description(':FEATURES DONE')
        self.progress_bar.close()

        # Wrap up summary
        self.summarize_category_types(source_df, self.summary_source, fc.skip)
        if compare is not None:
            self.summarize_category_types(compare_df, self.summary_compare,
                                          fc.skip)
        self.dataframe_summary_html = sv_html.generate_html_dataframe_summary(
            self)

        self.graph_legend = GraphLegend(self)

        # Process all associations
        # ----------------------------------------------------
        # Put target first
        if target_to_process is not None:
            features_to_process.insert(0, target_to_process)

        # pairwise_analysis was already lower-cased at the top of the method.
        if pairwise_analysis != 'off':
            self.progress_bar = tqdm(
                total=len(features_to_process),
                bar_format='{desc:35}|{bar}| [{percentage:3.0f}%]   {elapsed}  -> ({remaining} left)',
                ascii=False, ncols=90)
            self.progress_bar.set_description(":Processing Pairwise Features")
            self.process_associations(features_to_process,
                                      source_target_series,
                                      compare_target_series)
            self.progress_bar.set_description(':PAIRWISE DONE')
            self.progress_bar.close()

            self.progress_bar = tqdm(
                total=1,
                bar_format='{desc:35}|{bar}| [{percentage:3.0f}%]   {elapsed}',
                ascii=False, ncols=73)
            self.progress_bar.set_description(":Generating associations graph")
            self._association_graphs["all"] = GraphAssoc(
                self, "all", self._associations)
            self._association_graphs_compare["all"] = GraphAssoc(
                self, "all", self._associations_compare)
            self.associations_html_source = sv_html.generate_html_associations(
                self, "source")
            self.associations_html_compare = sv_html.generate_html_associations(
                self, "compare")
            self.progress_bar.set_description(":ASSOCIATIONS GRAPH DONE")
            self.progress_bar.update(1)
            self.progress_bar.close()
        else:
            self._associations = None
            self._associations_compare = None
            self.associations_html_source = None
            self.associations_html_compare = None