Exemplo n.º 1
0
    def test_generate_fig_univariate_3(self, mock_plot_cat, mock_plot_cont):
        """Integer column declared as ``VarType.TYPE_CAT``.

        Expects exactly one call on ``mock_plot_cat`` and no call on
        ``mock_plot_cont``.
        """
        values = [10, 10, 20, 20, 20, 10]
        origin = ['train'] * 4 + ['test'] * 2
        frame = pd.DataFrame({
            "int_cat_data": values,
            "data_train_test": origin,
        })

        generate_fig_univariate(frame, 'int_cat_data', 'data_train_test',
                                type=VarType.TYPE_CAT)

        # Only the categorical plotting mock may have been used.
        mock_plot_cat.assert_called_once()
        mock_plot_cont.assert_not_called()
Exemplo n.º 2
0
    def test_generate_fig_univariate_1(self, mock_plot_cat, mock_plot_cont):
        """String column (with one NaN) declared as ``VarType.TYPE_CAT``.

        Expects exactly one call on ``mock_plot_cat`` and no call on
        ``mock_plot_cont``.
        """
        letters = ["a", "b", "c", "d", "e", np.nan]
        origin = ['train'] * 4 + ['test'] * 2
        frame = pd.DataFrame({
            "string_data": letters,
            "data_train_test": origin,
        })

        generate_fig_univariate(frame, 'string_data', 'data_train_test',
                                type=VarType.TYPE_CAT)

        # Only the categorical plotting mock may have been used.
        mock_plot_cat.assert_called_once()
        mock_plot_cont.assert_not_called()
Exemplo n.º 3
0
    def test_generate_fig_univariate_2(self, mock_plot_cat, mock_plot_cont):
        """Integer column of 50 values declared as ``VarType.TYPE_NUM``.

        Expects exactly one call on ``mock_plot_cont`` and no call on
        ``mock_plot_cat``.
        """
        # Ten repetitions of four 'train' rows followed by one 'test' row.
        origin = (['train'] * 4 + ['test']) * 10
        frame = pd.DataFrame({
            "int_data": list(range(50)),
            "data_train_test": origin,
        })

        generate_fig_univariate(frame, 'int_data', 'data_train_test',
                                type=VarType.TYPE_NUM)

        # Only the continuous plotting mock may have been used.
        mock_plot_cont.assert_called_once()
        mock_plot_cat.assert_not_called()
Exemplo n.º 4
0
    def _perform_and_display_analysis_univariate(self, df: pd.DataFrame,
                                                 col_splitter: str,
                                                 split_values: list,
                                                 names: list, group_id: str):
        """Render the univariate analysis of every column of ``df``.

        For each column other than ``col_splitter`` a figure is built with
        ``generate_fig_univariate`` and a statistics table with
        ``self._stats_to_table``. The collected entries are rendered with the
        ``univariate.html`` template and handed to ``print_html``. Entries are
        ordered by the display label of the column.

        Parameters
        ----------
        df : pd.DataFrame
            Data to analyse; must contain the column ``col_splitter``.
        col_splitter : str
            Name of the column whose values separate the subsets of ``df``.
        split_values : list
            Values of ``col_splitter`` selecting each subset. The first one
            feeds the ``test_stats`` argument of ``_stats_to_table``. The
            second one is only read when ``col_splitter`` holds more than one
            distinct value, and feeds ``train_stats``.
        names : list
            Forwarded unchanged to ``self._stats_to_table``.
        group_id : str
            Passed to the template as ``groupId``.
        """
        col_types = compute_col_types(df)
        has_second_split = df[col_splitter].nunique() > 1
        test_stats_univariate = perform_univariate_dataframe_analysis(
            df.loc[df[col_splitter] == split_values[0]], col_types=col_types)
        # Bound up front so the name exists even when there is a single split.
        train_stats_univariate = None
        if has_second_split:
            train_stats_univariate = perform_univariate_dataframe_analysis(
                df.loc[df[col_splitter] == split_values[1]],
                col_types=col_types)

        univariate_template = template_env.get_template("univariate.html")

        # Pair every column with its display label and sort the pairs by
        # label. Keeping the column next to its label avoids resolving the
        # label back through an inverse mapping, which selects the wrong
        # column when two columns share a label or when a column name equals
        # the label of another column.
        cols_and_labels = sorted(
            ((col, self.explainer.features_dict.get(col, col))
             for col in df.drop(col_splitter, axis=1).columns.to_list()),
            key=lambda pair: pair[1])

        univariate_features_desc = []
        for col, col_label in cols_and_labels:
            fig = generate_fig_univariate(df_all=df,
                                          col=col,
                                          hue=col_splitter,
                                          type=col_types[col])
            train_stats = (train_stats_univariate[col]
                           if has_second_split else None)
            df_col_stats = self._stats_to_table(
                test_stats=test_stats_univariate[col],
                train_stats=train_stats,
                names=names)
            univariate_features_desc.append({
                # Falls back to index 0 when the column is not registered.
                'feature_index':
                int(self.explainer.inv_columns_dict.get(col, 0)),
                'name': col,
                'type': str(series_dtype(df[col])),
                'description': col_label,
                'table': df_col_stats.to_html(classes="greyGridTable"),
                'image': convert_fig_to_html(fig),
            })
        print_html(
            univariate_template.render(features=univariate_features_desc,
                                       groupId=group_id))