def calculate(self, reference_data: pd.DataFrame,
                  production_data: pd.DataFrame, column_mapping):
        """Build the "Error bias" table widget and store it in ``self.wi``.

        Every row gets a prediction error (prediction minus target) and is
        labelled ``Underestimation`` (error <= 5% quantile of its dataset),
        ``Overestimation`` (error >= 95% quantile) or ``Majority`` (anything
        else).  For each feature the table holds one summary value per group
        plus a "range" figure; a histogram of the feature coloured by group is
        attached as an additional graph with id ``<feature>_hist``.

        Args:
            reference_data: Reference dataset.  It is cleaned IN PLACE:
                +/-inf becomes NaN and every row containing a NaN is dropped.
            production_data: Current dataset, or ``None`` to report on the
                reference dataset only.  Cleaned in place the same way.
            column_mapping: Object read through ``.get`` for the keys
                ``'target'``, ``'prediction'``, ``'numerical_features'`` and
                ``'categorical_features'``.  When falsy, the columns named
                ``'target'`` / ``'prediction'`` / ``'datetime'`` are treated
                as utility columns and every other numeric / object column
                becomes a numerical / categorical feature.

        Returns:
            None.  The resulting ``BaseWidgetInfo`` is written to ``self.wi``.

        Note:
            Both the target and the prediction column must resolve to real
            columns; otherwise the column lookup on the frame fails.
        """
        bias_groups = ("Underestimation", "Overestimation", "Majority")

        def _numeric_only(names):
            # Keep only the listed columns whose reference dtype is numeric.
            if names is None:
                return []
            return [
                name for name in names
                if is_numeric_dtype(reference_data[name])
            ]

        def _drop_non_finite(frame):
            # Deliberately in place: this is the method's long-standing side
            # effect on the frames handed in by the caller.
            frame.replace([np.inf, -np.inf], np.nan, inplace=True)
            frame.dropna(axis=0, how='any', inplace=True)

        def _bias_labels(errors, low, high):
            # One group label per row, in row order.
            return [
                'Underestimation' if err <= low else
                'Majority' if err < high else 'Overestimation'
                for err in errors
            ]

        def _numeric_cells(frame, errors, low, high, name, majority_is_middle):
            # [majority, under, over, range%] table cells of a numeric feature.
            values = frame[name]
            under = np.mean(values[errors <= low])
            over = np.mean(values[errors >= high])
            if majority_is_middle:
                majority = np.mean(values[(errors > low) & (errors < high)])
            else:
                # NOTE(review): without a current dataset the "Majority"
                # column has always shown the overall mean, not the mean of
                # the middle group - kept as is, confirm this is intended.
                majority = np.mean(values)
            if over == under:
                spread = 0
            else:
                # Gap between the two tails as a percentage of the value span.
                spread = 100 * abs(over - under) / (np.max(values) -
                                                    np.min(values))
            return [
                round(majority, 2),
                round(under, 2),
                round(over, 2),
                round(spread, 2)
            ]

        def _categorical_cells(frame, errors, low, high, name):
            # [majority, under, over, range] cells of a categorical feature,
            # each group summarised by its most frequent value.
            values = frame[name]
            overall = values.value_counts().idxmax()
            under = values[errors <= low].value_counts().idxmax()
            over = values[errors >= high].value_counts().idxmax()
            # 1 when either tail's most frequent value differs from the
            # overall one; emitted as an int for both datasets.
            differs = 1 if (overall != under or overall != over) else 0
            return [str(overall), str(under), str(over), differs]

        def _table_row(name, kind, cells):
            # Row dict: fixed fields first, then the cells as f3, f4, ...
            row = {
                "details": {
                    "parts": [{
                        "title": "Error bias",
                        "id": name + "_hist"
                    }],
                    "insights": []
                },
                "f1": name,
                "f2": kind
            }
            for position, cell in enumerate(cells, start=3):
                row["f%d" % position] = cell
            return row

        def _bias_histogram(plot_frame, name, faceted):
            # Histogram of the feature per error group, one facet per dataset
            # when a current dataset is present.
            if faceted:
                orders = {
                    "dataset": ["Reference", "Current"],
                    "Error bias": list(bias_groups)
                }
            else:
                orders = {"Error bias": list(bias_groups)}
            figure = px.histogram(plot_frame,
                                  x=name,
                                  color='Error bias',
                                  facet_col="dataset" if faceted else None,
                                  histnorm='percent',
                                  barmode='overlay',
                                  category_orders=orders)
            payload = json.loads(figure.to_json())
            return AdditionalGraphInfo(name + '_hist', {
                "data": payload['data'],
                "layout": payload['layout']
            })

        # --- resolve column roles -------------------------------------------
        if column_mapping:
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = _numeric_only(
                column_mapping.get('numerical_features'))
            # NOTE(review): categorical features are filtered by numeric
            # dtype as well - presumably they are expected to be numerically
            # encoded; confirm.
            cat_feature_names = _numeric_only(
                column_mapping.get('categorical_features'))
        else:
            present = reference_data.columns
            date_column = 'datetime' if 'datetime' in present else None
            target_column = 'target' if 'target' in present else None
            prediction_column = 'prediction' if 'prediction' in present else None

            # None stands for the id column, which is never auto-detected.
            utility_columns = {
                None, date_column, target_column, prediction_column
            }

            num_feature_names = list(
                set(reference_data.select_dtypes([np.number]).columns) -
                utility_columns)
            # The builtin ``object`` replaces the ``np.object`` alias, which
            # NumPy 1.24 removed.
            cat_feature_names = list(
                set(reference_data.select_dtypes([object]).columns) -
                utility_columns)

        # --- clean the data and label the errors ----------------------------
        has_production = production_data is not None

        datasets = [('Reference', reference_data)]
        if has_production:
            _drop_non_finite(production_data)
            datasets.append(('Current', production_data))
        _drop_non_finite(reference_data)

        prepared = []  # (frame, errors, 5% quantile, 95% quantile)
        labelled_frames = []
        for dataset_label, frame in datasets:
            errors = frame[prediction_column] - frame[target_column]
            low = np.quantile(errors, .05)
            high = np.quantile(errors, .95)
            prepared.append((frame, errors, low, high))

            # The plotting columns live on a copy, so the caller's frame is
            # never given (or stripped of) 'dataset' / 'Error bias' columns.
            labelled = frame.copy()
            if has_production:
                labelled['dataset'] = dataset_label
            labelled['Error bias'] = _bias_labels(errors, low, high)
            labelled_frames.append(labelled)

        if has_production:
            plot_data = pd.concat(labelled_frames)
        else:
            plot_data = labelled_frames[0]

        # --- table rows and per-feature graphs ------------------------------
        params_data = []
        additional_graphs_data = []

        typed_features = (('num', num_feature_names),
                          ('cat', cat_feature_names))
        for feature_type, feature_names in typed_features:
            for feature_name in feature_names:
                cells = []
                for frame, errors, low, high in prepared:
                    if feature_type == 'num':
                        cells.extend(
                            _numeric_cells(frame, errors, low, high,
                                           feature_name, has_production))
                    else:
                        cells.extend(
                            _categorical_cells(frame, errors, low, high,
                                               feature_name))

                params_data.append(
                    _table_row(feature_name, feature_type, cells))
                additional_graphs_data.append(
                    _bias_histogram(plot_data, feature_name, has_production))

        # --- table layout ---------------------------------------------------
        if has_production:
            value_titles = [
                "REF: Majority", "REF: Under", "REF: Over", "REF: Range(%)",
                "CURR: Majority", "CURR: Under", "CURR: Over",
                "CURR: Range(%)"
            ]
        else:
            value_titles = [
                "Majority", "Underestimation", "Overestimation", "Range(%)"
            ]

        columns = [{
            "title": "Feature",
            "field": "f1"
        }, {
            "title": "Type",
            "field": "f2"
        }]
        for position, title in enumerate(value_titles, start=3):
            columns.append({"title": title, "field": "f%d" % position})
        # The table is sorted by its last column (the most recent range).
        columns[-1]["sort"] = "desc"

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="big_table",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=2,
            params={
                "rowsPerPage":
                min(len(num_feature_names) + len(cat_feature_names), 10),
                "columns": columns,
                "data": params_data
            },
            additionalGraphs=additional_graphs_data)
    def calculate(self, reference_data: pd.DataFrame,
                  production_data: pd.DataFrame, column_mapping):
        """Build the per-feature "Feature values" scatter table in ``self.wi``.

        For every numerical and categorical feature a two-panel figure
        ("Reference" / "Production") is created that plots the prediction
        and/or target column against the feature, and the figure is attached
        as an additional graph with id ``<feature>_values``.

        Args:
            reference_data: Reference dataset, drawn in the left panel.
            production_data: Current dataset, drawn in the right panel.  When
                ``None`` the right panel is left without traces.
            column_mapping: Object read through ``.get`` for the keys
                ``'target'``, ``'prediction'``, ``'numerical_features'`` and
                ``'categorical_features'``.  When falsy, the columns named
                ``'target'`` / ``'prediction'`` / ``'datetime'`` are treated
                as utility columns and every other numeric / object column
                becomes a feature.

        Returns:
            None.  ``self.wi`` is set to the widget, or to ``None`` when
            neither a prediction nor a target column is available.
        """

        def _numeric_only(names):
            # Keep only the listed columns whose reference dtype is numeric.
            if names is None:
                return []
            return [
                name for name in names
                if is_numeric_dtype(reference_data[name])
            ]

        if column_mapping:
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = _numeric_only(
                column_mapping.get('numerical_features'))
            # NOTE(review): categorical features are filtered by numeric
            # dtype as well - presumably they are expected to be numerically
            # encoded; confirm.
            cat_feature_names = _numeric_only(
                column_mapping.get('categorical_features'))
        else:
            present = reference_data.columns
            date_column = 'datetime' if 'datetime' in present else None
            target_column = 'target' if 'target' in present else None
            prediction_column = 'prediction' if 'prediction' in present else None

            # None stands for the id column, which is never auto-detected.
            utility_columns = {
                None, date_column, target_column, prediction_column
            }

            num_feature_names = list(
                set(reference_data.select_dtypes([np.number]).columns) -
                utility_columns)
            # The builtin ``object`` replaces the ``np.object`` alias, which
            # NumPy 1.24 removed.
            cat_feature_names = list(
                set(reference_data.select_dtypes([object]).columns) -
                utility_columns)

        if prediction_column is None and target_column is None:
            # Nothing to plot the features against.
            self.wi = None
            return

        # (subplot column, data, legend suffix); the current dataset is
        # optional, so its panel only gets traces when it was supplied.
        panels = [(1, reference_data, 'ref')]
        if production_data is not None:
            panels.append((2, production_data, 'prod'))

        # (y column, legend label, marker colour), in drawing order.
        plotted_series = [(prediction_column, 'Prediction', grey),
                          (target_column, 'Target', red)]

        additional_graphs_data = []
        params_data = []
        for feature_name in num_feature_names + cat_feature_names:
            # Table row pointing at the figure built below.
            params_data.append({
                "details": {
                    "parts": [{
                        "title": "Feature values",
                        "id": feature_name + "_values"
                    }],
                    "insights": []
                },
                "f1": feature_name
            })

            fig = make_subplots(rows=1,
                                cols=2,
                                subplot_titles=("Reference", "Production"))

            for panel_col, frame, suffix in panels:
                for y_column, label, color in plotted_series:
                    if y_column is None:
                        continue
                    fig.add_trace(go.Scatter(
                        x=frame[feature_name],
                        y=frame[y_column],
                        mode='markers',
                        name='%s (%s)' % (label, suffix),
                        marker=dict(size=6, color=color)),
                                  row=1,
                                  col=panel_col)

            # Same axis titles and grid on both panels.
            for panel_col in (1, 2):
                fig.update_xaxes(title_text=feature_name,
                                 showgrid=True,
                                 row=1,
                                 col=panel_col)
            for panel_col in (1, 2):
                fig.update_yaxes(title_text="Value",
                                 showgrid=True,
                                 row=1,
                                 col=panel_col)

            fig_json = json.loads(fig.to_json())

            # Attach the figure under the id referenced by the table row.
            additional_graphs_data.append(
                AdditionalGraphInfo(feature_name + '_values', {
                    "data": fig_json['data'],
                    "layout": fig_json['layout']
                }))

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="big_table",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=2,
            params={
                "rowsPerPage":
                min(len(num_feature_names) + len(cat_feature_names), 10),
                "columns": [{
                    "title": "Feature",
                    "field": "f1"
                }],
                "data":
                params_data
            },
            additionalGraphs=additional_graphs_data)
    def calculate(self, reference_data: pd.DataFrame,
                  production_data: pd.DataFrame, column_mapping):
        if column_mapping:
            date_column = column_mapping.get('datetime')
            id_column = column_mapping.get('id')
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = column_mapping.get('numerical_features')
            if num_feature_names is None:
                num_feature_names = []
            else:
                num_feature_names = [
                    name for name in num_feature_names
                    if is_numeric_dtype(reference_data[name])
                ]

            cat_feature_names = column_mapping.get('categorical_features')
            if cat_feature_names is None:
                cat_feature_names = []
            else:
                cat_feature_names = [
                    name for name in cat_feature_names
                    if is_numeric_dtype(reference_data[name])
                ]

        else:
            date_column = 'datetime' if 'datetime' in reference_data.columns else None
            id_column = None
            target_column = 'target' if 'target' in reference_data.columns else None
            prediction_column = 'prediction' if 'prediction' in reference_data.columns else None

            utility_columns = [
                date_column, id_column, target_column, prediction_column
            ]

            num_feature_names = list(
                set(reference_data.select_dtypes([np.number]).columns) -
                set(utility_columns))
            cat_feature_names = list(
                set(reference_data.select_dtypes([np.object]).columns) -
                set(utility_columns))

        #set params data
        params_data = []
        drifted_fetures_count = 0
        #plt.ioff()
        for feature_name in num_feature_names:  # + cat_feature_names: #feature_names:
            prod_small_hist = np.histogram(
                production_data[feature_name][np.isfinite(
                    production_data[feature_name])],
                bins=10,
                density=True)
            ref_small_hist = np.histogram(
                reference_data[feature_name][np.isfinite(
                    reference_data[feature_name])],
                bins=10,
                density=True)

            feature_type = 'num'

            p_value = ks_2samp(reference_data[feature_name],
                               production_data[feature_name])[1]

            distr_sim_test = "Detected" if p_value < 0.05 else "Not Detected"
            drifted_fetures_count += 1 if p_value < 0.05 else 0

            params_data.append({
                "details": {
                    "parts": [{
                        "title": "Data drift",
                        "id": feature_name + "_drift",
                        "type": "widget"
                    }, {
                        "title": "Data distribution",
                        "id": feature_name + "_distr"
                    }],
                    "insights": []
                },
                "f1": feature_name,
                "f6": feature_type,
                "f3": {
                    "x": list(ref_small_hist[1]),
                    "y": list(ref_small_hist[0])
                },
                "f4": {
                    "x": list(prod_small_hist[1]),
                    "y": list(prod_small_hist[0])
                },
                "f2": distr_sim_test,
                "f5": round(p_value, 6)
            })

        for feature_name in cat_feature_names:  #feature_names:
            prod_small_hist = np.histogram(
                production_data[feature_name][np.isfinite(
                    production_data[feature_name])],
                bins=10,
                density=True)
            ref_small_hist = np.histogram(
                reference_data[feature_name][np.isfinite(
                    reference_data[feature_name])],
                bins=10,
                density=True)

            feature_type = 'cat'

            #p_value = ks_2samp(reference_data[feature_name], production_data[feature_name])[1]
            #CHI2 to be implemented for cases with different categories
            ref_feature_vc = reference_data[feature_name][np.isfinite(
                reference_data[feature_name])].value_counts()
            prod_feature_vc = production_data[feature_name][np.isfinite(
                production_data[feature_name])].value_counts()

            keys = set(
                list(reference_data[feature_name][np.isfinite(
                    reference_data[feature_name])].unique()) +
                list(production_data[feature_name][np.isfinite(
                    production_data[feature_name])].unique()))

            ref_feature_dict = dict.fromkeys(keys, 0)
            for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
                ref_feature_dict[key] = item

            prod_feature_dict = dict.fromkeys(keys, 0)
            for key, item in zip(prod_feature_vc.index,
                                 prod_feature_vc.values):
                prod_feature_dict[key] = item

            f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
            f_obs = [value[1] for value in sorted(prod_feature_dict.items())]

            p_value = chisquare(f_exp, f_obs)[1]

            distr_sim_test = "Detected" if p_value < 0.05 else "Not Detected"
            drifted_fetures_count += 1 if p_value < 0.05 else 0

            params_data.append({
                "details": {
                    "parts": [{
                        "title": "Data drift",
                        "id": feature_name + "_drift",
                        "type": "widget"
                    }, {
                        "title": "Data distribution",
                        "id": feature_name + "_distr"
                    }],
                    "insights": []
                },
                "f1": feature_name,
                "f6": feature_type,
                "f3": {
                    "x": list(ref_small_hist[1]),
                    "y": list(ref_small_hist[0])
                },
                "f4": {
                    "x": list(prod_small_hist[1]),
                    "y": list(prod_small_hist[0])
                },
                "f2": distr_sim_test,
                "f5": round(p_value, 6)
            })

        #set additionalGraphs
        additional_graphs_data = []
        for feature_name in num_feature_names + cat_feature_names:  #feature_names:

            #plot distributions
            fig = go.Figure()
            fig.add_trace(
                go.Histogram(x=reference_data[feature_name],
                             marker_color=grey,
                             opacity=0.6,
                             nbinsx=10,
                             name='Reference',
                             histnorm='probability'))

            fig.add_trace(
                go.Histogram(x=production_data[feature_name],
                             marker_color=red,
                             opacity=0.6,
                             nbinsx=10,
                             name='Current',
                             histnorm='probability'))

            fig.update_layout(legend=dict(orientation="h",
                                          yanchor="bottom",
                                          y=1.02,
                                          xanchor="right",
                                          x=1),
                              xaxis_title=feature_name,
                              yaxis_title="Share")

            distr_figure = json.loads(fig.to_json())

            #plot drift
            reference_mean = np.mean(reference_data[feature_name][np.isfinite(
                reference_data[feature_name])])
            reference_std = np.std(reference_data[feature_name][np.isfinite(
                reference_data[feature_name])],
                                   ddof=1)
            x_title = "Timestamp" if date_column else "Index"

            fig = go.Figure()

            fig.add_trace(
                go.Scatter(x=production_data[date_column]
                           if date_column else production_data.index,
                           y=production_data[feature_name],
                           mode='markers',
                           name='Current',
                           marker=dict(size=6, color=grey)))

            fig.update_layout(
                xaxis_title=x_title,
                yaxis_title=feature_name,
                showlegend=True,
                legend=dict(orientation="h",
                            yanchor="bottom",
                            y=1.02,
                            xanchor="right",
                            x=1),
                shapes=[
                    dict(
                        type="rect",
                        # x-reference is assigned to the x-values
                        xref="paper",
                        # y-reference is assigned to the plot paper [0,1]
                        yref="y",
                        x0=0,
                        y0=reference_mean - reference_std,
                        x1=1,
                        y1=reference_mean + reference_std,
                        fillcolor="LightGreen",
                        opacity=0.5,
                        layer="below",
                        line_width=0,
                    ),
                    dict(
                        type="line",
                        name='Reference',
                        xref="paper",
                        yref="y",
                        x0=0,  #min(testset_agg_by_date.index),
                        y0=reference_mean,
                        x1=1,  #max(testset_agg_by_date.index),
                        y1=reference_mean,
                        line=dict(color="Green", width=3)),
                ])

            drift_figure = json.loads(fig.to_json())

            #add distributions data
            additional_graphs_data.append(
                AdditionalGraphInfo(feature_name + '_distr', {
                    "data": distr_figure['data'],
                    "layout": distr_figure['layout']
                }))

            #add drift data
            additional_graphs_data.append(
                AdditionalGraphInfo(
                    feature_name + '_drift', {
                        "title": "",
                        "size": 2,
                        "text": "",
                        "type": "big_graph",
                        "params": {
                            "data": drift_figure['data'],
                            "layout": drift_figure['layout']
                        }
                    }))

        self.wi = BaseWidgetInfo(
            title="Data Drift: drift detected for " +
            str(drifted_fetures_count) + " out of " +
            str(len(num_feature_names) + len(cat_feature_names)) + " features",
            type="big_table",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=2,
            params={
                "rowsPerPage":
                min(len(num_feature_names) + len(cat_feature_names), 10),
                "columns": [{
                    "title": "Feature",
                    "field": "f1"
                }, {
                    "title": "Type",
                    "field": "f6"
                }, {
                    "title": "Reference Distribution",
                    "field": "f3",
                    "type": "histogram",
                    "options": {
                        "xField": "x",
                        "yField": "y"
                    }
                }, {
                    "title": "Current Distribution",
                    "field": "f4",
                    "type": "histogram",
                    "options": {
                        "xField": "x",
                        "yField": "y"
                    }
                }, {
                    "title": "Data drift",
                    "field": "f2"
                }, {
                    "title": "P-Value for Similarity Test",
                    "field": "f5",
                    "sort": "asc"
                }],
                "data":
                params_data
            },
            additionalGraphs=additional_graphs_data)
Пример #4
0
    def calculate(self, reference_data: pd.DataFrame,
                  production_data: pd.DataFrame, column_mapping):
        """Build the confusion-based feature distribution table widget.

        For every selected feature the widget gets one histogram coloured by
        the target column (the "All" graph) and one histogram per class label
        coloured by the confusion outcome of each row (TP / TN / FP / FN).
        When ``production_data`` is given, reference and production rows are
        concatenated and every plot is faceted by dataset; otherwise only the
        reference data is plotted.

        The result is stored in ``self.wi``. It is set to ``None`` when either
        the target or the prediction column is unknown.

        Args:
            reference_data: reference dataset. It is not modified.
            production_data: current dataset, or ``None``. It is not modified.
            column_mapping: optional mapping read with ``.get`` for the keys
                ``'target'``, ``'prediction'``, ``'numerical_features'`` and
                ``'categorical_features'``. When falsy, the columns named
                ``'target'`` / ``'prediction'`` are used if present and the
                features are derived from the reference column dtypes.
        """
        if column_mapping:
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = [
                name
                for name in column_mapping.get('numerical_features') or []
                if is_numeric_dtype(reference_data[name])
            ]
            # NOTE(review): categorical features are filtered with
            # is_numeric_dtype as well, so non-numeric categorical columns are
            # dropped. Kept as in the original; confirm this is intended.
            cat_feature_names = [
                name
                for name in column_mapping.get('categorical_features') or []
                if is_numeric_dtype(reference_data[name])
            ]
        else:
            columns = reference_data.columns
            date_column = 'datetime' if 'datetime' in columns else None
            id_column = None
            target_column = 'target' if 'target' in columns else None
            prediction_column = 'prediction' if 'prediction' in columns else None
            utility_columns = {
                date_column, id_column, target_column, prediction_column
            }

            # `object` replaces the `np.object` alias, which NumPy 1.24
            # removed. dict.fromkeys de-duplicates while keeping the column
            # order, so the feature order is reproducible between runs.
            num_feature_names = [
                name for name in dict.fromkeys(
                    reference_data.select_dtypes(include=[np.number]).columns)
                if name not in utility_columns
            ]
            cat_feature_names = [
                name for name in dict.fromkeys(
                    reference_data.select_dtypes(include=[object]).columns)
                if name not in utility_columns
            ]

        if prediction_column is None or target_column is None:
            self.wi = None
            return

        feature_names = num_feature_names + cat_feature_names
        # The prediction columns double as the class labels: the predicted
        # label of a row is the name of the column holding the row maximum.
        # NOTE(review): presumably `prediction_column` is a list of per-class
        # score columns here -- confirm against the callers.
        labels = prediction_column

        def predicted_labels(data):
            class_ids = np.argmax(data[prediction_column].to_numpy(), axis=-1)
            return [prediction_column[class_id] for class_id in class_ids]

        def figure_payload(fig):
            fig_json = json.loads(fig.to_json())
            return {"data": fig_json['data'], "layout": fig_json['layout']}

        # `assign` returns new frames, so the helper columns added below never
        # leak into the caller's DataFrames.
        plot_data = reference_data.assign(
            prediction_labels=predicted_labels(reference_data))
        histogram_kwargs = {"histnorm": ''}
        dataset_order = {}
        if production_data is not None:
            plot_data = pd.concat([
                plot_data.assign(dataset='Reference'),
                production_data.assign(
                    prediction_labels=predicted_labels(production_data),
                    dataset='Production'),
            ])
            histogram_kwargs["facet_col"] = "dataset"
            dataset_order = {"dataset": ["Reference", "Production"]}

        all_kwargs = dict(histogram_kwargs)
        if dataset_order:
            all_kwargs["category_orders"] = dataset_order
        confusion_kwargs = dict(
            histogram_kwargs,
            category_orders={
                **dataset_order, "Confusion": ["TP", "TN", "FP", "FN"]
            })

        # The confusion outcome depends only on the label, not on the feature,
        # so it is computed once per label. The configured target column is
        # used instead of a hard-coded 'target'.
        is_actual_source = plot_data[target_column]
        is_predicted_source = plot_data['prediction_labels']
        confusion_by_label = []
        for label in labels:
            is_actual = (is_actual_source == label).to_numpy()
            is_predicted = (is_predicted_source == label).to_numpy()
            confusion_by_label.append(
                (label,
                 np.where(is_predicted,
                          np.where(is_actual, 'TP', 'FP'),
                          np.where(is_actual, 'FN', 'TN'))))

        additional_graphs_data = []
        params_data = []
        for feature_name in feature_names:
            # A bare "All" id would be shared by every feature, so the graphs
            # could not be told apart; make the id unique per feature.
            all_graph_id = "All_" + str(feature_name)

            # add data for table in params
            params_data.append({
                "details": {
                    "parts": [{
                        "title": "All",
                        "id": all_graph_id
                    }] + [{
                        "title": str(label),
                        "id": feature_name + "_" + str(label)
                    } for label in labels],
                    "insights": []
                },
                "f1": feature_name
            })

            # distribution of the feature coloured by the true target
            fig = px.histogram(plot_data,
                               x=feature_name,
                               color=target_column,
                               **all_kwargs)
            additional_graphs_data.append(
                AdditionalGraphInfo(all_graph_id, figure_payload(fig)))

            # one confusion-coloured distribution per class label
            for label, confusion in confusion_by_label:
                plot_data['Confusion'] = confusion
                fig = px.histogram(plot_data,
                                   x=feature_name,
                                   color='Confusion',
                                   **confusion_kwargs)
                additional_graphs_data.append(
                    AdditionalGraphInfo(feature_name + "_" + str(label),
                                        figure_payload(fig)))

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="big_table",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=2,
            params={
                "rowsPerPage": min(len(feature_names), 10),
                "columns": [{
                    "title": "Feature",
                    "field": "f1"
                }],
                "data": params_data
            },
            additionalGraphs=additional_graphs_data)
    def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
        """Build the target / prediction distribution-by-feature table widget.

        For every selected feature a histogram coloured by the target column
        and/or one coloured by the prediction column is rendered, depending on
        which of the two columns is known. When ``production_data`` is given,
        reference and production rows are concatenated and the plots are
        faceted by dataset; when it is ``None`` only the reference data is
        plotted (previously this case raised ``TypeError``).

        The result is stored in ``self.wi``. It is set to ``None`` when neither
        a target nor a prediction column is known.

        Args:
            reference_data: reference dataset. It is not modified.
            production_data: current dataset, or ``None``. It is not modified.
            column_mapping: optional mapping read with ``.get`` for the keys
                ``'target'``, ``'prediction'``, ``'numerical_features'`` and
                ``'categorical_features'``. When falsy, the columns named
                ``'target'`` / ``'prediction'`` are used if present and the
                features are derived from the reference column dtypes.
        """
        if column_mapping:
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = [
                name
                for name in column_mapping.get('numerical_features') or []
                if is_numeric_dtype(reference_data[name])
            ]
            # NOTE(review): categorical features are filtered with
            # is_numeric_dtype as well, so non-numeric categorical columns are
            # dropped. Kept as in the original; confirm this is intended.
            cat_feature_names = [
                name
                for name in column_mapping.get('categorical_features') or []
                if is_numeric_dtype(reference_data[name])
            ]
        else:
            columns = reference_data.columns
            date_column = 'datetime' if 'datetime' in columns else None
            id_column = None
            target_column = 'target' if 'target' in columns else None
            prediction_column = 'prediction' if 'prediction' in columns else None
            utility_columns = {date_column, id_column, target_column, prediction_column}

            # `object` replaces the `np.object` alias, which NumPy 1.24
            # removed. dict.fromkeys de-duplicates while keeping the column
            # order, so the feature order is reproducible between runs.
            num_feature_names = [
                name for name in dict.fromkeys(reference_data.select_dtypes(include=[np.number]).columns)
                if name not in utility_columns
            ]
            cat_feature_names = [
                name for name in dict.fromkeys(reference_data.select_dtypes(include=[object]).columns)
                if name not in utility_columns
            ]

        # (tab title, graph id suffix, colour column) for each available
        # series; the target always comes before the prediction.
        series = []
        if target_column is not None:
            series.append(("Target", "_target_values", target_column))
        if prediction_column is not None:
            series.append(("Prediction", "_prediction_values", prediction_column))

        if not series:
            self.wi = None
            return

        feature_names = num_feature_names + cat_feature_names

        if production_data is None:
            # Nothing to compare against: plot the reference data unfaceted.
            plot_data = reference_data
            histogram_kwargs = {}
        else:
            # `assign` returns new frames, so the helper 'dataset' column never
            # leaks into the caller's DataFrames. The merged frame does not
            # depend on the feature, so it is built once.
            plot_data = pd.concat([
                reference_data.assign(dataset='Reference'),
                production_data.assign(dataset='Production'),
            ])
            histogram_kwargs = {
                "facet_col": "dataset",
                "category_orders": {"dataset": ["Reference", "Production"]},
            }

        additional_graphs_data = []
        params_data = []
        for feature_name in feature_names:
            # add data for table in params
            params_data.append(
                {
                    "details": {
                        "parts": [
                            {"title": title, "id": feature_name + suffix}
                            for title, suffix, _ in series
                        ],
                        "insights": []
                    },
                    "f1": feature_name
                }
            )

            # write plot data in table as additional data
            for _, suffix, color_column in series:
                fig = px.histogram(plot_data, x=feature_name, color=color_column, **histogram_kwargs)
                fig_json = json.loads(fig.to_json())
                additional_graphs_data.append(
                    AdditionalGraphInfo(
                        feature_name + suffix,
                        {
                            "data": fig_json['data'],
                            "layout": fig_json['layout']
                        },
                    )
                )

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="big_table",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=2,
            params={
                "rowsPerPage": min(len(feature_names), 10),
                "columns": [
                    {
                        "title": "Feature",
                        "field": "f1"
                    }
                ],
                "data": params_data
            },
            additionalGraphs=additional_graphs_data
        )
Пример #6
0
 def get_info(self) -> BaseWidgetInfo:
     return BaseWidgetInfo(
         title=self.title,
         type="big_table",
         details="",
         alertStats=AlertStats(),
         alerts=[],
         alertsPosition="row",
         insights=[],
         size=2,
         params={
             "columns": [{
                 "title": "Feature",
                 "field": "f1"
             }, {
                 "title": "Data drift",
                 "field": "f2"
             }, {
                 "title": "Distribution",
                 "field": "f3",
                 "type": "histogram",
                 "options": {
                     "xField": "x",
                     "yField": "y"
                 }
             }, {
                 "title":
                 "Distribution shift (similarity test at 95% confidence level)",
                 "field": "f4"
             }, {
                 "title": "Alerts",
                 "field": "f5"
             }],
             "data": [{
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "season_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "season_distr"
                     }],
                     "insights": []
                 },
                 "f1": "season",
                 "f2": "Detected",
                 "f3": {
                     "x":
                     [0.0, 0.0, 0.0, 0.0, 0.0, 1000.0, 0.0, 0.0, 0.0, 0.0],
                     "y": [
                         3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.3, 4.4,
                         4.5
                     ]
                 },
                 "f4": "Rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "holiday_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "holiday_distr"
                     }],
                     "insights": []
                 },
                 "f1": "holiday",
                 "f2": "Not Detected",
                 "f3": {
                     "x":
                     [976.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 24.0],
                     "y": [
                         0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5,
                         0.6000000000000001, 0.7000000000000001, 0.8, 0.9,
                         1.0
                     ]
                 },
                 "f4": "Not rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "workingday_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "workingday_distr"
                     }],
                     "insights": []
                 },
                 "f1": "workingday",
                 "f2": "Not Detected",
                 "f3": {
                     "x":
                     [312.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 688.0],
                     "y": [
                         0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5,
                         0.6000000000000001, 0.7000000000000001, 0.8, 0.9,
                         1.0
                     ]
                 },
                 "f4": "Not rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "weather_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "weather_distr"
                     }],
                     "insights": []
                 },
                 "f1": "weather",
                 "f2": "Detected",
                 "f3": {
                     "x": [
                         566.0, 0.0, 0.0, 0.0, 0.0, 382.0, 0.0, 0.0, 0.0,
                         52.0
                     ],
                     "y": [
                         1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
                         2.4000000000000004, 2.6, 2.8, 3.0
                     ]
                 },
                 "f4": "Rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "temp_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "temp_distr"
                     }],
                     "insights": []
                 },
                 "f1": "temp",
                 "f2": "Detected",
                 "f3": {
                     "x": [
                         7.0, 55.0, 197.0, 182.0, 307.0, 93.0, 73.0, 46.0,
                         32.0, 8.0
                     ],
                     "y": [
                         6.56, 8.61, 10.66, 12.709999999999999,
                         14.759999999999998, 16.81, 18.86,
                         20.909999999999997, 22.959999999999997,
                         25.009999999999998, 27.06
                     ]
                 },
                 "f4": "Rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "atemp_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "atemp_distr"
                     }],
                     "insights": []
                 },
                 "f1": "atemp",
                 "f2": "Detected",
                 "f3": {
                     "x": [
                         12.0, 84.0, 193.0, 237.0, 132.0, 183.0, 73.0, 61.0,
                         8.0, 17.0
                     ],
                     "y": [
                         9.09, 11.286999999999999, 13.484,
                         15.681000000000001, 17.878, 20.075, 22.272, 24.469,
                         26.666, 28.863, 31.06
                     ]
                 },
                 "f4": "Rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "humidity_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "humidity_distr"
                     }],
                     "insights": []
                 },
                 "f1": "humidity",
                 "f2": "Detected",
                 "f3": {
                     "x": [
                         7.0, 5.0, 59.0, 144.0, 188.0, 180.0, 80.0, 149.0,
                         105.0, 83.0
                     ],
                     "y": [
                         16.0, 24.4, 32.8, 41.2, 49.6, 58.0, 66.4,
                         74.80000000000001, 83.2, 91.60000000000001, 100.0
                     ]
                 },
                 "f4": "Rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "windspeed_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "windspeed_distr"
                     }],
                     "insights": []
                 },
                 "f1": "windspeed",
                 "f2": "Not Detected",
                 "f3": {
                     "x": [
                         117.0, 193.0, 201.0, 271.0, 112.0, 57.0, 39.0, 9.0,
                         0.0, 1.0
                     ],
                     "y": [
                         0.0, 4.30006, 8.60012, 12.90018, 17.20024,
                         21.500300000000003, 25.80036, 30.10042, 34.40048,
                         38.700540000000004, 43.0006
                     ]
                 },
                 "f4": "Not rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "month_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "month_distr"
                     }],
                     "insights": []
                 },
                 "f1": "month",
                 "f2": "Detected",
                 "f3": {
                     "x": [
                         89.0, 0.0, 0.0, 0.0, 0.0, 455.0, 0.0, 0.0, 0.0,
                         456.0
                     ],
                     "y": [
                         10.0, 10.2, 10.4, 10.6, 10.8, 11.0, 11.2, 11.4,
                         11.6, 11.8, 12.0
                     ]
                 },
                 "f4": "Rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "hour_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "hour_distr"
                     }],
                     "insights": []
                 },
                 "f1": "hour",
                 "f2": "Not Detected",
                 "f3": {
                     "x": [
                         123.0, 81.0, 82.0, 126.0, 84.0, 84.0, 126.0, 84.0,
                         84.0, 126.0
                     ],
                     "y": [
                         0.0, 2.3, 4.6, 6.8999999999999995, 9.2, 11.5,
                         13.799999999999999, 16.099999999999998, 18.4, 20.7,
                         23.0
                     ]
                 },
                 "f4": "Not rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "year_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "year_distr"
                     }],
                     "insights": []
                 },
                 "f1": "year",
                 "f2": "Detected",
                 "f3": {
                     "x":
                     [0.0, 0.0, 0.0, 0.0, 0.0, 1000.0, 0.0, 0.0, 0.0, 0.0],
                     "y": [
                         2011.5, 2011.6, 2011.7, 2011.8, 2011.9, 2012.0,
                         2012.1, 2012.2, 2012.3, 2012.4, 2012.5
                     ]
                 },
                 "f4": "Rejected",
                 "f5": " "
             }, {
                 "details": {
                     "parts": [{
                         "title": "Data drift",
                         "id": "week_day_drift"
                     }, {
                         "title": "Data distribution",
                         "id": "week_day_distr"
                     }],
                     "insights": []
                 },
                 "f1": "week_day",
                 "f2": "Not Detected",
                 "f3": {
                     "x": [
                         144.0, 137.0, 0.0, 144.0, 0.0, 143.0, 144.0, 0.0,
                         144.0, 144.0
                     ],
                     "y": [
                         1.0, 1.6, 2.2, 2.8, 3.4, 4.0, 4.6, 5.2, 5.8,
                         6.3999999999999995, 7.0
                     ]
                 },
                 "f4": "Not rejected",
                 "f5": " "
             }]
         },
         additionalGraphs=[
             AdditionalGraphInfo(
                 "holiday_drift", {
                     "data": [{
                         "marker": {
                             "color": "#4d4d4d",
                             "size": 6
                         },
                         "mode":
                         "markers",
                         "name":
                         "Production",
                         "type":
                         "scatter",
                         "x": [
                             "2012-10-16", "2012-10-17", "2012-10-18",
                             "2012-10-19", "2012-11-01", "2012-11-02",
                             "2012-11-03", "2012-11-04", "2012-11-05",
                             "2012-11-06", "2012-11-07", "2012-11-08",
                             "2012-11-09", "2012-11-10", "2012-11-11",
                             "2012-11-12", "2012-11-13", "2012-11-14",
                             "2012-11-15", "2012-11-16", "2012-11-17",
                             "2012-11-18", "2012-11-19", "2012-12-01",
                             "2012-12-02", "2012-12-03", "2012-12-04",
                             "2012-12-05", "2012-12-06", "2012-12-07",
                             "2012-12-08", "2012-12-09", "2012-12-10",
                             "2012-12-11", "2012-12-12", "2012-12-13",
                             "2012-12-14", "2012-12-15", "2012-12-16",
                             "2012-12-17", "2012-12-18", "2012-12-19"
                         ],
                         "y": [
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0
                         ]
                     }],
                     "layout": {
                         "legend": {
                             "orientation": "h",
                             "x": 1,
                             "xanchor": "right",
                             "y": 1.02,
                             "yanchor": "bottom"
                         },
                         "shapes": [{
                             "fillcolor": "LightGreen",
                             "layer": "below",
                             "line": {
                                 "width": 0
                             },
                             "opacity": 0.5,
                             "type": "rect",
                             "x0": 0,
                             "x1": 1,
                             "xref": "paper",
                             "y0": -0.13886233657597655,
                             "y1": 0.19692424230124458,
                             "yref": "y"
                         }, {
                             "line": {
                                 "color": "Green",
                                 "width": 3
                             },
                             "name": "Reference",
                             "type": "line",
                             "x0": 0,
                             "x1": 1,
                             "xref": "paper",
                             "y0": 0.02903095286263403,
                             "y1": 0.02903095286263403,
                             "yref": "y"
                         }],
                         "showlegend":
                         True,
                         "template": {
                             "data": {
                                 "bar": [{
                                     "error_x": {
                                         "color": "#2a3f5f"
                                     },
                                     "error_y": {
                                         "color": "#2a3f5f"
                                     },
                                     "marker": {
                                         "line": {
                                             "color": "#E5ECF6",
                                             "width": 0.5
                                         }
                                     },
                                     "type": "bar"
                                 }],
                                 "barpolar": [{
                                     "marker": {
                                         "line": {
                                             "color": "#E5ECF6",
                                             "width": 0.5
                                         }
                                     },
                                     "type": "barpolar"
                                 }],
                                 "carpet": [{
                                     "aaxis": {
                                         "endlinecolor": "#2a3f5f",
                                         "gridcolor": "white",
                                         "linecolor": "white",
                                         "minorgridcolor": "white",
                                         "startlinecolor": "#2a3f5f"
                                     },
                                     "baxis": {
                                         "endlinecolor": "#2a3f5f",
                                         "gridcolor": "white",
                                         "linecolor": "white",
                                         "minorgridcolor": "white",
                                         "startlinecolor": "#2a3f5f"
                                     },
                                     "type": "carpet"
                                 }],
                                 "choropleth": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "type": "choropleth"
                                 }],
                                 "contour": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "colorscale":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]],
                                     "type":
                                     "contour"
                                 }],
                                 "contourcarpet": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "type": "contourcarpet"
                                 }],
                                 "heatmap": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "colorscale":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]],
                                     "type":
                                     "heatmap"
                                 }],
                                 "heatmapgl": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "colorscale":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]],
                                     "type":
                                     "heatmapgl"
                                 }],
                                 "histogram": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "histogram"
                                 }],
                                 "histogram2d": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "colorscale":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]],
                                     "type":
                                     "histogram2d"
                                 }],
                                 "histogram2dcontour": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "colorscale":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]],
                                     "type":
                                     "histogram2dcontour"
                                 }],
                                 "mesh3d": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "type": "mesh3d"
                                 }],
                                 "parcoords": [{
                                     "line": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "parcoords"
                                 }],
                                 "pie": [{
                                     "automargin": True,
                                     "type": "pie"
                                 }],
                                 "scatter": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scatter"
                                 }],
                                 "scatter3d": [{
                                     "line": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scatter3d"
                                 }],
                                 "scattercarpet": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scattercarpet"
                                 }],
                                 "scattergeo": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scattergeo"
                                 }],
                                 "scattergl": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scattergl"
                                 }],
                                 "scattermapbox": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scattermapbox"
                                 }],
                                 "scatterpolar": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scatterpolar"
                                 }],
                                 "scatterpolargl": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scatterpolargl"
                                 }],
                                 "scatterternary": [{
                                     "marker": {
                                         "colorbar": {
                                             "outlinewidth": 0,
                                             "ticks": ""
                                         }
                                     },
                                     "type": "scatterternary"
                                 }],
                                 "surface": [{
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     },
                                     "colorscale":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]],
                                     "type":
                                     "surface"
                                 }],
                                 "table": [{
                                     "cells": {
                                         "fill": {
                                             "color": "#EBF0F8"
                                         },
                                         "line": {
                                             "color": "white"
                                         }
                                     },
                                     "header": {
                                         "fill": {
                                             "color": "#C8D4E3"
                                         },
                                         "line": {
                                             "color": "white"
                                         }
                                     },
                                     "type": "table"
                                 }]
                             },
                             "layout": {
                                 "annotationdefaults": {
                                     "arrowcolor": "#2a3f5f",
                                     "arrowhead": 0,
                                     "arrowwidth": 1
                                 },
                                 "coloraxis": {
                                     "colorbar": {
                                         "outlinewidth": 0,
                                         "ticks": ""
                                     }
                                 },
                                 "colorscale": {
                                     "diverging":
                                     [[0, "#8e0152"], [0.1, "#c51b7d"],
                                      [0.2, "#de77ae"], [0.3, "#f1b6da"],
                                      [0.4, "#fde0ef"], [0.5, "#f7f7f7"],
                                      [0.6, "#e6f5d0"], [0.7, "#b8e186"],
                                      [0.8, "#7fbc41"], [0.9, "#4d9221"],
                                      [1, "#276419"]],
                                     "sequential":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]],
                                     "sequentialminus":
                                     [[0.0, "#0d0887"],
                                      [0.1111111111111111, "#46039f"],
                                      [0.2222222222222222, "#7201a8"],
                                      [0.3333333333333333, "#9c179e"],
                                      [0.4444444444444444, "#bd3786"],
                                      [0.5555555555555556, "#d8576b"],
                                      [0.6666666666666666, "#ed7953"],
                                      [0.7777777777777778, "#fb9f3a"],
                                      [0.8888888888888888, "#fdca26"],
                                      [1.0, "#f0f921"]]
                                 },
                                 "colorway": [
                                     "#636efa", "#EF553B", "#00cc96",
                                     "#ab63fa", "#FFA15A", "#19d3f3",
                                     "#FF6692", "#B6E880", "#FF97FF",
                                     "#FECB52"
                                 ],
                                 "font": {
                                     "color": "#2a3f5f"
                                 },
                                 "geo": {
                                     "bgcolor": "white",
                                     "lakecolor": "white",
                                     "landcolor": "#E5ECF6",
                                     "showlakes": True,
                                     "showland": True,
                                     "subunitcolor": "white"
                                 },
                                 "hoverlabel": {
                                     "align": "left"
                                 },
                                 "hovermode":
                                 "closest",
                                 "mapbox": {
                                     "style": "light"
                                 },
                                 "paper_bgcolor":
                                 "white",
                                 "plot_bgcolor":
                                 "#E5ECF6",
                                 "polar": {
                                     "angularaxis": {
                                         "gridcolor": "white",
                                         "linecolor": "white",
                                         "ticks": ""
                                     },
                                     "bgcolor": "#E5ECF6",
                                     "radialaxis": {
                                         "gridcolor": "white",
                                         "linecolor": "white",
                                         "ticks": ""
                                     }
                                 },
                                 "scene": {
                                     "xaxis": {
                                         "backgroundcolor": "#E5ECF6",
                                         "gridcolor": "white",
                                         "gridwidth": 2,
                                         "linecolor": "white",
                                         "showbackground": True,
                                         "ticks": "",
                                         "zerolinecolor": "white"
                                     },
                                     "yaxis": {
                                         "backgroundcolor": "#E5ECF6",
                                         "gridcolor": "white",
                                         "gridwidth": 2,
                                         "linecolor": "white",
                                         "showbackground": True,
                                         "ticks": "",
                                         "zerolinecolor": "white"
                                     },
                                     "zaxis": {
                                         "backgroundcolor": "#E5ECF6",
                                         "gridcolor": "white",
                                         "gridwidth": 2,
                                         "linecolor": "white",
                                         "showbackground": True,
                                         "ticks": "",
                                         "zerolinecolor": "white"
                                     }
                                 },
                                 "shapedefaults": {
                                     "line": {
                                         "color": "#2a3f5f"
                                     }
                                 },
                                 "ternary": {
                                     "aaxis": {
                                         "gridcolor": "white",
                                         "linecolor": "white",
                                         "ticks": ""
                                     },
                                     "baxis": {
                                         "gridcolor": "white",
                                         "linecolor": "white",
                                         "ticks": ""
                                     },
                                     "bgcolor": "#E5ECF6",
                                     "caxis": {
                                         "gridcolor": "white",
                                         "linecolor": "white",
                                         "ticks": ""
                                     }
                                 },
                                 "title": {
                                     "x": 0.05
                                 },
                                 "xaxis": {
                                     "automargin": True,
                                     "gridcolor": "white",
                                     "linecolor": "white",
                                     "ticks": "",
                                     "title": {
                                         "standoff": 15
                                     },
                                     "zerolinecolor": "white",
                                     "zerolinewidth": 2
                                 },
                                 "yaxis": {
                                     "automargin": True,
                                     "gridcolor": "white",
                                     "linecolor": "white",
                                     "ticks": "",
                                     "title": {
                                         "standoff": 15
                                     },
                                     "zerolinecolor": "white",
                                     "zerolinewidth": 2
                                 }
                             }
                         },
                         "xaxis": {
                             "title": {
                                 "text": "Timestamp"
                             }
                         },
                         "yaxis": {
                             "title": {
                                 "text": "holiday"
                             }
                         }
                     }
                 })
         ],
     )
    def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping): 
        if column_mapping:
            date_column = column_mapping.get('datetime')
            id_column = column_mapping.get('id')
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = column_mapping.get('numerical_features')
            target_names = column_mapping.get('target_names')
            if num_feature_names is None:
                num_feature_names = []
            else:
                num_feature_names = [name for name in num_feature_names if is_numeric_dtype(reference_data[name])] 

            cat_feature_names = column_mapping.get('categorical_features')
            if cat_feature_names is None:
                cat_feature_names = []
            else:
                cat_feature_names = [name for name in cat_feature_names if is_numeric_dtype(reference_data[name])] 
        
        else:
            date_column = 'datetime' if 'datetime' in reference_data.columns else None
            id_column = None
            target_column = 'target' if 'target' in reference_data.columns else None
            prediction_column = 'prediction' if 'prediction' in reference_data.columns else None

            utility_columns = [date_column, id_column, target_column, prediction_column]

            target_names = None

            num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
            cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))

        if prediction_column is not None and target_column is not None:
            binaraizer = preprocessing.LabelBinarizer()
            binaraizer.fit(reference_data[target_column])
            binaraized_target = binaraizer.transform(reference_data[target_column])
            if production_data is not None:
                ref_array_prediction = reference_data[prediction_column].to_numpy()
                ref_prediction_ids = np.argmax(ref_array_prediction, axis=-1)
                ref_prediction_labels = [prediction_column[x] for x in ref_prediction_ids]
                reference_data['prediction_labels'] = ref_prediction_labels

                prod_array_prediction = production_data[prediction_column].to_numpy()
                prod_prediction_ids = np.argmax(prod_array_prediction, axis=-1)
                prod_prediction_labels = [prediction_column[x] for x in prod_prediction_ids]
                production_data['prediction_labels'] = prod_prediction_labels

                additional_graphs_data = []
                params_data = []

                for feature_name in num_feature_names + cat_feature_names: 
                    #add data for table in params
                    labels = prediction_column

                    params_data.append(
                        {
                            "details": {
                                    "parts": [{"title":"All", "id":"All" + "_" + str(feature_name)}] + [{"title":str(label), "id": feature_name + "_" + str(label)} for label in labels],
                                    "insights": []
                                },
                            "f1": feature_name
                        }
                        )

                    #create confusion based plots 
                    reference_data['dataset'] = 'Reference'
                    production_data['dataset'] = 'Current'
                    merged_data = pd.concat([reference_data, production_data])

                    fig = px.histogram(merged_data, x=feature_name, color=target_column, facet_col="dataset", histnorm = '',
                        category_orders={"dataset": ["Reference", "Current"]})

                    fig_json  = json.loads(fig.to_json())

                    #write plot data in table as additional data
                    additional_graphs_data.append(
                        AdditionalGraphInfo(
                            "All" + "_" + str(feature_name),
                            {
                                "data" : fig_json['data'],
                                "layout" : fig_json['layout']
                            }, 
                        )
                    )

                    for label in labels:
                        fig = make_subplots(rows=1, cols=2, subplot_titles=("Reference", "Current"))

                        #REF 
                        fig.add_trace(go.Scatter(
                            x = reference_data[reference_data[target_column] == label][feature_name],
                            y = reference_data[reference_data[target_column] == label][label],
                            mode = 'markers',
                            name = str(label) + ' (ref)',
                            marker=dict(
                                size=6,
                                color=red 
                                )
                            ),
                            row=1, col=1
                        )

                        fig.add_trace(go.Scatter(
                            x = reference_data[reference_data[target_column] != label][feature_name],
                            y = reference_data[reference_data[target_column] != label][label],
                            mode = 'markers',
                            name = 'other (ref)',
                            marker=dict(
                                size=6,
                                color=grey 
                                )
                            ),
                            row=1, col=1
                        )


                        fig.update_layout(
                            xaxis_title=feature_name,
                            yaxis_title='Probability',
                            xaxis = dict(
                                showticklabels=True
                            ),
                             yaxis = dict(
                                range=(0, 1),
                                showticklabels=True
                            )
                        )

                        #PROD Prediction
                        fig.add_trace(go.Scatter(
                            x = production_data[production_data[target_column] == label][feature_name],
                            y = production_data[production_data[target_column] == label][label],
                            mode = 'markers',
                            name = str(label) + ' (curr)',
                            marker=dict(
                                size=6,
                                color=red #set color equal to a variable
                                )
                            ),
                            row=1, col=2
                        )

                        fig.add_trace(go.Scatter(
                            x = production_data[production_data[target_column] != label][feature_name],
                            y = production_data[production_data[target_column] != label][label],
                            mode = 'markers',
                            name = 'other (curr)',
                            marker=dict(
                                size=6,
                                color=grey #set color equal to a variable
                                )
                            ),
                            row=1, col=2
                        )

                        fig.update_layout(
                            xaxis_title=feature_name,
                            yaxis_title='Probability',
                            xaxis = dict(
                                showticklabels=True
                            ),
                             yaxis = dict(
                                range=(0, 1),
                                showticklabels=True
                            )
                        )

                        # Update xaxis properties
                        fig.update_xaxes(title_text=feature_name, showgrid=True, row=1, col=1)
                        fig.update_xaxes(title_text=feature_name, showgrid=True, row=1, col=2)

                        # Update yaxis properties
                        fig.update_yaxes(title_text="Probability", showgrid=True, row=1, col=1)
                        fig.update_yaxes(title_text="Probability", showgrid=True, row=1, col=2)

                        fig_json  = json.loads(fig.to_json())

                        #write plot data in table as additional data
                        additional_graphs_data.append(
                            AdditionalGraphInfo(
                                feature_name + "_" + str(label),
                                {
                                    "data" : fig_json['data'],
                                    "layout" : fig_json['layout']
                                }, 
                            )
                        )

                self.wi = BaseWidgetInfo(
                    title=self.title,
                    type="big_table",
                    details="",
                    alertStats=AlertStats(),
                    alerts=[],
                    alertsPosition="row",
                    insights=[],
                    size=2,
                    params={
                        "rowsPerPage" : min(len(num_feature_names) + len(cat_feature_names), 10),
                        "columns": [
                            {
                                "title": "Feature",
                                "field": "f1"
                            }
                        ],
                        "data": params_data
                    },
                    additionalGraphs=additional_graphs_data
                )

            else:
                ref_array_prediction = reference_data[prediction_column].to_numpy()
                ref_prediction_ids = np.argmax(ref_array_prediction, axis=-1)
                ref_prediction_labels = [prediction_column[x] for x in ref_prediction_ids]
                reference_data['prediction_labels'] = ref_prediction_labels

                additional_graphs_data = []
                params_data = []

                for feature_name in num_feature_names + cat_feature_names: 
                    #add data for table in params
                    labels = prediction_column

                    params_data.append(
                        {
                            "details": {
                                    "parts": [{"title":"All", "id":"All" + "_" + str(feature_name)}] + [{"title":str(label), "id": feature_name + "_" + str(label)} for label in labels],
                                    "insights": []
                                },
                            "f1": feature_name
                        }
                        )

                    #create confusion based plots 
                    fig = px.histogram(reference_data, x=feature_name, color=target_column, histnorm = '')

                    fig_json  = json.loads(fig.to_json())

                    #write plot data in table as additional data
                    additional_graphs_data.append(
                        AdditionalGraphInfo(
                            "All" + "_" + str(feature_name),
                            {
                                "data" : fig_json['data'],
                                "layout" : fig_json['layout']
                            }, 
                        )
                    )

                    for label in labels:

                        fig = go.Figure()

                        fig.add_trace(go.Scatter(
                            x = reference_data[reference_data[target_column] == label][feature_name],
                            y = reference_data[reference_data[target_column] == label][label],
                            mode = 'markers',
                            name = str(label),
                            marker=dict(
                                size=6,
                                color=red #set color equal to a variable
                            )
                        ))

                        fig.add_trace(go.Scatter(
                            x = reference_data[reference_data[target_column] != label][feature_name],
                            y = reference_data[reference_data[target_column] != label][label],
                            mode = 'markers',
                            name = 'other',
                            marker=dict(
                                size=6,
                                color=grey 
                            )
                        ))


                        fig.update_layout(
                            xaxis_title=feature_name,
                            yaxis_title='Probability',
                            xaxis = dict(
                                showticklabels=True
                            ),
                             yaxis = dict(
                                range=(0, 1),
                                showticklabels=True
                            )
                        )

                        fig_json  = json.loads(fig.to_json())

                        #write plot data in table as additional data
                        additional_graphs_data.append(
                            AdditionalGraphInfo(
                                feature_name + "_" + str(label),
                                {
                                    "data" : fig_json['data'],
                                    "layout" : fig_json['layout']
                                }, 
                            )
                        )

                self.wi = BaseWidgetInfo(
                    title=self.title,
                    type="big_table",
                    details="",
                    alertStats=AlertStats(),
                    alerts=[],
                    alertsPosition="row",
                    insights=[],
                    size=2,
                    params={
                        "rowsPerPage" : min(len(num_feature_names) + len(cat_feature_names), 10),
                        "columns": [
                            {
                                "title": "Feature",
                                "field": "f1"
                            }
                        ],
                        "data": params_data
                    },
                    additionalGraphs=additional_graphs_data
                )  
        else:
            self.wi = None