Exemplo n.º 1
0
def selection_rate(y_true,
                   y_pred,
                   *,
                   pos_label: Any = 1,
                   sample_weight=None) -> float:
    """Return the (optionally weighted) fraction of predictions equal to ``pos_label``.

    ``pos_label`` identifies the 'good' outcome. The ``y_true`` argument is
    accepted only for signature consistency with other metrics and is ignored.

    Parameters
    ----------
    y_true : array_like
        The true labels (ignored)

    y_pred : array_like
        The predicted labels

    pos_label : Scalar
        The label to treat as the 'good' outcome

    sample_weight : array_like
        Optional array of sample weights
    """
    if len(y_pred) == 0:
        raise ValueError(_EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE)

    predictions = _convert_to_ndarray_and_squeeze(y_pred)
    is_selected = predictions == pos_label

    # Unweighted case is equivalent to all-ones weights.
    if sample_weight is None:
        weights = np.ones(len(is_selected))
    else:
        weights = np.squeeze(np.asarray(sample_weight))

    return np.dot(is_selected, weights) / weights.sum()
Exemplo n.º 2
0
    def test_single_element(self):
        # A doubly-nested single value must squeeze down to a 1-element vector.
        nested = [[[1]]]

        squeezed = fmim._convert_to_ndarray_and_squeeze(nested)

        assert isinstance(squeezed, np.ndarray)
        assert squeezed.shape == (1, )
        assert squeezed[0] == 1
Exemplo n.º 3
0
    def test_multi_columns(self):
        # A single row with two columns must squeeze to a flat 2-vector.
        row = [[0, 1]]

        squeezed = fmim._convert_to_ndarray_and_squeeze(row)

        assert isinstance(squeezed, np.ndarray)
        assert squeezed.shape == (2, )
        assert squeezed[0] == 0
        assert squeezed[1] == 1
Exemplo n.º 4
0
    def __init__(self,
                 *,
                 metrics: Union[Callable, Dict[str, Callable]],
                 y_true,
                 y_pred,
                 sensitive_features,
                 control_features: Optional = None,
                 sample_params: Optional[Union[Dict[str, Any],
                                               Dict[str, Dict[str,
                                                              Any]]]] = None):
        """Compute overall and by-group metric results for the given data.

        Sensitive features are required; control features are optional and,
        when present, participate in the grouped computation alongside them.
        Duplicate feature names across the two lists raise ``ValueError``.
        """
        check_consistent_length(y_true, y_pred)
        true_vals = _convert_to_ndarray_and_squeeze(y_true)
        pred_vals = _convert_to_ndarray_and_squeeze(y_pred)

        metric_funcs = self._process_functions(metrics, sample_params)

        # Sensitive features come first
        sens_feats = self._process_features("sensitive_feature_",
                                            sensitive_features, true_vals)
        self._sf_names = [f.name for f in sens_feats]

        # Control features only exist when the caller supplied them
        ctrl_feats = None
        self._cf_names = None
        if control_features is not None:
            ctrl_feats = self._process_features("control_feature_",
                                                control_features, true_vals)
            self._cf_names = [f.name for f in ctrl_feats]

        # Guard against a name appearing in both feature lists
        seen = set()
        combined = list(self._sf_names)
        if self._cf_names:
            combined.extend(self._cf_names)
        for feat_name in combined:
            if feat_name in seen:
                raise ValueError(_DUPLICATE_FEATURE_NAME.format(feat_name))
            seen.add(feat_name)

        self._overall = self._compute_overall(metric_funcs, true_vals,
                                              pred_vals, ctrl_feats)
        self._by_group = self._compute_by_group(metric_funcs, true_vals,
                                                pred_vals, sens_feats,
                                                ctrl_feats)
Exemplo n.º 5
0
    def test_simple_list(self):
        # A flat list must round-trip into a same-shaped ndarray.
        values = [0, 1, 2]

        squeezed = fmim._convert_to_ndarray_and_squeeze(values)

        assert isinstance(squeezed, np.ndarray)
        assert squeezed.shape == (3, )
        for idx, expected in enumerate(values):
            assert squeezed[idx] == expected
Exemplo n.º 6
0
def selection_rate(y_true, y_pred, *, pos_label=1, sample_weight=None):
    """Return the (optionally weighted) fraction of predictions equal to ``pos_label``.

    ``pos_label`` identifies the 'good' outcome; ``y_true`` is ignored and
    present only for signature consistency with other metric functions.
    """
    if len(y_pred) == 0:
        raise ValueError(_EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE)

    matches = _convert_to_ndarray_and_squeeze(y_pred) == pos_label

    # Default to uniform weights when none were supplied.
    weights = (np.ones(len(matches)) if sample_weight is None
               else np.squeeze(np.asarray(sample_weight)))

    return np.dot(matches, weights) / weights.sum()
Exemplo n.º 7
0
    def __init__(
        self,
        *,
        metrics: Union[Callable, Dict[str, Callable]],
        y_true,
        y_pred,
        sensitive_features,
        control_features=None,
        sample_params: Optional[Union[Dict[str, Any],
                                      Dict[str, Dict[str, Any]]]] = None,
    ):
        """Assemble a working DataFrame and compute overall/by-group metrics.

        The true/predicted labels plus every sensitive and control feature
        become columns of a single frame, which the frame-building helpers
        then consume. Duplicate feature names raise ``ValueError``.
        """
        check_consistent_length(y_true, y_pred)

        true_arr = _convert_to_ndarray_and_squeeze(y_true)
        pred_arr = _convert_to_ndarray_and_squeeze(y_pred)

        frame = pd.DataFrame.from_dict({
            "y_true": list(true_arr),
            "y_pred": list(pred_arr)
        })

        metric_fns = self._process_functions(metrics, sample_params, frame)

        # Sensitive features come first
        sens_feats = self._process_features("sensitive_feature_",
                                            sensitive_features, true_arr)
        self._sf_names = [f.name_ for f in sens_feats]

        # Control features only exist when the caller supplied them
        ctrl_feats = None
        self._cf_names = None
        if control_features is not None:
            ctrl_feats = self._process_features("control_feature_",
                                                control_features, true_arr)
            self._cf_names = [f.name_ for f in ctrl_feats]

        # Every feature becomes a column in the working frame
        for feat in sens_feats:
            frame[feat.name_] = list(feat.raw_feature_)
        if ctrl_feats is not None:
            for feat in ctrl_feats:
                frame[feat.name_] = list(feat.raw_feature_)

        # Guard against a name appearing in both feature lists
        seen = set()
        combined = list(self._sf_names)
        if self._cf_names:
            combined.extend(self._cf_names)
        for feat_name in combined:
            if feat_name in seen:
                raise ValueError(_DUPLICATE_FEATURE_NAME.format(feat_name))
            seen.add(feat_name)

        # 'Overall' results (possibly split by control features)
        self._overall = self._build_overall_frame(frame, metric_fns,
                                                  ctrl_feats, self._cf_names)

        group_feats = copy.deepcopy(sens_feats)
        if ctrl_feats is not None:
            # Control features go first so they sit 'higher' in the index
            group_feats = copy.deepcopy(ctrl_feats) + group_feats

        # 'By group' results over the combined feature index
        self._by_group = self._build_by_group_frame(frame, metric_fns,
                                                    group_feats)
Exemplo n.º 8
0
    def __init__(
        self,
        *,
        metrics: Union[Callable, Dict[str, Callable]],
        y_true,
        y_pred,
        sensitive_features,
        control_features=None,
        sample_params: Optional[Union[Dict[str, Any],
                                      Dict[str, Dict[str, Any]]]] = None,
    ):
        """Compute overall and by-group metric frames from labels and features.

        The labels and every sensitive/control feature are collected into a
        single DataFrame; the metric functions are then applied to the whole
        frame (``_overall``) and to each group defined by the features
        (``_by_group``).

        Parameters
        ----------
        metrics : Callable or Dict[str, Callable]
            A metric function, or a mapping of names to metric functions.
        y_true : array_like
            The true labels.
        y_pred : array_like
            The predicted labels; must have the same length as ``y_true``.
        sensitive_features : array_like
            Feature(s) defining the subgroups to evaluate.
        control_features : array_like, optional
            Additional grouping feature(s); when present they are placed
            above the sensitive features in the result index.
        sample_params : dict, optional
            Extra per-sample parameters passed through to the metric
            functions (single dict, or dict-of-dicts keyed by metric name).

        Raises
        ------
        ValueError
            If a feature name appears more than once across the sensitive
            and control feature lists.
        """
        check_consistent_length(y_true, y_pred)

        # Normalize both label arrays to squeezed ndarrays.
        y_t = _convert_to_ndarray_and_squeeze(y_true)
        y_p = _convert_to_ndarray_and_squeeze(y_pred)

        # Single working frame holding labels plus (later) feature columns.
        all_data = pd.DataFrame.from_dict({
            "y_true": list(y_t),
            "y_pred": list(y_p)
        })

        annotated_funcs = self._process_functions(metrics, sample_params,
                                                  all_data)

        # Now, prepare the sensitive features
        sf_list = self._process_features("sensitive_feature_",
                                         sensitive_features, y_t)
        self._sf_names = [x.name_ for x in sf_list]

        # Prepare the control features
        # Adjust _sf_indices if needed
        cf_list = None
        self._cf_names = None
        if control_features is not None:
            cf_list = self._process_features("control_feature_",
                                             control_features, y_t)
            self._cf_names = [x.name_ for x in cf_list]

        # Add sensitive and conditional features to all_data
        for sf in sf_list:
            all_data[sf.name_] = list(sf.raw_feature_)
        if cf_list is not None:
            for cf in cf_list:
                all_data[cf.name_] = list(cf.raw_feature_)

        # Check for duplicate feature names
        nameset = set()
        namelist = self._sf_names
        if self._cf_names:
            namelist = namelist + self._cf_names
        for name in namelist:
            if name in nameset:
                raise ValueError(_DUPLICATE_FEATURE_NAME.format(name))
            nameset.add(name)

        # 'Overall' results: one row when there are no control features,
        # otherwise one row per control-feature group.
        if self._cf_names is None:
            self._overall = apply_to_dataframe(
                all_data, metric_functions=annotated_funcs)
        else:
            temp = all_data.groupby(by=self._cf_names).apply(
                apply_to_dataframe, metric_functions=annotated_funcs)
            # If there are multiple control features, might have missing combinations
            if len(self._cf_names) > 1:
                # Reindex onto the full product of feature classes so every
                # combination appears, even those absent from the data.
                all_indices = pd.MultiIndex.from_product(
                    [x.classes_ for x in cf_list],
                    names=[x.name_ for x in cf_list],
                )

                self._overall = temp.reindex(index=all_indices)
            else:
                self._overall = temp

        # Deep-copy so the grouping list cannot alias the originals.
        grouping_features = copy.deepcopy(sf_list)
        if cf_list is not None:
            # Prepend the conditional features, so they are 'higher'
            grouping_features = copy.deepcopy(cf_list) + grouping_features

        # 'By group' results over all grouping features combined.
        temp = all_data.groupby([x.name_ for x in grouping_features
                                 ]).apply(apply_to_dataframe,
                                          metric_functions=annotated_funcs)
        if len(grouping_features) > 1:
            # As above: expand to the full cross-product of group classes.
            all_indices = pd.MultiIndex.from_product(
                [x.classes_ for x in grouping_features],
                names=[x.name_ for x in grouping_features],
            )

            self._by_group = temp.reindex(index=all_indices)
        else:
            self._by_group = temp