Example #1
def test_confident_learning_filter(return_indices_of_off_diagonals):
    if return_indices_of_off_diagonals:
        cj, indices = count.compute_confident_joint(
            labels=data["labels"],
            pred_probs=data["pred_probs"],
            calibrate=False,
            return_indices_of_off_diagonals=True,
        )
        # Check that the number of 'label issues' found in off diagonals
        # matches the off diagonals of the uncalibrated confident joint
        assert len(indices) == (np.sum(cj) - np.trace(cj))
    else:
        cj = count.compute_confident_joint(
            labels=data["labels"],
            pred_probs=data["pred_probs"],
            calibrate=False,
            return_indices_of_off_diagonals=False,
        )
        assert np.trace(cj) > -1
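# A minimal standalone sketch of the off-diagonal identity checked above, on synthetic data.
# Assumptions: the toy dataset and classifier below are illustrative only; the cleanlab call
# itself mirrors the API used in the test (count.compute_confident_joint).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from cleanlab import count

X, y = make_classification(n_samples=300, n_classes=3, n_informative=4, random_state=0)
# Out-of-sample predicted probabilities via cross-validation.
pred_probs = cross_val_predict(LogisticRegression(max_iter=1000), X, y,
                               cv=3, method="predict_proba")
cj, off_diag_indices = count.compute_confident_joint(
    labels=y,
    pred_probs=pred_probs,
    calibrate=False,
    return_indices_of_off_diagonals=True,
)
# Each off-diagonal count of the uncalibrated confident joint corresponds to one returned index.
assert len(off_diag_indices) == np.sum(cj) - np.trace(cj)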
Example #2
def test_compute_confident_joint():
    cj = count.compute_confident_joint(
        labels=data["labels"],
        pred_probs=data["pred_probs"],
    )

    # Check that confident joint doesn't overcount number of examples.
    assert np.sum(cj) <= data["n"]
    # Check that confident joint is correct shape
    assert np.shape(cj) == (data["m"], data["m"])
Example #3
def test_calibrate_joint():
    cj = count.compute_confident_joint(
        labels=data["labels"],
        pred_probs=data["pred_probs"],
        calibrate=False,
    )
    calibrated_cj = count.calibrate_confident_joint(
        confident_joint=cj,
        labels=data["labels"],
    )
    label_counts = np.bincount(data["labels"])

    # Check calibration
    assert all(calibrated_cj.sum(axis=1).round().astype(int) == label_counts)
    assert len(data["labels"]) == int(round(np.sum(calibrated_cj)))

    calibrated_cj2 = count.compute_confident_joint(
        labels=data["labels"],
        pred_probs=data["pred_probs"],
        calibrate=True,
    )

    # Check equivalency
    assert np.all(calibrated_cj == calibrated_cj2)
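# A hand-rolled sketch of the calibration property asserted above: rescaling each row of an
# uncalibrated confident joint to the observed label counts makes the rows sum to those counts
# and the whole matrix sum to the number of examples. This is an illustrative reimplementation
# with toy numbers, not cleanlab's calibrate_confident_joint itself.
import numpy as np

cj = np.array([[70.,  5.,  3.],
               [ 4., 60.,  6.],
               [ 2.,  7., 55.]])          # uncalibrated confident joint (toy counts)
label_counts = np.array([80, 75, 65])     # observed count of each noisy label

manual_calibrated = (cj.T / cj.sum(axis=1) * label_counts).T  # rescale row j to label_counts[j]
assert np.allclose(manual_calibrated.sum(axis=1), label_counts)
assert np.isclose(manual_calibrated.sum(), label_counts.sum())  # equals number of examples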
Example #4
def test_cj_in_find_label_issues_kwargs(filter_by, seed):
    labels = DATA["labels"]
    num_issues = []
    for provide_confident_joint in [True, False]:
        print(
            f"\nfilter_by: {filter_by} | seed: {seed} | cj_provided: {provide_confident_joint}"
        )
        np.random.seed(seed=seed)
        if provide_confident_joint:
            pred_probs = estimate_cv_predicted_probabilities(X=DATA["X_train"],
                                                             labels=labels,
                                                             seed=seed)
            confident_joint = compute_confident_joint(labels=labels,
                                                      pred_probs=pred_probs)
            cl = CleanLearning(
                find_label_issues_kwargs={
                    "confident_joint": confident_joint,
                    "filter_by": "both",
                    "min_examples_per_class": 1,
                },
                verbose=1,
            )
        else:
            cl = CleanLearning(
                clf=LogisticRegression(random_state=seed),
                find_label_issues_kwargs={
                    "filter_by": "both",
                    "min_examples_per_class": 1,
                },
                verbose=0,
            )
        label_issues_df = cl.find_label_issues(DATA["X_train"], labels=labels)
        label_issues_mask = label_issues_df["is_label_issue"].values
        # Check that the noise matrix was computed from the passed-in confident joint
        cj_reconstruct = (cl.inverse_noise_matrix *
                          np.bincount(DATA["labels"])).T.astype(int)
        assert np.all(cl.confident_joint == cj_reconstruct)
        num_issues.append(sum(label_issues_mask))

    # Check that the exact same number of issues is found regardless of whether the confident
    # joint is computed inside find_label_issues or precomputed and provided as a keyword argument.
    assert num_issues[0] == num_issues[1]
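# A small numeric sketch of the reconstruction check used above: entry [k][j] of the inverse
# noise matrix is P(true_label=k | noisy_label=j), so multiplying each column j by the count of
# noisy label j and transposing recovers the confident joint counts. Toy numbers, illustrative
# only (the test uses DATA and a fitted CleanLearning instance instead).
import numpy as np

confident_joint = np.array([[40, 10],
                            [ 5, 45]])                 # rows: noisy label, cols: true label
label_counts = confident_joint.sum(axis=1)             # count of each noisy label
inverse_noise_matrix = (confident_joint / label_counts[:, None]).T  # [k][j] = P(true=k | noisy=j)

cj_reconstruct = (inverse_noise_matrix * label_counts).T.astype(int)
assert np.all(cj_reconstruct == confident_joint)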
Example #5
def test_issue_158():
    # ref: https://github.com/cleanlab/cleanlab/issues/158
    pred_probs = np.array([
        [0.27916167, 0.14589103, 0.29264585, 0.28230144],
        [0.13429196, 0.12536383, 0.47943979, 0.26090442],
        [0.41348584, 0.13463275, 0.25845595, 0.19342546],
        [0.27753469, 0.12295569, 0.33125886, 0.26825075],
        [0.11649856, 0.11219034, 0.51857122, 0.25273988],
        [0.38010026, 0.25572261, 0.13305410, 0.23112304],
        [0.31583755, 0.13630690, 0.29246806, 0.25538750],
        [0.30240076, 0.16925207, 0.26499082, 0.26335636],
        [0.44524505, 0.27410085, 0.08305069, 0.19760341],
        [0.22903975, 0.07783631, 0.38035414, 0.31276980],
        [0.25071560, 0.12072900, 0.32551729, 0.30303812],
        [0.43809229, 0.14401381, 0.20839300, 0.20950090],
        [0.20749181, 0.11883556, 0.38402152, 0.28965111],
        [0.43840254, 0.13538447, 0.24518806, 0.18102493],
        [0.28504779, 0.10309750, 0.34258602, 0.26926868],
        [0.38425408, 0.29168969, 0.15181255, 0.17224368],
        [0.19339907, 0.10804265, 0.37570368, 0.32285460],
        [0.21509781, 0.07190167, 0.38914722, 0.32385330],
        [0.27040334, 0.13037840, 0.32842320, 0.27079507],
        [0.40590210, 0.16713560, 0.24889193, 0.17807036],
    ])
    labels = np.array(
        [3, 3, 1, 3, 0, 2, 2, 2, 0, 2, 2, 1, 0, 0, 0, 0, 2, 1, 3, 3])

    cj = count.compute_confident_joint(labels, pred_probs, calibrate=False)
    # should be no zeros on the diagonal
    assert np.all(cj.diagonal() != 0)

    py, noise_matrix, inv_noise_matrix = count.estimate_latent(cj, labels)
    # no nans anywhere
    assert not np.any(np.isnan(py))
    assert not np.any(np.isnan(noise_matrix))
    assert not np.any(np.isnan(inv_noise_matrix))
Example #6
    def find_label_issues(
        self,
        X=None,
        labels=None,
        *,
        pred_probs=None,
        thresholds=None,
        noise_matrix=None,
        inverse_noise_matrix=None,
        save_space=False,
        clf_kwargs={},
    ):
        """
        Identifies potential label issues in the dataset using confident learning.

        Runs cross-validation to get out-of-sample pred_probs from `clf`
        and then calls :py:func:`filter.find_label_issues
        <cleanlab.filter.find_label_issues>` to find label issues.
        These label issues are cached internally and returned in a pandas DataFrame.
        Kwargs for :py:func:`filter.find_label_issues
        <cleanlab.filter.find_label_issues>` must have already been specified
        in the initialization of this class, not here.

        Unlike :py:func:`filter.find_label_issues
        <cleanlab.filter.find_label_issues>`, which requires `pred_probs`,
        this method only requires a classifier and it can do the cross-validation for you.
        Both methods return the same boolean mask that identifies which examples have label issues.
        This is the preferred method to use if you plan to subsequently invoke:
        :py:meth:`CleanLearning.fit()
        <cleanlab.classification.CleanLearning.fit>`.

        Note: this method computes the label issues from scratch. To access
        previously-computed label issues from this :py:class:`CleanLearning
        <cleanlab.classification.CleanLearning>` instance, use the
        :py:meth:`get_label_issues
        <cleanlab.classification.CleanLearning.get_label_issues>` method.

        This is the method called to find label issues inside
        :py:meth:`CleanLearning.fit()
        <cleanlab.classification.CleanLearning.fit>`
        and they share mostly the same parameters.

        Parameters
        ----------
        save_space : bool, optional
          If True, the returned `label_issues_df` will not be stored as an attribute.
          This means that some other methods, like `self.get_label_issues()`, will no longer work.


        For info about the **other parameters**, see the docstring of :py:meth:`CleanLearning.fit()
        <cleanlab.classification.CleanLearning.fit>`.

        Returns
        -------
        pd.DataFrame
          pandas DataFrame of label issues for each example.
          Unless the `save_space` argument is specified, the same DataFrame is also stored as
          the `self.label_issues_df` attribute, accessible via
          :py:meth:`get_label_issues<cleanlab.classification.CleanLearning.get_label_issues>`.
          Each row represents an example from our dataset and
          the DataFrame may contain the following columns:

          * *is_label_issue*: boolean mask for the entire dataset where ``True`` represents a label issue and ``False`` represents an example that is accurately labeled with high confidence. This column is equivalent to `label_issues_mask` output from :py:func:`filter.find_label_issues<cleanlab.filter.find_label_issues>`.
          * *label_quality*: Numeric score that measures the quality of each label (how likely it is to be correct, with lower scores indicating potentially erroneous labels).
          * *given_label*: Integer indices corresponding to the class label originally given for this example (same as `labels` input). Included here for ease of comparison against `clf` predictions; only present if the "predicted_label" column is present.
          * *predicted_label*: Integer indices corresponding to the class predicted by trained `clf` model. Only present if ``pred_probs`` were provided as input or computed during label-issue-finding.
          * *sample_weight*: Numeric values used to weight examples during the final training of `clf` in :py:meth:`CleanLearning.fit()<cleanlab.classification.CleanLearning.fit>`. This column will not be present after `self.find_label_issues()` but may be added after a call to :py:meth:`CleanLearning.fit()<cleanlab.classification.CleanLearning.fit>`. For a more precise definition of sample weights, see the documentation of :py:meth:`CleanLearning.fit()<cleanlab.classification.CleanLearning.fit>`.
        """

        # Check inputs
        allow_empty_X = False if pred_probs is None else True
        assert_inputs_are_valid(X,
                                labels,
                                pred_probs,
                                allow_empty_X=allow_empty_X)
        if noise_matrix is not None and np.trace(noise_matrix) <= 1:
            t = np.round(np.trace(noise_matrix), 2)
            raise ValueError(
                "Trace(noise_matrix) is {}, but must exceed 1.".format(t))
        if inverse_noise_matrix is not None and (np.trace(inverse_noise_matrix)
                                                 <= 1):
            t = np.round(np.trace(inverse_noise_matrix), 2)
            raise ValueError(
                "Trace(inverse_noise_matrix) is {}. Must exceed 1.".format(t))

        # Number of classes
        self.num_classes = len(np.unique(labels))
        if len(labels) / self.num_classes < self.cv_n_folds:
            raise ValueError(
                "Need more data from each class for cross-validation. "
                "Try decreasing cv_n_folds (eg. to 2 or 3) in CleanLearning()")
        # 'ps' is p(labels=k)
        self.ps = value_counts(labels) / float(len(labels))

        self.clf_kwargs = clf_kwargs
        self._process_label_issues_kwargs(self.find_label_issues_kwargs)
        # self._process_label_issues_kwargs might set self.confident_joint. If so, we should use it.
        if self.confident_joint is not None:
            self.py, noise_matrix, inv_noise_matrix = estimate_latent(
                confident_joint=self.confident_joint,
                labels=labels,
            )

        # If needed, compute noise rates (probability of class-conditional mislabeling).
        if noise_matrix is not None:
            self.noise_matrix = noise_matrix
            if inverse_noise_matrix is None:
                if self.verbose:
                    print(
                        "Computing label noise estimates from provided noise matrix ..."
                    )
                self.py, self.inverse_noise_matrix = compute_py_inv_noise_matrix(
                    ps=self.ps,
                    noise_matrix=self.noise_matrix,
                )
        if inverse_noise_matrix is not None:
            self.inverse_noise_matrix = inverse_noise_matrix
            if noise_matrix is None:
                if self.verbose:
                    print(
                        "Computing label noise estimates from provided inverse noise matrix ..."
                    )
                self.noise_matrix = compute_noise_matrix_from_inverse(
                    ps=self.ps,
                    inverse_noise_matrix=self.inverse_noise_matrix,
                )

        if noise_matrix is None and inverse_noise_matrix is None:
            if pred_probs is None:
                if self.verbose:
                    print(
                        "Computing out of sample predicted probabilites via "
                        f"{self.cv_n_folds}-fold cross validation. May take a while ..."
                    )
                (
                    self.py,
                    self.noise_matrix,
                    self.inverse_noise_matrix,
                    self.confident_joint,
                    pred_probs,
                ) = estimate_py_noise_matrices_and_cv_pred_proba(
                    X=X,
                    labels=labels,
                    clf=self.clf,
                    cv_n_folds=self.cv_n_folds,
                    thresholds=thresholds,
                    converge_latent_estimates=self.converge_latent_estimates,
                    seed=self.seed,
                    clf_kwargs=self.clf_kwargs,
                )
            else:  # pred_probs is provided by user (assumed holdout probabilities)
                if self.verbose:
                    print(
                        "Computing label noise estimates from provided pred_probs ..."
                    )
                (
                    self.py,
                    self.noise_matrix,
                    self.inverse_noise_matrix,
                    self.confident_joint,
                ) = estimate_py_and_noise_matrices_from_probabilities(
                    labels=labels,
                    pred_probs=pred_probs,
                    thresholds=thresholds,
                    converge_latent_estimates=self.converge_latent_estimates,
                )
        # If needed, compute P(label=k|x), denoted pred_probs (the predicted probabilities)
        if pred_probs is None:
            if self.verbose:
                print(
                    "Computing out of sample predicted probabilites via "
                    f"{self.cv_n_folds}-fold cross validation. May take a while ..."
                )

            pred_probs = estimate_cv_predicted_probabilities(
                X=X,
                labels=labels,
                clf=self.clf,
                cv_n_folds=self.cv_n_folds,
                seed=self.seed,
                clf_kwargs=self.clf_kwargs,
            )
        # If needed, compute the confident_joint (e.g. occurs if noise_matrix was given)
        if self.confident_joint is None:
            self.confident_joint = compute_confident_joint(
                labels=labels,
                pred_probs=pred_probs,
                thresholds=thresholds,
            )
        # pulearning is the integer specifying the class assumed to contain no label errors (binary tasks only).
        if self.num_classes == 2 and self.pulearning is not None:  # pragma: no cover
            # pulearning = 1 (no error in 1 class) implies p(label=1|true_label=0) = 0
            self.noise_matrix[self.pulearning][1 - self.pulearning] = 0
            self.noise_matrix[1 - self.pulearning][1 - self.pulearning] = 1
            # pulearning = 1 (no error in 1 class) implies p(true_label=0|label=1) = 0
            self.inverse_noise_matrix[1 - self.pulearning][self.pulearning] = 0
            self.inverse_noise_matrix[self.pulearning][self.pulearning] = 1
            # pulearning = 1 (no error in 1 class) implies p(label=1,true_label=0) = 0
            self.confident_joint[self.pulearning][1 - self.pulearning] = 0
            self.confident_joint[1 - self.pulearning][1 - self.pulearning] = 1

        if self.verbose:
            print("Using predicted probabilities to identify label issues ...")
        label_issues_mask = filter.find_label_issues(
            labels,
            pred_probs,
            **self.find_label_issues_kwargs,
        )
        label_quality_scores = get_label_quality_scores(
            labels, pred_probs, **self.label_quality_scores_kwargs)
        label_issues_df = pd.DataFrame({
            "is_label_issue": label_issues_mask,
            "label_quality": label_quality_scores
        })
        if self.verbose:
            print(
                f"Identified {np.sum(label_issues_mask)} examples with label issues."
            )

        predicted_labels = pred_probs.argmax(axis=1)
        label_issues_df["given_label"] = compress_int_array(
            labels, self.num_classes)
        label_issues_df["predicted_label"] = compress_int_array(
            predicted_labels, self.num_classes)

        if not save_space:
            if self.label_issues_df is not None and self.verbose:
                print(
                    "Overwriting previously identified label issues stored at self.label_issues_df. "
                    "self.get_label_issues() will now return the newly identified label issues. "
                )
            self.label_issues_df = label_issues_df
            self.label_issues_mask = label_issues_df[
                "is_label_issue"]  # pointer to this column to avoid duplicating it
        elif self.verbose:
            print(  # pragma: no cover
                "Not storing label_issues as attributes since save_space was specified."
            )

        return label_issues_df
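# Standalone usage sketch for the method above (separate from the class definition). The toy
# dataset and classifier are illustrative assumptions; the CleanLearning calls and the returned
# DataFrame columns follow the docstring.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from cleanlab.classification import CleanLearning

X, labels = make_classification(n_samples=400, n_classes=3, n_informative=4, random_state=0)
cl = CleanLearning(clf=LogisticRegression(max_iter=1000), cv_n_folds=3)
label_issues_df = cl.find_label_issues(X, labels=labels)  # runs cross-validation internally
print(label_issues_df[["is_label_issue", "label_quality", "given_label", "predicted_label"]].head())
# Unless save_space=True was passed, the same DataFrame is cached on the instance:
assert label_issues_df.equals(cl.get_label_issues())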
Example #7
def find_label_issues(
    labels,
    pred_probs,
    *,
    confident_joint=None,
    filter_by="prune_by_noise_rate",
    return_indices_ranked_by=None,
    rank_by_kwargs={},
    multi_label=False,
    frac_noise=1.0,
    num_to_remove_per_class=None,
    min_examples_per_class=1,
    n_jobs=None,
    verbose=False,
):
    """
    Identifies potential label issues in the dataset using confident learning.

    Returns a boolean mask for the entire dataset where ``True`` represents
    a label issue and ``False`` represents an example that is confidently/accurately labeled.

    Instead of a mask, you can obtain *indices* of the label issues in your
    dataset by setting `return_indices_ranked_by` to specify the label quality
    score used to order the label issues.

    The number of indices returned is controlled by `frac_noise`: reduce its
    value to identify fewer label issues. If you aren't sure, leave this set to 1.0.

    Tip: if you encounter the error "pred_probs is not defined", try setting
    ``n_jobs=1``.

    Parameters
    ----------
    labels : np.array
      A discrete vector of noisy labels, i.e. some labels may be erroneous.
      *Format requirements*: for dataset with K classes, labels must be in 0, 1, ..., K-1.

    pred_probs : np.array
      An array of shape ``(N, K)`` of model-predicted probabilities,
      ``P(label=k|x)``. Each row of this matrix corresponds
      to an example `x` and contains the model-predicted probabilities that
      `x` belongs to each possible class, for each of the K classes. The
      columns must be ordered such that these probabilities correspond to
      class 0, 1, ..., K-1.

      **Caution**: `pred_probs` from your model must be out-of-sample!
      You should never provide predictions on the same examples used to train the model,
      as these will be overfit and unsuitable for finding label-errors.
      To obtain out-of-sample predicted probabilities for every datapoint in your dataset, you can use :ref:`cross-validation <pred_probs_cross_val>`.
      Alternatively it is ok if your model was trained on a separate dataset and you are only evaluating
      data that was previously held-out.

    confident_joint : np.array, optional
      An array of shape ``(K, K)`` representing the confident joint, the matrix used for identifying label issues, which
      estimates a confident subset of the joint distribution of the noisy and true labels, ``P_{noisy label, true label}``.
      Entry ``(j, k)`` in the matrix is the number of examples confidently counted into the pair of ``(noisy label=j, true label=k)`` classes.
      The `confident_joint` can be computed using :py:func:`count.compute_confident_joint <cleanlab.count.compute_confident_joint>`.
      If not provided, it is computed from the given (noisy) `labels` and `pred_probs`.

    filter_by : {'prune_by_class', 'prune_by_noise_rate', 'both', 'confident_learning', 'predicted_neq_given'}, default='prune_by_noise_rate'

      Method used for filtering/pruning out the label issues:

      - ``'prune_by_noise_rate'``: works by removing examples with *high probability* of being mislabeled for every non-diagonal in the confident joint (see `prune_counts_matrix` in `filter.py`). These are the examples where (with high confidence) the given label is unlikely to match the predicted label for the example.
      - ``'prune_by_class'``: works by removing the examples with *smallest probability* of belonging to their given class label for every class.
      - ``'both'``: Removes only the examples that would be filtered by both ``'prune_by_noise_rate'`` and ``'prune_by_class'``.
      - ``'confident_learning'``: Returns the examples in the off-diagonals of the confident joint. These are the examples that are confidently predicted to be a different label than their given label.
      - ``'predicted_neq_given'``: Find examples where the predicted class (i.e. argmax of the predicted probabilities) does not match the given label.

    return_indices_ranked_by : {None, 'self_confidence', 'normalized_margin', 'confidence_weighted_entropy'}, default=None
      If ``None``, returns a boolean mask (``True`` if example at index is label error).
      If not ``None``, returns an array of the label error indices
      (instead of a boolean mask) where error indices are ordered:

      - ``'normalized_margin'``: ``normalized margin (p(label = k) - max(p(label != k)))``
      - ``'self_confidence'``: ``[pred_probs[i][labels[i]] for i in label_issues_idx]``
      - ``'confidence_weighted_entropy'``: ``entropy(pred_probs) / self_confidence``

    rank_by_kwargs : dict, optional
      Optional keyword arguments to pass into scoring functions for ranking by
      label quality score (see :py:func:`rank.get_label_quality_scores
      <cleanlab.rank.get_label_quality_scores>`).

    multi_label : bool, optional
      If ``True``, labels should be an iterable (e.g. list) of iterables, containing a
      list of labels for each example, instead of just a single label.
      The multi-label setting supports classification tasks where an example has 1 or more labels.
      Example of a multi-labeled `labels` input: ``[[0,1], [1], [0,2], [0,1,2], [0], [1], ...]``.

    frac_noise : float, default=1.0
      Used to only return the "top" ``frac_noise * num_label_issues``. The choice of which "top"
      label issues to return is dependent on the `filter_by` method used. It works by reducing the
      size of the off-diagonals of the `joint` distribution of given labels and true labels
      proportionally by `frac_noise` prior to estimating label issues with each method.
      This parameter only applies for `filter_by=both`, `filter_by=prune_by_class`, and
      `filter_by=prune_by_noise_rate` methods and currently is unused by other methods.
      When ``frac_noise=1.0``, return all "confident" estimated noise indices (recommended).
      For smaller values, at most ``frac_noise * number_of_mislabeled_examples_in_class_k``
      label issues are returned for each class k.

    num_to_remove_per_class : array_like
      An iterable of length K, the number of classes.
      E.g. if K = 3, ``num_to_remove_per_class=[5, 0, 1]`` would return
      the indices of the 5 most likely mislabeled examples in class 0,
      and the most likely mislabeled example in class 2.

      Note
      ----
      Only set this parameter if ``filter_by='prune_by_class'``.
      You may use with ``filter_by='prune_by_noise_rate'``, but
      if ``num_to_remove_per_class=k``, then either k-1, k, or k+1
      examples may be removed for any class due to rounding error. If you need
      exactly 'k' examples removed from every class, you should use
      ``filter_by='prune_by_class'``.

    min_examples_per_class : int, default=1
      Minimum number of examples per class to avoid flagging as label issues.
      This is useful to avoid deleting too much data from one class
      when pruning noisy examples in datasets with rare classes.

    n_jobs : optional
      Number of processing threads used by multiprocessing. Default ``None``
      sets to the number of cores on your CPU.
      Set this to 1 to *disable* parallel processing (if it's causing issues).
      Windows users may see a speed-up with ``n_jobs=1``.

    verbose : optional
      If ``True``, prints when multiprocessing happens.

    Returns
    -------
    label_issues : np.array
      A boolean mask for the entire dataset where ``True`` represents a
      label issue and ``False`` represents an example that is accurately
      labeled with high confidence.

      Note
      ----
      You can also return the *indices* of the label issues in your dataset by setting
      `return_indices_ranked_by`.
    """

    assert filter_by in [
        "prune_by_noise_rate",
        "prune_by_class",
        "both",
        "confident_learning",
        "predicted_neq_given",
    ]  # TODO: change default to confident_learning ?
    assert len(labels) == len(pred_probs)
    if filter_by in [
            "confident_learning", "predicted_neq_given"
    ] and (frac_noise != 1.0 or num_to_remove_per_class is not None):
        warn_str = (
            "WARNING! frac_noise and num_to_remove_per_class parameters are only supported"
            " for filter_by 'prune_by_noise_rate', 'prune_by_class', and 'both'. They "
            "are not supported for methods 'confident_learning' or "
            "'predicted_neq_given'.")
        warnings.warn(warn_str)
    if (num_to_remove_per_class is not None) and (filter_by in [
            "confident_learning", "predicted_neq_given"
    ]):
        # TODO - add support for these two filters
        raise ValueError(
            "filter_by 'confident_learning' or 'predicted_neq_given' is not supported (yet) when setting 'num_to_remove_per_class'"
        )

    # Set-up number of multiprocessing threads
    if n_jobs is None:
        n_jobs = multiprocessing.cpu_count()
    else:
        assert n_jobs >= 1

    # Number of examples in each class of labels
    if multi_label:
        label_counts = value_counts([i for lst in labels for i in lst])
    else:
        label_counts = value_counts(labels)
    # Number of classes
    K = len(pred_probs.T)
    # Boolean set to true if dataset is large
    big_dataset = K * len(labels) > 1e8
    # Ensure labels are of type np.array()
    labels = np.asarray(labels)
    if confident_joint is None or filter_by == "confident_learning":
        from cleanlab.count import compute_confident_joint

        confident_joint, cl_error_indices = compute_confident_joint(
            labels=labels,
            pred_probs=pred_probs,
            multi_label=multi_label,
            return_indices_of_off_diagonals=True,
        )
    if filter_by in ["prune_by_noise_rate", "prune_by_class", "both"]:
        # Create `prune_count_matrix` with the number of examples to remove in each class and
        # leave at least min_examples_per_class examples per class.
        # `prune_count_matrix` is transposed relative to the confident_joint.
        prune_count_matrix = _keep_at_least_n_per_class(
            prune_count_matrix=confident_joint.T,
            n=min_examples_per_class,
            frac_noise=frac_noise,
        )

        if num_to_remove_per_class is not None:
            # Estimate joint probability distribution over label issues
            psy = prune_count_matrix / np.sum(prune_count_matrix, axis=1)
            noise_per_s = psy.sum(axis=1) - psy.diagonal()
            # Calibrate so that noise rates sum to num_to_remove_per_class
            tmp = (psy.T * num_to_remove_per_class / noise_per_s).T
            np.fill_diagonal(tmp, label_counts - num_to_remove_per_class)
            prune_count_matrix = round_preserving_row_totals(tmp)

        # Prepare multiprocessing shared data
        if n_jobs > 1:
            if multi_label:
                _labels = RawArray("I", int2onehot(labels).flatten())
            else:
                _labels = RawArray("I", labels)
            _label_counts = RawArray("I", label_counts)
            _prune_count_matrix = RawArray("I", prune_count_matrix.flatten())
            _pred_probs = RawArray("f", pred_probs.flatten())
        else:  # Multiprocessing is turned off. Create tuple with all parameters
            args = (
                labels,
                label_counts,
                prune_count_matrix,
                pred_probs,
                multi_label,
                min_examples_per_class,
            )

    # Perform Pruning with threshold probabilities from BFPRT algorithm in O(n)
    # Operations are parallelized across all CPU processes
    if filter_by == "prune_by_class" or filter_by == "both":
        if n_jobs > 1:  # parallelize
            with multiprocessing.Pool(
                    n_jobs,
                    initializer=_init,
                    initargs=(
                        _labels,
                        _label_counts,
                        _prune_count_matrix,
                        prune_count_matrix.shape,
                        _pred_probs,
                        pred_probs.shape,
                        multi_label,
                        min_examples_per_class,
                    ),
            ) as p:
                if verbose:  # pragma: no cover
                    print("Parallel processing label issues by class.")
                sys.stdout.flush()
                if big_dataset and tqdm_exists:
                    label_issues_masks_per_class = list(
                        tqdm.tqdm(p.imap(_prune_by_class, range(K)),
                                  total=K), )
                else:
                    label_issues_masks_per_class = p.map(
                        _prune_by_class, range(K))
        else:  # n_jobs = 1, so no parallelization
            label_issues_masks_per_class = [
                _prune_by_class(k, args) for k in range(K)
            ]
        label_issues_mask = np.stack(label_issues_masks_per_class).any(axis=0)

    if filter_by == "both":
        label_issues_mask_by_class = label_issues_mask

    if filter_by == "prune_by_noise_rate" or filter_by == "both":
        if n_jobs > 1:  # parallelize
            with multiprocessing.Pool(
                    n_jobs,
                    initializer=_init,
                    initargs=(
                        _labels,
                        _label_counts,
                        _prune_count_matrix,
                        prune_count_matrix.shape,
                        _pred_probs,
                        pred_probs.shape,
                        multi_label,
                        min_examples_per_class,
                    ),
            ) as p:
                if verbose:  # pragma: no cover
                    print("Parallel processing label issues by noise rate.")
                sys.stdout.flush()
                if big_dataset and tqdm_exists:
                    label_issues_masks_per_class = list(
                        tqdm.tqdm(p.imap(_prune_by_count, range(K)), total=K))
                else:
                    label_issues_masks_per_class = p.map(
                        _prune_by_count, range(K))
        else:  # n_jobs = 1, so no parallelization
            label_issues_masks_per_class = [
                _prune_by_count(k, args) for k in range(K)
            ]
        label_issues_mask = np.stack(label_issues_masks_per_class).any(axis=0)

    if filter_by == "both":
        label_issues_mask = label_issues_mask & label_issues_mask_by_class

    if filter_by == "confident_learning":
        label_issues_mask = np.zeros(len(labels), dtype=bool)
        for idx in cl_error_indices:
            label_issues_mask[idx] = True

    if filter_by == "predicted_neq_given":
        label_issues_mask = find_predicted_neq_given(labels,
                                                     pred_probs,
                                                     multi_label=multi_label)

    # Remove label issues if given label == model prediction
    if multi_label:
        pred = _multiclass_crossval_predict(labels, pred_probs)
        labels = MultiLabelBinarizer().fit_transform(labels)
    else:
        pred = pred_probs.argmax(axis=1)
    for i, pred_label in enumerate(pred):
        if (multi_label and np.all(pred_label == labels[i])
                or not multi_label and pred_label == labels[i]):
            label_issues_mask[i] = False

    if verbose:
        print("Number of label issues found: {}".format(
            sum(label_issues_mask)))

    # TODO: run count.num_label_issues() and adjust the total issues found here to match
    if return_indices_ranked_by is not None:
        er = order_label_issues(
            label_issues_mask=label_issues_mask,
            labels=labels,
            pred_probs=pred_probs,
            rank_by=return_indices_ranked_by,
            rank_by_kwargs=rank_by_kwargs,
        )
        return er
    return label_issues_mask
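# A minimal usage sketch for the function above. The dataset and model are illustrative
# assumptions; the out-of-sample pred_probs come from scikit-learn cross-validation, and the
# find_label_issues arguments follow the docstring.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from cleanlab.filter import find_label_issues

X, labels = make_classification(n_samples=500, n_classes=4, n_informative=5, random_state=0)
pred_probs = cross_val_predict(LogisticRegression(max_iter=1000), X, labels,
                               cv=5, method="predict_proba")

# Boolean mask over the whole dataset (default behavior).
issue_mask = find_label_issues(labels, pred_probs, filter_by="prune_by_noise_rate", n_jobs=1)

# Alternatively, indices of issues ordered by a label quality score.
issue_indices = find_label_issues(
    labels, pred_probs, return_indices_ranked_by="self_confidence", n_jobs=1
)
print(f"{issue_mask.sum()} examples flagged; worst-ranked few: {issue_indices[:5]}")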