示例#1
0
    def test_check_indices(self):
        """Exercise check_indices with a default, a valid and two invalid index selections."""
        # passing None is expected to yield every index from 0 up to and including max_index
        all_indices = check_indices(None, max_index=4)
        np.testing.assert_array_equal(list(range(5)), all_indices)

        # an explicit selection within the allowed range is returned as given
        selected = check_indices([2, 3], max_index=4)
        np.testing.assert_array_equal([2, 3], selected)

        # a selection containing the same index twice must be rejected
        with self.assertRaises(ValueError):
            check_indices(indices=[3, 3], max_index=5)

        # a selection containing an index above max_index must be rejected
        with self.assertRaises(ValueError):
            check_indices(indices=[2, 6], max_index=5)
示例#2
0
    def confidence_scores(self, X, annotator_ids=None, **kwargs):
        """Look up the stored confidence scores of the queried annotators.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Samples whose confidence scores are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose confidence scores are queried.

        Returns
        -------
        C: numpy.ndarray, shape (n_samples, n_annotators)
            Confidence scores of the queried annotators for labelling the given samples.
            Entries of non queried annotators and of samples not found in `self.X_` are np.nan.
        """
        # validate the annotator selection first, then the samples
        annotator_ids = check_indices(annotator_ids,
                                      self.n_annotators() - 1, 'annotator_ids')
        X = check_array(X)

        # position of each given sample within the stored samples; -1 marks a sample that is not stored
        sample_ids = indices(self.X_, X, missing=-1)
        is_known = sample_ids >= 0

        # column vector of annotator indices, so that it broadcasts against the selected rows
        annotator_cols = annotator_ids[:, None]

        # start with all scores missing and copy the stored scores of known samples
        C = np.full((np.size(X, 0), self.n_annotators()), np.nan)
        stored_rows = sample_ids[is_known]
        C[is_known, annotator_cols] = self.C_[stored_rows, annotator_cols]

        return C
示例#3
0
    def class_labels(self, X, annotator_ids=None, query_value=1, **kwargs):
        """Method returning the class labels of the given samples.

        The class labels are obtained from the wrapped annotator model and are afterwards randomly altered.
        For each annotator, a label is altered with probability
        min(abs(learning_rate) * n_queries_before_this_call, 1):

        - negative learning rate, non adversarial annotator: the label is replaced by a random class label,
        - negative learning rate, adversarial annotator: a correct label is replaced by a random false label,
        - positive learning rate: a false label is replaced by the true class label.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
             Samples whose class labels are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose class labels are queried.
        query_value: int
            The query value represents the increment of the query statistics of the queried annotators.
        kwargs: dict
            Must contain the entry 'y_true' (array-like, shape (n_samples)) with the true class label of each
            given sample. All keyword arguments are forwarded to the wrapped annotator model.

        Returns
        -------
        Y: numpy.ndarray, shape (n_samples, n_annotators)
            Class labels of the given samples which were provided by the queried annotators.
            The non queried annotators return np.nan values.
        """
        # check parameters
        X, y_true = check_X_y(X, kwargs.get('y_true'))
        annotator_ids = check_indices(annotator_ids, self.n_annotators() - 1, 'annotator_ids')
        query_value = check_positive_integer(query_value, 'query_value')

        # copy the query statistics before querying the model, so that the flip probabilities depend only
        # on the queries made before this call
        prev_n_queries = np.copy(self.n_queries())
        Y = self.annotator_model_.class_labels(X=X, annotator_ids=annotator_ids, query_value=query_value, **kwargs)

        # without any previous query, every flip probability is zero and the labels stay untouched
        if np.sum(prev_n_queries) > 0:
            # only queried annotators are processed: entries of the other annotators must remain np.nan
            for a_idx in annotator_ids:
                learning_rate = self.learning_rates_[a_idx]
                is_adversarial = self.adversarial_[a_idx]
                flip_p = min(abs(learning_rate) * prev_n_queries[a_idx], 1)

                # seeding with the previous number of queries ensures constant accuracy for a given
                # number of queries
                random_state = np.random.RandomState(prev_n_queries[a_idx])

                for x_idx in range(len(X)):
                    # the flip is drawn for every sample to keep the random stream aligned with the samples
                    flip = random_state.binomial(1, flip_p)
                    label = Y[x_idx, a_idx]

                    # a missing label never equals the true label and would otherwise be overwritten
                    if not flip or np.isnan(label):
                        continue

                    if learning_rate < 0:
                        if not is_adversarial:
                            Y[x_idx, a_idx] = random_state.choice(self.y_unique_)
                        elif y_true[x_idx] == label:
                            false_labels = self.y_unique_[self.y_unique_ != y_true[x_idx]]
                            Y[x_idx, a_idx] = random_state.choice(false_labels)
                    elif learning_rate > 0 and y_true[x_idx] != label:
                        Y[x_idx, a_idx] = y_true[x_idx]
        return Y
示例#4
0
    def plot_labelling_accuracy(self,
                                X,
                                y_true,
                                annotator_ids=None,
                                figsize=(4, 4),
                                dpi=150,
                                fontsize=12):
        """Method plotting the labelling accuracy of each desired annotator.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
             Samples on which the labelling accuracies of the annotators is evaluated.
        y_true: array-like, shape (n_samples)
            True class labels of the given samples.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose labelling accuracies are plotted.
        figsize: 2-tuple of floats, default: (4, 4)
            Figure dimension (width, height) in inches.
        dpi: float, default: 150
            Dots per inch.
        fontsize: int
            Font size of plotted text.

        Returns
        -------
        fig: matplotlib.figure.Figure object
            Created figure.

        ax: :py:class:`matplotlib.axes.Axes` object
            Created axes.
        """
        annotator_ids = check_indices(annotator_ids,
                                      self.n_annotators() - 1, 'annotator_ids')

        # accuracies that could not be computed (np.nan) are drawn as bars of height zero
        acc = np.asarray(self.labelling_performance(X, y_true))
        acc[np.isnan(acc)] = 0

        # one bar per selected annotator: the positions must match the number of plotted accuracies,
        # which differs from the total number of annotators when only a subset is selected
        x = np.arange(len(annotator_ids))
        annot_names = [
            r'annotator $a_' + str(a_idx) + '$' for a_idx in annotator_ids
        ]

        # draw on the created axes explicitly instead of relying on pyplot's current axes
        fig, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)
        ax.bar(x, acc[annotator_ids])
        ax.set_xticks(x)
        ax.set_xticklabels(annot_names, fontsize=fontsize)
        ax.tick_params(axis='y', labelsize=fontsize)
        ax.set_ylabel('labelling accuracy', fontsize=fontsize)
        ax.set_title('labelling accuracy of annotators', fontsize=fontsize)
        return fig, ax
示例#5
0
    def class_labels(self, X, annotator_ids=None, query_value=1, **kwargs):
        """Collect the class labels of the given samples from all annotator types.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Samples whose class labels are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose class labels are queried.
            If None, every annotator of every annotator type is queried.
        query_value: int
            The query value represents the increment of the query statistics of the queried annotators.
        kwargs: dict
            Further keyword arguments forwarded to the `class_labels` method of each annotator type.

        Returns
        -------
        Y: numpy.ndarray, shape (n_samples, n_annotators)
            Horizontally stacked class labels of all annotator types.
            The non queried annotators return np.nan values.
        """
        X = check_array(X)

        # without an explicit selection, each annotator type is queried for all of its annotators
        if annotator_ids is None:
            return np.hstack([
                annot_type.class_labels(X, None, query_value, **kwargs)
                for annot_type in self.annotator_types_
            ])

        # validate the selection and split it into one entry per annotator type
        checked_ids = check_indices(annotator_ids,
                                    self.n_annotators() - 1, 'annotator_ids')
        ids_per_type = self._transform_ids(checked_ids)

        label_blocks = []
        for type_idx, annot_type in enumerate(self.annotator_types_):
            type_ids = ids_per_type[type_idx]
            if len(type_ids) > 0:
                block = annot_type.class_labels(X=X,
                                                annotator_ids=type_ids,
                                                query_value=query_value,
                                                **kwargs)
            else:
                # no annotator of this type is queried: contribute only missing labels
                block = np.full((len(X), annot_type.n_annotators()), np.nan)
            label_blocks.append(block)

        return np.hstack(label_blocks)
示例#6
0
    def class_labels(self, X, annotator_ids=None, query_value=1, **kwargs):
        """Method returning the class labels of the given samples.
        If the query value is greater than zero, it updates the n_queries and queried sample statistics

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
             Samples whose class labels are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose class labels are queried.
        query_value: int
            The query value represents the increment of the query statistics of the queried annotators.

        Returns
        -------
        Y: numpy.ndarray, shape (n_samples, n_annotators)
            Class labels of the given samples which were provided by the queried annotators.
            The non queried annotators return np.nan values. Samples which are not found among the stored
            samples `self.X_` also get np.nan values.
        """
        # check annotator_ids
        annotator_ids = check_indices(annotator_ids,
                                      self.n_annotators() - 1, 'annotator_ids')

        # obtain ids of queried samples (-1 marks a sample that is not stored)
        X = check_array(X)
        sample_ids = indices(self.X_, X, missing=-1)
        sample_ids_flag = sample_ids >= 0
        known_sample_ids = sample_ids[sample_ids_flag]

        # class labels provided by queried annotators
        Y = np.full((np.size(X, 0), self.n_annotators()), np.nan)
        Y[sample_ids_flag,
          annotator_ids[:, None]] = self.Y_[known_sample_ids,
                                            annotator_ids[:, None]]

        # update query statistics
        if query_value > 0:
            # only known samples are flagged: the placeholder -1 of an unknown sample would otherwise
            # be interpreted as a negative index and flag the last stored sample
            self.queried_flags_[known_sample_ids, annotator_ids[:, None]] = True
            self.n_queries_[annotator_ids] += query_value

        return Y
示例#7
0
    def confidence_scores(self, X, annotator_ids=None, **kwargs):
        """Collect the confidence scores for the given samples from all annotator types.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Samples whose confidence scores are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose confidence scores are queried.
            If None, every annotator of every annotator type is queried.

        Returns
        -------
        C: numpy.ndarray, shape (n_samples, n_annotators)
            Horizontally stacked confidence scores of all annotator types.
            The non queried annotators should return np.nan values.
        """
        X = check_array(X)

        # without an explicit selection, each annotator type reports the scores of all of its annotators
        if annotator_ids is None:
            return np.hstack([
                annot_type.confidence_scores(X)
                for annot_type in self.annotator_types_
            ])

        # validate the selection and split it into one entry per annotator type
        checked_ids = check_indices(annotator_ids,
                                    self.n_annotators() - 1, 'annotator_ids')
        ids_per_type = self._transform_ids(checked_ids)

        score_blocks = []
        for type_idx, annot_type in enumerate(self.annotator_types_):
            type_ids = ids_per_type[type_idx]
            if len(type_ids) > 0:
                block = annot_type.confidence_scores(X, type_ids)
            else:
                # no annotator of this type is queried: contribute only missing scores
                block = np.full((len(X), annot_type.n_annotators()), np.nan)
            score_blocks.append(block)

        return np.hstack(score_blocks)
示例#8
0
    def plot_class_labels(self,
                          X,
                          features_ids=None,
                          annotator_ids=None,
                          plot_confidences=5,
                          y_true=None,
                          figsize=(5, 3),
                          dpi=150,
                          fontsize=7,
                          **kwargs):
        """Method creating scatter plots of the given samples for each annotator.
        In each scatter plot, the samples are colored according the class labels provided by the
        corresponding annotator.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Samples which are plotted.
        features_ids: array-like, shape (2)
            The feature indices to be plotted. Only the first two indices are used. Each index must refer to
            a feature (column) of X. By default, the first two features (or the only feature) are plotted.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose class label distributions are plotted.
        plot_confidences: boolean
            If it evaluates to true, the size of the markers is plotted according to the given confidence
            scores. Otherwise, all markers have the same size.
        y_true: array-like, shape (n_samples)
            This is a optional parameter. If the true class labels are given, the samples are marked according to
            correctness of their predicted class label
        figsize: 2-tuple of floats, default: (5, 3)
            Figure dimension (width, height) in inches, where the height refers to a single annotator, i.e.
            the created figure has the height figsize[1] * n_queried_annotators.
        dpi: float, default: 150
            Dots per inch.
        fontsize: int
            Font size of plotted text.
        kwargs: dict
            Further keyword arguments forwarded to `self.class_labels`.

        Returns
        -------
        fig: matplotlib.figure.Figure object
            Created figure.

        ax: array-like, shape (n_annotator_ids).
            The array ax is a collection of :py:class:`matplotlib.axes.Axes` instances representing the plots of
            the annotators.
        """
        # check annotator_ids
        annotator_ids = check_indices(annotator_ids,
                                      self.n_annotators() - 1, 'annotator_ids')

        # check features_ids: the largest valid index is given by the number of features of X and not by
        # the number of selected features
        X = check_array(X)
        if features_ids is None:
            features_ids = np.arange(1 + (np.size(X, 1) > 1), dtype=int)
        features_ids = check_indices(features_ids,
                                     np.size(X, 1) - 1, 'feature_ids')
        n_features = len(features_ids)
        x_0 = X[:, features_ids[0]]
        if n_features > 1:
            x_1 = X[:, features_ids[1]]
        else:
            # a single feature is plotted along a horizontal line
            x_1 = np.zeros(np.size(X, 0))

        # check true class labels
        if y_true is not None:
            X, y_true = check_X_y(X, y_true)
        m_true = 'o'
        m_false = 'x'

        # query class labels (a query value of zero is passed to leave the query statistics unchanged)
        Y = self.class_labels(X=X,
                              annotator_ids=annotator_ids,
                              query_value=0,
                              y_true=y_true,
                              **kwargs)
        C = self.confidence_scores(X=X, annotator_ids=annotator_ids)
        y_unique = np.unique(Y[~np.isnan(Y)])
        n_classes = len(y_unique)

        # setup the scatter plots
        fig, ax = plt.subplots(len(annotator_ids),
                               1,
                               figsize=(figsize[0],
                                        len(annotator_ids) * figsize[1]),
                               dpi=dpi)
        # a single subplot is returned as a bare Axes object instead of an array
        ax = [ax] if len(annotator_ids) == 1 else ax
        # one color per class plus an additional color for missing class labels
        colors = cm.rainbow(np.linspace(0, 1, n_classes + 1))
        for a in range(len(annotator_ids)):
            y_a = Y[:, annotator_ids[a]]
            if plot_confidences:
                # marker sizes grow with the confidence scores relative to the maximal confidence score
                c_a = C[:, annotator_ids[a]]
                c_a = (.015 * dpi + .0175 * dpi * c_a / np.nanmax(C))**(350 /
                                                                        dpi)
                c_a[np.isnan(c_a)] = 1
            else:
                c_a = np.full(len(y_a), .1 * dpi)
            if y_true is None:
                for i, y in enumerate(y_unique):
                    flag = y_a == y
                    ax[a].scatter(x_0[flag],
                                  x_1[flag],
                                  color=colors[i].reshape(1, -1),
                                  label='prediction is ' + str(int(y)),
                                  s=c_a[flag])
            else:
                for i, y in enumerate(y_unique):
                    true_predictions = np.logical_and(y_a == y, y_a == y_true)
                    false_predictions = np.logical_and(y_a == y, y_a != y_true)
                    ax[a].scatter(x_0[true_predictions],
                                  x_1[true_predictions],
                                  color=colors[i].reshape(1, -1),
                                  label='prediction ' + str(int(y)) +
                                  ' is true',
                                  marker=m_true,
                                  s=c_a[true_predictions])
                    ax[a].scatter(x_0[false_predictions],
                                  x_1[false_predictions],
                                  color=colors[i].reshape(1, -1),
                                  label='prediction ' + str(int(y)) +
                                  ' is false',
                                  marker=m_false,
                                  s=c_a[false_predictions])
            # samples without class label of this annotator
            is_missing = np.isnan(y_a)
            if np.sum(is_missing) > 0:
                ax[a].scatter(x_0[is_missing],
                              x_1[is_missing],
                              color=colors[n_classes].reshape(1, -1),
                              label='prediction is NA',
                              marker=m_false,
                              s=np.full(np.sum(is_missing), 5))
            lgnd = ax[a].legend(loc='best',
                                fancybox=False,
                                framealpha=0.5,
                                prop={'size': fontsize})
            # newer matplotlib versions expose the handles as `legend_handles`, older ones only as
            # `legendHandles`
            legend_handles = getattr(lgnd, 'legend_handles', None)
            if legend_handles is None:
                legend_handles = lgnd.legendHandles
            # use a uniform marker size within the legend, independent of the confidence scores
            for handle in legend_handles:
                handle.set_sizes([fontsize])
            ax[a].tick_params(labelsize=fontsize)
            ax[a].set_title(r'class labels predicted by annotator $a_' +
                            str(annotator_ids[a]) + '$',
                            fontsize=fontsize)

        return fig, ax
示例#9
0
    def plot_labelling_confusion_matrices(self,
                                          X,
                                          y_true,
                                          y_unique,
                                          annotator_ids=None,
                                          figsize=(4, 4),
                                          dpi=150,
                                          fontsize=12):
        """Draw one confusion matrix heatmap per desired annotator.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Samples on which the labelling confusion matrices of the annotators are evaluated.
        y_true: array-like, shape (n_samples)
            True class labels of the given samples.
        y_unique: array-like, shape (n_classes)
            Class labels used (in sorted order) as tick labels of the heatmaps.
            Its length defines the size of the plotted matrices.
        annotator_ids: array-like, shape (n_queried_annotators)
            The indices of the annotators whose labelling confusion matrices are plotted.
        figsize: 2-tuple of floats, default: (4, 4)
            Figure dimension (width, height) in inches, where the height refers to a single annotator, i.e.
            the created figure has the height figsize[1] * n_queried_annotators.
        dpi: float, default: 150
            Dots per inch.
        fontsize: int
            Font size of plotted text.

        Returns
        -------
        fig: matplotlib.figure.Figure object
            Created figure.

        ax: array-like, shape (n_annotator_ids).
            The array ax is a collection of :py:class:`matplotlib.axes.Axes` instances representing the plots of
            the annotators.
        """
        annotator_ids = check_indices(annotator_ids,
                                      self.n_annotators() - 1, 'annotator_ids')
        n_plots = len(annotator_ids)
        conf_matrices = self.labelling_performance(X,
                                                   y_true,
                                                   perf_func=confusion_matrix)

        # every entry that is not an array is replaced in place by an all-zero confusion matrix
        n_classes = len(y_unique)
        for matrix_idx, matrix in enumerate(conf_matrices):
            if isinstance(matrix, np.ndarray):
                continue
            conf_matrices[matrix_idx] = np.zeros((n_classes, n_classes),
                                                 dtype=int)

        tick_labels = np.sort(y_unique)
        fig, ax = plt.subplots(n_plots,
                               1,
                               figsize=(figsize[0], figsize[1] * n_plots),
                               dpi=dpi)
        # a single subplot is returned as a bare Axes object instead of an array
        if n_plots == 1:
            ax = [ax]

        for plot_idx, a_idx in enumerate(annotator_ids):
            axis = ax[plot_idx]
            df_cm = pd.DataFrame(conf_matrices[a_idx],
                                 index=range(n_classes),
                                 columns=range(n_classes))
            sn.heatmap(df_cm,
                       annot=True,
                       annot_kws={"size": fontsize},
                       xticklabels=tick_labels,
                       yticklabels=tick_labels,
                       cmap="YlGnBu",
                       fmt="d",
                       ax=axis,
                       cbar=False)
            axis.tick_params(labelsize=fontsize)
            axis.set_xlabel('predicted class labels', fontsize=fontsize)
            axis.set_ylabel('true class labels', fontsize=fontsize)
            axis.set_title('confusion matrix of annotator $a_' + str(a_idx) +
                           '$',
                           fontsize=fontsize)
        return fig, ax