Пример #1
0
    def plot(self):
        """Recompute the per-row statistics for the selected split and plot them.

        The graph, ``validindices`` and ``current_selection`` are reset
        first.  If data, a group and target indices are available, the
        columns of the (possibly transposed) data matrix are divided into
        the selected targets and their complement.  For every row the log2
        ratio of the two group means and the log10 p-value of a two-sample
        t-test are computed; rows where both are finite are sent to the
        graph as ``(fold, -logP)`` pairs and their positions are kept in
        ``validindices``.
        """
        self.graph.clearPlot()
        self.validindices = numpy.empty((0,), dtype=int)
        self.current_selection = []
        group, target_indices = self.selected_split()
        # Drop the messages with the ids this method sets further down.
        self.warning([0, 1])
        self.error(1)

        if self.data and group is not None and target_indices:
            X = self.data.X
            I1 = grouputils.group_selection_mask(
                self.data, group, target_indices)
            I2 = ~I1
            if isinstance(group, grouputils.RowGroup):
                # The masks are applied along axis 1 below, so transpose
                # when the group is a RowGroup.
                X = X.T

            N1, N2 = numpy.count_nonzero(I1), numpy.count_nonzero(I2)

            if not N1 or not N2:
                self.error(
                    1, "Target labels must exclude/include at least one value."
                )

            if N1 < 2 and N2 < 2:
                self.warning(
                    0, "Insufficient data to compute statistics. "
                       "More than one measurement per class should be provided"
                )

            X1, X2 = X[:, I1], X[:, I2]
            if numpy.any(X1 < 0.0) or numpy.any(X2 < 0):
                # NOTE(review): unlike the other calls here this one passes
                # no message id, and only error id 1 is cleared at the top of
                # this method -- confirm where this message gets cleared.
                self.error(
                    "Negative values in the input. The inputs cannot be in "
                    "ratio scale."
                )
                # Blank out the inputs so that no row passes the finite
                # check below.  Built with numpy.full (float result) because
                # NaN cannot be stored in an integer array.
                X1 = numpy.full(X1.shape, numpy.nan)
                X2 = numpy.full(X2.shape, numpy.nan)

            with numpy.errstate(divide="ignore", invalid="ignore"):
                fold = numpy.log2(numpy.mean(X1, axis=1) /
                                  numpy.mean(X2, axis=1))
                # TODO: handle missing values better (mstats)
                _, P = scipy.stats.ttest_ind(X1, X2, axis=1, equal_var=True)
                logP = numpy.log10(P)
                if numpy.isscalar(logP):
                    # ttest_ind does not preserve output shape if either
                    # a or b is empty
                    logP = numpy.full(fold.shape, numpy.nan)

            # Only rows with a finite ratio and a finite p-value are plotted.
            mask = numpy.isfinite(fold) & numpy.isfinite(logP)
            valid = numpy.flatnonzero(mask)
            self.validindices = valid
            self.graph.setPlotData(numpy.array([fold[mask], -logP[mask]]).T)

            self.infoLabel.setText("%i genes on input" % len(fold))
            # TODO: also report how many genes are displayed and how many
            # have an undefined ratio or t-statistic.

            if not valid.size:
                self.warning(1, "Could not compute statistics for any genes!")
Пример #2
0
    def plot(self):
        """Rebuild the plotted points from the currently selected split.

        Clears the graph and resets ``validindices`` and
        ``current_selection``.  When data, a group and target indices are
        present, the data matrix (transposed for a ``RowGroup``) is split
        column-wise into the selected targets and the remaining columns.
        Each row yields a log2 ratio of the two group means and a log10
        p-value from a two-sample t-test; rows where both are finite are
        plotted as ``(fold, -logP)`` and their positions stored in
        ``validindices``.
        """
        self.graph.clearPlot()
        self.validindices = numpy.empty((0, ), dtype=int)
        self.current_selection = []
        group, target_indices = self.selected_split()
        # Clear the warning/error ids that are (re)set below.
        self.warning([0, 1])
        self.error(1)

        if self.data and group is not None and target_indices:
            X = self.data.X
            I1 = grouputils.group_selection_mask(self.data, group,
                                                 target_indices)
            I2 = ~I1
            if isinstance(group, grouputils.RowGroup):
                # I1/I2 index axis 1 below, hence the transpose here.
                X = X.T

            N1, N2 = numpy.count_nonzero(I1), numpy.count_nonzero(I2)

            if not N1 or not N2:
                self.error(
                    1,
                    "Target labels must exclude/include at least one value.")

            if N1 < 2 and N2 < 2:
                self.warning(
                    0, "Insufficient data to compute statistics. "
                    "More than one measurement per class should be provided")

            X1, X2 = X[:, I1], X[:, I2]
            if numpy.any(X1 < 0.0) or numpy.any(X2 < 0):
                # NOTE(review): this call passes no message id although only
                # error id 1 is cleared at the start of the method -- confirm
                # that this message is cleared somewhere.
                self.error(
                    "Negative values in the input. The inputs cannot be in "
                    "ratio scale.")
                # Replace the inputs with NaN so every row fails the finite
                # check below.  numpy.full yields float arrays; an integer
                # array could not hold NaN.
                X1 = numpy.full(X1.shape, numpy.nan)
                X2 = numpy.full(X2.shape, numpy.nan)

            with numpy.errstate(divide="ignore", invalid="ignore"):
                fold = numpy.log2(
                    numpy.mean(X1, axis=1) / numpy.mean(X2, axis=1))
                # TODO: handle missing values better (mstats)
                _, P = scipy.stats.ttest_ind(X1, X2, axis=1, equal_var=True)
                logP = numpy.log10(P)
                if numpy.isscalar(logP):
                    # ttest_ind does not preserve output shape if either
                    # a or b is empty
                    logP = numpy.full(fold.shape, numpy.nan)

            # Keep rows where both the ratio and the p-value are finite.
            mask = numpy.isfinite(fold) & numpy.isfinite(logP)
            valid = numpy.flatnonzero(mask)
            self.validindices = valid
            self.graph.setPlotData(numpy.array([fold[mask], -logP[mask]]).T)

            self.infoLabel.setText("%i genes on input" % len(fold))
            # TODO: also report how many genes are displayed and how many
            # have an undefined ratio or t-statistic.

            if not valid.size:
                self.warning(1, "Could not compute statistics for any genes!")
Пример #3
0
    def update_scores(self):
        """Compute the scores and update the histogram.

        Any pending computation is cancelled and the plot, ``scores`` and
        ``nulldist`` are reset.  The positions of the samples in each group
        are derived from the selected split, and the scoring (plus the
        optional permutation runs) is submitted to ``self._executor``.
        ``__set_score_results`` is invoked with the future once it is done.
        """
        self.__cancel_pending()
        self.clear_plot()
        self.scores = None
        self.nulldist = None
        self.error(0)

        grp, split_selection = self.selected_split()

        if not self.data or grp is None:
            return

        # Only the test type and the scoring function are used here.
        _, _, test_type, score_func = self.Scores[self.score_index]

        def compute_scores(X, group_indices, warn=False):
            """Score ``X`` split into one array per entry of ``group_indices``.

            If ``score_func`` returns a tuple, only its first item is
            returned unless ``warn`` is true.
            """
            arrays = [X[ind] for ind in group_indices]
            ss = score_func(*arrays, axis=0)
            return ss[0] if isinstance(ss, tuple) and not warn else ss

        def permute_indices(group_indices, random_state=None):
            """Shuffle all indices together and re-split to the group sizes."""
            assert all(ind.dtype.kind == "i" for ind in group_indices)
            assert all(ind.ndim == 1 for ind in group_indices)
            if random_state is None:
                random_state = np.random
            joined = np.hstack(group_indices)
            random_state.shuffle(joined)
            split_ind = np.cumsum([len(ind) for ind in group_indices])
            return np.split(joined, split_ind[:-1])

        if isinstance(grp, grouputils.RowGroup):
            axis = 0
        else:
            axis = 1

        if test_type == OWFeatureSelection.TwoSampleTest:
            # Selected targets versus everything else.
            G1 = grouputils.group_selection_mask(self.data, grp,
                                                 split_selection)
            G2 = ~G1
            indices = [np.flatnonzero(G1), np.flatnonzero(G2)]
        elif test_type == self.VarSampleTest:
            # One group per value of the grouping.
            indices = [
                grouputils.group_selection_mask(self.data, grp, [i])
                for i in range(len(grp.values))
            ]
            indices = [np.flatnonzero(ind) for ind in indices]
        else:
            # An explicit raise (rather than ``assert False``) so that the
            # check is not stripped under ``-O``, which would leave
            # ``indices`` unbound below.
            raise AssertionError("Unknown test type: %r" % (test_type,))

        # ``indices`` holds positions, not masks: test the number of entries.
        # Counting non-zero entries would reject a group that consists only
        # of position 0.
        if not all(ind.size > 0 for ind in indices):
            self.error(
                0, "Target labels must exclude/include at least one "
                "value.")
            self.scores = None
            self.nulldist = None
            self.update_data_info_label()
            return

        X = self.data.X
        if axis == 1:
            X = X.T

        # TODO: Check that each label has more than one measurement,
        # raise warning otherwise.

        def compute_scores_with_perm(X,
                                     indices,
                                     nperm=0,
                                     rstate=None,
                                     progress_advance=None):
            """Return ``(scores, null_scores, warning)`` for ``indices``.

            ``null_scores`` holds one score array per permutation of the
            group indices (``nperm`` of them).  ``progress_advance``, if
            given, is called after the initial scoring and after every
            permutation.
            """
            warning = None
            scores = compute_scores(X, indices, warn=True)
            if isinstance(scores, tuple):
                scores, warning = scores

            if progress_advance is not None:
                progress_advance()
            null_scores = []
            if nperm > 0:
                if rstate is None:
                    rstate = np.random.RandomState(0)

                for _ in range(nperm):
                    p_indices = permute_indices(indices, rstate)
                    assert all(ind.shape == pind.shape
                               for ind, pind in zip(indices, p_indices))
                    pscore = compute_scores(X, p_indices)
                    assert pscore.shape == scores.shape
                    null_scores.append(pscore)
                    if progress_advance is not None:
                        progress_advance()

            return scores, null_scores, warning

        p_advance = concurrent.methodinvoke(self, "progressBarAdvance",
                                            (float, ))
        state = namespace(cancelled=False, advance=p_advance)

        def progress():
            """Advance the progress bar, or abort if the run was cancelled."""
            if state.cancelled:
                raise concurrent.CancelledError
            else:
                # ``nperm`` is bound below, before the task is submitted.
                state.advance(100 / (nperm + 1))

        self.progressBarInit()
        set_scores = concurrent.methodinvoke(self, "__set_score_results",
                                             (concurrent.Future, ))

        nperm = self.permutations_count if self.compute_null else 0
        self.__scores_state = state
        self.__scores_future = self._executor.submit(compute_scores_with_perm,
                                                     X,
                                                     indices,
                                                     nperm,
                                                     progress_advance=progress)
        self.__scores_future.add_done_callback(set_scores)
Пример #4
0
    def update_scores(self):
        """Compute the scores and update the histogram.

        Cancels any pending computation, resets the plot, ``scores`` and
        ``nulldist``, builds the per-group sample positions from the
        selected split and submits the scoring (with the optional
        permutation runs) to ``self._executor``.  When the future
        completes, ``__set_score_results`` is invoked with it.
        """
        self.__cancel_pending()
        self.clear_plot()
        self.scores = None
        self.nulldist = None
        self.error(0)

        grp, split_selection = self.selected_split()

        if not self.data or grp is None:
            return

        # Only the test type and the scoring function are used here.
        _, _, test_type, score_func = self.Scores[self.score_index]

        def compute_scores(X, group_indices, warn=False):
            """Apply ``score_func`` to ``X`` indexed by each group.

            A tuple result is reduced to its first item unless ``warn`` is
            true.
            """
            arrays = [X[ind] for ind in group_indices]
            ss = score_func(*arrays, axis=0)
            return ss[0] if isinstance(ss, tuple) and not warn else ss

        def permute_indices(group_indices, random_state=None):
            """Shuffle the joined indices and split back to the group sizes."""
            assert all(ind.dtype.kind == "i" for ind in group_indices)
            assert all(ind.ndim == 1 for ind in group_indices)
            if random_state is None:
                random_state = np.random
            joined = np.hstack(group_indices)
            random_state.shuffle(joined)
            split_ind = np.cumsum([len(ind) for ind in group_indices])
            return np.split(joined, split_ind[:-1])

        if isinstance(grp, grouputils.RowGroup):
            axis = 0
        else:
            axis = 1

        if test_type == OWFeatureSelection.TwoSampleTest:
            # Selected targets versus the rest.
            G1 = grouputils.group_selection_mask(
                self.data, grp, split_selection)
            G2 = ~G1
            indices = [np.flatnonzero(G1), np.flatnonzero(G2)]
        elif test_type == self.VarSampleTest:
            # One group per value of the grouping.
            indices = [grouputils.group_selection_mask(self.data, grp, [i])
                       for i in range(len(grp.values))]
            indices = [np.flatnonzero(ind) for ind in indices]
        else:
            # Raised explicitly instead of ``assert False``: an assert is
            # removed under ``-O`` and ``indices`` would then be unbound.
            raise AssertionError("Unknown test type: %r" % (test_type,))

        # Every group needs at least one sample position.
        if not all(ind.size > 0 for ind in indices):
            self.error(0, "Target labels must exclude/include at least one "
                          "value.")
            self.scores = None
            self.nulldist = None
            self.update_data_info_label()
            return

        X = self.data.X
        if axis == 1:
            X = X.T

        # TODO: Check that each label has more than one measurement,
        # raise warning otherwise.

        def compute_scores_with_perm(X, indices, nperm=0, rstate=None,
                                     progress_advance=None):
            """Return ``(scores, null_scores, warning)`` for ``indices``.

            ``null_scores`` collects the scores of ``nperm`` permutations of
            the group indices.  ``progress_advance``, if given, is called
            after the initial scoring and after each permutation.
            """
            warning = None
            scores = compute_scores(X, indices, warn=True)
            if isinstance(scores, tuple):
                scores, warning = scores

            if progress_advance is not None:
                progress_advance()
            null_scores = []
            if nperm > 0:
                if rstate is None:
                    rstate = np.random.RandomState(0)

                for _ in range(nperm):
                    p_indices = permute_indices(indices, rstate)
                    assert all(ind.shape == pind.shape
                               for ind, pind in zip(indices, p_indices))
                    pscore = compute_scores(X, p_indices)
                    assert pscore.shape == scores.shape
                    null_scores.append(pscore)
                    if progress_advance is not None:
                        progress_advance()

            return scores, null_scores, warning

        p_advance = concurrent.methodinvoke(
            self, "progressBarAdvance", (float,))
        state = namespace(cancelled=False, advance=p_advance)

        def progress():
            """Advance the progress bar, or abort if the run was cancelled."""
            if state.cancelled:
                raise concurrent.CancelledError
            else:
                # ``nperm`` is bound below, before the task is submitted.
                state.advance(100 / (nperm + 1))

        self.progressBarInit()
        set_scores = concurrent.methodinvoke(
            self, "__set_score_results", (concurrent.Future,))

        nperm = self.permutations_count if self.compute_null else 0
        self.__scores_state = state
        self.__scores_future = self._executor.submit(
                compute_scores_with_perm, X, indices, nperm,
                progress_advance=progress)
        self.__scores_future.add_done_callback(set_scores)