def plot(self):
    """Recompute the fold-change / p-value points and redraw the graph.

    The graph, the valid indices and the current selection are reset
    first. If data, a group and a non-empty target selection are
    available, the data matrix (transposed for row groups) is split
    column-wise into the target group and its complement. For every row
    the log2 ratio of the two group means and the log10 p-value of an
    equal-variance two-sample t-test are computed; only rows where both
    are finite are plotted (as ``fold`` versus ``-log10(p)``).

    Message ids used by this method: warning 0 (too few measurements),
    warning 1 (nothing could be computed), error 1 (empty target or
    complement), error 2 (negative input values).
    """
    self.graph.clearPlot()
    self.validindices = numpy.empty((0,), dtype=int)
    self.current_selection = []
    group, target_indices = self.selected_split()

    # Reset every message this method may have set on a previous call.
    self.warning([0, 1])
    self.error(1)
    self.error(2)

    if not (self.data and group is not None and target_indices):
        return

    X = self.data.X
    I1 = grouputils.group_selection_mask(self.data, group, target_indices)
    I2 = ~I1
    if isinstance(group, grouputils.RowGroup):
        X = X.T

    N1, N2 = numpy.count_nonzero(I1), numpy.count_nonzero(I2)

    if not N1 or not N2:
        self.error(
            1, "Target labels must exclude/include at least one value."
        )

    if N1 < 2 and N2 < 2:
        self.warning(
            0, "Insufficient data to compute statistics. "
               "More than one measurement per class should be provided"
        )

    X1, X2 = X[:, I1], X[:, I2]

    if numpy.any(X1 < 0.0) or numpy.any(X2 < 0):
        # Explicit id: only ids reset at the top of this method are
        # guaranteed to be cleared on the next call.
        self.error(
            2, "Negative values in the input. The inputs cannot be in "
               "ratio scale."
        )
        # Blank out the inputs so that no point is plotted.
        X1 = numpy.full_like(X1, numpy.nan)
        X2 = numpy.full_like(X2, numpy.nan)

    with numpy.errstate(divide="ignore", invalid="ignore"):
        fold = numpy.log2(numpy.mean(X1, axis=1) / numpy.mean(X2, axis=1))
        # TODO: handle missing values better (mstats)
        _, P = scipy.stats.ttest_ind(X1, X2, axis=1, equal_var=True)
        logP = numpy.log10(P)
        if numpy.isscalar(logP):
            # ttest_ind does not preserve output shape if either
            # a or b is empty
            logP = numpy.full(fold.shape, numpy.nan)

    mask = numpy.isfinite(fold) & numpy.isfinite(logP)
    self.validindices = numpy.flatnonzero(mask)
    self.graph.setPlotData(numpy.array([fold[mask], -logP[mask]]).T)

    # TODO: also report "{displayed} displayed, {undef} with undefined
    # ratio or t-statistics."
    self.infoLabel.setText("%i genes on input" % len(fold))

    if not self.validindices.size:
        self.warning(1, "Could not compute statistics for any genes!")
def plot(self):
    """Recompute the fold-change / p-value points and redraw the graph.

    The graph, the valid indices and the current selection are reset
    first. If data, a group and a non-empty target selection are
    available, the data matrix (transposed for row groups) is split
    column-wise into the target group and its complement. For every row
    the log2 ratio of the two group means and the log10 p-value of an
    equal-variance two-sample t-test are computed; only rows where both
    are finite are plotted (as ``fold`` versus ``-log10(p)``).

    Message ids used by this method: warning 0 (too few measurements),
    warning 1 (nothing could be computed), error 1 (empty target or
    complement), error 2 (negative input values).
    """
    self.graph.clearPlot()
    self.validindices = numpy.empty((0, ), dtype=int)
    self.current_selection = []
    group, target_indices = self.selected_split()

    # Reset every message this method may have set on a previous call.
    self.warning([0, 1])
    self.error(1)
    self.error(2)

    have_input = self.data and group is not None and target_indices
    if not have_input:
        return

    X = self.data.X
    I1 = grouputils.group_selection_mask(self.data, group, target_indices)
    I2 = ~I1
    if isinstance(group, grouputils.RowGroup):
        X = X.T

    N1, N2 = numpy.count_nonzero(I1), numpy.count_nonzero(I2)

    if not N1 or not N2:
        self.error(
            1, "Target labels must exclude/include at least one value.")

    if N1 < 2 and N2 < 2:
        self.warning(
            0, "Insufficient data to compute statistics. "
            "More than one measurement per class should be provided")

    X1, X2 = X[:, I1], X[:, I2]

    if numpy.any(X1 < 0.0) or numpy.any(X2 < 0):
        # Explicit id: only ids reset at the top of this method are
        # guaranteed to be cleared on the next call.
        self.error(
            2, "Negative values in the input. The inputs cannot be in "
            "ratio scale.")
        # Blank out the inputs so that no point is plotted.
        X1 = numpy.full_like(X1, numpy.nan)
        X2 = numpy.full_like(X2, numpy.nan)

    with numpy.errstate(divide="ignore", invalid="ignore"):
        fold = numpy.log2(
            numpy.mean(X1, axis=1) / numpy.mean(X2, axis=1))
        # TODO: handle missing values better (mstats)
        _, P = scipy.stats.ttest_ind(X1, X2, axis=1, equal_var=True)
        logP = numpy.log10(P)
        if numpy.isscalar(logP):
            # ttest_ind does not preserve output shape if either
            # a or b is empty
            logP = numpy.full(fold.shape, numpy.nan)

    mask = numpy.isfinite(fold) & numpy.isfinite(logP)
    self.validindices = numpy.flatnonzero(mask)
    self.graph.setPlotData(numpy.array([fold[mask], -logP[mask]]).T)

    # TODO: also report "{displayed} displayed, {undef} with undefined
    # ratio or t-statistics."
    self.infoLabel.setText("%i genes on input" % len(fold))

    if not self.validindices.size:
        self.warning(1, "Could not compute statistics for any genes!")
def update_scores(self):
    """Compute the scores and update the histogram.

    Cancels any pending computation, clears the plot and the stored
    scores / null distribution, then builds one array of sample
    positions per group from the selected split. The score computation
    (plus ``permutations_count`` label permutations when
    ``compute_null`` is set) is submitted to ``self._executor``; a
    done-callback invoking ``__set_score_results`` is attached to the
    resulting future.

    Returns early, without submitting anything, when there is no data
    or no group, or when any group ends up with no samples (error 0 is
    set in that case).
    """
    self.__cancel_pending()
    self.clear_plot()
    self.scores = None
    self.nulldist = None
    self.error(0)

    grp, split_selection = self.selected_split()

    if not self.data or grp is None:
        return

    _, _, test_type, score_func = self.Scores[self.score_index]

    def compute_scores(X, group_indices, warn=False):
        # ``score_func`` may return a tuple; its first item is the
        # score. With ``warn=True`` the raw result is passed through so
        # the caller can unpack the extra value itself.
        arrays = [X[ind] for ind in group_indices]
        ss = score_func(*arrays, axis=0)
        return ss[0] if isinstance(ss, tuple) and not warn else ss

    def permute_indices(group_indices, random_state=None):
        # Shuffle all sample positions together and cut them back into
        # groups of the original sizes.
        assert all(ind.dtype.kind == "i" for ind in group_indices)
        assert all(ind.ndim == 1 for ind in group_indices)
        if random_state is None:
            random_state = np.random
        joined = np.hstack(group_indices)
        random_state.shuffle(joined)
        split_ind = np.cumsum([len(ind) for ind in group_indices])
        return np.split(joined, split_ind[:-1])

    axis = 0 if isinstance(grp, grouputils.RowGroup) else 1

    if test_type == OWFeatureSelection.TwoSampleTest:
        G1 = grouputils.group_selection_mask(
            self.data, grp, split_selection)
        G2 = ~G1
        indices = [np.flatnonzero(G1), np.flatnonzero(G2)]
    elif test_type == self.VarSampleTest:
        masks = [grouputils.group_selection_mask(self.data, grp, [i])
                 for i in range(len(grp.values))]
        indices = [np.flatnonzero(mask) for mask in masks]
    else:
        # Explicit raise: a bare ``assert False`` is stripped under -O.
        raise AssertionError("Unknown test type: %r" % (test_type,))

    # ``indices`` hold sample positions (not boolean masks), so the
    # emptiness test must look at the array size: counting non-zero
    # entries would treat a group consisting only of sample 0 as empty.
    if any(ind.size == 0 for ind in indices):
        self.error(0, "Target labels must exclude/include at least one "
                      "value.")
        self.scores = None
        self.nulldist = None
        self.update_data_info_label()
        return

    X = self.data.X
    if axis == 1:
        X = X.T

    # TODO: Check that each label has more than one measurement,
    # raise warning otherwise.

    def compute_scores_with_perm(X, indices, nperm=0, rstate=None,
                                 progress_advance=None):
        # Runs in the executor: first the real scores, then ``nperm``
        # scores on permuted group labels.
        warning = None
        scores = compute_scores(X, indices, warn=True)
        if isinstance(scores, tuple):
            scores, warning = scores

        if progress_advance is not None:
            progress_advance()

        null_scores = []
        if nperm > 0:
            if rstate is None:
                rstate = np.random.RandomState(0)

            for _ in range(nperm):
                p_indices = permute_indices(indices, rstate)
                assert all(ind.shape == pind.shape
                           for ind, pind in zip(indices, p_indices))
                pscore = compute_scores(X, p_indices)
                assert pscore.shape == scores.shape
                null_scores.append(pscore)
                if progress_advance is not None:
                    progress_advance()

        return scores, null_scores, warning

    nperm = self.permutations_count if self.compute_null else 0

    p_advance = concurrent.methodinvoke(
        self, "progressBarAdvance", (float,))
    state = namespace(cancelled=False, advance=p_advance)

    def progress():
        # Called once per computed score vector (real + permutations);
        # raising here aborts the running computation after a cancel.
        if state.cancelled:
            raise concurrent.CancelledError
        else:
            state.advance(100 / (nperm + 1))

    self.progressBarInit()
    set_scores = concurrent.methodinvoke(
        self, "__set_score_results", (concurrent.Future,))

    self.__scores_state = state
    self.__scores_future = self._executor.submit(
        compute_scores_with_perm, X, indices, nperm,
        progress_advance=progress)
    self.__scores_future.add_done_callback(set_scores)
def update_scores(self):
    """Compute the scores and update the histogram.

    Cancels any pending computation, clears the plot and the stored
    scores / null distribution, then builds one array of sample
    positions per group from the selected split. The score computation
    (plus ``permutations_count`` label permutations when
    ``compute_null`` is set) is submitted to ``self._executor``; a
    done-callback invoking ``__set_score_results`` is attached to the
    resulting future.

    Returns early, without submitting anything, when there is no data
    or no group, or when any group ends up with no samples (error 0 is
    set in that case).
    """
    self.__cancel_pending()
    self.clear_plot()
    self.scores = None
    self.nulldist = None
    self.error(0)

    grp, split_selection = self.selected_split()

    if not self.data or grp is None:
        return

    _, _, test_type, score_func = self.Scores[self.score_index]

    def compute_scores(X, group_indices, warn=False):
        # ``score_func`` may return a tuple; its first item is the
        # score. With ``warn=True`` the raw result is passed through so
        # the caller can unpack the extra value itself.
        arrays = [X[ind] for ind in group_indices]
        ss = score_func(*arrays, axis=0)
        return ss[0] if isinstance(ss, tuple) and not warn else ss

    def permute_indices(group_indices, random_state=None):
        # Shuffle all sample positions together and cut them back into
        # groups of the original sizes.
        assert all(ind.dtype.kind == "i" for ind in group_indices)
        assert all(ind.ndim == 1 for ind in group_indices)
        if random_state is None:
            random_state = np.random
        joined = np.hstack(group_indices)
        random_state.shuffle(joined)
        split_ind = np.cumsum([len(ind) for ind in group_indices])
        return np.split(joined, split_ind[:-1])

    axis = 0 if isinstance(grp, grouputils.RowGroup) else 1

    if test_type == OWFeatureSelection.TwoSampleTest:
        G1 = grouputils.group_selection_mask(
            self.data, grp, split_selection)
        G2 = ~G1
        indices = [np.flatnonzero(G1), np.flatnonzero(G2)]
    elif test_type == self.VarSampleTest:
        masks = [grouputils.group_selection_mask(self.data, grp, [i])
                 for i in range(len(grp.values))]
        indices = [np.flatnonzero(mask) for mask in masks]
    else:
        # Explicit raise: a bare ``assert False`` is stripped under -O,
        # which would leave ``indices`` unbound below.
        raise AssertionError("Unknown test type: %r" % (test_type,))

    # Every group needs at least one sample position.
    if any(ind.size == 0 for ind in indices):
        self.error(0, "Target labels must exclude/include at least one "
                      "value.")
        self.scores = None
        self.nulldist = None
        self.update_data_info_label()
        return

    X = self.data.X
    if axis == 1:
        X = X.T

    # TODO: Check that each label has more than one measurement,
    # raise warning otherwise.

    def compute_scores_with_perm(X, indices, nperm=0, rstate=None,
                                 progress_advance=None):
        # Runs in the executor: first the real scores, then ``nperm``
        # scores on permuted group labels.
        warning = None
        scores = compute_scores(X, indices, warn=True)
        if isinstance(scores, tuple):
            scores, warning = scores

        if progress_advance is not None:
            progress_advance()

        null_scores = []
        if nperm > 0:
            if rstate is None:
                rstate = np.random.RandomState(0)

            for _ in range(nperm):
                p_indices = permute_indices(indices, rstate)
                assert all(ind.shape == pind.shape
                           for ind, pind in zip(indices, p_indices))
                pscore = compute_scores(X, p_indices)
                assert pscore.shape == scores.shape
                null_scores.append(pscore)
                if progress_advance is not None:
                    progress_advance()

        return scores, null_scores, warning

    # Bound before ``progress`` is defined so the closure's dependency
    # on it is visible when reading top to bottom.
    nperm = self.permutations_count if self.compute_null else 0

    p_advance = concurrent.methodinvoke(
        self, "progressBarAdvance", (float,))
    state = namespace(cancelled=False, advance=p_advance)

    def progress():
        # Called once per computed score vector (real + permutations);
        # raising here aborts the running computation after a cancel.
        if state.cancelled:
            raise concurrent.CancelledError
        else:
            state.advance(100 / (nperm + 1))

    self.progressBarInit()
    set_scores = concurrent.methodinvoke(
        self, "__set_score_results", (concurrent.Future,))

    self.__scores_state = state
    self.__scores_future = self._executor.submit(
        compute_scores_with_perm, X, indices, nperm,
        progress_advance=progress)
    self.__scores_future.add_done_callback(set_scores)