def transformed(self, data):
    """Return the normalized X matrix of *data*.

    The normalization method is selected by ``self.method``:

    * ``Normalize.Vector`` -- scale each row to unit L2 norm; NaN
      elements are interpolated for the norm computation and restored
      afterwards.
    * ``Normalize.Area`` -- divide each row by its integral over
      ``[self.lower, self.upper]`` (``self.int_method``); infs from
      zero-area rows are replaced.
    * ``Normalize.Attribute`` -- divide each row by the value of the
      continuous attribute ``self.attr``; rows become NaN when the
      attribute is invalid.
    """
    if data.X.shape[0] == 0:
        # no rows: nothing to normalize
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
        replace_infs(data.X)
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
            replace_infs(data.X)
        else:
            # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def __call__(self, data):
    """
    Apply randomization of the given data. Returns a new data table.

    Parameters
    ----------
    data : Orange.data.Table
        A data table to be randomized.

    Returns
    -------
    data : Orange.data.Table
        Randomized data table.
    """
    shuffled = data.copy()
    rng = np.random.RandomState(self.rand_seed)
    # Draw three independent seeds so X, Y and metas are never
    # permuted with the same sequence at the same time.
    seed_y, seed_x, seed_m = rng.randint(0, 2 ** 32 - 1, size=3,
                                         dtype=np.int64)
    if self.rand_type & Randomize.RandomizeClasses:
        shuffled.Y = self.randomize(shuffled.Y, seed_y)
    if self.rand_type & Randomize.RandomizeAttributes:
        shuffled.X = self.randomize(shuffled.X, seed_x)
    if self.rand_type & Randomize.RandomizeMetas:
        shuffled.metas = self.randomize(shuffled.metas, seed_m)
    return shuffled
def __call__(self, data):
    """
    Apply randomization of the given data. Returns a new data table.

    Parameters
    ----------
    data : Orange.data.Table
        A data table to be randomized.

    Returns
    -------
    data : Orange.data.Table
        Randomized data table.
    """
    randomized = data.copy()
    state = np.random.RandomState(self.rand_seed)
    # Separate seeds keep the X/Y/metas permutations independent of
    # one another even under the same master seed.
    seeds = state.randint(0, 2**32 - 1, size=3, dtype=np.int64)
    plan = (
        (Randomize.RandomizeClasses, "Y"),
        (Randomize.RandomizeAttributes, "X"),
        (Randomize.RandomizeMetas, "metas"),
    )
    for (flag, attr), seed in zip(plan, seeds):
        if self.rand_type & flag:
            setattr(randomized, attr,
                    self.randomize(getattr(randomized, attr), seed))
    return randomized
def transformed(self, data):
    """Return the normalized X matrix of *data* for the selected method.

    ``Normalize.Vector`` scales rows to unit L2 norm (NaNs interpolated
    for the norm and restored afterwards); ``Normalize.Area`` divides by
    the integral over ``[self.lower, self.upper]``;
    ``Normalize.Attribute`` divides by a continuous attribute's value,
    or fills the row with NaN when the attribute is invalid.
    """
    if data.X.shape[0] == 0:
        # no rows: nothing to normalize
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        # NOTE(review): a zero-area row yields inf here -- confirm
        # whether infs should be replaced as in the replace_infs variant
        data.X /= norm_data.X
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
        else:
            # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def __call__(self, data):
    """Return the normalized X for *data*, converting its domain first
    when it differs from the preprocessor's own domain."""
    if data.domain != self.domain:
        data = data.from_table(self.domain, data)
    if data.X.shape[0] == 0:
        # empty table: nothing to normalize
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nan_mask = np.isnan(data.X)
        # the mask is fixed, so decide once whether NaN handling is needed
        has_nans = bool(np.any(nan_mask.sum(axis=1, keepdims=True) > 0))
        values = data.X
        if has_nans:
            # interpolate over unknowns so the L2 norm is well defined
            wavenumbers = getx(data)
            values = interp1d_with_unknowns_numpy(wavenumbers, values,
                                                  wavenumbers)
            values = np.nan_to_num(values)  # edge elements can still be zero
        data.X = sknormalize(values, norm='l2', axis=1, copy=False)
        if has_nans:
            # restore unknowns at their original positions
            data.X[nan_mask] = float("nan")
    elif self.method == Normalize.Area:
        areas = Integrate(method=self.int_method,
                          limits=[[self.lower, self.upper]])(data)
        data.X /= areas.X
    elif self.method == Normalize.Attribute:
        # attr normalization applies to entire spectrum, regardless of limits
        # meta indices are -ve and start at -1
        if self.attr not in (None, "None", ""):
            meta_idx = -1 - data.domain.index(self.attr)
            divisors = data.metas[:, meta_idx].astype(float)
            data.X /= divisors[:, None]
    return data.X
def commit(self):
    """Apply the configured filter to the input data and send the
    (possibly reduced) table to the output channel."""
    self._committimer.stop()  # cancel any pending deferred commit
    data = self.data
    if data is not None and self._is_filter_enabled():
        if self.filter_type() in [Cells, Genes]:
            state = self._state
            assert state is not None
            counts = state.x
            cmax = self.limit_upper
            cmin = self.limit_lower
            # keep entries whose count lies within the enabled bounds
            mask = np.ones(counts.shape, dtype=bool)
            if self.limit_lower_enabled:
                mask &= cmin <= counts
            if self.limit_upper_enabled:
                mask &= counts <= cmax
            if self.filter_type() == Cells:
                # one count per row: filter instances
                assert counts.size == len(data)
                data = data[mask]
            else:
                # one count per column: filter attributes
                assert counts.size == len(data.domain.attributes)
                atts = [v for v, m in zip(data.domain.attributes, mask) if m]
                data = data.from_table(
                    Orange.data.Domain(
                        atts, data.domain.class_vars, data.domain.metas
                    ),
                    data
                )
            if len(data) == 0 or \
                    len(data.domain) + len(data.domain.metas) == 0:
                # an empty selection is sent as "no data"
                data = None
        elif self.filter_type() == Data:
            # zero out individual values that fall outside the bounds
            dmin, dmax = self.limit_lower, self.limit_upper
            data = data.copy()
            assert data.X.base is None
            mask = None
            if self.limit_lower_enabled:
                mask = data.X < dmin
            if self.limit_upper_enabled:
                if mask is not None:
                    mask |= data.X > dmax
                else:
                    # BUG FIX: was `data.X < dmax`, which zeroed values
                    # *below* the upper bound instead of above it
                    mask = data.X > dmax
            if mask is not None:
                # guard: `data.X[None] = 0.0` (mask unset) would
                # broadcast-zero the whole array via a new-axis view
                data.X[mask] = 0.0
        else:
            assert False
    self.Outputs.data.send(data)
def __call__(self, data):
    """Return the normalized X for *data* using MinMax, Vector, Offset
    or Attribute normalization.

    When ``self.limits == 1`` the normalization statistics (max, norm,
    min) are computed only over the ``[self.lower, self.upper]`` window;
    otherwise over the whole spectrum.
    """
    if data.domain != self.domain:
        data = data.from_table(self.domain, data)
    x = getx(data)
    data = data.copy()
    if self.limits == 1:
        x_sorter = np.argsort(x)
        lim_min = np.searchsorted(x, self.lower, sorter=x_sorter,
                                  side="left")
        lim_max = np.searchsorted(x, self.upper, sorter=x_sorter,
                                  side="right")
        limits = [lim_min, lim_max]
        # fancy indexing -> y_s is an independent copy of the window
        y_s = data.X[:, x_sorter][:, limits[0]:limits[1]]
    else:
        y_s = data.X  # y_s aliases data.X (same ndarray object)
    if self.method == Normalize.MinMax:
        data.X /= nanmax(np.abs(y_s), axis=1).reshape((-1, 1))
    elif self.method == Normalize.Vector:
        # zero offset correction applies to entire spectrum, regardless of limits
        y_offsets = nanmean(data.X, axis=1).reshape((-1, 1))
        data.X -= y_offsets
        # BUG FIX: only shift y_s when it is a separate copy; when y_s
        # aliases data.X the subtraction above already updated it, and
        # `y_s -= y_offsets` removed the offset twice before the norm
        if y_s is not data.X:
            y_s = y_s - y_offsets
        rssq = np.sqrt(nansum(y_s**2, axis=1).reshape((-1, 1)))
        data.X /= rssq
    elif self.method == Normalize.Offset:
        data.X -= nanmin(y_s, axis=1).reshape((-1, 1))
    elif self.method == Normalize.Attribute:
        # attr normalization applies to entire spectrum, regardless of limits
        # meta indices are -ve and start at -1
        if self.attr not in (None, "None", ""):
            attr_index = -1 - data.domain.index(self.attr)
            factors = data.metas[:, attr_index].astype(float)
            data.X /= factors[:, None]
    return data.X
def __call__(self, data):
    """
    Apply randomization of the given data. Returns a new data table.

    Parameters
    ----------
    data : Orange.data.Table
        A data table to be randomized.

    Returns
    -------
    data : Orange.data.Table
        Randomized data table.
    """
    result = data.copy()
    rand_type = self.rand_type
    # shuffle each selected part of the table independently
    if rand_type & Randomize.RandomizeClasses:
        result.Y = self.randomize(result.Y)
    if rand_type & Randomize.RandomizeAttributes:
        result.X = self.randomize(result.X)
    if rand_type & Randomize.RandomizeMetas:
        result.metas = self.randomize(result.metas)
    return result