def ranges_to_weight_table(ranges):
    """
    Build a single-row table of weights out of weighted ranges.

    Only the edge points of the ranges become columns. Every edge appears
    twice: once at the edge itself (carrying the range weight) and once at the
    neighbouring representable float just outside the range (carrying zero),
    so the resulting weights can be interpolated directly. Where ranges
    overlap, their weights add up.

    Assumes 64-bit floats.

    :param ranges: list of triples (edge1, edge2, weight)
    :return: an Orange.data.Table
    """

    def as_arrays(mapping):
        # keys -> 1D array of positions, values -> one-row 2D array of weights
        return np.array(list(mapping.keys())), np.array([list(mapping.values())])

    neg_infinity = float("-inf")
    pos_infinity = float("inf")
    accumulated = {}

    for edge_a, edge_b, weight in ranges:
        low, high = min(edge_a, edge_b), max(edge_a, edge_b)
        # the outer two points sit one float step outside the range
        edge_x = [nextafter(low, neg_infinity), low, high,
                  nextafter(high, pos_infinity)]
        edge_w = [0., float(weight), float(weight), 0.]

        # union of the new edge points and everything seen so far
        grid = list(set(edge_x) | set(accumulated))

        # weights gathered so far, evaluated on the whole grid
        known_x, known_w = as_arrays(accumulated)
        existing = interp1d_with_unknowns_numpy(known_x, known_w, grid)[0]
        existing[np.isnan(existing)] = 0

        # contribution of this range, evaluated on the whole grid
        added = interp1d_with_unknowns_numpy(
            np.array(edge_x), np.array([edge_w]), grid)[0]
        added[np.isnan(added)] = 0

        # store the summed weights
        accumulated.update(zip(grid, existing + added))

    final_x, final_w = as_arrays(accumulated)
    variables = [Orange.data.ContinuousVariable(name=str(float(pos)))
                 for pos in final_x]
    return Orange.data.Table.from_numpy(Orange.data.Domain(variables), final_w)
def transformed(self, X, wavenumbers):
    """
    Fit each spectrum with a polynomial plus the mean reference spectrum and
    return the spectra with the polynomial part removed.

    The model matrix holds the powers 0..``self.order`` of the wavenumbers
    (rescaled so that the first and last wavenumber map to -1 and +1) and, as
    the last column, the reference spectrum interpolated to ``wavenumbers``.
    The model parameters are estimated by weighted least squares, where both
    the model and the spectrum are multiplied by the weights.

    :param X: 2D array of spectra, one spectrum per row
    :param wavenumbers: 1D array of wavenumbers for the columns of X
    :return: 2D array with one row per spectrum: the corrected spectrum
        followed by the fitted model parameters
        (``X.shape[1] + self.order + 2`` columns)
    """
    # about 85% of time in __call__ function is spent is lstsq
    # compute average spectrum from the reference
    ref_X = np.atleast_2d(spectra_mean(self.reference.X))
    # interpolate reference to the data
    ref_X = interp1d_with_unknowns_numpy(getx(self.reference), ref_X, wavenumbers)
    # we know that X is not NaN. same handling of reference as of X
    ref_X, _ = nan_extend_edges_and_interpolate(wavenumbers, ref_X)

    if self.weights:
        # interpolate weights to the data
        wei_X = interp1d_with_unknowns_numpy(getx(self.weights), self.weights.X,
                                             wavenumbers)
        # set whichever weights are undefined (usually at edges) to zero
        wei_X[np.isnan(wei_X)] = 0
    else:
        wei_X = np.ones((1, len(wavenumbers)))

    # linear map of the wavenumbers: first -> -1, last -> +1
    N = wavenumbers.shape[0]
    m0 = -2.0 / (wavenumbers[0] - wavenumbers[N - 1])
    c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])

    M = [(m0 * (wavenumbers - c_coeff)) ** x for x in range(0, self.order + 1)]
    M.append(ref_X)  # always add reference spectrum to the model
    n_add_model = len(M)
    # M is needed below for the correction, for par estimation M_weighted is used
    M = np.vstack(M).T
    M_weighted = M * wei_X.T

    newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
    for i, rawspectrum in enumerate(X):
        # weighted least squares: the target has to be weighted exactly like
        # the model matrix, otherwise the weights distort the fit
        rawspectrumW = (rawspectrum * wei_X)[0]
        m = np.linalg.lstsq(M_weighted, rawspectrumW)[0]
        corrected = rawspectrum
        # subtract only the polynomial terms; the reference term is kept
        for x in range(0, self.order + 1):
            corrected = corrected - (m[x] * M[:, x])
        if self.scaling:
            # m[self.order + 1] is the coefficient of the reference spectrum
            corrected = corrected / m[self.order + 1]
        corrected[np.isinf(corrected)] = np.nan  # fix values which can be caused by zero weights
        corrected = np.hstack((corrected, m))  # append the model parameters
        newspectra[i] = corrected

    return newspectra
def ranges_to_weight_table(ranges):
    """
    Convert weighted ranges into a one-row weight table.

    The table has a column for each range edge and for the representable
    float immediately outside each edge. Edges carry the weight of the range,
    the outside neighbours carry zero, so interpolating the table reproduces
    the ranges. Weights of overlapping ranges are summed.

    Assumes 64-bit floats.

    :param ranges: list of triples (edge1, edge2, weight)
    :return: an Orange.data.Table
    """
    minus_inf, plus_inf = float("-inf"), float("inf")
    weight_at = {}

    def split(d):
        # positions as a 1D array, weights as a 2D array with a single row
        keys = [k for k in d]
        vals = [d[k] for k in keys]
        return np.array(keys), np.array([vals])

    for first, second, w in ranges:
        lo, hi = min(first, second), max(first, second)
        new_x = [nextafter(lo, minus_inf), lo, hi, nextafter(hi, plus_inf)]
        new_w = [0., float(w), float(w), 0.]
        # new and old positions together
        every_x = list(set(new_x) | set(weight_at))

        # previously stored weights on every position (undefined -> 0)
        old_x, old_w = split(weight_at)
        before = interp1d_with_unknowns_numpy(old_x, old_w, every_x)[0]
        before[np.isnan(before)] = 0

        # weights of the current range on every position (undefined -> 0)
        this_range = interp1d_with_unknowns_numpy(
            np.array(new_x), np.array([new_w]), every_x)[0]
        this_range[np.isnan(this_range)] = 0

        total = before + this_range
        for position, value in zip(every_x, total):
            weight_at[position] = value

    xs, ys = split(weight_at)
    domain = Orange.data.Domain(
        [Orange.data.ContinuousVariable(name=str(float(x))) for x in xs])
    return Orange.data.Table.from_numpy(domain, ys)
def transformed(self, data):
    """
    Return the normalized X of ``data``; the input table is not modified.

    The normalization depends on ``self.method``:

    - ``Normalize.Vector``: each row is scaled to unit L2 norm. NaNs are
      interpolated (and any that remain are set to zero) only for computing
      the norm; they are put back as NaN in the result.
    - ``Normalize.Area``: each row is divided by the result of ``Integrate``
      between ``self.lower`` and ``self.upper`` with ``self.int_method``.
    - ``Normalize.Attribute``: each row is divided by the value of the
      continuous attribute ``self.attr``; if the attribute is missing or is
      not continuous, the result is all NaN.

    :param data: a data table with an ``X`` array
    :return: the normalized 2D array (``data.X`` itself when it has no rows)
    """
    if data.X.shape[0] == 0:
        return data.X
    data = data.copy()  # work on a copy, the in-place operations below are safe

    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        has_nans = nans.any()
        ys = data.X
        if has_nans:  # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if has_nans:  # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
        # NOTE(review): replace_infs is defined elsewhere; presumably it
        # cleans up the infinities produced by dividing by zero - confirm
        replace_infs(data.X)
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
            replace_infs(data.X)
        else:  # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def transformed(self, data):
    """
    Return the normalized X of ``data``; the input table is not modified.

    The normalization depends on ``self.method``:

    - ``Normalize.Vector``: each row is scaled to unit L2 norm. NaNs are
      interpolated (and any that remain are set to zero) only for computing
      the norm; they are put back as NaN in the result.
    - ``Normalize.Area``: each row is divided by the result of ``Integrate``
      between ``self.lower`` and ``self.upper`` with ``self.int_method``.
    - ``Normalize.Attribute``: each row is divided by the value of the
      continuous attribute ``self.attr``; if the attribute is missing or is
      not continuous, the result is all NaN.

    Infinite values produced by a division by zero are replaced with NaN.

    :param data: a data table with an ``X`` array
    :return: the normalized 2D array (``data.X`` itself when it has no rows)
    """
    if data.X.shape[0] == 0:
        return data.X
    data = data.copy()  # work on a copy, the in-place operations below are safe

    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        has_nans = nans.any()
        ys = data.X
        if has_nans:  # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if has_nans:  # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
        # a zero area gives +-inf; report those values as missing instead
        data.X[np.isinf(data.X)] = np.nan
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
            # a zero factor gives +-inf; report those values as missing instead
            data.X[np.isinf(data.X)] = np.nan
        else:  # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def interpolate_to_data(other_xs, other_data):
    """
    Bring ``other_data`` (sampled at ``other_xs``) onto ``wavenumbers``.

    ``wavenumbers`` comes from the enclosing scope. The data are first
    interpolated and then passed through nan_extend_edges_and_interpolate,
    the same handling that is applied to X.
    """
    # all input data needs to be interpolated (and NaNs removed)
    on_grid = interp1d_with_unknowns_numpy(other_xs, other_data, wavenumbers)
    # we know that X is not NaN. same handling of reference as of X
    without_nans, _ = nan_extend_edges_and_interpolate(wavenumbers, on_grid)
    return without_nans
def transformed(self, X, wavenumbers):
    """
    Remove a fitted polynomial from each spectrum, using the mean reference
    spectrum as an additional model component.

    Each spectrum is modelled as a linear combination of the powers
    0..``self.order`` of the rescaled wavenumbers (first wavenumber -> -1,
    last -> +1) and the reference spectrum interpolated to ``wavenumbers``.
    The coefficients are obtained with weighted least squares: the model
    matrix and the spectrum are both multiplied by the weights before the fit.

    Wavenumbers have to be given sorted.

    :param X: 2D array of spectra, one spectrum per row
    :param wavenumbers: 1D array of (sorted) wavenumbers for the columns of X
    :return: 2D array; every row is the corrected spectrum with the fitted
        model parameters appended (``X.shape[1] + self.order + 2`` columns)
    """
    # about 85% of time in __call__ function is spent is lstsq
    # compute average spectrum from the reference
    ref_X = np.atleast_2d(spectra_mean(self.reference.X))
    # interpolate reference to the data
    ref_X = interp1d_with_unknowns_numpy(getx(self.reference), ref_X, wavenumbers)
    # we know that X is not NaN. same handling of reference as of X
    ref_X, _ = nan_extend_edges_and_interpolate(wavenumbers, ref_X)

    if self.weights:
        # interpolate weights to the data
        wei_X = interp1d_with_unknowns_numpy(getx(self.weights), self.weights.X,
                                             wavenumbers)
        # set whichever weights are undefined (usually at edges) to zero
        wei_X[np.isnan(wei_X)] = 0
    else:
        wei_X = np.ones((1, len(wavenumbers)))

    # scale and shift so that the wavenumber range becomes [-1, 1]
    N = wavenumbers.shape[0]
    m0 = -2.0 / (wavenumbers[0] - wavenumbers[N - 1])
    c_coeff = 0.5 * (wavenumbers[0] + wavenumbers[N - 1])
    scaled = m0 * (wavenumbers - c_coeff)

    M = [scaled ** x for x in range(0, self.order + 1)]
    M.append(ref_X)  # always add reference spectrum to the model
    n_add_model = len(M)
    # M is needed below for the correction, for par estimation M_weighted is used
    M = np.vstack(M).T
    M_weighted = M * wei_X.T

    newspectra = np.zeros((X.shape[0], X.shape[1] + n_add_model))
    for i, rawspectrum in enumerate(X):
        # the fit must see the weighted spectrum: weighting only the model
        # matrix would not give a weighted least-squares solution
        rawspectrumW = (rawspectrum * wei_X)[0]
        m = np.linalg.lstsq(M_weighted, rawspectrumW)[0]
        corrected = rawspectrum
        # only the polynomial terms are subtracted, not the reference term
        for x in range(0, self.order + 1):
            corrected = corrected - (m[x] * M[:, x])
        if self.scaling:
            # divide by the coefficient of the reference spectrum
            corrected = corrected / m[self.order + 1]
        corrected[np.isinf(corrected)] = np.nan  # fix values which can be caused by zero weights
        corrected = np.hstack((corrected, m))  # append the model parameters
        newspectra[i] = corrected

    return newspectra
def interpolate_extend_to(self, interpolate, wavenumbers):
    """
    Resample the table ``interpolate`` onto ``wavenumbers``; edges that may
    come out as NaN are extended with the nearest values.
    """
    source_x = getx(interpolate)
    # interpolate reference to the given wavenumbers
    resampled = interp1d_with_unknowns_numpy(source_x, interpolate.X, wavenumbers)
    # we know that X is not NaN. same handling of reference as of X
    extended, _ = nan_extend_edges_and_interpolate(wavenumbers, resampled)
    return extended
def weighted_wavenumbers(weights, wavenumbers):
    """
    Compute a single-row array of weights for ``wavenumbers``.

    - A ``Function`` is evaluated on the wavenumbers and reshaped to one row.
    - Any other truthy ``weights`` is treated as a data table and is
      interpolated to the wavenumbers; undefined weights become zero.
    - Falsy ``weights`` give a weight of one everywhere.
    """
    if isinstance(weights, Function):
        return weights(wavenumbers).reshape(1, -1)

    if not weights:
        return np.ones((1, len(wavenumbers)))

    # interpolate the weight table to the data
    interpolated = interp1d_with_unknowns_numpy(getx(weights), weights.X, wavenumbers)
    # set whichever weights are undefined (usually at edges) to zero
    interpolated[np.isnan(interpolated)] = 0
    return interpolated
def interpolate_to_data(other_xs, other_data):
    """
    Interpolate ``other_data``, given at ``other_xs``, to the ``wavenumbers``
    of the enclosing scope and run the result through
    nan_extend_edges_and_interpolate (same handling as for X).
    """
    # all input data needs to be interpolated (and NaNs removed)
    resampled = interp1d_with_unknowns_numpy(other_xs, other_data, wavenumbers)
    # we know that X is not NaN. same handling of reference as of X
    cleaned, _unused = nan_extend_edges_and_interpolate(wavenumbers, resampled)
    return cleaned