def test_memory_leak() -> None:
    """Check that repeated bottleneck reductions do not grow peak memory.

    The peak resident set size reported by ``resource.getrusage`` is sampled
    before and after a burst of reductions on a 1x1 array. The burst is
    repeated up to three times; the test passes as soon as one round shows
    no growth, and fails only if every round showed growth.
    """
    import resource

    arr = np.arange(1).reshape((1, 1))
    # Same call sequence as before (nansum deliberately appears twice).
    reductions = (
        bn.nansum,
        bn.nanargmax,
        bn.nanargmin,
        bn.nanmedian,
        bn.nansum,
        bn.nanmean,
        bn.nanmin,
        bn.nanmax,
        bn.nanvar,
    )

    max_rounds = 3
    growth_per_round = []
    for _ in range(max_rounds):
        rss_before = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        for _ in range(1000):
            for axis in (None, 0, 1):
                for reduction in reductions:
                    reduction(arr, axis=axis)
        rss_after = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        grown = (rss_after - rss_before) * resource.getpagesize()
        # For 1.3.0 release, this had value of ~100kB
        if not grown:
            break
        growth_per_round.append(grown)

    assert len(growth_per_round) < max_rounds
def test_memory_leak():
    """Check that repeated bottleneck reductions do not grow peak memory.

    The peak resident set size reported by ``resource.getrusage`` is sampled
    before and after a burst of reductions on a 1x1 array. A single sample
    can grow for reasons unrelated to the code under test, so the burst is
    retried a few times: the test passes as soon as one round shows no
    growth and fails only if every round showed growth.
    """
    import resource

    arr = np.arange(1).reshape((1, 1))
    n_attempts = 3
    results = []
    for _ in range(n_attempts):
        starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        for _ in range(1000):
            for axis in [None, 0, 1]:
                bn.nansum(arr, axis=axis)
                bn.nanargmax(arr, axis=axis)
                bn.nanargmin(arr, axis=axis)
                bn.nanmedian(arr, axis=axis)
                bn.nansum(arr, axis=axis)
                bn.nanmean(arr, axis=axis)
                bn.nanmin(arr, axis=axis)
                bn.nanmax(arr, axis=axis)
                bn.nanvar(arr, axis=axis)
        ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        # NOTE(review): the unit of ru_maxrss is platform dependent; only the
        # zero / non-zero distinction matters for this check.
        diff_bytes = (ending - starting) * resource.getpagesize()
        # For 1.3.0 release, this had value of ~100kB
        if diff_bytes:
            results.append(diff_bytes)
        else:
            break

    assert len(results) < n_attempts, results
def fit(self, X, y):
    """Greedily rank features by mutual information and store the result.

    The first feature is the one with the largest median of the vectors
    returned by ``self._get_first_mi_vector`` for k = 3..9. Each following
    feature maximises ``xy_MI[F] - nanmean(fmm, axis=0)``, where ``fmm``
    holds the values returned by ``self._get_mi_vector`` for the still
    unselected features ``F`` against every selected one.

    Selection stops when ``self.n_features`` features were chosen, when no
    finite score is left, or (``n_features == 'auto'``) when the smoothed
    first derivative of the recorded scores flattens out.

    Sets ``self.X``, ``self.y``, ``self.n_features_``, ``self.ranking_`` and
    ``self.mi_`` and returns ``self``.
    """
    X_y = self._check_params(X, y)
    self.X = X_y[0]
    self.y = X_y[1].reshape((-1, 1))
    n, p = X.shape

    S = []  # list of selected features
    # A real list (not a lazy ``range``) so that ``_add_remove`` can shrink it
    # on Python 3 as well.
    F = list(range(p))  # list of unselected features

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # Find the first feature
    k_min = 3
    range_k = 7
    xy_MI = np.empty((range_k, p))
    for i in range(range_k):
        xy_MI[i, :] = self._get_first_mi_vector(i + k_min)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    if self.verbose > 0:
        self._info_print(S, S_mi)

    # Find the next features
    if self.n_features == 'auto':
        n_features = np.inf
    else:
        n_features = self.n_features

    while len(S) < n_features:
        s = len(S) - 1
        feature_mi_matrix[s, F] = self._get_mi_vector(F, S[-1])
        fmm = feature_mi_matrix[:len(S), F]

        # Computed once and reused for both the stop test and the score.
        mean_mi = bn.nanmean(fmm, axis=0)
        if bn.allnan(mean_mi):
            break
        MRMR = xy_MI[F] - mean_mi
        if np.isnan(MRMR).all():
            break

        selected = F[bn.nanargmax(MRMR)]
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        if self.verbose > 0:
            self._info_print(S, S_mi)

        if self.n_features == 'auto' and len(S) > 10:
            # Smoothed first derivative of the recorded scores.
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    self.n_features_ = len(S)
    self.ranking_ = S
    self.mi_ = S_mi
    return self
def get_first_maximum_index(self, wfm, peak_minimum_power):
    """
    Return the index of the first peak (first maximum) on
    the leading edge before the absolute power maximum.
    The first peak is only valid if its power exceeds a certain threshold.

    :param wfm: waveform values, indexable with integer arrays
    :param peak_minimum_power: minimum value a leading peak must reach
    :return: index of the first qualifying leading peak, the index of the
        absolute maximum if none qualifies, or -1 if the peak finder fails
    """
    # Get the main maximum first
    absolute_maximum_index = bn.nanargmax(wfm)

    # Find relative maxima before the absolute maximum. Only ordinary errors
    # are treated as "no result"; KeyboardInterrupt/SystemExit propagate.
    try:
        peaks = cytfmra_findpeaks(wfm[0:absolute_maximum_index])
    except Exception:
        return -1

    # Check if relative maximum are above the required threshold
    leading_maxima = np.where(wfm[peaks] >= peak_minimum_power)[0]

    # Identify the first maximum
    first_maximum_index = absolute_maximum_index
    if len(leading_maxima) > 0:
        # leading_maxima indexes into peaks, not into wfm
        first_maximum_index = peaks[leading_maxima[0]]

    return first_maximum_index
def compute_draw_info(self, x, ys):
    """Build the draw instructions for the baseline, the data and the peak.

    Returns a list of ``(kind, payload)`` tuples: the baseline curve, the
    data curve, and one line per row running from the baseline to the data
    at the position where ``ys - baseline`` is largest.
    """
    # The baseline is computed once and reused for both the peak marker and
    # the drawn baseline curve.
    bs = self.compute_baseline(x, ys)
    im = bottleneck.nanargmax(ys - bs, axis=1)
    rows = np.arange(ys.shape[0])
    lines = (x[im], bs[np.arange(bs.shape[0]), im]), (x[im], ys[rows, im])
    return [("curve", (x, bs, INTEGRATE_DRAW_BASELINE_PENARGS)),
            ("curve", (x, ys, INTEGRATE_DRAW_BASELINE_PENARGS)),
            ("line", lines)]
def _most_likely_cp(xs, minnobs):
    """Return the split index with the largest ``welch`` statistic.

    ``welch(xs[:i], xs[i:])`` is evaluated for every candidate index ``i``
    in the range returned by ``compute_endpoints(len(xs), minnobs)``; NaN
    statistics are ignored when picking the maximum.

    :return: tuple ``(index into xs, statistic at that index)``
    """
    N = len(xs)
    check_nobs(N, minnobs)
    start, end = compute_endpoints(N, minnobs)
    # ``range`` instead of the Python-2-only ``xrange``.
    wstats = np.array([welch(xs[:i], xs[i:]) for i in range(start, end)])
    cp = bn.nanargmax(wstats)
    stat = wstats[cp]
    # cp is relative to ``start``; shift it back to an index into xs.
    return cp + start, stat
def compute(self, today, assets, out, data):
    """Write a per-column drawdown figure into ``out``.

    For every column of ``data`` the row with the largest gap between the
    running maximum and the value itself is located; ``out[i]`` receives
    ``(peak - value) / value`` for that row, where ``peak`` is the NaN-aware
    maximum of the column up to and including that row.
    """
    running_peak = fmax.accumulate(data, axis=0)
    gaps = running_peak - data
    # NaN gaps must never win the argmax below.
    gaps[isnan(gaps)] = NINF
    trough_rows = nanargmax(gaps, axis=0)

    # TODO: Accelerate this loop in Cython or Numba.
    for column, trough in enumerate(trough_rows):
        history = data[:trough + 1, column]
        top = nanmax(history)
        bottom = data[trough, column]
        out[column] = (top - bottom) / bottom
def reducepoints(x, y, n=2000, further=True):
    """
    Reduce the total number of x, y coordinates for plotting.

    The input is cut into about ``n`` consecutive blocks and only the
    minimum and the maximum y value of each block are kept, in their
    original left-to-right order.
    NOTE: both the min and max for each block will be determined. This will
    yield a length of approximately 2n.

    If ``further`` is true, nonessential points are removed as well: inside
    a run of three or more equal y values only the first and the last point
    of the run are kept.

    Inputs shorter than ``3 * n`` are returned unchanged as ``(x, y)``.
    """
    # Can only work on blocks
    if len(x) < n*3:
        return (x, y)

    # Calculate the block size to average over
    block = int(math.floor(float(len(x))/n))
    newn = int(math.ceil(float(len(x))/block))
    ox, oy = np.zeros(2*newn), np.zeros(2*newn)

    # Search over each block for the min and max y value, order
    # correctly, and add to the output
    for i in range(newn):
        offset = i*block
        window = y[offset:offset + block]

        # Avoid just adding NaN's for all NaN blocks
        try:
            pmn = nanargmin(window)
        except ValueError:
            pmn = 0
        try:
            pmx = nanargmax(window)
        except ValueError:
            pmx = 0

        # Emit the two extremes in the order they occur in the data.
        first, second = (pmn, pmx) if pmn < pmx else (pmx, pmn)
        ox[2*i], oy[2*i] = x[offset + first], y[offset + first]
        ox[2*i + 1], oy[2*i + 1] = x[offset + second], y[offset + second]

    if further:
        last = None
        match = 0

        # Mark the interior points of every run of equal y values.
        for i in range(len(ox)):
            # The first element always starts a new run; no sentinel value is
            # used, so a real y value can never be mistaken for "no run yet".
            if i == 0 or oy[i] != last:
                last = oy[i]
                match = 0
            else:
                match += 1
                if match > 1:
                    ox[i - 1] = np.nan

        # Eliminate those positions where ox is nan
        if np.isnan(ox).any():
            keep = np.isfinite(ox)
            oy = oy[keep]
            ox = ox[keep]

    return ox, oy
def least_square_method(dspt):
    # Fits u and v as linear functions of the offsets (dlon, dlat) of every
    # point from the mean position, and derives an aspect ratio and an angle
    # from the covariance of those offsets. The results are written back into
    # ``dspt`` under the keys 'ux', 'uy', 'vx', 'vy', 'aspect' and 'angle',
    # and ``dspt`` is returned.
    # NOTE(review): dspt presumably is a pandas/xarray-like object exposing
    # lon, lat, u, v - confirm against the caller.
    npol = 6  # number of finite points required for the shape estimate
    # Mean position (NaN-aware): [lon, lat]
    com = np.array([bn.nanmean(dspt.lon), bn.nanmean(dspt.lat)])
    timeseries = False  # constant; the branch below therefore always runs
    ncc = dspt.lon.size
    dlon = []
    dlat = []
    for i in range(ncc):
        # haversine(p1,p2)
        # Signed distance along each axis; the sign comes from which side of
        # the mean position the point lies on.
        # NOTE(review): the factor 1000 presumably converts km to m - confirm
        # the unit returned by haversine.
        dlon.append(
            haversine([dspt.lon[i], com[1]], com) * 1000 * np.sign(dspt.lon[i] - com[0]))
        dlat.append(
            haversine([com[0], dspt.lat[i]], com) * 1000 * np.sign(dspt.lat[i] - com[1]))
    dlon = np.array(dlon)
    dlat = np.array(dlat)
    if not timeseries:
        # Design matrix with columns [1, dlon, dlat].
        # NOTE(review): np.mat is not available in recent NumPy releases -
        # verify against the NumPy version this project pins.
        R = np.mat(np.vstack((np.ones((ncc)), dlon, dlat)).T)
        u0 = np.mat(dspt.u.values).T
        v0 = np.mat(dspt.v.values).T
        # Only solve when no NaN is present anywhere in the system.
        if (np.isnan(u0).sum() == 0) & (np.isnan(v0).sum() == 0) & (np.isnan(R).sum() == 0):
            A, _, _, _ = la.lstsq(R, u0)
            B, _, _, _ = la.lstsq(R, v0)
        else:
            # NOTE(review): elements 1 and 2 are read below, so this fallback
            # needs ncc >= 3.
            A = np.nan * np.ones(ncc)
            B = np.nan * np.ones(ncc)
    points = np.vstack([dlon, dlat])
    # Shape estimate only when exactly npol finite offsets exist per axis.
    if (np.isfinite(dlon).sum() == npol) and (np.isfinite(dlat).sum() == npol):
        # careful with nans
        cov = np.cov(points)
        w, v = np.linalg.eig(cov)
        aspect = bn.nanmin(w) / bn.nanmax(w)
        if aspect < 0.99:
            ind = bn.nanargmax(w)
            # NOTE(review): np.linalg.eig returns eigenvectors as columns of
            # v, while this reads row ``ind`` - confirm which is intended.
            angle = np.arctan(v[ind, 1] / v[ind, 0]) * 180 / np.pi
            if (angle < 0):
                angle += 360.
        else:
            angle = np.nan
    else:
        aspect = np.nan
        angle = np.nan
    # Coefficient 0 of each fit (the intercept) is not stored.
    dspt['ux'] = float(A[1])
    dspt['uy'] = float(A[2])
    dspt['vx'] = float(B[1])
    dspt['vy'] = float(B[2])
    dspt['aspect'] = aspect
    dspt['angle'] = angle
    return dspt
def _normalize_log_probs(probs):
    """Turn a vector of log-probabilities into normalised probabilities.

    The largest entry is used as reference: the log of the normalising sum
    is ``probs[max] + log1p(sum(exp(others - probs[max])))``. If the
    exponential raises ``FloatingPointError`` the differences are clipped to
    ``[log_EPSILON, 0]`` first. The normalised log values are clipped to the
    same interval before being exponentiated.
    """
    peak = bn.nanargmax(probs)
    not_peak = np.arange(probs.size) != peak
    try:
        tail = np.exp(probs[not_peak] - probs[peak])
    except FloatingPointError:
        clipped = np.clip(probs[not_peak] - probs[peak], log_EPSILON, 0)
        tail = np.exp(clipped)

    log_normaliser = np.log1p(bn.nansum(tail))
    normalised_log = probs - probs[peak] - log_normaliser
    return np.exp(np.clip(normalised_log, log_EPSILON, 0))
def evaluate_rule(self, rule):
    """Return the m-estimate of the rule for its target class.

    The value is ``(covered + m * prior_share) / (total + m)`` where
    ``covered`` is the target-class entry of ``rule.curr_class_dist``,
    ``total`` is the sum of that distribution and ``prior_share`` is the
    target-class fraction of ``rule.prior_class_dist``.

    As an exception, when the rule has no target class, the majority class
    of the current distribution stands against all others.
    """
    target_class = rule.target_class
    class_counts = rule.curr_class_dist
    if target_class is None:
        target_class = bn.nanargmax(class_counts)

    covered = class_counts[target_class]
    prior = rule.prior_class_dist
    prior_share = prior[target_class] / prior.sum()

    numerator = covered + self.m * prior_share
    denominator = class_counts.sum() + self.m
    return numerator / denominator
def compute_integral(self, x_s, y_s):
    """Return, per row, the x position of the largest baseline-corrected value.

    The baseline from ``self.compute_baseline`` is subtracted first. Rows
    that contain only NaN yield NaN. With an empty ``x_s`` one NaN per row
    is returned.
    """
    corrected = y_s - self.compute_baseline(x_s, y_s)
    if len(x_s) == 0:
        return np.full(corrected.shape[0], np.nan)

    # nanargmax cannot handle rows without any number: give those rows a
    # dummy value now and overwrite their result afterwards.
    all_nan = np.isnan(corrected).all(axis=1)
    corrected[all_nan] = 0

    peak_columns = bottleneck.nanargmax(corrected, axis=1)
    positions = x_s[peak_columns]
    positions[all_nan] = np.nan
    return positions
def rpredict(classifier, x, categories):
    """
    Same as predict, but outputs names only.

    :param classifier: The trained classifier.
    :param x: List of x to predict on.
    :param categories: List of all model names.
    :return: List of predicted names.
    """
    encoder = OnehotEncoder(categories)
    probabilities = classifier.predict(fix_dims(x), verbose=1)

    # Most probable class per sample, ignoring NaN entries.
    winners = []
    for row in probabilities:
        winners.append(nanargmax(row))

    names = encoder.label(winners)
    return names.tolist()
def _normalize_log(probs):
    """Normalise a vector of log-probabilities, staying in log space.

    The result is ``probs - probs[max] - log1p(sum(exp(others - probs[max])))``.
    If that computation raises ``FloatingPointError`` a two-element fallback
    is returned instead: 0 for the larger of ``probs[0]`` / ``probs[1]`` and
    ``log_EPSILON`` for the other.
    """
    peak = bn.nanargmax(probs, axis=0)
    try:
        not_peak = np.arange(probs.size) != peak
        shifted = probs - probs[peak]
        tail_mass = bn.nansum(np.exp(probs[not_peak] - probs[peak]))
        return shifted - np.log1p(tail_mass)
    except FloatingPointError:
        # The fallback only looks at the first two entries.
        if probs[0] > probs[1]:
            return np.array([0, log_EPSILON])
        return np.array([log_EPSILON, 0])
def confusion_matrix(classifier, x, y, categories):
    """
    Runs a Keras classifier to create a confusion matrix.

    :param classifier: The trained classifier.
    :param x: A list of x values to predict on.
    :param y: The true labels belonging to x.
    :param categories: List of all category names.
    :return: A confusion matrix, with proportions in [0, 1]
    """
    encoder = OnehotEncoder(categories)
    true_rows = encoder.index(y)
    probabilities = classifier.predict(fix_dims(x), verbose=1)

    size = len(categories)
    counts = np.zeros((size, size))
    row_totals = np.zeros(size)
    for sample_probs, true_row in zip(probabilities, true_rows):
        predicted = nanargmax(sample_probs)
        counts[true_row][predicted] += 1
        row_totals[true_row] += 1

    # Each row is scaled by the number of samples of its true class.
    return counts / row_totals[:, None]  # TODO: divide row or column?
def _calc_parameters(self, wfm_counts):
    """Fill ``self._ltpp`` from the waveform counts.

    For every waveform the mean of the first 11 bins is subtracted as noise
    and negative counts are set to zero. ``self._ltpp[i]`` is then set to
    ``(y[i] / 41) / peak`` when ``i`` lies strictly between ``peak_index +
    100`` and ``peak_index + 140``; a zero peak yields NaN.

    NOTE(review): the previous condition ``if [..] and [..]`` compared two
    non-empty lists and was therefore always true. It now performs the
    comparison that was written. The open questions recorded in the
    comments below (index meaning, missing sum) are unchanged.
    """
    # AMANDINE: implementation for wf of 256 bins (128 zero-padded)?
    onediv = float(1)/float(41)

    # loop over the waveforms
    for i in np.arange(self._n):
        y = wfm_counts[i, :].flatten().astype(np.float32)
        y -= bn.nanmean(y[0:11])  # Remove Noise
        y[np.where(y < 0.0)[0]] = 0.0  # Set negative counts to zero
        yp = np.nanmax(y)  # Waveform peak value
        ypi = bn.nanargmax(y)  # Waveform peak index

        # AMANDINE: here i seems to be understood as gate index but it is wf index!?
        if i == 256:
            break

        if (ypi + 100) < i < (ypi + 140):
            try:
                # AMANDINE: where is the sum in this formula?
                self._ltpp[i] = (onediv*float(y[i]))/float(yp)
            except ZeroDivisionError:
                self._ltpp[i] = np.nan
def find_nearest(array, value):
    """
    Search array for value and return the index where the value is closest.

    NaN entries of ``array`` are ignored. An infinite ``value`` selects the
    largest (``+inf``) or smallest (``-inf``) entry.

    Parameters:
        array (ndarray): Array to search.
        value: Value to search array for.

    Returns:
        int: Index of ``array`` closest to ``value``.

    Raises:
        ValueError: If ``value`` is NaN.

    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """
    if np.isnan(value):
        raise ValueError("Invalid search value")

    if np.isposinf(value):
        return nanargmax(array)

    # For -inf the smallest entry is wanted, otherwise the smallest distance.
    ranked = array if np.isneginf(value) else np.abs(array - value)
    return nanargmin(ranked)
def fit(self, num_workers=7, debug_plots=True, force=False):
    """
    Train A+1 classifiers (the first one is for no features observed.)
    Find order of A features to run in.

    Unless ``force`` is set, a previously saved result at ``self.filename``
    is loaded instead of training. Otherwise actions are picked greedily:
    in every iteration the feasible action with the largest information
    gain per unit cost is selected. The result is stored in
    ``self.action_inds`` / ``self.clfs`` and written with ``self.save()``.
    """
    if not force and os.path.exists(self.filename):
        # Pickles are binary data: text mode fails on Python 3.
        # NOTE(review): pickle.load executes code from the file; only load
        # caches this program wrote itself.
        with open(self.filename, 'rb') as f:
            sc = pickle.load(f)
        self.action_inds = sc.action_inds
        self.clfs = sc.clfs
        self.has_been_fit = True
        return

    ds = self.ds
    instances = ds.X
    labels = ds.y
    A = len(ds.actions)

    # Train classifier on initially empty states.
    action_inds = []
    states = get_states(ds, instances, action_inds)
    clf, score, entropy = get_classifier(
        ds, states, labels, 1, num_workers)

    # We will collect values for visualization.
    scores = np.empty((A, A))
    scores.fill(np.nan)
    entropies = np.empty((A, A))
    entropies.fill(np.nan)
    infogains = np.empty((A, A))
    infogains.fill(np.nan)

    # While there are feasible actions, consider them.
    costs = ds.action_costs.copy()
    remaining_mask = np.ones(len(ds.actions), dtype=bool)
    selected_clfs = [clf]
    # Stays None when no iteration completes, so the plotting call below
    # cannot hit an unbound name.
    rewards = None
    for iteration in range(A):
        print('-'*80)
        print('Iteration {}'.format(iteration))
        feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
        if len(feas_inds) == 0:
            break

        new_clfs = np.empty(A, dtype=object)
        for action_ind in feas_inds:
            print(ds.actions[action_ind]),
            new_action_inds = action_inds + [action_ind]
            states = get_states(ds, instances, new_action_inds)
            new_clf, new_score, new_entropy = get_classifier(
                ds, states, labels, 1, num_workers)
            new_clfs[action_ind] = new_clf
            infogains[action_ind, iteration] = entropy - new_entropy
            scores[action_ind, iteration] = new_score
            entropies[action_ind, iteration] = new_entropy

        # Infeasible actions keep NaN and are ignored by nanargmax.
        rewards = infogains[:, iteration] / ds.action_costs
        ind = bn.nanargmax(rewards)
        selected_clfs.append(new_clfs[ind])
        action_inds.append(ind)
        print('Selected {} with infogain {:.2f} and cost {:.2f}'.format(ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]))
        remaining_mask[ind] = False
        costs += ds.action_costs[ind]
        entropy = entropies[ind, iteration]

    actions = np.take(ds.actions, action_inds)
    print('Selected actions in order: {}'.format(actions))

    self.action_inds = action_inds
    self.clfs = selected_clfs
    assert(len(self.clfs) == len(self.action_inds) + 1)
    self.has_been_fit = True
    if debug_plots and rewards is not None:
        self.plot_stuff(scores, entropies, infogains, rewards)
    self.save()
def func1(tnlhf, tnlhf_curr, residual, y, e, o, a, _s_prev, p, indT):
    # Chooses, for each of the m rows of ``y``, a column index ``t`` and a
    # value ``_s``, plus a boolean mask ``indD`` (None for probType 'IP').
    # Which formulas are used depends on ``p.probType`` and
    # ``p.solver.dataHandling``. Returns ``(t, _s, indD)``.
    #
    # ``o`` and ``a`` are read as two side-by-side halves of n columns each
    # (``[:, :n]`` and ``[:, n:]``). ``residual`` is not used in this body.
    # NOTE(review): presumably y/e are lower/upper box bounds and o/a are
    # objective bounds on the two halves of each box - confirm with caller.
    m, n = y.shape
    w = arange(m)  # row selector used for pairwise (row, column) indexing
    if p.probType == 'IP':
        oc_modL, oc_modU = o[:, :n], o[:, n:]
        ac_modL, ac_modU = a[:, :n], a[:, n:]
        # TODO: handle nans
        mino = where(oc_modL < oc_modU, oc_modL, oc_modU)
        maxa = where(ac_modL < ac_modU, ac_modU, ac_modL)

        # Prev
        # Per column: summed (a - o) over both halves; take the smallest.
        tmp = a[:, 0:n] - o[:, 0:n] + a[:, n:] - o[:, n:]
        t = nanargmin(tmp, 1)
        d = 0.5 * tmp[w, t]

        # Earlier variant:
        # ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')
        ind = 2**(1.0 / n) * d >= _s_prev

        # Prev
        _s = nanmin(maxa - mino, 1)

        # NOTE(review): the first two assignments are dead; indD ends as None.
        indD = logical_not(ind)
        indD = ind
        indD = None
    else:
        if p.solver.dataHandling == 'sorted':
            _s = func13(o, a)

            # Column of the smallest ``a`` entry, folded back onto 0..n-1.
            t = nanargmin(a, 1) % n

            d = nanmax([a[w, t] - o[w, t], a[w, n + t] - o[w, n + t]], 0)

            ## !!!! Don't replace it by (_s_prev /d- 1) to omit rounding errors ###
            #ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')

            #NEW
            ind = d >= _s_prev / 2**(1.0e-12 / n)
            indD = empty(m, bool)
            indD.fill(True)
            ###################################################
        elif p.solver.dataHandling == 'raw':
            if p.probType == 'MOP':
                # Consume the next m precomputed entries stored on ``p``.
                t = p._t[:m]
                p._t = p._t[m:]
                d = _s = p.__s[:m]
                p.__s = p.__s[m:]
            else:
                T = tnlhf_curr
                tnlh_curr_1, tnlh_curr_2 = T[:, 0:n], T[:, n:]
                # Element-wise smaller half; the first half also wins where
                # the second one is NaN.
                TNHL_curr_min = where(
                    logical_or(tnlh_curr_1 < tnlh_curr_2, isnan(tnlh_curr_2)),
                    tnlh_curr_1, tnlh_curr_2)
                t = nanargmin(TNHL_curr_min, 1)
                T = tnlhf
                d = nanmin(vstack(([T[w, t], T[w, n + t]])), 0)
                _s = d

            #OLD
            #!#!#!#! Don't replace it by _s_prev - d <= ... to omit inf-inf = nan !#!#!#
            #ind = _s_prev <= d + ((2**-n / log(2)) if n > 15 else log2(1+2**-n))

            #NEW
            # NOTE(review): this check has no effect.
            if any(_s_prev < d):
                pass
            ind = _s_prev <= d + 1.0 / n

            indQ = d >= _s_prev - 1.0 / n
            indD = logical_or(indQ, logical_not(indT))
            ###################################################
        else:
            assert 0

    if any(ind):
        # For the flagged rows the column is replaced by the one with the
        # largest ``e - y`` entry.
        r10 = where(ind)[0]
        bs = e[r10] - y[r10]
        t[r10] = nanargmax(bs, 1)  # ordinary numpy.argmax can be used as well

    return t, _s, indD
def _fit(self, X, y):
    """Greedily select features by mutual information (JMI, JMIM or MRMR).

    The first feature is the one with the largest median of the vectors
    returned by ``mi.get_first_mi_vector`` for k = 3..10. Each following
    feature is chosen from the unselected set ``F`` according to
    ``self.method``. Selection stops once ``self.n_features`` features were
    picked, when no unselected feature is left, or (``n_features == 'auto'``)
    when the smoothed first derivative of the recorded scores flattens out.

    Sets ``self.X``, ``self.y``, ``self.n_features_``, ``self.support_``,
    ``self.ranking_`` and ``self.mi_`` and returns ``self``.
    """
    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features; a real list so it can be shrunk on Python 3
    F = list(range(p))

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ----------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ----------------------------------------------------------------------

    # check a range of ks (3-10), and choose the one with the max median MI
    k_min = 3
    k_max = 11
    xy_MI = np.zeros((k_max-k_min, p))
    xy_MI[:] = np.nan
    for i, k in enumerate(range(k_min, k_max)):
        xy_MI[i, :] = mi.get_first_mi_vector(self, k)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ----------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ----------------------------------------------------------------------

    # 'auto' has no fixed target; comparing an int with the string would
    # raise TypeError on Python 3.
    max_features = np.inf if self.n_features == 'auto' else self.n_features

    while len(S) < max_features:
        # nothing left to choose from
        if len(F) == 0:
            break

        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

        # make decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
        elif self.method == 'JMIM':
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
        elif self.method == 'MRMR':
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]

        # record the JMIM of the newly selected feature and add it to S
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', let's check the S_mi to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of previously sel
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ----------------------------------------------------------------------
    # SAVE RESULTS
    # ----------------------------------------------------------------------

    self.n_features_ = len(S)
    # builtin bool: the ``np.bool`` alias no longer exists in current NumPy
    self.support_ = np.zeros(p, dtype=bool)
    self.support_[S] = 1
    self.ranking_ = S
    self.mi_ = S_mi

    return self
def _fit(self, X, y):
    """Greedily select features by mutual information (JMI, JMIM or MRMR).

    The first feature is the one with the largest median of the vectors
    returned by ``mi.get_first_mi_vector`` for k = 3..10. Each following
    feature is chosen from the unselected set ``F`` according to
    ``self.method``. Selection stops once ``self.n_features`` features were
    picked, when no unselected feature is left, or (string ``n_features``,
    i.e. 'auto') when the smoothed first derivative of the recorded scores
    flattens out.

    Sets ``self.X``, ``self.y``, ``self.n_features_``, ``self.support_``,
    ``self.ranking_`` and ``self.mi_`` and returns ``self``.
    """
    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features
    F = list(range(p))

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ---------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ---------------------------------------------------------------------

    # check a range of ks (3-10), and choose the one with the max median MI
    k_min = 3
    k_max = 11
    xy_MI = np.zeros((k_max - k_min, p))
    xy_MI[:] = np.nan
    for i, k in enumerate(range(k_min, k_max)):
        xy_MI[i, :] = mi.get_first_mi_vector(self, k)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ---------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ---------------------------------------------------------------------

    # A string n_features ('auto') means "no fixed target": loop until one of
    # the break conditions below fires.
    if isinstance(self.n_features, str):
        max_features = np.inf
    else:
        max_features = self.n_features

    while len(S) < max_features:
        # nothing left to choose from
        if len(F) == 0:
            break

        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

        # make decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
        elif self.method == 'JMIM':
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
        elif self.method == 'MRMR':
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]

        # record the JMIM of the newly selected feature and add it to S
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', let's check the S_mi to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of previously sel
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ---------------------------------------------------------------------
    # SAVE RESULTS
    # ---------------------------------------------------------------------

    self.n_features_ = len(S)
    # builtin bool: the ``np.bool`` alias no longer exists in current NumPy
    self.support_ = np.zeros(p, dtype=bool)
    self.support_[S] = 1
    self.ranking_ = S
    self.mi_ = S_mi

    return self
def func1(tnlhf, tnlhf_curr, residual, y, e, o, a, _s_prev, p, indT):
    # Chooses, for each of the m rows of ``y``, a column index ``t`` and a
    # value ``_s``, plus a boolean mask ``indD`` (None for probType 'IP').
    # Which formulas are used depends on ``p.probType`` and
    # ``p.solver.dataHandling``. Returns ``(t, _s, indD)``.
    #
    # ``o`` and ``a`` are read as two side-by-side halves of n columns each
    # (``[:, :n]`` and ``[:, n:]``). ``residual`` is not used in this body.
    # NOTE(review): presumably y/e are lower/upper box bounds and o/a are
    # objective bounds on the two halves of each box - confirm with caller.
    m, n = y.shape
    w = arange(m)  # row selector used for pairwise (row, column) indexing
    if p.probType == 'IP':
        oc_modL, oc_modU = o[:, :n], o[:, n:]
        ac_modL, ac_modU = a[:, :n], a[:, n:]
        # TODO: handle nans
        mino = where(oc_modL < oc_modU, oc_modL, oc_modU)
        maxa = where(ac_modL < ac_modU, ac_modU, ac_modL)

        # Prev
        # Per column: summed (a - o) over both halves; take the smallest.
        tmp = a[:, 0:n]-o[:, 0:n]+a[:, n:]-o[:, n:]
        t = nanargmin(tmp,1)
        d = 0.5*tmp[w, t]

        # Earlier variant:
        # ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')
        ind = 2**(1.0/n) * d >= _s_prev

        # Prev
        _s = nanmin(maxa - mino, 1)

        # NOTE(review): the first two assignments are dead; indD ends as None.
        indD = logical_not(ind)
        indD = ind
        indD = None
    else:
        if p.solver.dataHandling == 'sorted':
            _s = func13(o, a)

            # Column of the smallest ``a`` entry, folded back onto 0..n-1.
            t = nanargmin(a, 1) % n

            d = nanmax([a[w, t] - o[w, t], a[w, n+t] - o[w, n+t]], 0)

            ## !!!! Don't replace it by (_s_prev /d- 1) to omit rounding errors ###
            #ind = 2**(-n) >= (_s_prev - d)/asarray(d, 'float64')

            #NEW
            ind = d >= _s_prev / 2 ** (1.0e-12/n)
            indD = empty(m, bool)
            indD.fill(True)
            ###################################################
        elif p.solver.dataHandling == 'raw':
            if p.probType == 'MOP':
                # Consume the next m precomputed entries stored on ``p``.
                t = p._t[:m]
                p._t = p._t[m:]
                d = _s = p.__s[:m]
                p.__s = p.__s[m:]
            else:
                T = tnlhf_curr
                tnlh_curr_1, tnlh_curr_2 = T[:, 0:n], T[:, n:]
                # Element-wise smaller half; the first half also wins where
                # the second one is NaN.
                TNHL_curr_min = where(logical_or(tnlh_curr_1 < tnlh_curr_2, isnan(tnlh_curr_2)), tnlh_curr_1, tnlh_curr_2)
                t = nanargmin(TNHL_curr_min, 1)
                T = tnlhf
                d = nanmin(vstack(([T[w, t], T[w, n+t]])), 0)
                _s = d

            #OLD
            #!#!#!#! Don't replace it by _s_prev - d <= ... to omit inf-inf = nan !#!#!#
            #ind = _s_prev <= d + ((2**-n / log(2)) if n > 15 else log2(1+2**-n))

            #NEW
            # NOTE(review): this check has no effect.
            if any(_s_prev < d):
                pass
            ind = _s_prev <= d + 1.0/n

            indQ = d >= _s_prev - 1.0/n
            indD = logical_or(indQ, logical_not(indT))
            ###################################################
        else:
            assert 0

    if any(ind):
        # For the flagged rows the column is replaced by the one with the
        # largest ``e - y`` entry.
        r10 = where(ind)[0]
        bs = e[r10] - y[r10]
        t[r10] = nanargmax(bs, 1)  # ordinary numpy.argmax can be used as well

    return t, _s, indD
def fit(self, num_workers=7, debug_plots=True, force=False):
    """
    Train A+1 classifiers (the first one is for no features observed.)
    Find order of A features to run in.

    Unless ``force`` is set, a previously saved result at ``self.filename``
    is loaded instead of training. Otherwise actions are picked greedily:
    in every iteration the feasible action with the largest information
    gain per unit cost is selected. The result is stored in
    ``self.action_inds`` / ``self.clfs`` and written with ``self.save()``.
    """
    if not force and os.path.exists(self.filename):
        # Pickles are binary data: text mode fails on Python 3.
        # NOTE(review): pickle.load executes code from the file; only load
        # caches this program wrote itself.
        with open(self.filename, "rb") as f:
            sc = pickle.load(f)
        self.action_inds = sc.action_inds
        self.clfs = sc.clfs
        self.has_been_fit = True
        return

    ds = self.ds
    instances = ds.X
    labels = ds.y
    A = len(ds.actions)

    # Train classifier on initially empty states.
    action_inds = []
    states = get_states(ds, instances, action_inds)
    clf, score, entropy = get_classifier(ds, states, labels, 1, num_workers)

    # We will collect values for visualization.
    scores = np.empty((A, A))
    scores.fill(np.nan)
    entropies = np.empty((A, A))
    entropies.fill(np.nan)
    infogains = np.empty((A, A))
    infogains.fill(np.nan)

    # While there are feasible actions, consider them.
    costs = ds.action_costs.copy()
    remaining_mask = np.ones(len(ds.actions), dtype=bool)
    selected_clfs = [clf]
    # Stays None when no iteration completes, so the plotting call below
    # cannot hit an unbound name.
    rewards = None
    for iteration in range(A):
        print("-" * 80)
        print("Iteration {}".format(iteration))
        feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
        if len(feas_inds) == 0:
            break

        new_clfs = np.empty(A, dtype=object)
        for action_ind in feas_inds:
            print(ds.actions[action_ind]),
            new_action_inds = action_inds + [action_ind]
            states = get_states(ds, instances, new_action_inds)
            new_clf, new_score, new_entropy = get_classifier(ds, states, labels, 1, num_workers)
            new_clfs[action_ind] = new_clf
            infogains[action_ind, iteration] = entropy - new_entropy
            scores[action_ind, iteration] = new_score
            entropies[action_ind, iteration] = new_entropy

        # Infeasible actions keep NaN and are ignored by nanargmax.
        rewards = infogains[:, iteration] / ds.action_costs
        ind = bn.nanargmax(rewards)
        selected_clfs.append(new_clfs[ind])
        action_inds.append(ind)
        print(
            "Selected {} with infogain {:.2f} and cost {:.2f}".format(
                ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]
            )
        )
        remaining_mask[ind] = False
        costs += ds.action_costs[ind]
        entropy = entropies[ind, iteration]

    actions = np.take(ds.actions, action_inds)
    print("Selected actions in order: {}".format(actions))

    self.action_inds = action_inds
    self.clfs = selected_clfs
    assert len(self.clfs) == len(self.action_inds) + 1
    self.has_been_fit = True
    if debug_plots and rewards is not None:
        self.plot_stuff(scores, entropies, infogains, rewards)
    self.save()
def fit(self, num_workers=1, debug_plots=True, force=False):
    """Find a static order of actions and train one classifier for it.

    Unless ``force`` is set, a previously saved object at ``self.filename``
    is loaded and its attributes copied onto ``self``. Otherwise the data is
    split 2/3 train, 1/3 validation, and actions are picked greedily: in
    every iteration the feasible action with the largest validation
    information gain per unit cost is selected. Finally the imputer is
    refit on all data and (unless ``clf_method`` is "imagenet") a classifier
    is trained on states drawn from the masks visited by the policy.

    Sets ``self.mi``, ``self.clf``, ``self.action_inds`` and
    ``self.has_been_fit`` and calls ``self.save()``.
    """
    if not force and os.path.exists(self.filename):
        # Pickles are binary data: text mode fails on Python 3.
        # NOTE(review): pickle.load executes code from the file; only load
        # caches this program wrote itself.
        with open(self.filename, "rb") as f:
            sc = pickle.load(f)
        self.__dict__.update(sc.__dict__)
        return

    # NOTE(review): ``ds`` is not bound in this function; it has to come
    # from module scope. The sibling implementations use ``ds = self.ds`` -
    # confirm which is intended.
    instances = ds.X
    labels = ds.y
    instances_train, instances_val, labels_train, labels_val = train_test_split(
        instances, labels, test_size=1 / 3.0
    )
    A = len(ds.actions)
    N_train = instances_train.shape[0]

    # Initialize imputation mechanism
    if self.impute_method == "mean":
        mi = tc.MeanImputer(ds.action_dims).fit(instances_train)
    else:
        mi = tc.GaussianImputer(ds.action_dims).fit(instances_train)

    # Train classifier on initially empty states.
    action_inds = []
    states_train = get_states(ds, instances_train, action_inds, mi)
    states_val = get_states(ds, instances_val, action_inds, mi)
    if self.clf_method == "logreg":
        clf, score_train, entropy_train = get_classifier(ds, states_train, labels_train, self.num_clf, num_workers)
    else:
        clf = tc.StateClassifierImagenet(ds)
    score_val, entropy_val = eval_classifier(clf, states_val, labels_val)

    # We collect values for visualization.
    scores = np.empty((A, A))
    scores.fill(np.nan)
    entropies = np.empty((A, A))
    entropies.fill(np.nan)
    infogains = np.empty((A, A))
    infogains.fill(np.nan)

    # While there are feasible actions, consider them.
    costs = ds.action_costs.copy()
    remaining_mask = np.ones(len(ds.actions), dtype=bool)
    policy_masks = [remaining_mask.copy()]
    # Stays None when no iteration completes, so the plotting call below
    # cannot hit an unbound name.
    rewards = None
    for iteration in range(A):
        print("-" * 80)
        print("Iteration {}".format(iteration))
        feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
        if len(feas_inds) == 0:
            break

        # Train classifier with new mask distribution.
        if self.clf_method != "imagenet":
            new_masks = []
            for action_ind in feas_inds:
                mask = remaining_mask.copy()
                mask[action_ind] = False
                new_masks.append(mask)
            md = tc.MaskDistribution()
            md.update(np.array(policy_masks + new_masks))
            N = N_train * ((iteration + 1) + len(feas_inds))
            states_, labels_ = get_states_from_mask_distribution(ds, md, instances_train, labels_train, N, mi)
            clf, score_train, entropy_train = get_classifier(ds, states_, labels_, self.num_clf, num_workers)

        # Evaluate the infogain of individual features.
        for action_ind in feas_inds:
            print(ds.actions[action_ind]),
            states_val = get_states(ds, instances_val, action_inds + [action_ind], mi)
            new_score_val, new_entropy_val = eval_classifier(clf, states_val, labels_val)
            infogains[action_ind, iteration] = entropy_val - new_entropy_val
            scores[action_ind, iteration] = new_score_val
            entropies[action_ind, iteration] = new_entropy_val

        # Infeasible actions keep NaN and are ignored by nanargmax.
        rewards = infogains[:, iteration] / ds.action_costs
        ind = bn.nanargmax(rewards)
        action_inds.append(ind)
        entropy_val = entropies[ind, iteration]
        print(
            "Selected {} with infogain {:.2f} and cost {:.2f}".format(
                ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]
            )
        )
        remaining_mask[ind] = False
        costs += ds.action_costs[ind]
        policy_masks += [remaining_mask.copy()]

    # Fit imputer with all data
    self.mi = mi.fit(instances)

    # Train final classifier, with the final masks and on full data
    if self.clf_method != "imagenet":
        md = tc.MaskDistribution()
        md.update(np.array(policy_masks))
        N = N_train * len(policy_masks)
        states_, labels_ = get_states_from_mask_distribution(ds, md, instances, labels, N, self.mi)
        clf, score, entropy = get_classifier(ds, states_, labels_, self.num_clf, num_workers)

    actions = np.take(ds.actions, action_inds)
    print("Selected actions in order: {}".format(actions))

    self.clf = clf
    self.action_inds = action_inds
    self.has_been_fit = True
    if debug_plots and rewards is not None:
        self.plot_stuff(scores, entropies, infogains, rewards)
    self.save()
tit = time.time() # Compute responsibilities for i in xrange(tb, te, ll): il = i - tb Ss.select_hyperslab((il, 0), (ll, N)) S.id.read(ms, Ss, tS) Rs.select_hyperslab((il, 0), (ll, N)) R.id.read(ms, Rs, tRold) As.select_hyperslab((i, 0), (ll, N)) A.id.read(ms, As, tAS) #tAS = A[i, :] tAS += tS #tRold = R[i, :] tI = bn.nanargmax(tAS, axis=1) tY = tAS[ind, tI] tAS[ind, tI[ind]] = z tY2 = bn.nanmax(tAS, axis=1) tR = tS - tY[:, np.newaxis] tR[ind, tI[ind]] = tS[ind, tI[ind]] - tY2[ind] tR = (1 - damping) * tR + damping * tRold tRp = np.maximum(tR, 0) for il in xrange(ll): tRp[il, i + il] = tR[il, i + il] tdR[i - tb + il] = tR[il, i + il] if disk is True:
def filter_phase(t, x, Plist, smooth_factor=1000):
    """
    Filter out specific periods by smoothing the phase-curve.

    Parameters:
        t (ndarray): Time vector (days).
        x (ndarray): Flux vector.
        Plist (list): List of periods to remove. A single scalar period is
            accepted as well (it is promoted to a 1-element array).
        smooth_factor (float, optional): Factor of phase to use as smooth width.
            Each period is divided by this factor and the result is handed
            to ``_filter_single_phase`` as the smoothing width.

    Returns:
        ndarray: Filter flux vector (in the same order as ``t``) that can be
        removed from the timeseries.

    Note:
        Does not require time to be sorted.
        Can handle NaN in flux vector.
        When the module-level ``_output_folder`` is set, a diagnostic figure
        with one phase-curve panel per period is produced as a side effect.
    """
    # Prepare arrays:
    Plist = np.atleast_1d(Plist)  # Hack to handle 0-dim input
    Np = len(Plist)
    Nt = len(t)
    phase = zeros((Np, Nt), dtype='float64')
    indx = zeros((Np, Nt), dtype='int')
    indx_inv = zeros((Np, Nt), dtype='int')
    phase_tot = zeros(Nt, dtype='float64')
    phase_smooth_t = zeros((Np, Nt), dtype='float64')
    dphase = zeros(Np, dtype='float64')

    def _smooth_in_time_order(i):
        # Smooth the phase curve of period i on the residual flux (flux minus
        # the current total filter) and un-sort the result back to the order
        # of the input time vector.
        order = indx[i]
        phase_smooth = _filter_single_phase(phase[i, order], x[order] - phase_tot[order], Plist[i]/smooth_factor, dphase[i])
        return phase_smooth[indx_inv[i]]

    # Loop through periods to be removed:
    for k in range(Np):
        # Calculate the phase and sort it:
        phase[k] = mod(t, Plist[k])
        indx[k] = argsort(phase[k])
        indx_inv[k] = argsort(indx[k])
        dphase[k] = median(diff(phase[k, indx[k]]))

        # Calculate smooth version of the phase curve and add it to the
        # total phase filter:
        phase_smooth_t[k] = _smooth_in_time_order(k)
        phase_tot += phase_smooth_t[k, :]

        # If removing multiple periods perform iterative procedure where
        # phase curves are added and removed to avoid cross-talk between
        # periods (range(k) is empty for the first period, so no guard is needed):
        for j in range(k):
            # Add the transit back into the timeseries (by subtracting it from the filter):
            phase_tot -= phase_smooth_t[j, :]
            # Re-calculate the phase curve of the transit:
            phase_smooth_t[j] = _smooth_in_time_order(j)
            # Remove the transit again:
            phase_tot += phase_smooth_t[j, :]

    # Make plots of phase curves:
    if _output_folder is not None:
        # Find the point on the smoothed curve that deviates the most from zero:
        imax = nanargmax(np.abs(phase_smooth_t), axis=1)
        s = nanstd(x)

        fig = plt.figure()
        # FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and
        # later removed; the window title lives on the figure manager, which
        # may be absent for figures without a GUI window.
        manager = getattr(fig.canvas, 'manager', None)
        if manager is not None:
            manager.set_window_title('phasecurve')
        fig.subplots_adjust(hspace=0.05)
        for k, P in enumerate(Plist):
            # Plot phasecurve for this period:
            ax = plt.subplot(Np, 1, k+1)
            ax.plot(phase[k]/P, x, 'k.', markersize=2)  # No need to sort if we only plot points
            ax.plot(phase[k, indx[k]]/P, phase_smooth_t[k, indx[k]], 'r-')
            ax.axvline(phase[k, imax[k]]/P, color='b', linestyle='--')  # Line indicating the (likely) planet transit
            ax.set_xlim(0, 1)
            ax.set_ylim(-6*s, 6*s)
            ax.text(0.02, 0.97, 'P = %f d'%(P), horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, backgroundcolor='w', color='k')
            # Only the bottom panel keeps its tick labels:
            if k != Np-1:
                plt.setp(ax.get_xticklabels(), visible=False)
        ax.set_xlabel('Phase')
        fig.text(0.03, 0.5, u'Flux (counts/s)', ha='center', va='center', rotation='vertical', transform=fig.transFigure)

        if _output_format != 'native':
            fig.savefig(os.path.join(_output_folder, _output_prefix+'phasecurve.'+_output_format), format=_output_format, bbox_inches='tight')
            plt.close(fig)

    # Return the total time-sorted phase curve:
    return phase_tot
def fit(self, num_workers=1, debug_plots=True, force=False):
    """Greedily order the actions by information gain per unit cost and train the classifier.

    Unless ``force`` is set, a previously saved object found at
    ``self.filename`` is unpickled and its attributes are copied onto
    ``self`` instead of refitting.

    Otherwise the data in ``ds`` is split 2/3 train, 1/3 validation.  In
    every iteration each still-feasible action is tried on top of the
    actions chosen so far; the action with the largest validation entropy
    reduction divided by its cost is appended to the policy.  Finally the
    imputer is refitted on all data and (except for the 'imagenet'
    classifier) a final classifier is trained on the accumulated masks.

    Sets ``self.mi``, ``self.clf``, ``self.action_inds`` and
    ``self.has_been_fit``, optionally plots, and calls ``self.save()``.
    """
    # NOTE(review): pickle.load fully trusts the file at self.filename;
    # only point this at files this program wrote itself.
    if not force and os.path.exists(self.filename):
        with open(self.filename) as f:
            cached = pickle.load(f)
        self.__dict__.update(cached.__dict__)
        return

    instances = ds.X
    labels = ds.y
    split = train_test_split(instances, labels, test_size=1/3.)
    instances_train, instances_val, labels_train, labels_val = split

    A = len(ds.actions)
    N_train = instances_train.shape[0]

    # Initialize imputation mechanism
    if self.impute_method == 'mean':
        imputer = tc.MeanImputer(ds.action_dims)
    else:
        imputer = tc.GaussianImputer(ds.action_dims)
    mi = imputer.fit(instances_train)

    # Train classifier on initially empty states.
    action_inds = []
    states_train = get_states(ds, instances_train, action_inds, mi)
    states_val = get_states(ds, instances_val, action_inds, mi)
    if self.clf_method == 'logreg':
        clf, score_train, entropy_train = get_classifier(
            ds, states_train, labels_train, self.num_clf, num_workers)
    else:
        clf = tc.StateClassifierImagenet(ds)
    score_val, entropy_val = eval_classifier(clf, states_val, labels_val)

    def nan_table():
        # (action, iteration) table; NaN marks "not evaluated".
        table = np.empty((A, A))
        table.fill(np.nan)
        return table

    # We collect values for visualization.
    scores = nan_table()
    entropies = nan_table()
    infogains = nan_table()

    # costs[a] is the cost of taking action a on top of everything chosen
    # so far; an action is feasible while that stays within the budget.
    costs = ds.action_costs.copy()
    remaining_mask = np.ones(A, dtype=bool)
    policy_masks = [remaining_mask.copy()]

    # While there are feasible actions, consider them.
    for iteration in xrange(A):
        print('-'*80)
        print('Iteration {}'.format(iteration))

        feas_inds = np.flatnonzero(remaining_mask & (costs <= ds.max_budget))
        if feas_inds.size == 0:
            break

        # Train classifier with new mask distribution.
        if self.clf_method != 'imagenet':
            new_masks = []
            for candidate in feas_inds:
                candidate_mask = remaining_mask.copy()
                candidate_mask[candidate] = False
                new_masks.append(candidate_mask)

            md = tc.MaskDistribution()
            md.update(np.array(policy_masks + new_masks))
            N = N_train * ((iteration + 1) + len(feas_inds))
            states_, labels_ = get_states_from_mask_distribution(
                ds, md, instances_train, labels_train, N, mi)
            clf, score_train, entropy_train = get_classifier(
                ds, states_, labels_, self.num_clf, num_workers)

        # Evaluate the infogain of individual features.
        for action_ind in feas_inds:
            print(ds.actions[action_ind]),
            states_val = get_states(
                ds, instances_val, action_inds + [action_ind], mi)
            new_score_val, new_entropy_val = eval_classifier(
                clf, states_val, labels_val)
            infogains[action_ind, iteration] = entropy_val - new_entropy_val
            scores[action_ind, iteration] = new_score_val
            entropies[action_ind, iteration] = new_entropy_val

        # Infeasible actions still hold NaN and are ignored by nanargmax.
        rewards = infogains[:, iteration] / ds.action_costs
        ind = bn.nanargmax(rewards)
        action_inds.append(ind)
        entropy_val = entropies[ind, iteration]
        print('Selected {} with infogain {:.2f} and cost {:.2f}'.format(ds.actions[ind], infogains[ind, iteration], ds.action_costs[ind]))

        remaining_mask[ind] = False
        costs += ds.action_costs[ind]
        policy_masks.append(remaining_mask.copy())

    # Fit imputer with all data
    self.mi = mi.fit(instances)

    # Train final classifier, with the final masks and on full data
    if self.clf_method != 'imagenet':
        md = tc.MaskDistribution()
        md.update(np.array(policy_masks))
        N = N_train * len(policy_masks)
        states_, labels_ = get_states_from_mask_distribution(
            ds, md, instances, labels, N, self.mi)
        clf, score, entropy = get_classifier(
            ds, states_, labels_, self.num_clf, num_workers)

    actions = np.take(ds.actions, action_inds)
    print('Selected actions in order: {}'.format(actions))

    self.clf = clf
    self.action_inds = action_inds
    self.has_been_fit = True

    # NOTE(review): `rewards` is only bound inside the loop above, so this
    # call fails with NameError when no iteration completes - confirm that
    # cannot happen for the datasets in use.
    if debug_plots:
        self.plot_stuff(scores, entropies, infogains, rewards)
    self.save()
def time_nanargmax(self, dtype, shape):
    """Timing body: call ``bn.nanargmax`` on ``self.arr`` with default arguments.

    ``dtype`` and ``shape`` are accepted but not read here; the array under
    test is taken from ``self.arr``.  The result is discarded.
    """
    data = self.arr
    bn.nanargmax(data)
def argf(self, *args, **kwargs):
    """Return ``bn.nanargmax`` applied to the given positional and keyword arguments.

    ``self`` is not used; every argument is forwarded unchanged.
    """
    return bn.nanargmax(*args, **kwargs)


class Extremum(ch.Ch):
def aff_cluster(Sfn, conv_iter=15, max_iter=2000, damping=0.95,
                mpi=None, verbose=False, debug=False, *args, **kwargs):
    """Cluster a square matrix stored in HDF5 by damped responsibility /
    availability message passing, distributed over MPI ranks.

    The dataset 'cluster' of the HDF5 file `Sfn` is read in row blocks; each
    rank owns the rows returned by ``task(N, NPROCS, rank)``.  Responsibility
    and availability matrices are updated iteratively (the availability and
    clipped-responsibility matrices live in a shared temporary HDF5 file)
    until every point's exemplar flag has been constant over the last
    `conv_iter` iterations, or `max_iter` iterations have run.  On rank 0 the
    results are written back into `Sfn` as the datasets 'aff_labels' and
    'aff_centers'.

    Parameters
    ----------
    Sfn : str
        Path of the HDF5 file holding the square dataset 'cluster', which
        must carry a 'preference' attribute.
    conv_iter : int
        Number of consecutive iterations over which the exemplar flags must
        stay unchanged; must satisfy 0 < conv_iter < max_iter.
    max_iter : int
        Maximum number of message-passing iterations.
    damping : float
        Weight of the previous messages in each update; must lie in [0.5, 1).
    mpi : sequence
        Unpacked as ``(comm, NPROCS, rank)``.  The default ``None`` cannot be
        unpacked, so callers have to supply it.
    verbose : bool
        Print parameters, memory estimates and per-iteration timings.
    debug, *args, **kwargs
        Not read by this function.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        On rank 0, when the matrix is not square, the 'preference' attribute
        cannot be read, or conv_iter / max_iter / damping are out of range.
    KeyError
        When the environment variable OMPI_COMM_WORLD_LOCAL_SIZE is not set.
    """

    comm, NPROCS, rank = mpi

    # Number of processes sharing this node; used to split available memory.
    NPROCS_LOCAL = int(os.environ['OMPI_COMM_WORLD_LOCAL_SIZE'])

    #Init storage for matrices
    #Get file name
    #Open matrix file in parallel mode
    SSf = h5py.File(Sfn, 'r+', driver='mpio', comm=comm)
    SSf.atomic = True
    #Open table with data for clusterization
    SS = SSf['cluster']
    SSs = SS.id.get_space()

    # Run parameters decided on rank 0 and broadcast to the other ranks.
    params = {
        'N': 0,
        'l': 0,
        'll': 0,
        'TMfn': '',
        'disk': False,
        'preference': 0.0
    }

    P = Bunch(params)

    ft = np.float32

    if rank == 0:

        N, N1 = SS.shape

        if N != N1:
            raise ValueError("S must be a square array \
                             (shape=%s)" % repr((N, N1)))
        else:
            P.N = N

        # NOTE(review): the bare except also hides errors unrelated to a
        # missing attribute.
        try:
            preference = SS.attrs['preference']
        except:
            raise ValueError('Unable to get preference from cluster matrix')

        # NOTE(review): the message says "> 0" but 0 passes this check; it is
        # rejected by the conv_iter check below instead.
        if max_iter < 0:
            raise ValueError('max_iter must be > 0')

        if not 0 < conv_iter < max_iter:
            raise ValueError('conv_iter must lie in \
                             interval between 0 and max_iter')

        if damping < 0.5 or damping >= 1:
            raise ValueError('damping must lie in interval between 0.5 and 1')

        # `preference` is only reported here; it is not used in the
        # computation below.
        print '#' * 10, 'Main params', '#' * 10
        print 'preference: %.3f' % preference
        print 'damping: %.3f' % damping
        print 'conv_iter: %d' % conv_iter
        print 'max_iter: %d' % max_iter
        print '#' * 31

        # Unique base name for the temporary HDF5 files.
        P.TMbfn = str(uuid.uuid1())
        P.TMfn = P.TMbfn + '.hdf5'

        # Magic 4 to fit MPI.Gather
        # (presumably because the int8 flag buffers are gathered as MPI.INT
        # further down - TODO confirm)
        # Rows/columns beyond a multiple of NPROCS * 4 are dropped.
        r = N % (NPROCS * 4)
        N -= r
        l = N // NPROCS
        if r > 0:
            print 'Truncating matrix to %sx%s to fit on %d procs' \
                % (N, N, NPROCS)
        P.N = N

        # Fit to memory
        MEM = psutil.virtual_memory().available / NPROCS_LOCAL
        # MEM = 500 * 10 ** 6
        # ts: memory budgeted per matrix row (itemsize * N, scaled by 8 * 1.1)
        ts = np.dtype(ft).itemsize * N
        # Python give bits
        ts *= 8 * 1.1
        # Allocate memory for e, tE, and ...
        # MEM -= ts
        # ----
        # tl: number of rows that fit into the per-process memory budget
        tl = int(MEM // ts)
        # Allocate memory for tS, tA, tR....

        def adjust_cache(tl, l):
            # Shrink tl until it divides l evenly.
            while float(l) % float(tl) > 0:
                tl -= 1
            return tl

        if tl < l:
            # Not all rows of a rank fit in memory: process them in blocks of
            # ll rows and keep S and R in a per-rank file on disk.
            P.disk = True
            # `cache` is assigned here but not read anywhere afterwards.
            try:
                cache = 0
                # cache = int(sys.argv[1])
                # print sys.argv[1]
                assert cache < l
            except:
                cache = tl
                #print 'Wrong cache settings, set cache to %d' % tl
            tl = adjust_cache(tl, l)
            P.l = l
            P.ll = tl
        else:
            P.l = l
            P.ll = l

        if verbose:
            print "Available memory per process: %.2fG" % (MEM / 10.0**9)
            print "Memory per row: %.2fM" % (ts / 10.0**6)
            print "Estimated memory per process: %.2fG" \
                % (ts * P.ll / 10.0 ** 9)
            print 'Cache size is %d of %d' % (P.ll, P.l)

    P = comm.bcast(P)

    N = P.N    # matrix size after truncation
    l = P.l    # rows owned by each rank
    ll = P.ll  # rows processed per block

    # In-memory dataspaces for a block of ll rows and for a single row.
    ms = h5s.create_simple((ll, N))
    ms_l = h5s.create_simple((N, ))

    # [tb, te) is the row range of this rank.
    tb, te = task(N, NPROCS, rank)

    tS = np.ndarray((ll, N), dtype=ft)
    tSl = np.ndarray((N, ), dtype=ft)

    disk = P.disk

    if disk is True:
        # Per-rank scratch file holding this rank's rows of S (and R below).
        TMLfd = tempfile.mkdtemp()
        TMLfn = osp(TMLfd, P.TMbfn + '_' + str(rank) + '.hdf5')
        TMLf = h5py.File(TMLfn, 'w')
        TMLf.atomic = True

        S = TMLf.create_dataset('S', (l, N), dtype=ft)
        Ss = S.id.get_space()

    #Copy input data and
    #place preference on diagonal
    # NOTE(review): nothing below writes the preference onto the diagonal;
    # presumably the stored matrix already contains it - confirm.
    # z: most negative finite value of ft, used to mask out the row maximum.
    z = -np.finfo(ft).max

    for i in range(tb, te, ll):
        SSs.select_hyperslab((i, 0), (ll, N))
        SS.id.read(ms, SSs, tS)

        if disk is True:
            Ss.select_hyperslab((i - tb, 0), (ll, N))
            S.id.write(ms, Ss, tS)

    if disk is True:
        R = TMLf.create_dataset('R', (l, N), dtype=ft)
        Rs = R.id.get_space()

    tRold = np.zeros((ll, N), dtype=ft)
    tR = np.zeros((ll, N), dtype=ft)
    # Diagonal of R for the rows of this rank.
    tdR = np.zeros((l, ), dtype=ft)

    #Shared storage
    TMf = h5py.File(P.TMfn, 'w', driver='mpio', comm=comm)
    TMf.atomic = True

    # Rp: R clipped at zero except on the diagonal (written row-wise,
    # read column-wise in the availability step).
    Rp = TMf.create_dataset('Rp', (N, N), dtype=ft)
    Rps = Rp.id.get_space()

    tRp = np.ndarray((ll, N), dtype=ft)
    tRpa = np.ndarray((N, ll), dtype=ft)

    A = TMf.create_dataset('A', (N, N), dtype=ft)
    As = A.id.get_space()

    tAS = np.ndarray((ll, N), dtype=ft)
    tAold = np.ndarray((N, ll), dtype=ft)
    tA = np.ndarray((N, ll), dtype=ft)
    # Diagonal of A for the columns of this rank.
    tdA = np.ndarray((l, ), dtype=ft)

    # e keeps the exemplar flags of the last conv_iter iterations
    # (one column per iteration, used as a ring buffer).
    e = np.ndarray((N, conv_iter), dtype=np.int8)
    tE = np.ndarray((N, ), dtype=np.int8)
    ttE = np.ndarray((l, ), dtype=np.int8)

    converged = False
    cK = 0
    K = 0
    ind = np.arange(ll)

    for it in range(max_iter):
        if rank == 0:
            if verbose is True:
                print '=' * 10 + 'It %d' % (it) + '=' * 10
                tit = time.time()
        # Compute responsibilities
        for i in range(tb, te, ll):
            if disk is True:
                il = i - tb
                Ss.select_hyperslab((il, 0), (ll, N))
                S.id.read(ms, Ss, tS)
                #tS = S[i, :]
                Rs.select_hyperslab((il, 0), (ll, N))
                R.id.read(ms, Rs, tRold)
            else:
                tRold = tR.copy()

            As.select_hyperslab((i, 0), (ll, N))
            A.id.read(ms, As, tAS)
            #Tas = a[I, :]
            tAS += tS
            #tRold = R[i, :]

            # Largest and second largest value of A + S in every row: the
            # maximum is overwritten with z before taking the maximum again.
            tI = bn.nanargmax(tAS, axis=1)
            tY = tAS[ind, tI]
            tAS[ind, tI[ind]] = z
            tY2 = bn.nanmax(tAS, axis=1)

            # Subtract the row maximum everywhere, except at the position of
            # the maximum itself, where the runner-up is subtracted.
            tR = tS - tY[:, np.newaxis]
            tR[ind, tI[ind]] = tS[ind, tI[ind]] - tY2[ind]
            tR = (1 - damping) * tR + damping * tRold

            tRp = np.maximum(tR, 0)

            # Keep the unclipped diagonal in Rp and remember it in tdR.
            for il in range(ll):
                tRp[il, i + il] = tR[il, i + il]
                tdR[i - tb + il] = tR[il, i + il]

            if disk is True:
                R.id.write(ms, Rs, tR)
                #R[i, :] = tR

            Rps.select_hyperslab((i, 0), (ll, N))
            Rp.id.write(ms, Rps, tRp)

            #Rp[i, :] = tRp
        if rank == 0:
            if verbose is True:
                teit1 = time.time()
                print 'R T %s' % (teit1 - tit)

        # All ranks must have written their rows of Rp before columns are read.
        comm.Barrier()

        # Compute availabilities
        for j in range(tb, te, ll):

            As.select_hyperslab((0, j), (N, ll))

            if disk is True:
                A.id.read(ms, As, tAold)
            else:
                tAold = tA.copy()

            Rps.select_hyperslab((0, j), (N, ll))
            Rp.id.read(ms, Rps, tRpa)
            #tRp = Rp[:, j]

            # Column sums of Rp minus the entry itself.
            tA = bn.nansum(tRpa, axis=0)[np.newaxis, :] - tRpa
            # Save the diagonal, clip everything at zero, restore the diagonal.
            for jl in range(ll):
                tdA[j - tb + jl] = tA[j + jl, jl]

            tA = np.minimum(tA, 0)

            for jl in range(ll):
                tA[j + jl, jl] = tdA[j - tb + jl]

            tA *= (1 - damping)
            tA += damping * tAold

            # Remember the damped diagonal for the exemplar test below.
            for jl in range(ll):
                tdA[j - tb + jl] = tA[j + jl, jl]

            A.id.write(ms, As, tA)

        if rank == 0:
            if verbose is True:
                teit2 = time.time()
                print 'A T %s' % (teit2 - teit1)

        # A point is flagged as exemplar when diag(A) + diag(R) > 0.
        ttE = np.array(((tdA + tdR) > 0), dtype=np.int8)

        if NPROCS > 1:
            comm.Gather([ttE, MPI.INT], [tE, MPI.INT])
            comm.Bcast([tE, MPI.INT])
        else:
            tE = ttE
        e[:, it % conv_iter] = tE
        pK = K
        # K: current number of exemplars.
        K = bn.nansum(tE)

        if rank == 0:
            if verbose is True:
                teit = time.time()
                cc = ''
                # NOTE(review): cK goes from 0 to 1 and then matches neither
                # branch, so `cc` appears never to be filled in - confirm
                # the intended counter logic.
                if K == pK:
                    if cK == 0:
                        cK += 1
                    elif cK > 1:
                        cc = ' Conv %d of %d' % (cK, conv_iter)
                else:
                    cK = 0

                print 'Total K %d T %s%s' % (K, teit - tit, cc)

        if it >= conv_iter:

            if rank == 0:
                # Converged when every point was an exemplar in all of the
                # last conv_iter iterations or in none of them, and at least
                # one exemplar exists.
                se = bn.nansum(e, axis=1)
                converged = (bn.nansum((se == conv_iter) + (se == 0)) == N)

                if (converged == np.bool_(True)) and (K > 0):
                    if verbose is True:
                        print("Converged after %d iterations." % (it))
                    converged = True
                else:
                    converged = False

            converged = comm.bcast(converged, root=0)

        if converged is True:
            break

    if not converged and verbose and rank == 0:
        print("Failed to converge after %d iterations." % (max_iter))

    if K > 0:

        # I: indices of the exemplars; C: cluster label of every point.
        I = np.nonzero(e[:, 0])[0]
        C = np.zeros((N, ), dtype=np.int)
        tC = np.zeros((l, ), dtype=np.int)

        # Assign every local row to the exemplar with the largest value in
        # that row.
        for i in range(l):
            if disk is True:
                Ss.select_hyperslab((i, 0), (1, N))
                S.id.read(ms_l, Ss, tSl)
            else:
                tSl = tS[i]

            tC[i] = bn.nanargmax(tSl[I])

        comm.Gather([tC, MPI.INT], [C, MPI.INT])

        if rank == 0:
            # Exemplars label themselves.
            C[I] = np.arange(K)

        comm.Bcast([C, MPI.INT])

        # Refine: within each cluster pick as exemplar the member with the
        # largest summed value over the members; the candidate columns are
        # distributed round-robin over the ranks.
        for k in range(K):
            ii = np.where(C == k)[0]
            tN = ii.shape[0]

            tI = np.zeros((tN, ), dtype=np.float32)
            ttI = np.zeros((tN, ), dtype=np.float32)
            tttI = np.zeros((tN, ), dtype=np.float32)
            ms_k = h5s.create_simple((tN, ))

            j = rank
            while j < tN:
                ind = [(ii[i], ii[j]) for i in range(tN)]
                SSs.select_elements(ind)
                SS.id.read(ms_k, SSs, tttI)

                ttI[j] = bn.nansum(tttI)
                j += NPROCS

            comm.Reduce([ttI, MPI.FLOAT], [tI, MPI.FLOAT])

            if rank == 0:
                I[k] = ii[bn.nanargmax(tI)]

        I.sort()
        comm.Bcast([I, MPI.INT])

        # Re-assign the labels against the refined exemplars.
        for i in range(l):
            if disk is True:
                Ss.select_hyperslab((i, 0), (1, N))
                S.id.read(ms_l, Ss, tSl)
            else:
                tSl = tS[i]

            tC[i] = bn.nanargmax(tSl[I])

        comm.Gather([tC, MPI.INT], [C, MPI.INT])

        if rank == 0:
            C[I] = np.arange(K)

    else:
        if rank == 0:
            # No exemplars: 0-d placeholders.
            I = np.zeros(())
            C = np.zeros(())

    #Cleanup
    SSf.close()
    TMf.close()

    if disk is True:
        TMLf.close()
        shutil.rmtree(TMLfd)

    comm.Barrier()

    if rank == 0:

        os.remove(P.TMfn)

        if verbose:
            print 'APN: %d' % K

        if I.size and C.size:

            # Re-open the input file serially and replace any previous results.
            Sf = h5py.File(Sfn, 'r+', driver='sec2')

            if 'aff_labels' in Sf.keys():
                del Sf['aff_labels']

            LM = Sf.require_dataset('aff_labels', shape=C.shape, dtype=np.int)
            LM[:] = C[:]

            if 'aff_centers' in Sf.keys():
                del Sf['aff_centers']

            CM = Sf.require_dataset('aff_centers', shape=I.shape, dtype=np.int)
            CM[:] = I[:]
            Sf.close()
def _normalize_log_probs(probs):
    """Exponentiate log-probabilities after normalising them.

    Every entry is shifted by the largest (NaN-ignoring) entry, then the log
    of ``1 + sum(exp(shifted))`` over all *other* entries is subtracted
    before exponentiating.  NaN entries are skipped in the sum.
    """
    peak = bn.nanargmax(probs)
    shifted = probs - probs[peak]
    # The peak contributes exp(0) == 1, which log1p supplies, so it is left
    # out of the explicit sum.
    not_peak = np.arange(probs.size) != peak
    log_total = np.log1p(bn.nansum(np.exp(shifted[not_peak])))
    return np.exp(shifted - log_total)
def time_nanargmax(self, dtype, shape, order, axis):
    """Timing body: call ``bn.nanargmax`` on ``self.arr`` along ``axis``.

    ``dtype``, ``shape`` and ``order`` are accepted but not read here; the
    array under test is taken from ``self.arr``.  The result is discarded.
    """
    data = self.arr
    bn.nanargmax(data, axis=axis)
il = i - tb Ss.select_hyperslab((il, 0), (ll, N)) S.id.read(ms, Ss, tS) #tS = S[i, :] Rs.select_hyperslab((il, 0), (ll, N)) R.id.read(ms, Rs, tRold) else: tRold = tR.copy() As.select_hyperslab((i, 0), (ll, N)) A.id.read(ms, As, tAS) #Tas = a[I, :] tAS += tS #tRold = R[i, :] tI = bn.nanargmax(tAS, axis=1) tY = tAS[ind, tI] tAS[ind, tI[ind]] = z tY2 = bn.nanmax(tAS, axis=1) tR = tS - tY[:, np.newaxis] tR[ind, tI[ind]] = tS[ind, tI[ind]] - tY2[ind] tR = (1 - damping) * tR + damping * tRold tRp = np.maximum(tR, 0) for il in xrange(ll): tRp[il, i + il] = tR[il, i + il] tdR[i - tb + il] = tR[il, i + il] if disk is True:
def fit(self, X, y):
    """
    Fits the MI_FS feature selection with the chosen MI_FS method.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples]
        The target values.

    Returns
    -------
    self
        The fitted selector with ``n_features_``, ``ranking_``, ``mi_`` and
        ``_support_mask`` set.
    """

    def report_jmim_pair(fmm, S, selected):
        # Verbose helper: print the largest column-wise minimum of fmm, the
        # already selected feature whose row attains it, and the new pick.
        col_min = bn.nanmin(fmm, axis=0)
        jmim = bn.nanmax(col_min)
        jmi_vals = fmm[:, bn.nanargmax(col_min)]
        jmi_idx = np.where(jmi_vals == jmim)[0]
        print(jmim, S[jmi_idx[0]], selected)

    # Negative n_jobs counts back from the number of cores (-1 -> all cores,
    # -2 -> all but one, ...).  The previous `NUM_CORES - n_jobs` *added*
    # workers instead, asking for more jobs than there are cores.
    if self.n_jobs < 0:
        self.n_jobs = max(1, NUM_CORES + 1 + self.n_jobs)

    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features
    F = list(range(p))

    # Row s holds the MI scores of the s-th selected feature against the
    # features still in F; NaN marks "not computed".
    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ---------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ---------------------------------------------------------------------

    xy_MI = np.array(mimy.get_first_mi_vector(self, self.k))

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ---------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ---------------------------------------------------------------------

    if self.n_features == 'auto':
        n_features = np.inf
    else:
        n_features = self.n_features

    # Also stop once every feature has been selected: with an empty F the
    # arg-max below has nothing to choose from and would raise.
    while len(S) < n_features and len(F) > 0:
        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        # Calculate s-th row of feature_mi_matrix which contains the JMI
        # score of the last element in S with all remaining features in F
        feature_mi_matrix[s, F] = mimy.get_mi_vector(self, F, S[-1])

        # make decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            # Which feature in F has the largest \sum_{s\in S}
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
            # Find out which pair of features is the jmim for
            if self.verbose > 0:
                report_jmim_pair(fmm, S, selected)

        elif self.method == 'JMIM':
            fmm_min = bn.nanmin(fmm, axis=0)
            if bn.allnan(fmm_min):
                break
            selected = F[bn.nanargmax(fmm_min)]
            # Find out which pair of features is the jmim for
            if self.verbose > 0:
                report_jmim_pair(fmm, S, selected)

        elif self.method == 'MRMR':
            fmm_mean = bn.nanmean(fmm, axis=0)
            if bn.allnan(fmm_mean):
                break
            MRMR = xy_MI[F] - fmm_mean
            selected = F[bn.nanargmax(MRMR)]
            S_mi.append(bn.nanmax(MRMR))

        # record the JMIM of the newly selected feature and add it to S
        if self.method != 'MRMR':
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', let's check the S_mi to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of previously sel
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ---------------------------------------------------------------------
    # SAVE RESULTS
    # ---------------------------------------------------------------------

    self.n_features_ = len(S)
    # The `np.bool` alias was removed in NumPy 1.24; the builtin is the
    # same dtype on every NumPy version.
    self._support_mask = np.zeros(p, dtype=bool)
    self._support_mask[S] = True
    self.ranking_ = S
    self.mi_ = S_mi

    return self
def argf(self, *args, **kwargs):
    """Forward every positional and keyword argument to ``bn.nanargmax``.

    ``self`` is not used; the value returned by ``bn.nanargmax`` is passed
    back unchanged.
    """
    index = bn.nanargmax(*args, **kwargs)
    return index