def svm_gradient_batch_fast(X_pred, X_exp, y, X_pred_ids, X_exp_ids, w, C=.0001, sigma=1.):
    # sample Kernel
    rnpred = X_pred_ids  # sp.random.randint(low=0,high=len(y),size=n_pred_samples)
    rnexpand = X_exp_ids  # sp.random.randint(low=0,high=len(y),size=n_expand_samples)
    # K = GaussKernMini_fast(X_pred.T,X_exp.T,sigma)
    X1 = X_pred.T
    X2 = X_exp.T
    if sp.sparse.issparse(X1):
        G = sp.outer(X1.multiply(X1).sum(axis=0), sp.ones(X2.shape[1]))
    else:
        G = sp.outer((X1 * X1).sum(axis=0), sp.ones(X2.shape[1]))
    if sp.sparse.issparse(X2):
        H = sp.outer(X2.multiply(X2).sum(axis=0), sp.ones(X1.shape[1]))
    else:
        H = sp.outer((X2 * X2).sum(axis=0), sp.ones(X1.shape[1]))
    K = sp.exp(-(G + H.T - 2. * fast_dot(X1.T, X2)) / (2. * sigma ** 2))
    # K = sp.exp(-(G + H.T - 2.*(X1.T.dot(X2)))/(2.*sigma**2))
    if sp.sparse.issparse(X1) | sp.sparse.issparse(X2):
        K = sp.array(K)
    # compute predictions
    yhat = fast_dot(K, w[rnexpand])
    # compute whether or not prediction is in margin
    inmargin = (yhat * y[rnpred]) <= 1
    # compute gradient
    G = C * w[rnexpand] - fast_dot((y[rnpred] * inmargin), K)
    return G, rnexpand

def _gradient_func(self, w):
    bias, wf = self._split_coefficents(w)

    l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(wf)
    x = self._counter.x

    xw = self._xw
    z = numexpr.evaluate('(l_plus + l_minus) * xw - xv_plus - xv_minus - l_minus + l_plus')

    grad = wf + self._rank_penalty * fast_dot(x.T, z)
    if self._has_time:
        xc = x.compress(self.regr_mask, axis=0)
        xcs = numpy.dot(xc, wf)
        grad += self._regr_penalty * (fast_dot(xc.T, xcs)
                                      + xc.sum(axis=0) * bias
                                      - fast_dot(xc.T, self.y_compressed))

        # intercept
        if self._fit_intercept:
            grad_intercept = self._regr_penalty * (xcs.sum()
                                                   + xc.shape[0] * bias
                                                   - self.y_compressed.sum())
            grad = numpy.concatenate(([grad_intercept], grad))

    return grad

def gain(self, X, y):
    H = np.zeros(X.shape[1])
    y = np.array(y)
    i = 0
    batch = 100000
    n_features = X.shape[1]
    n_examples = X.shape[0]
    p = np.zeros(shape=(2, 2, X.shape[1]))
    while i < n_features:
        if n_features - i < batch:
            batch = n_features - i
        X_batch_raw = X[:, i:(i + batch)]
        X_batch = X_batch_raw.toarray()
        p[1, 1, i:(i + batch)] = y * X_batch_raw
        p[1, 0, i:(i + batch)] = np.fabs(y - 1) * X_batch_raw
        p[0, 1, i:(i + batch)] = fast_dot(y, np.fabs(X_batch - 1))
        p[0, 0, i:(i + batch)] = fast_dot(np.fabs(y - 1), np.fabs(X_batch - 1))
        p_batch = p[:, :, i:(i + batch)] / n_examples
        p_sum = np.sum(p_batch, axis=0)
        s = X_batch_raw.sum(axis=0)
        p_x = np.array([s, 1 - s])
        H[i:(i + batch)] = (np.sum(p_batch * np.log(p_batch + self.smoother))
                            - 4 * np.sum(np.multiply(p_x, p_sum)))
        i += batch
        print(i / X.shape[1])
    return H

def _ica_par(X, tol, g, fun_args, max_iter, w_init):
    """Parallel FastICA.

    Used internally by FastICA --main loop
    """
    W = _sym_decorrelation(w_init)
    del w_init
    p_ = float(X.shape[1])
    for ii in moves.xrange(max_iter):
        U = fast_dot(W, X)
        gwtx, g_wtx = g(np.abs(U) ** 2, fun_args)
        W1 = _sym_decorrelation(fast_dot(gwtx * U, np.conj(X.T)) / p_
                                - g_wtx[:, np.newaxis] * W)
        del gwtx, g_wtx
        # builtin max, abs are faster than numpy counterparts.
        lim = max(abs(abs(np.diag(fast_dot(W1, np.conj(W.T)))) - 1))
        W = W1
        if lim < tol:
            break
    else:
        warnings.warn('FastICA did not converge. Consider increasing '
                      'tolerance or the maximum number of iterations.')
    return W, ii + 1

def _gradient_func(self, beta_bias):
    bias, beta = self._split_coefficents(beta_bias)

    K = self._counter.x
    Kw = self._Kw

    l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(beta)
    z = numexpr.evaluate('(l_plus + l_minus) * Kw - xv_plus - xv_minus - l_minus + l_plus')

    gradient = Kw + self._rank_penalty * fast_dot(K, z)
    if self._has_time:
        K_comp = K.compress(self.regr_mask, axis=0)
        K_comp_beta = numpy.dot(K_comp, beta)
        gradient += self._regr_penalty * (fast_dot(K_comp.T, K_comp_beta)
                                          + K_comp.sum(axis=0) * bias
                                          - fast_dot(K_comp.T, self.y_compressed))

        # intercept
        if self._fit_intercept:
            grad_intercept = self._regr_penalty * (K_comp_beta.sum()
                                                   + K_comp.shape[0] * bias
                                                   - self.y_compressed.sum())
            gradient = numpy.concatenate(([grad_intercept], gradient))

    return gradient

def _hessian_func(self, w, s):
    l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(s)
    x = self._counter.x

    xs = numpy.dot(x, s)
    xs = numexpr.evaluate('(l_plus + l_minus) * xs - xv_plus - xv_minus')

    if self._has_time:
        xc = x.compress(self.regr_mask, axis=0)
        s = s + self._regr_penalty * fast_dot(xc.T, numpy.dot(xc, s))

    return s + self._rank_penalty * fast_dot(x.T, xs)

def svm_gradient_batch(X_pred, X_exp, y, X_pred_ids, X_exp_ids, w, C=.0001, sigma=1.):
    # sample Kernel
    rnpred = X_pred_ids  # sp.random.randint(low=0,high=len(y),size=n_pred_samples)
    rnexpand = X_exp_ids  # sp.random.randint(low=0,high=len(y),size=n_expand_samples)
    K = GaussKernMini(X_pred.T, X_exp.T, sigma)
    # compute predictions
    yhat = fast_dot(K, w[rnexpand])
    # compute whether or not prediction is in margin
    inmargin = (yhat * y[rnpred]) <= 1
    # compute gradient
    G = C * w[rnexpand] - fast_dot((y[rnpred] * inmargin), K)
    return G, rnexpand

def zca_whitening(self, image, eps):
    """
    N = 1
    X = image[:,:].reshape((N, -1)).astype(np.float64)
    X = check_array(X, dtype=[np.float64], ensure_2d=True, copy=True)

    # Center data
    self.mean_ = np.mean(X, axis=0)
    print(X.shape)
    X -= self.mean_
    U, S, V = linalg.svd(X, full_matrices=False)
    # flip eigenvectors' sign to enforce deterministic output
    U, V = svd_flip(U, V)
    zca_matrix = U.dot(np.diag(1.0/np.sqrt(np.diag(S) + 1))).dot(U.T)  # ZCA whitening matrix
    return fast_dot(zca_matrix, X).reshape(image.shape)  # data whitening
    """
    image = self.local_contrast_normalization(image)
    N = 1
    X = image.reshape((N, -1))
    pca = PCA(whiten=True, svd_solver='full', n_components=X.shape[-1])
    transformed = pca.fit_transform(X)  # return U
    pca.whiten = False
    zca = fast_dot(transformed, pca.components_ + eps) + pca.mean_
    # zca = pca.inverse_transform(transformed)
    return zca.reshape(image.shape)

def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle, random_state):
    """Helper function for _fit_coordinate_descent

    Update W to minimize the objective function, iterating once over all
    coordinates. By symmetry, to update H, one can call
    _update_coordinate_descent(X.T, Ht, W, ...)
    """
    n_components = Ht.shape[1]

    HHt = fast_dot(Ht.T, Ht)
    XHt = safe_sparse_dot(X, Ht)

    # L2 regularization corresponds to increase of the diagonal of HHt
    if l2_reg != 0.:
        # adds l2_reg only on the diagonal
        HHt.flat[::n_components + 1] += l2_reg
    # L1 regularization corresponds to decrease of each element of XHt
    if l1_reg != 0.:
        XHt -= l1_reg

    if shuffle:
        permutation = random_state.permutation(n_components)
    else:
        permutation = np.arange(n_components)
    # The following seems to be required on 64-bit Windows w/ Python 3.5.
    permutation = np.asarray(permutation, dtype=np.intp)
    return _update_cdnmf_fast(W, HHt, XHt, permutation)

def sliding_window(image, window_size, step_size):
    pair = np.mgrid[890:1400:step_size[0], 250:730:step_size[1], 0.0:6.28:5].reshape(3, -1).T
    for pts in pair:
        xx = int(pts[0])
        yy = int(pts[1])
        tt = pts[2]
        # crop = image[yy-halfy:yy+halfy+1, xx-halfx:xx+halfx+1]
        crop = np.mgrid[yy - halfy:yy + halfy + 1, xx - halfx:xx + halfx + 1].reshape(2, -1).T
        crop[:, [0, 1]] = crop[:, [1, 0]]
        col = crop.shape[0]
        newp = np.ones((col, 3))
        newp[:, :-1] = crop
        transform = np.array([
            [np.cos(tt), -np.sin(tt), -xx * np.cos(tt) + xx + yy * np.sin(tt)],
            [np.sin(tt), np.cos(tt), -xx * np.sin(tt) - yy * np.cos(tt) + yy],
            [0, 0, 1]])
        newp = fast_dot(transform, newp.T)
        newp = newp[0:2, :]
        newp = np.transpose(newp).astype(int)
        imx = newp[:, 0]
        imy = newp[:, 1]
        newim = np.array(image[imy, imx]).reshape((window_size[1], window_size[0]))
        print(xx, yy, tt)
        newim = read_one_image(newim)
        # print(xx, yy, newim.shape)
        yield (xx, yy, tt, newim)

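# A minimal sketch (not part of the snippet above; plain NumPy with illustrative values for
# xx, yy and tt) of why sliding_window() builds that 3x3 homogeneous matrix: it is a rotation
# about the window centre (xx, yy), so the centre is a fixed point and the cropped grid rotates
# in place around it.
import numpy as np

xx, yy, tt = 1000.0, 400.0, 0.7  # hypothetical window centre and rotation angle
transform = np.array([
    [np.cos(tt), -np.sin(tt), -xx * np.cos(tt) + xx + yy * np.sin(tt)],
    [np.sin(tt), np.cos(tt), -xx * np.sin(tt) - yy * np.cos(tt) + yy],
    [0, 0, 1],
])
centre = np.array([xx, yy, 1.0])
assert np.allclose(transform.dot(centre), centre)  # the centre maps to itself
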
def _gradient_func(self, w):
    # sum over columns without running into overflow problems
    # scipy.sparse.spmatrix.sum uses dtype of matrix, which is too small
    col_sum = numpy.asmatrix(numpy.ones((1, self.Aw.shape[0]), dtype=numpy.int_)) * self.Aw
    v = numpy.asarray(col_sum).squeeze()

    z = fast_dot(self.data_x.T, self.Aw.T.dot(self.AXw) - v)

    return w + self.alpha * z

def get_bmu(self, yn):
    """Returns the ID of the best matching unit.

    Best is determined from the cosine similarity of the
    sample with the normalized Kohonen network.

    See https://en.wikipedia.org/wiki/Cosine_similarity
    for cosine similarity documentation.

    TODO: make it possible to find the second best matching unit

    Parameters
    ----------
    KN : sparse matrix
        Shape = [n_nodes, n_features]
        Must be normalized according to the l2 norm, as done by the
        sklearn Normalizer().
    y : vector of dimension 1 x n_features
        Target sample.

    Returns
    -------
    tuple : (loc, cosine_distance)
        Index of the matching unit and the corresponding cosine distance.
    """
    # d = ((self.K_-y)**2).sum(axis=1)
    # loc = np.argmin(d)
    # qe = np.sqrt(d[loc])
    similarity = fast_dot(self.KN_, yn.T)
    loc = np.argmax(similarity)
    qe = 1 / (1.0e-4 + similarity[loc]) - 1
    return loc, qe

def _update_delta(self, m, mask=None, drop_diag=False):
    self.delta_DK_M[m][:, :] = self.alpha * self.beta_M[m]
    if mask is None and not drop_diag:
        self.sumE_MK[m, :] = 1.
        self.delta_DK_M[m][:, :] += self.sumE_MK.prod(axis=0)
        assert np.isfinite(self.delta_DK_M[m]).all()
    elif mask is None and drop_diag:
        assert self.mode_dims[0] == self.mode_dims[1]
        mask = np.abs(np.identity(self.mode_dims[0]) - 1)
        if m > 1:
            tmp = np.zeros(self.n_components)
            for k in xrange(self.n_components):
                tmp[k] = (mask * np.outer(self.E_DK_M[0][:, k], self.E_DK_M[1][:, k])).sum()
            assert tmp.shape == (self.n_components,)
            self.sumE_MK[m, :] = 1.
        else:
            tmp = np.dot(mask, self.E_DK_M[np.abs(m - 1)])
            assert tmp.shape == self.E_DK_M[m].shape
        self.delta_DK_M[m][:, :] += self.sumE_MK[2:].prod(axis=0) * tmp
        assert np.isfinite(self.delta_DK_M[m]).all()
    else:
        if drop_diag:
            diag_idx = np.identity(self.mode_dims[0]).astype(bool)
            assert (mask[diag_idx] == 0).all()
        tmp = mask.copy()
        tmp, order = make_first_mode(tmp, m)
        tmp = fast_dot(tmp, self.E_DK_M[order[-1]])
        for i in xrange(self.n_modes - 2, 0, -1):
            tmp *= self.E_DK_M[order[i]]
            tmp = tmp.sum(axis=-2)
        self.delta_DK_M[m][:, :] += tmp
        assert np.isfinite(self.delta_DK_M[m]).all()

def _beta_divergence_dense(X, W, H, beta):
    """Compute the beta-divergence of X and W.H for dense array only.

    Used as a reference for testing nmf._beta_divergence.
    """
    if isinstance(X, numbers.Number):
        W = np.array([[W]])
        H = np.array([[H]])
        X = np.array([[X]])

    WH = fast_dot(W, H)

    if beta == 2:
        return squared_norm(X - WH) / 2

    WH_Xnonzero = WH[X != 0]
    X_nonzero = X[X != 0]
    np.maximum(WH_Xnonzero, 1e-9, out=WH_Xnonzero)

    if beta == 1:
        res = np.sum(X_nonzero * np.log(X_nonzero / WH_Xnonzero))
        res += WH.sum() - X.sum()
    elif beta == 0:
        div = X_nonzero / WH_Xnonzero
        res = np.sum(div) - X.size - np.sum(np.log(div))
    else:
        res = (X_nonzero ** beta).sum()
        res += (beta - 1) * (WH ** beta).sum()
        res -= beta * (X_nonzero * (WH_Xnonzero ** (beta - 1))).sum()
        res /= beta * (beta - 1)
    return res

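# A NumPy-only sketch (toy matrices, no sklearn helpers) of the beta == 1 branch above: the
# generalized Kullback-Leibler divergence D(X || WH) = sum(X * log(X / WH)) - sum(X) + sum(WH),
# with the log term restricted to the non-zero entries of X exactly as in _beta_divergence_dense.
import numpy as np

rng = np.random.RandomState(0)
W, H = rng.rand(4, 2), rng.rand(2, 3)
X = rng.rand(4, 3)
WH = W.dot(H)

nz = X != 0
kl = np.sum(X[nz] * np.log(X[nz] / WH[nz])) + WH.sum() - X.sum()
print(kl)
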
def repeated_corr(X, y, dtype=float):
    """Compute Pearson correlations between a vector and a matrix.

    Adapted from Jona-Sassenhagen's PR #L1772 on mne-python.

    Parameters
    ----------
    y : np.array, shape (n_samples)
        Data vector.
    X : np.array, shape (n_samples, n_measures)
        Data matrix onto which the vector is correlated.
    dtype : type, optional
        Data type used to compute correlation values to optimize memory.

    Returns
    -------
    rho : np.array, shape (n_measures)
    """
    from sklearn.utils.extmath import fast_dot
    if X.ndim not in [1, 2] or y.ndim != 1 or X.shape[0] != y.shape[0]:
        raise ValueError('y must be a vector, and X a matrix with an equal '
                         'number of rows.')
    if X.ndim == 1:
        X = X[:, None]
    y -= np.array(y.mean(0), dtype=dtype)
    X -= np.array(X.mean(0), dtype=dtype)
    y_sd = y.std(0, ddof=1)
    X_sd = X.std(0, ddof=1)[:, None if y.shape == X.shape else Ellipsis]
    return (fast_dot(y.T, X) / float(len(y) - 1)) / (y_sd * X_sd)

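# A small sanity check (not from the original module; NumPy only, with np.dot standing in for
# sklearn's fast_dot) of what repeated_corr computes: the Pearson r between y and every column
# of X, compared against np.corrcoef for a single column.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 4)
y = rng.randn(50)

Xc = X - X.mean(0)
yc = y - y.mean()
rho = np.dot(yc, Xc) / (len(y) - 1) / (yc.std(ddof=1) * Xc.std(0, ddof=1))

assert np.allclose(rho[0], np.corrcoef(y, X[:, 0])[0, 1])
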
def _ica_def(X, tol, g, fun_args, max_iter, w_init):
    """Deflationary FastICA using fun approx to neg-entropy function

    Used internally by FastICA.
    """
    n_components = w_init.shape[0]
    W = np.zeros((n_components, n_components), dtype=X.dtype)
    n_iter = []

    # j is the index of the extracted component
    for j in range(n_components):
        w = w_init[j, :].copy()
        w /= np.sqrt((w ** 2).sum())

        for i in moves.xrange(max_iter):
            gwtx, g_wtx = g(fast_dot(w.T, X), fun_args)

            w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w

            _gs_decorrelation(w1, W, j)

            w1 /= np.sqrt((w1 ** 2).sum())

            lim = np.abs(np.abs((w1 * w).sum()) - 1)
            w = w1
            if lim < tol:
                break

        n_iter.append(i + 1)
        W[j, :] = w

    return W, max(n_iter)

def _gradient_func(self, w):
    if self._last_w is not None and (w == self._last_w).all():
        return self._last_gradient

    l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(w)
    x = self._counter.x

    xs = numpy.dot(x, w)
    z = numexpr.evaluate('(l_plus + l_minus) * xs - xv_plus - xv_minus - l_minus + l_plus')

    if self._has_time:
        xc = x.compress(self.regr_mask, axis=0)
        w = w + self._regr_penalty * (fast_dot(xc.T, numpy.dot(xc, w))
                                      - fast_dot(xc.T, self.y_compressed))

    self._last_gradient = w + self._rank_penalty * fast_dot(x.T, z)
    self._last_w = w
    return self._last_gradient

def applyPCA(sampleData, mean, components):
    pca = PCA(n_components=components.shape[0])
    pca.components_ = components
    pca.mean_ = mean

    transform = pca.transform(np.array([sampleData]))

    reconstructed = fast_dot(transform, pca.components_) + pca.mean_
    reconstructed = reconstructed[0]

    return sampleData / reconstructed

def transform(self, mf):
    if self.sparse:
        block = safe_sparse_dot(mf.as_csr_matrix(), self.rot)
    else:
        X = mf.as_np_array()
        if self.means is not None:
            X = X - self.means
        block = fast_dot(X, self.rot) / np.sqrt(self.explained_variance)
    mf.select_columns([])  # clear the multiframe
    mf.append_np_block("pca_features", block, self.output_names)
    return mf

def get_bmu(kn, yn, epsilon=1.0e-6):
    """Returns the ID of the best matching unit.

    Best is determined from the cosine similarity of the
    sample with the normalized Kohonen network.

    See https://en.wikipedia.org/wiki/Cosine_similarity
    for cosine similarity documentation.
    """
    similarity = fast_dot(kn, yn.T)
    loc = np.argmax(similarity)
    qe = 1 / (epsilon + similarity[loc]) - 1
    return loc, qe

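# A minimal usage sketch for the idea behind get_bmu() (assumptions: dense NumPy arrays,
# sklearn's Normalizer for the l2 normalisation the docstrings ask for, and np.dot in place of
# fast_dot). With unit-norm rows the dot product is the cosine similarity, and qe grows as the
# best match gets worse.
import numpy as np
from sklearn.preprocessing import Normalizer

rng = np.random.RandomState(0)
kn = Normalizer(norm='l2').fit_transform(rng.randn(10, 5))  # normalized "Kohonen network"
yn = Normalizer(norm='l2').fit_transform(rng.randn(1, 5))   # normalized target sample

similarity = np.dot(kn, yn.T).ravel()      # cosine similarities, shape (10,)
loc = np.argmax(similarity)                # index of the best matching unit
qe = 1 / (1.0e-6 + similarity[loc]) - 1    # quantization-error style score
print(loc, qe)
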
def inverse_transform(self, X):
    """Transform data back to its original space, i.e.,
    return an input X_original whose transform would be X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_components)
        New data, where n_samples is the number of samples
        and n_components is the number of components.

    Returns
    -------
    X_original : array-like, shape (n_samples, n_features)
    """
    check_is_fitted(self, 'mean_')

    if self.whiten:
        return fast_dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
                        self.components_) + self.mean_
    else:
        return fast_dot(X, self.components_) + self.mean_

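# A short check (scikit-learn's public PCA; np.dot in place of fast_dot; no whitening) of the
# identities the PCA transform/inverse_transform snippets in this collection rely on: projection
# is (X - mean_) @ components_.T and reconstruction is X_new @ components_ + mean_.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(20, 5)
pca = PCA(n_components=3).fit(X)

X_new = np.dot(X - pca.mean_, pca.components_.T)
assert np.allclose(X_new, pca.transform(X))

X_back = np.dot(X_new, pca.components_) + pca.mean_
assert np.allclose(X_back, pca.inverse_transform(X_new))
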
def _special_dot_X(W, H, X):
    """Computes np.dot(W, H) in a special way:

    - If X is sparse, np.dot(W, H) is computed only where X is non zero,
      and a sparse matrix is returned, with the same sparsity as X.
    - If X is masked, np.dot(W, H) is computed entirely, and a masked array
      is returned, with the same mask as X.
    - If X is dense, np.dot(W, H) is computed entirely, and returned as a
      dense array.
    """
    if sp.issparse(X):
        ii, jj = X.nonzero()
        dot_vals = np.multiply(W[ii, :], H.T[jj, :]).sum(axis=1)
        WH = sp.coo_matrix((dot_vals, (ii, jj)), shape=X.shape)
        return WH.tocsr()
    elif isinstance(X, np.ma.masked_array):
        WH = np.ma.masked_array(fast_dot(W, H), mask=X.mask)
        WH.unshare_mask()
        return WH
    else:
        return fast_dot(W, H)

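# A SciPy/NumPy-only sketch (toy shapes, hypothetical density) of the sparse branch above:
# np.dot(W, H) is evaluated only at the non-zero positions of X, which keeps NMF updates cheap
# when X is large and sparse.
import numpy as np
import scipy.sparse as sparse

rng = np.random.RandomState(0)
W, H = rng.rand(4, 2), rng.rand(2, 3)
X = sparse.random(4, 3, density=0.4, random_state=0, format='csr')

ii, jj = X.nonzero()
dot_vals = np.multiply(W[ii, :], H.T[jj, :]).sum(axis=1)        # (W @ H)[ii, jj], entry by entry
WH = sparse.coo_matrix((dot_vals, (ii, jj)), shape=X.shape).tocsr()

assert np.allclose(WH.toarray()[ii, jj], W.dot(H)[ii, jj])
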
def GaussKernMini_fast(X1, X2, sigma):
    if sp.sparse.issparse(X1):
        G = sp.outer(X1.multiply(X1).sum(axis=0), sp.ones(X2.shape[1]))
    else:
        G = sp.outer((X1 * X1).sum(axis=0), sp.ones(X2.shape[1]))
    if sp.sparse.issparse(X2):
        H = sp.outer(X2.multiply(X2).sum(axis=0), sp.ones(X1.shape[1]))
    else:
        H = sp.outer((X2 * X2).sum(axis=0), sp.ones(X1.shape[1]))
    K = sp.exp(-(G + H.T - 2. * fast_dot(X1.T, X2)) / (2. * sigma ** 2))
    # K = sp.exp(-(G + H.T - 2.*(X1.T.dot(X2)))/(2.*sigma**2))
    if sp.sparse.issparse(X1) | sp.sparse.issparse(X2):
        K = sp.array(K)
    return K

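# A dense-only sketch (NumPy plus sklearn's rbf_kernel; np.dot for fast_dot) of the identity the
# two GaussKern helpers rely on: ||x1 - x2||^2 = ||x1||^2 + ||x2||^2 - 2 * x1.x2, so the
# exponentiated expression equals the Gaussian/RBF kernel with gamma = 1 / (2 * sigma**2).
# Columns of X1/X2 are the samples here, matching the .T calls in the SVM helpers above.
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X1 = rng.randn(5, 3)   # 5 features x 3 samples
X2 = rng.randn(5, 4)   # 5 features x 4 samples
sigma = 1.5

G = np.outer((X1 * X1).sum(axis=0), np.ones(X2.shape[1]))
H = np.outer((X2 * X2).sum(axis=0), np.ones(X1.shape[1]))
K = np.exp(-(G + H.T - 2. * np.dot(X1.T, X2)) / (2. * sigma ** 2))

assert np.allclose(K, rbf_kernel(X1.T, X2.T, gamma=1. / (2. * sigma ** 2)))
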
def _hessian_func(self, w, s):
    s_bias, s_feat = self._split_coefficents(s)

    l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(s_feat)
    x = self._counter.x

    xs = numpy.dot(x, s_feat)
    xs = numexpr.evaluate('(l_plus + l_minus) * xs - xv_plus - xv_minus')

    hessp = s_feat + self._rank_penalty * fast_dot(x.T, xs)
    if self._has_time:
        xc = x.compress(self.regr_mask, axis=0)
        hessp += self._regr_penalty * fast_dot(xc.T, numpy.dot(xc, s_feat))

        # intercept
        if self._fit_intercept:
            xsum = xc.sum(axis=0)
            hessp += self._regr_penalty * xsum * s_bias
            hessp_intercept = (self._regr_penalty * xc.shape[0] * s_bias
                               + self._regr_penalty * numpy.dot(xsum, s_feat))
            hessp = numpy.concatenate(([hessp_intercept], hessp))

    return hessp

def transform(self, subspace, feats):
    if self.debug:
        print('-- coding features ...')
        sys.stdout.flush()

    feats -= subspace['mean']
    transformed_feats = fast_dot(feats, subspace['components'].T)
    # transformed_feats /= np.sqrt(subspace['explained_variance'])
    return transformed_feats

def fit_transform(self, X, y=None):
    """Fit the model with X and apply the dimensionality reduction on X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    X = check_array(X)
    X = self._fit(X)
    return fast_dot(X, self.components_.T)

def _hessian_func(self, beta, s):
    s_bias, s_feat = self._split_coefficents(s)

    K = self._counter.x
    Ks = numpy.dot(K, s_feat)

    l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(s_feat)
    xs = numexpr.evaluate('(l_plus + l_minus) * Ks - xv_plus - xv_minus')

    hessian = Ks + self._rank_penalty * fast_dot(K, xs)
    if self._has_time:
        K_comp = K.compress(self.regr_mask, axis=0)
        hessian += self._regr_penalty * fast_dot(K_comp.T, numpy.dot(K_comp, s_feat))

        # intercept
        if self._fit_intercept:
            xsum = K_comp.sum(axis=0)
            hessian += self._regr_penalty * xsum * s_bias
            hessian_intercept = (self._regr_penalty * K_comp.shape[0] * s_bias
                                 + self._regr_penalty * numpy.dot(xsum, s_feat))
            hessian = numpy.concatenate(([hessian_intercept], hessian))

    return hessian

def transform_PCA(pca, k, X):
    X_reduced = X - pca.mean_
    X_reduced = fast_dot(X_reduced, pca.components_[0:k].T)
    # Transform test data with principal components:
    # X_reduced = pca.transform(test_X)

    # Reconstruct:
    X_rec = np.dot(X_reduced, pca.components_[0:k])

    # Restore mean:
    X_rec += pca.mean_

    print "Variance Explained: {}".format(np.sum(pca.explained_variance_ratio_[:k]))

    return X_reduced, X_rec

def nmf_predict_direct(rate_matrix, user_distribution, item_distribution,
                       user_ids_list, item_ids_list, top_n, fout_str):
    fout = open(fout_str, 'w')
    # method 1 : w*h
    for u_ix, u in enumerate(user_distribution):
        predict_vec = fast_dot(u, item_distribution)
        filter_vec = np.where(rate_matrix.getrow(u_ix).toarray() > 0, 0, 1)
        predict_vec = predict_vec * filter_vec
        sort_ix_vec = np.argpartition(-predict_vec[0], top_n)[:top_n]
        candidate_item_list = list()
        for i_ix in sort_ix_vec:
            item_id = item_ids_list[i_ix]
            candidate_item_list.append(item_id)
        user_id = user_ids_list[u_ix]
        print >> fout, '%s,%s' % (user_id, '#'.join(candidate_item_list))
    fout.close()

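# A tiny NumPy sketch (toy scores, hypothetical top_n) of the top-N selection used above:
# np.argpartition(-scores, top_n)[:top_n] returns the indices of the top_n largest scores in no
# particular order, which avoids the full sort an argsort would do.
import numpy as np

scores = np.array([0.1, 0.9, 0.4, 0.7, 0.2])
top_n = 2
idx = np.argpartition(-scores, top_n)[:top_n]
assert set(idx) == {1, 3}  # the two highest scores; ordering within the result is not guaranteed
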
def fit_transform(self, X, y=None):
    """Fit the model with X and apply the dimensionality reduction on X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    self._fit(X)
    if self.copy and self.center_ is not None:
        X = X - self.center_
    return fast_dot(X, self.components_.T)

def _reorth(basis, target, rows=None, alpha=0.5):
    """Reorthogonalize a vector using iterated Gram-Schmidt

    Parameters
    ----------
    basis : ndarray, shape (n_features, n_basis)
        The matrix whose rows are a set of basis to reorthogonalize against

    target : ndarray, shape (n_features,)
        The target vector to be reorthogonalized

    rows : {array-like, None}, default None
        Indices of rows from basis to use. Use all if None

    alpha : float, default 0.5
        Parameter for determining whether to do a second reorthogonalization.

    Returns
    -------
    reorthed_target : ndarray, shape (n_features,)
        The reorthogonalized vector
    """
    if rows is not None:
        basis = basis[rows]
    norm_target = norm(target)

    norm_target_old = 0
    n_reorth = 0

    while norm_target < alpha * norm_target_old or n_reorth == 0:
        for row in basis:
            t = fast_dot(row, target)
            target = target - t * row

        norm_target_old = norm_target
        norm_target = norm(target)
        n_reorth += 1

        if n_reorth > 4:
            # target in span(basis) => accept target = 0
            target = np.zeros(basis.shape[0])
            break

    return target

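# A NumPy sketch (toy orthonormal basis, np.dot for fast_dot; not the full iterated loop) of the
# single Gram-Schmidt sweep inside _reorth: subtracting each basis row's projection leaves the
# target orthogonal to every row it was reorthogonalized against.
import numpy as np

rng = np.random.RandomState(0)
q, _ = np.linalg.qr(rng.randn(5, 3))
basis = q.T                      # rows are orthonormal basis vectors, as _reorth expects
target = rng.randn(5)

for row in basis:
    target = target - np.dot(row, target) * row

assert np.allclose(np.dot(basis, target), 0)  # target is now orthogonal to the basis rows
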
def repeated_corr(X, y, dtype=float):
    """Compute Pearson correlations between a vector and a matrix.

    Adapted from Jona-Sassenhagen's PR #L1772 on mne-python.

    Parameters
    ----------
    X : np.array, shape (n_samples, n_measures)
        Data matrix onto which the vector is correlated.
    y : np.array, shape (n_samples)
        Data vector.
    dtype : type, optional
        Data type used to compute correlation values to optimize memory.

    Returns
    -------
    rho : np.array, shape (n_measures)
    """
    from sklearn.utils.extmath import fast_dot
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    if X.ndim == 1:
        X = X[:, None]
    shape = X.shape
    X = np.reshape(X, [shape[0], -1])
    if X.ndim not in [1, 2] or y.ndim != 1 or X.shape[0] != y.shape[0]:
        raise ValueError('y must be a vector, and X a matrix with an equal '
                         'number of rows.')
    if X.ndim == 1:
        X = X[:, None]
    ym = np.array(y.mean(0), dtype=dtype)
    Xm = np.array(X.mean(0), dtype=dtype)
    y -= ym
    X -= Xm
    y_sd = y.std(0, ddof=1)
    X_sd = X.std(0, ddof=1)[:, None if y.shape == X.shape else Ellipsis]
    R = (fast_dot(y.T, X) / float(len(y) - 1)) / (y_sd * X_sd)
    R = np.reshape(R, shape[1:])
    # clean up variables changed in place
    y += ym
    X += Xm
    return R

def inverse_transform(self, X):
    """Transform data back to its original space, i.e.,
    return an input X_original whose transform would be X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_components)
        New data, where n_samples is the number of samples
        and n_components is the number of components.

    Returns
    -------
    X_original : array-like, shape (n_samples, n_features)
    """
    check_is_fitted(self, 'center_')

    X_original = fast_dot(X, self.components_)
    if self.center_ is not None:
        X_original = X_original + self.center_
    return X_original

def inverse_transform(self, X):
    """Transform data back to its original space, i.e.,
    return an input X_original whose transform would be X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_components)
        New data, where n_samples is the number of samples
        and n_components is the number of components.

    Returns
    -------
    X_original : array-like, shape (n_samples, n_features)

    Notes
    -----
    If whitening is enabled, inverse_transform does not compute the
    exact inverse operation of transform.
    """
    return fast_dot(X, self.components_) + self.mean_

def transform(self, X, y=None):
    """Apply dimensionality reduction on X.

    X is projected on the first principal components previously extracted
    from a training set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    check_is_fitted(self, 'mean_')

    X = check_array(X)
    if self.mean_ is not None:
        X = X - self.mean_
    X = fast_dot(X, self.components_.T)
    return X

def _bound(self, mask=None, drop_diag=False):
    """Evidence Lower Bound (ELBO)"""
    if mask is None and not drop_diag:
        B = self.sumE_MK.prod(axis=0).sum()
    elif mask is None and drop_diag:
        assert self.mode_dims[0] == self.mode_dims[1]
        mask = np.abs(np.identity(self.mode_dims[0]) - 1)
        tmp = np.zeros(self.n_components)
        for k in xrange(self.n_components):
            tmp[k] = (mask * np.outer(self.E_DK_M[0][:, k], self.E_DK_M[1][:, k])).sum()
        B = (self.sumE_MK[2:].prod(axis=0) * tmp).sum()
    else:
        tmp = mask.copy()
        tmp = fast_dot(tmp, self.E_DK_M[-1])
        for i in xrange(self.n_modes - 2, 0, -1):
            tmp *= self.E_DK_M[i]
            tmp = tmp.sum(axis=-2)
        assert tmp.shape == self.E_DK_M[0].shape
        B = (tmp * self.E_DK_M[0]).sum()

    B -= np.log(self.sparse_data + 1).sum()
    B += (self.sparse_data * np.log(self._sparse_zeta())).sum()

    K = self.n_components
    for m in xrange(self.n_modes):
        D = self.mode_dims[m]
        B += (self.alpha - 1.) * (np.log(self.G_DK_M[m]).sum())
        B -= (self.alpha * self.beta_M[m]) * (self.sumE_MK[m, :].sum())
        B -= K * D * (sp.gammaln(self.alpha) - self.alpha * np.log(self.alpha * self.beta_M[m]))
        gamma_DK = self.gamma_DK_M[m]
        delta_DK = self.delta_DK_M[m]
        B += (-(gamma_DK - 1.) * sp.psi(gamma_DK) - np.log(delta_DK)
              + gamma_DK + sp.gammaln(gamma_DK)).sum()
    return B

def inverse_transform(self, X, y=None):
    """Transform data back to its original space.

    Returns an array X_original whose transform would be X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_components)
        New data, where n_samples is the number of samples
        and n_components is the number of components.

    Returns
    -------
    X_original : array-like, shape (n_samples, n_features)

    Notes
    -----
    If whitening is enabled, inverse_transform does not compute the
    exact inverse operation of transform.
    """
    check_is_fitted(self, 'mean_')

    X_original = fast_dot(X, self.components_)
    if self.mean_ is not None:
        X_original = X_original + self.mean_
    return X_original

def transform(self, X):
    """Apply the dimensionality reduction on X.

    X is projected on the first principal components previously extracted
    from a training set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    X = array2d(X)
    if self.mean_ is not None:
        X = X - self.mean_
    X_transformed = fast_dot(X, self.components_.T)
    return X_transformed

def inverse_transform(self, X, copy=True):
    """Transform the sources back to the mixed data (apply mixing matrix).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_components)
        Sources, where n_samples is the number of samples
        and n_components is the number of components.
    copy : bool (optional)
        If False, data passed to fit are overwritten. Defaults to True.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_features)
    """
    check_is_fitted(self, 'mixing_')

    ####X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES)
    X = fast_dot(X, self.mixing_.T)
    if self.whiten:
        X += self.mean_

    return X

def transform(self, X, y=None):
    """Apply dimensionality reduction on X.

    X is projected on the principal components previously extracted
    from a training set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_transformed : array-like, shape (n_samples, n_components)
    """
    check_is_fitted(self, 'center_')

    X = check_array(X)
    if self.center_ is not None:
        X = X - self.center_
    X_transformed = fast_dot(X, self.components_.T)
    return X_transformed

def test_fast_dot():
    """Check fast dot blas wrapper function"""
    if fast_dot is np.dot:
        return

    rng = np.random.RandomState(42)
    A = rng.random_sample([2, 10])
    B = rng.random_sample([2, 10])

    try:
        linalg.get_blas_funcs(['gemm'])[0]
        has_blas = True
    except (AttributeError, ValueError):
        has_blas = False

    if has_blas:
        # Test _fast_dot for invalid input.

        # Maltyped data.
        for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]:
            assert_raises(ValueError, _fast_dot, A.astype(dt1),
                          B.astype(dt2).T)

        # Malformed data.

        ## ndim == 0
        E = np.empty(0)
        assert_raises(ValueError, _fast_dot, E, E)

        ## ndim == 1
        assert_raises(ValueError, _fast_dot, A, A[0])

        ## ndim > 2
        assert_raises(ValueError, _fast_dot, A.T, np.array([A, A]))

        ## min(shape) == 1
        assert_raises(ValueError, _fast_dot, A, A[0, :][None, :])

        # test for matrix mismatch error
        assert_raises(ValueError, _fast_dot, A, A)

    # Test cov-like use case + dtypes.
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        # col < row
        C = np.dot(A.T, A)
        C_ = fast_dot(A.T, A)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A, B.T)
        C_ = fast_dot(A, B.T)
        assert_almost_equal(C, C_, decimal=5)

    # Test square matrix * rectangular use case.
    A = rng.random_sample([2, 2])
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        C = np.dot(A, B)
        C_ = fast_dot(A, B)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        assert_almost_equal(C, C_, decimal=5)

    if has_blas:
        for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]:
            assert_raises(ValueError, _fast_dot, x, x.T)

A = rng.random_sample([2, 10])
B = rng.random_sample([2, 10])

try:
    linalg.get_blas_funcs('gemm')
    has_blas = True
except (AttributeError, ValueError):
    has_blas = False

if has_blas:
    # test dispatch to np.dot
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always', NonBLASDotWarning)
        # maltyped data
        for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]:
            fast_dot(A.astype(dt1), B.astype(dt2).T)
            assert_true(isinstance(w.pop(-1).message, NonBLASDotWarning))
        # malformed data
        # ndim == 0
        E = np.empty(0)
        fast_dot(E, E)
        assert_true(isinstance(w.pop(-1).message, NonBLASDotWarning))
        ## ndim == 1
        fast_dot(A, A[0])
        assert_true(isinstance(w.pop(-1).message, NonBLASDotWarning))
        ## ndim > 2
        fast_dot(A.T, np.array([A, A]))
        assert_true(isinstance(w.pop(-1).message, NonBLASDotWarning))
        ## min(shape) == 1
        assert_raises(ValueError, fast_dot, A, A[0, :][None, :])
    # test for matrix mismatch error

def pca_biplot_with_clustering(data_matrix, feature_labels, mean_normalize=False,
                               k_means_post=True, K=5, n_components=2, f_out="foo"):
    """
    Inputs:
        data_matrix: numpy array of shape n x f where n is the number of samples and f is the
            number of features (variables)
        feature_labels: the list of human-interpretable labels for the rows in data_matrix.
            Used for the biplot.
        mean_normalize: if True, each row i of data_matrix is replaced with i - mean(i)
        k_means_post: if True, k-means is run after the PCA analysis on the projected data
            matrix, i.e., in the reduced space. If False, it is run before the PCA (and before
            the mean normalization), and then the clusters are projected into the space.
        n_components: the number of PCA vectors you want.
        f_out: the path to write the graph (as PDF) to

    TODO: the graph currently only supports n=2.

    Outputs:
        r_loadings: the loading vectors

    Side Effects:
        file created on disk at f_out

    References:
        http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
        https://github.com/teddyroland/python-biplot/blob/master/biplot.py
        http://stackoverflow.com/questions/21217710/factor-loadings-using-sklearn
        http://stackoverflow.com/questions/14716965/r-principle-component-analysis-label-of-component
        http://nxn.se/post/36838219245/loadings-with-scikit-learn-pca
    """
    data_matrix = np.nan_to_num(data_matrix)  # clustering does not allow infs, nans

    if not k_means_post:
        centroids, labels = clustering.kmpp(data_matrix, K)

    if mean_normalize:
        for iindex, i in enumerate(data_matrix):
            data_matrix[iindex] = i - np.mean(i)

    rows, variables = np.shape(data_matrix)
    pca = PCA(n_components=n_components)
    pca.fit(data_matrix)
    # components_ is n_components x f; transpose to get rows as features, like R does it
    r_loadings = np.matrix.transpose(pca.components_)
    transformed_matrix = fast_dot(data_matrix, r_loadings)

    loading_vectors = []
    loading_labels = []
    for i in range(0, variables):
        loading_vectors.append(list(r_loadings[i]))
        loading_labels.append(str(feature_labels[i]))

    if k_means_post:
        centroids, labels = clustering.kmpp(transformed_matrix, K)

    # do the plot
    fig, ax = plt.subplots()
    for l in range(0, K):
        transformed_matrix_i = [transformed_matrix[i] for i in range(0, rows) if labels[i] == l]
        ax.scatter([x[0] for x in transformed_matrix_i],
                   [x[1] for x in transformed_matrix_i],
                   color=_colors[l % len(_colors)], label="cluster" + str(l))

    pxs = [i[0] for i in loading_vectors]
    pys = [i[1] for i in loading_vectors]
    for xindex, x in enumerate(pxs):
        ax.arrow(0, 0, pxs[xindex], pys[xindex], linewidth=3, width=0.0005,
                 head_width=0.0025, color="r", label="loading")
        ax.arrow(0, 0, pxs[xindex] * 5, pys[xindex] * 5, alpha=.5, linewidth=1,
                 linestyle="dashed", width=0.0005, head_width=0.0025, color="r")
        ax.text(pxs[xindex] * 5, pys[xindex] * 5, loading_labels[xindex], color="r")

    ax.legend(loc='best')
    x0, x1 = ax.get_xlim()
    y0, y1 = ax.get_ylim()
    ax.set_xlim(min(x0, y0), max(x1, y1))  # make it square
    ax.set_ylim(min(x0, y0), max(x1, y1))
    ax.set_xlabel("PCA[0]")
    ax.set_ylabel("PCA[1]")
    ax.set_title("", fontsize=20)
    ax.grid(b=True, which='major', color='k', linestyle='--')
    fig.savefig(f_out + "{0}{1}{2}".format(K, "_meannormalized" if mean_normalize else "",
                                           "_kmpost" if k_means_post else "_kmfirst") + ".pdf",
                format="pdf")
    # THIS IS CRUCIAL, SEE: http://stackoverflow.com/questions/26132693/matplotlib-saving-state-between-different-uses-of-io-bytesio
    plt.close()
    return r_loadings

def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):
    """update H in Multiplicative Update NMF"""
    X_mask = X.mask if isinstance(X, np.ma.masked_array) else False
    if beta_loss == 2:
        if X_mask is False:
            numerator = safe_sparse_dot(W.T, X)
            denominator = fast_dot(fast_dot(W.T, W), H)
        else:
            numerator = _safe_dot(W.T, X)
            WH = _special_dot_X(W, H, X)
            denominator = _safe_dot(W.T, WH)
    else:
        # Numerator
        WH_safe_X = _special_dot_X(W, H, X)
        if sp.issparse(X):
            WH_safe_X_data = WH_safe_X.data
            X_data = X.data
        else:
            WH_safe_X_data = WH_safe_X
            X_data = X
            # copy used in the Denominator
            WH = WH_safe_X.copy()
            if beta_loss - 1. < 0:
                WH[np.logical_and(WH == 0, ~X_mask)] = EPSILON

        # to avoid division by zero
        if beta_loss - 2. < 0:
            WH_safe_X_data[np.logical_and(WH_safe_X_data == 0, ~X_mask)] = EPSILON

        if beta_loss == 1:
            # to work around spurious warnings coming out of masked arrays
            with np.errstate(invalid='ignore'):
                np.divide(X_data, WH_safe_X_data, out=WH_safe_X_data)
        else:
            WH_safe_X_data **= beta_loss - 2
            # element-wise multiplication
            WH_safe_X_data *= X_data

        # here numerator = dot(W.T, (dot(W, H) ** (beta_loss - 2)) * X)
        numerator = _safe_dot(W.T, WH_safe_X)

        # Denominator
        if beta_loss == 1:
            if X_mask is False:
                W_sum = np.sum(W, axis=0)  # shape(n_components, )
                W_sum = W_sum[:, np.newaxis]
            else:
                W_sum = np.dot(W.T, ~X_mask)
            W_sum[W_sum == 0] = 1.
            denominator = W_sum
        else:
            # computation of WtWH = dot(W.T, dot(W, H) ** beta_loss - 1)
            if sp.issparse(X):
                # memory efficient computation
                # (compute column by column, avoiding the dense matrix WH)
                WtWH = np.empty(H.shape)
                for i in range(X.shape[1]):
                    WHi = fast_dot(W, H[:, i])
                    if beta_loss - 1 < 0:
                        WHi[WHi == 0] = EPSILON
                    WHi **= beta_loss - 1
                    WtWH[:, i] = fast_dot(W.T, WHi)
            else:
                WH **= beta_loss - 1
                WtWH = _safe_dot(W.T, WH)
            denominator = WtWH

    # Add L1 and L2 regularization
    if l1_reg_H > 0:
        denominator += l1_reg_H
    if l2_reg_H > 0:
        denominator = denominator + l2_reg_H * H
    denominator[denominator == 0] = EPSILON

    numerator /= denominator
    delta_H = numerator

    # gamma is in ]0, 1]
    if gamma != 1:
        delta_H **= gamma

    return delta_H

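# A NumPy-only sketch (toy dense matrices, no masking or regularization) of the beta_loss == 2
# case handled above: the classic Lee-Seung multiplicative update H <- H * (W^T X) / (W^T W H)
# for the Frobenius objective, which keeps H non-negative and does not increase ||X - WH||^2.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(6, 4)
W = rng.rand(6, 2)
H = rng.rand(2, 4)

before = np.linalg.norm(X - W.dot(H)) ** 2
H *= W.T.dot(X) / (W.T.dot(W).dot(H) + 1e-12)  # small constant guards against division by zero
after = np.linalg.norm(X - W.dot(H)) ** 2
assert after <= before + 1e-12
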