def newton(X, y, max_iter=50, tol=1e-8, family='logistic', **kwargs):
    """Newton's Method for Logistic Regression.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    max_iter : int
        Maximum number of iterations to attempt before declaring
        failure to converge
    tol : float
        Maximum allowed change from prior iteration required to
        declare convergence
    family : Family

    Returns
    -------
    beta : array-like, shape (n_features,)
    """
    family = Family.get(family)
    gradient, hessian = family.gradient, family.hessian
    n, p = X.shape
    beta = np.zeros(p)  # always init to zeros?
    Xbeta = dot(X, beta)

    iter_count = 0
    converged = False

    while not converged:
        beta_old = beta

        # should this use map_blocks()?
        hess = hessian(Xbeta, X)
        grad = gradient(Xbeta, X, y)
        hess, grad = da.compute(hess, grad)  # should this be dask or numpy?

        # currently uses Python 3 specific syntax
        step, _, _, _ = np.linalg.lstsq(hess, grad)
        beta = (beta_old - step)

        iter_count += 1

        # should change this criterion
        coef_change = np.absolute(beta_old - beta)
        converged = (
            (not np.any(coef_change > tol)) or (iter_count > max_iter))

        if not converged:
            Xbeta = dot(X, beta)  # numpy -> dask conversion of beta

    return beta
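# A minimal usage sketch for newton() above -- not part of the library code.
# It assumes this module's newton() (and the Family machinery it relies on)
# is importable, and that numpy and dask.array are available. The synthetic
# data, chunk sizes, and tolerances are arbitrary illustrative choices.
def _example_newton_usage():
    import numpy as np
    import dask.array as da

    rng = np.random.RandomState(0)
    X_local = rng.normal(size=(1000, 5))
    true_beta = rng.normal(size=5)
    probs = 1.0 / (1.0 + np.exp(-X_local.dot(true_beta)))
    y_local = (rng.uniform(size=1000) < probs).astype('float64')

    # wrap the data as dask arrays so the gradient/hessian reductions
    # inside newton() are built as lazy, blockwise dask graphs
    X = da.from_array(X_local, chunks=(250, 5))
    y = da.from_array(y_local, chunks=(250,))

    return newton(X, y, max_iter=50, tol=1e-8, family='logistic')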
def test_dot_with_sparse():
    A = sparse.random((1024, 64))
    B = sparse.random((64,))
    ans = sparse.dot(A, B)

    # dot(sparse.array, sparse.array)
    res = utils.dot(A, B)
    assert_eq(ans, res)

    # dot(sparse.array, dask.array)
    res = utils.dot(A, da.from_array(B, chunks=B.shape))
    assert_eq(ans, res.compute())

    # dot(dask.array, sparse.array)
    res = utils.dot(da.from_array(A, chunks=A.shape), B)
    assert_eq(ans, res.compute())
def test_dot_with_cupy():
    cupy = pytest.importorskip('cupy')

    # dot(cupy.array, cupy.array)
    A = cupy.random.rand(100, 100)
    B = cupy.random.rand(100)
    ans = cupy.dot(A, B)
    res = utils.dot(A, B)
    assert_eq(ans, res)

    # dot(dask.array, cupy.array)
    dA = da.from_array(A, chunks=(10, 100))
    res = utils.dot(dA, B).compute()
    assert_eq(ans, res)

    # dot(cupy.array, dask.array)
    dB = da.from_array(B, chunks=(10,))
    res = utils.dot(A, dB).compute()
    assert_eq(ans, res)
def loglikelihood(self, Xbeta, y):
    """
    Evaluate the logistic loglikelihood

    Parameters
    ----------
    Xbeta : array, shape (n_samples,)
    y : array, shape (n_samples,)
    """
    enXbeta = exp(-Xbeta)
    return (Xbeta + log1p(enXbeta)).sum() - dot(y, Xbeta)
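# Self-contained numpy check (illustrative only, not library code) of the
# rewrite used in loglikelihood() above: log(1 + exp(t)) == t + log1p(exp(-t)).
# The right-hand form avoids overflow for large t, which is presumably why the
# method evaluates the objective that way.
def _example_log1p_stability():
    import numpy as np

    t = np.array([-5.0, 0.0, 5.0])
    naive = np.log1p(np.exp(t))
    stable = t + np.log1p(np.exp(-t))
    assert np.allclose(naive, stable)

    big = np.array([1000.0])
    with np.errstate(over='ignore'):
        # the naive form overflows to inf ...
        assert np.isinf(np.log1p(np.exp(big))).all()
    # ... while the rewritten form stays finite
    assert np.isfinite(big + np.log1p(np.exp(-big))).all()
    return naive, stable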
def decision_function(self, X):
    """Predict confidence scores for samples in X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    T : array-like, shape = [n_samples, n_classes]
        The confidence score of the sample for each class in the model.
    """
    X_ = self._check_array(X)
    return dot(X_, self._coef)
def predict(self, X):
    """Predict count for samples in X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    C : array, shape = [n_samples,]
        Predicted count for each sample
    """
    X_ = self._check_array(X)
    return exp(dot(X_, self._coef))
def decision_function(self, X):
    """Predict confidence scores for samples in X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    S : array-like, shape = [n_samples,]
        Confidence scores for each sample.
    """
    X_ = self._check_array(X)
    return dot(X_, self._coef)
def predict_proba(self, X):
    """Probability estimates for samples in X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    T : array-like, shape = [n_samples, n_classes]
        The probability of the sample for each class in the model.
    """
    X_ = self._check_array(X)
    return sigmoid(dot(X_, self._coef))
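# Illustrative standalone sketch (not library code) of how the two methods
# above relate: decision_function returns raw scores X @ coef, predict_proba
# squashes them through the sigmoid, and one way to get hard class labels is
# to threshold the probability at 0.5. All values here are made up.
def _example_scores_to_probabilities():
    import numpy as np

    scores = np.array([-2.0, 0.0, 3.0])      # as returned by decision_function
    probs = 1.0 / (1.0 + np.exp(-scores))    # as returned by predict_proba
    labels = (probs > 0.5).astype(int)       # hypothetical hard predictions
    return scores, probs, labels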
def newton(X, y, max_iter=50, tol=1e-8, family=Logistic):
    '''Newton's Method for Logistic Regression.'''
    gradient, hessian = family.gradient, family.hessian
    n, p = X.shape
    beta = np.zeros(p)  # always init to zeros?
    Xbeta = dot(X, beta)

    iter_count = 0
    converged = False

    while not converged:
        beta_old = beta

        # should this use map_blocks()?
        hess = hessian(Xbeta, X)
        grad = gradient(Xbeta, X, y)
        hess, grad = da.compute(hess, grad)  # should this be dask or numpy?

        # currently uses Python 3 specific syntax
        step, _, _, _ = np.linalg.lstsq(hess, grad)
        beta = (beta_old - step)

        iter_count += 1

        # should change this criterion
        coef_change = np.absolute(beta_old - beta)
        converged = (
            (not np.any(coef_change > tol)) or (iter_count > max_iter))

        if not converged:
            Xbeta = dot(X, beta)  # numpy -> dask conversion of beta

    return beta
def gradient(self, Xbeta, X, y):
    """Poisson gradient: X.T @ (exp(Xbeta) - y)"""
    eXbeta = exp(Xbeta)
    return dot(X.T, eXbeta - y)
def hessian(self, Xbeta, X):
    """Normal (least squares) hessian"""
    return 2 * dot(X.T, X)
def gradient(self, Xbeta, X, y):
    """Normal (least squares) gradient: 2 * X.T @ (Xbeta - y)"""
    return 2 * dot(X.T, Xbeta) - 2 * dot(X.T, y)
def hessian(self, Xbeta, X):
    """Logistic hessian"""
    p = sigmoid(Xbeta)
    return dot(p * (1 - p) * X.T, X)
def gradient(self, Xbeta, X, y):
    """Logistic gradient"""
    p = sigmoid(Xbeta)
    return dot(X.T, p - y)
def gradient(Xbeta, X, y):
    """Logistic gradient"""
    p = sigmoid(Xbeta)
    return dot(X.T, p - y)
def hessian(self, Xbeta, X):
    """Poisson hessian: X.T @ diag(exp(Xbeta)) @ X"""
    eXbeta = exp(Xbeta)
    x_diag_eXbeta = eXbeta[:, None] * X
    return dot(X.T, x_diag_eXbeta)
def hessian(Xbeta, X):
    """Logistic hessian"""
    p = sigmoid(Xbeta)
    return dot(p * (1 - p) * X.T, X)
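# Standalone numpy check (illustrative only, not library code) of the
# broadcasting tricks used in the hessian implementations above: scaling the
# columns of X.T by a weight vector w (as in w * X.T) or the rows of X (as in
# w[:, None] * X) both yield the weighted Gram matrix X.T @ diag(w) @ X.
def _example_weighted_hessian_forms():
    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.normal(size=(6, 3))
    w = rng.uniform(size=6)                  # e.g. p * (1 - p) or exp(Xbeta)

    explicit = X.T @ np.diag(w) @ X
    assert np.allclose(np.dot(w * X.T, X), explicit)
    assert np.allclose(np.dot(X.T, w[:, None] * X), explicit)
    return explicit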
def bfgs(X, y, max_iter=500, tol=1e-14, family=Logistic):
    '''Simple implementation of BFGS.'''
    n, p = X.shape
    y = y.squeeze()

    recalcRate = 10
    stepSize = 1.0
    armijoMult = 1e-4
    backtrackMult = 0.5
    stepGrowth = 1.25

    beta = np.zeros(p)
    Hk = np.eye(p)

    for k in range(max_iter):

        if k % recalcRate == 0:
            Xbeta = X.dot(beta)
            eXbeta = exp(Xbeta)
            func = log1p(eXbeta).sum() - dot(y, Xbeta)

        e1 = eXbeta + 1.0
        gradient = dot(X.T, eXbeta / e1 - y)  # implicit numpy -> dask conversion

        if k:
            yk = yk + gradient  # TODO: gradient is dasky and yk is numpy-y
            rhok = 1 / yk.dot(sk)
            adj = np.eye(p) - rhok * dot(sk, yk.T)
            Hk = dot(adj, dot(Hk, adj.T)) + rhok * dot(sk, sk.T)

        step = dot(Hk, gradient)
        steplen = dot(step, gradient)
        Xstep = dot(X, step)

        # backtracking line search
        lf = func
        old_Xbeta = Xbeta
        stepSize, _, _, func = compute_stepsize_dask(
            beta, step, Xbeta, Xstep, y, func, family=family,
            backtrackMult=backtrackMult, armijoMult=armijoMult,
            stepSize=stepSize)

        beta, stepSize, Xbeta, gradient, lf, func, step, Xstep = persist(
            beta, stepSize, Xbeta, gradient, lf, func, step, Xstep)
        stepSize, lf, func, step = compute(stepSize, lf, func, step)

        beta = beta - stepSize * step
        # tiny bit of repeat work here to avoid communication
        Xbeta = Xbeta - stepSize * Xstep

        if stepSize == 0:
            print('No more progress')
            break

        # necessary for gradient computation
        eXbeta = exp(Xbeta)

        yk = -gradient
        sk = -stepSize * step
        stepSize *= stepGrowth

        if stepSize == 0:
            print('No more progress')
            break

        df = lf - func
        df /= max(func, lf)
        if df < tol:
            print('Converged')
            break

    return beta
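# Standalone sketch (illustrative only, not library code) of the textbook
# BFGS inverse-Hessian update behind the `if k:` block above. Note that with
# 1-d sk and yk the rank-one terms are outer products (np.outer here); after
# the update, Hk satisfies the secant condition Hk @ yk == sk. The vectors
# below are arbitrary random values used only to check the formula.
def _example_bfgs_secant_condition():
    import numpy as np

    rng = np.random.RandomState(0)
    p = 4
    Hk = np.eye(p)
    sk = rng.normal(size=p)
    yk = rng.normal(size=p)

    rhok = 1 / yk.dot(sk)
    adj = np.eye(p) - rhok * np.outer(sk, yk)
    Hk = adj @ Hk @ adj.T + rhok * np.outer(sk, sk)

    assert np.allclose(Hk.dot(yk), sk)
    return Hk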
def hessian(Xbeta, X):
    """Normal (least squares) hessian"""
    return 2 * dot(X.T, X)
def loglike(Xbeta, y):
    eXbeta = exp(Xbeta)
    return (log1p(eXbeta)).sum() - dot(y, Xbeta)
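# Standalone finite-difference sanity check (illustrative only, not library
# code) tying the module-level logistic functions together: the analytic
# gradient X.T @ (sigmoid(Xbeta) - y) should match the numerical derivative of
# the objective log1p(exp(Xbeta)).sum() - y @ Xbeta. The same formulas are
# re-implemented here in plain numpy rather than calling the functions above.
def _example_logistic_gradient_check():
    import numpy as np

    def sigmoid_np(t):
        return 1.0 / (1.0 + np.exp(-t))

    rng = np.random.RandomState(0)
    X = rng.normal(size=(50, 3))
    beta = rng.normal(size=3)
    y = (rng.uniform(size=50) < 0.5).astype('float64')

    def f(b):
        Xb = X.dot(b)
        return np.log1p(np.exp(Xb)).sum() - y.dot(Xb)   # same formula as loglike()

    analytic = X.T.dot(sigmoid_np(X.dot(beta)) - y)     # same formula as gradient()

    eps = 1e-6
    numeric = np.array([
        (f(beta + eps * e) - f(beta - eps * e)) / (2 * eps)
        for e in np.eye(3)
    ])
    assert np.allclose(analytic, numeric, atol=1e-4)
    return analytic, numeric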