import numpy as np

from dask import compute, persist
from dask.array import dot, exp, log1p

# NOTE: compute_stepsize_dask and the Logistic family referenced below are
# assumed to be defined elsewhere in the surrounding module.


def loglikelihood(self, Xbeta, y):
    """
    Evaluate the logistic loglikelihood

    Parameters
    ----------
    Xbeta : array, shape (n_samples,)
    y : array, shape (n_samples,)
    """
    enXbeta = exp(-Xbeta)
    return (Xbeta + log1p(enXbeta)).sum() - dot(y, Xbeta)
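
# --- Illustration (not part of the original source) ---
# Mathematically, Xbeta + log1p(exp(-Xbeta)) == log1p(exp(Xbeta)); the form used
# above simply avoids overflowing exp() for large positive Xbeta (the mirrored
# overflow happens for large negative Xbeta instead). The hypothetical helper
# below is only a sketch of that identity in plain numpy.
def _check_logistic_identity():
    xb = np.array([-5.0, 0.0, 5.0, 1000.0])
    naive = np.log1p(np.exp(xb))           # overflows to inf at xb = 1000 (RuntimeWarning)
    stable = xb + np.log1p(np.exp(-xb))    # stays finite: 1000.0 in the last slot
    print(naive)
    print(stable)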
def bfgs(X, y, max_iter=500, tol=1e-14, family=Logistic):
    '''Simple implementation of BFGS.'''
    n, p = X.shape
    y = y.squeeze()

    recalcRate = 10
    stepSize = 1.0
    armijoMult = 1e-4
    backtrackMult = 0.5
    stepGrowth = 1.25

    beta = np.zeros(p)
    Hk = np.eye(p)  # running approximation of the inverse Hessian
    for k in range(max_iter):

        if k % recalcRate == 0:
            # periodically recompute Xbeta and the objective from scratch
            Xbeta = X.dot(beta)
            eXbeta = exp(Xbeta)
            func = log1p(eXbeta).sum() - dot(y, Xbeta)

        e1 = eXbeta + 1.0
        gradient = dot(X.T, eXbeta / e1 - y)  # implicit numpy -> dask conversion

        if k:
            # BFGS update of the inverse-Hessian approximation; the rank-one
            # terms require outer products of the 1-d vectors sk and yk
            yk = yk + gradient  # TODO: gradient is dasky and yk is numpy-y
            rhok = 1 / yk.dot(sk)
            adj = np.eye(p) - rhok * dot(sk[:, None], yk[None, :])
            Hk = dot(adj, dot(Hk, adj.T)) + rhok * dot(sk[:, None], sk[None, :])

        step = dot(Hk, gradient)
        steplen = dot(step, gradient)
        Xstep = dot(X, step)

        # backtracking line search
        lf = func
        old_Xbeta = Xbeta
        stepSize, _, _, func = compute_stepsize_dask(
            beta, step, Xbeta, Xstep, y, func, family=family,
            backtrackMult=backtrackMult, armijoMult=armijoMult,
            stepSize=stepSize)

        beta, stepSize, Xbeta, gradient, lf, func, step, Xstep = persist(
            beta, stepSize, Xbeta, gradient, lf, func, step, Xstep)

        stepSize, lf, func, step = compute(stepSize, lf, func, step)

        beta = beta - stepSize * step  # tiny bit of repeat work here to avoid communication
        Xbeta = Xbeta - stepSize * Xstep

        if stepSize == 0:
            print('No more progress')
            break

        # necessary for gradient computation
        eXbeta = exp(Xbeta)

        yk = -gradient
        sk = -stepSize * step
        stepSize *= stepGrowth

        if stepSize == 0:
            print('No more progress')
            break

        # relative decrease of the objective as the convergence test
        df = lf - func
        df /= max(func, lf)
        if df < tol:
            print('Converged')
            break

    return beta
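
# --- Usage sketch (not part of the original source) ---
# bfgs() expects a dask array X and a target vector y; it also relies on
# compute_stepsize_dask and the Logistic family being defined elsewhere, so this
# hypothetical helper is only a sketch of how a run might look on synthetic data.
def _example_bfgs_run():
    import dask.array as da

    rng = np.random.RandomState(0)
    beta_true = rng.normal(size=5)
    X_np = rng.normal(size=(10000, 5))
    prob = 1 / (1 + np.exp(-X_np.dot(beta_true)))
    y_np = (rng.uniform(size=10000) < prob).astype(float)

    X = da.from_array(X_np, chunks=(1000, 5))
    y = da.from_array(y_np, chunks=1000)

    beta_hat = bfgs(X, y, max_iter=100, tol=1e-8)
    print(beta_true)
    print(beta_hat)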
def loglike(Xbeta, y):
    # Unstabilised form of the logistic log-likelihood above; this is the same
    # expression bfgs() evaluates inline when it recomputes `func`.
    eXbeta = exp(Xbeta)
    return (log1p(eXbeta)).sum() - dot(y, Xbeta)
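
# --- Gradient check sketch (not part of the original source) ---
# The gradient used inside bfgs(), dot(X.T, eXbeta / (eXbeta + 1) - y), is the
# derivative of loglike(X.dot(beta), y) with respect to beta. The hypothetical
# helper below verifies that against a finite-difference approximation in plain numpy.
def _check_logistic_gradient(eps=1e-6):
    rng = np.random.RandomState(1)
    X = rng.normal(size=(50, 3))
    y = (rng.uniform(size=50) < 0.5).astype(float)
    beta = rng.normal(size=3)

    def f(b):
        xb = X.dot(b)
        return np.log1p(np.exp(xb)).sum() - np.dot(y, xb)

    eXbeta = np.exp(X.dot(beta))
    analytic = X.T.dot(eXbeta / (eXbeta + 1.0) - y)
    numeric = np.array([
        (f(beta + eps * e) - f(beta - eps * e)) / (2 * eps)
        for e in np.eye(3)
    ])
    assert np.allclose(analytic, numeric, atol=1e-4)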