Example #1
def gradient_descent(X, y, max_steps=100, tol=1e-14):
    '''Michael Grant's implementation of Gradient Descent.

    Assumes ``import numpy as np`` and ``import dask.array as da`` at module
    level; ``compute_stepsize`` is a line-search helper defined elsewhere in
    the project.
    '''

    n, p = X.shape
    firstBacktrackMult = 0.1
    nextBacktrackMult = 0.5
    armijoMult = 0.1
    stepGrowth = 1.25
    stepSize = 1.0
    recalcRate = 10
    backtrackMult = firstBacktrackMult
    beta = np.zeros(p)
    y_local = y.compute()

    for k in range(max_steps):
        # how necessary is this recalculation?
        if k % recalcRate == 0:
            Xbeta = X.dot(beta)
            eXbeta = da.exp(Xbeta)
            func = da.log1p(eXbeta).sum() - y.dot(Xbeta)

        e1 = eXbeta + 1.0
        gradient = X.T.dot(eXbeta / e1 - y)
        Xgradient = X.dot(gradient)

        Xbeta, eXbeta, func, gradient, Xgradient = da.compute(
            Xbeta, eXbeta, func, gradient, Xgradient)

        # backtracking line search
        lf = func
        stepSize, beta, Xbeta, func = compute_stepsize(beta, gradient,
                                                       Xbeta, Xgradient,
                                                       y_local, func,
                                                       backtrackMult=backtrackMult,
                                                       armijoMult=armijoMult,
                                                       stepSize=stepSize)
        if stepSize == 0:
            print('No more progress')
            break

        # necessary for the next gradient computation; Xbeta is a concrete
        # NumPy array at this point, so use NumPy's exp
        eXbeta = np.exp(Xbeta)

        df = lf - func
        df /= max(func, lf)

        if df < tol:
            print('Converged')
            break
        stepSize *= stepGrowth
        backtrackMult = nextBacktrackMult

    return beta
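
compute_stepsize itself is not shown on this page. The sketch below is a minimal NumPy stand-in, assuming it performs a standard Armijo backtracking line search along the negative gradient for the logistic objective above; the name backtracking_armijo and the max_backtracks cap are hypothetical, not part of the original project.

import numpy as np

def backtracking_armijo(beta, gradient, Xbeta, Xgradient, y, func,
                        stepSize=1.0, armijoMult=0.1,
                        backtrackMult=0.5, max_backtracks=50):
    # Shrink stepSize until the logistic objective satisfies the Armijo
    # sufficient-decrease condition.  Xgradient = X.dot(gradient) is reused,
    # so no matrix-vector product is needed inside the loop.
    grad_sq = np.dot(gradient, gradient)
    for _ in range(max_backtracks):
        Xbeta_new = Xbeta - stepSize * Xgradient
        func_new = np.log1p(np.exp(Xbeta_new)).sum() - y.dot(Xbeta_new)
        if func_new <= func - armijoMult * stepSize * grad_sq:
            return stepSize, beta - stepSize * gradient, Xbeta_new, func_new
        stepSize *= backtrackMult
    return 0.0, beta, Xbeta, func  # stepSize == 0 signals "no more progress"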
Example #2
    def loglikelihood(self, Xbeta, y):
        """
        Evaluate the logistic loglikelihood

        Parameters
        ----------
        Xbeta : array, shape (n_samples,)
            The linear predictor, ``X.dot(beta)``
        y : array, shape (n_samples,)
        """
        enXbeta = exp(-Xbeta)
        return (Xbeta + log1p(enXbeta)).sum() - dot(y, Xbeta)
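
The expression Xbeta + log1p(exp(-Xbeta)) equals log1p(exp(Xbeta)) (compare Example #11) but avoids overflow when Xbeta is large and positive. A quick NumPy check of the equivalence:

import numpy as np

rng = np.random.default_rng(0)
Xbeta = rng.normal(scale=5.0, size=1000)
y = rng.integers(0, 2, size=1000)

# form used above vs. the form from Example #11
stable = (Xbeta + np.log1p(np.exp(-Xbeta))).sum() - np.dot(y, Xbeta)
naive = np.log1p(np.exp(Xbeta)).sum() - np.dot(y, Xbeta)
print(np.allclose(stable, naive))  # True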
Example #3
File: glm.py  Project: selvamshan/dask-ml
    def predict(self, X):
        """Predict count for samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        C : array, shape = [n_samples,]
            Predicted count for each sample
        """
        X_ = self._check_array(X)
        return exp(dot(X_, self._coef))
Example #4
def make_poisson(n_samples=1000,
                 n_features=100,
                 n_informative=2,
                 scale=1.0,
                 chunksize=100,
                 is_sparse=False):
    """
    Generate a dummy dataset for modeling count data.

    Parameters
    ----------
    n_samples : int
        number of rows in the output array
    n_features : int
        number of columns (features) in the output array
    n_informative : int
        number of features that are correlated with the outcome
    scale : float
        Scale the true coefficient array by this factor
    chunksize : int
        Number of rows per dask array block.
    is_sparse : bool
        If True, convert the blocks of ``X`` to ``sparse.COO``

    Returns
    -------
    X : dask.array, size ``(n_samples, n_features)``
    y : dask.array, size ``(n_samples,)``
        array of non-negative integer-valued data

    Examples
    --------
    >>> X, y = make_poisson()
    >>> X
    dask.array<da.random.normal, shape=(1000, 100), dtype=float64, chunksize=(100, 100)>
    >>> y
    dask.array<da.random.poisson, shape=(1000,), dtype=int64, chunksize=(100,)>
    """
    X = da.random.normal(0,
                         1,
                         size=(n_samples, n_features),
                         chunks=(chunksize, n_features))
    if is_sparse:
        X = X.map_blocks(sparse.COO)
    informative_idx = np.random.choice(n_features, n_informative)
    beta = (np.random.random(n_features) - 1) * scale
    z0 = X[:, informative_idx].dot(beta[informative_idx])
    rate = exp(z0)
    y = da.random.poisson(rate, size=1, chunks=(chunksize, ))
    return X, y
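
A usage sketch for the generator above, assuming the module-level names it relies on (np, da, exp, and optionally sparse) are imported roughly as shown and that the definition above is in scope; the reprs in the comments follow the docstring.

import numpy as np
import dask.array as da
from dask.array import exp   # the bare exp used above is assumed to resolve to an import like this
# import sparse              # only needed for is_sparse=True

X, y = make_poisson(n_samples=1000, n_features=100, chunksize=100)
print(X)                     # dask array with shape (1000, 100) and chunksize (100, 100)
print(y)                     # per the docstring: shape (1000,), dtype int64, chunksize (100,)
X_np, y_np = da.compute(X, y)   # materialize as NumPy arrays when needed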
Example #5
def make_poisson(n_samples=1000,
                 n_features=100,
                 n_informative=2,
                 scale=1.0,
                 chunksize=100):
    X = da.random.normal(0,
                         1,
                         size=(n_samples, n_features),
                         chunks=(chunksize, n_features))
    informative_idx = np.random.choice(n_features, n_informative)
    beta = (np.random.random(n_features) - 1) * scale
    z0 = X[:, informative_idx].dot(beta[informative_idx])
    rate = exp(z0)
    y = da.random.poisson(rate, size=1, chunks=(chunksize, ))
    return X, y
Example #6
    def gradient(self, Xbeta, X, y):
        """Gradient of the Poisson loglikelihood with respect to beta."""
        eXbeta = exp(Xbeta)
        return dot(X.T, eXbeta - y)
Example #7
    def loglikelihood(self, Xbeta, y):
        """Evaluate the Poisson loglikelihood."""
        eXbeta = exp(Xbeta)
        yXbeta = y * Xbeta
        return (eXbeta - yXbeta).sum()
Example #8
    def hessian(self, Xbeta, X):
        """Hessian of the Poisson loglikelihood, X.T.dot(diag(exp(Xbeta))).dot(X),
        formed without materializing the diagonal matrix."""
        eXbeta = exp(Xbeta)
        x_diag_eXbeta = eXbeta[:, None] * X
        return dot(X.T, x_diag_eXbeta)
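
Examples #6 - #8 are the derivatives of the Poisson loglikelihood in Example #7: the gradient is X.T (exp(Xbeta) - y) and the Hessian is X.T diag(exp(Xbeta)) X. A small self-contained NumPy finite-difference check of both expressions:

import numpy as np

rng = np.random.default_rng(1)
n, p = 50, 4
X = rng.normal(size=(n, p))
beta = rng.normal(size=p)
y = rng.poisson(1.0, size=n).astype(float)

loglike = lambda b: (np.exp(X @ b) - y * (X @ b)).sum()     # Example #7
grad = lambda b: X.T @ (np.exp(X @ b) - y)                  # Example #6
hess = lambda b: X.T @ (np.exp(X @ b)[:, None] * X)         # Example #8

eps, I = 1e-6, np.eye(p)
num_grad = np.array([(loglike(beta + eps * I[i]) - loglike(beta - eps * I[i])) / (2 * eps)
                     for i in range(p)])
num_hess = np.array([(grad(beta + eps * I[i]) - grad(beta - eps * I[i])) / (2 * eps)
                     for i in range(p)])
print(np.allclose(grad(beta), num_grad, rtol=1e-4))   # gradient matches
print(np.allclose(hess(beta), num_hess, rtol=1e-4))   # Hessian matches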
Example #9
def bfgs(X, y, max_iter=500, tol=1e-14, family=Logistic):
    '''Simple implementation of BFGS.

    Assumes module-level imports for ``np``, the element-wise helpers ``exp``,
    ``log1p`` and ``dot``, dask's ``persist``/``compute``, and the
    ``compute_stepsize_dask`` line-search helper defined elsewhere in the
    project; ``Logistic`` is the logistic GLM family.
    '''

    n, p = X.shape
    y = y.squeeze()

    recalcRate = 10
    stepSize = 1.0
    armijoMult = 1e-4
    backtrackMult = 0.5
    stepGrowth = 1.25

    beta = np.zeros(p)
    Hk = np.eye(p)
    for k in range(max_iter):

        if k % recalcRate == 0:
            Xbeta = X.dot(beta)
            eXbeta = exp(Xbeta)
            func = log1p(eXbeta).sum() - dot(y, Xbeta)

        e1 = eXbeta + 1.0
        gradient = dot(X.T,
                       eXbeta / e1 - y)  # implicit numpy -> dask conversion

        if k:
            yk = yk + gradient  # TODO: gradient is dasky and yk is numpy-y
            rhok = 1 / yk.dot(sk)
            # BFGS inverse-Hessian update; sk and yk are 1-D, so the rank-one
            # terms s y^T and s s^T must be formed as outer products
            adj = np.eye(p) - rhok * sk[:, None] * yk[None, :]
            Hk = dot(adj, dot(Hk, adj.T)) + rhok * sk[:, None] * sk[None, :]

        step = dot(Hk, gradient)
        steplen = dot(step, gradient)
        Xstep = dot(X, step)

        # backtracking line search
        lf = func
        old_Xbeta = Xbeta
        stepSize, _, _, func = compute_stepsize_dask(
            beta,
            step,
            Xbeta,
            Xstep,
            y,
            func,
            family=family,
            backtrackMult=backtrackMult,
            armijoMult=armijoMult,
            stepSize=stepSize)

        beta, stepSize, Xbeta, gradient, lf, func, step, Xstep = persist(
            beta, stepSize, Xbeta, gradient, lf, func, step, Xstep)

        stepSize, lf, func, step = compute(stepSize, lf, func, step)

        beta = beta - stepSize * step  # tiny bit of repeat work here to avoid communication
        Xbeta = Xbeta - stepSize * Xstep

        if stepSize == 0:
            print('No more progress')
            break

        # necessary for gradient computation
        eXbeta = exp(Xbeta)

        yk = -gradient
        sk = -stepSize * step
        stepSize *= stepGrowth

        df = lf - func
        df /= max(func, lf)
        if df < tol:
            print('Converged')
            break

    return beta
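
The Hk recursion in the middle of the loop is the standard BFGS inverse-Hessian update, H <- (I - rho s y^T) H (I - rho y s^T) + rho s s^T with rho = 1/(y^T s); with 1-D sk and yk the rank-one terms are outer products. A short NumPy check that a single update satisfies the secant condition H_new y = s and preserves symmetry:

import numpy as np

rng = np.random.default_rng(2)
p = 5
sk = rng.normal(size=p)            # step taken:      beta_{k+1} - beta_k
yk = rng.normal(size=p)            # gradient change: grad_{k+1} - grad_k
if yk.dot(sk) < 0:                 # BFGS assumes positive curvature, y^T s > 0
    yk = -yk

Hk = np.eye(p)
rhok = 1.0 / yk.dot(sk)
adj = np.eye(p) - rhok * np.outer(sk, yk)
Hk_new = adj @ Hk @ adj.T + rhok * np.outer(sk, sk)

print(np.allclose(Hk_new @ yk, sk))    # secant condition: H_{k+1} y_k = s_k
print(np.allclose(Hk_new, Hk_new.T))   # symmetry is preserved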
Example #10
    def loglike(Xbeta, y):
        """Evaluate the Poisson loglikelihood."""
        eXbeta = exp(Xbeta)
        yXbeta = y * Xbeta
        return (eXbeta - yXbeta).sum()
Example #11
    def loglike(Xbeta, y):
        """Evaluate the logistic loglikelihood."""
        eXbeta = exp(Xbeta)
        return (log1p(eXbeta)).sum() - dot(y, Xbeta)
Example #12
def sigmoid(x):
    return 1 / (1 + exp(-x))
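
The quantity eXbeta / (eXbeta + 1) appearing in the logistic gradients of Examples #1 and #9 is exactly sigmoid(Xbeta); a one-line NumPy check:

import numpy as np

Xbeta = np.linspace(-10, 10, 101)
eXbeta = np.exp(Xbeta)
print(np.allclose(eXbeta / (eXbeta + 1.0), 1 / (1 + np.exp(-Xbeta))))  # True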