Example #1
def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None, rho=None, q=None):
    """Computes the logistic loss and gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,), optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    rho : ndarray, shape (2,)
        Class-conditional label-flip probabilities: rho[0] is the probability
        that a negative sample is labeled positive, rho[1] that a positive
        sample is labeled negative.

    q : ndarray, shape (n_samples,), optional
        Per-sample label-quality indicator (0: noisy, 1: clean). If not
        provided, all labels are treated as noisy.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """

    _, n_features = X.shape
    grad = np.empty_like(w)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # 0: noise, 1: clean
    if q is None:
        q = np.zeros_like(y)

    y01 = np.array(y == 1, dtype=int)
    w, c, yz = _intercept_dot(w, X, y)

    loss_yzp = -log_logistic(+yz)
    loss_yzn = -log_logistic(-yz)
    wp = 1 - np.take(rho, 1 - y01)
    wn = np.take(rho, y01)
    noise_loss = np.sum(sample_weight * (1-q) * (wp * loss_yzp - wn * loss_yzn)) / (1 - rho[0] - rho[1])
    clean_loss = np.sum(sample_weight * q * loss_yzp)
    out = clean_loss + noise_loss + .5 * alpha * np.dot(w, w)

    z = expit(yz)
    z0 = sample_weight * (q * (z-1) * y + (1-q) * (wp * (z-1) * y + wn * z * y))
    grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w

    # Case where we fit the intercept.
    if grad.shape[0] > n_features:
        grad[-1] = z0.sum()

    return out, grad
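The wp/wn weighting above (used again in the noise-aware variants later in this listing) encodes the standard unbiased estimator for learning with class-conditional label noise, with rho[0] the flip rate of the negative class and rho[1] that of the positive class. As a sketch, the per-sample corrected loss the code computes is

\tilde{\ell}(t, y) \;=\; \frac{(1 - \rho_{-y})\,\ell(t, y) \;-\; \rho_{y}\,\ell(t, -y)}{1 - \rho_{+1} - \rho_{-1}},
\qquad \ell(t, y) = -\log \sigma(y\,t), \quad t = x^\top w + c,

applied only to samples flagged as noisy (q = 0); samples flagged clean (q = 1) keep the plain loss \ell(t, y).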
Example #2
def log_loss(wp, X, target, C, PN, NP):
    """
    It is minimized using "L-BFGS-B" method of "scipy.optimize.minimize" function, and results in
    similar coefficients as sklearn's Logistic Regression when PN=NP=0.0.

    Parameters
    -------------
    wp: Coefficients & Intercept
    X: (N,M) shaped data matrix
    target: (N,) shaped 1-D array of targets
    C: Regularization
    PN: % of Positive samples labeled as Negative
    NP: % of Negative samples labeled as Positive

    Returns
    ------------
    loss_value: float

    """
    c = wp[-1]
    w = wp[:-1]
    z = np.dot(X, w) + c
    yz = target * z  # to compute l(t,y)
    nyz = -target * z  # to compute l(t,-y)
    ls = -log_logistic(yz)  # l(t,y)
    nls = -log_logistic(nyz)  # l(t,-y)
    idx = target == 1  # indexes of samples w/ P label
    loss = ls.copy()  # To store l-hat
    loss[idx] = (1 - NP) * ls[idx] - PN * nls[idx]  # Modified loss for P samples
    loss[~idx] = (1 - PN) * ls[~idx] - NP * nls[~idx]  # Modified loss for N samples
    loss = loss / (1 - PN - NP) + .5 * (1. / C) * np.dot(w, w)  # Normalization & regularization
    return loss.sum()  # Final loss
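A minimal usage sketch for the function above, assuming it is defined together with its imports (numpy and sklearn.utils.extmath.log_logistic); the data and the settings C=1.0, PN=NP=0.0 below are made up for illustration:

import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = np.where(X[:, 0] + 0.1 * rng.normal(size=200) > 0, 1.0, -1.0)  # labels in {-1, +1}

w0 = np.zeros(X.shape[1] + 1)  # coefficients followed by the intercept
res = minimize(log_loss, w0, args=(X, y, 1.0, 0.0, 0.0), method="L-BFGS-B")
coef, intercept = res.x[:-1], res.x[-1]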
Example #3
def test_logistic_sigmoid():
    # Check correctness and robustness of logistic sigmoid implementation
    def naive_log_logistic(x):
        return np.log(1 / (1 + np.exp(-x)))

    x = np.linspace(-2, 2, 50)
    assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))

    extreme_x = np.array([-100., 100.])
    assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
Example #4
def test_logistic_sigmoid():
    """Check correctness and robustness of logistic sigmoid implementation"""
    naive_logistic = lambda x: 1 / (1 + np.exp(-x))
    naive_log_logistic = lambda x: np.log(naive_logistic(x))

    x = np.linspace(-2, 2, 50)
    assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))

    extreme_x = np.array([-100., 100.])
    assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
Example #5
def test_logistic_sigmoid():
    # Check correctness and robustness of logistic sigmoid implementation
    def naive_log_logistic(x):
        return np.log(1 / (1 + np.exp(-x)))

    x = np.linspace(-2, 2, 50)
    assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))

    extreme_x = np.array([-100., 100.])
    assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
Example #6
def test_logistic_sigmoid():
    """Check correctness and robustness of logistic sigmoid implementation"""
    naive_logistic = lambda x: 1 / (1 + np.exp(-x))
    naive_log_logistic = lambda x: np.log(naive_logistic(x))

    x = np.linspace(-2, 2, 50)
    assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))

    extreme_x = np.array([-100., 100.])
    assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
Example #7
def test_logistic_sigmoid():
    # Check correctness and robustness of logistic sigmoid implementation
    naive_logistic = lambda x: 1 / (1 + np.exp(-x))
    naive_log_logistic = lambda x: np.log(naive_logistic(x))

    x = np.linspace(-2, 2, 50)
    with warnings.catch_warnings(record=True):
        assert_array_almost_equal(logistic_sigmoid(x), naive_logistic(x))
    assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))

    extreme_x = np.array([-100., 100.])
    assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
Example #8
def test_logistic_sigmoid():
    # Check correctness and robustness of logistic sigmoid implementation
    naive_logistic = lambda x: 1 / (1 + np.exp(-x))
    naive_log_logistic = lambda x: np.log(naive_logistic(x))

    x = np.linspace(-2, 2, 50)
    with warnings.catch_warnings(record=True):
        assert_array_almost_equal(logistic_sigmoid(x), naive_logistic(x))
    assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))

    extreme_x = np.array([-100.0, 100.0])
    assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
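The extreme_x assertion repeated in these tests is the reason a dedicated log_logistic helper exists: the naive formula overflows for large negative inputs. A small sketch, assuming a scikit-learn version that still ships sklearn.utils.extmath.log_logistic (newer SciPy exposes the equivalent scipy.special.log_expit); expect a RuntimeWarning from the overflowing np.exp on the naive path:

import numpy as np
from sklearn.utils.extmath import log_logistic

x = np.array([-1000.0, 1000.0])
naive = np.log(1.0 / (1.0 + np.exp(-x)))  # exp(1000) overflows -> [-inf, 0.]
stable = log_logistic(x)                  # evaluated in a stable branchwise form -> [-1000., 0.]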
Example #9
def _logistic_loss_and_grad(w, X, y, alpha, penalty, fit_intercept,
                            sample_weight):

    n_samples, n_features = X.shape
    grad = np.empty_like(w)

    c = 0.
    if fit_intercept:
        c = w[-1]
        w = w[:-1]

    z = safe_sparse_dot(X, w) + c
    yz = y * z

    if penalty == "l2":
        reg = .5 * alpha * np.dot(w, w)
        reg_grad = alpha * w
    else:
        reg = 0
        reg_grad = 0

    out = -np.sum(sample_weight * log_logistic(yz)) + reg

    z = expit(yz)
    z0 = sample_weight * (z - 1) * y

    grad[:n_features] = safe_sparse_dot(X.T, z0) + reg_grad

    if fit_intercept:
        grad[-1] = z0.sum()
    return out, grad
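The z0 = sample_weight * (z - 1) * y pattern used here and in most of the gradient examples below is the chain rule written out; with sigma the logistic function and z_i = x_i^T w + c,

\nabla_w \Big[ -\sum_i s_i \log \sigma(y_i z_i) \Big]
  \;=\; \sum_i s_i \big(\sigma(y_i z_i) - 1\big)\, y_i\, x_i
  \;=\; X^\top z_0,
\qquad \frac{\partial}{\partial c} \;=\; \sum_i s_i \big(\sigma(y_i z_i) - 1\big)\, y_i \;=\; \mathbf{1}^\top z_0 ,

which is also why the intercept gradient is simply z0.sum().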
Example #10
File: PCD.py  Project: Teedious/gcn_qrbm
    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).

        Returns
        -------
        pseudo_likelihood : ndarray of shape (n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).

        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """

        # Randomly corrupt one feature in each sample in v.
        ind = (np.arange(X.shape[0]),
               np.random.randint(0, X.shape[1], X.shape[0]))
        X_ = X.copy()
        X_[ind] = 1 - X_[ind]

        fe = self._free_energy(X)
        fe_ = self._free_energy(X_)
        return (X.shape[1] * log_logistic(fe_ - fe)).mean()
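The returned quantity follows the usual RBM pseudo-likelihood proxy (the comments in Examples #29 and #30 below spell out the same derivation): with F the free energy and \tilde{x} equal to x with one randomly chosen visible unit flipped,

\log P(x_i \mid x_{-i}) \;=\; \log \frac{e^{-F(x)}}{e^{-F(x)} + e^{-F(\tilde{x})}}
  \;=\; \log \sigma\big(F(\tilde{x}) - F(x)\big),

and multiplying by n_features treats the flipped unit as representative of all visible units.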
Example #11
File: logistic.py  Project: OuYag/hoag
def _logistic_loss(w, X, y, alpha, sample_weight=None):
    """Computes the logistic loss.
    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.
    y : ndarray, shape (n_samples,)
        Array of labels.
    alpha : float
        Regularization parameter. alpha is equal to 1 / C.
    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.
    Returns
    -------
    out : float
        Logistic loss.
    """
    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
    return out
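For reference, a sketch of the objective this function (and the identical Example #12) evaluates, with s_i the sample weights:

L(w, c) \;=\; -\sum_i s_i \log \sigma\big(y_i (x_i^\top w + c)\big) \;+\; \frac{\alpha}{2} \lVert w \rVert_2^2 .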
Example #12
def _logistic_loss(w, X, y, alpha, sample_weight=None):
    """Computes the logistic loss.
    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.
    y : ndarray, shape (n_samples,)
        Array of labels.
    alpha : float
        Regularization parameter. alpha is equal to 1 / C.
    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.
    Returns
    -------
    out : float
        Logistic loss.
    """
    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
    return out
Example #13
    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.
        Parameters
        ----------
        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).
        Returns
        -------
        pseudo_likelihood : array-like, shape (n_temperatures, n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).
        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self, "components_")

        v = check_array(X, accept_sparse='csr')
        rng = check_random_state(self.random_state)

        # Randomly corrupt one feature in each sample in v.
        ind = (np.arange(v.shape[0]), rng.randint(0, v.shape[1], v.shape[0]))
        if issparse(v):
            data = -2 * v[ind] + 1
            v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)
        else:
            v_ = v.copy()
            v_[ind] = 1 - v_[ind]

        fe = self._free_energy(v, 0)
        fe_ = self._free_energy(v_, 0)
        return v.shape[1] * log_logistic(fe_ - fe)
Example #14
def _logistic_loss(w, X, y, alpha, sample_weight=None, rho=None, q=None):
    """Computes the logistic loss.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.
    """
    # 0: noise, 1: clean
    if q is None:
        q = np.zeros_like(y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    y01 = np.array(y == 1, dtype=int)
    w, c, yz = _intercept_dot(w, X, y)

    loss_yzp = -log_logistic(+yz)
    loss_yzn = -log_logistic(-yz)
    wp = (1-np.take(rho, 1-y01)) / (1-rho[0]-rho[1])
    wn = ( -np.take(rho,   y01)) / (1-rho[0]-rho[1])
    noise_loss = np.sum(sample_weight * (1-q) * (wp * loss_yzp + wn * loss_yzn))
    clean_loss = np.sum(sample_weight * q * loss_yzp)
    out = clean_loss + noise_loss + .5 * alpha * np.dot(w, w)

    return out
Example #15
def _logistic_cost_grad(X,Y,w,diagA):
    '''
    Calculates cost and gradient for logistic regression
    '''
    n     = X.shape[0]
    Xw    = np.dot(X,w)
    s     = expit(Xw)
    wdA   = w*diagA
    wdA[0] = 1e-3 # broad prior for bias term => almost no regularization
    cost = np.sum( Xw* (1-Y) - log_logistic(Xw)) + np.sum(w*wdA)/2 
    grad  = np.dot(X.T, s - Y) + wdA
    return [cost/n,grad/n]
Example #16
def _logistic_cost_grad(X, Y, w, diagA):
    '''
    Calculates cost and gradient for logistic regression
    '''
    n = X.shape[0]
    Xw = np.dot(X, w)
    s = expit(Xw)
    wdA = w * diagA
    wdA[0] = 1e-3  # broad prior for bias term => almost no regularization
    cost = np.sum(Xw * (1 - Y) - log_logistic(Xw)) + np.sum(w * wdA) / 2
    grad = np.dot(X.T, s - Y) + wdA
    return [cost / n, grad / n]
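The cost expression in these two variants looks unusual but is the Bernoulli negative log-likelihood for 0/1 targets rearranged; a sketch, using \log(1 - \sigma(t)) = -t + \log \sigma(t):

-\sum_i \big[ Y_i \log \sigma(t_i) + (1 - Y_i) \log(1 - \sigma(t_i)) \big]
  \;=\; \sum_i \big[ (1 - Y_i)\, t_i \;-\; \log \sigma(t_i) \big],
\qquad t_i = x_i^\top w .

The later variants that compute -Xw*Y - log_logistic(-Xw) (Examples #18 and #19) apply the same identity the other way around.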
Example #17
def logistic_loss(w, X, Y, alpha):
    """
    Implementation of the logistic loss function when Y is a probability
    distribution.

    loss = -SUM_i SUM_k y_ik * log(P[yi == k]) + 0.5 * alpha * ||w||^2
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    intercept = 0

    if n_classes > 2:
        fit_intercept = w.size == (n_classes * (n_features + 1))
        w = w.reshape(n_classes, -1)
        if fit_intercept:
            intercept = w[:, -1]
            w = w[:, :-1]
    else:
        fit_intercept = w.size == (n_features + 1)
        if fit_intercept:
            intercept = w[-1]
            w = w[:-1]

    z = safe_sparse_dot(X, w.T) + intercept

    if n_classes == 2:
        # in the binary case, simply compute the logistic function
        p = np.vstack([log_logistic(-z), log_logistic(z)]).T
    else:
        # compute the logistic function for each class and normalize
        denom = expit(z)
        denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
        p = log_logistic(z)
        loss = -(Y * p).sum()
        loss += np.log(denom).sum()  # each row of Y sums to 1
        loss += 0.5 * alpha * squared_norm(w)
        return loss

    loss = -(Y * p).sum() + 0.5 * alpha * squared_norm(w)
    return loss
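In the multiclass branch the class probabilities are parameterized as independently squashed, renormalized sigmoids rather than a softmax; a sketch of what the denom/log_logistic bookkeeping computes, assuming each row of Y sums to 1:

p_{ik} \;=\; \frac{\sigma(z_{ik})}{\sum_j \sigma(z_{ij})}, \qquad
\mathrm{loss} \;=\; -\sum_{i,k} Y_{ik} \log p_{ik} + \frac{\alpha}{2}\lVert w \rVert^2
  \;=\; -\sum_{i,k} Y_{ik} \log \sigma(z_{ik}) + \sum_i \log\Big(\sum_j \sigma(z_{ij})\Big) + \frac{\alpha}{2}\lVert w \rVert^2 .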
Example #18
def _logistic_cost_grad(X, Y, w, diagA, penalise_intercept):
    '''
    Calculates cost and gradient for logistic regression
    '''
    n = X.shape[0]
    Xw = np.dot(X, w)
    s = expit(Xw)
    wdA = w * diagA
    if not penalise_intercept:
        wdA[0] = 0
    cost = np.sum(-Xw * Y - log_logistic(-Xw)) + np.sum(w * wdA) / 2
    grad = np.dot(X.T, s - Y) + wdA
    return [cost / n, grad / n]
Example #19
def _logistic_cost_grad(X,Y,w,diagA, penalise_intercept):
    '''
    Calculates cost and gradient for logistic regression
    '''
    n     = X.shape[0]
    Xw    = np.dot(X,w)
    s     = expit(Xw)
    wdA   = w*diagA
    if not penalise_intercept:
        wdA[0] = 0
    cost = np.sum( -Xw*Y - log_logistic(-Xw)) + np.sum(w*wdA)/2 
    grad  = np.dot(X.T, s - Y) + wdA
    return [cost/n,grad/n]
Example #20
def pseudo_likelihood(v, weights, biases_v, biases_h):
    corruption = (np.arange(v.shape[0]),
                  np.random.randint(0, v.shape[1], v.shape[0]))

    v_copy = v.copy()
    v_copy[corruption] = 1 - v_copy[corruption]

    energy = free_energy(v, weights, biases_v, biases_h)
    energy_copy = free_energy(v_copy, weights, biases_v, biases_h)

    likelihoods = v.shape[1] * log_logistic(energy_copy - energy)

    return likelihoods.mean()
Example #21
def _logistic_loss_and_grad(w, X, y, alpha, mask, sample_weight=None):
    """Computes the logistic loss and gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.
    y : ndarray, shape (n_samples,)
        Array of labels.
    alpha : float
        Regularization parameter. alpha is equal to 1 / C.
    mask : array-like, shape (n_features), (n_classes, n_features) optional
        Masking array for coef.
    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.
    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    n_samples, n_features = X.shape
    if mask is not None:
        w[:n_features] *= mask
    grad = np.empty_like(w)

    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(n_samples)

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) / n_samples
    out += .5 * alpha * np.dot(w, w)

    z = expit(yz)
    z0 = sample_weight * (z - 1) * y

    grad[:n_features] = (safe_sparse_dot(X.T, z0) / n_samples) + alpha * w
    if mask is not None:
        grad[:n_features] *= mask

    # Case where we fit the intercept.
    if grad.shape[0] > n_features:
        grad[-1] = z0.sum() / n_samples
    return out, grad
Example #22
    def predict_proba(self, X):
        """Predict probabilities for samples

        Args:
            X : {array-like, sparse matrix}, shape = (n_samples, n_features)
                Samples.

        Returns:
            array-like of shape (n_samples, n_classes): Returns the probability of the sample for each class in the model,
                where classes are ordered as they are in ``self.classes_``.
        """

        probs = np.exp(log_logistic(self.decision_function(X)))
        return np.column_stack((1 - probs, probs))
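A note on the exp/log round trip above: exponentiating log_logistic simply recovers the sigmoid of the decision function, so, assuming the usual binary ordering of self.classes_,

p \;=\; e^{\log \sigma(f(x))} \;=\; \sigma(f(x)), \qquad
\text{predict\_proba}(x) \;=\; [\, 1 - p,\; p \,] \;=\; [\, P(y = \text{classes\_}[0] \mid x),\; P(y = \text{classes\_}[1] \mid x) \,].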
Example #23
def _logistic_l1_loss_and_grad(w2, X, y, alpha, penalty, fit_intercept,
                               l1_ratio, sample_weight):

    n_samples, n_features = X.shape

    grad = np.empty_like(w2)
    reg_grad = np.zeros(w2.size)

    c = 0.
    if fit_intercept:
        c = w2[-1]
        w = w2[:n_features] - w2[n_features:-1]
        t = w2[:n_features] + w2[n_features:-1]
    else:
        w = w2[:n_features] - w2[n_features:]
        t = w2[:n_features] + w2[n_features:]

    z = safe_sparse_dot(X, w) + c
    yz = y * z

    if penalty == "l1":
        reg = alpha * t.sum()
        reg_grad = alpha
    elif penalty == "elasticnet":
        regl2 = 0.5 * (1 - l1_ratio) * alpha * np.dot(w, w)
        regl1 = l1_ratio * alpha * t.sum()
        reg = regl2 + regl1
        rg1 = alpha * l1_ratio
        rg2 = alpha * (1 - l1_ratio) * w
        reg_grad[:2 * n_features] = np.concatenate([rg2, -rg2]) + rg1

    out = -np.sum(sample_weight * log_logistic(yz)) + reg

    z = expit(yz)
    z0 = sample_weight * (z - 1) * y

    g = safe_sparse_dot(X.T, z0)

    if fit_intercept:
        grad[:n_features] = g
        grad[n_features:-1] = -g
        grad[-1] = z0.sum()
    else:
        grad[:n_features] = g
        grad[n_features:] = -g

    grad += reg_grad

    return out, grad
Example #24
    def fgrad(we, X, y, l1, l2):
        nsamples, nfactors = X.shape

        w0 = we[0]
        w = we[1:(nfactors + 1)] - we[(nfactors + 1):]
        yz = y * (safe_sparse_dot(X, w) + w0)
        f = -np.sum(log_logistic(yz)) + l1 * np.sum(
            we[1:]) + 0.5 * l2 * np.dot(w, w)

        e = (expit(yz) - 1) * y
        g = safe_sparse_dot(X.T, e) + l2 * w
        g0 = np.sum(e)

        grad = np.concatenate([g, -g]) + l1
        grad = np.insert(grad, 0, g0)

        return f, grad
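Both this fgrad helper and the extended-weight losses elsewhere in this listing (_logistic_l1_loss_and_grad, _l1_logistic_loss_grad) rely on the standard positive/negative split that turns an L1 penalty into a smooth, bound-constrained problem suitable for L-BFGS-B; a sketch:

w = w^{+} - w^{-}, \quad w^{+}, w^{-} \ge 0: \qquad
\min_{w} \sum_i \ell_i(w) + \lambda \lVert w \rVert_1
\;\;\Longleftrightarrow\;\;
\min_{w^{+}, w^{-} \ge 0} \sum_i \ell_i(w^{+} - w^{-}) + \lambda\, \mathbf{1}^\top (w^{+} + w^{-}),

which is why the gradient is the plain data gradient g stacked as [g, -g] plus the constant \lambda.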
Example #25
def _logistic_loss(w, X, y, alpha, sample_weight=None, rho=None, q=None):
    """Computes the logistic loss.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.
    """
    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)

    # add noise term
    if q is None:
        q = np.zeros_like(y)
    y01 = np.array(y == 1, dtype=int)
    qnoise = np.array(q == 0, dtype=bool)
    if np.any(qnoise):
        rho_y = np.array([[rho[1-label],rho[label]] for label in y01])
        yzq = yz[qnoise]
        wq  = sample_weight[qnoise]
        out += np.sum(wq * log_noise_logistic(yzq, rho_y[qnoise,:]))

    return out
Example #26
def temp_log_loss(w, X, Y, alpha):
    n_classes = Y.shape[1]
    w = w.reshape(n_classes, -1)
    intercept = w[:, -1]
    w = w[:, :-1]
    z = safe_sparse_dot(X, w.T) + intercept

    denom = expit(z)
    #print denom
    #print denom.sum()
    denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
    #print denom
    p = log_logistic(z)

    loss = -(Y * p).sum()
    loss += np.log(denom).sum()
    loss += 0.5 * alpha * squared_norm(w)

    return loss
Example #27
File: utils.py  Project: ftramer/Steal-ML
def temp_log_loss(w, X, Y, alpha):
    n_classes = Y.shape[1]
    w = w.reshape(n_classes, -1)
    intercept = w[:, -1]
    w = w[:, :-1]
    z = safe_sparse_dot(X, w.T) + intercept

    denom = expit(z)
    #print denom
    #print denom.sum()
    denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
    #print denom
    p = log_logistic(z)

    loss = - (Y * p).sum()
    loss += np.log(denom).sum()
    loss += 0.5 * alpha * squared_norm(w)

    return loss
Example #28
File: logistic.py  Project: OuYag/hoag
def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None):
    """Computes the logistic loss and gradient.
    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.
    y : ndarray, shape (n_samples,)
        Array of labels.
    alpha : float
        Regularization parameter. alpha is equal to 1 / C.
    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.
    Returns
    -------
    out : float
        Logistic loss.
    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    _, n_features = X.shape
    grad = np.empty_like(w)

    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)

    z = expit(yz)
    z0 = sample_weight * (z - 1) * y

    grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w

    # Case where we fit the intercept.
    if grad.shape[0] > n_features:
        grad[-1] = z0.sum()
    return out, grad
Example #29
    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.

        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).

        Returns
        -------
        pseudo_likelihood : array-like, shape (n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).

        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self, "components_")

        v = check_array(X, accept_sparse='csr')
        fe = self._free_energy(v)

        v_, state = self.corrupt(v)
        # TODO: If I wanted to be really fancy here, I would do one of those "with..." things.
        fe_corrupted = self._free_energy(v)
        self.uncorrupt(v, state)

        # See https://en.wikipedia.org/wiki/Pseudolikelihood
        # Let x be some visible vector. x_i is the ith entry. x_-i is the vector except that entry. 
        #       x_iflipped is x with the ith bit flipped. F() is free energy.
        # P(x_i | x_-i) = P(x) / P(x_-i) = P(x) / (P(x) + p(x_iflipped))
        # expand def'n of P(x), cancel out the partition function on each term, and divide top and bottom by e^{-F(x)} to get...
        # 1 / (1 + e^{F(x) - F(x_iflipped)})
        # So we're just calculating the log of that. We multiply by the number of
        # visible units because we're approximating P(x) as the product of the conditional likelihood
        # of each individual unit. But we're too lazy to do each one individually, so we say the unit
        # we tested represents an average.
        if hasattr(self, 'codec'):
            normalizer = self.codec.shape()[0]
        else:
            normalizer = v.shape[1]
        return normalizer * log_logistic(fe_corrupted - fe)
Example #30
    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.

        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).

        Returns
        -------
        pseudo_likelihood : array-like, shape (n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).

        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self, "components_")

        v = check_array(X, accept_sparse='csr')
        fe = self._free_energy(v)

        v_, state = self.corrupt(v)
        # TODO: If I wanted to be really fancy here, I would do one of those "with..." things.
        fe_corrupted = self._free_energy(v)
        self.uncorrupt(v, state)

        # See https://en.wikipedia.org/wiki/Pseudolikelihood
        # Let x be some visible vector. x_i is the ith entry. x_-i is the vector except that entry.
        #       x_iflipped is x with the ith bit flipped. F() is free energy.
        # P(x_i | x_-i) = P(x) / P(x_-i) = P(x) / (P(x) + p(x_iflipped))
        # expand def'n of P(x), cancel out the partition function on each term, and divide top and bottom by e^{-F(x)} to get...
        # 1 / (1 + e^{F(x) - F(x_iflipped)})
        # So we're just calculating the log of that. We multiply by the number of
        # visible units because we're approximating P(x) as the product of the conditional likelihood
        # of each individual unit. But we're too lazy to do each one individually, so we say the unit
        # we tested represents an average.
        if hasattr(self, 'codec'):
            normalizer = self.codec.shape()[0]
        else:
            normalizer = v.shape[1]
        return normalizer * log_logistic(fe_corrupted - fe)
Example #31
    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.

        Parameters
        ----------
        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).

        Returns
        -------
        pseudo_likelihood : array-like, shape (n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).

        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self, "components_")

        v = check_array(X, accept_sparse='csr')
        rng = check_random_state(self.random_state)

        # Randomly corrupt one feature in each sample in v.
        ind = (np.arange(v.shape[0]),
               rng.randint(0, v.shape[1], v.shape[0]))
        if issparse(v):
            data = -2 * v[ind] + 1
            v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)
        else:
            v_ = v.copy()
            v_[ind] = 1 - v_[ind]

        fe = self._free_energy(v)
        fe_ = self._free_energy(v_)
        return v.shape[1] * log_logistic(fe_ - fe)
Example #32
    def score_samples_TAP(self, X):
        """Compute the pseudo-likelihood of X using second order TAP
        Parameters
        ----------
        X : {array-like, sparse matrix} shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).
        Returns
        -------
        pseudo_likelihood : array-like, shape (n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).
        Notes
        -----
        This method is not deterministic: it computes the TAP Free Energy on X,
        then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self, "W")

        v = check_array(X, accept_sparse='csr')
        v, v_ = self._corrupt_data(v)

        fe = self._free_energy_TAP(v)
        fe_ = self._free_energy_TAP(v_)
        return v.shape[1] * log_logistic(fe_ - fe)
Example #33
def _logistic_loss_and_grad(w, alpha, X, y, lamda, sample_weight=None):
    """Computes the logistic loss and gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    n_patients = len(X)
    out = 0.
    grad = np.zeros_like(w)
    sample_weight_orig = sample_weight.copy() if sample_weight is not None \
        else None

    for i in range(n_patients):
        n_kernels, n_samples, n_features = X[i].shape
        x_i = np.tensordot(w, X[i], axes=1)
        alpha_i, c, yz = _intercept_dot(alpha[i], x_i, y[i])

        if sample_weight_orig is None:
            sample_weight = np.ones(n_samples)

        # Logistic loss is the negative of the log of the logistic function.
        out += -np.sum(sample_weight * log_logistic(yz))

        z = expit(yz)
        z0 = sample_weight * (z - 1) * y[i]

        grad += safe_sparse_dot(X[i].dot(alpha_i), z0)

        # alpha_i, c_i, x_i = _intercept_dot(alpha[i][:-1], X[i], 1.)
        # out_i, grad_i = _loglossgrad(
        #     np.append(w, alpha[i][-1]), x_i.T, y[i], 0,
        #     sample_weight=sample_weight)
        # out += out_i
        # grad += grad_i[:n_kernels]

    out += .5 * lamda * np.dot(w, w)
    grad += lamda * w
    return out, grad
Example #34
def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None, rho=None, q=None):
    """Computes the logistic loss and gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """

    _, n_features = X.shape
    grad = np.empty_like(w)

    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
    z = expit(yz)

    # add noise term
    if q is None:
        q = np.zeros_like(y)
    y01 = np.array(y == 1, dtype=int)
    qnoise = np.array(q == 0, dtype=bool)
    if np.any(qnoise):
        rho_y = np.array([[rho[1-label],rho[label]] for label in y01])
        z += expit_noise(yz, qnoise, rho_y)
        yzq = yz[qnoise]
        wq  = sample_weight[qnoise]
        out += np.sum(wq * log_noise_logistic(yzq, rho_y[qnoise,:]))

    z0 = sample_weight * (z - 1) * y
    grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w

    # Case where we fit the intercept.
    if grad.shape[0] > n_features:
        grad[-1] = z0.sum()
    return out, grad
Example #35
    def loss(self, y, pred):
        return (-log_logistic(y * pred))[0][0]
Example #36
def _l1_logistic_loss_grad(w_extended, X, y, C, D, k, idx, ignore2w):
    # print(k)
    _, n_features = X.shape
    w = w_extended[:n_features] - w_extended[n_features:]
    # w[regularized_alphas] = 0.
    yz = y * safe_sparse_dot(X, w)

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(log_logistic(yz))
    # out += .5 * alpha * np.dot(w, w)  # L2

    w_extended_ = copy.copy(w_extended)
    # don't regularize \alphas
    if ignore2w == 0:
        reg_idx = list(range(1, idx))
        reg_idx2 = list(range(n_features + 1, n_features + idx))
        w_extended_[reg_idx] = 0.
        w_extended_[reg_idx2] = 0.
    # model_user_event
    elif ignore2w == 1:
        reg_idx = list(range(idx, n_features))
        reg_idx2 = list(range(n_features + idx, 2 * n_features))
        w_extended_[reg_idx] = 0.
        w_extended_[reg_idx2] = 0.
    # model_user_event_fb
    else:
        reg_idx = list(range(1, idx)) + list(range(idx, n_features))
        reg_idx2 = list(range(n_features, n_features + idx)) + list(
            range(n_features + idx, 2 * n_features))
        # if len(regularized_alphas) > 0:
        #    unpenalized_idx = list(set(list(range(0, idx - 1))) - set(regularized_alphas)) + list(
        #        set(list(range(idx, n_features))) - set(idx + np.array(regularized_alphas)))
        #    penalized_idx = list(regularized_alphas) + list(idx + np.array(regularized_alphas))
        #    w_extended_[penalized_idx] = w_extended_[penalized_idx]*1000000
        #    w_extended_[unpenalized_idx] = 0.
        # else:
        w_extended_[reg_idx] = 0.
        w_extended_[reg_idx2] = 0.

    if ignore2w > 0:
        w_ = w[idx:]
        w_ = np.transpose(w_.flatten().reshape(k, -1))
        # print(w_.shape)
        Dsmooth = w_[1:, :] - w_[:-1, :]
        zero = np.zeros((1, k))

        Dsmooth = np.concatenate((Dsmooth, zero), axis=0)
        Dsmooth = np.transpose(Dsmooth)
        Dsmooth = Dsmooth.flatten()
        Dsmooth_squared = Dsmooth * Dsmooth

        # out += alpha * w_extended.sum()
        out += C * w_extended_.sum() + 0.5 * D * Dsmooth_squared.sum(
        )  # L1, w_extended is non-negative

        z = expit(yz)
        z0 = (z - 1) * y

        grad = safe_sparse_dot(X.T, z0)
        grad = np.concatenate([grad, -grad])

        # grad += alpha * w  # L2
        # grad += alpha +  # L1
        D_grad = np.zeros((n_features, ))
        D_grad[idx:] = Dsmooth
        D_grad = np.concatenate([D_grad, -D_grad])
        grad += C - D * D_grad
    else:
        out += C * w_extended.sum()  # L1 term (matches grad += C below)
        z = expit(yz)
        z0 = (z - 1) * y
        grad = safe_sparse_dot(X.T, z0)
        grad = np.concatenate([grad, -grad])
        grad += C

    return out, grad
Example #37
def _l1_logistic_loss_grad(w_extended, X, y, C, D, k, idx, ignore2w):
    # print(k)
    _, n_features = X.shape
    w = w_extended[:n_features] - w_extended[n_features:]

    yz = y * safe_sparse_dot(X, w)

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(log_logistic(yz))
    # out += .5 * alpha * np.dot(w, w)  # L2

    w_extended_ = copy.copy(w_extended)
    if ignore2w == 0:
        reg_idx = list(range(0, idx - 1))
        reg_idx2 = list(range(n_features, n_features + idx - 1))
        w_extended_[reg_idx] = 0.
        w_extended_[reg_idx2] = 0.
    elif ignore2w == 1:
        reg_idx = list(range(idx, n_features))
        reg_idx2 = list(range(n_features + idx, 2 * n_features))
        w_extended_[reg_idx] = 0.
        w_extended_[reg_idx2] = 0.
    else:
        n_user = int((idx - 1) / 2)
        reg_idx = list(range(0, n_user)) + list(range(idx, n_features))
        reg_idx2 = list(range(n_features, n_features + n_user)) + list(
            range(n_features + idx, 2 * n_features))
        #reg_idx = list(range(0, idx-1)) + list(range(idx, n_features))
        #reg_idx2 = list(range(n_features, n_features + idx-1)) + list(range(n_features + idx, 2 * n_features))
        w_extended_[reg_idx] = 0.
        w_extended_[reg_idx2] = 0.

    if ignore2w > 0:
        w_ = w[idx:]
        w_ = np.transpose(w_.flatten().reshape(k, -1))
        # print(w_.shape)
        Dsmooth = w_[1:, :] - w_[:-1, :]
        zero = np.zeros((1, k))

        Dsmooth = np.concatenate((Dsmooth, zero), axis=0)
        Dsmooth = np.transpose(Dsmooth)
        Dsmooth = Dsmooth.flatten()
        Dsmooth_squared = Dsmooth * Dsmooth

        # out += alpha * w_extended.sum()
        out += C * w_extended_.sum() + D * Dsmooth_squared.sum(
        )  # L1, w_extended is non-negative

        z = expit(yz)
        z0 = (z - 1) * y

        grad = safe_sparse_dot(X.T, z0)
        grad = np.concatenate([grad, -grad])

        # grad += alpha * w  # L2
        # grad += alpha +  # L1
        D_grad = np.zeros((n_features, ))
        D_grad[idx:] = Dsmooth
        D_grad = np.concatenate([D_grad, -D_grad])
        grad += C - 2 * D * D_grad
    else:
        out += C * w_extended.sum()  # L1 term (matches grad += C below)
        z = expit(yz)
        z0 = (z - 1) * y
        grad = safe_sparse_dot(X.T, z0)
        grad = np.concatenate([grad, -grad])
        grad += C

    return out, grad
Example #38
def _logistic_loss_and_grad(w, alpha, X, y, lamda, sample_weight=None):
    """Computes the logistic loss and gradient.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    n_patients = len(X)
    out = 0.
    grad = np.zeros_like(w)
    sample_weight_orig = sample_weight.copy() if sample_weight is not None \
        else None

    for i in range(n_patients):
        n_kernels, n_samples, n_features = X[i].shape
        x_i = np.tensordot(w, X[i], axes=1)
        alpha_i, c, yz = _intercept_dot(alpha[i], x_i, y[i])

        if sample_weight_orig is None:
            sample_weight = np.ones(n_samples)

        # Logistic loss is the negative of the log of the logistic function.
        out += -np.sum(sample_weight * log_logistic(yz))

        z = expit(yz)
        z0 = sample_weight * (z - 1) * y[i]

        grad += safe_sparse_dot(X[i].dot(alpha_i), z0)

        # alpha_i, c_i, x_i = _intercept_dot(alpha[i][:-1], X[i], 1.)
        # out_i, grad_i = _loglossgrad(
        #     np.append(w, alpha[i][-1]), x_i.T, y[i], 0,
        #     sample_weight=sample_weight)
        # out += out_i
        # grad += grad_i[:n_kernels]

    out += .5 * lamda * np.dot(w, w)
    grad += lamda * w
    return out, grad