Exemplo n.º 1
0
def AdaptiveLasso(X, y, logistic=False, sample_weight=None, adaptive_weights=None, random_state=None):
    """
    Adaptive Lasso with 5-fold cross-validation for optimal lambda.

    Parameters
    ----------
    X : array-like
        Design matrix. It is handed to glmnet unchanged
        (``standardize=False``, ``fit_intercept=False``).
    y : array-like
        Response vector (class labels when ``logistic`` is true).
    logistic : bool
        If true a ``glmnet.LogitNet`` scored by ``'accuracy'`` is fitted,
        otherwise a ``glmnet.ElasticNet`` scored by
        ``'mean_squared_error'``.
    sample_weight : array-like or None
        Forwarded to ``fit`` as ``sample_weight``.
    adaptive_weights : array-like or None
        Forwarded to ``fit`` as ``relative_penalties``.
    random_state : int, RandomState or None
        Forwarded to the glmnet estimator so that repeated calls with the
        same value can be reproduced. ``None`` keeps the library default.

    Returns
    -------
    The ``coef_`` attribute of the fitted estimator.
    """
    # Both variants share every setting except the estimator class and the
    # cross-validation scorer, so pick those two and build the model once.
    if logistic:
        estimator_cls, scoring = glmnet.LogitNet, 'accuracy'
    else:
        estimator_cls, scoring = glmnet.ElasticNet, 'mean_squared_error'

    enet = estimator_cls(standardize=False, fit_intercept=False, n_splits=5,
                         scoring=scoring, alpha=1, random_state=random_state)
    enet.fit(X, y, relative_penalties=adaptive_weights, sample_weight=sample_weight)
    return enet.coef_
Exemplo n.º 2
0
    def _logistic_regression(self, theta, K=10):
        """
        Computes logistic regression coefficients for data sampled from the
        likelihood and from the marginal.

        theta: np.ndarray
            Parameter sample for which to compute the coefficients.
        K: int
            The number of K-fold cross-validations to be used.

        Returns: np.ndarray
            Column vector (reshaped to ``(-1, 1)``) holding the intercept
            entries first, followed by the flattened coefficients, both
            taken from the fitted path at index ``lambda_max_inx_``.
        """

        # Select model params according to weights. All indices are drawn in
        # a single call instead of one np.random.choice call per sample.
        ws_norm = self.weights / np.sum(self.weights)
        n_draws = self.prior_samples.shape[0]
        chosen = np.random.choice(len(ws_norm), size=n_draws, p=ws_norm)
        # Kept as a list of rows, the same container the sampler received
        # before.
        p_selec = [self.prior_samples[cat] for cat in chosen]

        # Simulate from marginal using selected model params
        y_m = self.simobj.sample_data(self.d, p_selec)
        y_m = self.simobj.summary(y_m)

        # Simulate from likelihood
        y_t = self.simobj.sample_data(self.d, theta, num=len(ws_norm))
        y_t = self.simobj.summary(y_t)

        # Prepare targets: 1 for likelihood samples, 0 for marginal samples
        t_t = np.ones(y_t.shape[0])
        t_m = np.zeros(y_m.shape[0])

        # Concatenate data (likelihood rows first, matching the targets)
        Y = np.concatenate((y_t, y_m), axis=0)
        T = np.concatenate((t_t, t_m))

        # Define glmnet model
        model = glmnet.LogitNet(n_splits=K,
                                verbose=False,
                                n_jobs=1,
                                scoring="log_loss")
        model.fit(Y, T)

        # Collect coefficients and intercept at the lambda_max_inx_ position
        # of the regularisation path.
        cf_choice = model.coef_path_[..., model.lambda_max_inx_].T.reshape(-1)
        inter = model.intercept_path_[..., model.lambda_max_inx_]
        cf = np.array(list(inter) + list(cf_choice)).reshape(-1, 1)

        return cf
Exemplo n.º 3
0
def run_glmnet_classification(df, X, y, dataset, reg_alpha, reg_lambda, metric,
                              density):
    """
    Fit a glmnet logistic model and append one benchmark row to ``df``.

    The model uses ``alpha = reg_alpha / (reg_alpha + reg_lambda)`` and a
    single-value lambda path ``[reg_alpha + reg_lambda]``.

    The appended row holds, in order: ``dataset``, ``X.shape[1]``,
    ``X.shape[0]``, ``density``, ``'Classification'``, ``reg_alpha``,
    ``reg_lambda``, ``'Glmnet'``, ``metric``, the accuracy of the model on
    the data it was fitted on, the elapsed seconds for constructing and
    fitting the model, ``'-'``, and ``count_zero_coefficients(glm.coef_)``.

    ``metric`` is only recorded as a label; the score is always computed
    with ``metrics.accuracy_score``. ``df`` is modified in place and nothing
    is returned.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    # NOTE(review): reg_alpha + reg_lambda == 0 makes the ratio below
    # undefined; callers are presumably expected to pass a positive total.
    total_penalty = reg_alpha + reg_lambda
    glm = glmnet.LogitNet(alpha=reg_alpha / total_penalty,
                          lambda_path=[total_penalty])
    glm.fit(X, y)
    glmnet_time = time.perf_counter() - start

    glmnet_score = metrics.accuracy_score(y, glm.predict(X))
    glmnet_zero = count_zero_coefficients(glm.coef_)

    df.loc[len(df)] = [
        dataset, X.shape[1], X.shape[0], density, 'Classification', reg_alpha,
        reg_lambda, 'Glmnet', metric, glmnet_score, glmnet_time, '-',
        glmnet_zero
    ]
Exemplo n.º 4
0
def ElasticNet(X, y, logistic=False, sample_weight=None, random_state=None):
    """
    Elastic Net with cross-validation for optimal alpha and lambda.

    One glmnet model is fitted (5-fold CV over its lambda path) for each of
    the eleven mixing values ``alpha = 0.0, 0.1, ..., 1.0``. The model whose
    best ``cv_mean_score_`` entry is largest is selected.

    Parameters
    ----------
    X : array-like
        Design matrix. It is handed to glmnet unchanged
        (``standardize=False``, ``fit_intercept=False``).
    y : array-like
        Response vector (class labels when ``logistic`` is true).
    logistic : bool
        If true ``glmnet.LogitNet`` scored by ``'accuracy'`` is used,
        otherwise ``glmnet.ElasticNet`` scored by ``'mean_squared_error'``.
    sample_weight : array-like or None
        Forwarded to ``fit`` as ``sample_weight``.
    random_state : int, RandomState or None
        Forwarded to every glmnet estimator so that runs can be reproduced.
        ``None`` keeps the library default.

    Returns
    -------
    The ``coef_`` attribute of the selected model.
    """
    # The two variants differ only in estimator class and CV scorer.
    if logistic:
        estimator_cls, scoring = glmnet.LogitNet, 'accuracy'
    else:
        estimator_cls, scoring = glmnet.ElasticNet, 'mean_squared_error'

    models = []
    best_scores = []
    # linspace pins the grid to exactly 11 points ending at 1.0; a float-step
    # arange depends on rounding of the stop value to get the same grid.
    for alpha in np.linspace(0.0, 1.0, 11):
        cv_enet = estimator_cls(standardize=False, fit_intercept=False, n_splits=5,
                                scoring=scoring, alpha=alpha,
                                random_state=random_state)
        # Fit exactly once per alpha (the model used to be fitted twice).
        cv_enet.fit(X, y, sample_weight=sample_weight)
        models.append(cv_enet)
        best_scores.append(cv_enet.cv_mean_score_.max())

    # Selection relies on cv_mean_score_ being larger-is-better for both
    # scorers; ties resolve to the smallest alpha, as argmax returns the
    # first maximum.
    return models[int(np.argmax(best_scores))].coef_
Exemplo n.º 5
0
        y_test.append(k)
    k += 1

# Convert the collected train/test features and labels to NumPy arrays so
# they can be indexed with boolean masks further down.
X_train, X_test, y_train, y_test = map(
    np.array, (X_train, X_test, y_train, y_test)
)

# model.docvecs.most_similar()

## fit classifier ##
import glmnet
from sklearn.metrics import confusion_matrix
import pandas as pd

# Train a default LogitNet on the full training set, then evaluate it on
# the held-out data. The score and the confusion matrix are computed but
# not stored (useful when run interactively).
glm = glmnet.LogitNet().fit(X_train, y_train)
glm.score(X_test, y_test)

y_pred = glm.predict(X_test)
confusion_matrix(y_test, y_pred)

## fit classifier subset ##
subset = [0, 2]  # Fox and NYT

# Boolean masks selecting only the samples whose label is in `subset`.
ix_train, ix_test = (
    np.isin(labels, subset) for labels in (y_train, y_test)
)

# Refit a fresh default LogitNet on the restricted training data and
# evaluate it on the equally restricted test data.
glm = glmnet.LogitNet().fit(X_train[ix_train], y_train[ix_train])
glm.score(X_test[ix_test], y_test[ix_test])
y_pred = glm.predict(X_test[ix_test])
Exemplo n.º 6
0
import glmnet
import sklearn.datasets
import sklearn.model_selection
import sklearn.metrics

# Smoke test: a default LogitNet must reach better than 90% accuracy on a
# fixed train/test split of the iris data.
X, y = sklearn.datasets.load_iris(return_X_y=True)

# train_size=0.8 with a fixed random_state keeps the split reproducible.
Xtrn, Xtst, ytrn, ytst = sklearn.model_selection.train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=4,
)

clf = glmnet.LogitNet().fit(Xtrn, ytrn)
ypred = clf.predict(Xtst)

acc = sklearn.metrics.accuracy_score(ytst, ypred)
print('glmnet accuracy on iris:', acc)
assert acc > 0.9