def train_lrpipe(trainX, trainY, params):
    """ trains LogisiticRegression model with params
        logreg_C specified by params 
        """
    lrpipe = Pipeline([('logreg', LogisticRegression(penalty="l1", C=1))])
    lrpipe = lrpipe.fit(trainX, trainY, **params)
    return lrpipe
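
# A minimal usage sketch for train_lrpipe (an assumption, not from the original:
# synthetic data, an empty fit-parameter dict, and it presumes Pipeline and
# LogisticRegression are already imported as the function requires; non-empty
# params are simply forwarded to the pipeline's fit call).
import numpy as np

rng = np.random.RandomState(0)
trainX = rng.randn(100, 5)
trainY = (trainX[:, 0] + trainX[:, 1] > 0).astype(int)

model = train_lrpipe(trainX, trainY, {})
print model.predict(trainX[:5])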
def SciKits(featureValues, classLabels):

    # TODO: check how non-convergence is reported

    (ns, nf) = featureValues.shape
    assert classLabels.shape == (ns, )
    # NOTE: the import path changed between releases of the library.
    try:
        from scikits.learn.linear_model import LogisticRegression
    except ImportError:
        # scikits version >= 0.9 ships the package as sklearn
        from sklearn.linear_model import LogisticRegression
    import numpy

    RegularisationParameter = 1.0e+30  # For us, higher is better (effectively no regularisation)
    Tolerance               = 1.0e-30  # Smaller is better

    # From the documentation page at
    #
    #     http://scikit-learn.sourceforge.net/modules/generated/scikits.learn.linear_model.LogisticRegression.html
    #
    # "The underlying C implementation uses a random number generator to select features when fitting the model.
    #  It is thus not uncommon, to have slightly different results for the same input data.
    #  If that happens, try with a smaller tol parameter."

    classifier = LogisticRegression(penalty = 'l1', C = RegularisationParameter, tol = Tolerance)
    classifier.fit(featureValues, classLabels)

    beta = -classifier.raw_coef_[0,:]
    beta = numpy.append(beta[-1], beta[:-1])

    if not all(numpy.isfinite(beta)):
        return None

    return beta
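
# A minimal usage sketch for SciKits() (illustrative only; the synthetic data
# and shapes are assumptions, not from the original). The returned beta appears
# to hold the intercept first, followed by the per-feature coefficients.
import numpy

rng = numpy.random.RandomState(0)
featureValues = rng.randn(50, 3)
classLabels = (featureValues[:, 0] > 0).astype(int)

beta = SciKits(featureValues, classLabels)
if beta is None:
    print "fit produced non-finite coefficients"
else:
    print "intercept and coefficients:", beta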
def do_grid_search(X, Y, gs_params):
    """ Given data (X,Y) will perform a grid search on g_params
        for a LogisticRegression called logreg
        """
    lrpipe = Pipeline([('logreg', LogisticRegression())])
    gs = GridSearchCV(lrpipe, gs_params, n_jobs=-1)
    #print gs
    gs = gs.fit(X, Y)

    best_parameters, score = max(gs.grid_scores_, key=lambda x: x[1])
    logger.info("best_parameters: " + str(best_parameters))
    logger.info("expected score: " + str(score))

    return best_parameters
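
# An illustrative parameter grid for do_grid_search (an assumption, not from
# the original): pipeline parameters are addressed as '<step name>__<parameter>',
# so the logreg step's C and penalty can be swept like this. X, Y are the
# training data, and the function also expects Pipeline, GridSearchCV and a
# module-level logger to be available.
gs_params = {
    'logreg__C': [0.01, 0.1, 1.0, 10.0, 100.0],
    'logreg__penalty': ['l1', 'l2'],
}
best_parameters = do_grid_search(X, Y, gs_params)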
Example #4
def test_auto_weight():
    """Test class weights for imbalanced data"""
    from scikits.learn.linear_model import LogisticRegression
    # we take as dataset the two-dimensional projection of iris, so that
    # it is not separable, and remove half of the predictors from class 1
    from scikits.learn.svm.base import _get_class_weight
    X, y = iris.data[:, :2], iris.target
    unbalanced = np.delete(np.arange(y.size), np.where(y > 1)[0][::2])

    assert np.argmax(_get_class_weight('auto', y[unbalanced])[0]) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(), LogisticRegression()):
        # check that score is better when class='auto' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced],
                         class_weight={}).predict(X)
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],
                                  class_weight='auto').predict(X)
        assert metrics.f1_score(y, y_pred) <= metrics.f1_score(y, y_pred_balanced)
    def pairwise_regress(thetas, cats, ii, jj, reg_type, reg_param):
        """ Fits a binary logistic regression separating category ii
            (coded 0) from category jj (coded 1) on the rows of thetas. """

        set0 = [idx for (idx, cat) in enumerate(cats) if cat == ii]
        set1 = [idx for (idx, cat) in enumerate(cats) if cat == jj]

        X0 = thetas[set0, :]
        Y0 = np.zeros(X0.shape[0])
        X1 = thetas[set1, :]
        Y1 = np.ones(X1.shape[0])

        X = np.vstack([X0, X1])
        Y = np.concatenate([Y0, Y1])

        lr = LogisticRegression(penalty=reg_type,
                                tol=0.00000000001,
                                C=reg_param)
        lr.fit(X, Y)

        return lr
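
    # Illustrative call (an assumption, not from the original): thetas is an
    # (n_samples, n_features) array and cats gives each row's category label;
    # e.g. lr02 = pairwise_regress(thetas, cats, 0, 2, 'l1', 10.0) fits the
    # classifier separating category 0 from category 2.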
Example #6
import numpy as np
import pylab as pl

from scikits.learn.linear_model import LogisticRegression
from scikits.learn.svm import SVC
from scikits.learn import datasets

iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features for visualization
y = iris.target

n_features = X.shape[1]

C = 1.0

# Create different classifiers. The logistic regression cannot do
# multiclass out of the box.
classifiers = {
                'L1 logistic': LogisticRegression(C=C, penalty='l1'),
                'L2 logistic': LogisticRegression(C=C, penalty='l2'),
                'Linear SVC': SVC(kernel='linear', C=C, probability=True),
              }

n_classifiers = len(classifiers)

pl.figure(figsize=(3*2, n_classifiers*2))
pl.subplots_adjust(bottom=.2, top=.95)

for index, (name, classifier) in enumerate(classifiers.iteritems()):
    classifier.fit(X, y)

    y_pred = classifier.predict(X)
    classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
    print  "classif_rate for %s : %f " % (name, classif_rate)
Example #7
        dstask[ds][t] = ctr
        ctr = ctr + 1

Y = N.zeros(len(copedata))
for x in range(len(copedata)):
    Y[x] = dstask[copedata[x, 0]][copedata[x, 1]]

X = melodic_mix[usedata == 1, :]

loo = LeaveOneOut(len(Y))

predclass = N.zeros(len(Y))

for train, test in loo:
    X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
    clf = LogisticRegression(C=0.1, penalty='l1')
    clf.fit(X_train, y_train)
    predclass[test] = clf.predict(X_test)

print 'Mean accuracy=%0.3f' % N.mean(predclass == Y)

# randomize labels nruns times and store accuracy
nruns = 500
randacc = N.zeros(nruns)

for r in range(nruns):
    N.random.shuffle(Y)
    for train, test in loo:
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        clf = LogisticRegression(C=1, penalty='l2')
        clf.fit(X_train, y_train)
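        # The snippet appears truncated here; a plausible completion (an
        # assumption, not from the original) records the permuted-label
        # predictions and accuracy, mirroring the unshuffled run above:
        predclass[test] = clf.predict(X_test)
    randacc[r] = N.mean(predclass == Y)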
# Author: Alexandre Gramfort <*****@*****.**>
# License: BSD Style.

import numpy as np

from scikits.learn.linear_model import LogisticRegression
from scikits.learn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Set regularization parameter
C = 0.1

classifier_l1_LR = LogisticRegression(C=C, penalty='l1')
classifier_l2_LR = LogisticRegression(C=C, penalty='l2')
classifier_l1_LR.fit(X, y)
classifier_l2_LR.fit(X, y)

hyperplane_coefficients_l1_LR = classifier_l1_LR.coef_[:]
hyperplane_coefficients_l2_LR = classifier_l2_LR.coef_[:]

# hyperplane_coefficients_l1_LR contains zeros due to the
# L1 sparsity-inducing norm

pct_non_zeros_l1_LR = np.mean(hyperplane_coefficients_l1_LR != 0) * 100
pct_non_zeros_l2_LR = np.mean(hyperplane_coefficients_l2_LR != 0) * 100

print "Percentage of non zeros coefficients (L1) : %f" % pct_non_zeros_l1_LR
print "Percentage of non zeros coefficients (L2) : %f" % pct_non_zeros_l2_LR
Example #9
def logistic_regression(state_matrix, teacher_matrix):
    """ Fits one logistic regression per output column, using column i of
        teacher_matrix as the target, and stores the learned coefficients
        as the readout weights. (The reference to self suggests this was
        lifted from a method of a readout class.) """
    for i in range(teacher_matrix.shape[1]):
        clf = LogisticRegression(tol=0.05)
        clf.fit(state_matrix, teacher_matrix[:, i])
        self.output_weights[i, :] = clf.coef_
        print i
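
# A self-contained variant of the per-column readout fit above (a sketch under
# the assumption that state_matrix is (n_samples, n_states), teacher_matrix is
# (n_samples, n_outputs), and LogisticRegression is imported as elsewhere on
# this page); it returns the weights instead of writing to self.output_weights.
def fit_readout_weights(state_matrix, teacher_matrix, tol=0.05):
    import numpy as np
    n_outputs = teacher_matrix.shape[1]
    output_weights = np.zeros((n_outputs, state_matrix.shape[1]))
    for i in range(n_outputs):
        clf = LogisticRegression(tol=tol)
        clf.fit(state_matrix, teacher_matrix[:, i])
        output_weights[i, :] = clf.coef_.ravel()
    return output_weights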
Example #10
# (import paths below follow the old scikits.learn API used elsewhere on this
#  page; cross_val was later renamed cross_validation)
from scikits.learn import datasets, svm, neighbors
from scikits.learn.linear_model import LogisticRegression
from scikits.learn.cross_val import StratifiedKFold
from rpy2.robjects import FloatVector as rfloat
import numpy as np

iris = datasets.load_iris()

def test_algorithm(algorithm, results, train_data, train_target, test_data):
    algorithm.fit(train_data, train_target)
    y_pred = algorithm.predict(test_data)
    results.append(precision(y_pred))

def precision(y_pred):
    # note: this is really an accuracy score, and it reads the module-level
    # test_target that is reassigned inside the cross-validation loop below
    prec = sum(y_pred == test_target)
    return float(prec) / len(test_target)

svmclf = svm.SVC()
logisticclf = LogisticRegression()
nnclf = neighbors.Neighbors()
svmli = []
logli = []
nnli = []

cv = StratifiedKFold(iris.target, 20)
for train_index, test_index in cv:
    train_data = iris.data[train_index]
    train_target = iris.target[train_index]
    test_data = iris.data[test_index]
    test_target = iris.target[test_index]

    #svm
    test_algorithm(svmclf, svmli, train_data, train_target, test_data)