def test_liblinear_random_state():
    X, y = datasets.make_classification(n_samples=20)
    lr1 = logistic.LogisticRegression(random_state=0)
    lr1.fit(X, y)
    lr2 = logistic.LogisticRegression(random_state=0)
    lr2.fit(X, y)
    assert_array_equal(lr1.coef_, lr2.coef_)

def test_transform():
    # In older scikit-learn versions, LogisticRegression.transform() performed
    # L1-based feature selection (later replaced by SelectFromModel).
    clf = logistic.LogisticRegression(penalty="l1")
    clf.fit(iris.data, iris.target)
    X_new = clf.transform(iris.data)

    clf = logistic.LogisticRegression()
    clf.fit(X_new, iris.target)
    pred = clf.predict(X_new)
    assert np.mean(pred == iris.target) >= 0.75

def test_predict_3_classes():
    clf = logistic.LogisticRegression(C=10).fit(X, Y2)
    assert_array_equal(clf.predict(X), Y2)
    assert_array_equal(clf.predict_proba(X).argmax(axis=1), Y2)

    clf = logistic.LogisticRegression(C=10).fit(X_sp, Y2)
    assert_array_equal(clf.predict(X_sp), Y2)
    assert_array_equal(clf.predict_proba(X_sp).argmax(axis=1), Y2)

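# Many of these snippets reference module-level fixtures (X, X_sp, Y1, Y2,
# iris) that are defined elsewhere. A minimal sketch of plausible definitions,
# modeled on scikit-learn's own test_logistic.py; the exact values here are
# assumptions, not the originals:
import numpy as np
import scipy.sparse as sp
from sklearn import datasets
from sklearn.linear_model import logistic

X = [[-1, 0], [0, 1], [1, 1]]
X_sp = sp.csr_matrix(X)
Y1 = [0, 1, 1]
Y2 = [2, 1, 0]
iris = datasets.load_iris()
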
def getEstimator(scorer_type):
    if scorer_type == 'grad_boost':
        clf = GradientBoostingClassifier(n_estimators=200, random_state=14128,
                                         verbose=True)

    if scorer_type == 'svm1':  # support vector classifier with RBF kernel
        clf = svm.SVC(gamma=0.001, C=100., verbose=True)

    if scorer_type == 'logistic_regression':
        clf = logistic.LogisticRegression()

    if scorer_type == 'svm3':
        # 'balanced' is the valid option here; scikit-learn rejects
        # class_weight='unbalanced' with a ValueError.
        clf = svm.SVC(kernel='poly', C=1.0, probability=True,
                      class_weight='balanced')

    if scorer_type == "bayes":
        clf = naive_bayes.GaussianNB()

    if scorer_type == 'voting_hard_svm_gradboost_logistic':
        svm2 = svm.SVC(kernel='linear', C=1.0, probability=True,
                       class_weight='balanced', verbose=True)
        log_reg = logistic.LogisticRegression()
        gradboost = GradientBoostingClassifier(n_estimators=200,
                                               random_state=14128,
                                               verbose=True)
        clf = VotingClassifier(
            estimators=[
                ('svm', svm2),
                ('grad_boost', gradboost),
                ('logistic_regression', log_reg),
            ],
            n_jobs=1,
            voting='hard')

    if scorer_type == 'voting_hard_bayes_gradboost':
        bayes = naive_bayes.GaussianNB()
        gradboost = GradientBoostingClassifier(n_estimators=200,
                                               random_state=14128,
                                               verbose=True)
        clf = VotingClassifier(
            estimators=[
                ('bayes', bayes),
                ('grad_boost', gradboost),
            ],
            n_jobs=1,
            voting='hard')

    return clf

def init(self, class_num, init_params: dict):
    self.clf_name = "sl_lr_sag"
    self.class_num = class_num
    self.max_iter = init_params.get("max_iter")
    # Single-label model: the SAG solver handles multiclass problems directly.
    self.model = logistic.LogisticRegression(C=1.0,
                                             max_iter=self.max_iter,
                                             solver="sag",
                                             multi_class="auto")
    # Multi-label model: one-vs-rest wrapper around a liblinear-based model.
    self.ml_model = OneVsRestClassifier(
        logistic.LogisticRegression(solver="liblinear"))

def test_inconsistent_input():
    """Test that an exception is raised on inconsistent input to predict"""
    X_ = np.random.random((5, 10))
    y_ = np.ones(X_.shape[0])
    assert_raises(ValueError,
                  logistic.LogisticRegression().fit(X_, y_).predict,
                  np.random.random((3, 12)))

def test_predict_2_classes():
    """Simple sanity check on a 2 classes dataset

    Make sure it predicts the correct result on simple datasets.
    """
    clf = logistic.LogisticRegression().fit(X, Y1)
    assert_array_equal(clf.predict(X), Y1)
    assert_array_equal(clf.predict_proba(X).argmax(axis=1), Y1)

    clf = logistic.LogisticRegression(C=100).fit(X, Y1)
    assert_array_equal(clf.predict(X), Y1)
    assert_array_equal(clf.predict_proba(X).argmax(axis=1), Y1)

    clf = logistic.LogisticRegression(fit_intercept=False).fit(X, Y1)
    assert_array_equal(clf.predict(X), Y1)
    assert_array_equal(clf.predict_proba(X).argmax(axis=1), Y1)

def test_sklearn():
    for alpha in np.logspace(-3, 3):

        def logloss(x):
            return logistic._logistic_loss(x, X, y, 0.)

        def fprime_logloss(x):
            return logistic._logistic_loss_and_grad(x, X, y, 0.)[1]

        def g_prox(x, step_size):
            """L1 regularization: soft-thresholding of x at step_size * alpha."""
            return np.fmax(x - step_size * alpha, 0) \
                - np.fmax(-x - step_size * alpha, 0)

        clf = logistic.LogisticRegression(penalty='l1', fit_intercept=False,
                                          C=1. / alpha)
        clf.fit(X, y)
        opt = fmin_cgprox(logloss, fprime_logloss, g_prox,
                          np.zeros(n_features), rtol=1e-12)
        assert linalg.norm(opt.x - clf.coef_) < 1e-3

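# Why g_prox above is the right proximal operator: for g(w) = alpha * ||w||_1,
# prox_{t*g}(w) = sign(w) * max(|w| - t * alpha, 0), i.e. soft-thresholding,
# and the two-fmax expression is a branch-free way to write the same thing.
# A quick self-contained check (the alpha, step_size and x values here are
# arbitrary, chosen only for illustration):
import numpy as np

def soft_threshold(x, t):
    # Closed-form soft-thresholding operator.
    return np.sign(x) * np.maximum(np.abs(x) - t, 0.0)

alpha, step_size = 0.5, 0.1
x = np.array([-2.0, -0.03, 0.0, 0.02, 1.5])
two_fmax = np.fmax(x - step_size * alpha, 0) - np.fmax(-x - step_size * alpha, 0)
assert np.allclose(two_fmax, soft_threshold(x, step_size * alpha))
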
def __init__(self, **kwargs):
    self.name = "LR"
    self._model = logistic.LogisticRegression(C=1.0,
                                              solver="liblinear",
                                              multi_class="auto",
                                              class_weight=None,
                                              max_iter=100,
                                              random_state=666)

def init(self, class_num: int, init_params: dict = None):
    self.clf_name = "sl_lr_liblinear"
    self.class_num = class_num
    self.model = logistic.LogisticRegression(solver="liblinear")
    self.ml_mode = 2
    self.ml_models = [
        OneVsRestClassifier(
            logistic.LogisticRegression(solver="liblinear"))
        for i in range(class_num)
    ]
    self.ml_model = OneVsRestClassifier(
        logistic.LogisticRegression(solver="liblinear"))
    self.logReg_pipeline = Pipeline([
        ('clf', OneVsRestClassifier(
            logistic.LogisticRegression(solver='liblinear'), n_jobs=-1)),
    ])

def test_nan():
    """Test proper NaN handling.

    Regression test for Issue #252: fit used to go into an infinite loop.
    """
    Xnan = np.array(X, dtype=np.float64)
    Xnan[0, 1] = np.nan
    logistic.LogisticRegression().fit(Xnan, Y1)

def init_model(self, kernel, max_iter=200, C=1.0, **kwargs):
    # `kernel` is accepted for interface compatibility; LogisticRegression
    # does not use it.
    self._model = logistic.LogisticRegression(C=C,
                                              max_iter=max_iter,
                                              solver='liblinear',
                                              multi_class='auto')
    self.is_init = True

def init(self, class_num: int, init_params: dict):
    self.clf_name = "ml_sl_lr_liblinear"
    self.class_num = class_num
    self.model = logistic.LogisticRegression(solver="liblinear")
    info("Backbone classifier={} is init, class_num={}, init_params={}"
         .format(self.clf_name, self.class_num, init_params))

def init(self, class_num, init_params: dict):
    self.clf_name = "sl_lr_sag"
    self.class_num = class_num
    self.max_iter = init_params.get("max_iter")
    # Single-label model: SAG solver with multinomial handling.
    self.model = logistic.LogisticRegression(C=1.0,
                                             max_iter=self.max_iter,
                                             solver="sag",
                                             multi_class="auto")
    # Multi-label model: one-vs-rest wrapper around a liblinear-based model.
    self.ml_model = OneVsRestClassifier(
        logistic.LogisticRegression(solver="liblinear"))
    info("Backbone classifier={} is init, class_num={}, init_params={}"
         .format(self.clf_name, self.class_num, init_params))

def CorpFitModel(train_set, train_label):
    '''
    train_set -- shape [n_sample, n_feature]
    train_label -- shape [n_sample]
    '''
    check_length(train_set, train_label)
    fit_model = lgst.LogisticRegression()
    fit_model.fit(train_set, train_label)
    return fit_model

def test_write_parameters():
    """Test that we can write to coef_ and intercept_"""
    clf = logistic.LogisticRegression()
    clf.fit(X, Y1)
    clf.coef_[:] = 0
    clf.intercept_[:] = 0
    assert_array_equal(clf.decision_function(X), 0)

def test_predict_iris():
    """Test logistic regression with the iris dataset"""
    clf = logistic.LogisticRegression().fit(iris.data, iris.target)

    pred = clf.predict(iris.data)
    assert np.mean(pred == iris.target) > .95

    pred = clf.predict_proba(iris.data).argmax(axis=1)
    assert np.mean(pred == iris.target) > .95

def init_model(self, kernel, num_classes, max_iter=200, C=1.0, **kwargs):
    self._num_classes = num_classes
    # Use balanced class weights when there are more than 5 classes.
    if num_classes <= 5:
        class_weight = None
    else:
        class_weight = "balanced"
    self._model = logistic.LogisticRegression(C=C,
                                              max_iter=max_iter,
                                              solver='liblinear',
                                              multi_class='auto',
                                              class_weight=class_weight)
    self.is_init = True

def init_model(self, config, **kwargs):
    num_classes = config['num_classes']
    sample_num = config['sample_num']  # read from config but unused below
    max_iter = 200
    C = 1.0
    self._model = logistic.LogisticRegression(C=C,
                                              max_iter=max_iter,
                                              solver='liblinear',
                                              multi_class='auto')
    self.is_init = True

def test_predict_iris():
    """Test logistic regression with the iris dataset"""
    target = iris.target_names[iris.target]
    clf = logistic.LogisticRegression(C=len(iris.data)).fit(iris.data, target)
    assert_equal(set(target), set(clf.classes_))

    pred = clf.predict(iris.data)
    assert_greater(np.mean(pred == target), .95)

    pred = iris.target_names[clf.predict_proba(iris.data).argmax(axis=1)]
    assert_greater(np.mean(pred == target), .95)

def __init__(self, inputArray, classes):
    """
    Initialize the log_reg object and fit the training data into the algorithm

    :param self: The current object of the class
    :param inputArray: The array used to train the algorithm
    :param classes: Array containing the class of each input
    """
    self.lr = logistic.LogisticRegression()
    # Rotate the input by 270 degrees (three successive 90-degree rotations)
    # before fitting.
    d270 = np.rot90(inputArray, k=3)
    self.lr.fit(d270, classes)

def test_inconsistent_input():
    """Test that an exception is raised on inconsistent input"""
    X_ = np.random.random((5, 10))
    y_ = np.ones(X_.shape[0])
    clf = logistic.LogisticRegression()

    # Wrong dimensions for training data
    y_wrong = y_[:-1]
    assert_raises(ValueError, clf.fit, X, y_wrong)

    # Wrong dimensions for test data
    assert_raises(ValueError,
                  clf.fit(X_, y_).predict,
                  np.random.random((3, 12)))

def init(self, class_num: int, init_params: dict = None):
    self.clf_name = "sl_lr_liblinear"
    self.class_num = class_num

    # For single labels.
    self.model = logistic.LogisticRegression(solver="liblinear")

    self.ml_mode = 2

    # For multi-labels.
    # Mode-1: class_num * (OneVsRestClassifier + LR).
    self.ml_models = [
        OneVsRestClassifier(
            logistic.LogisticRegression(solver="liblinear"))
        for i in range(class_num)
    ]
    # Mode-2: OneVsRestClassifier + LR.
    self.ml_model = OneVsRestClassifier(
        logistic.LogisticRegression(solver="liblinear"))
    # Mode-3: Pipeline + OneVsRestClassifier + LR.
    self.logReg_pipeline = Pipeline([
        ('clf', OneVsRestClassifier(
            logistic.LogisticRegression(solver='liblinear'), n_jobs=-1)),
    ])
    # Alternative backbone (not used here): DecisionTreeClassifier().

    info("Backbone classifier=SLLRLiblinear is init, class_num={}, init_params={}"
         .format(self.class_num, init_params))

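# How the "mode-2" one-vs-rest model above is typically used on multi-label
# data: y is a binary indicator matrix of shape (n_samples, class_num), and
# OneVsRestClassifier fits one binary LogisticRegression per column. A
# self-contained sketch (the dataset and its parameters are illustrative, not
# from the original code):
from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

X_ml, y_ml = make_multilabel_classification(n_samples=100, n_classes=5,
                                            random_state=0)
ml_model = OneVsRestClassifier(LogisticRegression(solver="liblinear"))
ml_model.fit(X_ml, y_ml)
print(ml_model.predict(X_ml[:3]))  # rows of 0/1 indicators, one per class
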
def test_predict_iris():
    """Test logistic regression with the iris dataset"""
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
    clf = logistic.LogisticRegression(C=len(iris.data)).fit(iris.data, target)
    assert_array_equal(np.unique(target), clf.classes_)

    pred = clf.predict(iris.data)
    assert_greater(np.mean(pred == target), .95)

    probabilities = clf.predict_proba(iris.data)
    assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples))

    pred = iris.target_names[probabilities.argmax(axis=1)]
    assert_greater(np.mean(pred == target), .95)

def test_inconsistent_input():
    """Test that an exception is raised on inconsistent input"""
    rng = np.random.RandomState(0)
    X_ = rng.random_sample((5, 10))
    y_ = np.ones(X_.shape[0])
    y_[0] = 0
    clf = logistic.LogisticRegression(random_state=0)

    # Wrong dimensions for training data
    y_wrong = y_[:-1]
    assert_raises(ValueError, clf.fit, X, y_wrong)

    # Wrong dimensions for test data
    assert_raises(ValueError,
                  clf.fit(X_, y_).predict,
                  rng.random_sample((3, 12)))

def test_predict_2_classes():
    """Simple sanity check on a 2 classes dataset

    Make sure it predicts the correct result on simple datasets.
    """
    check_predictions(logistic.LogisticRegression(), X, Y1)
    check_predictions(logistic.LogisticRegression(), X_sp, Y1)

    check_predictions(logistic.LogisticRegression(C=100), X, Y1)
    check_predictions(logistic.LogisticRegression(C=100), X_sp, Y1)

    check_predictions(logistic.LogisticRegression(fit_intercept=False), X, Y1)
    check_predictions(logistic.LogisticRegression(fit_intercept=False), X_sp, Y1)

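# check_predictions is not defined in these snippets. A minimal sketch of the
# helper the test above assumes, modeled on scikit-learn's test_logistic.py:
def check_predictions(clf, X, y):
    """Fit clf on (X, y) and check that it reproduces the labels exactly."""
    n_samples = len(y)
    classes = np.unique(y)

    predicted = clf.fit(X, y).predict(X)
    assert_array_equal(clf.classes_, classes)
    assert predicted.shape == (n_samples,)
    assert_array_equal(predicted, y)

    probabilities = clf.predict_proba(X)
    assert probabilities.shape == (n_samples, classes.shape[0])
    assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples))
    assert_array_equal(probabilities.argmax(axis=1), y)
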
def train_model(train_f):
    """Train the logistic regression model on the data in train_f."""
    import random
    from numpy import array, loadtxt, vstack
    from sklearn.linear_model import logistic

    train_data = loadtxt(train_f)
    r, c = train_data.shape
    # The last column holds the label; split into positives and negatives.
    pos_train = train_data[train_data[:, c - 1] == 1]
    neg_train_e = train_data[train_data[:, c - 1] == 0]
    # Undersample negatives so positives make up ~21% of the training set.
    pos_ratio = 0.21
    pos_nb = len(pos_train)
    neg_nb = int(pos_nb / pos_ratio) - pos_nb
    # random.sample needs a sequence, so convert the array to a list first.
    neg_train = array(random.sample(list(neg_train_e), neg_nb))
    Xtrain = vstack((pos_train[:, 0:c - 1], neg_train[:, 0:c - 1]))
    ytrain = array(list(pos_train[:, c - 1]) + list(neg_train[:, c - 1]))
    clf = logistic.LogisticRegression().fit(Xtrain, ytrain)
    serialize.saveData("model", clf, where="./", suffix=".seg")

def test_sparsify():
    """Test sparsify and densify members."""
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
    clf = logistic.LogisticRegression(random_state=0).fit(iris.data, target)

    pred_d_d = clf.decision_function(iris.data)

    clf.sparsify()
    assert_true(sp.issparse(clf.coef_))
    pred_s_d = clf.decision_function(iris.data)

    sp_data = sp.coo_matrix(iris.data)
    pred_s_s = clf.decision_function(sp_data)

    clf.densify()
    pred_d_s = clf.decision_function(sp_data)

    assert_array_almost_equal(pred_d_d, pred_s_d)
    assert_array_almost_equal(pred_d_d, pred_s_s)
    assert_array_almost_equal(pred_d_d, pred_d_s)

def init_model(self, num_classes, max_iter=200, C=1.0, is_multilabel=False,
               **kwargs):
    self._num_classes = num_classes
    self._is_multilabel = is_multilabel
    # Use balanced class weights when there are more than 5 classes.
    if num_classes <= 5:
        class_weight = None
    else:
        class_weight = "balanced"
    self._model = logistic.LogisticRegression(C=C,
                                              max_iter=max_iter,
                                              solver='liblinear',
                                              multi_class='auto',
                                              class_weight=class_weight)
    # Wrap in one-vs-rest for multi-label problems.
    if is_multilabel:
        self._model = OneVsRestClassifier(self._model)
    self.is_init = True

def sklearn_logistic(X, y, out_file=None, k_fold=10):
    '''
    Use the built-in logistic regression from sklearn.
    Split into k stratified folds.
    '''
    skf = cross_validation.StratifiedKFold(y, n_folds=k_fold, shuffle=True)
    total_score = list()
    y_predictions = list()
    for train_index, test_index in skf:
        # Scale the data with statistics computed on the training fold only.
        scaler = preprocessing.StandardScaler().fit(X[train_index])
        X_train = scaler.transform(X[train_index])
        X_test = scaler.transform(X[test_index])
        # Apply logistic regression.
        log_reg = logist.LogisticRegression()
        score = log_reg.fit(X_train, y[train_index]).score(X_test,
                                                           y[test_index])
        if out_file is not None:
            out_file.write(str(score) + '\n')
            # Predict on the scaled test fold (the model was fit on scaled
            # data, so passing the raw X[test_index] here would be a bug).
            confuse = metrics.confusion_matrix(y[test_index],
                                               log_reg.predict(X_test))
            out_file.write(str(confuse) + '\n')